// blob: 3c50629db2954950ab05437200b1d9e3fc314051 [file] [log] [blame]
//
// Copyright 2015 gRPC authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include <grpc/support/port_platform.h>
#include "src/core/lib/uri/uri_parser.h"
#include <ctype.h>
#include <stddef.h>
#include <algorithm>
#include <functional>
#include <initializer_list>
#include <map>
#include <string>
#include <utility>
#include "absl/status/status.h"
#include "absl/strings/ascii.h"
#include "absl/strings/escaping.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include <grpc/support/log.h>
namespace grpc_core {
namespace {
// Reports whether `c` is one of the RFC 3986 "sub-delims" characters:
//   ! $ & ' ( ) * + , ; =
// See https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
bool IsSubDelimChar(char c) {
  // Brace-initialized (not a string literal) so there is no trailing NUL in
  // the table and '\0' is never reported as a sub-delim.
  static constexpr char kSubDelims[] = {'!', '$', '&', '\'', '(', ')',
                                        '*', '+', ',', ';',  '='};
  for (const char candidate : kSubDelims) {
    if (candidate == c) return true;
  }
  return false;
}
// Reports whether `c` is an RFC 3986 "unreserved" character: an ASCII letter
// or digit, or one of '-', '.', '_', '~'.
// See https://datatracker.ietf.org/doc/html/rfc3986#section-2.3
bool IsUnreservedChar(char c) {
  return absl::ascii_isalnum(c) || c == '-' || c == '.' || c == '_' ||
         c == '~';
}
// Reports whether `c` may appear in a URI scheme: an ASCII letter or digit,
// or one of '+', '-', '.'.
// See https://datatracker.ietf.org/doc/html/rfc3986#section-3.1
bool IsSchemeChar(char c) {
  return absl::ascii_isalnum(c) || c == '+' || c == '-' || c == '.';
}
// Reports whether `c` may appear unescaped in a URI authority: any unreserved
// or sub-delim character, plus ':', '[', ']' and '@'.
// See https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
bool IsAuthorityChar(char c) {
  const bool is_authority_delim = c == ':' || c == '[' || c == ']' || c == '@';
  return is_authority_delim || IsUnreservedChar(c) || IsSubDelimChar(c);
}
// Reports whether `c` is a "pchar" that needs no percent-encoding: any
// unreserved or sub-delim character, plus ':' and '@'.
// See https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
bool IsPChar(char c) {
  return IsUnreservedChar(c) || IsSubDelimChar(c) || c == ':' || c == '@';
}
// Reports whether `c` may appear unescaped in a URI path: any pchar, or the
// segment separator '/'.
// See https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
bool IsPathChar(char c) {
  if (c == '/') return true;
  return IsPChar(c);
}
// Reports whether `c` may appear unescaped in a URI query or fragment: any
// pchar, plus '/' and '?'.
// See https://tools.ietf.org/html/rfc3986#section-3.4
bool IsQueryOrFragmentChar(char c) {
  switch (c) {
    case '/':
    case '?':
      return true;
    default:
      return IsPChar(c);
  }
}
// Like IsQueryOrFragmentChar(), except that '&' and '=' are rejected, so that
// those two characters get percent-encoded when they occur inside a query
// key or value.
bool IsQueryKeyOrValueChar(char c) {
  if (c == '&' || c == '=') return false;
  return IsQueryOrFragmentChar(c);
}
// Builds a copy of `str` in which every character rejected by
// is_allowed_char() is replaced by its percent-encoded form ("%XX"). Characters
// accepted by is_allowed_char() are copied through unchanged.
std::string PercentEncode(absl::string_view str,
                          std::function<bool(char)> is_allowed_char) {
  // https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2.1 says to
  // prefer upper-case hex digits in percent-encodings.
  static constexpr char kUpperHexDigits[] = "0123456789ABCDEF";
  std::string encoded;
  for (const char c : str) {
    if (is_allowed_char(c)) {
      encoded.push_back(c);
      continue;
    }
    // Go through unsigned char so bytes >= 0x80 index the table correctly.
    const unsigned char byte = static_cast<unsigned char>(c);
    encoded.push_back('%');
    encoded.push_back(kUpperHexDigits[byte >> 4]);
    encoded.push_back(kUpperHexDigits[byte & 0x0F]);
  }
  return encoded;
}
// Reports whether every character of `str` is either a query/fragment
// character or '%'. An empty string trivially qualifies.
// See https://tools.ietf.org/html/rfc3986#section-3.4
bool IsQueryOrFragmentString(absl::string_view str) {
  return std::all_of(str.begin(), str.end(), [](char c) {
    return c == '%' || IsQueryOrFragmentChar(c);
  });
}
// Builds the InvalidArgument status returned for a URI parse failure.
//   part_name: the URI component that could not be parsed (e.g. "scheme").
//   uri:       the full URI text that was being parsed.
//   extra:     additional detail appended to the end of the message.
absl::Status MakeInvalidURIStatus(absl::string_view part_name,
                                  absl::string_view uri,
                                  absl::string_view extra) {
  const std::string message = absl::StrFormat(
      "Could not parse '%s' from uri '%s'. %s", part_name, uri, extra);
  return absl::InvalidArgumentError(message);
}
} // namespace
// Percent-encodes every character of `str` that IsAuthorityChar() rejects.
std::string URI::PercentEncodeAuthority(absl::string_view str) {
  std::string encoded_authority = PercentEncode(str, IsAuthorityChar);
  return encoded_authority;
}
// Percent-encodes every character of `str` that IsPathChar() rejects.
std::string URI::PercentEncodePath(absl::string_view str) {
  std::string encoded_path = PercentEncode(str, IsPathChar);
  return encoded_path;
}
// Permissive percent-decoding, in the spirit of
// `grpc_permissive_percent_decode_slice`: every "%XX" triplet that unescapes
// to exactly one byte is replaced by that byte; anything else (including a
// stray or truncated '%') is copied through verbatim.
std::string URI::PercentDecode(absl::string_view str) {
  // Nothing to decode: hand back a plain copy.
  if (str.empty() || !absl::StrContains(str, "%")) {
    return std::string(str);
  }
  std::string decoded;
  decoded.reserve(str.size());
  std::string byte;  // Scratch buffer for the unescaped triplet.
  size_t pos = 0;
  while (pos < str.length()) {
    byte.clear();
    // A triplet needs '%' plus two more characters. The two characters are
    // handed to CUnescape() as a "\xNN" escape, and the result is only
    // accepted when it is exactly one byte long.
    const bool is_valid_triplet =
        str[pos] == '%' && pos + 3 <= str.length() &&
        absl::CUnescape(absl::StrCat("\\x", str.substr(pos + 1, 2)), &byte) &&
        byte.length() == 1;
    if (is_valid_triplet) {
      decoded.push_back(byte[0]);
      pos += 3;
    } else {
      decoded.push_back(str[pos]);
      ++pos;
    }
  }
  return decoded;
}
// Parses `uri_text` into a URI, working left to right through
//   scheme ":" ["//" authority] [path] ["?" query] ["#" fragment].
// Authority, path, query keys/values and fragment are percent-decoded before
// being stored. Returns an InvalidArgument status when:
//   - no scheme is found (no ':' present, or ':' is the first character);
//   - the scheme contains a character other than [A-Za-z0-9+-.], or does not
//     start with a letter;
//   - a '?' is present but the query text after it is empty;
//   - the query or fragment contains a character that is neither a
//     query/fragment character nor '%'.
absl::StatusOr<URI> URI::Parse(absl::string_view uri_text) {
  // The not-yet-consumed tail of the input.
  absl::string_view rest = uri_text;
  // Drops the first `count` characters of `rest`; npos means "everything".
  auto skip = [&rest](size_t count) {
    if (count != absl::string_view::npos) {
      rest.remove_prefix(count);
    } else {
      rest = "";
    }
  };

  // Scheme: everything before the first ':'.
  const size_t colon = rest.find(':');
  if (colon == 0 || colon == rest.npos) {
    return MakeInvalidURIStatus("scheme", uri_text, "Scheme not found.");
  }
  std::string scheme(rest.substr(0, colon));
  const char* const kAllowedSchemeChars =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+-.";
  const bool scheme_chars_ok =
      scheme.find_first_not_of(kAllowedSchemeChars) == std::string::npos;
  if (!scheme_chars_ok) {
    return MakeInvalidURIStatus("scheme", uri_text,
                                "Scheme contains invalid characters.");
  }
  if (!isalpha(scheme.front())) {
    return MakeInvalidURIStatus(
        "scheme", uri_text,
        "Scheme must begin with an alpha character [A-Za-z].");
  }
  skip(colon + 1);

  // Authority: only present after a leading "//"; runs up to the next '/',
  // '?' or '#'.
  std::string authority;
  if (absl::ConsumePrefix(&rest, "//")) {
    const size_t authority_end = rest.find_first_of("/?#");
    authority = PercentDecode(rest.substr(0, authority_end));
    skip(authority_end);
  }

  // Path: whatever precedes the next '?' or '#'.
  std::string path;
  if (!rest.empty()) {
    const size_t path_end = rest.find_first_of("?#");
    path = PercentDecode(rest.substr(0, path_end));
    skip(path_end);
  }

  // Query: '&'-separated "key=value" items, up to the next '#'.
  std::vector<QueryParam> query_param_pairs;
  if (absl::ConsumePrefix(&rest, "?")) {
    const size_t query_end = rest.find('#');
    const absl::string_view query_text = rest.substr(0, query_end);
    if (query_text.empty()) {
      return MakeInvalidURIStatus("query", uri_text, "Invalid query string.");
    }
    if (!IsQueryOrFragmentString(query_text)) {
      return MakeInvalidURIStatus("query string", uri_text,
                                  "Query string contains invalid characters.");
    }
    for (absl::string_view item : absl::StrSplit(query_text, '&')) {
      // Split on the first '=' only, so values may themselves contain '='.
      const std::pair<absl::string_view, absl::string_view> key_value =
          absl::StrSplit(item, absl::MaxSplits('=', 1));
      // Items with an empty key are dropped.
      if (!key_value.first.empty()) {
        query_param_pairs.push_back({PercentDecode(key_value.first),
                                     PercentDecode(key_value.second)});
      }
    }
    skip(query_end);
  }

  // Fragment: everything after '#'.
  std::string fragment;
  if (absl::ConsumePrefix(&rest, "#")) {
    if (!IsQueryOrFragmentString(rest)) {
      return MakeInvalidURIStatus("fragment", uri_text,
                                  "Fragment contains invalid characters.");
    }
    fragment = PercentDecode(rest);
  }

  return URI(std::move(scheme), std::move(authority), std::move(path),
             std::move(query_param_pairs), std::move(fragment));
}
// Assembles a URI from already-separated components. The only validation
// performed here is that a non-empty path starts with '/' whenever an
// authority is present; otherwise an InvalidArgument status is returned.
absl::StatusOr<URI> URI::Create(std::string scheme, std::string authority,
                                std::string path,
                                std::vector<QueryParam> query_parameter_pairs,
                                std::string fragment) {
  const bool has_authority = !authority.empty();
  const bool path_lacks_leading_slash = !path.empty() && path[0] != '/';
  if (has_authority && path_lacks_leading_slash) {
    return absl::InvalidArgumentError(
        "if authority is present, path must start with a '/'");
  }
  return URI(std::move(scheme), std::move(authority), std::move(path),
             std::move(query_parameter_pairs), std::move(fragment));
}
// Takes ownership of the given components, then indexes the query parameters
// by key. A later pair with the same key overwrites the earlier map entry.
URI::URI(std::string scheme, std::string authority, std::string path,
         std::vector<QueryParam> query_parameter_pairs, std::string fragment)
    : scheme_(std::move(scheme)),
      authority_(std::move(authority)),
      path_(std::move(path)),
      query_parameter_pairs_(std::move(query_parameter_pairs)),
      fragment_(std::move(fragment)) {
  // The map is filled from the member, not from the moved-from parameter.
  for (auto it = query_parameter_pairs_.begin();
       it != query_parameter_pairs_.end(); ++it) {
    query_parameter_map_[it->key] = it->value;
  }
}
// Copy constructor. Copies each component from `other`, then rebuilds the
// key/value lookup map from this object's own copy of the query parameter
// pairs rather than copying `other`'s map.
URI::URI(const URI& other)
    : scheme_(other.scheme_),
      authority_(other.authority_),
      path_(other.path_),
      query_parameter_pairs_(other.query_parameter_pairs_),
      fragment_(other.fragment_) {
  for (auto it = query_parameter_pairs_.begin();
       it != query_parameter_pairs_.end(); ++it) {
    query_parameter_map_[it->key] = it->value;
  }
}
// Copy assignment. Copies each component from `other` and rebuilds the
// key/value lookup map from this object's own copy of the query parameter
// pairs. Self-assignment is a no-op.
URI& URI::operator=(const URI& other) {
  if (this == &other) {
    return *this;
  }
  // Discard the lookup entries of the previous value before the pairs they
  // were built from are overwritten. Without this, keys that exist only in
  // the old value would survive the assignment and still be visible in the
  // map. NOTE(review): the map's key/value types are declared in the header;
  // if they are views into query_parameter_pairs_, clearing first also avoids
  // touching dangling entries while inserting below -- confirm.
  query_parameter_map_.clear();
  scheme_ = other.scheme_;
  authority_ = other.authority_;
  path_ = other.path_;
  query_parameter_pairs_ = other.query_parameter_pairs_;
  fragment_ = other.fragment_;
  // A later pair with the same key overwrites the earlier map entry.
  for (const auto& kv : query_parameter_pairs_) {
    query_parameter_map_[kv.key] = kv.value;
  }
  return *this;
}
namespace {
// Formatter passed to absl::StrJoin() when serializing query parameters.
// Appends "key=value" to `out`, with key and value each percent-encoded so
// that '&' and '=' occurring inside them are escaped.
struct QueryParameterFormatter {
  void operator()(std::string* out, const URI::QueryParam& query_param) const {
    out->append(PercentEncode(query_param.key, IsQueryKeyOrValueChar));
    out->push_back('=');
    out->append(PercentEncode(query_param.value, IsQueryKeyOrValueChar));
  }
};
} // namespace
// Serializes this URI as
//   scheme ":" ["//" authority] [path] ["?" query] ["#" fragment]
// percent-encoding each component with the character set for that component.
// Empty components (other than the scheme) are omitted together with their
// delimiter.
std::string URI::ToString() const {
  std::string text = PercentEncode(scheme_, IsSchemeChar);
  text.push_back(':');
  if (!authority_.empty()) {
    text.append("//");
    text.append(PercentEncode(authority_, IsAuthorityChar));
  }
  if (!path_.empty()) {
    text.append(PercentEncode(path_, IsPathChar));
  }
  if (!query_parameter_pairs_.empty()) {
    text.push_back('?');
    text.append(
        absl::StrJoin(query_parameter_pairs_, "&", QueryParameterFormatter()));
  }
  if (!fragment_.empty()) {
    text.push_back('#');
    text.append(PercentEncode(fragment_, IsQueryOrFragmentChar));
  }
  return text;
}
} // namespace grpc_core