| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package libcore.net; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.net.URISyntaxException; |
| import java.nio.charset.Charset; |
| import java.nio.charset.Charsets; |
| |
| /** |
| * Encodes and decodes {@code application/x-www-form-urlencoded} content. |
| * Subclasses define exactly which characters are legal. |
| * |
| * <p>By default, UTF-8 is used to encode escaped characters. A single input |
| * character like "\u0080" may be encoded to multiple octets like %C2%80. |
| */ |
| public abstract class UriCodec { |
| |
| /** |
| * Returns true if {@code c} does not need to be escaped. |
| */ |
| protected abstract boolean isRetained(char c); |
| |
| /** |
| * Throws if {@code s} is invalid according to this encoder. |
| */ |
| public final String validate(String uri, int start, int end, String name) |
| throws URISyntaxException { |
| for (int i = start; i < end; ) { |
| char ch = uri.charAt(i); |
| if ((ch >= 'a' && ch <= 'z') |
| || (ch >= 'A' && ch <= 'Z') |
| || (ch >= '0' && ch <= '9') |
| || isRetained(ch)) { |
| i++; |
| } else if (ch == '%') { |
| if (i + 2 >= end) { |
| throw new URISyntaxException(uri, "Incomplete % sequence in " + name, i); |
| } |
| int d1 = hexToInt(uri.charAt(i + 1)); |
| int d2 = hexToInt(uri.charAt(i + 2)); |
| if (d1 == -1 || d2 == -1) { |
| throw new URISyntaxException(uri, "Invalid % sequence: " |
| + uri.substring(i, i + 3) + " in " + name, i); |
| } |
| i += 3; |
| } else { |
| throw new URISyntaxException(uri, "Illegal character in " + name, i); |
| } |
| } |
| return uri.substring(start, end); |
| } |
| |
| /** |
| * Throws if {@code s} contains characters that are not letters, digits or |
| * in {@code legal}. |
| */ |
| public static void validateSimple(String s, String legal) |
| throws URISyntaxException { |
| for (int i = 0; i < s.length(); i++) { |
| char ch = s.charAt(i); |
| if (!((ch >= 'a' && ch <= 'z') |
| || (ch >= 'A' && ch <= 'Z') |
| || (ch >= '0' && ch <= '9') |
| || legal.indexOf(ch) > -1)) { |
| throw new URISyntaxException(s, "Illegal character", i); |
| } |
| } |
| } |
| |
| /** |
| * Encodes {@code s} and appends the result to {@code builder}. |
| * |
| * @param isPartiallyEncoded true to fix input that has already been |
| * partially or fully encoded. For example, input of "hello%20world" is |
| * unchanged with isPartiallyEncoded=true but would be double-escaped to |
| * "hello%2520world" otherwise. |
| */ |
| private void appendEncoded(StringBuilder builder, String s, Charset charset, |
| boolean isPartiallyEncoded) { |
| if (s == null) { |
| throw new NullPointerException("s == null"); |
| } |
| |
| int escapeStart = -1; |
| for (int i = 0; i < s.length(); i++) { |
| char c = s.charAt(i); |
| if ((c >= 'a' && c <= 'z') |
| || (c >= 'A' && c <= 'Z') |
| || (c >= '0' && c <= '9') |
| || isRetained(c) |
| || (c == '%' && isPartiallyEncoded)) { |
| if (escapeStart != -1) { |
| appendHex(builder, s.substring(escapeStart, i), charset); |
| escapeStart = -1; |
| } |
| if (c == '%' && isPartiallyEncoded) { |
| // this is an encoded 3-character sequence like "%20" |
| builder.append(s, i, Math.min(i + 3, s.length())); |
| i += 2; |
| } else if (c == ' ') { |
| builder.append('+'); |
| } else { |
| builder.append(c); |
| } |
| } else if (escapeStart == -1) { |
| escapeStart = i; |
| } |
| } |
| if (escapeStart != -1) { |
| appendHex(builder, s.substring(escapeStart, s.length()), charset); |
| } |
| } |
| |
| public final String encode(String s, Charset charset) { |
| // Guess a bit larger for encoded form |
| StringBuilder builder = new StringBuilder(s.length() + 16); |
| appendEncoded(builder, s, charset, false); |
| return builder.toString(); |
| } |
| |
| public final void appendEncoded(StringBuilder builder, String s) { |
| appendEncoded(builder, s, Charsets.UTF_8, false); |
| } |
| |
| public final void appendPartiallyEncoded(StringBuilder builder, String s) { |
| appendEncoded(builder, s, Charsets.UTF_8, true); |
| } |
| |
| /** |
| * @param convertPlus true to convert '+' to ' '. |
| * @param throwOnFailure true to throw an IllegalArgumentException on |
| * invalid escape sequences; false to replace them with the replacement |
| * character (U+fffd). |
| */ |
| public static String decode(String s, boolean convertPlus, Charset charset, |
| boolean throwOnFailure) { |
| if (s.indexOf('%') == -1 && (!convertPlus || s.indexOf('+') == -1)) { |
| return s; |
| } |
| |
| StringBuilder result = new StringBuilder(s.length()); |
| ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| for (int i = 0; i < s.length();) { |
| char c = s.charAt(i); |
| if (c == '%') { |
| do { |
| int d1, d2; |
| if (i + 2 < s.length() |
| && (d1 = hexToInt(s.charAt(i + 1))) != -1 |
| && (d2 = hexToInt(s.charAt(i + 2))) != -1) { |
| out.write((byte) ((d1 << 4) + d2)); |
| } else if (throwOnFailure) { |
| throw new IllegalArgumentException("Invalid % sequence at " + i + ": " + s); |
| } else { |
| byte[] replacement = "\ufffd".getBytes(charset); |
| out.write(replacement, 0, replacement.length); |
| } |
| i += 3; |
| } while (i < s.length() && s.charAt(i) == '%'); |
| result.append(new String(out.toByteArray(), charset)); |
| out.reset(); |
| } else { |
| if (convertPlus && c == '+') { |
| c = ' '; |
| } |
| result.append(c); |
| i++; |
| } |
| } |
| return result.toString(); |
| } |
| |
| /** |
| * Like {@link Character#digit}, but without support for non-ASCII |
| * characters. |
| */ |
| private static int hexToInt(char c) { |
| if ('0' <= c && c <= '9') { |
| return c - '0'; |
| } else if ('a' <= c && c <= 'f') { |
| return 10 + (c - 'a'); |
| } else if ('A' <= c && c <= 'F') { |
| return 10 + (c - 'A'); |
| } else { |
| return -1; |
| } |
| } |
| |
| public static String decode(String s) { |
| return decode(s, false, Charsets.UTF_8, true); |
| } |
| |
| private static void appendHex(StringBuilder builder, String s, Charset charset) { |
| for (byte b : s.getBytes(charset)) { |
| appendHex(builder, b); |
| } |
| } |
| |
| private static void appendHex(StringBuilder sb, byte b) { |
| sb.append('%'); |
| sb.append(Byte.toHexString(b, true)); |
| } |
| } |