001 // Copyright (c) 2011, Mike Samuel
002 // All rights reserved.
003 //
004 // Redistribution and use in source and binary forms, with or without
005 // modification, are permitted provided that the following conditions
006 // are met:
007 //
008 // Redistributions of source code must retain the above copyright
009 // notice, this list of conditions and the following disclaimer.
010 // Redistributions in binary form must reproduce the above copyright
011 // notice, this list of conditions and the following disclaimer in the
012 // documentation and/or other materials provided with the distribution.
013 // Neither the name of the OWASP nor the names of its contributors may
014 // be used to endorse or promote products derived from this software
015 // without specific prior written permission.
016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027 // POSSIBILITY OF SUCH DAMAGE.
028
029 package org.owasp.html;
030
031 import javax.annotation.Nullable;
032
033 import com.google.common.collect.ImmutableSet;
034
035 /**
036 * An attribute policy for attributes whose values are URLs that requires that
037 * the value have no protocol or have an allowed protocol.
038 *
039 * <p>
040 * URLs with protocols must match the protocol set passed to the constructor.
041 * URLs without protocols but which specify an origin different from the
042 * containing page (e.g. {@code //example.org}) are only allowed if the
043 * {@link FilterUrlByProtocolAttributePolicy#allowProtocolRelativeUrls policy}
044 * allows both {@code http} and {@code https} which are normally used to serve
045 * HTML.
046 * Same-origin URLs, URLs without any protocol or authority part are always
047 * allowed.
048 * </p>
049 *
050 * <p>
051 * This class assumes that URLs are either hierarchical, or are opaque, but
052 * do not look like they contain an authority portion.
053 * </p>
054 *
055 * @author Mike Samuel <mikesamuel@gmail.com>
056 */
057 @TCB
058 public class FilterUrlByProtocolAttributePolicy implements AttributePolicy {
059 private final ImmutableSet<String> protocols;
060
061 public FilterUrlByProtocolAttributePolicy(
062 Iterable<? extends String> protocols) {
063 this.protocols = ImmutableSet.copyOf(protocols);
064 }
065
066 public @Nullable String apply(
067 String elementName, String attributeName, String s) {
068 protocol_loop:
069 for (int i = 0, n = s.length(); i < n; ++i) {
070 switch (s.charAt(i)) {
071 case '/': case '#': case '?': // No protocol.
072 // Check for domain relative URLs like //www.evil.org/
073 if (s.startsWith("//")
074 // or the protocols by which HTML is normally served are OK.
075 && !allowProtocolRelativeUrls()) {
076 return null;
077 }
078 break protocol_loop;
079 case ':':
080 String protocol = Strings.toLowerCase(s.substring(0, i));
081 if (!protocols.contains(protocol)) { return null; }
082 break protocol_loop;
083 }
084 }
085 return normalizeUri(s);
086 }
087
088 protected boolean allowProtocolRelativeUrls() {
089 return protocols.contains("http") && protocols.contains("https");
090 }
091
092 /** Percent encodes anything that looks like a colon, or a parenthesis. */
093 static String normalizeUri(String s) {
094 int n = s.length();
095 boolean colonsIrrelevant = false;
096 for (int i = 0; i < n; ++i) {
097 char ch = s.charAt(i);
098 switch (ch) {
099 case '/': case '#': case '?': case ':':
100 colonsIrrelevant = true;
101 break;
102 case '(': case ')': case '\uff1a':
103 StringBuilder sb = new StringBuilder(n + 16);
104 int pos = 0;
105 for (; i < n; ++i) {
106 ch = s.charAt(i);
107 switch (ch) {
108 case '(':
109 sb.append(s, pos, i).append("%28");
110 pos = i + 1;
111 break;
112 case ')':
113 sb.append(s, pos, i).append("%29");
114 pos = i + 1;
115 break;
116 default:
117 if (ch > 0x100 && !colonsIrrelevant) {
118 // Other colon like characters.
119 // TODO: do we need to encode non-colon characters if we're
120 // not dealing with URLs that haven't been copy/pasted into
121 // the URL bar?
122 // Is it safe to assume UTF-8 here?
123 switch (ch) {
124 case '\u0589':
125 sb.append(s, pos, i).append("%d6%89");
126 pos = i + 1;
127 break;
128 case '\u05c3':
129 sb.append(s, pos, i).append("%d7%83");
130 pos = i + 1;
131 break;
132 case '\u2236':
133 sb.append(s, pos, i).append("%e2%88%b6");
134 pos = i + 1;
135 break;
136 case '\uff1a':
137 sb.append(s, pos, i).append("%ef%bc%9a");
138 pos = i + 1;
139 break;
140 }
141 }
142 break;
143 }
144 }
145 return sb.append(s, pos, n).toString();
146 }
147 }
148 return s;
149 }
150
151 @Override
152 public boolean equals(Object o) {
153 return o != null && this.getClass() == o.getClass()
154 && protocols.equals(((FilterUrlByProtocolAttributePolicy) o).protocols);
155 }
156
157 @Override
158 public int hashCode() {
159 return protocols.hashCode();
160 }
161
162 }