001 // Copyright (c) 2011, Mike Samuel
002 // All rights reserved.
003 //
004 // Redistribution and use in source and binary forms, with or without
005 // modification, are permitted provided that the following conditions
006 // are met:
007 //
008 // Redistributions of source code must retain the above copyright
009 // notice, this list of conditions and the following disclaimer.
010 // Redistributions in binary form must reproduce the above copyright
011 // notice, this list of conditions and the following disclaimer in the
012 // documentation and/or other materials provided with the distribution.
013 // Neither the name of the OWASP nor the names of its contributors may
014 // be used to endorse or promote products derived from this software
015 // without specific prior written permission.
016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027 // POSSIBILITY OF SUCH DAMAGE.
028
029 package org.owasp.html;
030
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034 import java.util.regex.Pattern;
035
036 import javax.annotation.Nullable;
037 import javax.annotation.concurrent.NotThreadSafe;
038
039 import com.google.common.base.Predicate;
040 import com.google.common.collect.ImmutableList;
041 import com.google.common.collect.ImmutableMap;
042 import com.google.common.collect.ImmutableSet;
043 import com.google.common.collect.Maps;
044 import com.google.common.collect.Sets;
045
046
047 /**
048 * Conveniences for configuring policies for the {@link HtmlSanitizer}.
049 *
050 * <h3>Usage</h3>
051 * <p>
052 * To create a policy, first construct an instance of this class; then call
053 * <code>allow…</code> methods to turn on tags, attributes, and other
054 * processing modes; and finally call <code>build(renderer)</code> or
055 * <code>toFactory()</code>.
056 * </p>
057 * <pre class="prettyprint lang-java">
058 * // Define the policy.
 * Function&lt;HtmlStreamEventReceiver, HtmlSanitizer.Policy&gt; policy
060 * = new HtmlPolicyBuilder()
061 * .allowElements("a", "p")
062 * .allowAttributes("href").onElements("a")
063 * .toFactory();
064 *
065 * // Sanitize your output.
066 * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
067 * </pre>
068 *
069 * <h3>Embedded Content</h3>
070 * <p>
071 * Embedded URLs are filtered by
072 * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
073 * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
 * so you can easily white-list widely used policies that don't violate the
 * current page's origin. See "Customization" below for ways to do further
076 * filtering. If you allow links it might be worthwhile to
077 * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
078 * {@code rel=nofollow}.
079 * </p>
080 * <p>
081 * This class simply throws out all embedded JS.
082 * Use a custom element or attribute policy to allow through
083 * signed or otherwise known-safe code.
084 * Check out the Caja project if you need a way to contain third-party JS.
085 * </p>
086 * <p>
087 * This class does not attempt to faithfully parse and sanitize CSS.
088 * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
089 * that allows through a few CSS properties that allow textual styling, but that
090 * disallow image loading, history stealing, layout breaking, code execution,
091 * etc.
092 * </p>
093 *
094 * <h3>Customization</h3>
095 * <p>
096 * You can easily do custom processing on tags and attributes by supplying your
097 * own {@link ElementPolicy element policy} or
098 * {@link AttributePolicy attribute policy} when calling
099 * <code>allow…</code>.
100 * E.g. to convert headers into {@code <div>}s, you could use an element policy
101 * </p>
102 * <pre class="prettyprint lang-java">
103 * new HtmlPolicyBuilder()
 *   .allowElements(
 *       new ElementPolicy() {
 *         public String apply(String elementName, List&lt;String&gt; attributes) {
107 * attributes.add("class");
108 * attributes.add("header-" + elementName);
109 * return "div";
110 * }
111 * },
112 * "h1", "h2", "h3", "h4", "h5", "h6")
113 * .build(outputChannel)
114 * </pre>
115 *
116 * <h3>Rules of Thumb</h3>
117 * <p>
118 * Throughout this class, several rules hold:
119 * <ul>
120 * <li>Everything is denied by default. There are
121 * <code>disallow…</code> methods, but those reverse
122 * allows instead of rolling back overly permissive defaults.
123 * <li>The order of allows and disallows does not matter.
124 * Disallows trump allows whether they occur before or after them.
125 * The only method that needs to be called in a particular place is
126 * {@link HtmlPolicyBuilder#build}.
127 * Allows or disallows after {@code build} is called have no
128 * effect on the already built policy.
129 * <li>Element and attribute policies are applied in the following order:
130 * element specific attribute policy, global attribute policy, element
131 * policy.
132 * Element policies come last so they can observe all the post-processed
133 * attributes, and so they can add attributes that are exempt from
134 * attribute policies.
135 * Element specific policies go first, so they can normalize content to
136 * a form that might be acceptable to a more simplistic global policy.
137 * </ul>
138 *
139 * <h3>Thread safety and efficiency</h3>
140 * <p>
141 * This class is not thread-safe. The resulting policy will not violate its
142 * security guarantees as a result of race conditions, but is not thread safe
143 * because it maintains state to track whether text inside disallowed elements
144 * should be suppressed.
145 * <p>
146 * The resulting policy can be reused, but if you use the
147 * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
148 * binding policies to output channels is cheap so there's no need.
149 * </p>
150 *
151 * @author Mike Samuel <mikesamuel@gmail.com>
152 */
153 @TCB
154 @NotThreadSafe
155 public class HtmlPolicyBuilder {
156 /**
157 * The default set of elements that are removed if they have no attributes.
158 * Since {@code <img>} is in this set, by default, a policy will remove
159 * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
160 * and it has no other attributes that would warrant it appearing in the
161 * output.
162 */
163 public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
164 = ImmutableSet.of("a", "font", "img", "input", "span");
165
166 private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
167 private final Map<String, Map<String, AttributePolicy>> attrPolicies
168 = Maps.newLinkedHashMap();
169 private final Map<String, AttributePolicy> globalAttrPolicies
170 = Maps.newLinkedHashMap();
171 private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
172 private final Set<String> skipIfEmpty = Sets.newLinkedHashSet(
173 DEFAULT_SKIP_IF_EMPTY);
174 private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
175 private boolean requireRelNofollowOnLinks;
176
177 /**
178 * Allows the named elements.
179 */
180 public HtmlPolicyBuilder allowElements(String... elementNames) {
181 return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames);
182 }
183
184 /**
185 * Disallows the named elements. Elements are disallowed by default, so
186 * there is no need to disallow elements, unless you are making an exception
187 * based on an earlier allow.
188 */
189 public HtmlPolicyBuilder disallowElements(String... elementNames) {
190 return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames);
191 }
192
193 /**
194 * Allow the given elements with the given policy.
195 *
196 * @param policy May remove or add attributes, change the element name, or
197 * deny the element.
198 */
199 public HtmlPolicyBuilder allowElements(
200 ElementPolicy policy, String... elementNames) {
201 invalidateCompiledState();
202 for (String elementName : elementNames) {
203 elementName = HtmlLexer.canonicalName(elementName);
204 ElementPolicy newPolicy = ElementPolicy.Util.join(
205 elPolicies.get(elementName), policy);
206 // Don't remove if newPolicy is the always reject policy since we want
207 // that to infect later allowElement calls for this particular element
208 // name. rejects should have higher priority than allows.
209 elPolicies.put(elementName, newPolicy);
210 if (!textContainers.containsKey(elementName)
211 && TagBalancingHtmlStreamEventReceiver
212 .allowsPlainTextualContent(elementName)) {
213 textContainers.put(elementName, true);
214 }
215 }
216 return this;
217 }
218
219 /**
220 * A canned policy that allows a number of common formatting elements.
221 */
222 public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
223 return allowElements(
224 "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
225 "strike", "tt", "code", "big", "small", "br", "span");
226 }
227
228 /**
229 * A canned policy that allows a number of common block elements.
230 */
231 public HtmlPolicyBuilder allowCommonBlockElements() {
232 return allowElements(
233 "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
234 "blockquote");
235 }
236
237 /**
238 * Allows text content in the named elements.
239 * By default, text content is allowed in any
240 * {@link #allowElements allowed elements} that can contain character data per
241 * the HTML5 spec, but text content is not allowed by default in elements that
242 * contain content of other kinds (like JavaScript in {@code <script>}
243 * elements.
244 * <p>
245 * To write a policy that whitelists {@code <script>} or {@code <style>}
246 * elements, first {@code allowTextIn("script")}.
247 */
248 public HtmlPolicyBuilder allowTextIn(String... elementNames) {
249 invalidateCompiledState();
250 for (String elementName : elementNames) {
251 elementName = HtmlLexer.canonicalName(elementName);
252 textContainers.put(elementName, true);
253 }
254 return this;
255 }
256
257 public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
258 invalidateCompiledState();
259 for (String elementName : elementNames) {
260 elementName = HtmlLexer.canonicalName(elementName);
261 textContainers.put(elementName, false);
262 }
263 return this;
264 }
265
266 /**
267 * Assuming the given elements are allowed, allows them to appear without
268 * attributes.
269 *
270 * @see #DEFAULT_SKIP_IF_EMPTY
271 * @see #disallowWithoutAttributes
272 */
273 public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
274 invalidateCompiledState();
275 for (String elementName : elementNames) {
276 elementName = HtmlLexer.canonicalName(elementName);
277 skipIfEmpty.remove(elementName);
278 }
279 return this;
280 }
281
282 /**
283 * Disallows the given elements from appearing without attributes.
284 *
285 * @see #DEFAULT_SKIP_IF_EMPTY
286 * @see #allowWithoutAttributes
287 */
288 public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
289 invalidateCompiledState();
290 for (String elementName : elementNames) {
291 elementName = HtmlLexer.canonicalName(elementName);
292 skipIfEmpty.add(elementName);
293 }
294 return this;
295 }
296
297 /**
298 * Returns an object that lets you associate policies with the given
299 * attributes, and allow them globally or on specific elements.
300 */
301 public AttributeBuilder allowAttributes(String... attributeNames) {
302 ImmutableList.Builder<String> b = ImmutableList.builder();
303 for (String attributeName : attributeNames) {
304 b.add(HtmlLexer.canonicalName(attributeName));
305 }
306 return new AttributeBuilder(b.build());
307 }
308
309 /**
310 * Reverse an earlier attribute {@link #allowAttributes allow}.
311 * <p>
312 * For this to have an effect you must call at least one of
313 * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}.
314 * <p>
315 * Attributes are disallowed by default, so there is no need to call this
316 * with a laundry list of attribute/element pairs.
317 */
318 public AttributeBuilder disallowAttributes(String... attributeNames) {
319 return this.allowAttributes(attributeNames)
320 .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
321 }
322
323
324 private HtmlPolicyBuilder allowAttributesGlobally(
325 AttributePolicy policy, List<String> attributeNames) {
326 invalidateCompiledState();
327 for (String attributeName : attributeNames) {
328 // We reinterpret the identity policy later via policy joining since its
329 // the default passed from the policy-less method, but we don't do
330 // anything here since we don't know until build() is called whether the
331 // policy author wants to allow certain URL protocols or wants to deal
332 // with styles.
333 AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName);
334 globalAttrPolicies.put(
335 attributeName, AttributePolicy.Util.join(oldPolicy, policy));
336 }
337 return this;
338 }
339
340 private HtmlPolicyBuilder allowAttributesOnElements(
341 AttributePolicy policy, List<String> attributeNames,
342 List<String> elementNames) {
343 invalidateCompiledState();
344 for (String elementName : elementNames) {
345 Map<String, AttributePolicy> policies = attrPolicies.get(elementName);
346 if (policies == null) {
347 policies = Maps.newLinkedHashMap();
348 attrPolicies.put(elementName, policies);
349 }
350 for (String attributeName : attributeNames) {
351 AttributePolicy oldPolicy = policies.get(attributeName);
352 policies.put(
353 attributeName,
354 AttributePolicy.Util.join(oldPolicy, policy));
355 }
356 }
357 return this;
358 }
359
360 /**
361 * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a>
362 * to links.
363 */
364 public HtmlPolicyBuilder requireRelNofollowOnLinks() {
365 invalidateCompiledState();
366 this.requireRelNofollowOnLinks = true;
367 return this;
368 }
369
370 /**
371 * Adds to the set of protocols that are allowed in URL attributes.
372 * For each URL attribute that is allowed, we further constrain it by
373 * only allowing the value through if it specifies no protocol, or if it
374 * specifies one in the allowedProtocols white-list.
375 * This is done regardless of whether any protocols have been allowed, so
376 * allowing the attribute "href" globally with the identity policy but
377 * not white-listing any protocols, effectively disallows the "href"
378 * attribute globally.
379 * <p>
380 * Do not allow any <code>*script</code> such as <code>javascript</code>
381 * protocols if you might use this policy with untrusted code.
382 */
383 public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
384 invalidateCompiledState();
385 // If there is at least one allowed protocol, then allow URLs and
386 // add a filter that checks href and src values.
387
388 // Do not allow href and srcs through otherwise, and only allow on images
389 // and links.
390 for (String protocol : protocols) {
391 protocol = Strings.toLowerCase(protocol);
392 allowedProtocols.add(protocol);
393 }
394 return this;
395 }
396
397 /**
398 * Reverses a decision made by {@link #allowUrlProtocols}.
399 */
400 public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
401 invalidateCompiledState();
402 for (String protocol : protocols) {
403 protocol = Strings.toLowerCase(protocol);
404 allowedProtocols.remove(protocol);
405 }
406 return this;
407 }
408
409 /**
410 * A canned URL protocol policy that allows <code>http</code>,
411 * <code>https</code>, and <code>mailto</code>.
412 */
413 public HtmlPolicyBuilder allowStandardUrlProtocols() {
414 return allowUrlProtocols("http", "https", "mailto");
415 }
416
417 /**
418 * Convert <code>style="<CSS>"</code> to sanitized CSS which allows
419 * color, font-size, type-face, and other styling using the default schema;
420 * but which does not allow content to escape its clipping context.
421 */
422 public HtmlPolicyBuilder allowStyling() {
423 allowStyling(CssSchema.DEFAULT);
424 return this;
425 }
426
427 /**
428 * Convert <code>style="<CSS>"</code> to sanitized CSS which allows
429 * color, font-size, type-face, and other styling using the given schema.
430 */
431 public HtmlPolicyBuilder allowStyling(CssSchema whitelist) {
432 invalidateCompiledState();
433 allowAttributesGlobally(
434 new StylingPolicy(whitelist), ImmutableList.of("style"));
435 return this;
436 }
437
438 /**
439 * Names of attributes from HTML 4 whose values are URLs.
440 * Other attributes, e.g. <code>style</code> may contain URLs even though
441 * there values are not URLs.
442 */
443 private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of(
444 "action", "archive", "background", "cite", "classid", "codebase", "data",
445 "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
446 "profile", "src", "srcset", "usemap");
447
448 /**
449 * Produces a policy based on the allow and disallow calls previously made.
450 *
451 * @param out receives calls to open only tags allowed by
452 * previous calls to this object.
453 * Typically a {@link HtmlStreamRenderer}.
454 */
455 public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
456 return toFactory().apply(out);
457 }
458
459 /**
460 * Produces a policy based on the allow and disallow calls previously made.
461 *
462 * @param out receives calls to open only tags allowed by
463 * previous calls to this object.
464 * Typically a {@link HtmlStreamRenderer}.
465 * @param listener is notified of dropped tags and attributes so that
466 * intrusion detection systems can be alerted to questionable HTML.
467 * If {@code null} then no notifications are sent.
468 * @param context if {@code (listener != null)} then the context value passed
469 * with alerts. This can be used to let the listener know from which
470 * connection or request the questionable HTML was received.
471 */
472 public <CTX> HtmlSanitizer.Policy build(
473 HtmlStreamEventReceiver out,
474 @Nullable HtmlChangeListener<? super CTX> listener,
475 @Nullable CTX context) {
476 return toFactory().apply(out, listener, context);
477 }
478
479 /**
480 * Like {@link #build} but can be reused to create many different policies
481 * each backed by a different output channel.
482 */
483 public PolicyFactory toFactory() {
484 ImmutableSet.Builder<String> textContainers = ImmutableSet.builder();
485 for (Map.Entry<String, Boolean> textContainer
486 : this.textContainers.entrySet()) {
487 if (Boolean.TRUE.equals(textContainer.getValue())) {
488 textContainers.add(textContainer.getKey());
489 }
490 }
491 return new PolicyFactory(compilePolicies(), textContainers.build(),
492 ImmutableMap.copyOf(globalAttrPolicies));
493 }
494
495 // Speed up subsequent builds by caching the compiled policies.
496 private transient ImmutableMap<String, ElementAndAttributePolicies>
497 compiledPolicies;
498
499 /** Called by mutators to signal that any compiled policy is out-of-date. */
500 private void invalidateCompiledState() {
501 compiledPolicies = null;
502 }
503
504 private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() {
505 if (compiledPolicies != null) { return compiledPolicies; }
506
507 // Copy maps before normalizing in case builder is reused.
508 Map<String, ElementPolicy> elPolicies
509 = Maps.newLinkedHashMap(this.elPolicies);
510 Map<String, Map<String, AttributePolicy>> attrPolicies
511 = Maps.newLinkedHashMap(this.attrPolicies);
512 for (Map.Entry<String, Map<String, AttributePolicy>> e :
513 attrPolicies.entrySet()) {
514 e.setValue(Maps.newLinkedHashMap(e.getValue()));
515 }
516 Map<String, AttributePolicy> globalAttrPolicies
517 = Maps.newLinkedHashMap(this.globalAttrPolicies);
518 Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);
519
520 // Implement requireRelNofollowOnLinks
521 if (requireRelNofollowOnLinks) {
522 ElementPolicy linkPolicy = elPolicies.get("a");
523 if (linkPolicy == null) {
524 linkPolicy = ElementPolicy.REJECT_ALL_ELEMENT_POLICY;
525 }
526 elPolicies.put(
527 "a",
528 ElementPolicy.Util.join(
529 linkPolicy,
530 new ElementPolicy() {
531 public String apply(String elementName, List<String> attrs) {
532 for (int i = 0, n = attrs.size(); i < n; i += 2) {
533 if ("href".equals(attrs.get(i))) {
534 attrs.add("rel");
535 attrs.add("nofollow");
536 break;
537 }
538 }
539 return elementName;
540 }
541 }));
542 }
543
544 // Implement protocol policies.
545 // For each URL attribute that is allowed, we further constrain it by
546 // only allowing the value through if it specifies no protocol, or if it
547 // specifies one in the allowedProtocols white-list.
548 // This is done regardless of whether any protocols have been allowed, so
549 // allowing the attribute "href" globally with the identity policy but
550 // not white-listing any protocols, effectively disallows the "href"
551 // attribute globally.
552 {
553 AttributePolicy urlAttributePolicy;
554 if (allowedProtocols.size() == 3
555 && allowedProtocols.contains("mailto")
556 && allowedProtocols.contains("http")
557 && allowedProtocols.contains("https")) {
558 urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
559 } else {
560 urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
561 allowedProtocols);
562 }
563 Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES);
564 for (String urlAttributeName : URL_ATTRIBUTE_NAMES) {
565 if (globalAttrPolicies.containsKey(urlAttributeName)) {
566 toGuard.remove(urlAttributeName);
567 globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join(
568 urlAttributePolicy, globalAttrPolicies.get(urlAttributeName)));
569 }
570 }
571 // Implement guards not implemented on global policies in the per-element
572 // policy maps.
573 for (Map.Entry<String, Map<String, AttributePolicy>> e
574 : attrPolicies.entrySet()) {
575 Map<String, AttributePolicy> policies = e.getValue();
576 for (String urlAttributeName : toGuard) {
577 if (policies.containsKey(urlAttributeName)) {
578 policies.put(urlAttributeName, AttributePolicy.Util.join(
579 urlAttributePolicy, policies.get(urlAttributeName)));
580 }
581 }
582 }
583 }
584
585 ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
586 = ImmutableMap.builder();
587 for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
588 String elementName = e.getKey();
589 ElementPolicy elPolicy = e.getValue();
590 if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
591 continue;
592 }
593
594 Map<String, AttributePolicy> elAttrPolicies
595 = attrPolicies.get(elementName);
596 if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); }
597 ImmutableMap.Builder<String, AttributePolicy> attrs
598 = ImmutableMap.builder();
599 for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
600 String attributeName = ape.getKey();
601 // Handle below so we don't end up putting the same key into the map
602 // twice. ImmutableMap.Builder hates that.
603 if (globalAttrPolicies.containsKey(attributeName)) { continue; }
604 AttributePolicy policy = ape.getValue();
605 if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
606 attrs.put(attributeName, policy);
607 }
608 }
609 for (Map.Entry<String, AttributePolicy> ape
610 : globalAttrPolicies.entrySet()) {
611 String attributeName = ape.getKey();
612 AttributePolicy policy = AttributePolicy.Util.join(
613 elAttrPolicies.get(attributeName), ape.getValue());
614 if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
615 attrs.put(attributeName, policy);
616 }
617 }
618
619 policiesBuilder.put(
620 elementName,
621 new ElementAndAttributePolicies(
622 elementName,
623 elPolicy, attrs.build(), skipIfEmpty.contains(elementName)));
624 }
625 return compiledPolicies = policiesBuilder.build();
626 }
627
628 /**
629 * Builds the relationship between attributes, the values that they may have,
630 * and the elements on which they may appear.
631 *
632 * @author Mike Samuel
633 */
634 public final class AttributeBuilder {
635 private final List<String> attributeNames;
636 private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;
637
638 AttributeBuilder(List<? extends String> attributeNames) {
639 this.attributeNames = ImmutableList.copyOf(attributeNames);
640 }
641
642 /**
643 * Filters and/or transforms the attribute values
644 * allowed by later {@code allow*} calls.
645 * Multiple calls to {@code matching} are combined so that the policies
646 * receive the value in order, each seeing the value after any
647 * transformation by a previous policy.
648 */
649 public AttributeBuilder matching(AttributePolicy policy) {
650 this.policy = AttributePolicy.Util.join(this.policy, policy);
651 return this;
652 }
653
654 /**
655 * Restrict the values allowed by later {@code allow*} calls to those
656 * matching the pattern.
657 * Multiple calls to {@code matching} are combined to restrict to the
658 * intersection of possible matched values.
659 */
660 public AttributeBuilder matching(final Pattern pattern) {
661 return matching(new AttributePolicy() {
662 public @Nullable String apply(
663 String elementName, String attributeName, String value) {
664 return pattern.matcher(value).matches() ? value : null;
665 }
666 });
667 }
668
669 /**
670 * Restrict the values allowed by later {@code allow*} calls to those
671 * matching the given predicate.
672 * Multiple calls to {@code matching} are combined to restrict to the
673 * intersection of possible matched values.
674 */
675 public AttributeBuilder matching(
676 final Predicate<? super String> filter) {
677 return matching(new AttributePolicy() {
678 public @Nullable String apply(
679 String elementName, String attributeName, String value) {
680 return filter.apply(value) ? value : null;
681 }
682 });
683 }
684
685 /**
686 * Restrict the values allowed by later {@code allow*} calls to those
687 * supplied.
688 * Multiple calls to {@code matching} are combined to restrict to the
689 * intersection of possible matched values.
690 */
691 public AttributeBuilder matching(
692 boolean ignoreCase, String... allowedValues) {
693 return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
694 }
695
696 /**
697 * Restrict the values allowed by later {@code allow*} calls to those
698 * supplied.
699 * Multiple calls to {@code matching} are combined to restrict to the
700 * intersection of possible matched values.
701 */
702 public AttributeBuilder matching(
703 final boolean ignoreCase, Set<? extends String> allowedValues) {
704 final ImmutableSet<String> allowed = ImmutableSet.copyOf(allowedValues);
705 return matching(new AttributePolicy() {
706 public @Nullable String apply(
707 String elementName, String attributeName, String value) {
708 if (ignoreCase) { value = Strings.toLowerCase(value); }
709 return allowed.contains(value) ? value : null;
710 }
711 });
712 }
713
714 /**
715 * Allows the given attributes on any elements but filters the
716 * attributes' values based on previous calls to {@code matching(...)}.
717 * Global attribute policies are applied after element specific policies.
718 * Be careful of using this with attributes like <code>type</code> which
719 * have different meanings on different attributes.
720 * Also be careful of allowing globally attributes like <code>href</code>
721 * which can have more far-reaching effects on tags like
722 * <code><base></code> and <code><link></code> than on
723 * <code><a></code> because in the former, they have an effect without
724 * user interaction and can change the behavior of the current page.
725 */
726 public HtmlPolicyBuilder globally() {
727 return HtmlPolicyBuilder.this.allowAttributesGlobally(
728 policy, attributeNames);
729 }
730
731 /**
732 * Allows the named attributes on the given elements but filters the
733 * attributes' values based on previous calls to {@code matching(...)}.
734 */
735 public HtmlPolicyBuilder onElements(String... elementNames) {
736 ImmutableList.Builder<String> b = ImmutableList.builder();
737 for (String elementName : elementNames) {
738 b.add(HtmlLexer.canonicalName(elementName));
739 }
740 return HtmlPolicyBuilder.this.allowAttributesOnElements(
741 policy, attributeNames, b.build());
742 }
743 }
744 }