001 // Copyright (c) 2011, Mike Samuel
002 // All rights reserved.
003 //
004 // Redistribution and use in source and binary forms, with or without
005 // modification, are permitted provided that the following conditions
006 // are met:
007 //
008 // Redistributions of source code must retain the above copyright
009 // notice, this list of conditions and the following disclaimer.
010 // Redistributions in binary form must reproduce the above copyright
011 // notice, this list of conditions and the following disclaimer in the
012 // documentation and/or other materials provided with the distribution.
013 // Neither the name of the OWASP nor the names of its contributors may
014 // be used to endorse or promote products derived from this software
015 // without specific prior written permission.
016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027 // POSSIBILITY OF SUCH DAMAGE.
028
029 package org.owasp.html;
030
031 import java.util.LinkedHashSet;
032 import java.util.List;
033 import java.util.Set;
034
035 import javax.annotation.Nullable;
036
037 /**
038 * Sits between the HTML parser, and then policy, and the renderer so that it
039 * can report dropped elements and attributes to an {@link HtmlChangeListener}.
040 *
041 * <pre>
042 * HtmlChangeReporter<T> hcr = new HtmlChangeReporter<T>(
043 * renderer, htmlChangeListener, context);
044 * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer()));
045 * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy());
046 * </pre>
047 *
048 * The renderer receives events from the policy unchanged, but the reporter
049 * notices differences between the events from the lexer and those from the
050 * policy.
051 *
052 * @param <T> The type of context value passed to the
053 */
054 public final class HtmlChangeReporter<T> {
055 private final OutputChannel output;
056 private final InputChannel<T> input;
057
058 public HtmlChangeReporter(
059 HtmlStreamEventReceiver renderer,
060 HtmlChangeListener<? super T> listener, @Nullable T context) {
061 this.output = new OutputChannel(renderer);
062 this.input = new InputChannel<T>(output, listener, context);
063 }
064
065 /**
066 * Associates an input channel. {@code this} receives events and forwards
067 * them to input.
068 */
069 public void setPolicy(HtmlSanitizer.Policy policy) {
070 this.input.policy = policy;
071 }
072
073 public HtmlStreamEventReceiver getWrappedRenderer() { return output; }
074
075 public HtmlSanitizer.Policy getWrappedPolicy() { return input; }
076
077 private static final class InputChannel<T> implements HtmlSanitizer.Policy {
078 HtmlStreamEventReceiver policy;
079 final OutputChannel output;
080 final T context;
081 final HtmlChangeListener<? super T> listener;
082
083 InputChannel(
084 OutputChannel output, HtmlChangeListener<? super T> listener,
085 @Nullable T context) {
086 this.output = output;
087 this.context = context;
088 this.listener = listener;
089 }
090
091 public void openDocument() {
092 policy.openDocument();
093 }
094
095 public void closeDocument() {
096 policy.closeDocument();
097 }
098
099 public void openTag(String elementName, List<String> attrs) {
100 output.expectedElementName = elementName;
101 output.expectedAttrNames.clear();
102 for (int i = 0, n = attrs.size(); i < n; i += 2) {
103 output.expectedAttrNames.add(attrs.get(i));
104 }
105 policy.openTag(elementName, attrs);
106 {
107 // Gather the notification details to avoid any problems with the
108 // listener re-entering the stream event receiver. This shouldn't
109 // occur, but if it does it will be a source of subtle confusing bugs.
110 String discardedElementName = output.expectedElementName;
111 output.expectedElementName = null;
112 int nExpected = output.expectedAttrNames.size();
113 String[] discardedAttrNames =
114 nExpected != 0 && discardedElementName == null
115 ? output.expectedAttrNames.toArray(new String[nExpected])
116 : ZERO_STRINGS;
117 output.expectedAttrNames.clear();
118 // Dispatch notifications to the listener.
119 if (discardedElementName != null) {
120 listener.discardedTag(context, discardedElementName);
121 }
122 if (discardedAttrNames.length != 0) {
123 listener.discardedAttributes(
124 context, elementName, discardedAttrNames);
125 }
126 }
127 }
128
129 public void closeTag(String elementName) {
130 policy.closeTag(elementName);
131 }
132
133 public void text(String textChunk) {
134 policy.text(textChunk);
135 }
136 }
137
138 private static final class OutputChannel implements HtmlStreamEventReceiver {
139 private final HtmlStreamEventReceiver renderer;
140 String expectedElementName;
141 Set<String> expectedAttrNames = new LinkedHashSet<String>();
142
143 OutputChannel(HtmlStreamEventReceiver renderer) {
144 this.renderer = renderer;
145 }
146
147 public void openDocument() {
148 renderer.openDocument();
149 }
150
151 public void closeDocument() {
152 renderer.closeDocument();
153 }
154
155 public void openTag(String elementName, List<String> attrs) {
156 if (elementName.equals(expectedElementName)) {
157 expectedElementName = null;
158 }
159 for (int i = 0, n = attrs.size(); i < n; i += 2) {
160 expectedAttrNames.remove(attrs.get(i));
161 }
162 renderer.openTag(elementName, attrs);
163 }
164
165 public void closeTag(String elementName) {
166 renderer.closeTag(elementName);
167 }
168
169 public void text(String text) {
170 renderer.text(text);
171 }
172 }
173
174 private static final String[] ZERO_STRINGS = new String[0];
175 }