001    // Copyright (c) 2011, Mike Samuel
002    // All rights reserved.
003    //
004    // Redistribution and use in source and binary forms, with or without
005    // modification, are permitted provided that the following conditions
006    // are met:
007    //
008    // Redistributions of source code must retain the above copyright
009    // notice, this list of conditions and the following disclaimer.
010    // Redistributions in binary form must reproduce the above copyright
011    // notice, this list of conditions and the following disclaimer in the
012    // documentation and/or other materials provided with the distribution.
013    // Neither the name of the OWASP nor the names of its contributors may
014    // be used to endorse or promote products derived from this software
015    // without specific prior written permission.
016    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027    // POSSIBILITY OF SUCH DAMAGE.
028    
029    package org.owasp.html;
030    
031    import java.util.LinkedHashSet;
032    import java.util.List;
033    import java.util.Set;
034    
035    import javax.annotation.Nullable;
036    
037    /**
038     * Sits between the HTML parser, and then policy, and the renderer so that it
039     * can report dropped elements and attributes to an {@link HtmlChangeListener}.
040     *
041     * <pre>
042     * HtmlChangeReporter&lt;T&gt; hcr = new HtmlChangeReporter&lt;T&gt;(
043     *   renderer, htmlChangeListener, context);
044     * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer()));
045     * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy());
046     * </pre>
047     *
048     * The renderer receives events from the policy unchanged, but the reporter
049     * notices differences between the events from the lexer and those from the
050     * policy.
051     *
052     * @param <T> The type of context value passed to the
053     */
054    public final class HtmlChangeReporter<T> {
055      private final OutputChannel output;
056      private final InputChannel<T> input;
057    
058      public HtmlChangeReporter(
059          HtmlStreamEventReceiver renderer,
060          HtmlChangeListener<? super T> listener, @Nullable T context) {
061        this.output = new OutputChannel(renderer);
062        this.input = new InputChannel<T>(output, listener, context);
063      }
064    
065      /**
066       * Associates an input channel.  {@code this} receives events and forwards
067       * them to input.
068       */
069      public void setPolicy(HtmlSanitizer.Policy policy) {
070        this.input.policy = policy;
071      }
072    
073      public HtmlStreamEventReceiver getWrappedRenderer() { return output; }
074    
075      public HtmlSanitizer.Policy getWrappedPolicy() { return input; }
076    
077      private static final class InputChannel<T> implements HtmlSanitizer.Policy {
078        HtmlStreamEventReceiver policy;
079        final OutputChannel output;
080        final T context;
081        final HtmlChangeListener<? super T> listener;
082    
083        InputChannel(
084            OutputChannel output, HtmlChangeListener<? super T> listener,
085            @Nullable T context) {
086          this.output = output;
087          this.context = context;
088          this.listener = listener;
089        }
090    
091        public void openDocument() {
092          policy.openDocument();
093        }
094    
095        public void closeDocument() {
096          policy.closeDocument();
097        }
098    
099        public void openTag(String elementName, List<String> attrs) {
100          output.expectedElementName = elementName;
101          output.expectedAttrNames.clear();
102          for (int i = 0, n = attrs.size(); i < n; i += 2) {
103            output.expectedAttrNames.add(attrs.get(i));
104          }
105          policy.openTag(elementName, attrs);
106          {
107            // Gather the notification details to avoid any problems with the
108            // listener re-entering the stream event receiver.  This shouldn't
109            // occur, but if it does it will be a source of subtle confusing bugs.
110            String discardedElementName = output.expectedElementName;
111            output.expectedElementName = null;
112            int nExpected = output.expectedAttrNames.size();
113            String[] discardedAttrNames =
114                nExpected != 0 && discardedElementName == null
115                ? output.expectedAttrNames.toArray(new String[nExpected])
116                : ZERO_STRINGS;
117            output.expectedAttrNames.clear();
118            // Dispatch notifications to the listener.
119            if (discardedElementName != null) {
120              listener.discardedTag(context, discardedElementName);
121            }
122            if (discardedAttrNames.length != 0) {
123              listener.discardedAttributes(
124                  context, elementName, discardedAttrNames);
125            }
126          }
127        }
128    
129        public void closeTag(String elementName) {
130          policy.closeTag(elementName);
131        }
132    
133        public void text(String textChunk) {
134          policy.text(textChunk);
135        }
136      }
137    
138      private static final class OutputChannel implements HtmlStreamEventReceiver {
139        private final HtmlStreamEventReceiver renderer;
140        String expectedElementName;
141        Set<String> expectedAttrNames = new LinkedHashSet<String>();
142    
143        OutputChannel(HtmlStreamEventReceiver renderer) {
144          this.renderer = renderer;
145        }
146    
147        public void openDocument() {
148          renderer.openDocument();
149        }
150    
151        public void closeDocument() {
152          renderer.closeDocument();
153        }
154    
155        public void openTag(String elementName, List<String> attrs) {
156          if (elementName.equals(expectedElementName)) {
157            expectedElementName = null;
158          }
159          for (int i = 0, n = attrs.size(); i < n; i += 2) {
160            expectedAttrNames.remove(attrs.get(i));
161          }
162          renderer.openTag(elementName, attrs);
163        }
164    
165        public void closeTag(String elementName) {
166          renderer.closeTag(elementName);
167        }
168    
169        public void text(String text) {
170          renderer.text(text);
171        }
172      }
173    
174      private static final String[] ZERO_STRINGS = new String[0];
175    }