001 // Copyright (c) 2011, Mike Samuel 002 // All rights reserved. 003 // 004 // Redistribution and use in source and binary forms, with or without 005 // modification, are permitted provided that the following conditions 006 // are met: 007 // 008 // Redistributions of source code must retain the above copyright 009 // notice, this list of conditions and the following disclaimer. 010 // Redistributions in binary form must reproduce the above copyright 011 // notice, this list of conditions and the following disclaimer in the 012 // documentation and/or other materials provided with the distribution. 013 // Neither the name of the OWASP nor the names of its contributors may 014 // be used to endorse or promote products derived from this software 015 // without specific prior written permission. 016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 027 // POSSIBILITY OF SUCH DAMAGE. 028 029 package org.owasp.html; 030 031 import java.util.LinkedHashSet; 032 import java.util.List; 033 import java.util.Set; 034 035 import javax.annotation.Nullable; 036 037 /** 038 * Sits between the HTML parser, and then policy, and the renderer so that it 039 * can report dropped elements and attributes to an {@link HtmlChangeListener}. 040 * 041 * <pre> 042 * HtmlChangeReporter<T> hcr = new HtmlChangeReporter<T>( 043 * renderer, htmlChangeListener, context); 044 * hcr.setPolicy(policyFactory.apply(hcr.getWrappedRenderer())); 045 * HtmlSanitizer.sanitize(html, hcr.getWrappedPolicy()); 046 * </pre> 047 * 048 * The renderer receives events from the policy unchanged, but the reporter 049 * notices differences between the events from the lexer and those from the 050 * policy. 051 * 052 * @param <T> The type of context value passed to the 053 */ 054 public final class HtmlChangeReporter<T> { 055 private final OutputChannel output; 056 private final InputChannel<T> input; 057 058 public HtmlChangeReporter( 059 HtmlStreamEventReceiver renderer, 060 HtmlChangeListener<? super T> listener, @Nullable T context) { 061 this.output = new OutputChannel(renderer); 062 this.input = new InputChannel<T>(output, listener, context); 063 } 064 065 /** 066 * Associates an input channel. {@code this} receives events and forwards 067 * them to input. 068 */ 069 public void setPolicy(HtmlSanitizer.Policy policy) { 070 this.input.policy = policy; 071 } 072 073 public HtmlStreamEventReceiver getWrappedRenderer() { return output; } 074 075 public HtmlSanitizer.Policy getWrappedPolicy() { return input; } 076 077 private static final class InputChannel<T> implements HtmlSanitizer.Policy { 078 HtmlStreamEventReceiver policy; 079 final OutputChannel output; 080 final T context; 081 final HtmlChangeListener<? super T> listener; 082 083 InputChannel( 084 OutputChannel output, HtmlChangeListener<? super T> listener, 085 @Nullable T context) { 086 this.output = output; 087 this.context = context; 088 this.listener = listener; 089 } 090 091 public void openDocument() { 092 policy.openDocument(); 093 } 094 095 public void closeDocument() { 096 policy.closeDocument(); 097 } 098 099 public void openTag(String elementName, List<String> attrs) { 100 output.expectedElementName = elementName; 101 output.expectedAttrNames.clear(); 102 for (int i = 0, n = attrs.size(); i < n; i += 2) { 103 output.expectedAttrNames.add(attrs.get(i)); 104 } 105 policy.openTag(elementName, attrs); 106 { 107 // Gather the notification details to avoid any problems with the 108 // listener re-entering the stream event receiver. This shouldn't 109 // occur, but if it does it will be a source of subtle confusing bugs. 110 String discardedElementName = output.expectedElementName; 111 output.expectedElementName = null; 112 int nExpected = output.expectedAttrNames.size(); 113 String[] discardedAttrNames = 114 nExpected != 0 && discardedElementName == null 115 ? output.expectedAttrNames.toArray(new String[nExpected]) 116 : ZERO_STRINGS; 117 output.expectedAttrNames.clear(); 118 // Dispatch notifications to the listener. 119 if (discardedElementName != null) { 120 listener.discardedTag(context, discardedElementName); 121 } 122 if (discardedAttrNames.length != 0) { 123 listener.discardedAttributes( 124 context, elementName, discardedAttrNames); 125 } 126 } 127 } 128 129 public void closeTag(String elementName) { 130 policy.closeTag(elementName); 131 } 132 133 public void text(String textChunk) { 134 policy.text(textChunk); 135 } 136 } 137 138 private static final class OutputChannel implements HtmlStreamEventReceiver { 139 private final HtmlStreamEventReceiver renderer; 140 String expectedElementName; 141 Set<String> expectedAttrNames = new LinkedHashSet<String>(); 142 143 OutputChannel(HtmlStreamEventReceiver renderer) { 144 this.renderer = renderer; 145 } 146 147 public void openDocument() { 148 renderer.openDocument(); 149 } 150 151 public void closeDocument() { 152 renderer.closeDocument(); 153 } 154 155 public void openTag(String elementName, List<String> attrs) { 156 if (elementName.equals(expectedElementName)) { 157 expectedElementName = null; 158 } 159 for (int i = 0, n = attrs.size(); i < n; i += 2) { 160 expectedAttrNames.remove(attrs.get(i)); 161 } 162 renderer.openTag(elementName, attrs); 163 } 164 165 public void closeTag(String elementName) { 166 renderer.closeTag(elementName); 167 } 168 169 public void text(String text) { 170 renderer.text(text); 171 } 172 } 173 174 private static final String[] ZERO_STRINGS = new String[0]; 175 }