// Copyright (c) 2013, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html.examples;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.owasp.html.Handler;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.HtmlSanitizer;
import org.owasp.html.HtmlStreamEventReceiver;
import org.owasp.html.HtmlStreamRenderer;
import org.owasp.html.HtmlTextEscapingMode;
import org.owasp.html.PolicyFactory;
import org.owasp.html.TagBalancingHtmlStreamEventReceiver;

/**
 * Demonstrates a custom {@link HtmlStreamEventReceiver} that writes the
 * domain of each link or inline image immediately after that link or image.
 */
public class UrlTextExample {

  /**
   * Forwards every event to an underlying receiver and, for {@code a}
   * elements with an {@code href} and {@code img} elements with a
   * {@code src} whose URL yields a domain, follows the element with the
   * text {@code " - "} plus that domain.
   */
  static class AppendDomainAfterText implements HtmlStreamEventReceiver {
    final HtmlStreamEventReceiver underlying;

    /**
     * One entry for each non-void element opened and not yet closed, most
     * recently opened last.  An entry is {@code null} when nothing should
     * be written after the element's close tag.
     */
    private final List<String> suffixStack = new ArrayList<String>();

    AppendDomainAfterText(HtmlStreamEventReceiver underlying) {
      this.underlying = underlying;
    }

    public void openDocument() {
      underlying.openDocument();
    }

    public void closeDocument() {
      underlying.closeDocument();
    }

    public void openTag(String elementName, List<String> attribs) {
      underlying.openTag(elementName, attribs);

      String suffix = suffixFor(elementName, attribs);

      if (!HtmlTextEscapingMode.isVoidElement(elementName)) {
        // Remember the suffix (possibly null) until the matching closeTag
        // so that it lands after the element's content.
        suffixStack.add(suffix);
        return;
      }
      // A void element such as <img> gets no closeTag call, so its suffix
      // has to be written right away.
      if (suffix != null) {
        text(suffix);
      }
    }

    public void closeTag(String elementName) {
      underlying.closeTag(elementName);
      if (suffixStack.isEmpty()) {
        return;
      }
      // The top of the stack belongs to the element that was just closed.
      String suffix = suffixStack.remove(suffixStack.size() - 1);
      if (suffix != null) {
        text(suffix);
      }
    }

    public void text(String text) {
      underlying.text(text);
    }

    /**
     * Names the attribute that carries the URL for the given element.
     *
     * @return {@code "href"} for {@code a}, {@code "src"} for {@code img},
     *     and {@code null} for every other element name.
     */
    private static String urlAttributeFor(String elementName) {
      if ("a".equals(elementName)) {
        return "href";
      }
      if ("img".equals(elementName)) {
        return "src";
      }
      return null;
    }

    /**
     * Computes the text to show after an element.
     *
     * @param elementName the name of the element being opened.
     * @param attribs alternating attribute names and values.
     * @return {@code " - "} followed by the domain of the element's first
     *     URL attribute, or {@code null} when the element has no such
     *     attribute or no domain can be extracted from its value.
     */
    private static String suffixFor(String elementName, List<String> attribs) {
      if (attribs.isEmpty()) {
        return null;
      }
      String urlAttrName = urlAttributeFor(elementName);
      if (urlAttrName == null) {
        return null;
      }
      // Names sit at even indices; each value follows its name.
      int attribCount = attribs.size();
      for (int nameIdx = 0; nameIdx < attribCount; nameIdx += 2) {
        if (!urlAttrName.equals(attribs.get(nameIdx))) {
          continue;
        }
        // Only the first occurrence of the attribute is considered.
        String domain = domainOf(attribs.get(nameIdx + 1).trim());
        return domain != null ? " - " + domain : null;
      }
      return null;
    }
  }

  /**
   * Sanitizes each input with a policy that allows common formatting
   * elements, links and images, annotates links and images with their
   * domain, and appends the combined HTML to {@code out}.
   *
   * @param out receives the sanitized HTML of all inputs, concatenated.
   * @param argv the HTML snippets to sanitize.
   * @throws IOException if appending to {@code out} fails.
   */
  public static void run(Appendable out, String... argv) throws IOException {
    PolicyFactory factory = new HtmlPolicyBuilder()
        .allowAttributes("src").onElements("img")
        .allowAttributes("href").onElements("a")
        // Allow some URLs through.
        .allowStandardUrlProtocols()
        .allowElements(
            "a", "label", "h1", "h2", "h3", "h4", "h5", "h6",
            "p", "i", "b", "u", "strong", "em", "small", "big", "pre", "code",
            "cite", "samp", "sub", "sup", "strike", "center", "blockquote",
            "hr", "br", "col", "font", "span", "div", "img",
            "ul", "ol", "li", "dd", "dt", "dl", "tbody", "thead", "tfoot",
            "table", "td", "th", "tr", "colgroup", "fieldset", "legend")
        .toFactory();

    StringBuilder htmlOut = new StringBuilder();

    // Last stage: puts tags and text onto the output buffer.
    HtmlStreamEventReceiver renderer =
        HtmlStreamRenderer.create(htmlOut, Handler.DO_NOTHING);
    // Middle stage: forwards events to the renderer, adding domain text.
    HtmlStreamEventReceiver domainAppender =
        new AppendDomainAfterText(renderer);
    // First stage: the domain appender assumes that openTag and closeTag
    // events line up with one another, so a tag balancer feeds it.
    HtmlStreamEventReceiver balancer =
        new TagBalancingHtmlStreamEventReceiver(domainAppender);

    HtmlSanitizer.Policy policy = factory.apply(balancer);

    for (String input : argv) {
      HtmlSanitizer.sanitize(input, policy);
    }

    out.append(htmlOut);
  }

  /**
   * Sanitizes the command line arguments and prints the result to standard
   * output followed by a line break.
   */
  public static void main(String... argv) throws IOException {
    run(System.out, argv);
    System.out.println();
  }

  /** True for the characters that delimit path, query and fragment. */
  private static boolean isUrlDelimiter(char ch) {
    return ch == '/' || ch == '?' || ch == '#';
  }

  /**
   * The domain (actually authority component) of an HTML5 URL.
   * If the input is not hierarchical, then this has undefined behavior.
   *
   * @return the text between a leading {@code "//"} or the first
   *     {@code "://"} and the next {@code '/'}, {@code '?'} or {@code '#'}
   *     (or the end of the input), or {@code null} when there is no such
   *     marker, when one of those three characters precedes the
   *     {@code "://"}, or when that text is empty.
   */
  private static String domainOf(String url) {
    final int authorityStart;
    if (url.startsWith("//")) {
      // Protocol-relative URL: the authority follows the two slashes.
      authorityStart = 2;
    } else {
      int separator = url.indexOf("://");
      if (separator < 0) {
        return null;
      }
      // A delimiter ahead of "://" means the marker sits inside a path,
      // query or fragment rather than right after a scheme.
      for (int i = 0; i < separator; ++i) {
        if (isUrlDelimiter(url.charAt(i))) {
          return null;
        }
      }
      authorityStart = separator + 3;
    }

    // The authority runs up to the first delimiter or the end of input.
    int authorityEnd = authorityStart;
    int length = url.length();
    while (authorityEnd < length && !isUrlDelimiter(url.charAt(authorityEnd))) {
      ++authorityEnd;
    }

    return authorityStart < authorityEnd
        ? url.substring(authorityStart, authorityEnd)
        : null;
  }
}