001 // Copyright (c) 2011, Mike Samuel
002 // All rights reserved.
003 //
004 // Redistribution and use in source and binary forms, with or without
005 // modification, are permitted provided that the following conditions
006 // are met:
007 //
008 // Redistributions of source code must retain the above copyright
009 // notice, this list of conditions and the following disclaimer.
010 // Redistributions in binary form must reproduce the above copyright
011 // notice, this list of conditions and the following disclaimer in the
012 // documentation and/or other materials provided with the distribution.
013 // Neither the name of the OWASP nor the names of its contributors may
014 // be used to endorse or promote products derived from this software
015 // without specific prior written permission.
016 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027 // POSSIBILITY OF SUCH DAMAGE.
028
029 package org.owasp.html;
030
031 import java.util.List;
032
033 import javax.annotation.Nullable;
034 import javax.annotation.concurrent.Immutable;
035
036 import com.google.common.collect.ImmutableMap;
037 import com.google.common.collect.Lists;
038
039 /**
040 * Wraps an HTML stream event receiver to fill in missing close tags.
041 * If the balancer is given the HTML {@code <p>1<p>2}, the wrapped receiver will
042 * see events equivalent to {@code <p>1</p><p>2</p>}.
043 *
044 * @author Mike Samuel <mikesamuel@gmail.com>
045 */
046 @TCB
047 public class TagBalancingHtmlStreamEventReceiver
048 implements HtmlStreamEventReceiver {
049 private final HtmlStreamEventReceiver underlying;
050 private int nestingLimit = Integer.MAX_VALUE;
051 private final List<ElementContainmentInfo> openElements
052 = Lists.newArrayList();
053
/**
 * Creates a balancer that forwards its re-balanced event stream to
 * {@code underlying}.
 *
 * @param underlying the receiver that is sent every event this balancer
 *     decides to emit.
 */
public TagBalancingHtmlStreamEventReceiver(
    final HtmlStreamEventReceiver underlying) {
  this.underlying = underlying;
}
058
059 public void setNestingLimit(int limit) {
060 if (openElements.size() > limit) {
061 throw new IllegalStateException();
062 }
063 this.nestingLimit = limit;
064 }
065
066 public void openDocument() {
067 underlying.openDocument();
068 }
069
070 public void closeDocument() {
071 for (int i = Math.min(nestingLimit, openElements.size()); --i >= 0;) {
072 underlying.closeTag(openElements.get(i).elementName);
073 }
074 openElements.clear();
075 underlying.closeDocument();
076 }
077
078 public void openTag(String elementName, List<String> attrs) {
079 String canonElementName = HtmlLexer.canonicalName(elementName);
080 ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
081 canonElementName);
082 // Treat unrecognized tags as void, but emit closing tags in closeTag().
083 if (elInfo == null) {
084 if (openElements.size() < nestingLimit) {
085 underlying.openTag(elementName, attrs);
086 }
087 return;
088 }
089
090 prepareForContent(elInfo);
091
092 if (openElements.size() < nestingLimit) {
093 underlying.openTag(elInfo.elementName, attrs);
094 }
095 if (!elInfo.isVoid) {
096 openElements.add(elInfo);
097 }
098 }
099
100 private void prepareForContent(ElementContainmentInfo elInfo) {
101 int nOpen = openElements.size();
102 if (nOpen != 0) {
103 ElementContainmentInfo top = openElements.get(nOpen - 1);
104 if ((top.contents & elInfo.types) == 0) {
105 ElementContainmentInfo blockContainerChild = top.blockContainerChild;
106 // Open implied elements, such as list-items and table cells & rows.
107 if (blockContainerChild != null
108 && (blockContainerChild.contents & elInfo.types) != 0) {
109 underlying.openTag(
110 blockContainerChild.elementName, Lists.<String>newArrayList());
111 openElements.add(blockContainerChild);
112 top = blockContainerChild;
113 ++nOpen;
114 }
115 }
116
117 // Close all the elements that cannot contain the element to open.
118 List<ElementContainmentInfo> toResumeInReverse = null;
119 while (true) {
120 if ((top.contents & elInfo.types) != 0) { break; }
121 if (openElements.size() < nestingLimit) {
122 underlying.closeTag(top.elementName);
123 }
124 openElements.remove(--nOpen);
125 if (top.resumable) {
126 if (toResumeInReverse == null) {
127 toResumeInReverse = Lists.newArrayList();
128 }
129 toResumeInReverse.add(top);
130 }
131 if (nOpen == 0) { break; }
132 top = openElements.get(nOpen - 1);
133 }
134
135 if (toResumeInReverse != null) {
136 resume(toResumeInReverse);
137 }
138 }
139 }
140
141 public void closeTag(String elementName) {
142 String canonElementName = HtmlLexer.canonicalName(elementName);
143 ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
144 canonElementName);
145 if (elInfo == null) { // Allow unrecognized end tags through.
146 if (openElements.size() < nestingLimit) {
147 underlying.closeTag(elementName);
148 }
149 return;
150 }
151 int index = openElements.lastIndexOf(elInfo);
152 // Let any of </h1>, </h2>, ... close other header tags.
153 if (isHeaderElementName(canonElementName)) {
154 for (int i = openElements.size(), limit = index + 1; -- i >= limit;) {
155 ElementContainmentInfo openEl = openElements.get(i);
156 if (isHeaderElementName(openEl.elementName)) {
157 elInfo = openEl;
158 index = i;
159 canonElementName = openEl.elementName;
160 break;
161 }
162 }
163 }
164 if (index < 0) {
165 return; // Don't close unopened tags.
166 }
167
168 // Ensure that index is in the scope of closeable elements.
169 // This approximates the "has an element in *** scope" predicates defined at
170 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
171 // #has-an-element-in-the-specific-scope
172 int blockingScopes = elInfo.blockedByScopes;
173 for (int i = openElements.size(); --i > index;) {
174 if ((openElements.get(i).inScopes & blockingScopes) != 0) {
175 return;
176 }
177 }
178
179 int last = openElements.size();
180 // Close all the elements that cannot contain the element to open.
181 List<ElementContainmentInfo> toResumeInReverse = null;
182 while (--last > index) {
183 ElementContainmentInfo unclosed = openElements.remove(last);
184 if (last + 1 < nestingLimit) {
185 underlying.closeTag(unclosed.elementName);
186 }
187 if (unclosed.resumable) {
188 if (toResumeInReverse == null) {
189 toResumeInReverse = Lists.newArrayList();
190 }
191 toResumeInReverse.add(unclosed);
192 }
193 }
194 if (openElements.size() < nestingLimit) {
195 underlying.closeTag(elInfo.elementName);
196 }
197 openElements.remove(index);
198 if (toResumeInReverse != null) {
199 resume(toResumeInReverse);
200 }
201 }
202
203 private void resume(List<ElementContainmentInfo> toResumeInReverse) {
204 for (ElementContainmentInfo toResume : toResumeInReverse) {
205 // TODO: If resuming of things other than plain formatting tags like <b>
206 // and <i>, then we need to store the attributes for resumable tags so
207 // that we can resume with the appropriate attributes.
208 if (openElements.size() < nestingLimit) {
209 underlying.openTag(toResume.elementName, Lists.<String>newArrayList());
210 }
211 openElements.add(toResume);
212 }
213 }
214
215 private static final long HTML_SPACE_CHAR_BITMASK =
216 (1L << ' ')
217 | (1L << '\t')
218 | (1L << '\n')
219 | (1L << '\u000c')
220 | (1L << '\r');
221
222 public static boolean isInterElementWhitespace(String text) {
223 int n = text.length();
224 for (int i = 0; i < n; ++i) {
225 int ch = text.charAt(i);
226 if (ch > 0x20 || (HTML_SPACE_CHAR_BITMASK & (1L << ch)) == 0) {
227 return false;
228 }
229 }
230 return true;
231 }
232
233 public void text(String text) {
234 if (!isInterElementWhitespace(text)) {
235 prepareForContent(ElementContainmentRelationships.CHARACTER_DATA_ONLY);
236 }
237
238 if (openElements.size() < nestingLimit) {
239 underlying.text(text);
240 }
241 }
242
243 private static boolean isHeaderElementName(String canonElementName) {
244 return canonElementName.length() == 2 && canonElementName.charAt(0) == 'h'
245 && canonElementName.charAt(1) <= '9';
246 }
247
248
249 @Immutable
250 private static final class ElementContainmentInfo {
251 final String elementName;
252 /**
253 * True if the adoption agency algorithm allows an element to be resumed
254 * after a mis-nested end tag closes it.
255 * E.g. in {@code <b>Foo<i>Bar</b>Baz</i>} the {@code <i>} element is
256 * resumed after the {@code <b>} element is closed.
257 */
258 final boolean resumable;
259 /** A set of bits of element groups into which the element falls. */
260 final int types;
261 /** The type of elements that an element can contain. */
262 final int contents;
263 /** True if the element has no content -- not even text content. */
264 final boolean isVoid;
265 /** A legal child of this node that can contain block content. */
266 final @Nullable ElementContainmentInfo blockContainerChild;
267 /** A bit set of close tag scopes that block this element's close tags. */
268 final int blockedByScopes;
269 /** A bit set of scopes groups into which this element falls. */
270 final int inScopes;
271
272 ElementContainmentInfo(
273 String elementName, boolean resumable, int types, int contents,
274 @Nullable ElementContainmentInfo blockContainerChild,
275 int inScopes) {
276 this.elementName = elementName;
277 this.resumable = resumable;
278 this.types = types;
279 this.contents = contents;
280 this.isVoid = contents == 0
281 && HtmlTextEscapingMode.isVoidElement(elementName);
282 this.blockContainerChild = blockContainerChild;
283 this.blockedByScopes =
284 ElementContainmentRelationships.CloseTagScope.ALL & ~inScopes;
285 this.inScopes = inScopes;
286 }
287
288 @Override public String toString() {
289 return "<" + elementName + ">";
290 }
291 }
292
293 static final ImmutableMap<String, ElementContainmentInfo>
294 ELEMENT_CONTAINMENT_RELATIONSHIPS
295 = new ElementContainmentRelationships().toMap();
296
297 private static class ElementContainmentRelationships {
298 private enum ElementGroup {
299 BLOCK,
300 INLINE,
301 INLINE_MINUS_A,
302 MIXED,
303 TABLE_CONTENT,
304 HEAD_CONTENT,
305 TOP_CONTENT,
306 AREA_ELEMENT,
307 FORM_ELEMENT,
308 LEGEND_ELEMENT,
309 LI_ELEMENT,
310 DL_PART,
311 P_ELEMENT,
312 OPTIONS_ELEMENT,
313 OPTION_ELEMENT,
314 PARAM_ELEMENT,
315 TABLE_ELEMENT,
316 TR_ELEMENT,
317 TD_ELEMENT,
318 COL_ELEMENT,
319 CHARACTER_DATA,
320 ;
321 }
322
323 /**
324 * An identifier for one of the "has a *** element in scope" predicates
325 * used by HTML5 to decide when a close tag implicitly closes tags above
326 * the target element on the open element stack.
327 */
328 private enum CloseTagScope {
329 COMMON,
330 BUTTON,
331 LIST_ITEM,
332 TABLE,
333 ;
334
335 static final int ALL = (1 << values().length) - 1;
336 }
337
338 private static int elementGroupBits(ElementGroup a) {
339 return 1 << a.ordinal();
340 }
341
342 private static int elementGroupBits(
343 ElementGroup a, ElementGroup b) {
344 return (1 << a.ordinal()) | (1 << b.ordinal());
345 }
346
347 private static int elementGroupBits(
348 ElementGroup a, ElementGroup b, ElementGroup c) {
349 return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
350 }
351
352 private static int elementGroupBits(
353 ElementGroup... bits) {
354 int bitField = 0;
355 for (ElementGroup bit : bits) {
356 bitField |= (1 << bit.ordinal());
357 }
358 return bitField;
359 }
360
361 private static int scopeBits(CloseTagScope a) {
362 return 1 << a.ordinal();
363 }
364
365 private static int scopeBits(
366 CloseTagScope a, CloseTagScope b, CloseTagScope c) {
367 return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
368 }
369
370 private ImmutableMap.Builder<String, ElementContainmentInfo> definitions
371 = ImmutableMap.builder();
372
373 private ElementContainmentInfo defineElement(
374 String elementName, boolean resumable, int types, int contentTypes) {
375 return defineElement(elementName, resumable, types, contentTypes, null);
376 }
377
378 private ElementContainmentInfo defineElement(
379 String elementName, boolean resumable, int types, int contentTypes,
380 int inScopes) {
381 return defineElement(
382 elementName, resumable, types, contentTypes, null, inScopes);
383 }
384
385 private ElementContainmentInfo defineElement(
386 String elementName, boolean resumable, int types, int contentTypes,
387 @Nullable ElementContainmentInfo blockContainer) {
388 return defineElement(
389 elementName, resumable, types, contentTypes, blockContainer, 0);
390 }
391
392 private ElementContainmentInfo defineElement(
393 String elementName, boolean resumable, int types, int contentTypes,
394 @Nullable ElementContainmentInfo blockContainer, int inScopes) {
395 ElementContainmentInfo info = new ElementContainmentInfo(
396 elementName, resumable, types, contentTypes, blockContainer,
397 inScopes);
398 definitions.put(elementName, info);
399 return info;
400 }
401
402 private ImmutableMap<String, ElementContainmentInfo> toMap() {
403 return definitions.build();
404 }
405
406 {
407 defineElement(
408 "a", false, elementGroupBits(
409 ElementGroup.INLINE
410 ), elementGroupBits(
411 ElementGroup.INLINE_MINUS_A
412 ));
413 defineElement(
414 "abbr", true, elementGroupBits(
415 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
416 ), elementGroupBits(
417 ElementGroup.INLINE
418 ));
419 defineElement(
420 "acronym", true, elementGroupBits(
421 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
422 ), elementGroupBits(
423 ElementGroup.INLINE
424 ));
425 defineElement(
426 "address", false, elementGroupBits(
427 ElementGroup.BLOCK
428 ), elementGroupBits(
429 ElementGroup.INLINE, ElementGroup.P_ELEMENT
430 ));
431 defineElement(
432 "applet", false, elementGroupBits(
433 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
434 ), elementGroupBits(
435 ElementGroup.BLOCK, ElementGroup.INLINE,
436 ElementGroup.PARAM_ELEMENT
437 ), scopeBits(
438 CloseTagScope.COMMON, CloseTagScope.BUTTON,
439 CloseTagScope.LIST_ITEM
440 ));
441 defineElement(
442 "area", false, elementGroupBits(ElementGroup.AREA_ELEMENT), 0);
443 defineElement(
444 "audio", false, elementGroupBits(
445 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
446 ), 0);
447 defineElement(
448 "b", true, elementGroupBits(
449 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
450 ), elementGroupBits(
451 ElementGroup.INLINE
452 ));
453 defineElement(
454 "base", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
455 defineElement(
456 "basefont", false, elementGroupBits(
457 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
458 ), 0);
459 defineElement(
460 "bdi", true, elementGroupBits(
461 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
462 ), elementGroupBits(
463 ElementGroup.INLINE
464 ));
465 defineElement(
466 "bdo", true, elementGroupBits(
467 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
468 ), elementGroupBits(
469 ElementGroup.INLINE
470 ));
471 defineElement(
472 "big", true, elementGroupBits(
473 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
474 ), elementGroupBits(
475 ElementGroup.INLINE
476 ));
477 defineElement(
478 "blink", true, elementGroupBits(
479 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
480 ), elementGroupBits(
481 ElementGroup.INLINE
482 ));
483 defineElement(
484 "blockquote", false, elementGroupBits(
485 ElementGroup.BLOCK
486 ), elementGroupBits(
487 ElementGroup.BLOCK, ElementGroup.INLINE
488 ));
489 defineElement(
490 "body", false, elementGroupBits(
491 ElementGroup.TOP_CONTENT
492 ), elementGroupBits(
493 ElementGroup.BLOCK, ElementGroup.INLINE
494 ));
495 defineElement(
496 "br", false, elementGroupBits(
497 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
498 ), 0);
499 defineElement(
500 "button", false, elementGroupBits(
501 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
502 ), elementGroupBits(
503 ElementGroup.BLOCK, ElementGroup.INLINE
504 ), scopeBits(CloseTagScope.BUTTON));
505 defineElement(
506 "canvas", false, elementGroupBits(
507 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
508 ), elementGroupBits(
509 ElementGroup.INLINE
510 ));
511 defineElement(
512 "caption", false, elementGroupBits(
513 ElementGroup.TABLE_CONTENT
514 ), elementGroupBits(
515 ElementGroup.INLINE
516 ), scopeBits(
517 CloseTagScope.COMMON, CloseTagScope.BUTTON,
518 CloseTagScope.LIST_ITEM
519 ));
520 defineElement(
521 "center", false, elementGroupBits(
522 ElementGroup.BLOCK
523 ), elementGroupBits(
524 ElementGroup.BLOCK, ElementGroup.INLINE
525 ));
526 defineElement(
527 "cite", true, elementGroupBits(
528 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
529 ), elementGroupBits(
530 ElementGroup.INLINE
531 ));
532 defineElement(
533 "code", true, elementGroupBits(
534 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
535 ), elementGroupBits(
536 ElementGroup.INLINE
537 ));
538 defineElement(
539 "col", false, elementGroupBits(
540 ElementGroup.TABLE_CONTENT, ElementGroup.COL_ELEMENT
541 ), 0);
542 defineElement(
543 "colgroup", false, elementGroupBits(
544 ElementGroup.TABLE_CONTENT
545 ), elementGroupBits(
546 ElementGroup.COL_ELEMENT
547 ));
548 ElementContainmentInfo DD = defineElement(
549 "dd", false, elementGroupBits(
550 ElementGroup.DL_PART
551 ), elementGroupBits(
552 ElementGroup.BLOCK, ElementGroup.INLINE
553 ));
554 defineElement(
555 "del", true, elementGroupBits(
556 ElementGroup.BLOCK, ElementGroup.INLINE,
557 ElementGroup.MIXED
558 ), elementGroupBits(
559 ElementGroup.BLOCK, ElementGroup.INLINE
560 ));
561 defineElement(
562 "dfn", true, elementGroupBits(
563 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
564 ), elementGroupBits(
565 ElementGroup.INLINE
566 ));
567 defineElement(
568 "dir", false, elementGroupBits(
569 ElementGroup.BLOCK
570 ), elementGroupBits(
571 ElementGroup.LI_ELEMENT
572 ));
573 defineElement(
574 "div", false, elementGroupBits(
575 ElementGroup.BLOCK
576 ), elementGroupBits(
577 ElementGroup.BLOCK, ElementGroup.INLINE
578 ));
579 defineElement(
580 "dl", false, elementGroupBits(
581 ElementGroup.BLOCK
582 ), elementGroupBits(
583 ElementGroup.DL_PART
584 ),
585 DD);
586 defineElement(
587 "dt", false, elementGroupBits(
588 ElementGroup.DL_PART
589 ), elementGroupBits(
590 ElementGroup.INLINE
591 ));
592 defineElement(
593 "em", true, elementGroupBits(
594 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
595 ), elementGroupBits(
596 ElementGroup.INLINE
597 ));
598 defineElement(
599 "fieldset", false, elementGroupBits(
600 ElementGroup.BLOCK
601 ), elementGroupBits(
602 ElementGroup.BLOCK, ElementGroup.INLINE,
603 ElementGroup.LEGEND_ELEMENT
604 ));
605 defineElement(
606 "font", false, elementGroupBits(
607 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
608 ), elementGroupBits(
609 ElementGroup.INLINE
610 ));
611 defineElement(
612 "form", false, elementGroupBits(
613 ElementGroup.BLOCK, ElementGroup.FORM_ELEMENT
614 ), elementGroupBits(
615 ElementGroup.BLOCK, ElementGroup.INLINE,
616 ElementGroup.INLINE_MINUS_A, ElementGroup.TR_ELEMENT,
617 ElementGroup.TD_ELEMENT
618 ));
619 defineElement(
620 "h1", false, elementGroupBits(
621 ElementGroup.BLOCK
622 ), elementGroupBits(
623 ElementGroup.INLINE
624 ));
625 defineElement(
626 "h2", false, elementGroupBits(
627 ElementGroup.BLOCK
628 ), elementGroupBits(
629 ElementGroup.INLINE
630 ));
631 defineElement(
632 "h3", false, elementGroupBits(
633 ElementGroup.BLOCK
634 ), elementGroupBits(
635 ElementGroup.INLINE
636 ));
637 defineElement(
638 "h4", false, elementGroupBits(
639 ElementGroup.BLOCK
640 ), elementGroupBits(
641 ElementGroup.INLINE
642 ));
643 defineElement(
644 "h5", false, elementGroupBits(
645 ElementGroup.BLOCK
646 ), elementGroupBits(
647 ElementGroup.INLINE
648 ));
649 defineElement(
650 "h6", false, elementGroupBits(
651 ElementGroup.BLOCK
652 ), elementGroupBits(
653 ElementGroup.INLINE
654 ));
655 defineElement(
656 "head", false, elementGroupBits(
657 ElementGroup.TOP_CONTENT
658 ), elementGroupBits(
659 ElementGroup.HEAD_CONTENT
660 ));
661 defineElement(
662 "hr", false, elementGroupBits(ElementGroup.BLOCK), 0);
663 defineElement(
664 "html", false, 0, elementGroupBits(ElementGroup.TOP_CONTENT),
665 CloseTagScope.ALL);
666 defineElement(
667 "i", true, elementGroupBits(
668 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
669 ), elementGroupBits(
670 ElementGroup.INLINE
671 ));
672 defineElement(
673 "iframe", false, elementGroupBits(
674 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
675 ), elementGroupBits(
676 ElementGroup.BLOCK, ElementGroup.INLINE
677 ));
678 defineElement(
679 "img", false, elementGroupBits(
680 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
681 ), 0);
682 defineElement(
683 "input", false, elementGroupBits(
684 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
685 ), 0);
686 defineElement(
687 "ins", true, elementGroupBits(
688 ElementGroup.BLOCK, ElementGroup.INLINE
689 ), elementGroupBits(
690 ElementGroup.BLOCK, ElementGroup.INLINE
691 ));
692 defineElement(
693 "isindex", false, elementGroupBits(ElementGroup.INLINE), 0);
694 defineElement(
695 "kbd", true, elementGroupBits(
696 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
697 ), elementGroupBits(
698 ElementGroup.INLINE
699 ));
700 defineElement(
701 "label", false, elementGroupBits(
702 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
703 ), elementGroupBits(
704 ElementGroup.INLINE
705 ));
706 defineElement(
707 "legend", false, elementGroupBits(
708 ElementGroup.LEGEND_ELEMENT
709 ), elementGroupBits(
710 ElementGroup.INLINE
711 ));
712 ElementContainmentInfo LI = defineElement(
713 "li", false, elementGroupBits(
714 ElementGroup.LI_ELEMENT
715 ), elementGroupBits(
716 ElementGroup.BLOCK, ElementGroup.INLINE
717 ));
718 defineElement(
719 "link", false, elementGroupBits(
720 ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
721 ), 0);
722 defineElement(
723 "listing", false, elementGroupBits(
724 ElementGroup.BLOCK
725 ), elementGroupBits(
726 ElementGroup.INLINE
727 ));
728 defineElement(
729 "map", false, elementGroupBits(
730 ElementGroup.INLINE
731 ), elementGroupBits(
732 ElementGroup.BLOCK, ElementGroup.AREA_ELEMENT
733 ));
734 defineElement(
735 "meta", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
736 defineElement(
737 "nobr", false, elementGroupBits(
738 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
739 ), elementGroupBits(
740 ElementGroup.INLINE
741 ));
742 defineElement(
743 "noframes", false, elementGroupBits(
744 ElementGroup.BLOCK, ElementGroup.TOP_CONTENT
745 ), elementGroupBits(
746 ElementGroup.BLOCK, ElementGroup.INLINE,
747 ElementGroup.TOP_CONTENT
748 ));
749 defineElement(
750 "noscript", false, elementGroupBits(
751 ElementGroup.BLOCK
752 ), elementGroupBits(
753 ElementGroup.BLOCK, ElementGroup.INLINE
754 ));
755 defineElement(
756 "object", false, elementGroupBits(
757 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
758 ElementGroup.HEAD_CONTENT
759 ), elementGroupBits(
760 ElementGroup.BLOCK, ElementGroup.INLINE,
761 ElementGroup.PARAM_ELEMENT
762 ), scopeBits(
763 CloseTagScope.COMMON, CloseTagScope.BUTTON,
764 CloseTagScope.LIST_ITEM
765 ));
766 defineElement(
767 "ol", false, elementGroupBits(
768 ElementGroup.BLOCK
769 ), elementGroupBits(
770 ElementGroup.LI_ELEMENT
771 ),
772 LI,
773 scopeBits(CloseTagScope.LIST_ITEM));
774 defineElement(
775 "optgroup", false, elementGroupBits(
776 ElementGroup.OPTIONS_ELEMENT
777 ), elementGroupBits(
778 ElementGroup.OPTIONS_ELEMENT
779 ));
780 defineElement(
781 "option", false, elementGroupBits(
782 ElementGroup.OPTIONS_ELEMENT, ElementGroup.OPTION_ELEMENT
783 ), elementGroupBits(
784 ElementGroup.CHARACTER_DATA
785 ));
786 defineElement(
787 "p", false, elementGroupBits(
788 ElementGroup.BLOCK, ElementGroup.P_ELEMENT
789 ), elementGroupBits(
790 ElementGroup.INLINE, ElementGroup.TABLE_ELEMENT
791 ));
792 defineElement(
793 "param", false, elementGroupBits(ElementGroup.PARAM_ELEMENT), 0);
794 defineElement(
795 "pre", false, elementGroupBits(
796 ElementGroup.BLOCK
797 ), elementGroupBits(
798 ElementGroup.INLINE
799 ));
800 defineElement(
801 "q", true, elementGroupBits(
802 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
803 ), elementGroupBits(
804 ElementGroup.INLINE
805 ));
806 defineElement(
807 "s", true, elementGroupBits(
808 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
809 ), elementGroupBits(
810 ElementGroup.INLINE
811 ));
812 defineElement(
813 "samp", true, elementGroupBits(
814 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
815 ), elementGroupBits(
816 ElementGroup.INLINE
817 ));
818 defineElement(
819 "script", false, elementGroupBits(
820 ElementGroup.BLOCK, ElementGroup.INLINE,
821 ElementGroup.INLINE_MINUS_A, ElementGroup.MIXED,
822 ElementGroup.TABLE_CONTENT, ElementGroup.HEAD_CONTENT,
823 ElementGroup.TOP_CONTENT, ElementGroup.AREA_ELEMENT,
824 ElementGroup.FORM_ELEMENT, ElementGroup.LEGEND_ELEMENT,
825 ElementGroup.LI_ELEMENT, ElementGroup.DL_PART,
826 ElementGroup.P_ELEMENT, ElementGroup.OPTIONS_ELEMENT,
827 ElementGroup.OPTION_ELEMENT, ElementGroup.PARAM_ELEMENT,
828 ElementGroup.TABLE_ELEMENT, ElementGroup.TR_ELEMENT,
829 ElementGroup.TD_ELEMENT, ElementGroup.COL_ELEMENT
830 ), elementGroupBits(
831 ElementGroup.CHARACTER_DATA));
832 defineElement(
833 "select", false, elementGroupBits(
834 ElementGroup.INLINE
835 ), elementGroupBits(
836 ElementGroup.OPTIONS_ELEMENT
837 ));
838 defineElement(
839 "small", true, elementGroupBits(
840 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
841 ), elementGroupBits(
842 ElementGroup.INLINE
843 ));
844 defineElement(
845 "span", false, elementGroupBits(
846 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
847 ), elementGroupBits(
848 ElementGroup.INLINE
849 ));
850 defineElement(
851 "strike", true, elementGroupBits(
852 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
853 ), elementGroupBits(
854 ElementGroup.INLINE
855 ));
856 defineElement(
857 "strong", true, elementGroupBits(
858 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
859 ), elementGroupBits(
860 ElementGroup.INLINE
861 ));
862 defineElement(
863 "style", false, elementGroupBits(
864 ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
865 ), elementGroupBits(
866 ElementGroup.CHARACTER_DATA
867 ));
868 defineElement(
869 "sub", true, elementGroupBits(
870 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
871 ), elementGroupBits(
872 ElementGroup.INLINE
873 ));
874 defineElement(
875 "sup", true, elementGroupBits(
876 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
877 ), elementGroupBits(
878 ElementGroup.INLINE
879 ));
880 defineElement(
881 "table", false, elementGroupBits(
882 ElementGroup.BLOCK, ElementGroup.TABLE_ELEMENT
883 ), elementGroupBits(
884 ElementGroup.TABLE_CONTENT, ElementGroup.FORM_ELEMENT
885 ), CloseTagScope.ALL);
886 defineElement(
887 "tbody", false, elementGroupBits(
888 ElementGroup.TABLE_CONTENT
889 ), elementGroupBits(
890 ElementGroup.TR_ELEMENT
891 ));
892 ElementContainmentInfo TD = defineElement(
893 "td", false, elementGroupBits(
894 ElementGroup.TD_ELEMENT
895 ), elementGroupBits(
896 ElementGroup.BLOCK, ElementGroup.INLINE
897 ), scopeBits(
898 CloseTagScope.COMMON, CloseTagScope.BUTTON,
899 CloseTagScope.LIST_ITEM
900 ));
901 defineElement(
902 "textarea", false,
903 // No, a textarea cannot be inside a link.
904 elementGroupBits(ElementGroup.INLINE),
905 elementGroupBits(ElementGroup.CHARACTER_DATA));
906 defineElement(
907 "tfoot", false, elementGroupBits(
908 ElementGroup.TABLE_CONTENT
909 ), elementGroupBits(
910 ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
911 ElementGroup.TD_ELEMENT
912 ));
913 defineElement(
914 "th", false, elementGroupBits(
915 ElementGroup.TD_ELEMENT
916 ), elementGroupBits(
917 ElementGroup.BLOCK, ElementGroup.INLINE
918 ), scopeBits(
919 CloseTagScope.COMMON, CloseTagScope.BUTTON,
920 CloseTagScope.LIST_ITEM
921 ));
922 defineElement(
923 "thead", false, elementGroupBits(
924 ElementGroup.TABLE_CONTENT
925 ), elementGroupBits(
926 ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
927 ElementGroup.TD_ELEMENT
928 ));
929 defineElement(
930 "title", false, elementGroupBits(ElementGroup.HEAD_CONTENT),
931 elementGroupBits(ElementGroup.CHARACTER_DATA));
932 defineElement(
933 "tr", false, elementGroupBits(
934 ElementGroup.TABLE_CONTENT, ElementGroup.TR_ELEMENT
935 ), elementGroupBits(
936 ElementGroup.FORM_ELEMENT, ElementGroup.TD_ELEMENT
937 ),
938 TD);
939 defineElement(
940 "tt", true, elementGroupBits(
941 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
942 ), elementGroupBits(
943 ElementGroup.INLINE
944 ));
945 defineElement(
946 "u", true, elementGroupBits(
947 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
948 ), elementGroupBits(
949 ElementGroup.INLINE
950 ));
951 defineElement(
952 "ul", false, elementGroupBits(
953 ElementGroup.BLOCK
954 ), elementGroupBits(
955 ElementGroup.LI_ELEMENT
956 ),
957 LI,
958 scopeBits(CloseTagScope.LIST_ITEM));
959 defineElement(
960 "var", false, elementGroupBits(
961 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
962 ), elementGroupBits(
963 ElementGroup.INLINE
964 ));
965 defineElement(
966 "video", false, elementGroupBits(
967 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
968 ), 0);
969 defineElement(
970 "wbr", false, elementGroupBits(
971 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
972 ), 0);
973 defineElement(
974 "xmp", false, elementGroupBits(
975 ElementGroup.BLOCK
976 ), elementGroupBits(
977 ElementGroup.INLINE
978 ));
979
980 }
981
982 private static final ElementContainmentInfo CHARACTER_DATA_ONLY
983 = new ElementContainmentInfo(
984 "#text", false,
985 elementGroupBits(
986 ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
987 ElementGroup.BLOCK, ElementGroup.CHARACTER_DATA),
988 0, null, 0);
989 }
990
991 static boolean allowsPlainTextualContent(String canonElementName) {
992 ElementContainmentInfo info =
993 ELEMENT_CONTAINMENT_RELATIONSHIPS.get(canonElementName);
994 if (info == null
995 || ((info.contents
996 & ElementContainmentRelationships.CHARACTER_DATA_ONLY.types)
997 != 0)) {
998 switch (HtmlTextEscapingMode.getModeForTag(canonElementName)) {
999 case PCDATA: return true;
1000 case RCDATA: return true;
1001 case PLAIN_TEXT: return true;
1002 case VOID: return false;
1003 case CDATA:
1004 case CDATA_SOMETIMES:
1005 return "xmp".equals(canonElementName)
1006 || "listing".equals(canonElementName);
1007 }
1008 }
1009 return false;
1010 }
1011 }