001    // Copyright (c) 2011, Mike Samuel
002    // All rights reserved.
003    //
004    // Redistribution and use in source and binary forms, with or without
005    // modification, are permitted provided that the following conditions
006    // are met:
007    //
008    // Redistributions of source code must retain the above copyright
009    // notice, this list of conditions and the following disclaimer.
010    // Redistributions in binary form must reproduce the above copyright
011    // notice, this list of conditions and the following disclaimer in the
012    // documentation and/or other materials provided with the distribution.
013    // Neither the name of the OWASP nor the names of its contributors may
014    // be used to endorse or promote products derived from this software
015    // without specific prior written permission.
016    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027    // POSSIBILITY OF SUCH DAMAGE.
028    
029    package org.owasp.html;
030    
031    import java.util.List;
032    
033    import javax.annotation.Nullable;
034    import javax.annotation.concurrent.Immutable;
035    
036    import com.google.common.collect.ImmutableMap;
037    import com.google.common.collect.Lists;
038    
039    /**
040     * Wraps an HTML stream event receiver to fill in missing close tags.
041     * If the balancer is given the HTML {@code <p>1<p>2}, the wrapped receiver will
042     * see events equivalent to {@code <p>1</p><p>2</p>}.
043     *
044     * @author Mike Samuel <mikesamuel@gmail.com>
045     */
046    @TCB
047    public class TagBalancingHtmlStreamEventReceiver
048        implements HtmlStreamEventReceiver {
  /** The receiver to which the balanced stream of events is forwarded. */
  private final HtmlStreamEventReceiver underlying;
  /**
   * The event handlers compare the number of open elements against this
   * value before forwarding an event to {@link #underlying}.
   * Defaults to {@link Integer#MAX_VALUE}.
   */
  private int nestingLimit = Integer.MAX_VALUE;
  /**
   * The stack of elements that have been opened but not yet closed.
   * New elements are appended, so the innermost open element is last.
   */
  private final List<ElementContainmentInfo> openElements
      = Lists.newArrayList();
053    
  /**
   * @param underlying the receiver to which balanced events are forwarded.
   */
  public TagBalancingHtmlStreamEventReceiver(
      HtmlStreamEventReceiver underlying) {
    this.underlying = underlying;
  }
058    
059      public void setNestingLimit(int limit) {
060        if (openElements.size() > limit) {
061          throw new IllegalStateException();
062        }
063        this.nestingLimit = limit;
064      }
065    
  /** Forwards the start-of-document event to the underlying receiver. */
  public void openDocument() {
    underlying.openDocument();
  }
069    
070      public void closeDocument() {
071        for (int i = Math.min(nestingLimit, openElements.size()); --i >= 0;) {
072          underlying.closeTag(openElements.get(i).elementName);
073        }
074        openElements.clear();
075        underlying.closeDocument();
076      }
077    
078      public void openTag(String elementName, List<String> attrs) {
079        String canonElementName = HtmlLexer.canonicalName(elementName);
080        ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
081            canonElementName);
082        // Treat unrecognized tags as void, but emit closing tags in closeTag().
083        if (elInfo == null) {
084          if (openElements.size() < nestingLimit) {
085            underlying.openTag(elementName, attrs);
086          }
087          return;
088        }
089    
090        prepareForContent(elInfo);
091    
092        if (openElements.size() < nestingLimit) {
093          underlying.openTag(elInfo.elementName, attrs);
094        }
095        if (!elInfo.isVoid) {
096          openElements.add(elInfo);
097        }
098      }
099    
100      private void prepareForContent(ElementContainmentInfo elInfo) {
101        int nOpen = openElements.size();
102        if (nOpen != 0) {
103          ElementContainmentInfo top = openElements.get(nOpen - 1);
104          if ((top.contents & elInfo.types) == 0) {
105            ElementContainmentInfo blockContainerChild = top.blockContainerChild;
106            // Open implied elements, such as list-items and table cells & rows.
107            if (blockContainerChild != null
108                && (blockContainerChild.contents & elInfo.types) != 0) {
109              underlying.openTag(
110                  blockContainerChild.elementName, Lists.<String>newArrayList());
111              openElements.add(blockContainerChild);
112              top = blockContainerChild;
113              ++nOpen;
114            }
115          }
116    
117          // Close all the elements that cannot contain the element to open.
118          List<ElementContainmentInfo> toResumeInReverse = null;
119          while (true) {
120            if ((top.contents & elInfo.types) != 0) { break; }
121            if (openElements.size() < nestingLimit) {
122              underlying.closeTag(top.elementName);
123            }
124            openElements.remove(--nOpen);
125            if (top.resumable) {
126              if (toResumeInReverse == null) {
127                toResumeInReverse = Lists.newArrayList();
128              }
129              toResumeInReverse.add(top);
130            }
131            if (nOpen == 0) { break; }
132            top = openElements.get(nOpen - 1);
133          }
134    
135          if (toResumeInReverse != null) {
136            resume(toResumeInReverse);
137          }
138        }
139      }
140    
141      public void closeTag(String elementName) {
142        String canonElementName = HtmlLexer.canonicalName(elementName);
143        ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
144            canonElementName);
145        if (elInfo == null) {  // Allow unrecognized end tags through.
146          if (openElements.size() < nestingLimit) {
147            underlying.closeTag(elementName);
148          }
149          return;
150        }
151        int index = openElements.lastIndexOf(elInfo);
152        // Let any of </h1>, </h2>, ... close other header tags.
153        if (isHeaderElementName(canonElementName)) {
154          for (int i = openElements.size(), limit = index + 1; -- i >= limit;) {
155            ElementContainmentInfo openEl = openElements.get(i);
156            if (isHeaderElementName(openEl.elementName)) {
157              elInfo = openEl;
158              index = i;
159              canonElementName = openEl.elementName;
160              break;
161            }
162          }
163        }
164        if (index < 0) {
165          return;  // Don't close unopened tags.
166        }
167    
168        // Ensure that index is in the scope of closeable elements.
169        // This approximates the "has an element in *** scope" predicates defined at
170        // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
171        // #has-an-element-in-the-specific-scope
172        int blockingScopes = elInfo.blockedByScopes;
173        for (int i = openElements.size(); --i > index;) {
174          if ((openElements.get(i).inScopes & blockingScopes) != 0) {
175            return;
176          }
177        }
178    
179        int last = openElements.size();
180        // Close all the elements that cannot contain the element to open.
181        List<ElementContainmentInfo> toResumeInReverse = null;
182        while (--last > index) {
183          ElementContainmentInfo unclosed = openElements.remove(last);
184          if (last + 1 < nestingLimit) {
185            underlying.closeTag(unclosed.elementName);
186          }
187          if (unclosed.resumable) {
188            if (toResumeInReverse == null) {
189              toResumeInReverse = Lists.newArrayList();
190            }
191            toResumeInReverse.add(unclosed);
192          }
193        }
194        if (openElements.size() < nestingLimit) {
195          underlying.closeTag(elInfo.elementName);
196        }
197        openElements.remove(index);
198        if (toResumeInReverse != null) {
199          resume(toResumeInReverse);
200        }
201      }
202    
203      private void resume(List<ElementContainmentInfo> toResumeInReverse) {
204        for (ElementContainmentInfo toResume : toResumeInReverse) {
205          // TODO: If resuming of things other than plain formatting tags like <b>
206          // and <i>, then we need to store the attributes for resumable tags so
207          // that we can resume with the appropriate attributes.
208          if (openElements.size() < nestingLimit) {
209            underlying.openTag(toResume.elementName, Lists.<String>newArrayList());
210          }
211          openElements.add(toResume);
212        }
213      }
214    
215      private static final int HTML_SPACE_CHAR_BITMASK =
216          (1 << ' ') | (1 << '\t') | (1 << '\n') | (1 << '\u000c') | (1 << '\r');
217    
218      public void text(String text) {
219        int n = text.length();
220        for (int i = 0; i < n; ++i) {
221          int ch = text.charAt(i);
222          if (ch > 0x20 || (HTML_SPACE_CHAR_BITMASK & (1 << ch)) == 0) {
223            prepareForContent(ElementContainmentRelationships.CHARACTER_DATA_ONLY);
224            break;
225          }
226        }
227    
228        if (openElements.size() < nestingLimit) {
229          underlying.text(text);
230        }
231      }
232    
233      private static boolean isHeaderElementName(String canonElementName) {
234        return canonElementName.length() == 2 && canonElementName.charAt(0) == 'h'
235            && canonElementName.charAt(1) <= '9';
236      }
237    
238    
239      @Immutable
240      private static final class ElementContainmentInfo {
241        final String elementName;
242        /**
243         * True if the adoption agency algorithm allows an element to be resumed
244         * after a mis-nested end tag closes it.
245         * E.g. in {@code <b>Foo<i>Bar</b>Baz</i>} the {@code <i>} element is
246         * resumed after the {@code <b>} element is closed.
247         */
248        final boolean resumable;
249        /** A set of bits of element groups into which the element falls. */
250        final int types;
251        /** The type of elements that an element can contain. */
252        final int contents;
253        /** True if the element has no content -- not even text content. */
254        final boolean isVoid;
255        /** A legal child of this node that can contain block content. */
256        final @Nullable ElementContainmentInfo blockContainerChild;
257        /** A bit set of close tag scopes that block this element's close tags. */
258        final int blockedByScopes;
259        /** A bit set of scopes groups into which this element falls. */
260        final int inScopes;
261    
262        ElementContainmentInfo(
263            String elementName, boolean resumable, int types, int contents,
264            @Nullable ElementContainmentInfo blockContainerChild,
265            int inScopes) {
266          this.elementName = elementName;
267          this.resumable = resumable;
268          this.types = types;
269          this.contents = contents;
270          this.isVoid = contents == 0
271              && HtmlTextEscapingMode.isVoidElement(elementName);
272          this.blockContainerChild = blockContainerChild;
273          this.blockedByScopes =
274              ElementContainmentRelationships.CloseTagScope.ALL & ~inScopes;
275          this.inScopes = inScopes;
276        }
277    
278        @Override public String toString() {
279          return "<" + elementName + ">";
280        }
281      }
282    
  /**
   * Maps element names to their containment info.  The map holds the
   * definitions registered by an {@link ElementContainmentRelationships}
   * instance, and is consulted with canonicalized element names.
   */
  static final ImmutableMap<String, ElementContainmentInfo>
      ELEMENT_CONTAINMENT_RELATIONSHIPS
      = new ElementContainmentRelationships().toMap();
286    
287      private static class ElementContainmentRelationships {
    /**
     * The groups into which elements are classified when describing what an
     * element is and what it may contain.
     * <p>
     * Each constant's ordinal is used as a bit index when the groups are
     * packed into an {@code int} by {@code elementGroupBits}.
     */
    private enum ElementGroup {
      BLOCK,
      INLINE,
      INLINE_MINUS_A,
      MIXED,
      TABLE_CONTENT,
      HEAD_CONTENT,
      TOP_CONTENT,
      AREA_ELEMENT,
      FORM_ELEMENT,
      LEGEND_ELEMENT,
      LI_ELEMENT,
      DL_PART,
      P_ELEMENT,
      OPTIONS_ELEMENT,
      OPTION_ELEMENT,
      PARAM_ELEMENT,
      TABLE_ELEMENT,
      TR_ELEMENT,
      TD_ELEMENT,
      COL_ELEMENT,
      CHARACTER_DATA,
      ;
    }
312    
    /**
     * An identifier for one of the "has a *** element in scope" predicates
     * used by HTML5 to decide when a close tag implicitly closes tags above
     * the target element on the open element stack.
     * <p>
     * Each constant's ordinal is used as a bit index by {@code scopeBits}.
     */
    private enum CloseTagScope {
      COMMON,
      BUTTON,
      LIST_ITEM,
      TABLE,
      ;

      /** A bit set with one bit set for every scope declared above. */
      static final int ALL = (1 << values().length) - 1;
    }
327    
328        private static int elementGroupBits(ElementGroup a) {
329          return 1 << a.ordinal();
330        }
331    
332        private static int elementGroupBits(
333            ElementGroup a, ElementGroup b) {
334          return (1 << a.ordinal()) | (1 << b.ordinal());
335        }
336    
337        private static int elementGroupBits(
338            ElementGroup a, ElementGroup b, ElementGroup c) {
339          return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
340        }
341    
342        private static int elementGroupBits(
343            ElementGroup... bits) {
344          int bitField = 0;
345          for (ElementGroup bit : bits) {
346            bitField |= (1 << bit.ordinal());
347          }
348          return bitField;
349        }
350    
351        private static int scopeBits(CloseTagScope a) {
352          return 1 << a.ordinal();
353        }
354    
355        private static int scopeBits(
356            CloseTagScope a, CloseTagScope b, CloseTagScope c) {
357          return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
358        }
359    
    /**
     * Accumulates the element definitions registered via
     * {@code defineElement}, keyed by element name, until {@code toMap}
     * builds them into an immutable map.
     */
    private ImmutableMap.Builder<String, ElementContainmentInfo> definitions
        = ImmutableMap.builder();
362    
363        private ElementContainmentInfo defineElement(
364            String elementName, boolean resumable, int types, int contentTypes) {
365          return defineElement(elementName, resumable, types, contentTypes, null);
366        }
367    
368        private ElementContainmentInfo defineElement(
369            String elementName, boolean resumable, int types, int contentTypes,
370            int inScopes) {
371          return defineElement(
372              elementName, resumable, types, contentTypes, null, inScopes);
373        }
374    
375        private ElementContainmentInfo defineElement(
376            String elementName, boolean resumable, int types, int contentTypes,
377            @Nullable ElementContainmentInfo blockContainer) {
378          return defineElement(
379              elementName, resumable, types, contentTypes, blockContainer, 0);
380        }
381    
382        private ElementContainmentInfo defineElement(
383            String elementName, boolean resumable, int types, int contentTypes,
384            @Nullable ElementContainmentInfo blockContainer, int inScopes) {
385          ElementContainmentInfo info = new ElementContainmentInfo(
386              elementName, resumable, types, contentTypes, blockContainer,
387              inScopes);
388          definitions.put(elementName, info);
389          return info;
390        }
391    
    /**
     * Builds an immutable map from the definitions registered so far.
     *
     * @return a map from element name to containment info.
     */
    private ImmutableMap<String, ElementContainmentInfo> toMap() {
      return definitions.build();
    }
395    
396        {
397          defineElement(
398              "a", false, elementGroupBits(
399                  ElementGroup.INLINE
400              ), elementGroupBits(
401                  ElementGroup.INLINE_MINUS_A
402              ));
403          defineElement(
404              "abbr", true, elementGroupBits(
405                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
406              ), elementGroupBits(
407                  ElementGroup.INLINE
408              ));
409          defineElement(
410              "acronym", true, elementGroupBits(
411                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
412              ), elementGroupBits(
413                  ElementGroup.INLINE
414              ));
415          defineElement(
416              "address", false, elementGroupBits(
417                  ElementGroup.BLOCK
418              ), elementGroupBits(
419                  ElementGroup.INLINE, ElementGroup.P_ELEMENT
420              ));
421          defineElement(
422              "applet", false, elementGroupBits(
423                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
424              ), elementGroupBits(
425                  ElementGroup.BLOCK, ElementGroup.INLINE,
426                  ElementGroup.PARAM_ELEMENT
427              ), scopeBits(
428                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
429                  CloseTagScope.LIST_ITEM
430              ));
431          defineElement(
432              "area", false, elementGroupBits(ElementGroup.AREA_ELEMENT), 0);
433          defineElement(
434              "audio", false, elementGroupBits(
435                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
436              ), 0);
437          defineElement(
438              "b", true, elementGroupBits(
439                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
440              ), elementGroupBits(
441                  ElementGroup.INLINE
442              ));
443          defineElement(
444              "base", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
445          defineElement(
446              "basefont", false, elementGroupBits(
447                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
448              ), 0);
449          defineElement(
450              "bdi", true, elementGroupBits(
451                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
452              ), elementGroupBits(
453                  ElementGroup.INLINE
454              ));
455          defineElement(
456              "bdo", true, elementGroupBits(
457                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
458              ), elementGroupBits(
459                  ElementGroup.INLINE
460              ));
461          defineElement(
462              "big", true, elementGroupBits(
463                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
464              ), elementGroupBits(
465                  ElementGroup.INLINE
466              ));
467          defineElement(
468              "blink", true, elementGroupBits(
469                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
470              ), elementGroupBits(
471                  ElementGroup.INLINE
472              ));
473          defineElement(
474              "blockquote", false, elementGroupBits(
475                  ElementGroup.BLOCK
476              ), elementGroupBits(
477                  ElementGroup.BLOCK, ElementGroup.INLINE
478              ));
479          defineElement(
480              "body", false, elementGroupBits(
481                  ElementGroup.TOP_CONTENT
482              ), elementGroupBits(
483                  ElementGroup.BLOCK, ElementGroup.INLINE
484              ));
485          defineElement(
486              "br", false, elementGroupBits(
487                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
488              ), 0);
489          defineElement(
490              "button", false, elementGroupBits(
491                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
492              ), elementGroupBits(
493                  ElementGroup.BLOCK, ElementGroup.INLINE
494              ), scopeBits(CloseTagScope.BUTTON));
495          defineElement(
496              "canvas", false, elementGroupBits(
497                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
498              ), elementGroupBits(
499                  ElementGroup.INLINE
500              ));
501          defineElement(
502              "caption", false, elementGroupBits(
503                  ElementGroup.TABLE_CONTENT
504              ), elementGroupBits(
505                  ElementGroup.INLINE
506              ), scopeBits(
507                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
508                  CloseTagScope.LIST_ITEM
509              ));
510          defineElement(
511              "center", false, elementGroupBits(
512                  ElementGroup.BLOCK
513              ), elementGroupBits(
514                  ElementGroup.BLOCK, ElementGroup.INLINE
515              ));
516          defineElement(
517              "cite", true, elementGroupBits(
518                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
519              ), elementGroupBits(
520                  ElementGroup.INLINE
521              ));
522          defineElement(
523              "code", true, elementGroupBits(
524                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
525              ), elementGroupBits(
526                  ElementGroup.INLINE
527              ));
528          defineElement(
529              "col", false, elementGroupBits(
530                  ElementGroup.TABLE_CONTENT, ElementGroup.COL_ELEMENT
531              ), 0);
532          defineElement(
533              "colgroup", false, elementGroupBits(
534                  ElementGroup.TABLE_CONTENT
535              ), elementGroupBits(
536                  ElementGroup.COL_ELEMENT
537              ));
538          ElementContainmentInfo DD = defineElement(
539              "dd", false, elementGroupBits(
540                  ElementGroup.DL_PART
541              ), elementGroupBits(
542                  ElementGroup.BLOCK, ElementGroup.INLINE
543              ));
544          defineElement(
545              "del", true, elementGroupBits(
546                  ElementGroup.BLOCK, ElementGroup.INLINE,
547                  ElementGroup.MIXED
548              ), elementGroupBits(
549                  ElementGroup.BLOCK, ElementGroup.INLINE
550              ));
551          defineElement(
552              "dfn", true, elementGroupBits(
553                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
554              ), elementGroupBits(
555                  ElementGroup.INLINE
556              ));
557          defineElement(
558              "dir", false, elementGroupBits(
559                  ElementGroup.BLOCK
560              ), elementGroupBits(
561                  ElementGroup.LI_ELEMENT
562              ));
563          defineElement(
564              "div", false, elementGroupBits(
565                  ElementGroup.BLOCK
566              ), elementGroupBits(
567                  ElementGroup.BLOCK, ElementGroup.INLINE
568              ));
569          defineElement(
570              "dl", false, elementGroupBits(
571                  ElementGroup.BLOCK
572              ), elementGroupBits(
573                  ElementGroup.DL_PART
574              ),
575              DD);
576          defineElement(
577              "dt", false, elementGroupBits(
578                  ElementGroup.DL_PART
579              ), elementGroupBits(
580                  ElementGroup.INLINE
581              ));
582          defineElement(
583              "em", true, elementGroupBits(
584                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
585              ), elementGroupBits(
586                  ElementGroup.INLINE
587              ));
588          defineElement(
589              "fieldset", false, elementGroupBits(
590                  ElementGroup.BLOCK
591              ), elementGroupBits(
592                  ElementGroup.BLOCK, ElementGroup.INLINE,
593                  ElementGroup.LEGEND_ELEMENT
594              ));
595          defineElement(
596              "font", false, elementGroupBits(
597                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
598              ), elementGroupBits(
599                  ElementGroup.INLINE
600              ));
601          defineElement(
602              "form", false, elementGroupBits(
603                  ElementGroup.BLOCK, ElementGroup.FORM_ELEMENT
604              ), elementGroupBits(
605                  ElementGroup.BLOCK, ElementGroup.INLINE,
606                  ElementGroup.INLINE_MINUS_A, ElementGroup.TR_ELEMENT,
607                  ElementGroup.TD_ELEMENT
608              ));
609          defineElement(
610              "h1", false, elementGroupBits(
611                  ElementGroup.BLOCK
612              ), elementGroupBits(
613                  ElementGroup.INLINE
614              ));
615          defineElement(
616              "h2", false, elementGroupBits(
617                  ElementGroup.BLOCK
618              ), elementGroupBits(
619                  ElementGroup.INLINE
620              ));
621          defineElement(
622              "h3", false, elementGroupBits(
623                  ElementGroup.BLOCK
624              ), elementGroupBits(
625                  ElementGroup.INLINE
626              ));
627          defineElement(
628              "h4", false, elementGroupBits(
629                  ElementGroup.BLOCK
630              ), elementGroupBits(
631                  ElementGroup.INLINE
632              ));
633          defineElement(
634              "h5", false, elementGroupBits(
635                  ElementGroup.BLOCK
636              ), elementGroupBits(
637                  ElementGroup.INLINE
638              ));
639          defineElement(
640              "h6", false, elementGroupBits(
641                  ElementGroup.BLOCK
642              ), elementGroupBits(
643                  ElementGroup.INLINE
644              ));
645          defineElement(
646              "head", false, elementGroupBits(
647                  ElementGroup.TOP_CONTENT
648              ), elementGroupBits(
649                  ElementGroup.HEAD_CONTENT
650              ));
651          defineElement(
652              "hr", false, elementGroupBits(ElementGroup.BLOCK), 0);
653          defineElement(
654              "html", false, 0, elementGroupBits(ElementGroup.TOP_CONTENT),
655              CloseTagScope.ALL);
656          defineElement(
657              "i", true, elementGroupBits(
658                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
659              ), elementGroupBits(
660                  ElementGroup.INLINE
661              ));
662          defineElement(
663              "iframe", false, elementGroupBits(
664                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
665              ), elementGroupBits(
666                  ElementGroup.BLOCK, ElementGroup.INLINE
667              ));
668          defineElement(
669              "img", false, elementGroupBits(
670                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
671              ), 0);
672          defineElement(
673              "input", false, elementGroupBits(
674                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
675              ), 0);
676          defineElement(
677              "ins", true, elementGroupBits(
678                  ElementGroup.BLOCK, ElementGroup.INLINE
679              ), elementGroupBits(
680                  ElementGroup.BLOCK, ElementGroup.INLINE
681              ));
682          defineElement(
683              "isindex", false, elementGroupBits(ElementGroup.INLINE), 0);
684          defineElement(
685              "kbd", true, elementGroupBits(
686                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
687              ), elementGroupBits(
688                  ElementGroup.INLINE
689              ));
690          defineElement(
691              "label", false, elementGroupBits(
692                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
693              ), elementGroupBits(
694                  ElementGroup.INLINE
695              ));
696          defineElement(
697              "legend", false, elementGroupBits(
698                  ElementGroup.LEGEND_ELEMENT
699              ), elementGroupBits(
700                  ElementGroup.INLINE
701              ));
702          ElementContainmentInfo LI = defineElement(
703              "li", false, elementGroupBits(
704                  ElementGroup.LI_ELEMENT
705              ), elementGroupBits(
706                  ElementGroup.BLOCK, ElementGroup.INLINE
707              ));
708          defineElement(
709              "link", false, elementGroupBits(
710                  ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
711              ), 0);
712          defineElement(
713              "listing", false, elementGroupBits(
714                  ElementGroup.BLOCK
715              ), elementGroupBits(
716                  ElementGroup.INLINE
717              ));
718          defineElement(
719              "map", false, elementGroupBits(
720                  ElementGroup.INLINE
721              ), elementGroupBits(
722                  ElementGroup.BLOCK, ElementGroup.AREA_ELEMENT
723              ));
724          defineElement(
725              "meta", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
726          defineElement(
727              "nobr", false, elementGroupBits(
728                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
729              ), elementGroupBits(
730                  ElementGroup.INLINE
731              ));
732          defineElement(
733              "noframes", false, elementGroupBits(
734                  ElementGroup.BLOCK, ElementGroup.TOP_CONTENT
735              ), elementGroupBits(
736                  ElementGroup.BLOCK, ElementGroup.INLINE,
737                  ElementGroup.TOP_CONTENT
738              ));
739          defineElement(
740              "noscript", false, elementGroupBits(
741                  ElementGroup.BLOCK
742              ), elementGroupBits(
743                  ElementGroup.BLOCK, ElementGroup.INLINE
744              ));
745          defineElement(
746              "object", false, elementGroupBits(
747                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
748                  ElementGroup.HEAD_CONTENT
749              ), elementGroupBits(
750                  ElementGroup.BLOCK, ElementGroup.INLINE,
751                  ElementGroup.PARAM_ELEMENT
752              ), scopeBits(
753                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
754                  CloseTagScope.LIST_ITEM
755              ));
756          defineElement(
757              "ol", false, elementGroupBits(
758                  ElementGroup.BLOCK
759              ), elementGroupBits(
760                  ElementGroup.LI_ELEMENT
761              ),
762              LI,
763              scopeBits(CloseTagScope.LIST_ITEM));
764          defineElement(
765              "optgroup", false, elementGroupBits(
766                  ElementGroup.OPTIONS_ELEMENT
767              ), elementGroupBits(
768                  ElementGroup.OPTIONS_ELEMENT
769              ));
770          defineElement(
771              "option", false, elementGroupBits(
772                  ElementGroup.OPTIONS_ELEMENT, ElementGroup.OPTION_ELEMENT
773              ), elementGroupBits(
774                  ElementGroup.CHARACTER_DATA
775              ));
776          defineElement(
777              "p", false, elementGroupBits(
778                  ElementGroup.BLOCK, ElementGroup.P_ELEMENT
779              ), elementGroupBits(
780                  ElementGroup.INLINE, ElementGroup.TABLE_ELEMENT
781              ));
782          defineElement(
783              "param", false, elementGroupBits(ElementGroup.PARAM_ELEMENT), 0);
784          defineElement(
785              "pre", false, elementGroupBits(
786                  ElementGroup.BLOCK
787              ), elementGroupBits(
788                  ElementGroup.INLINE
789              ));
790          defineElement(
791              "q", true, elementGroupBits(
792                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
793              ), elementGroupBits(
794                  ElementGroup.INLINE
795              ));
796          defineElement(
797              "s", true, elementGroupBits(
798                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
799              ), elementGroupBits(
800                  ElementGroup.INLINE
801              ));
802          defineElement(
803              "samp", true, elementGroupBits(
804                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
805              ), elementGroupBits(
806                  ElementGroup.INLINE
807              ));
808          defineElement(
809              "script", false, elementGroupBits(
810                  ElementGroup.BLOCK, ElementGroup.INLINE,
811                  ElementGroup.INLINE_MINUS_A, ElementGroup.MIXED,
812                  ElementGroup.TABLE_CONTENT, ElementGroup.HEAD_CONTENT,
813                  ElementGroup.TOP_CONTENT, ElementGroup.AREA_ELEMENT,
814                  ElementGroup.FORM_ELEMENT, ElementGroup.LEGEND_ELEMENT,
815                  ElementGroup.LI_ELEMENT, ElementGroup.DL_PART,
816                  ElementGroup.P_ELEMENT, ElementGroup.OPTIONS_ELEMENT,
817                  ElementGroup.OPTION_ELEMENT, ElementGroup.PARAM_ELEMENT,
818                  ElementGroup.TABLE_ELEMENT, ElementGroup.TR_ELEMENT,
819                  ElementGroup.TD_ELEMENT, ElementGroup.COL_ELEMENT
820              ), elementGroupBits(
821                  ElementGroup.CHARACTER_DATA));
822          defineElement(
823              "select", false, elementGroupBits(
824                  ElementGroup.INLINE
825              ), elementGroupBits(
826                  ElementGroup.OPTIONS_ELEMENT
827              ));
828          defineElement(
829              "small", true, elementGroupBits(
830                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
831              ), elementGroupBits(
832                  ElementGroup.INLINE
833              ));
834          defineElement(
835              "span", false, elementGroupBits(
836                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
837              ), elementGroupBits(
838                  ElementGroup.INLINE
839              ));
840          defineElement(
841              "strike", true, elementGroupBits(
842                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
843              ), elementGroupBits(
844                  ElementGroup.INLINE
845              ));
846          defineElement(
847              "strong", true, elementGroupBits(
848                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
849              ), elementGroupBits(
850                  ElementGroup.INLINE
851              ));
852          defineElement(
853              "style", false, elementGroupBits(
854                  ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
855              ), elementGroupBits(
856                  ElementGroup.CHARACTER_DATA
857              ));
858          defineElement(
859              "sub", true, elementGroupBits(
860                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
861              ), elementGroupBits(
862                  ElementGroup.INLINE
863              ));
864          defineElement(
865              "sup", true, elementGroupBits(
866                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
867              ), elementGroupBits(
868                  ElementGroup.INLINE
869              ));
870          defineElement(
871              "table", false, elementGroupBits(
872                  ElementGroup.BLOCK, ElementGroup.TABLE_ELEMENT
873              ), elementGroupBits(
874                  ElementGroup.TABLE_CONTENT, ElementGroup.FORM_ELEMENT
875              ), CloseTagScope.ALL);
876          defineElement(
877              "tbody", false, elementGroupBits(
878                  ElementGroup.TABLE_CONTENT
879              ), elementGroupBits(
880                  ElementGroup.TR_ELEMENT
881              ));
882          ElementContainmentInfo TD = defineElement(
883              "td", false, elementGroupBits(
884                  ElementGroup.TD_ELEMENT
885              ), elementGroupBits(
886                  ElementGroup.BLOCK, ElementGroup.INLINE
887              ), scopeBits(
888                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
889                  CloseTagScope.LIST_ITEM
890              ));
891          defineElement(
892              "textarea", false,
893              // No, a textarea cannot be inside a link.
894              elementGroupBits(ElementGroup.INLINE),
895              elementGroupBits(ElementGroup.CHARACTER_DATA));
896          defineElement(
897              "tfoot", false, elementGroupBits(
898                  ElementGroup.TABLE_CONTENT
899              ), elementGroupBits(
900                  ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
901                  ElementGroup.TD_ELEMENT
902              ));
903          defineElement(
904              "th", false, elementGroupBits(
905                  ElementGroup.TD_ELEMENT
906              ), elementGroupBits(
907                  ElementGroup.BLOCK, ElementGroup.INLINE
908              ), scopeBits(
909                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
910                  CloseTagScope.LIST_ITEM
911              ));
912          defineElement(
913              "thead", false, elementGroupBits(
914                  ElementGroup.TABLE_CONTENT
915              ), elementGroupBits(
916                  ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
917                  ElementGroup.TD_ELEMENT
918              ));
919          defineElement(
920              "title", false, elementGroupBits(ElementGroup.HEAD_CONTENT),
921              elementGroupBits(ElementGroup.CHARACTER_DATA));
922          defineElement(
923              "tr", false, elementGroupBits(
924                  ElementGroup.TABLE_CONTENT, ElementGroup.TR_ELEMENT
925              ), elementGroupBits(
926                  ElementGroup.FORM_ELEMENT, ElementGroup.TD_ELEMENT
927              ),
928              TD);
929          defineElement(
930              "tt", true, elementGroupBits(
931                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
932              ), elementGroupBits(
933                  ElementGroup.INLINE
934              ));
935          defineElement(
936              "u", true, elementGroupBits(
937                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
938              ), elementGroupBits(
939                  ElementGroup.INLINE
940              ));
941          defineElement(
942              "ul", false, elementGroupBits(
943                  ElementGroup.BLOCK
944              ), elementGroupBits(
945                  ElementGroup.LI_ELEMENT
946              ),
947              LI,
948              scopeBits(CloseTagScope.LIST_ITEM));
949          defineElement(
950              "var", false, elementGroupBits(
951                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
952              ), elementGroupBits(
953                  ElementGroup.INLINE
954              ));
955          defineElement(
956              "video", false, elementGroupBits(
957                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
958              ), 0);
959          defineElement(
960              "wbr", false, elementGroupBits(
961                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
962              ), 0);
963          defineElement(
964              "xmp", false, elementGroupBits(
965                  ElementGroup.BLOCK
966              ), elementGroupBits(
967                  ElementGroup.INLINE
968              ));
969    
970        }
971    
972        private static final ElementContainmentInfo CHARACTER_DATA_ONLY
973            = new ElementContainmentInfo(
974                "#text", false,
975                elementGroupBits(
976                    ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
977                    ElementGroup.BLOCK, ElementGroup.CHARACTER_DATA),
978                0, null, 0);
979      }
980    
981      static boolean allowsPlainTextualContent(String canonElementName) {
982        ElementContainmentInfo info =
983           ELEMENT_CONTAINMENT_RELATIONSHIPS.get(canonElementName);
984        if (info == null
985            || ((info.contents
986                 & ElementContainmentRelationships.CHARACTER_DATA_ONLY.types)
987                != 0)) {
988          switch (HtmlTextEscapingMode.getModeForTag(canonElementName)) {
989            case PCDATA:     return true;
990            case RCDATA:     return true;
991            case PLAIN_TEXT: return true;
992            case VOID:       return false;
993            case CDATA:
994            case CDATA_SOMETIMES:
995              return "xmp".equals(canonElementName)
996                  || "listing".equals(canonElementName);
997          }
998        }
999        return false;
1000      }
1001    }