001    // Copyright (c) 2011, Mike Samuel
002    // All rights reserved.
003    //
004    // Redistribution and use in source and binary forms, with or without
005    // modification, are permitted provided that the following conditions
006    // are met:
007    //
008    // Redistributions of source code must retain the above copyright
009    // notice, this list of conditions and the following disclaimer.
010    // Redistributions in binary form must reproduce the above copyright
011    // notice, this list of conditions and the following disclaimer in the
012    // documentation and/or other materials provided with the distribution.
013    // Neither the name of the OWASP nor the names of its contributors may
014    // be used to endorse or promote products derived from this software
015    // without specific prior written permission.
016    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027    // POSSIBILITY OF SUCH DAMAGE.
028    
029    package org.owasp.html;
030    
031    import java.util.List;
032    
033    import javax.annotation.Nullable;
034    import javax.annotation.concurrent.Immutable;
035    
036    import com.google.common.collect.ImmutableMap;
037    import com.google.common.collect.Lists;
038    
039    /**
040     * Wraps an HTML stream event receiver to fill in missing close tags.
041     * If the balancer is given the HTML {@code <p>1<p>2}, the wrapped receiver will
042     * see events equivalent to {@code <p>1</p><p>2</p>}.
043     *
044     * @author Mike Samuel <mikesamuel@gmail.com>
045     */
046    @TCB
047    public class TagBalancingHtmlStreamEventReceiver
048        implements HtmlStreamEventReceiver {
/** The receiver to which the balanced stream of events is forwarded. */
private final HtmlStreamEventReceiver underlying;
/**
 * Depth of open elements beyond which events are not forwarded to
 * {@link #underlying}.  Defaults to {@code Integer.MAX_VALUE}.
 */
private int nestingLimit = Integer.MAX_VALUE;
/**
 * The recognized, non-void elements that are currently open, outermost
 * first.  Used as a stack: the last entry is the innermost open element.
 */
private final List<ElementContainmentInfo> openElements
    = Lists.newArrayList();

/**
 * @param underlying the receiver that will be sent the balanced events.
 */
public TagBalancingHtmlStreamEventReceiver(
    HtmlStreamEventReceiver underlying) {
  this.underlying = underlying;
}
058    
059      public void setNestingLimit(int limit) {
060        if (openElements.size() > limit) {
061          throw new IllegalStateException();
062        }
063        this.nestingLimit = limit;
064      }
065    
/** Forwards the start-of-document event to the underlying receiver. */
public void openDocument() {
  underlying.openDocument();
}
069    
070      public void closeDocument() {
071        for (int i = Math.min(nestingLimit, openElements.size()); --i >= 0;) {
072          underlying.closeTag(openElements.get(i).elementName);
073        }
074        openElements.clear();
075        underlying.closeDocument();
076      }
077    
078      public void openTag(String elementName, List<String> attrs) {
079        String canonElementName = HtmlLexer.canonicalName(elementName);
080        ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
081            canonElementName);
082        // Treat unrecognized tags as void, but emit closing tags in closeTag().
083        if (elInfo == null) {
084          if (openElements.size() < nestingLimit) {
085            underlying.openTag(elementName, attrs);
086          }
087          return;
088        }
089    
090        prepareForContent(elInfo);
091    
092        if (openElements.size() < nestingLimit) {
093          underlying.openTag(elInfo.elementName, attrs);
094        }
095        if (!elInfo.isVoid) {
096          openElements.add(elInfo);
097        }
098      }
099    
100      private void prepareForContent(ElementContainmentInfo elInfo) {
101        int nOpen = openElements.size();
102        if (nOpen != 0) {
103          ElementContainmentInfo top = openElements.get(nOpen - 1);
104          if ((top.contents & elInfo.types) == 0) {
105            ElementContainmentInfo blockContainerChild = top.blockContainerChild;
106            // Open implied elements, such as list-items and table cells & rows.
107            if (blockContainerChild != null
108                && (blockContainerChild.contents & elInfo.types) != 0) {
109              underlying.openTag(
110                  blockContainerChild.elementName, Lists.<String>newArrayList());
111              openElements.add(blockContainerChild);
112              top = blockContainerChild;
113              ++nOpen;
114            }
115          }
116    
117          // Close all the elements that cannot contain the element to open.
118          List<ElementContainmentInfo> toResumeInReverse = null;
119          while (true) {
120            if ((top.contents & elInfo.types) != 0) { break; }
121            if (openElements.size() < nestingLimit) {
122              underlying.closeTag(top.elementName);
123            }
124            openElements.remove(--nOpen);
125            if (top.resumable) {
126              if (toResumeInReverse == null) {
127                toResumeInReverse = Lists.newArrayList();
128              }
129              toResumeInReverse.add(top);
130            }
131            if (nOpen == 0) { break; }
132            top = openElements.get(nOpen - 1);
133          }
134    
135          if (toResumeInReverse != null) {
136            resume(toResumeInReverse);
137          }
138        }
139      }
140    
141      public void closeTag(String elementName) {
142        String canonElementName = HtmlLexer.canonicalName(elementName);
143        ElementContainmentInfo elInfo = ELEMENT_CONTAINMENT_RELATIONSHIPS.get(
144            canonElementName);
145        if (elInfo == null) {  // Allow unrecognized end tags through.
146          if (openElements.size() < nestingLimit) {
147            underlying.closeTag(elementName);
148          }
149          return;
150        }
151        int index = openElements.lastIndexOf(elInfo);
152        // Let any of </h1>, </h2>, ... close other header tags.
153        if (isHeaderElementName(canonElementName)) {
154          for (int i = openElements.size(), limit = index + 1; -- i >= limit;) {
155            ElementContainmentInfo openEl = openElements.get(i);
156            if (isHeaderElementName(openEl.elementName)) {
157              elInfo = openEl;
158              index = i;
159              canonElementName = openEl.elementName;
160              break;
161            }
162          }
163        }
164        if (index < 0) {
165          return;  // Don't close unopened tags.
166        }
167    
168        // Ensure that index is in the scope of closeable elements.
169        // This approximates the "has an element in *** scope" predicates defined at
170        // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html
171        // #has-an-element-in-the-specific-scope
172        int blockingScopes = elInfo.blockedByScopes;
173        for (int i = openElements.size(); --i > index;) {
174          if ((openElements.get(i).inScopes & blockingScopes) != 0) {
175            return;
176          }
177        }
178    
179        int last = openElements.size();
180        // Close all the elements that cannot contain the element to open.
181        List<ElementContainmentInfo> toResumeInReverse = null;
182        while (--last > index) {
183          ElementContainmentInfo unclosed = openElements.remove(last);
184          if (last + 1 < nestingLimit) {
185            underlying.closeTag(unclosed.elementName);
186          }
187          if (unclosed.resumable) {
188            if (toResumeInReverse == null) {
189              toResumeInReverse = Lists.newArrayList();
190            }
191            toResumeInReverse.add(unclosed);
192          }
193        }
194        if (openElements.size() < nestingLimit) {
195          underlying.closeTag(elInfo.elementName);
196        }
197        openElements.remove(index);
198        if (toResumeInReverse != null) {
199          resume(toResumeInReverse);
200        }
201      }
202    
203      private void resume(List<ElementContainmentInfo> toResumeInReverse) {
204        for (ElementContainmentInfo toResume : toResumeInReverse) {
205          // TODO: If resuming of things other than plain formatting tags like <b>
206          // and <i>, then we need to store the attributes for resumable tags so
207          // that we can resume with the appropriate attributes.
208          if (openElements.size() < nestingLimit) {
209            underlying.openTag(toResume.elementName, Lists.<String>newArrayList());
210          }
211          openElements.add(toResume);
212        }
213      }
214    
215      private static final long HTML_SPACE_CHAR_BITMASK =
216          (1L << ' ')
217        | (1L << '\t')
218        | (1L << '\n')
219        | (1L << '\u000c')
220        | (1L << '\r');
221    
222      public static boolean isInterElementWhitespace(String text) {
223        int n = text.length();
224        for (int i = 0; i < n; ++i) {
225          int ch = text.charAt(i);
226          if (ch > 0x20 || (HTML_SPACE_CHAR_BITMASK & (1L << ch)) == 0) {
227            return false;
228          }
229        }
230        return true;
231      }
232    
233      public void text(String text) {
234        if (!isInterElementWhitespace(text)) {
235          prepareForContent(ElementContainmentRelationships.CHARACTER_DATA_ONLY);
236        }
237    
238        if (openElements.size() < nestingLimit) {
239          underlying.text(text);
240        }
241      }
242    
243      private static boolean isHeaderElementName(String canonElementName) {
244        return canonElementName.length() == 2 && canonElementName.charAt(0) == 'h'
245            && canonElementName.charAt(1) <= '9';
246      }
247    
248    
249      @Immutable
250      private static final class ElementContainmentInfo {
251        final String elementName;
252        /**
253         * True if the adoption agency algorithm allows an element to be resumed
254         * after a mis-nested end tag closes it.
255         * E.g. in {@code <b>Foo<i>Bar</b>Baz</i>} the {@code <i>} element is
256         * resumed after the {@code <b>} element is closed.
257         */
258        final boolean resumable;
259        /** A set of bits of element groups into which the element falls. */
260        final int types;
261        /** The type of elements that an element can contain. */
262        final int contents;
263        /** True if the element has no content -- not even text content. */
264        final boolean isVoid;
265        /** A legal child of this node that can contain block content. */
266        final @Nullable ElementContainmentInfo blockContainerChild;
267        /** A bit set of close tag scopes that block this element's close tags. */
268        final int blockedByScopes;
269        /** A bit set of scopes groups into which this element falls. */
270        final int inScopes;
271    
272        ElementContainmentInfo(
273            String elementName, boolean resumable, int types, int contents,
274            @Nullable ElementContainmentInfo blockContainerChild,
275            int inScopes) {
276          this.elementName = elementName;
277          this.resumable = resumable;
278          this.types = types;
279          this.contents = contents;
280          this.isVoid = contents == 0
281              && HtmlTextEscapingMode.isVoidElement(elementName);
282          this.blockContainerChild = blockContainerChild;
283          this.blockedByScopes =
284              ElementContainmentRelationships.CloseTagScope.ALL & ~inScopes;
285          this.inScopes = inScopes;
286        }
287    
288        @Override public String toString() {
289          return "<" + elementName + ">";
290        }
291      }
292    
/**
 * Maps element names to their containment rules, as defined by
 * {@code ElementContainmentRelationships}.  Looked up using names produced
 * by {@code HtmlLexer.canonicalName}.
 */
static final ImmutableMap<String, ElementContainmentInfo>
    ELEMENT_CONTAINMENT_RELATIONSHIPS
    = new ElementContainmentRelationships().toMap();
296    
297      private static class ElementContainmentRelationships {
/**
 * Groups that elements and their permitted content are sorted into.
 * Each constant's ordinal is used as a bit position in the {@code int} bit
 * sets built by {@code elementGroupBits}, so there must never be more than
 * 32 constants (there are currently 21).
 */
private enum ElementGroup {
  BLOCK,
  INLINE,
  INLINE_MINUS_A,
  MIXED,
  TABLE_CONTENT,
  HEAD_CONTENT,
  TOP_CONTENT,
  AREA_ELEMENT,
  FORM_ELEMENT,
  LEGEND_ELEMENT,
  LI_ELEMENT,
  DL_PART,
  P_ELEMENT,
  OPTIONS_ELEMENT,
  OPTION_ELEMENT,
  PARAM_ELEMENT,
  TABLE_ELEMENT,
  TR_ELEMENT,
  TD_ELEMENT,
  COL_ELEMENT,
  CHARACTER_DATA,
  ;
}
322    
/**
 * An identifier for one of the "has a *** element in scope" predicates
 * used by HTML5 to decide when a close tag implicitly closes tags above
 * the target element on the open element stack.
 * Each constant's ordinal is used as a bit position in the bit sets built
 * by {@code scopeBits}.
 */
private enum CloseTagScope {
  COMMON,
  BUTTON,
  LIST_ITEM,
  TABLE,
  ;

  /** A bit set with the bit for every scope set. */
  static final int ALL = (1 << values().length) - 1;
}
337    
338        private static int elementGroupBits(ElementGroup a) {
339          return 1 << a.ordinal();
340        }
341    
342        private static int elementGroupBits(
343            ElementGroup a, ElementGroup b) {
344          return (1 << a.ordinal()) | (1 << b.ordinal());
345        }
346    
347        private static int elementGroupBits(
348            ElementGroup a, ElementGroup b, ElementGroup c) {
349          return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
350        }
351    
352        private static int elementGroupBits(
353            ElementGroup... bits) {
354          int bitField = 0;
355          for (ElementGroup bit : bits) {
356            bitField |= (1 << bit.ordinal());
357          }
358          return bitField;
359        }
360    
361        private static int scopeBits(CloseTagScope a) {
362          return 1 << a.ordinal();
363        }
364    
365        private static int scopeBits(
366            CloseTagScope a, CloseTagScope b, CloseTagScope c) {
367          return (1 << a.ordinal()) | (1 << b.ordinal()) | (1 << c.ordinal());
368        }
369    
370        private ImmutableMap.Builder<String, ElementContainmentInfo> definitions
371            = ImmutableMap.builder();
372    
373        private ElementContainmentInfo defineElement(
374            String elementName, boolean resumable, int types, int contentTypes) {
375          return defineElement(elementName, resumable, types, contentTypes, null);
376        }
377    
378        private ElementContainmentInfo defineElement(
379            String elementName, boolean resumable, int types, int contentTypes,
380            int inScopes) {
381          return defineElement(
382              elementName, resumable, types, contentTypes, null, inScopes);
383        }
384    
385        private ElementContainmentInfo defineElement(
386            String elementName, boolean resumable, int types, int contentTypes,
387            @Nullable ElementContainmentInfo blockContainer) {
388          return defineElement(
389              elementName, resumable, types, contentTypes, blockContainer, 0);
390        }
391    
392        private ElementContainmentInfo defineElement(
393            String elementName, boolean resumable, int types, int contentTypes,
394            @Nullable ElementContainmentInfo blockContainer, int inScopes) {
395          ElementContainmentInfo info = new ElementContainmentInfo(
396              elementName, resumable, types, contentTypes, blockContainer,
397              inScopes);
398          definitions.put(elementName, info);
399          return info;
400        }
401    
402        private ImmutableMap<String, ElementContainmentInfo> toMap() {
403          return definitions.build();
404        }
405    
406        {
407          defineElement(
408              "a", false, elementGroupBits(
409                  ElementGroup.INLINE
410              ), elementGroupBits(
411                  ElementGroup.INLINE_MINUS_A
412              ));
413          defineElement(
414              "abbr", true, elementGroupBits(
415                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
416              ), elementGroupBits(
417                  ElementGroup.INLINE
418              ));
419          defineElement(
420              "acronym", true, elementGroupBits(
421                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
422              ), elementGroupBits(
423                  ElementGroup.INLINE
424              ));
425          defineElement(
426              "address", false, elementGroupBits(
427                  ElementGroup.BLOCK
428              ), elementGroupBits(
429                  ElementGroup.INLINE, ElementGroup.P_ELEMENT
430              ));
431          defineElement(
432              "applet", false, elementGroupBits(
433                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
434              ), elementGroupBits(
435                  ElementGroup.BLOCK, ElementGroup.INLINE,
436                  ElementGroup.PARAM_ELEMENT
437              ), scopeBits(
438                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
439                  CloseTagScope.LIST_ITEM
440              ));
441          defineElement(
442              "area", false, elementGroupBits(ElementGroup.AREA_ELEMENT), 0);
443          defineElement(
444              "audio", false, elementGroupBits(
445                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
446              ), 0);
447          defineElement(
448              "b", true, elementGroupBits(
449                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
450              ), elementGroupBits(
451                  ElementGroup.INLINE
452              ));
453          defineElement(
454              "base", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
455          defineElement(
456              "basefont", false, elementGroupBits(
457                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
458              ), 0);
459          defineElement(
460              "bdi", true, elementGroupBits(
461                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
462              ), elementGroupBits(
463                  ElementGroup.INLINE
464              ));
465          defineElement(
466              "bdo", true, elementGroupBits(
467                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
468              ), elementGroupBits(
469                  ElementGroup.INLINE
470              ));
471          defineElement(
472              "big", true, elementGroupBits(
473                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
474              ), elementGroupBits(
475                  ElementGroup.INLINE
476              ));
477          defineElement(
478              "blink", true, elementGroupBits(
479                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
480              ), elementGroupBits(
481                  ElementGroup.INLINE
482              ));
483          defineElement(
484              "blockquote", false, elementGroupBits(
485                  ElementGroup.BLOCK
486              ), elementGroupBits(
487                  ElementGroup.BLOCK, ElementGroup.INLINE
488              ));
489          defineElement(
490              "body", false, elementGroupBits(
491                  ElementGroup.TOP_CONTENT
492              ), elementGroupBits(
493                  ElementGroup.BLOCK, ElementGroup.INLINE
494              ));
495          defineElement(
496              "br", false, elementGroupBits(
497                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
498              ), 0);
499          defineElement(
500              "button", false, elementGroupBits(
501                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
502              ), elementGroupBits(
503                  ElementGroup.BLOCK, ElementGroup.INLINE
504              ), scopeBits(CloseTagScope.BUTTON));
505          defineElement(
506              "canvas", false, elementGroupBits(
507                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
508              ), elementGroupBits(
509                  ElementGroup.INLINE
510              ));
511          defineElement(
512              "caption", false, elementGroupBits(
513                  ElementGroup.TABLE_CONTENT
514              ), elementGroupBits(
515                  ElementGroup.INLINE
516              ), scopeBits(
517                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
518                  CloseTagScope.LIST_ITEM
519              ));
520          defineElement(
521              "center", false, elementGroupBits(
522                  ElementGroup.BLOCK
523              ), elementGroupBits(
524                  ElementGroup.BLOCK, ElementGroup.INLINE
525              ));
526          defineElement(
527              "cite", true, elementGroupBits(
528                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
529              ), elementGroupBits(
530                  ElementGroup.INLINE
531              ));
532          defineElement(
533              "code", true, elementGroupBits(
534                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
535              ), elementGroupBits(
536                  ElementGroup.INLINE
537              ));
538          defineElement(
539              "col", false, elementGroupBits(
540                  ElementGroup.TABLE_CONTENT, ElementGroup.COL_ELEMENT
541              ), 0);
542          defineElement(
543              "colgroup", false, elementGroupBits(
544                  ElementGroup.TABLE_CONTENT
545              ), elementGroupBits(
546                  ElementGroup.COL_ELEMENT
547              ));
548          ElementContainmentInfo DD = defineElement(
549              "dd", false, elementGroupBits(
550                  ElementGroup.DL_PART
551              ), elementGroupBits(
552                  ElementGroup.BLOCK, ElementGroup.INLINE
553              ));
554          defineElement(
555              "del", true, elementGroupBits(
556                  ElementGroup.BLOCK, ElementGroup.INLINE,
557                  ElementGroup.MIXED
558              ), elementGroupBits(
559                  ElementGroup.BLOCK, ElementGroup.INLINE
560              ));
561          defineElement(
562              "dfn", true, elementGroupBits(
563                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
564              ), elementGroupBits(
565                  ElementGroup.INLINE
566              ));
567          defineElement(
568              "dir", false, elementGroupBits(
569                  ElementGroup.BLOCK
570              ), elementGroupBits(
571                  ElementGroup.LI_ELEMENT
572              ));
573          defineElement(
574              "div", false, elementGroupBits(
575                  ElementGroup.BLOCK
576              ), elementGroupBits(
577                  ElementGroup.BLOCK, ElementGroup.INLINE
578              ));
579          defineElement(
580              "dl", false, elementGroupBits(
581                  ElementGroup.BLOCK
582              ), elementGroupBits(
583                  ElementGroup.DL_PART
584              ),
585              DD);
586          defineElement(
587              "dt", false, elementGroupBits(
588                  ElementGroup.DL_PART
589              ), elementGroupBits(
590                  ElementGroup.INLINE
591              ));
592          defineElement(
593              "em", true, elementGroupBits(
594                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
595              ), elementGroupBits(
596                  ElementGroup.INLINE
597              ));
598          defineElement(
599              "fieldset", false, elementGroupBits(
600                  ElementGroup.BLOCK
601              ), elementGroupBits(
602                  ElementGroup.BLOCK, ElementGroup.INLINE,
603                  ElementGroup.LEGEND_ELEMENT
604              ));
605          defineElement(
606              "font", false, elementGroupBits(
607                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
608              ), elementGroupBits(
609                  ElementGroup.INLINE
610              ));
611          defineElement(
612              "form", false, elementGroupBits(
613                  ElementGroup.BLOCK, ElementGroup.FORM_ELEMENT
614              ), elementGroupBits(
615                  ElementGroup.BLOCK, ElementGroup.INLINE,
616                  ElementGroup.INLINE_MINUS_A, ElementGroup.TR_ELEMENT,
617                  ElementGroup.TD_ELEMENT
618              ));
619          defineElement(
620              "h1", false, elementGroupBits(
621                  ElementGroup.BLOCK
622              ), elementGroupBits(
623                  ElementGroup.INLINE
624              ));
625          defineElement(
626              "h2", false, elementGroupBits(
627                  ElementGroup.BLOCK
628              ), elementGroupBits(
629                  ElementGroup.INLINE
630              ));
631          defineElement(
632              "h3", false, elementGroupBits(
633                  ElementGroup.BLOCK
634              ), elementGroupBits(
635                  ElementGroup.INLINE
636              ));
637          defineElement(
638              "h4", false, elementGroupBits(
639                  ElementGroup.BLOCK
640              ), elementGroupBits(
641                  ElementGroup.INLINE
642              ));
643          defineElement(
644              "h5", false, elementGroupBits(
645                  ElementGroup.BLOCK
646              ), elementGroupBits(
647                  ElementGroup.INLINE
648              ));
649          defineElement(
650              "h6", false, elementGroupBits(
651                  ElementGroup.BLOCK
652              ), elementGroupBits(
653                  ElementGroup.INLINE
654              ));
655          defineElement(
656              "head", false, elementGroupBits(
657                  ElementGroup.TOP_CONTENT
658              ), elementGroupBits(
659                  ElementGroup.HEAD_CONTENT
660              ));
661          defineElement(
662              "hr", false, elementGroupBits(ElementGroup.BLOCK), 0);
663          defineElement(
664              "html", false, 0, elementGroupBits(ElementGroup.TOP_CONTENT),
665              CloseTagScope.ALL);
666          defineElement(
667              "i", true, elementGroupBits(
668                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
669              ), elementGroupBits(
670                  ElementGroup.INLINE
671              ));
672          defineElement(
673              "iframe", false, elementGroupBits(
674                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
675              ), elementGroupBits(
676                  ElementGroup.BLOCK, ElementGroup.INLINE
677              ));
678          defineElement(
679              "img", false, elementGroupBits(
680                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
681              ), 0);
682          defineElement(
683              "input", false, elementGroupBits(
684                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
685              ), 0);
686          defineElement(
687              "ins", true, elementGroupBits(
688                  ElementGroup.BLOCK, ElementGroup.INLINE
689              ), elementGroupBits(
690                  ElementGroup.BLOCK, ElementGroup.INLINE
691              ));
692          defineElement(
693              "isindex", false, elementGroupBits(ElementGroup.INLINE), 0);
694          defineElement(
695              "kbd", true, elementGroupBits(
696                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
697              ), elementGroupBits(
698                  ElementGroup.INLINE
699              ));
700          defineElement(
701              "label", false, elementGroupBits(
702                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
703              ), elementGroupBits(
704                  ElementGroup.INLINE
705              ));
706          defineElement(
707              "legend", false, elementGroupBits(
708                  ElementGroup.LEGEND_ELEMENT
709              ), elementGroupBits(
710                  ElementGroup.INLINE
711              ));
712          ElementContainmentInfo LI = defineElement(
713              "li", false, elementGroupBits(
714                  ElementGroup.LI_ELEMENT
715              ), elementGroupBits(
716                  ElementGroup.BLOCK, ElementGroup.INLINE
717              ));
718          defineElement(
719              "link", false, elementGroupBits(
720                  ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
721              ), 0);
722          defineElement(
723              "listing", false, elementGroupBits(
724                  ElementGroup.BLOCK
725              ), elementGroupBits(
726                  ElementGroup.INLINE
727              ));
728          defineElement(
729              "map", false, elementGroupBits(
730                  ElementGroup.INLINE
731              ), elementGroupBits(
732                  ElementGroup.BLOCK, ElementGroup.AREA_ELEMENT
733              ));
734          defineElement(
735              "meta", false, elementGroupBits(ElementGroup.HEAD_CONTENT), 0);
736          defineElement(
737              "nobr", false, elementGroupBits(
738                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
739              ), elementGroupBits(
740                  ElementGroup.INLINE
741              ));
742          defineElement(
743              "noframes", false, elementGroupBits(
744                  ElementGroup.BLOCK, ElementGroup.TOP_CONTENT
745              ), elementGroupBits(
746                  ElementGroup.BLOCK, ElementGroup.INLINE,
747                  ElementGroup.TOP_CONTENT
748              ));
749          defineElement(
750              "noscript", false, elementGroupBits(
751                  ElementGroup.BLOCK
752              ), elementGroupBits(
753                  ElementGroup.BLOCK, ElementGroup.INLINE
754              ));
755          defineElement(
756              "object", false, elementGroupBits(
757                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
758                  ElementGroup.HEAD_CONTENT
759              ), elementGroupBits(
760                  ElementGroup.BLOCK, ElementGroup.INLINE,
761                  ElementGroup.PARAM_ELEMENT
762              ), scopeBits(
763                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
764                  CloseTagScope.LIST_ITEM
765              ));
766          defineElement(
767              "ol", false, elementGroupBits(
768                  ElementGroup.BLOCK
769              ), elementGroupBits(
770                  ElementGroup.LI_ELEMENT
771              ),
772              LI,
773              scopeBits(CloseTagScope.LIST_ITEM));
774          defineElement(
775              "optgroup", false, elementGroupBits(
776                  ElementGroup.OPTIONS_ELEMENT
777              ), elementGroupBits(
778                  ElementGroup.OPTIONS_ELEMENT
779              ));
780          defineElement(
781              "option", false, elementGroupBits(
782                  ElementGroup.OPTIONS_ELEMENT, ElementGroup.OPTION_ELEMENT
783              ), elementGroupBits(
784                  ElementGroup.CHARACTER_DATA
785              ));
786          defineElement(
787              "p", false, elementGroupBits(
788                  ElementGroup.BLOCK, ElementGroup.P_ELEMENT
789              ), elementGroupBits(
790                  ElementGroup.INLINE, ElementGroup.TABLE_ELEMENT
791              ));
792          defineElement(
793              "param", false, elementGroupBits(ElementGroup.PARAM_ELEMENT), 0);
794          defineElement(
795              "pre", false, elementGroupBits(
796                  ElementGroup.BLOCK
797              ), elementGroupBits(
798                  ElementGroup.INLINE
799              ));
800          defineElement(
801              "q", true, elementGroupBits(
802                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
803              ), elementGroupBits(
804                  ElementGroup.INLINE
805              ));
806          defineElement(
807              "s", true, elementGroupBits(
808                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
809              ), elementGroupBits(
810                  ElementGroup.INLINE
811              ));
812          defineElement(
813              "samp", true, elementGroupBits(
814                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
815              ), elementGroupBits(
816                  ElementGroup.INLINE
817              ));
818          defineElement(
819              "script", false, elementGroupBits(
820                  ElementGroup.BLOCK, ElementGroup.INLINE,
821                  ElementGroup.INLINE_MINUS_A, ElementGroup.MIXED,
822                  ElementGroup.TABLE_CONTENT, ElementGroup.HEAD_CONTENT,
823                  ElementGroup.TOP_CONTENT, ElementGroup.AREA_ELEMENT,
824                  ElementGroup.FORM_ELEMENT, ElementGroup.LEGEND_ELEMENT,
825                  ElementGroup.LI_ELEMENT, ElementGroup.DL_PART,
826                  ElementGroup.P_ELEMENT, ElementGroup.OPTIONS_ELEMENT,
827                  ElementGroup.OPTION_ELEMENT, ElementGroup.PARAM_ELEMENT,
828                  ElementGroup.TABLE_ELEMENT, ElementGroup.TR_ELEMENT,
829                  ElementGroup.TD_ELEMENT, ElementGroup.COL_ELEMENT
830              ), elementGroupBits(
831                  ElementGroup.CHARACTER_DATA));
832          defineElement(
833              "select", false, elementGroupBits(
834                  ElementGroup.INLINE
835              ), elementGroupBits(
836                  ElementGroup.OPTIONS_ELEMENT
837              ));
838          defineElement(
839              "small", true, elementGroupBits(
840                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
841              ), elementGroupBits(
842                  ElementGroup.INLINE
843              ));
844          defineElement(
845              "span", false, elementGroupBits(
846                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
847              ), elementGroupBits(
848                  ElementGroup.INLINE
849              ));
850          defineElement(
851              "strike", true, elementGroupBits(
852                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
853              ), elementGroupBits(
854                  ElementGroup.INLINE
855              ));
856          defineElement(
857              "strong", true, elementGroupBits(
858                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
859              ), elementGroupBits(
860                  ElementGroup.INLINE
861              ));
862          defineElement(
863              "style", false, elementGroupBits(
864                  ElementGroup.INLINE, ElementGroup.HEAD_CONTENT
865              ), elementGroupBits(
866                  ElementGroup.CHARACTER_DATA
867              ));
868          defineElement(
869              "sub", true, elementGroupBits(
870                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
871              ), elementGroupBits(
872                  ElementGroup.INLINE
873              ));
874          defineElement(
875              "sup", true, elementGroupBits(
876                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
877              ), elementGroupBits(
878                  ElementGroup.INLINE
879              ));
880          defineElement(
881              "table", false, elementGroupBits(
882                  ElementGroup.BLOCK, ElementGroup.TABLE_ELEMENT
883              ), elementGroupBits(
884                  ElementGroup.TABLE_CONTENT, ElementGroup.FORM_ELEMENT
885              ), CloseTagScope.ALL);
886          defineElement(
887              "tbody", false, elementGroupBits(
888                  ElementGroup.TABLE_CONTENT
889              ), elementGroupBits(
890                  ElementGroup.TR_ELEMENT
891              ));
892          ElementContainmentInfo TD = defineElement(
893              "td", false, elementGroupBits(
894                  ElementGroup.TD_ELEMENT
895              ), elementGroupBits(
896                  ElementGroup.BLOCK, ElementGroup.INLINE
897              ), scopeBits(
898                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
899                  CloseTagScope.LIST_ITEM
900              ));
901          defineElement(
902              "textarea", false,
903              // No, a textarea cannot be inside a link.
904              elementGroupBits(ElementGroup.INLINE),
905              elementGroupBits(ElementGroup.CHARACTER_DATA));
906          defineElement(
907              "tfoot", false, elementGroupBits(
908                  ElementGroup.TABLE_CONTENT
909              ), elementGroupBits(
910                  ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
911                  ElementGroup.TD_ELEMENT
912              ));
913          defineElement(
914              "th", false, elementGroupBits(
915                  ElementGroup.TD_ELEMENT
916              ), elementGroupBits(
917                  ElementGroup.BLOCK, ElementGroup.INLINE
918              ), scopeBits(
919                  CloseTagScope.COMMON, CloseTagScope.BUTTON,
920                  CloseTagScope.LIST_ITEM
921              ));
922          defineElement(
923              "thead", false, elementGroupBits(
924                  ElementGroup.TABLE_CONTENT
925              ), elementGroupBits(
926                  ElementGroup.FORM_ELEMENT, ElementGroup.TR_ELEMENT,
927                  ElementGroup.TD_ELEMENT
928              ));
929          defineElement(
930              "title", false, elementGroupBits(ElementGroup.HEAD_CONTENT),
931              elementGroupBits(ElementGroup.CHARACTER_DATA));
932          defineElement(
933              "tr", false, elementGroupBits(
934                  ElementGroup.TABLE_CONTENT, ElementGroup.TR_ELEMENT
935              ), elementGroupBits(
936                  ElementGroup.FORM_ELEMENT, ElementGroup.TD_ELEMENT
937              ),
938              TD);
939          defineElement(
940              "tt", true, elementGroupBits(
941                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
942              ), elementGroupBits(
943                  ElementGroup.INLINE
944              ));
945          defineElement(
946              "u", true, elementGroupBits(
947                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
948              ), elementGroupBits(
949                  ElementGroup.INLINE
950              ));
951          defineElement(
952              "ul", false, elementGroupBits(
953                  ElementGroup.BLOCK
954              ), elementGroupBits(
955                  ElementGroup.LI_ELEMENT
956              ),
957              LI,
958              scopeBits(CloseTagScope.LIST_ITEM));
959          defineElement(
960              "var", false, elementGroupBits(
961                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
962              ), elementGroupBits(
963                  ElementGroup.INLINE
964              ));
965          defineElement(
966              "video", false, elementGroupBits(
967                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
968              ), 0);
969          defineElement(
970              "wbr", false, elementGroupBits(
971                  ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A
972              ), 0);
973          defineElement(
974              "xmp", false, elementGroupBits(
975                  ElementGroup.BLOCK
976              ), elementGroupBits(
977                  ElementGroup.INLINE
978              ));
979    
980        }
981    
982        private static final ElementContainmentInfo CHARACTER_DATA_ONLY
983            = new ElementContainmentInfo(
984                "#text", false,
985                elementGroupBits(
986                    ElementGroup.INLINE, ElementGroup.INLINE_MINUS_A,
987                    ElementGroup.BLOCK, ElementGroup.CHARACTER_DATA),
988                0, null, 0);
989      }
990    
991      static boolean allowsPlainTextualContent(String canonElementName) {
992        ElementContainmentInfo info =
993           ELEMENT_CONTAINMENT_RELATIONSHIPS.get(canonElementName);
994        if (info == null
995            || ((info.contents
996                 & ElementContainmentRelationships.CHARACTER_DATA_ONLY.types)
997                != 0)) {
998          switch (HtmlTextEscapingMode.getModeForTag(canonElementName)) {
999            case PCDATA:     return true;
1000            case RCDATA:     return true;
1001            case PLAIN_TEXT: return true;
1002            case VOID:       return false;
1003            case CDATA:
1004            case CDATA_SOMETIMES:
1005              return "xmp".equals(canonElementName)
1006                  || "listing".equals(canonElementName);
1007          }
1008        }
1009        return false;
1010      }
1011    }