/*
 * testHTML.c : a small tester program for HTML input.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */

#ifdef WIN32
#include "win32config.h"
#else
#include "config.h"
#endif

#include <libxml/xmlversion.h>
#ifdef LIBXML_HTML_ENABLED

#include <stdio.h>
#include <string.h>
#include <stdarg.h>


#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif

#include <libxml/xmlmemory.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/debugXML.h>
#include <libxml/xmlerror.h>

/*
 * Command-line switches. Each counter is incremented by main() every time
 * the matching option is seen; zero means "option not given".
 */
#ifdef LIBXML_DEBUG_ENABLED
static int debug = 0;		/* --debug  : dump the tree with xmlDebugDumpDocument */
#endif
static int copy = 0;		/* --copy   : round-trip the tree through xmlCopyDoc */
static int sax = 0;		/* --sax    : trace SAX callbacks instead of building output */
static int repeat = 0;		/* --repeat : parse each file 100 * repeat times */
static int noout = 0;		/* --noout  : suppress the output pass */
static int push = 0;		/* --push   : feed the parser through the push interface */
static char *encoding = NULL;	/* --encode : output encoding name, NULL when not given */

Daniel Veillard7c1206f1999-10-14 09:10:25 +000055xmlSAXHandler emptySAXHandlerStruct = {
56 NULL, /* internalSubset */
57 NULL, /* isStandalone */
58 NULL, /* hasInternalSubset */
59 NULL, /* hasExternalSubset */
60 NULL, /* resolveEntity */
61 NULL, /* getEntity */
62 NULL, /* entityDecl */
63 NULL, /* notationDecl */
64 NULL, /* attributeDecl */
65 NULL, /* elementDecl */
66 NULL, /* unparsedEntityDecl */
67 NULL, /* setDocumentLocator */
68 NULL, /* startDocument */
69 NULL, /* endDocument */
70 NULL, /* startElement */
71 NULL, /* endElement */
72 NULL, /* reference */
73 NULL, /* characters */
74 NULL, /* ignorableWhitespace */
75 NULL, /* processingInstruction */
76 NULL, /* comment */
77 NULL, /* xmlParserWarning */
78 NULL, /* xmlParserError */
79 NULL, /* xmlParserError */
80 NULL, /* getParameterEntity */
81};
82
83xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
84extern xmlSAXHandlerPtr debugSAXHandler;
85
/************************************************************************
 *                                                                      *
 *                            Debug Handlers                            *
 *                                                                      *
 ************************************************************************/

/**
 * isStandaloneDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the SAX isStandalone callback: it only logs the call
 * on stdout.
 *
 * Returns 0 always; this handler does not look at the document.
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;
    fprintf(stdout, "SAX.isStandalone()\n");
    return(0);
}

/**
 * hasInternalSubsetDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the SAX hasInternalSubset callback: it only logs the
 * call on stdout.
 *
 * Returns 0 always; this handler does not look at the document.
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fprintf(stdout, "SAX.hasInternalSubset()\n");
    return(0);
}

/**
 * hasExternalSubsetDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the SAX hasExternalSubset callback: it only logs the
 * call on stdout.
 *
 * Returns 0 always; this handler does not look at the document.
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fprintf(stdout, "SAX.hasExternalSubset()\n");
    return(0);
}

137/**
138 * hasInternalSubsetDebug:
139 * @ctxt: An XML parser context
140 *
141 * Does this document has an internal subset
142 */
143void
144internalSubsetDebug(void *ctx, const xmlChar *name,
145 const xmlChar *ExternalID, const xmlChar *SystemID)
146{
Daniel Veillard808a3f12000-08-17 13:50:51 +0000147 fprintf(stdout, "SAX.internalSubset(%s,", name);
148 if (ExternalID == NULL)
149 fprintf(stdout, " ,");
150 else
151 fprintf(stdout, " %s,", ExternalID);
152 if (SystemID == NULL)
153 fprintf(stdout, " )\n");
154 else
155 fprintf(stdout, " %s)\n", SystemID);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000156}
157
158/**
159 * resolveEntityDebug:
160 * @ctxt: An XML parser context
161 * @publicId: The public ID of the entity
162 * @systemId: The system ID of the entity
163 *
164 * Special entity resolver, better left to the parser, it has
165 * more context than the application layer.
166 * The default behaviour is to NOT resolve the entities, in that case
167 * the ENTITY_REF nodes are built in the structure (and the parameter
168 * values).
169 *
170 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
171 */
172xmlParserInputPtr
173resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
174{
175 /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
176
177
178 fprintf(stdout, "SAX.resolveEntity(");
179 if (publicId != NULL)
180 fprintf(stdout, "%s", (char *)publicId);
181 else
182 fprintf(stdout, " ");
183 if (systemId != NULL)
184 fprintf(stdout, ", %s)\n", (char *)systemId);
185 else
186 fprintf(stdout, ", )\n");
187/*********
188 if (systemId != NULL) {
189 return(xmlNewInputFromFile(ctxt, (char *) systemId));
190 }
191 *********/
192 return(NULL);
193}
194
195/**
196 * getEntityDebug:
197 * @ctxt: An XML parser context
198 * @name: The entity name
199 *
200 * Get an entity by name
201 *
202 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
203 */
204xmlEntityPtr
205getEntityDebug(void *ctx, const xmlChar *name)
206{
207 fprintf(stdout, "SAX.getEntity(%s)\n", name);
208 return(NULL);
209}
210
211/**
212 * getParameterEntityDebug:
213 * @ctxt: An XML parser context
214 * @name: The entity name
215 *
216 * Get a parameter entity by name
217 *
218 * Returns the xmlParserInputPtr
219 */
220xmlEntityPtr
221getParameterEntityDebug(void *ctx, const xmlChar *name)
222{
223 fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
224 return(NULL);
225}
226
227
228/**
229 * entityDeclDebug:
230 * @ctxt: An XML parser context
231 * @name: the entity name
232 * @type: the entity type
233 * @publicId: The public ID of the entity
234 * @systemId: The system ID of the entity
235 * @content: the entity value (without processing).
236 *
237 * An entity definition has been parsed
238 */
239void
240entityDeclDebug(void *ctx, const xmlChar *name, int type,
241 const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
242{
243 fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
244 name, type, publicId, systemId, content);
245}
246
247/**
248 * attributeDeclDebug:
249 * @ctxt: An XML parser context
250 * @name: the attribute name
251 * @type: the attribute type
252 *
253 * An attribute definition has been parsed
254 */
255void
256attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
257 int type, int def, const xmlChar *defaultValue,
258 xmlEnumerationPtr tree)
259{
260 fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
261 elem, name, type, def, defaultValue);
262}
263
264/**
265 * elementDeclDebug:
266 * @ctxt: An XML parser context
267 * @name: the element name
268 * @type: the element type
269 * @content: the element value (without processing).
270 *
271 * An element definition has been parsed
272 */
273void
274elementDeclDebug(void *ctx, const xmlChar *name, int type,
275 xmlElementContentPtr content)
276{
277 fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
278 name, type);
279}
280
281/**
282 * notationDeclDebug:
283 * @ctxt: An XML parser context
284 * @name: The name of the notation
285 * @publicId: The public ID of the entity
286 * @systemId: The system ID of the entity
287 *
288 * What to do when a notation declaration has been parsed.
289 */
290void
291notationDeclDebug(void *ctx, const xmlChar *name,
292 const xmlChar *publicId, const xmlChar *systemId)
293{
294 fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
295 (char *) name, (char *) publicId, (char *) systemId);
296}
297
298/**
299 * unparsedEntityDeclDebug:
300 * @ctxt: An XML parser context
301 * @name: The name of the entity
302 * @publicId: The public ID of the entity
303 * @systemId: The system ID of the entity
304 * @notationName: the name of the notation
305 *
306 * What to do when an unparsed entity declaration is parsed
307 */
308void
309unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
310 const xmlChar *publicId, const xmlChar *systemId,
311 const xmlChar *notationName)
312{
313 fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
314 (char *) name, (char *) publicId, (char *) systemId,
315 (char *) notationName);
316}
317
318/**
319 * setDocumentLocatorDebug:
320 * @ctxt: An XML parser context
321 * @loc: A SAX Locator
322 *
323 * Receive the document locator at startup, actually xmlDefaultSAXLocator
324 * Everything is available on the context, so this is useless in our case.
325 */
326void
327setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
328{
329 fprintf(stdout, "SAX.setDocumentLocator()\n");
330}
331
/**
 * startDocumentDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the SAX startDocument callback: logs the call on stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;
    fprintf(stdout, "SAX.startDocument()\n");
}

/**
 * endDocumentDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the SAX endDocument callback: logs the call on stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;
    fprintf(stdout, "SAX.endDocument()\n");
}

356/**
357 * startElementDebug:
358 * @ctxt: An XML parser context
359 * @name: The element name
360 *
361 * called when an opening tag has been processed.
362 */
363void
364startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
365{
366 int i;
367
368 fprintf(stdout, "SAX.startElement(%s", (char *) name);
369 if (atts != NULL) {
370 for (i = 0;(atts[i] != NULL);i++) {
Daniel Veillard808a3f12000-08-17 13:50:51 +0000371 fprintf(stdout, ", %s", atts[i++]);
Daniel Veillarde010c172000-08-28 10:04:51 +0000372 if (atts[i] != NULL) {
373 unsigned char output[40];
374 const unsigned char *att = atts[i];
375 int outlen, attlen;
376 fprintf(stdout, "='");
377 while ((attlen = strlen((char*)att)) > 0) {
378 outlen = sizeof output - 1;
379 htmlEncodeEntities(output, &outlen, att, &attlen, '\'');
380 fprintf(stdout, "%.*s", outlen, output);
381 att += attlen;
382 }
383 fprintf(stdout, "'");
384 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000385 }
386 }
387 fprintf(stdout, ")\n");
388}
389
390/**
391 * endElementDebug:
392 * @ctxt: An XML parser context
393 * @name: The element name
394 *
395 * called when the end of an element has been detected.
396 */
397void
398endElementDebug(void *ctx, const xmlChar *name)
399{
400 fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
401}
402
403/**
404 * charactersDebug:
405 * @ctxt: An XML parser context
406 * @ch: a xmlChar string
407 * @len: the number of xmlChar
408 *
409 * receiving some chars from the parser.
410 * Question: how much at a time ???
411 */
412void
413charactersDebug(void *ctx, const xmlChar *ch, int len)
414{
Daniel Veillarde010c172000-08-28 10:04:51 +0000415 unsigned char output[40];
Daniel Veillard4948eb42000-08-29 09:41:15 +0000416 int inlen = len, outlen = 30;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000417
Daniel Veillard4948eb42000-08-29 09:41:15 +0000418 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
Daniel Veillarde010c172000-08-28 10:04:51 +0000419 output[outlen] = 0;
Daniel Veillard87b95392000-08-12 21:12:04 +0000420
421 fprintf(stdout, "SAX.characters(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000422}
423
424/**
Daniel Veillard7eda8452000-10-14 23:38:43 +0000425 * cdataDebug:
426 * @ctxt: An XML parser context
427 * @ch: a xmlChar string
428 * @len: the number of xmlChar
429 *
430 * receiving some cdata chars from the parser.
431 * Question: how much at a time ???
432 */
433void
434cdataDebug(void *ctx, const xmlChar *ch, int len)
435{
436 unsigned char output[40];
437 int inlen = len, outlen = 30;
438
439 htmlEncodeEntities(output, &outlen, ch, &inlen, 0);
440 output[outlen] = 0;
441
442 fprintf(stdout, "SAX.cdata(%s, %d)\n", output, len);
443}
444
445/**
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000446 * referenceDebug:
447 * @ctxt: An XML parser context
448 * @name: The entity name
449 *
450 * called when an entity reference is detected.
451 */
452void
453referenceDebug(void *ctx, const xmlChar *name)
454{
455 fprintf(stdout, "SAX.reference(%s)\n", name);
456}
457
458/**
459 * ignorableWhitespaceDebug:
460 * @ctxt: An XML parser context
461 * @ch: a xmlChar string
462 * @start: the first char in the string
463 * @len: the number of xmlChar
464 *
465 * receiving some ignorable whitespaces from the parser.
466 * Question: how much at a time ???
467 */
468void
469ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
470{
Daniel Veillard87b95392000-08-12 21:12:04 +0000471 char output[40];
472 int i;
473
474 for (i = 0;(i<len) && (i < 30);i++)
475 output[i] = ch[i];
476 output[i] = 0;
477
478 fprintf(stdout, "SAX.ignorableWhitespace(%s, %d)\n", output, len);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000479}
480
481/**
482 * processingInstructionDebug:
483 * @ctxt: An XML parser context
484 * @target: the target name
485 * @data: the PI data's
486 * @len: the number of xmlChar
487 *
488 * A processing instruction has been parsed.
489 */
490void
491processingInstructionDebug(void *ctx, const xmlChar *target,
492 const xmlChar *data)
493{
494 fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
495 (char *) target, (char *) data);
496}
497
498/**
499 * commentDebug:
500 * @ctxt: An XML parser context
501 * @value: the comment content
502 *
503 * A comment has been parsed.
504 */
505void
506commentDebug(void *ctx, const xmlChar *value)
507{
508 fprintf(stdout, "SAX.comment(%s)\n", value);
509}
510
/**
 * warningDebug:
 * @ctx:  the SAX user data (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug version of the SAX warning callback: prints the formatted message
 * on stdout, prefixed with "SAX.warning: ".
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list args;

    (void) ctx;

    va_start(args, msg);
    fprintf(stdout, "SAX.warning: ");
    vfprintf(stdout, msg, args);
    va_end(args);
}

/**
 * errorDebug:
 * @ctx:  the SAX user data (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug version of the SAX error callback: prints the formatted message
 * on stdout, prefixed with "SAX.error: ".
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list args;

    (void) ctx;

    va_start(args, msg);
    fprintf(stdout, "SAX.error: ");
    vfprintf(stdout, msg, args);
    va_end(args);
}

/**
 * fatalErrorDebug:
 * @ctx:  the SAX user data (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug version of the SAX fatalError callback: prints the formatted
 * message on stdout, prefixed with "SAX.fatalError: ".
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list args;

    (void) ctx;

    va_start(args, msg);
    fprintf(stdout, "SAX.fatalError: ");
    vfprintf(stdout, msg, args);
    va_end(args);
}

571xmlSAXHandler debugSAXHandlerStruct = {
572 internalSubsetDebug,
573 isStandaloneDebug,
574 hasInternalSubsetDebug,
575 hasExternalSubsetDebug,
576 resolveEntityDebug,
577 getEntityDebug,
578 entityDeclDebug,
579 notationDeclDebug,
580 attributeDeclDebug,
581 elementDeclDebug,
582 unparsedEntityDeclDebug,
583 setDocumentLocatorDebug,
584 startDocumentDebug,
585 endDocumentDebug,
586 startElementDebug,
587 endElementDebug,
588 referenceDebug,
589 charactersDebug,
590 ignorableWhitespaceDebug,
591 processingInstructionDebug,
592 commentDebug,
593 warningDebug,
594 errorDebug,
595 fatalErrorDebug,
596 getParameterEntityDebug,
Daniel Veillard7eda8452000-10-14 23:38:43 +0000597 cdataDebug,
598 NULL
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000599};
600
601xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
/************************************************************************
 *                                                                      *
 *                                Debug                                 *
 *                                                                      *
 ************************************************************************/

Daniel Veillard7c1206f1999-10-14 09:10:25 +0000608void parseSAXFile(char *filename) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +0000609 htmlDocPtr doc = NULL;
610
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000611 /*
612 * Empty callbacks for checking
613 */
Daniel Veillard87b95392000-08-12 21:12:04 +0000614 if (push) {
615 FILE *f;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000616
Daniel Veillard87b95392000-08-12 21:12:04 +0000617 f = fopen(filename, "r");
618 if (f != NULL) {
619 int res, size = 3;
620 char chars[4096];
621 htmlParserCtxtPtr ctxt;
622
623 /* if (repeat) */
624 size = 4096;
625 res = fread(chars, 1, 4, f);
626 if (res > 0) {
627 ctxt = htmlCreatePushParserCtxt(emptySAXHandler, NULL,
628 chars, res, filename, 0);
629 while ((res = fread(chars, 1, size, f)) > 0) {
630 htmlParseChunk(ctxt, chars, res, 0);
631 }
632 htmlParseChunk(ctxt, chars, 0, 1);
633 doc = ctxt->myDoc;
634 htmlFreeParserCtxt(ctxt);
635 }
636 if (doc != NULL) {
637 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
638 xmlFreeDoc(doc);
639 }
640 fclose(f);
641 }
642 if (!noout) {
643 f = fopen(filename, "r");
644 if (f != NULL) {
645 int res, size = 3;
646 char chars[4096];
647 htmlParserCtxtPtr ctxt;
648
649 /* if (repeat) */
650 size = 4096;
651 res = fread(chars, 1, 4, f);
652 if (res > 0) {
653 ctxt = htmlCreatePushParserCtxt(debugSAXHandler, NULL,
654 chars, res, filename, 0);
655 while ((res = fread(chars, 1, size, f)) > 0) {
656 htmlParseChunk(ctxt, chars, res, 0);
657 }
658 htmlParseChunk(ctxt, chars, 0, 1);
659 doc = ctxt->myDoc;
660 htmlFreeParserCtxt(ctxt);
661 }
662 if (doc != NULL) {
663 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
664 xmlFreeDoc(doc);
665 }
666 fclose(f);
667 }
668 }
669 } else {
670 doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000671 if (doc != NULL) {
672 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
673 xmlFreeDoc(doc);
674 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000675
676 if (!noout) {
677 /*
678 * Debug callback
679 */
680 doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
681 if (doc != NULL) {
682 fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
683 xmlFreeDoc(doc);
684 }
685 }
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000686 }
687}
688
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000689void parseAndPrintFile(char *filename) {
Daniel Veillard2eac5032000-01-09 21:08:56 +0000690 htmlDocPtr doc = NULL, tmp;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000691
692 /*
693 * build an HTML tree from a string;
694 */
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000695 if (push) {
696 FILE *f;
697
698 f = fopen(filename, "r");
699 if (f != NULL) {
700 int res, size = 3;
Daniel Veillard87b95392000-08-12 21:12:04 +0000701 char chars[4096];
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000702 htmlParserCtxtPtr ctxt;
703
Daniel Veillard87b95392000-08-12 21:12:04 +0000704 /* if (repeat) */
705 size = 4096;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000706 res = fread(chars, 1, 4, f);
707 if (res > 0) {
708 ctxt = htmlCreatePushParserCtxt(NULL, NULL,
709 chars, res, filename, 0);
710 while ((res = fread(chars, 1, size, f)) > 0) {
711 htmlParseChunk(ctxt, chars, res, 0);
712 }
713 htmlParseChunk(ctxt, chars, 0, 1);
714 doc = ctxt->myDoc;
715 htmlFreeParserCtxt(ctxt);
716 }
Daniel Veillard87b95392000-08-12 21:12:04 +0000717 fclose(f);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000718 }
719 } else {
720 doc = htmlParseFile(filename, NULL);
721 }
722 if (doc == NULL) {
Daniel Veillardd6d7f7b2000-10-25 19:56:55 +0000723 xmlGenericError(xmlGenericErrorContext,
724 "Could not parse %s\n", filename);
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000725 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000726
727 /*
728 * test intermediate copy if needed.
729 */
730 if (copy) {
731 tmp = doc;
732 doc = xmlCopyDoc(doc, 1);
733 xmlFreeDoc(tmp);
734 }
735
736 /*
737 * print it.
738 */
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000739 if (!noout) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000740#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000741 if (!debug) {
742 if (encoding)
743 htmlSaveFileEnc("-", doc, encoding);
744 else
745 htmlDocDump(stdout, doc);
746 } else
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000747 xmlDebugDumpDocument(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000748#else
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000749 if (encoding)
750 htmlSaveFileEnc("-", doc, encoding);
751 else
752 htmlDocDump(stdout, doc);
Daniel Veillard361d8452000-04-03 19:48:13 +0000753#endif
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000754 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000755
756 /*
757 * free it.
758 */
759 xmlFreeDoc(doc);
760}
761
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000762int main(int argc, char **argv) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000763 int i, count;
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000764 int files = 0;
765
766 for (i = 1; i < argc ; i++) {
Daniel Veillard361d8452000-04-03 19:48:13 +0000767#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000768 if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
769 debug++;
Daniel Veillard361d8452000-04-03 19:48:13 +0000770 else
771#endif
772 if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000773 copy++;
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000774 else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
775 push++;
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000776 else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
777 sax++;
778 else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
779 noout++;
780 else if ((!strcmp(argv[i], "-repeat")) ||
781 (!strcmp(argv[i], "--repeat")))
782 repeat++;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000783 else if ((!strcmp(argv[i], "-encode")) ||
784 (!strcmp(argv[i], "--encode"))) {
785 i++;
786 encoding = argv[i];
787 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000788 }
789 for (i = 1; i < argc ; i++) {
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000790 if ((!strcmp(argv[i], "-encode")) ||
791 (!strcmp(argv[i], "--encode"))) {
792 i++;
793 continue;
794 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000795 if (argv[i][0] != '-') {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000796 if (repeat) {
797 for (count = 0;count < 100 * repeat;count++) {
798 if (sax)
799 parseSAXFile(argv[i]);
800 else
801 parseAndPrintFile(argv[i]);
802 }
803 } else {
804 if (sax)
805 parseSAXFile(argv[i]);
806 else
807 parseAndPrintFile(argv[i]);
808 }
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000809 files ++;
810 }
811 }
812 if (files == 0) {
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000813 printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000814 argv[0]);
815 printf("\tParse the HTML files and output the result of the parsing\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000816#ifdef LIBXML_DEBUG_ENABLED
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000817 printf("\t--debug : dump a debug tree of the in-memory document\n");
Daniel Veillard361d8452000-04-03 19:48:13 +0000818#endif
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000819 printf("\t--copy : used to test the internal copy implementation\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000820 printf("\t--sax : debug the sequence of SAX callbacks\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000821 printf("\t--repeat : parse the file 100 times, for timing\n");
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000822 printf("\t--noout : do not print the result\n");
Daniel Veillard5e5c6231999-12-29 12:49:06 +0000823 printf("\t--push : use the push mode parser\n");
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000824 printf("\t--encode encoding : output in the given encoding\n");
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000825 }
Daniel Veillardf5c2c871999-12-01 09:51:45 +0000826 xmlCleanupParser();
Daniel Veillard7c1206f1999-10-14 09:10:25 +0000827 xmlMemoryDump();
Daniel Veillardbe70ff71999-07-05 16:50:46 +0000828
829 return(0);
830}
Daniel Veillard361d8452000-04-03 19:48:13 +0000831#else /* !LIBXML_HTML_ENABLED */
832#include <stdio.h>
833int main(int argc, char **argv) {
834 printf("%s : HTML support not compiled in\n", argv[0]);
835 return(0);
836}
837#endif