xref: /aosp_15_r20/external/cronet/third_party/libxml/src/testchar.c (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 /**
2  * Test the UTF-8 decoding routines
3  *
4  * author: Daniel Veillard
5  * copy: see Copyright for the status of this software.
6  */
7 
8 #define XML_DEPRECATED
9 
10 #include <stdio.h>
11 #include <string.h>
12 #include <libxml/tree.h>
13 #include <libxml/parser.h>
14 #include <libxml/parserInternals.h>
15 
16 int lastError;
17 
errorHandler(void * unused,const xmlError * err)18 static void errorHandler(void *unused, const xmlError *err) {
19     if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
20         lastError = err->code;
21     }
22 }
23 
/*
 * Document templates used by testDocumentRanges(). The four 'X' bytes are
 * a placeholder that gets overwritten in place with the bytes under test:
 * document1 holds it in element content (offset 5), document2 in an
 * attribute value (offset 10).
 */
char document1[100] = "<doc>XXXX</doc>";
char document2[100] = "<doc foo='XXXX'/>";
26 
/**
 * testDocumentRangeByte1:
 * @ctxt:  parser context, reset before every iteration
 * @document:  the document buffer to parse
 * @len:  number of bytes of @document to parse
 * @data:  location inside @document receiving the byte under test
 * @forbid1:  a byte value that must be rejected at this position
 *            (callers pass -1 for none)
 * @forbid2:  a second byte value that must be rejected (-1 for none)
 *
 * Store every byte value from 0x00 to 0xFF at @data[0], parse the document
 * with xmlReadMemory() and check that the outcome matches what is expected
 * for that byte.
 *
 * Returns 0 if every byte behaved as expected, 1 on the first mismatch.
 */
static int testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
                  int len,  char *data, int forbid1, int forbid2) {
    int i;

    for (i = 0; i <= 0xFF; i++) {
        xmlDocPtr res;
        int failed = 0;

        lastError = 0;
        xmlCtxtReset(ctxt);

        data[0] = (char) i;

        res = xmlReadMemory(document, len, "test", NULL, 0);

        if ((i == forbid1) || (i == forbid2)) {
            /* Explicitly forbidden byte: must fail with an error. */
            if ((lastError == 0) || (res != NULL)) {
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X: %c\n",
                        i, i);
                failed = 1;
            }
        } else if ((i == '<') || (i == '&')) {
            /* Markup delimiters are illegal at the injection point. */
            if ((lastError == 0) || (res != NULL)) {
                fprintf(stderr,
                    "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
                failed = 1;
            }
        } else if (((i < 0x20) || (i >= 0x80)) &&
                   (i != 0x9) && (i != 0xA) && (i != 0xD)) {
            /* Control characters and lone non-ASCII bytes. */
            if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL)) {
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X\n", i);
                failed = 1;
            }
        } else if (res == NULL) {
            fprintf(stderr,
                "Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
            failed = 1;
        }

        /*
         * Release the document on every path, including failures, so a
         * mismatch where the parser did return a tree does not leak it.
         */
        if (res != NULL)
            xmlFreeDoc(res);
        if (failed)
            return(1);
    }
    return(0);
}
74 
/**
 * testDocumentRangeByte2:
 * @ctxt:  parser context, reset before every iteration
 * @document:  the document buffer to parse
 * @len:  number of bytes of @document to parse
 * @data:  location inside @document receiving the two bytes under test
 *
 * Store every pair (i, j) with i in 0x80..0xFF and j in 0x00..0xFF at
 * @data[0] and @data[1], parse the document with xmlReadMemory() and check
 * that invalid sequences are rejected and valid 2-byte sequences parse
 * without error.
 *
 * Returns 0 if every pair behaved as expected, 1 on the first mismatch.
 */
static int testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
                  int len,  char *data) {
    int i, j;

    for (i = 0x80; i <= 0xFF; i++) {
        for (j = 0; j <= 0xFF; j++) {
            xmlDocPtr res;
            int failed = 0;

            lastError = 0;
            xmlCtxtReset(ctxt);

            data[0] = (char) i;
            data[1] = (char) j;

            res = xmlReadMemory(document, len, "test", NULL, 0);

            /*
             * Invalid when:
             *  - the first bit of the first byte is set but the second
             *    is not (a continuation byte in lead position),
             *  - the second byte does not start with bits 10,
             *  - the 2-byte encoding is overlong, i.e. none of bits
             *    5 to 1 of i is set (value below 0x80).
             */
            if (((i & 0x80) && ((i & 0x40) == 0)) ||
                ((i & 0x80) && ((j & 0xC0) != 0x80)) ||
                ((i & 0x80) && ((i & 0x1E) == 0))) {
                if ((lastError == 0) || (res != NULL)) {
                    fprintf(stderr,
                    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
                            i, j);
                    failed = 1;
                }
            }

            /*
             * If the third bit of the first byte is set, the sequence
             * would need at least 3 bytes, but only 2 are given.
             */
            else if ((i & 0xE0) == 0xE0) {
                if ((lastError == 0) || (res != NULL)) {
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
                            i, j);
                    failed = 1;
                }
            }

            /*
             * We should see no error in the remaining cases.
             */
            else if ((lastError != 0) || (res == NULL)) {
                fprintf(stderr,
                    "Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
                failed = 1;
            }

            /*
             * Release the document on every path, including failures, so
             * a mismatch where a tree was returned does not leak it.
             */
            if (res != NULL)
                xmlFreeDoc(res);
            if (failed)
                return(1);
        }
    }
    return(0);
}
153 
154 /**
155  * testDocumentRanges:
156  *
157  * Test the correct UTF8 character parsing in context of XML documents
158  * Those are in-context injection tests checking the parser behaviour on
159  * edge case values at different point in content, beginning and end of
160  * CDATA in text or in attribute values.
161  */
162 
testDocumentRanges(void)163 static int testDocumentRanges(void) {
164     xmlParserCtxtPtr ctxt;
165     char *data;
166     int test_ret = 0;
167 
168     /*
169      * Set up a parsing context using the first document as
170      * the current input source.
171      */
172     ctxt = xmlNewParserCtxt();
173     if (ctxt == NULL) {
174         fprintf(stderr, "Failed to allocate parser context\n");
175 	return(1);
176     }
177 
178     printf("testing 1 byte char in document: 1");
179     fflush(stdout);
180     data = &document1[5];
181     data[0] = ' ';
182     data[1] = ' ';
183     data[2] = ' ';
184     data[3] = ' ';
185     /* test 1 byte injection at beginning of area */
186     test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
187                            data, -1, -1);
188     printf(" 2");
189     fflush(stdout);
190     data[0] = ' ';
191     data[1] = ' ';
192     data[2] = ' ';
193     data[3] = ' ';
194     /* test 1 byte injection at end of area */
195     test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
196                            data + 3, -1, -1);
197 
198     printf(" 3");
199     fflush(stdout);
200     data = &document2[10];
201     data[0] = ' ';
202     data[1] = ' ';
203     data[2] = ' ';
204     data[3] = ' ';
205     /* test 1 byte injection at beginning of area */
206     test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
207                            data, '\'', -1);
208     printf(" 4");
209     fflush(stdout);
210     data[0] = ' ';
211     data[1] = ' ';
212     data[2] = ' ';
213     data[3] = ' ';
214     /* test 1 byte injection at end of area */
215     test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
216                            data + 3, '\'', -1);
217     printf(" done\n");
218 
219     printf("testing 2 byte char in document: 1");
220     fflush(stdout);
221     data = &document1[5];
222     data[0] = ' ';
223     data[1] = ' ';
224     data[2] = ' ';
225     data[3] = ' ';
226     /* test 2 byte injection at beginning of area */
227     test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
228                            data);
229     printf(" 2");
230     fflush(stdout);
231     data[0] = ' ';
232     data[1] = ' ';
233     data[2] = ' ';
234     data[3] = ' ';
235     /* test 2 byte injection at end of area */
236     test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
237                            data + 2);
238 
239     printf(" 3");
240     fflush(stdout);
241     data = &document2[10];
242     data[0] = ' ';
243     data[1] = ' ';
244     data[2] = ' ';
245     data[3] = ' ';
246     /* test 2 byte injection at beginning of area */
247     test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
248                            data);
249     printf(" 4");
250     fflush(stdout);
251     data[0] = ' ';
252     data[1] = ' ';
253     data[2] = ' ';
254     data[3] = ' ';
255     /* test 2 byte injection at end of area */
256     test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
257                            data + 2);
258     printf(" done\n");
259 
260     xmlFreeParserCtxt(ctxt);
261     return(test_ret);
262 }
263 
264 static int
testCurrentChar(xmlParserCtxtPtr ctxt,int * len)265 testCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
266     const xmlChar *oldcur;
267     int c, err, len2;
268 
269     lastError = 0;
270     c = xmlCurrentChar(ctxt, len);
271     ctxt->input->flags = 0;
272     err = lastError;
273 
274     oldcur = ctxt->input->cur;
275     lastError = 0;
276     xmlNextChar(ctxt);
277     ctxt->input->flags = 0;
278     len2 = ctxt->input->cur - oldcur;
279     ctxt->input->cur = oldcur;
280 
281     if ((*ctxt->input->cur != 0) && (err != lastError)) {
282         fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
283                 "errors: %d %d\n", err, lastError);
284         return(-1);
285     }
286 
287     if ((err == 0) && (*len != len2)) {
288         fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
289                 "lengths: %d %d\n", *len, len2);
290         return(-1);
291     }
292 
293     lastError = err;
294 
295     return(c);
296 }
297 
/**
 * testCharRangeByte1:
 * @ctxt:  parser context whose input buffer is used as scratch space
 *
 * Decode every single byte value 0x00..0xFF, followed by NUL bytes.
 * Bytes >= 0x80 must raise XML_ERR_INVALID_ENCODING, 0x0D must be
 * returned as 0x0A, and every other byte must decode to itself with a
 * length of 1.
 *
 * Returns 0 on success, 1 on the first mismatch.
 */
static int testCharRangeByte1(xmlParserCtxtPtr ctxt) {
    char *data = (char *) ctxt->input->cur;
    int byte;
    int len, c;

    memset(data + 1, 0, 3);

    for (byte = 0; byte <= 0xFF; byte++) {
        data[0] = (char) byte;
        ctxt->nbErrors = 0;

        c = testCurrentChar(ctxt, &len);
        /*
         * NOTE(review): a negative result (testCurrentChar has already
         * printed a mismatch) is skipped here rather than counted as a
         * failure - confirm this is intended.
         */
        if (c < 0)
            continue;

        if (byte >= 0x80) {
            /* we must see an error there */
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X\n", byte);
                return(1);
            }
            continue;
        }

        if (byte == 0xD) {
            if ((c != 0xA) || (len != 1)) {
                fprintf(stderr, "Failed to convert char for Byte 0x%02X\n",
                        byte);
                return(1);
            }
            continue;
        }

        if ((c != byte) || (len != 1)) {
            fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", byte);
            return(1);
        }
    }
    return(0);
}
332 
/**
 * testCharRangeByte2:
 * @ctxt:  parser context whose input buffer is used as scratch space
 *
 * Decode every two byte combination with a first byte in 0x80..0xFF and
 * a second byte in 0x00..0xFF, followed by NUL bytes. Invalid sequences
 * must raise XML_ERR_INVALID_ENCODING; valid ones must decode to the
 * expected code point with a length of 2.
 *
 * Returns 0 on success, 1 on the first mismatch.
 */
static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
    char *data = (char *) ctxt->input->cur;
    int lead, trail;
    int len, c;

    data[2] = 0;
    data[3] = 0;

    for (lead = 0x80; lead <= 0xFF; lead++) {
        for (trail = 0; trail <= 0xFF; trail++) {
            /* Code point a well-formed 2-byte sequence would carry. */
            int expected = (trail & 0x3F) + ((lead & 0x1F) << 6);

            data[0] = (char) lead;
            data[1] = (char) trail;
            ctxt->nbErrors = 0;

            c = testCurrentChar(ctxt, &len);
            /*
             * NOTE(review): mismatches reported by testCurrentChar are
             * skipped, not counted as failures - confirm intended.
             */
            if (c < 0)
                continue;

            /* if first bit of first char is set, then second bit must too */
            if ((lead & 0x80) && ((lead & 0x40) == 0)) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
                    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
                            lead, trail);
                    return(1);
                }
                continue;
            }

            /*
             * The second byte must start with bits 10, and the value
             * must not be overlong: one of bits 5 to 1 of the first
             * byte has to be set.
             */
            if ((lead & 0x80) &&
                (((trail & 0xC0) != 0x80) || ((lead & 0x1E) == 0))) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
                            lead, trail, c);
                    return(1);
                }
                continue;
            }

            /*
             * if third bit of first char is set, then the sequence would
             * need at least 3 bytes, but we give only 2 !
             */
            if ((lead & 0xE0) == 0xE0) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
                            lead, trail);
                    return(1);
                }
                continue;
            }

            /* We should see no error in remaining cases */
            if ((lastError != 0) || (len != 2)) {
                fprintf(stderr,
                    "Failed to parse char for Bytes 0x%02X 0x%02X\n",
                        lead, trail);
                return(1);
            }

            /* Finally check the value is right */
            if (c != expected) {
                fprintf(stderr,
        "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
                        lead, trail, expected, c);
                return(1);
            }
        }
    }
    return(0);
}
421 
/**
 * testCharRangeByte3:
 * @ctxt:  parser context whose input buffer is used as scratch space
 *
 * Decode three byte combinations: first byte in 0xE0..0xFF, second byte
 * in 0x00..0xFF and a third byte taken from a small table of edge
 * values, followed by a NUL byte. Invalid sequences must raise
 * XML_ERR_INVALID_ENCODING; valid ones must decode to the expected code
 * point with a length of 3.
 *
 * Returns 0 on success, 1 on the first mismatch.
 */
static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
    int i, j, k, K;
    int len, c;
    unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
    char *data = (char *) ctxt->input->cur;
    int value;

    data[3] = 0;
    for (i = 0xE0; i <= 0xFF; i++) {
    for (j = 0; j <= 0xFF; j++) {
    for (k = 0; k < 6; k++) {
        data[0] = (char) i;
        data[1] = (char) j;
        K = lows[k];
        data[2] = (char) K;
        /* Code point a well-formed 3-byte sequence would carry. */
        value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
        ctxt->nbErrors = 0;

        c = testCurrentChar(ctxt, &len);
        /*
         * NOTE(review): mismatches reported by testCurrentChar are
         * skipped, not counted as failures - confirm intended.
         */
        if (c < 0)
            continue;

        /*
         * if fourth bit of first char is set, then the sequence would need
         * at least 4 bytes, but we give only 3 !
         */
        if ((i & 0xF0) == 0xF0) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                /*
                 * Print the stored byte as unsigned char: a plain char
                 * may be signed and would be sign-extended by %02X.
                 */
                fprintf(stderr,
        "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        i, j, K, (unsigned char) data[3]);
                return(1);
            }
        }

        /*
         * The second and the third bytes must start with 10. Also, when
         * using a 3 byte encoding the value must be at least 0x800, i.e.
         * one of bits 3 to 0 of i must be set or the 6th bit of data[1]
         * must be set.
         */
        else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
                 (((i & 0xF) == 0) && ((j & 0x20) == 0))) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
        "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
                        i, j, K);
                return(1);
            }
        }

        /*
         * There are values that are not allowed in UTF-8 (surrogates)
         */
        else if ((value > 0xD7FF) && (value < 0xE000)) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
        "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
                        value, i, j, K);
                return(1);
            }
        }

        /*
         * We should see no error in remaining cases
         */
        else if ((lastError != 0) || (len != 3)) {
            fprintf(stderr,
                "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
                    i, j, K);
            return(1);
        }

        /*
         * Finally check the value is right. K is printed instead of
         * data[2] to avoid sign extension of a plain char.
         */
        else if (c != value) {
            fprintf(stderr,
    "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
                i, j, K, value, c);
            return(1);
        }
    }
    }
    }
    return(0);
}
519 
/**
 * testCharRangeByte4:
 * @ctxt:  parser context whose input buffer is used as scratch space
 *
 * Decode four byte combinations: first byte in 0xF0..0xFF, second byte
 * in 0x00..0xFF, third and fourth bytes taken from a small table of edge
 * values, followed by a NUL byte. Invalid sequences must raise
 * XML_ERR_INVALID_ENCODING; valid ones must decode to the expected code
 * point with a length of 4.
 *
 * Returns 0 on success, 1 on the first mismatch.
 */
static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
    int i, j, k, K, l, L;
    int len, c;
    unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
    char *data = (char *) ctxt->input->cur;
    int value;

    data[4] = 0;
    for (i = 0xF0; i <= 0xFF; i++) {
    for (j = 0; j <= 0xFF; j++) {
    for (k = 0; k < 6; k++) {
    for (l = 0; l < 6; l++) {
        data[0] = (char) i;
        data[1] = (char) j;
        K = lows[k];
        data[2] = (char) K;
        L = lows[l];
        data[3] = (char) L;
        /* Code point a well-formed 4-byte sequence would carry. */
        value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
                ((i & 0x7) << 18);
        ctxt->nbErrors = 0;

        c = testCurrentChar(ctxt, &len);
        /*
         * NOTE(review): mismatches reported by testCurrentChar are
         * skipped, not counted as failures - confirm intended.
         */
        if (c < 0)
            continue;

        /*
         * if fifth bit of first char is set, then the sequence would need
         * at least 5 bytes, but we give only 4 !
         *
         * The int copies K and L are printed rather than data[2]/data[3]:
         * a plain char may be signed and would be sign-extended by %02X.
         */
        if ((i & 0xF8) == 0xF8) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
  "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        i, j, K, L);
                return(1);
            }
        }

        /*
         * The second, third and fourth bytes must start with 10. Also,
         * when using a 4 byte encoding the value must be at least
         * 0x10000, i.e. one of bits 2 to 0 of i must be set or the 6th
         * or 5th bit of j must be set.
         */
        else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
                 ((L & 0xC0) != 0x80) ||
                 (((i & 0x7) == 0) && ((j & 0x30) == 0))) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
        "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        i, j, K, L);
                return(1);
            }
        }

        /*
         * There are values that are not allowed in UTF-8: surrogates
         * and anything above 0x10FFFF
         */
        else if (((value > 0xD7FF) && (value < 0xE000)) ||
                 (value > 0x10FFFF)) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        value, i, j, K, L);
                return(1);
            }
        }

        /*
         * We should see no error in remaining cases
         */
        else if ((lastError != 0) || (len != 4)) {
            fprintf(stderr,
                "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    i, j, K, L);
            return(1);
        }

        /*
         * Finally check the value is right
         */
        else if (c != value) {
            fprintf(stderr,
    "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X: "
    "expect %d got %d\n",
                i, j, K, L, value, c);
            return(1);
        }
    }
    }
    }
    }
    return(0);
}
624 
625 /**
626  * testCharRanges:
627  *
628  * Test the correct UTF8 character parsing in isolation i.e.
629  * not when parsing a full document, this is less expensive and we can
630  * cover the full range of UTF-8 chars accepted by XML-1.0
631  */
632 
testCharRanges(void)633 static int testCharRanges(void) {
634     char data[5];
635     xmlParserCtxtPtr ctxt;
636     xmlParserInputBufferPtr buf;
637     xmlParserInputPtr input;
638     int test_ret = 0;
639 
640     memset(data, 0, 5);
641 
642     /*
643      * Set up a parsing context using the above data buffer as
644      * the current input source.
645      */
646     ctxt = xmlNewParserCtxt();
647     if (ctxt == NULL) {
648         fprintf(stderr, "Failed to allocate parser context\n");
649 	return(1);
650     }
651     buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
652                                            XML_CHAR_ENCODING_NONE);
653     if (buf == NULL) {
654         fprintf(stderr, "Failed to allocate input buffer\n");
655 	test_ret = 1;
656 	goto error;
657     }
658     input = xmlNewInputStream(ctxt);
659     if (input == NULL) {
660         xmlFreeParserInputBuffer(buf);
661 	test_ret = 1;
662 	goto error;
663     }
664     input->filename = NULL;
665     input->buf = buf;
666     input->cur =
667     input->base = xmlBufContent(input->buf->buffer);
668     input->end = input->base + 4;
669     inputPush(ctxt, input);
670 
671     printf("testing char range: 1");
672     fflush(stdout);
673     test_ret += testCharRangeByte1(ctxt);
674     printf(" 2");
675     fflush(stdout);
676     test_ret += testCharRangeByte2(ctxt);
677     printf(" 3");
678     fflush(stdout);
679     test_ret += testCharRangeByte3(ctxt);
680     printf(" 4");
681     fflush(stdout);
682     test_ret += testCharRangeByte4(ctxt);
683     printf(" done\n");
684     fflush(stdout);
685 
686 error:
687     xmlFreeParserCtxt(ctxt);
688     return(test_ret);
689 }
690 
691 static int
testUserEncoding(void)692 testUserEncoding(void) {
693     /*
694      * Create a document encoded as UTF-16LE with an ISO-8859-1 encoding
695      * declaration, then parse it with xmlReadMemory and the encoding
696      * argument set to UTF-16LE.
697      */
698     xmlDocPtr doc = NULL;
699     const char *start = "<?xml version='1.0' encoding='ISO-8859-1'?><d>";
700     const char *end = "</d>";
701     char *buf = NULL;
702     xmlChar *text;
703     int startSize = strlen(start);
704     int textSize = 100000; /* Make sure to exceed internal buffer sizes. */
705     int endSize = strlen(end);
706     int totalSize = startSize + textSize + endSize;
707     int k = 0;
708     int i;
709     int ret = 1;
710 
711     buf = xmlMalloc(2 * totalSize);
712     for (i = 0; start[i] != 0; i++) {
713         buf[k++] = start[i];
714         buf[k++] = 0;
715     }
716     for (i = 0; i < textSize; i++) {
717         buf[k++] = 'x';
718         buf[k++] = 0;
719     }
720     for (i = 0; end[i] != 0; i++) {
721         buf[k++] = end[i];
722         buf[k++] = 0;
723     }
724 
725     doc = xmlReadMemory(buf, 2 * totalSize, NULL, "UTF-16LE", 0);
726     if (doc == NULL) {
727         fprintf(stderr, "failed to parse document\n");
728         goto error;
729     }
730 
731     text = doc->children->children->content;
732     for (i = 0; i < textSize; i++) {
733         if (text[i] != 'x') {
734             fprintf(stderr, "text node has wrong content at offset %d\n", k);
735             goto error;
736         }
737     }
738 
739     ret = 0;
740 
741 error:
742     xmlFreeDoc(doc);
743     xmlFree(buf);
744 
745     return ret;
746 }
747 
748 #if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
749 
750 static char *
convert(xmlCharEncodingHandlerPtr handler,const char * utf8,int size,int * outSize)751 convert(xmlCharEncodingHandlerPtr handler, const char *utf8, int size,
752         int *outSize) {
753     char *ret;
754     int inlen;
755     int res;
756 
757     inlen = size;
758     *outSize = size * 2;
759     ret = xmlMalloc(*outSize);
760     if (ret == NULL)
761         return(NULL);
762     res = handler->output(BAD_CAST ret, outSize, BAD_CAST utf8, &inlen);
763     if ((res < 0) || (inlen != size)) {
764         xmlFree(ret);
765         return(NULL);
766     }
767 
768     return(ret);
769 }
770 
771 static int
testUserEncodingPush(void)772 testUserEncodingPush(void) {
773     xmlCharEncodingHandlerPtr handler;
774     xmlParserCtxtPtr ctxt;
775     xmlDocPtr doc;
776     char buf[] =
777         "\xEF\xBB\xBF"
778         "<?xml version='1.0' encoding='ISO-8859-1'?>\n"
779         "<d>text</d>\n";
780     char *utf16;
781     int utf16Size;
782     int ret = 1;
783 
784     handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE);
785     utf16 = convert(handler, buf, sizeof(buf) - 1, &utf16Size);
786     ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
787     xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF16LE);
788     xmlParseChunk(ctxt, utf16, utf16Size, 0);
789     xmlParseChunk(ctxt, NULL, 0, 1);
790     doc = ctxt->myDoc;
791 
792     if ((doc != NULL) &&
793         (doc->children != NULL) &&
794         (doc->children->children != NULL) &&
795         (xmlStrcmp(doc->children->children->content, BAD_CAST "text") == 0))
796         ret = 0;
797 
798     xmlFreeDoc(doc);
799     xmlFreeParserCtxt(ctxt);
800     xmlFree(utf16);
801 
802     return(ret);
803 }
804 
805 static int
testUTF8Chunks(void)806 testUTF8Chunks(void) {
807     xmlParserCtxtPtr ctxt;
808     xmlChar *out;
809     int outSize;
810     char *buf;
811     int i;
812     int ret = 0;
813 
814     ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
815 
816     xmlParseChunk(ctxt, "<d>", 3, 0);
817     xmlParseChunk(ctxt, "\xF0", 1, 0);
818     xmlParseChunk(ctxt, "\x9F", 1, 0);
819     xmlParseChunk(ctxt, "\x98", 1, 0);
820     xmlParseChunk(ctxt, "\x8A", 1, 0);
821     xmlParseChunk(ctxt, "</d>", 4, 1);
822 
823     xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
824     if (strcmp((char *) out,
825                "<?xml version=\"1.0\"?>\n<d>&#x1F60A;</d>\n") != 0) {
826         fprintf(stderr, "failed UTF-8 chunk test 1\n");
827         ret += 1;
828     }
829 
830     xmlFree(out);
831     xmlFreeDoc(ctxt->myDoc);
832     xmlFreeParserCtxt(ctxt);
833 
834     ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
835 
836     xmlParseChunk(ctxt, "<d>", 3, 0);
837 
838     /*
839      * Create a chunk longer than XML_PARSER_BIG_BUFFER_SIZE (300) ending
840      * with an incomplete UTF-8 sequence.
841      */
842     buf = xmlMalloc(1000 * 2 + 1);
843     for (i = 0; i < 2000; i += 2)
844         memcpy(buf + i, "\xCE\xB1", 2);
845     buf[i] = '\xCE';
846     xmlParseChunk(ctxt, buf, 2001, 0);
847     xmlFree(buf);
848 
849     xmlParseChunk(ctxt, "\xB1</d>", 4, 0);
850     xmlParseChunk(ctxt, NULL, 0, 0);
851 
852     xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
853     if (strncmp((char *) out, "<?xml version=\"1.0\"?>\n<d>", 25) != 0) {
854         fprintf(stderr, "failed UTF-8 chunk test 2-1\n");
855         ret += 1;
856         goto error;
857     }
858     for (i = 25; i < 25 + 1001 * 7; i += 7) {
859         if (memcmp(out + i, "&#x3B1;", 7) != 0) {
860             fprintf(stderr, "failed UTF-8 chunk test 2-2 %d\n", i);
861             ret += 1;
862             goto error;
863         }
864     }
865     if (strcmp((char *) out + i, "</d>\n") != 0) {
866         fprintf(stderr, "failed UTF-8 chunk test 2-3\n");
867         ret += 1;
868         goto error;
869     }
870 
871 error:
872     xmlFree(out);
873     xmlFreeDoc(ctxt->myDoc);
874     xmlFreeParserCtxt(ctxt);
875 
876     return(ret);
877     return(0);
878 }
879 
880 #endif
881 
main(void)882 int main(void) {
883 
884     int ret = 0;
885 
886     /*
887      * this initialize the library and check potential ABI mismatches
888      * between the version it was compiled for and the actual shared
889      * library used.
890      */
891     LIBXML_TEST_VERSION
892 
893     /*
894      * Catch errors separately
895      */
896 
897     xmlSetStructuredErrorFunc(NULL, errorHandler);
898 
899     /*
900      * Run the tests
901      */
902     ret += testCharRanges();
903     ret += testDocumentRanges();
904     ret += testUserEncoding();
905 #if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
906     ret += testUserEncodingPush();
907     ret += testUTF8Chunks();
908 #endif
909 
910     /*
911      * Cleanup function for the XML library.
912      */
913     xmlCleanupParser();
914     return(ret ? 1 : 0);
915 }
916