1*6777b538SAndroid Build Coastguard Worker /**
2*6777b538SAndroid Build Coastguard Worker * Test the UTF-8 decoding routines
3*6777b538SAndroid Build Coastguard Worker *
4*6777b538SAndroid Build Coastguard Worker * author: Daniel Veillard
5*6777b538SAndroid Build Coastguard Worker * copy: see Copyright for the status of this software.
6*6777b538SAndroid Build Coastguard Worker */
7*6777b538SAndroid Build Coastguard Worker
8*6777b538SAndroid Build Coastguard Worker #define XML_DEPRECATED
9*6777b538SAndroid Build Coastguard Worker
10*6777b538SAndroid Build Coastguard Worker #include <stdio.h>
11*6777b538SAndroid Build Coastguard Worker #include <string.h>
12*6777b538SAndroid Build Coastguard Worker #include <libxml/tree.h>
13*6777b538SAndroid Build Coastguard Worker #include <libxml/parser.h>
14*6777b538SAndroid Build Coastguard Worker #include <libxml/parserInternals.h>
15*6777b538SAndroid Build Coastguard Worker
/*
 * Error code captured by errorHandler(): the first one reported since
 * the value was last cleared. The tests set it back to 0 before each
 * operation they want to observe.
 */
16*6777b538SAndroid Build Coastguard Worker int lastError;
17*6777b538SAndroid Build Coastguard Worker
errorHandler(void * unused,const xmlError * err)18*6777b538SAndroid Build Coastguard Worker static void errorHandler(void *unused, const xmlError *err) {
19*6777b538SAndroid Build Coastguard Worker if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
20*6777b538SAndroid Build Coastguard Worker lastError = err->code;
21*6777b538SAndroid Build Coastguard Worker }
22*6777b538SAndroid Build Coastguard Worker }
23*6777b538SAndroid Build Coastguard Worker
/*
 * Document templates for the in-document tests. testDocumentRanges()
 * overwrites the four 'X' placeholder bytes in place: they sit in element
 * content at offset 5 of document1 and in an attribute value at offset 10
 * of document2.
 */
24*6777b538SAndroid Build Coastguard Worker char document1[100] = "<doc>XXXX</doc>";
25*6777b538SAndroid Build Coastguard Worker char document2[100] = "<doc foo='XXXX'/>";
26*6777b538SAndroid Build Coastguard Worker
/**
 * testDocumentRangeByte1:
 * @ctxt:  parser context, reset before every parse attempt
 * @document:  buffer holding the XML document under test
 * @len:  number of bytes of @document handed to the parser
 * @data:  position inside @document that receives the injected byte
 * @forbid1:  byte value that must make the parse fail (callers pass -1
 *            when there is none)
 * @forbid2:  second such byte value (callers pass -1 when there is none)
 *
 * Writes each value 0x00..0xFF to data[0], parses @document with
 * xmlReadMemory() and compares the outcome (lastError and whether a
 * document was built) with what is expected for that byte.
 *
 * The document produced by an attempt is always released before the
 * function returns, including when the attempt is reported as a failure.
 *
 * Returns 0 if every byte value behaved as expected, 1 at the first
 * mismatch (after printing a diagnostic on stderr).
 */
static int testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
                  int len, char *data, int forbid1, int forbid2) {
    int i;

    for (i = 0; i <= 0xFF; i++) {
        xmlDocPtr res;
        int failed = 0;

        lastError = 0;
        xmlCtxtReset(ctxt);

        data[0] = (char) i;

        res = xmlReadMemory(document, len, "test", NULL, 0);

        if ((i == forbid1) || (i == forbid2)) {
            /* caller-specified byte: need an error and no document */
            if ((lastError == 0) || (res != NULL)) {
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X: %c\n",
                        i, i);
                failed = 1;
            }
        } else if ((i == '<') || (i == '&')) {
            /* markup characters: need an error and no document */
            if ((lastError == 0) || (res != NULL)) {
                fprintf(stderr,
                    "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
                failed = 1;
            }
        } else if (((i < 0x20) || (i >= 0x80)) &&
                   (i != 0x9) && (i != 0xA) && (i != 0xD)) {
            /*
             * Control bytes other than TAB/LF/CR and bytes with the high
             * bit set: only a mismatch when a document was built and the
             * recorded error is not XML_ERR_INVALID_CHAR.
             */
            if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL)) {
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X\n", i);
                failed = 1;
            }
        } else if (res == NULL) {
            /* every remaining byte must yield a document */
            fprintf(stderr,
                "Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
            failed = 1;
        }

        /* release the document on the failure path as well */
        if (res != NULL)
            xmlFreeDoc(res);
        if (failed)
            return(1);
    }
    return(0);
}
74*6777b538SAndroid Build Coastguard Worker
/**
 * testDocumentRangeByte2:
 * @ctxt:  parser context, reset before every parse attempt
 * @document:  buffer holding the XML document under test
 * @len:  number of bytes of @document handed to the parser
 * @data:  position inside @document that receives the two injected bytes
 *
 * Writes every pair (i, j) with i in 0x80..0xFF and j in 0x00..0xFF to
 * data[0] and data[1], parses @document with xmlReadMemory() and checks
 * that malformed two-byte sequences are rejected and the remaining ones
 * are accepted without error.
 *
 * The document produced by an attempt is always released before the
 * function returns, including when the attempt is reported as a failure.
 *
 * Returns 0 if every pair behaved as expected, 1 at the first mismatch
 * (after printing a diagnostic on stderr).
 */
static int testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
                  int len, char *data) {
    int i, j;

    for (i = 0x80; i <= 0xFF; i++) {
        for (j = 0; j <= 0xFF; j++) {
            xmlDocPtr res;
            int failed = 0;

            lastError = 0;
            xmlCtxtReset(ctxt);

            data[0] = (char) i;
            data[1] = (char) j;

            res = xmlReadMemory(document, len, "test", NULL, 0);

            /*
             * With the first bit of the first byte set, the pair is
             * invalid when any of the following holds:
             *  - the second bit of the first byte is clear,
             *  - the second byte does not start with bits 10,
             *  - none of bits 4 to 1 of the first byte is set, i.e. the
             *    encoded value would be below 0x80.
             * All three cases share the same check and diagnostic.
             */
            if ((i & 0x80) &&
                (((i & 0x40) == 0) ||
                 ((j & 0xC0) != 0x80) ||
                 ((i & 0x1E) == 0))) {
                if ((lastError == 0) || (res != NULL)) {
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
                            i, j);
                    failed = 1;
                }
            }

            /*
             * if third bit of first char is set, then the sequence would
             * need at least 3 bytes, but we give only 2 !
             */
            else if ((i & 0xE0) == 0xE0) {
                if ((lastError == 0) || (res != NULL)) {
                    fprintf(stderr,
            "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
                            i, j);
                    failed = 1;
                }
            }

            /*
             * We should see no error in remaining cases
             */
            else if ((lastError != 0) || (res == NULL)) {
                fprintf(stderr,
                    "Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
                failed = 1;
            }

            /* release the document on the failure path as well */
            if (res != NULL)
                xmlFreeDoc(res);
            if (failed)
                return(1);
        }
    }
    return(0);
}
153*6777b538SAndroid Build Coastguard Worker
154*6777b538SAndroid Build Coastguard Worker /**
155*6777b538SAndroid Build Coastguard Worker * testDocumentRanges:
156*6777b538SAndroid Build Coastguard Worker *
157*6777b538SAndroid Build Coastguard Worker * Test the correct UTF8 character parsing in context of XML documents
158*6777b538SAndroid Build Coastguard Worker * Those are in-context injection tests checking the parser behaviour on
159*6777b538SAndroid Build Coastguard Worker * edge case values at different point in content, beginning and end of
160*6777b538SAndroid Build Coastguard Worker * CDATA in text or in attribute values.
161*6777b538SAndroid Build Coastguard Worker */
162*6777b538SAndroid Build Coastguard Worker
testDocumentRanges(void)163*6777b538SAndroid Build Coastguard Worker static int testDocumentRanges(void) {
164*6777b538SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
165*6777b538SAndroid Build Coastguard Worker char *data;
166*6777b538SAndroid Build Coastguard Worker int test_ret = 0;
167*6777b538SAndroid Build Coastguard Worker
168*6777b538SAndroid Build Coastguard Worker /*
169*6777b538SAndroid Build Coastguard Worker * Set up a parsing context using the first document as
170*6777b538SAndroid Build Coastguard Worker * the current input source.
171*6777b538SAndroid Build Coastguard Worker */
172*6777b538SAndroid Build Coastguard Worker ctxt = xmlNewParserCtxt();
173*6777b538SAndroid Build Coastguard Worker if (ctxt == NULL) {
174*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "Failed to allocate parser context\n");
175*6777b538SAndroid Build Coastguard Worker return(1);
176*6777b538SAndroid Build Coastguard Worker }
177*6777b538SAndroid Build Coastguard Worker
178*6777b538SAndroid Build Coastguard Worker printf("testing 1 byte char in document: 1");
179*6777b538SAndroid Build Coastguard Worker fflush(stdout);
180*6777b538SAndroid Build Coastguard Worker data = &document1[5];
181*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
182*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
183*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
184*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
185*6777b538SAndroid Build Coastguard Worker /* test 1 byte injection at beginning of area */
186*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
187*6777b538SAndroid Build Coastguard Worker data, -1, -1);
188*6777b538SAndroid Build Coastguard Worker printf(" 2");
189*6777b538SAndroid Build Coastguard Worker fflush(stdout);
190*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
191*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
192*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
193*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
194*6777b538SAndroid Build Coastguard Worker /* test 1 byte injection at end of area */
195*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
196*6777b538SAndroid Build Coastguard Worker data + 3, -1, -1);
197*6777b538SAndroid Build Coastguard Worker
198*6777b538SAndroid Build Coastguard Worker printf(" 3");
199*6777b538SAndroid Build Coastguard Worker fflush(stdout);
200*6777b538SAndroid Build Coastguard Worker data = &document2[10];
201*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
202*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
203*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
204*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
205*6777b538SAndroid Build Coastguard Worker /* test 1 byte injection at beginning of area */
206*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
207*6777b538SAndroid Build Coastguard Worker data, '\'', -1);
208*6777b538SAndroid Build Coastguard Worker printf(" 4");
209*6777b538SAndroid Build Coastguard Worker fflush(stdout);
210*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
211*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
212*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
213*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
214*6777b538SAndroid Build Coastguard Worker /* test 1 byte injection at end of area */
215*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
216*6777b538SAndroid Build Coastguard Worker data + 3, '\'', -1);
217*6777b538SAndroid Build Coastguard Worker printf(" done\n");
218*6777b538SAndroid Build Coastguard Worker
219*6777b538SAndroid Build Coastguard Worker printf("testing 2 byte char in document: 1");
220*6777b538SAndroid Build Coastguard Worker fflush(stdout);
221*6777b538SAndroid Build Coastguard Worker data = &document1[5];
222*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
223*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
224*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
225*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
226*6777b538SAndroid Build Coastguard Worker /* test 2 byte injection at beginning of area */
227*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
228*6777b538SAndroid Build Coastguard Worker data);
229*6777b538SAndroid Build Coastguard Worker printf(" 2");
230*6777b538SAndroid Build Coastguard Worker fflush(stdout);
231*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
232*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
233*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
234*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
235*6777b538SAndroid Build Coastguard Worker /* test 2 byte injection at end of area */
236*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
237*6777b538SAndroid Build Coastguard Worker data + 2);
238*6777b538SAndroid Build Coastguard Worker
239*6777b538SAndroid Build Coastguard Worker printf(" 3");
240*6777b538SAndroid Build Coastguard Worker fflush(stdout);
241*6777b538SAndroid Build Coastguard Worker data = &document2[10];
242*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
243*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
244*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
245*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
246*6777b538SAndroid Build Coastguard Worker /* test 2 byte injection at beginning of area */
247*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
248*6777b538SAndroid Build Coastguard Worker data);
249*6777b538SAndroid Build Coastguard Worker printf(" 4");
250*6777b538SAndroid Build Coastguard Worker fflush(stdout);
251*6777b538SAndroid Build Coastguard Worker data[0] = ' ';
252*6777b538SAndroid Build Coastguard Worker data[1] = ' ';
253*6777b538SAndroid Build Coastguard Worker data[2] = ' ';
254*6777b538SAndroid Build Coastguard Worker data[3] = ' ';
255*6777b538SAndroid Build Coastguard Worker /* test 2 byte injection at end of area */
256*6777b538SAndroid Build Coastguard Worker test_ret += testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
257*6777b538SAndroid Build Coastguard Worker data + 2);
258*6777b538SAndroid Build Coastguard Worker printf(" done\n");
259*6777b538SAndroid Build Coastguard Worker
260*6777b538SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
261*6777b538SAndroid Build Coastguard Worker return(test_ret);
262*6777b538SAndroid Build Coastguard Worker }
263*6777b538SAndroid Build Coastguard Worker
264*6777b538SAndroid Build Coastguard Worker static int
testCurrentChar(xmlParserCtxtPtr ctxt,int * len)265*6777b538SAndroid Build Coastguard Worker testCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
266*6777b538SAndroid Build Coastguard Worker const xmlChar *oldcur;
267*6777b538SAndroid Build Coastguard Worker int c, err, len2;
268*6777b538SAndroid Build Coastguard Worker
269*6777b538SAndroid Build Coastguard Worker lastError = 0;
270*6777b538SAndroid Build Coastguard Worker c = xmlCurrentChar(ctxt, len);
271*6777b538SAndroid Build Coastguard Worker ctxt->input->flags = 0;
272*6777b538SAndroid Build Coastguard Worker err = lastError;
273*6777b538SAndroid Build Coastguard Worker
274*6777b538SAndroid Build Coastguard Worker oldcur = ctxt->input->cur;
275*6777b538SAndroid Build Coastguard Worker lastError = 0;
276*6777b538SAndroid Build Coastguard Worker xmlNextChar(ctxt);
277*6777b538SAndroid Build Coastguard Worker ctxt->input->flags = 0;
278*6777b538SAndroid Build Coastguard Worker len2 = ctxt->input->cur - oldcur;
279*6777b538SAndroid Build Coastguard Worker ctxt->input->cur = oldcur;
280*6777b538SAndroid Build Coastguard Worker
281*6777b538SAndroid Build Coastguard Worker if ((*ctxt->input->cur != 0) && (err != lastError)) {
282*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
283*6777b538SAndroid Build Coastguard Worker "errors: %d %d\n", err, lastError);
284*6777b538SAndroid Build Coastguard Worker return(-1);
285*6777b538SAndroid Build Coastguard Worker }
286*6777b538SAndroid Build Coastguard Worker
287*6777b538SAndroid Build Coastguard Worker if ((err == 0) && (*len != len2)) {
288*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "xmlCurrentChar and xmlNextChar report different "
289*6777b538SAndroid Build Coastguard Worker "lengths: %d %d\n", *len, len2);
290*6777b538SAndroid Build Coastguard Worker return(-1);
291*6777b538SAndroid Build Coastguard Worker }
292*6777b538SAndroid Build Coastguard Worker
293*6777b538SAndroid Build Coastguard Worker lastError = err;
294*6777b538SAndroid Build Coastguard Worker
295*6777b538SAndroid Build Coastguard Worker return(c);
296*6777b538SAndroid Build Coastguard Worker }
297*6777b538SAndroid Build Coastguard Worker
/*
 * testCharRangeByte1:
 * @ctxt:  parser context; the bytes under test are written directly at
 *         its current input position
 *
 * Places each value 0x00..0xFF, followed by three zero bytes, at the
 * current input position and decodes it through testCurrentChar().
 * Bytes 0x80 and above must record XML_ERR_INVALID_ENCODING, 0xD must
 * decode to 0xA, and every other byte must decode to itself; decoded
 * bytes must have length 1. A byte for which testCurrentChar() returns a
 * negative value is skipped without further checks.
 *
 * Returns 0 if all checked bytes behaved as expected, 1 at the first
 * mismatch (after printing a diagnostic on stderr).
 */
static int testCharRangeByte1(xmlParserCtxtPtr ctxt) {
    char *buf = (char *) ctxt->input->cur;
    int byte;
    int len, c;

    /* zero the three bytes that follow the byte under test */
    memset(buf + 1, 0, 3);

    for (byte = 0; byte <= 0xFF; byte++) {
        buf[0] = (char) byte;
        ctxt->nbErrors = 0;

        c = testCurrentChar(ctxt, &len);
        if (c < 0)
            continue;

        if (byte >= 0x80) {
            /* we must see an error there */
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
                    "Failed to detect invalid char for Byte 0x%02X\n", byte);
                return(1);
            }
            continue;
        }

        if (byte == 0xD) {
            /* carriage return is expected to come back as line feed */
            if (!((c == 0xA) && (len == 1))) {
                fprintf(stderr, "Failed to convert char for Byte 0x%02X\n",
                        byte);
                return(1);
            }
            continue;
        }

        if (!((c == byte) && (len == 1))) {
            fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", byte);
            return(1);
        }
    }
    return(0);
}
332*6777b538SAndroid Build Coastguard Worker
/*
 * testCharRangeByte2:
 * @ctxt:  parser context; the bytes under test are written directly at
 *         its current input position
 *
 * Places every pair (lead, trail) with lead in 0x80..0xFF and trail in
 * 0x00..0xFF, followed by two zero bytes, at the current input position
 * and decodes it through testCurrentChar(). Malformed pairs must record
 * XML_ERR_INVALID_ENCODING; the remaining ones must decode without error
 * to a character of length 2 with the expected value. A pair for which
 * testCurrentChar() returns a negative value is skipped without further
 * checks.
 *
 * Returns 0 if all checked pairs behaved as expected, 1 at the first
 * mismatch (after printing a diagnostic on stderr).
 */
static int testCharRangeByte2(xmlParserCtxtPtr ctxt) {
    char *buf = (char *) ctxt->input->cur;
    int lead, trail;
    int len, c;
    int expected;

    /* zero the two bytes that follow the pair under test */
    buf[2] = 0;
    buf[3] = 0;

    for (lead = 0x80; lead <= 0xFF; lead++) {
        for (trail = 0; trail <= 0xFF; trail++) {
            buf[0] = (char) lead;
            buf[1] = (char) trail;
            ctxt->nbErrors = 0;

            c = testCurrentChar(ctxt, &len);
            if (c < 0)
                continue;

            /* first bit of the lead byte set but second bit clear */
            if ((lead & 0x80) && ((lead & 0x40) == 0)) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
                            lead, trail);
                    return(1);
                }
                continue;
            }

            /* the trail byte must start with bits 10 */
            if ((lead & 0x80) && ((trail & 0xC0) != 0x80)) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
            "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
                            lead, trail, c);
                    return(1);
                }
                continue;
            }

            /*
             * a 2 byte encoding must carry a value of at least 0x80,
             * i.e. one of bits 4 to 1 of the lead byte must be set
             */
            if ((lead & 0x80) && ((lead & 0x1E) == 0)) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
            "Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
                            lead, trail, c);
                    return(1);
                }
                continue;
            }

            /*
             * third bit of the lead byte set: the sequence would need at
             * least 3 bytes, but only 2 are given
             */
            if ((lead & 0xE0) == 0xE0) {
                if (lastError != XML_ERR_INVALID_ENCODING) {
                    fprintf(stderr,
            "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
                            lead, trail);
                    return(1);
                }
                continue;
            }

            /* everything left must decode cleanly as two bytes */
            if ((lastError != 0) || (len != 2)) {
                fprintf(stderr,
                    "Failed to parse char for Bytes 0x%02X 0x%02X\n",
                        lead, trail);
                return(1);
            }

            /* and the decoded value must be the right one */
            expected = (trail & 0x3F) + ((lead & 0x1F) << 6);
            if (c != expected) {
                fprintf(stderr,
        "Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
                        lead, trail, expected, c);
                return(1);
            }
        }
    }
    return(0);
}
421*6777b538SAndroid Build Coastguard Worker
testCharRangeByte3(xmlParserCtxtPtr ctxt)422*6777b538SAndroid Build Coastguard Worker static int testCharRangeByte3(xmlParserCtxtPtr ctxt) {
423*6777b538SAndroid Build Coastguard Worker int i, j, k, K;
424*6777b538SAndroid Build Coastguard Worker int len, c;
425*6777b538SAndroid Build Coastguard Worker unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
426*6777b538SAndroid Build Coastguard Worker char *data = (char *) ctxt->input->cur;
427*6777b538SAndroid Build Coastguard Worker int value;
428*6777b538SAndroid Build Coastguard Worker
429*6777b538SAndroid Build Coastguard Worker data[3] = 0;
430*6777b538SAndroid Build Coastguard Worker for (i = 0xE0;i <= 0xFF;i++) {
431*6777b538SAndroid Build Coastguard Worker for (j = 0;j <= 0xFF;j++) {
432*6777b538SAndroid Build Coastguard Worker for (k = 0;k < 6;k++) {
433*6777b538SAndroid Build Coastguard Worker data[0] = (char) i;
434*6777b538SAndroid Build Coastguard Worker data[1] = (char) j;
435*6777b538SAndroid Build Coastguard Worker K = lows[k];
436*6777b538SAndroid Build Coastguard Worker data[2] = (char) K;
437*6777b538SAndroid Build Coastguard Worker value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
438*6777b538SAndroid Build Coastguard Worker ctxt->nbErrors = 0;
439*6777b538SAndroid Build Coastguard Worker
440*6777b538SAndroid Build Coastguard Worker c = testCurrentChar(ctxt, &len);
441*6777b538SAndroid Build Coastguard Worker if (c < 0)
442*6777b538SAndroid Build Coastguard Worker continue;
443*6777b538SAndroid Build Coastguard Worker
444*6777b538SAndroid Build Coastguard Worker /*
445*6777b538SAndroid Build Coastguard Worker * if fourth bit of first char is set, then the sequence would need
446*6777b538SAndroid Build Coastguard Worker * at least 4 bytes, but we give only 3 !
447*6777b538SAndroid Build Coastguard Worker */
448*6777b538SAndroid Build Coastguard Worker if ((i & 0xF0) == 0xF0) {
449*6777b538SAndroid Build Coastguard Worker if (lastError != XML_ERR_INVALID_ENCODING) {
450*6777b538SAndroid Build Coastguard Worker fprintf(stderr,
451*6777b538SAndroid Build Coastguard Worker "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
452*6777b538SAndroid Build Coastguard Worker i, j, K, data[3]);
453*6777b538SAndroid Build Coastguard Worker return(1);
454*6777b538SAndroid Build Coastguard Worker }
455*6777b538SAndroid Build Coastguard Worker }
456*6777b538SAndroid Build Coastguard Worker
457*6777b538SAndroid Build Coastguard Worker /*
458*6777b538SAndroid Build Coastguard Worker * The second and the third bytes must start with 10
459*6777b538SAndroid Build Coastguard Worker */
460*6777b538SAndroid Build Coastguard Worker else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
461*6777b538SAndroid Build Coastguard Worker if (lastError != XML_ERR_INVALID_ENCODING) {
462*6777b538SAndroid Build Coastguard Worker fprintf(stderr,
463*6777b538SAndroid Build Coastguard Worker "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
464*6777b538SAndroid Build Coastguard Worker i, j, K);
465*6777b538SAndroid Build Coastguard Worker return(1);
466*6777b538SAndroid Build Coastguard Worker }
467*6777b538SAndroid Build Coastguard Worker }
468*6777b538SAndroid Build Coastguard Worker
469*6777b538SAndroid Build Coastguard Worker /*
470*6777b538SAndroid Build Coastguard Worker * if using a 3 byte encoding then the value must be greater
471*6777b538SAndroid Build Coastguard Worker * than 0x800, i.e. one of bits 4 to 0 of i must be set or
472*6777b538SAndroid Build Coastguard Worker * the 6th byte of data[1] must be set
473*6777b538SAndroid Build Coastguard Worker */
474*6777b538SAndroid Build Coastguard Worker else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
475*6777b538SAndroid Build Coastguard Worker if (lastError != XML_ERR_INVALID_ENCODING) {
476*6777b538SAndroid Build Coastguard Worker fprintf(stderr,
477*6777b538SAndroid Build Coastguard Worker "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
478*6777b538SAndroid Build Coastguard Worker i, j, K);
479*6777b538SAndroid Build Coastguard Worker return(1);
480*6777b538SAndroid Build Coastguard Worker }
481*6777b538SAndroid Build Coastguard Worker }
482*6777b538SAndroid Build Coastguard Worker
483*6777b538SAndroid Build Coastguard Worker /*
484*6777b538SAndroid Build Coastguard Worker * There are values that are not allowed in UTF-8
485*6777b538SAndroid Build Coastguard Worker */
486*6777b538SAndroid Build Coastguard Worker else if ((value > 0xD7FF) && (value <0xE000)) {
487*6777b538SAndroid Build Coastguard Worker if (lastError != XML_ERR_INVALID_ENCODING) {
488*6777b538SAndroid Build Coastguard Worker fprintf(stderr,
489*6777b538SAndroid Build Coastguard Worker "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
490*6777b538SAndroid Build Coastguard Worker value, i, j, K);
491*6777b538SAndroid Build Coastguard Worker return(1);
492*6777b538SAndroid Build Coastguard Worker }
493*6777b538SAndroid Build Coastguard Worker }
494*6777b538SAndroid Build Coastguard Worker
495*6777b538SAndroid Build Coastguard Worker /*
496*6777b538SAndroid Build Coastguard Worker * We should see no error in remaining cases
497*6777b538SAndroid Build Coastguard Worker */
498*6777b538SAndroid Build Coastguard Worker else if ((lastError != 0) || (len != 3)) {
499*6777b538SAndroid Build Coastguard Worker fprintf(stderr,
500*6777b538SAndroid Build Coastguard Worker "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
501*6777b538SAndroid Build Coastguard Worker i, j, K);
502*6777b538SAndroid Build Coastguard Worker return(1);
503*6777b538SAndroid Build Coastguard Worker }
504*6777b538SAndroid Build Coastguard Worker
505*6777b538SAndroid Build Coastguard Worker /*
506*6777b538SAndroid Build Coastguard Worker * Finally check the value is right
507*6777b538SAndroid Build Coastguard Worker */
508*6777b538SAndroid Build Coastguard Worker else if (c != value) {
509*6777b538SAndroid Build Coastguard Worker fprintf(stderr,
510*6777b538SAndroid Build Coastguard Worker "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
511*6777b538SAndroid Build Coastguard Worker i, j, data[2], value, c);
512*6777b538SAndroid Build Coastguard Worker return(1);
513*6777b538SAndroid Build Coastguard Worker }
514*6777b538SAndroid Build Coastguard Worker }
515*6777b538SAndroid Build Coastguard Worker }
516*6777b538SAndroid Build Coastguard Worker }
517*6777b538SAndroid Build Coastguard Worker return(0);
518*6777b538SAndroid Build Coastguard Worker }
519*6777b538SAndroid Build Coastguard Worker
/**
 * testCharRangeByte4:
 * @ctxt: parser context whose current input buffer is overwritten in place
 *
 * Feed 4-byte sequences to the character decoder: every lead byte in
 * 0xF0..0xFF is combined with every possible second byte and with a small
 * set of edge values (see lows[]) for the third and fourth bytes.  For each
 * sequence the decoded value, the reported length and the global lastError
 * are compared with what the byte pattern calls for.
 *
 * Returns 0 if every sequence behaved as expected, 1 on the first mismatch
 * (a diagnostic naming the offending bytes is written to stderr).
 */
static int testCharRangeByte4(xmlParserCtxtPtr ctxt) {
    int i, j, k, K, l, L;
    int len, c;
    unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
    char *data = (char *) ctxt->input->cur;
    int value;

    data[4] = 0;
    for (i = 0xF0;i <= 0xFF;i++) {
    for (j = 0;j <= 0xFF;j++) {
    for (k = 0;k < 6;k++) {
    for (l = 0;l < 6;l++) {
        data[0] = (char) i;
        data[1] = (char) j;
        K = lows[k];
        data[2] = (char) K;
        L = lows[l];
        data[3] = (char) L;
        /* Code point the four bytes would encode if the sequence is valid */
        value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
                ((i & 0x7) << 18);
        ctxt->nbErrors = 0;

        /*
         * NOTE(review): testCurrentChar is defined elsewhere in this file;
         * a negative result is skipped here without further checks, and
         * lastError is presumably reset by that helper - confirm.
         */
        c = testCurrentChar(ctxt, &len);
        if (c < 0)
            continue;

        /*
         * if fifth bit of first char is set, then the sequence would need
         * at least 5 bytes, but we give only 4 !
         */
        if ((i & 0xF8) == 0xF8) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                /* Print the int copies: a plain char may sign-extend */
                fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        i, j, K, L);
                return(1);
            }
        }

        /*
         * The second, third and fourth bytes must start with 10
         */
        else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
                 ((L & 0xC0) != 0x80)) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        i, j, K, L);
                return(1);
            }
        }

        /*
         * if using a 4 byte encoding then the value must be at least
         * 0x10000, i.e. one of bits 2 to 0 of i must be set or
         * the 6th or 5th bit of j must be set
         */
        else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
                "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        i, j, K, L);
                return(1);
            }
        }

        /*
         * There are values that are not allowed in UTF-8
         */
        else if (((value > 0xD7FF) && (value < 0xE000)) ||
                 (value > 0x10FFFF)) {
            if (lastError != XML_ERR_INVALID_ENCODING) {
                fprintf(stderr,
        "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                        value, i, j, K, L);
                return(1);
            }
        }

        /*
         * We should see no error in remaining cases
         */
        else if ((lastError != 0) || (len != 4)) {
            fprintf(stderr,
                "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
                    i, j, K, L);
            return(1);
        }

        /*
         * Finally check the value is right
         */
        else if (c != value) {
            fprintf(stderr,
    "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
                    i, j, K, L, value, c);
            return(1);
        }
    }
    }
    }
    }
    return(0);
}
624*6777b538SAndroid Build Coastguard Worker
625*6777b538SAndroid Build Coastguard Worker /**
626*6777b538SAndroid Build Coastguard Worker * testCharRanges:
627*6777b538SAndroid Build Coastguard Worker *
628*6777b538SAndroid Build Coastguard Worker * Test the correct UTF8 character parsing in isolation i.e.
629*6777b538SAndroid Build Coastguard Worker * not when parsing a full document, this is less expensive and we can
630*6777b538SAndroid Build Coastguard Worker * cover the full range of UTF-8 chars accepted by XML-1.0
631*6777b538SAndroid Build Coastguard Worker */
632*6777b538SAndroid Build Coastguard Worker
testCharRanges(void)633*6777b538SAndroid Build Coastguard Worker static int testCharRanges(void) {
634*6777b538SAndroid Build Coastguard Worker char data[5];
635*6777b538SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
636*6777b538SAndroid Build Coastguard Worker xmlParserInputBufferPtr buf;
637*6777b538SAndroid Build Coastguard Worker xmlParserInputPtr input;
638*6777b538SAndroid Build Coastguard Worker int test_ret = 0;
639*6777b538SAndroid Build Coastguard Worker
640*6777b538SAndroid Build Coastguard Worker memset(data, 0, 5);
641*6777b538SAndroid Build Coastguard Worker
642*6777b538SAndroid Build Coastguard Worker /*
643*6777b538SAndroid Build Coastguard Worker * Set up a parsing context using the above data buffer as
644*6777b538SAndroid Build Coastguard Worker * the current input source.
645*6777b538SAndroid Build Coastguard Worker */
646*6777b538SAndroid Build Coastguard Worker ctxt = xmlNewParserCtxt();
647*6777b538SAndroid Build Coastguard Worker if (ctxt == NULL) {
648*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "Failed to allocate parser context\n");
649*6777b538SAndroid Build Coastguard Worker return(1);
650*6777b538SAndroid Build Coastguard Worker }
651*6777b538SAndroid Build Coastguard Worker buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
652*6777b538SAndroid Build Coastguard Worker XML_CHAR_ENCODING_NONE);
653*6777b538SAndroid Build Coastguard Worker if (buf == NULL) {
654*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "Failed to allocate input buffer\n");
655*6777b538SAndroid Build Coastguard Worker test_ret = 1;
656*6777b538SAndroid Build Coastguard Worker goto error;
657*6777b538SAndroid Build Coastguard Worker }
658*6777b538SAndroid Build Coastguard Worker input = xmlNewInputStream(ctxt);
659*6777b538SAndroid Build Coastguard Worker if (input == NULL) {
660*6777b538SAndroid Build Coastguard Worker xmlFreeParserInputBuffer(buf);
661*6777b538SAndroid Build Coastguard Worker test_ret = 1;
662*6777b538SAndroid Build Coastguard Worker goto error;
663*6777b538SAndroid Build Coastguard Worker }
664*6777b538SAndroid Build Coastguard Worker input->filename = NULL;
665*6777b538SAndroid Build Coastguard Worker input->buf = buf;
666*6777b538SAndroid Build Coastguard Worker input->cur =
667*6777b538SAndroid Build Coastguard Worker input->base = xmlBufContent(input->buf->buffer);
668*6777b538SAndroid Build Coastguard Worker input->end = input->base + 4;
669*6777b538SAndroid Build Coastguard Worker inputPush(ctxt, input);
670*6777b538SAndroid Build Coastguard Worker
671*6777b538SAndroid Build Coastguard Worker printf("testing char range: 1");
672*6777b538SAndroid Build Coastguard Worker fflush(stdout);
673*6777b538SAndroid Build Coastguard Worker test_ret += testCharRangeByte1(ctxt);
674*6777b538SAndroid Build Coastguard Worker printf(" 2");
675*6777b538SAndroid Build Coastguard Worker fflush(stdout);
676*6777b538SAndroid Build Coastguard Worker test_ret += testCharRangeByte2(ctxt);
677*6777b538SAndroid Build Coastguard Worker printf(" 3");
678*6777b538SAndroid Build Coastguard Worker fflush(stdout);
679*6777b538SAndroid Build Coastguard Worker test_ret += testCharRangeByte3(ctxt);
680*6777b538SAndroid Build Coastguard Worker printf(" 4");
681*6777b538SAndroid Build Coastguard Worker fflush(stdout);
682*6777b538SAndroid Build Coastguard Worker test_ret += testCharRangeByte4(ctxt);
683*6777b538SAndroid Build Coastguard Worker printf(" done\n");
684*6777b538SAndroid Build Coastguard Worker fflush(stdout);
685*6777b538SAndroid Build Coastguard Worker
686*6777b538SAndroid Build Coastguard Worker error:
687*6777b538SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
688*6777b538SAndroid Build Coastguard Worker return(test_ret);
689*6777b538SAndroid Build Coastguard Worker }
690*6777b538SAndroid Build Coastguard Worker
691*6777b538SAndroid Build Coastguard Worker static int
testUserEncoding(void)692*6777b538SAndroid Build Coastguard Worker testUserEncoding(void) {
693*6777b538SAndroid Build Coastguard Worker /*
694*6777b538SAndroid Build Coastguard Worker * Create a document encoded as UTF-16LE with an ISO-8859-1 encoding
695*6777b538SAndroid Build Coastguard Worker * declaration, then parse it with xmlReadMemory and the encoding
696*6777b538SAndroid Build Coastguard Worker * argument set to UTF-16LE.
697*6777b538SAndroid Build Coastguard Worker */
698*6777b538SAndroid Build Coastguard Worker xmlDocPtr doc = NULL;
699*6777b538SAndroid Build Coastguard Worker const char *start = "<?xml version='1.0' encoding='ISO-8859-1'?><d>";
700*6777b538SAndroid Build Coastguard Worker const char *end = "</d>";
701*6777b538SAndroid Build Coastguard Worker char *buf = NULL;
702*6777b538SAndroid Build Coastguard Worker xmlChar *text;
703*6777b538SAndroid Build Coastguard Worker int startSize = strlen(start);
704*6777b538SAndroid Build Coastguard Worker int textSize = 100000; /* Make sure to exceed internal buffer sizes. */
705*6777b538SAndroid Build Coastguard Worker int endSize = strlen(end);
706*6777b538SAndroid Build Coastguard Worker int totalSize = startSize + textSize + endSize;
707*6777b538SAndroid Build Coastguard Worker int k = 0;
708*6777b538SAndroid Build Coastguard Worker int i;
709*6777b538SAndroid Build Coastguard Worker int ret = 1;
710*6777b538SAndroid Build Coastguard Worker
711*6777b538SAndroid Build Coastguard Worker buf = xmlMalloc(2 * totalSize);
712*6777b538SAndroid Build Coastguard Worker for (i = 0; start[i] != 0; i++) {
713*6777b538SAndroid Build Coastguard Worker buf[k++] = start[i];
714*6777b538SAndroid Build Coastguard Worker buf[k++] = 0;
715*6777b538SAndroid Build Coastguard Worker }
716*6777b538SAndroid Build Coastguard Worker for (i = 0; i < textSize; i++) {
717*6777b538SAndroid Build Coastguard Worker buf[k++] = 'x';
718*6777b538SAndroid Build Coastguard Worker buf[k++] = 0;
719*6777b538SAndroid Build Coastguard Worker }
720*6777b538SAndroid Build Coastguard Worker for (i = 0; end[i] != 0; i++) {
721*6777b538SAndroid Build Coastguard Worker buf[k++] = end[i];
722*6777b538SAndroid Build Coastguard Worker buf[k++] = 0;
723*6777b538SAndroid Build Coastguard Worker }
724*6777b538SAndroid Build Coastguard Worker
725*6777b538SAndroid Build Coastguard Worker doc = xmlReadMemory(buf, 2 * totalSize, NULL, "UTF-16LE", 0);
726*6777b538SAndroid Build Coastguard Worker if (doc == NULL) {
727*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "failed to parse document\n");
728*6777b538SAndroid Build Coastguard Worker goto error;
729*6777b538SAndroid Build Coastguard Worker }
730*6777b538SAndroid Build Coastguard Worker
731*6777b538SAndroid Build Coastguard Worker text = doc->children->children->content;
732*6777b538SAndroid Build Coastguard Worker for (i = 0; i < textSize; i++) {
733*6777b538SAndroid Build Coastguard Worker if (text[i] != 'x') {
734*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "text node has wrong content at offset %d\n", k);
735*6777b538SAndroid Build Coastguard Worker goto error;
736*6777b538SAndroid Build Coastguard Worker }
737*6777b538SAndroid Build Coastguard Worker }
738*6777b538SAndroid Build Coastguard Worker
739*6777b538SAndroid Build Coastguard Worker ret = 0;
740*6777b538SAndroid Build Coastguard Worker
741*6777b538SAndroid Build Coastguard Worker error:
742*6777b538SAndroid Build Coastguard Worker xmlFreeDoc(doc);
743*6777b538SAndroid Build Coastguard Worker xmlFree(buf);
744*6777b538SAndroid Build Coastguard Worker
745*6777b538SAndroid Build Coastguard Worker return ret;
746*6777b538SAndroid Build Coastguard Worker }
747*6777b538SAndroid Build Coastguard Worker
748*6777b538SAndroid Build Coastguard Worker #if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
749*6777b538SAndroid Build Coastguard Worker
750*6777b538SAndroid Build Coastguard Worker static char *
convert(xmlCharEncodingHandlerPtr handler,const char * utf8,int size,int * outSize)751*6777b538SAndroid Build Coastguard Worker convert(xmlCharEncodingHandlerPtr handler, const char *utf8, int size,
752*6777b538SAndroid Build Coastguard Worker int *outSize) {
753*6777b538SAndroid Build Coastguard Worker char *ret;
754*6777b538SAndroid Build Coastguard Worker int inlen;
755*6777b538SAndroid Build Coastguard Worker int res;
756*6777b538SAndroid Build Coastguard Worker
757*6777b538SAndroid Build Coastguard Worker inlen = size;
758*6777b538SAndroid Build Coastguard Worker *outSize = size * 2;
759*6777b538SAndroid Build Coastguard Worker ret = xmlMalloc(*outSize);
760*6777b538SAndroid Build Coastguard Worker if (ret == NULL)
761*6777b538SAndroid Build Coastguard Worker return(NULL);
762*6777b538SAndroid Build Coastguard Worker res = handler->output(BAD_CAST ret, outSize, BAD_CAST utf8, &inlen);
763*6777b538SAndroid Build Coastguard Worker if ((res < 0) || (inlen != size)) {
764*6777b538SAndroid Build Coastguard Worker xmlFree(ret);
765*6777b538SAndroid Build Coastguard Worker return(NULL);
766*6777b538SAndroid Build Coastguard Worker }
767*6777b538SAndroid Build Coastguard Worker
768*6777b538SAndroid Build Coastguard Worker return(ret);
769*6777b538SAndroid Build Coastguard Worker }
770*6777b538SAndroid Build Coastguard Worker
771*6777b538SAndroid Build Coastguard Worker static int
testUserEncodingPush(void)772*6777b538SAndroid Build Coastguard Worker testUserEncodingPush(void) {
773*6777b538SAndroid Build Coastguard Worker xmlCharEncodingHandlerPtr handler;
774*6777b538SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
775*6777b538SAndroid Build Coastguard Worker xmlDocPtr doc;
776*6777b538SAndroid Build Coastguard Worker char buf[] =
777*6777b538SAndroid Build Coastguard Worker "\xEF\xBB\xBF"
778*6777b538SAndroid Build Coastguard Worker "<?xml version='1.0' encoding='ISO-8859-1'?>\n"
779*6777b538SAndroid Build Coastguard Worker "<d>text</d>\n";
780*6777b538SAndroid Build Coastguard Worker char *utf16;
781*6777b538SAndroid Build Coastguard Worker int utf16Size;
782*6777b538SAndroid Build Coastguard Worker int ret = 1;
783*6777b538SAndroid Build Coastguard Worker
784*6777b538SAndroid Build Coastguard Worker handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE);
785*6777b538SAndroid Build Coastguard Worker utf16 = convert(handler, buf, sizeof(buf) - 1, &utf16Size);
786*6777b538SAndroid Build Coastguard Worker ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
787*6777b538SAndroid Build Coastguard Worker xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF16LE);
788*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, utf16, utf16Size, 0);
789*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, NULL, 0, 1);
790*6777b538SAndroid Build Coastguard Worker doc = ctxt->myDoc;
791*6777b538SAndroid Build Coastguard Worker
792*6777b538SAndroid Build Coastguard Worker if ((doc != NULL) &&
793*6777b538SAndroid Build Coastguard Worker (doc->children != NULL) &&
794*6777b538SAndroid Build Coastguard Worker (doc->children->children != NULL) &&
795*6777b538SAndroid Build Coastguard Worker (xmlStrcmp(doc->children->children->content, BAD_CAST "text") == 0))
796*6777b538SAndroid Build Coastguard Worker ret = 0;
797*6777b538SAndroid Build Coastguard Worker
798*6777b538SAndroid Build Coastguard Worker xmlFreeDoc(doc);
799*6777b538SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
800*6777b538SAndroid Build Coastguard Worker xmlFree(utf16);
801*6777b538SAndroid Build Coastguard Worker
802*6777b538SAndroid Build Coastguard Worker return(ret);
803*6777b538SAndroid Build Coastguard Worker }
804*6777b538SAndroid Build Coastguard Worker
805*6777b538SAndroid Build Coastguard Worker static int
testUTF8Chunks(void)806*6777b538SAndroid Build Coastguard Worker testUTF8Chunks(void) {
807*6777b538SAndroid Build Coastguard Worker xmlParserCtxtPtr ctxt;
808*6777b538SAndroid Build Coastguard Worker xmlChar *out;
809*6777b538SAndroid Build Coastguard Worker int outSize;
810*6777b538SAndroid Build Coastguard Worker char *buf;
811*6777b538SAndroid Build Coastguard Worker int i;
812*6777b538SAndroid Build Coastguard Worker int ret = 0;
813*6777b538SAndroid Build Coastguard Worker
814*6777b538SAndroid Build Coastguard Worker ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
815*6777b538SAndroid Build Coastguard Worker
816*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "<d>", 3, 0);
817*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "\xF0", 1, 0);
818*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "\x9F", 1, 0);
819*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "\x98", 1, 0);
820*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "\x8A", 1, 0);
821*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "</d>", 4, 1);
822*6777b538SAndroid Build Coastguard Worker
823*6777b538SAndroid Build Coastguard Worker xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
824*6777b538SAndroid Build Coastguard Worker if (strcmp((char *) out,
825*6777b538SAndroid Build Coastguard Worker "<?xml version=\"1.0\"?>\n<d>😊</d>\n") != 0) {
826*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "failed UTF-8 chunk test 1\n");
827*6777b538SAndroid Build Coastguard Worker ret += 1;
828*6777b538SAndroid Build Coastguard Worker }
829*6777b538SAndroid Build Coastguard Worker
830*6777b538SAndroid Build Coastguard Worker xmlFree(out);
831*6777b538SAndroid Build Coastguard Worker xmlFreeDoc(ctxt->myDoc);
832*6777b538SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
833*6777b538SAndroid Build Coastguard Worker
834*6777b538SAndroid Build Coastguard Worker ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
835*6777b538SAndroid Build Coastguard Worker
836*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "<d>", 3, 0);
837*6777b538SAndroid Build Coastguard Worker
838*6777b538SAndroid Build Coastguard Worker /*
839*6777b538SAndroid Build Coastguard Worker * Create a chunk longer than XML_PARSER_BIG_BUFFER_SIZE (300) ending
840*6777b538SAndroid Build Coastguard Worker * with an incomplete UTF-8 sequence.
841*6777b538SAndroid Build Coastguard Worker */
842*6777b538SAndroid Build Coastguard Worker buf = xmlMalloc(1000 * 2 + 1);
843*6777b538SAndroid Build Coastguard Worker for (i = 0; i < 2000; i += 2)
844*6777b538SAndroid Build Coastguard Worker memcpy(buf + i, "\xCE\xB1", 2);
845*6777b538SAndroid Build Coastguard Worker buf[i] = '\xCE';
846*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, buf, 2001, 0);
847*6777b538SAndroid Build Coastguard Worker xmlFree(buf);
848*6777b538SAndroid Build Coastguard Worker
849*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, "\xB1</d>", 4, 0);
850*6777b538SAndroid Build Coastguard Worker xmlParseChunk(ctxt, NULL, 0, 0);
851*6777b538SAndroid Build Coastguard Worker
852*6777b538SAndroid Build Coastguard Worker xmlDocDumpMemory(ctxt->myDoc, &out, &outSize);
853*6777b538SAndroid Build Coastguard Worker if (strncmp((char *) out, "<?xml version=\"1.0\"?>\n<d>", 25) != 0) {
854*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "failed UTF-8 chunk test 2-1\n");
855*6777b538SAndroid Build Coastguard Worker ret += 1;
856*6777b538SAndroid Build Coastguard Worker goto error;
857*6777b538SAndroid Build Coastguard Worker }
858*6777b538SAndroid Build Coastguard Worker for (i = 25; i < 25 + 1001 * 7; i += 7) {
859*6777b538SAndroid Build Coastguard Worker if (memcmp(out + i, "α", 7) != 0) {
860*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "failed UTF-8 chunk test 2-2 %d\n", i);
861*6777b538SAndroid Build Coastguard Worker ret += 1;
862*6777b538SAndroid Build Coastguard Worker goto error;
863*6777b538SAndroid Build Coastguard Worker }
864*6777b538SAndroid Build Coastguard Worker }
865*6777b538SAndroid Build Coastguard Worker if (strcmp((char *) out + i, "</d>\n") != 0) {
866*6777b538SAndroid Build Coastguard Worker fprintf(stderr, "failed UTF-8 chunk test 2-3\n");
867*6777b538SAndroid Build Coastguard Worker ret += 1;
868*6777b538SAndroid Build Coastguard Worker goto error;
869*6777b538SAndroid Build Coastguard Worker }
870*6777b538SAndroid Build Coastguard Worker
871*6777b538SAndroid Build Coastguard Worker error:
872*6777b538SAndroid Build Coastguard Worker xmlFree(out);
873*6777b538SAndroid Build Coastguard Worker xmlFreeDoc(ctxt->myDoc);
874*6777b538SAndroid Build Coastguard Worker xmlFreeParserCtxt(ctxt);
875*6777b538SAndroid Build Coastguard Worker
876*6777b538SAndroid Build Coastguard Worker return(ret);
877*6777b538SAndroid Build Coastguard Worker return(0);
878*6777b538SAndroid Build Coastguard Worker }
879*6777b538SAndroid Build Coastguard Worker
880*6777b538SAndroid Build Coastguard Worker #endif
881*6777b538SAndroid Build Coastguard Worker
main(void)882*6777b538SAndroid Build Coastguard Worker int main(void) {
883*6777b538SAndroid Build Coastguard Worker
884*6777b538SAndroid Build Coastguard Worker int ret = 0;
885*6777b538SAndroid Build Coastguard Worker
886*6777b538SAndroid Build Coastguard Worker /*
887*6777b538SAndroid Build Coastguard Worker * this initialize the library and check potential ABI mismatches
888*6777b538SAndroid Build Coastguard Worker * between the version it was compiled for and the actual shared
889*6777b538SAndroid Build Coastguard Worker * library used.
890*6777b538SAndroid Build Coastguard Worker */
891*6777b538SAndroid Build Coastguard Worker LIBXML_TEST_VERSION
892*6777b538SAndroid Build Coastguard Worker
893*6777b538SAndroid Build Coastguard Worker /*
894*6777b538SAndroid Build Coastguard Worker * Catch errors separately
895*6777b538SAndroid Build Coastguard Worker */
896*6777b538SAndroid Build Coastguard Worker
897*6777b538SAndroid Build Coastguard Worker xmlSetStructuredErrorFunc(NULL, errorHandler);
898*6777b538SAndroid Build Coastguard Worker
899*6777b538SAndroid Build Coastguard Worker /*
900*6777b538SAndroid Build Coastguard Worker * Run the tests
901*6777b538SAndroid Build Coastguard Worker */
902*6777b538SAndroid Build Coastguard Worker ret += testCharRanges();
903*6777b538SAndroid Build Coastguard Worker ret += testDocumentRanges();
904*6777b538SAndroid Build Coastguard Worker ret += testUserEncoding();
905*6777b538SAndroid Build Coastguard Worker #if defined(LIBXML_PUSH_ENABLED) && defined(LIBXML_OUTPUT_ENABLED)
906*6777b538SAndroid Build Coastguard Worker ret += testUserEncodingPush();
907*6777b538SAndroid Build Coastguard Worker ret += testUTF8Chunks();
908*6777b538SAndroid Build Coastguard Worker #endif
909*6777b538SAndroid Build Coastguard Worker
910*6777b538SAndroid Build Coastguard Worker /*
911*6777b538SAndroid Build Coastguard Worker * Cleanup function for the XML library.
912*6777b538SAndroid Build Coastguard Worker */
913*6777b538SAndroid Build Coastguard Worker xmlCleanupParser();
914*6777b538SAndroid Build Coastguard Worker return(ret ? 1 : 0);
915*6777b538SAndroid Build Coastguard Worker }
916