1 /*
2 * xmlSeed.c: Generate the XML seed corpus for fuzzing.
3 *
4 * See Copyright for the status of this software.
5 */
6
7 #include <stdio.h>
8 #include <string.h>
9 #include <glob.h>
10 #include <libgen.h>
11 #include <sys/stat.h>
12
13 #ifdef _WIN32
14 #include <direct.h>
15 #else
16 #include <unistd.h>
17 #endif
18
19 #include <libxml/parser.h>
20 #include <libxml/parserInternals.h>
21 #include <libxml/HTMLparser.h>
22 #include <libxml/xinclude.h>
23 #include <libxml/xmlschemas.h>
24 #include "fuzz.h"
25
/* Capacity of the fixed-size path buffers (output paths, glob patterns, cwd). */
#define PATH_SIZE 500
/* Size of the stack buffer processHtml uses to copy a document into a seed. */
#define SEED_BUF_SIZE 16384
/* Capacity of the line buffer that holds one expression in processXPath. */
#define EXPR_SIZE 4500

/* Bits for globalData.flags; set in main and tested in processXml. */
#define FLAG_READER (1 << 0)
#define FLAG_LINT (1 << 1)
32
/*
 * Callback that writes the seed for a single input file. processPattern
 * calls it with the file's base name (after changing into the file's
 * directory) and the already opened output stream; a non-zero result is
 * treated as failure.
 */
typedef int
(*fileFunc)(const char *base, FILE *out);

/*
 * Handler for one command line argument after the fuzzer name. main picks
 * either processPattern or processXPathDir for this role.
 */
typedef int
(*mainFunc)(const char *arg);
38
/* Program-wide state shared by the seed generators in this file. */
static struct {
    /* Stream the resource recorder appends recorded resources to */
    FILE *out;
    /*
     * URLs that were already recorded. In this file the stored payload is
     * only a non-NULL marker, so the table acts as a "seen" set.
     */
    xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */
    /* Entity loader that was active when fuzzRecorderInit ran */
    xmlExternalEntityLoader oldLoader;
    /* Per-file callback invoked by processPattern */
    fileFunc processFile;
    /* Fuzzer name from argv[1]; names the directory below seed/ */
    const char *fuzzer;
    /* Running number appended to XPath seed file names */
    int counter;
    /* Directory the program started in; restored after each chdir */
    char cwd[PATH_SIZE];
    /* Combination of FLAG_* bits */
    int flags;
} globalData;
49
50 #if defined(HAVE_SCHEMA_FUZZER) || \
51 defined(HAVE_XML_FUZZER)
52 /*
53 * A custom resource loader that writes all external DTDs or entities to a
54 * single file in the format expected by xmlFuzzResourceLoader.
55 */
56 static int
fuzzResourceRecorder(void * data ATTRIBUTE_UNUSED,const char * URL,const char * ID ATTRIBUTE_UNUSED,xmlResourceType type ATTRIBUTE_UNUSED,int flags,xmlParserInputPtr * out)57 fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
58 const char *ID ATTRIBUTE_UNUSED,
59 xmlResourceType type ATTRIBUTE_UNUSED, int flags,
60 xmlParserInputPtr *out) {
61 xmlParserInputPtr in;
62 static const int chunkSize = 16384;
63 int code, len;
64
65 *out = NULL;
66
67 code = xmlNewInputFromUrl(URL, flags, &in);
68 if (code != XML_ERR_OK)
69 return(code);
70
71 if (globalData.entities == NULL) {
72 globalData.entities = xmlHashCreate(4);
73 } else if (xmlHashLookup(globalData.entities,
74 (const xmlChar *) URL) != NULL) {
75 *out = in;
76 return(XML_ERR_OK);
77 }
78
79 do {
80 len = xmlParserInputGrow(in, chunkSize);
81 if (len < 0) {
82 fprintf(stderr, "Error reading %s\n", URL);
83 xmlFreeInputStream(in);
84 return(in->buf->error);
85 }
86 } while (len > 0);
87
88 xmlFuzzWriteString(globalData.out, URL);
89 xmlFuzzWriteString(globalData.out,
90 (char *) xmlBufContent(in->buf->buffer));
91
92 xmlFreeInputStream(in);
93
94 xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
95 globalData.entities);
96
97 return(xmlNewInputFromUrl(URL, flags, out));
98 }
99
100 static void
fuzzRecorderInit(FILE * out)101 fuzzRecorderInit(FILE *out) {
102 globalData.out = out;
103 globalData.entities = xmlHashCreate(8);
104 globalData.oldLoader = xmlGetExternalEntityLoader();
105 }
106
107 static void
fuzzRecorderCleanup(void)108 fuzzRecorderCleanup(void) {
109 xmlHashFree(globalData.entities, NULL);
110 globalData.out = NULL;
111 globalData.entities = NULL;
112 globalData.oldLoader = NULL;
113 }
114 #endif
115
116 #ifdef HAVE_XML_FUZZER
117 static int
processXml(const char * docFile,FILE * out)118 processXml(const char *docFile, FILE *out) {
119 int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
120 xmlParserCtxtPtr ctxt;
121 xmlDocPtr doc;
122
123 if (globalData.flags & FLAG_LINT) {
124 /* Switches */
125 xmlFuzzWriteInt(out, 0, 4);
126 xmlFuzzWriteInt(out, 0, 4);
127 /* maxmem */
128 xmlFuzzWriteInt(out, 0, 4);
129 /* max-ampl */
130 xmlFuzzWriteInt(out, 0, 1);
131 /* pretty */
132 xmlFuzzWriteInt(out, 0, 1);
133 /* encode */
134 xmlFuzzWriteString(out, "");
135 /* pattern */
136 xmlFuzzWriteString(out, "");
137 /* xpath */
138 xmlFuzzWriteString(out, "");
139 } else {
140 /* Parser options. */
141 xmlFuzzWriteInt(out, opts, 4);
142 /* Max allocations. */
143 xmlFuzzWriteInt(out, 0, 4);
144
145 if (globalData.flags & FLAG_READER) {
146 /* Initial reader program with a couple of OP_READs */
147 xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
148 }
149 }
150
151 fuzzRecorderInit(out);
152
153 ctxt = xmlNewParserCtxt();
154 xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
155 xmlCtxtSetResourceLoader(ctxt, fuzzResourceRecorder, NULL);
156 doc = xmlCtxtReadFile(ctxt, docFile, NULL, opts);
157 #ifdef LIBXML_XINCLUDE_ENABLED
158 {
159 xmlXIncludeCtxtPtr xinc = xmlXIncludeNewContext(doc);
160
161 xmlXIncludeSetErrorHandler(xinc, xmlFuzzSErrorFunc, NULL);
162 xmlXIncludeSetResourceLoader(xinc, fuzzResourceRecorder, NULL);
163 xmlXIncludeSetFlags(xinc, opts);
164 xmlXIncludeProcessNode(xinc, (xmlNodePtr) doc);
165 xmlXIncludeFreeContext(xinc);
166 }
167 #endif
168 xmlFreeDoc(doc);
169 xmlFreeParserCtxt(ctxt);
170
171 fuzzRecorderCleanup();
172
173 return(0);
174 }
175 #endif
176
177 #ifdef HAVE_HTML_FUZZER
178 static int
processHtml(const char * docFile,FILE * out)179 processHtml(const char *docFile, FILE *out) {
180 char buf[SEED_BUF_SIZE];
181 FILE *file;
182 size_t size;
183
184 /* Parser options. */
185 xmlFuzzWriteInt(out, 0, 4);
186 /* Max allocations. */
187 xmlFuzzWriteInt(out, 0, 4);
188
189 /* Copy file */
190 file = fopen(docFile, "rb");
191 if (file == NULL) {
192 fprintf(stderr, "couldn't open %s\n", docFile);
193 return(0);
194 }
195 do {
196 size = fread(buf, 1, SEED_BUF_SIZE, file);
197 if (size > 0)
198 fwrite(buf, 1, size, out);
199 } while (size == SEED_BUF_SIZE);
200 fclose(file);
201
202 return(0);
203 }
204 #endif
205
206 #ifdef HAVE_SCHEMA_FUZZER
207 static int
processSchema(const char * docFile,FILE * out)208 processSchema(const char *docFile, FILE *out) {
209 xmlSchemaPtr schema;
210 xmlSchemaParserCtxtPtr pctxt;
211
212 /* Max allocations. */
213 xmlFuzzWriteInt(out, 0, 4);
214
215 fuzzRecorderInit(out);
216
217 pctxt = xmlSchemaNewParserCtxt(docFile);
218 xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
219 xmlSchemaSetResourceLoader(pctxt, fuzzResourceRecorder, NULL);
220 schema = xmlSchemaParse(pctxt);
221 xmlSchemaFreeParserCtxt(pctxt);
222 xmlSchemaFree(schema);
223
224 fuzzRecorderCleanup();
225
226 return(0);
227 }
228 #endif
229
230 #if defined(HAVE_HTML_FUZZER) || \
231 defined(HAVE_SCHEMA_FUZZER) || \
232 defined(HAVE_XML_FUZZER)
233 static int
processPattern(const char * pattern)234 processPattern(const char *pattern) {
235 glob_t globbuf;
236 int ret = 0;
237 int res;
238 size_t i;
239
240 res = glob(pattern, 0, NULL, &globbuf);
241 if (res == GLOB_NOMATCH)
242 return(0);
243 if (res != 0) {
244 fprintf(stderr, "couldn't match pattern %s\n", pattern);
245 return(-1);
246 }
247
248 for (i = 0; i < globbuf.gl_pathc; i++) {
249 struct stat statbuf;
250 char outPath[PATH_SIZE];
251 char *dirBuf = NULL;
252 char *baseBuf = NULL;
253 const char *path, *dir, *base;
254 FILE *out = NULL;
255 int dirChanged = 0;
256 size_t size;
257
258 path = globbuf.gl_pathv[i];
259
260 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
261 continue;
262
263 dirBuf = (char *) xmlCharStrdup(path);
264 baseBuf = (char *) xmlCharStrdup(path);
265 if ((dirBuf == NULL) || (baseBuf == NULL)) {
266 fprintf(stderr, "memory allocation failed\n");
267 ret = -1;
268 goto error;
269 }
270 dir = dirname(dirBuf);
271 base = basename(baseBuf);
272
273 size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
274 globalData.fuzzer, base);
275 if (size >= PATH_SIZE) {
276 fprintf(stderr, "creating path failed\n");
277 ret = -1;
278 goto error;
279 }
280 out = fopen(outPath, "wb");
281 if (out == NULL) {
282 fprintf(stderr, "couldn't open %s for writing\n", outPath);
283 ret = -1;
284 goto error;
285 }
286 if (chdir(dir) != 0) {
287 fprintf(stderr, "couldn't chdir to %s\n", dir);
288 ret = -1;
289 goto error;
290 }
291 dirChanged = 1;
292 if (globalData.processFile(base, out) != 0)
293 ret = -1;
294
295 error:
296 if (out != NULL)
297 fclose(out);
298 xmlFree(dirBuf);
299 xmlFree(baseBuf);
300 if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
301 fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
302 ret = -1;
303 break;
304 }
305 }
306
307 globfree(&globbuf);
308 return(ret);
309 }
310 #endif
311
312 #ifdef HAVE_XPATH_FUZZER
313 static int
processXPath(const char * testDir,const char * prefix,const char * name,const char * data,const char * subdir,int xptr)314 processXPath(const char *testDir, const char *prefix, const char *name,
315 const char *data, const char *subdir, int xptr) {
316 char pattern[PATH_SIZE];
317 glob_t globbuf;
318 size_t i, size;
319 int ret = 0, res;
320
321 size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
322 testDir, subdir, prefix);
323 if (size >= PATH_SIZE)
324 return(-1);
325 res = glob(pattern, 0, NULL, &globbuf);
326 if (res == GLOB_NOMATCH)
327 return(0);
328 if (res != 0) {
329 fprintf(stderr, "couldn't match pattern %s\n", pattern);
330 return(-1);
331 }
332
333 for (i = 0; i < globbuf.gl_pathc; i++) {
334 char *path = globbuf.gl_pathv[i];
335 struct stat statbuf;
336 FILE *in;
337 char expr[EXPR_SIZE];
338
339 if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
340 continue;
341
342 in = fopen(path, "rb");
343 if (in == NULL) {
344 ret = -1;
345 continue;
346 }
347
348 while (fgets(expr, EXPR_SIZE, in) != NULL) {
349 char outPath[PATH_SIZE];
350 FILE *out;
351 int j;
352
353 for (j = 0; expr[j] != 0; j++)
354 if (expr[j] == '\r' || expr[j] == '\n')
355 break;
356 expr[j] = 0;
357
358 size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
359 name, globalData.counter);
360 if (size >= PATH_SIZE) {
361 ret = -1;
362 continue;
363 }
364 out = fopen(outPath, "wb");
365 if (out == NULL) {
366 ret = -1;
367 continue;
368 }
369
370 /* Max allocations. */
371 xmlFuzzWriteInt(out, 0, 4);
372
373 if (xptr) {
374 xmlFuzzWriteString(out, expr);
375 } else {
376 char xptrExpr[EXPR_SIZE+100];
377
378 /* Wrap XPath expressions as XPointer */
379 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
380 xmlFuzzWriteString(out, xptrExpr);
381 }
382
383 xmlFuzzWriteString(out, data);
384
385 fclose(out);
386 globalData.counter++;
387 }
388
389 fclose(in);
390 }
391
392 globfree(&globbuf);
393
394 return(ret);
395 }
396
397 static int
processXPathDir(const char * testDir)398 processXPathDir(const char *testDir) {
399 char pattern[PATH_SIZE];
400 glob_t globbuf;
401 size_t i, size;
402 int ret = 0;
403
404 globalData.counter = 1;
405 if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
406 ret = -1;
407
408 size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
409 if (size >= PATH_SIZE)
410 return(1);
411 if (glob(pattern, 0, NULL, &globbuf) != 0)
412 return(1);
413
414 for (i = 0; i < globbuf.gl_pathc; i++) {
415 char *path = globbuf.gl_pathv[i];
416 char *data;
417 const char *docFile;
418
419 data = xmlSlurpFile(path, NULL);
420 if (data == NULL) {
421 ret = -1;
422 continue;
423 }
424 docFile = basename(path);
425
426 globalData.counter = 1;
427 if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
428 ret = -1;
429 if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
430 ret = -1;
431 if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
432 ret = -1;
433
434 xmlFree(data);
435 }
436
437 globfree(&globbuf);
438
439 return(ret);
440 }
441 #endif
442
443 int
main(int argc,const char ** argv)444 main(int argc, const char **argv) {
445 mainFunc processArg = NULL;
446 const char *fuzzer;
447 int ret = 0;
448 int i;
449
450 if (argc < 3) {
451 fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
452 return(1);
453 }
454
455 fuzzer = argv[1];
456 if (strcmp(fuzzer, "html") == 0) {
457 #ifdef HAVE_HTML_FUZZER
458 processArg = processPattern;
459 globalData.processFile = processHtml;
460 #endif
461 } else if (strcmp(fuzzer, "lint") == 0) {
462 #ifdef HAVE_LINT_FUZZER
463 processArg = processPattern;
464 globalData.flags |= FLAG_LINT;
465 globalData.processFile = processXml;
466 #endif
467 } else if (strcmp(fuzzer, "reader") == 0) {
468 #ifdef HAVE_READER_FUZZER
469 processArg = processPattern;
470 globalData.flags |= FLAG_READER;
471 globalData.processFile = processXml;
472 #endif
473 } else if (strcmp(fuzzer, "schema") == 0) {
474 #ifdef HAVE_SCHEMA_FUZZER
475 processArg = processPattern;
476 globalData.processFile = processSchema;
477 #endif
478 } else if (strcmp(fuzzer, "valid") == 0) {
479 #ifdef HAVE_VALID_FUZZER
480 processArg = processPattern;
481 globalData.processFile = processXml;
482 #endif
483 } else if (strcmp(fuzzer, "xinclude") == 0) {
484 #ifdef HAVE_XINCLUDE_FUZZER
485 processArg = processPattern;
486 globalData.processFile = processXml;
487 #endif
488 } else if (strcmp(fuzzer, "xml") == 0) {
489 #ifdef HAVE_XML_FUZZER
490 processArg = processPattern;
491 globalData.processFile = processXml;
492 #endif
493 } else if (strcmp(fuzzer, "xpath") == 0) {
494 #ifdef HAVE_XPATH_FUZZER
495 processArg = processXPathDir;
496 #endif
497 } else {
498 fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
499 return(1);
500 }
501 globalData.fuzzer = fuzzer;
502
503 if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
504 fprintf(stderr, "couldn't get current directory\n");
505 return(1);
506 }
507
508 if (processArg != NULL)
509 for (i = 2; i < argc; i++)
510 processArg(argv[i]);
511
512 return(ret);
513 }
514
515