xref: /aosp_15_r20/external/libxml2/fuzz/genSeed.c (revision 7c5688314b92172186c154356a6374bf7684c3ca)
/*
 * genSeed.c: Generate the XML seed corpus for fuzzing.
 *
 * See Copyright for the status of this software.
 */
6 
7 #include <stdio.h>
8 #include <string.h>
9 #include <glob.h>
10 #include <libgen.h>
11 #include <sys/stat.h>
12 
13 #ifdef _WIN32
14 #include <direct.h>
15 #else
16 #include <unistd.h>
17 #endif
18 
19 #include <libxml/parser.h>
20 #include <libxml/parserInternals.h>
21 #include <libxml/HTMLparser.h>
22 #include <libxml/xinclude.h>
23 #include <libxml/xmlschemas.h>
24 #include "fuzz.h"
25 
26 #define PATH_SIZE 500
27 #define SEED_BUF_SIZE 16384
28 #define EXPR_SIZE 4500
29 
30 #define FLAG_READER (1 << 0)
31 #define FLAG_LINT   (1 << 1)
32 
/*
 * Callback that writes the seed for a single input file.
 *
 * processPattern invokes it after changing into the directory of the
 * matched file, passing the file's basename as `base` and the already
 * opened seed file as `out`. A non-zero return is counted as a failure.
 */
typedef int
(*fileFunc)(const char *base, FILE *out);

/*
 * Handler that main() calls once for every command-line argument that
 * follows the fuzzer name.
 */
typedef int
(*mainFunc)(const char *arg);
38 
/* State shared by main() and the seed generators in this file. */
static struct {
    /* Seed file the resource recorder appends to (set by fuzzRecorderInit) */
    FILE *out;
    /*
     * URLs that the resource recorder has already written. Only the key
     * matters: the stored payload is just a non-NULL marker.
     */
    xmlHashTablePtr entities;
    /* Entity loader that was active when fuzzRecorderInit ran */
    xmlExternalEntityLoader oldLoader;
    /* Per-file callback used by processPattern */
    fileFunc processFile;
    /* Fuzzer name from argv[1]; names the "seed/<fuzzer>/" output directory */
    const char *fuzzer;
    /* Running number appended to XPath seed file names */
    int counter;
    /* Working directory at startup, restored after each processed file */
    char cwd[PATH_SIZE];
    /* Combination of FLAG_READER / FLAG_LINT */
    int flags;
} globalData;
49 
50 #if defined(HAVE_SCHEMA_FUZZER) || \
51     defined(HAVE_XML_FUZZER)
52 /*
53  * A custom resource loader that writes all external DTDs or entities to a
54  * single file in the format expected by xmlFuzzResourceLoader.
55  */
56 static int
fuzzResourceRecorder(void * data ATTRIBUTE_UNUSED,const char * URL,const char * ID ATTRIBUTE_UNUSED,xmlResourceType type ATTRIBUTE_UNUSED,int flags,xmlParserInputPtr * out)57 fuzzResourceRecorder(void *data ATTRIBUTE_UNUSED, const char *URL,
58                      const char *ID ATTRIBUTE_UNUSED,
59                      xmlResourceType type ATTRIBUTE_UNUSED, int flags,
60                      xmlParserInputPtr *out) {
61     xmlParserInputPtr in;
62     static const int chunkSize = 16384;
63     int code, len;
64 
65     *out = NULL;
66 
67     code = xmlNewInputFromUrl(URL, flags, &in);
68     if (code != XML_ERR_OK)
69         return(code);
70 
71     if (globalData.entities == NULL) {
72         globalData.entities = xmlHashCreate(4);
73     } else if (xmlHashLookup(globalData.entities,
74                              (const xmlChar *) URL) != NULL) {
75         *out = in;
76         return(XML_ERR_OK);
77     }
78 
79     do {
80         len = xmlParserInputGrow(in, chunkSize);
81         if (len < 0) {
82             fprintf(stderr, "Error reading %s\n", URL);
83             xmlFreeInputStream(in);
84             return(in->buf->error);
85         }
86     } while (len > 0);
87 
88     xmlFuzzWriteString(globalData.out, URL);
89     xmlFuzzWriteString(globalData.out,
90                        (char *) xmlBufContent(in->buf->buffer));
91 
92     xmlFreeInputStream(in);
93 
94     xmlHashAddEntry(globalData.entities, (const xmlChar *) URL,
95                     globalData.entities);
96 
97     return(xmlNewInputFromUrl(URL, flags, out));
98 }
99 
100 static void
fuzzRecorderInit(FILE * out)101 fuzzRecorderInit(FILE *out) {
102     globalData.out = out;
103     globalData.entities = xmlHashCreate(8);
104     globalData.oldLoader = xmlGetExternalEntityLoader();
105 }
106 
107 static void
fuzzRecorderCleanup(void)108 fuzzRecorderCleanup(void) {
109     xmlHashFree(globalData.entities, NULL);
110     globalData.out = NULL;
111     globalData.entities = NULL;
112     globalData.oldLoader = NULL;
113 }
114 #endif
115 
116 #ifdef HAVE_XML_FUZZER
117 static int
processXml(const char * docFile,FILE * out)118 processXml(const char *docFile, FILE *out) {
119     int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
120     xmlParserCtxtPtr ctxt;
121     xmlDocPtr doc;
122 
123     if (globalData.flags & FLAG_LINT) {
124         /* Switches */
125         xmlFuzzWriteInt(out, 0, 4);
126         xmlFuzzWriteInt(out, 0, 4);
127         /* maxmem */
128         xmlFuzzWriteInt(out, 0, 4);
129         /* max-ampl */
130         xmlFuzzWriteInt(out, 0, 1);
131         /* pretty */
132         xmlFuzzWriteInt(out, 0, 1);
133         /* encode */
134         xmlFuzzWriteString(out, "");
135         /* pattern */
136         xmlFuzzWriteString(out, "");
137         /* xpath */
138         xmlFuzzWriteString(out, "");
139     } else {
140         /* Parser options. */
141         xmlFuzzWriteInt(out, opts, 4);
142         /* Max allocations. */
143         xmlFuzzWriteInt(out, 0, 4);
144 
145         if (globalData.flags & FLAG_READER) {
146             /* Initial reader program with a couple of OP_READs */
147             xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01");
148         }
149     }
150 
151     fuzzRecorderInit(out);
152 
153     ctxt = xmlNewParserCtxt();
154     xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
155     xmlCtxtSetResourceLoader(ctxt, fuzzResourceRecorder, NULL);
156     doc = xmlCtxtReadFile(ctxt, docFile, NULL, opts);
157 #ifdef LIBXML_XINCLUDE_ENABLED
158     {
159         xmlXIncludeCtxtPtr xinc = xmlXIncludeNewContext(doc);
160 
161         xmlXIncludeSetErrorHandler(xinc, xmlFuzzSErrorFunc, NULL);
162         xmlXIncludeSetResourceLoader(xinc, fuzzResourceRecorder, NULL);
163         xmlXIncludeSetFlags(xinc, opts);
164         xmlXIncludeProcessNode(xinc, (xmlNodePtr) doc);
165         xmlXIncludeFreeContext(xinc);
166     }
167 #endif
168     xmlFreeDoc(doc);
169     xmlFreeParserCtxt(ctxt);
170 
171     fuzzRecorderCleanup();
172 
173     return(0);
174 }
175 #endif
176 
177 #ifdef HAVE_HTML_FUZZER
178 static int
processHtml(const char * docFile,FILE * out)179 processHtml(const char *docFile, FILE *out) {
180     char buf[SEED_BUF_SIZE];
181     FILE *file;
182     size_t size;
183 
184     /* Parser options. */
185     xmlFuzzWriteInt(out, 0, 4);
186     /* Max allocations. */
187     xmlFuzzWriteInt(out, 0, 4);
188 
189     /* Copy file */
190     file = fopen(docFile, "rb");
191     if (file == NULL) {
192         fprintf(stderr, "couldn't open %s\n", docFile);
193         return(0);
194     }
195     do {
196         size = fread(buf, 1, SEED_BUF_SIZE, file);
197         if (size > 0)
198             fwrite(buf, 1, size, out);
199     } while (size == SEED_BUF_SIZE);
200     fclose(file);
201 
202     return(0);
203 }
204 #endif
205 
206 #ifdef HAVE_SCHEMA_FUZZER
207 static int
processSchema(const char * docFile,FILE * out)208 processSchema(const char *docFile, FILE *out) {
209     xmlSchemaPtr schema;
210     xmlSchemaParserCtxtPtr pctxt;
211 
212     /* Max allocations. */
213     xmlFuzzWriteInt(out, 0, 4);
214 
215     fuzzRecorderInit(out);
216 
217     pctxt = xmlSchemaNewParserCtxt(docFile);
218     xmlSchemaSetParserStructuredErrors(pctxt, xmlFuzzSErrorFunc, NULL);
219     xmlSchemaSetResourceLoader(pctxt, fuzzResourceRecorder, NULL);
220     schema = xmlSchemaParse(pctxt);
221     xmlSchemaFreeParserCtxt(pctxt);
222     xmlSchemaFree(schema);
223 
224     fuzzRecorderCleanup();
225 
226     return(0);
227 }
228 #endif
229 
230 #if defined(HAVE_HTML_FUZZER) || \
231     defined(HAVE_SCHEMA_FUZZER) || \
232     defined(HAVE_XML_FUZZER)
233 static int
processPattern(const char * pattern)234 processPattern(const char *pattern) {
235     glob_t globbuf;
236     int ret = 0;
237     int res;
238     size_t i;
239 
240     res = glob(pattern, 0, NULL, &globbuf);
241     if (res == GLOB_NOMATCH)
242         return(0);
243     if (res != 0) {
244         fprintf(stderr, "couldn't match pattern %s\n", pattern);
245         return(-1);
246     }
247 
248     for (i = 0; i < globbuf.gl_pathc; i++) {
249         struct stat statbuf;
250         char outPath[PATH_SIZE];
251         char *dirBuf = NULL;
252         char *baseBuf = NULL;
253         const char *path, *dir, *base;
254         FILE *out = NULL;
255         int dirChanged = 0;
256         size_t size;
257 
258         path = globbuf.gl_pathv[i];
259 
260         if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
261             continue;
262 
263         dirBuf = (char *) xmlCharStrdup(path);
264         baseBuf = (char *) xmlCharStrdup(path);
265         if ((dirBuf == NULL) || (baseBuf == NULL)) {
266             fprintf(stderr, "memory allocation failed\n");
267             ret = -1;
268             goto error;
269         }
270         dir = dirname(dirBuf);
271         base = basename(baseBuf);
272 
273         size = snprintf(outPath, sizeof(outPath), "seed/%s/%s",
274                         globalData.fuzzer, base);
275         if (size >= PATH_SIZE) {
276             fprintf(stderr, "creating path failed\n");
277             ret = -1;
278             goto error;
279         }
280         out = fopen(outPath, "wb");
281         if (out == NULL) {
282             fprintf(stderr, "couldn't open %s for writing\n", outPath);
283             ret = -1;
284             goto error;
285         }
286         if (chdir(dir) != 0) {
287             fprintf(stderr, "couldn't chdir to %s\n", dir);
288             ret = -1;
289             goto error;
290         }
291         dirChanged = 1;
292         if (globalData.processFile(base, out) != 0)
293             ret = -1;
294 
295 error:
296         if (out != NULL)
297             fclose(out);
298         xmlFree(dirBuf);
299         xmlFree(baseBuf);
300         if ((dirChanged) && (chdir(globalData.cwd) != 0)) {
301             fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd);
302             ret = -1;
303             break;
304         }
305     }
306 
307     globfree(&globbuf);
308     return(ret);
309 }
310 #endif
311 
312 #ifdef HAVE_XPATH_FUZZER
313 static int
processXPath(const char * testDir,const char * prefix,const char * name,const char * data,const char * subdir,int xptr)314 processXPath(const char *testDir, const char *prefix, const char *name,
315              const char *data, const char *subdir, int xptr) {
316     char pattern[PATH_SIZE];
317     glob_t globbuf;
318     size_t i, size;
319     int ret = 0, res;
320 
321     size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*",
322                     testDir, subdir, prefix);
323     if (size >= PATH_SIZE)
324         return(-1);
325     res = glob(pattern, 0, NULL, &globbuf);
326     if (res == GLOB_NOMATCH)
327         return(0);
328     if (res != 0) {
329         fprintf(stderr, "couldn't match pattern %s\n", pattern);
330         return(-1);
331     }
332 
333     for (i = 0; i < globbuf.gl_pathc; i++) {
334         char *path = globbuf.gl_pathv[i];
335         struct stat statbuf;
336         FILE *in;
337         char expr[EXPR_SIZE];
338 
339         if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode)))
340             continue;
341 
342         in = fopen(path, "rb");
343         if (in == NULL) {
344             ret = -1;
345             continue;
346         }
347 
348         while (fgets(expr, EXPR_SIZE, in) != NULL) {
349             char outPath[PATH_SIZE];
350             FILE *out;
351             int j;
352 
353             for (j = 0; expr[j] != 0; j++)
354                 if (expr[j] == '\r' || expr[j] == '\n')
355                     break;
356             expr[j] = 0;
357 
358             size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d",
359                             name, globalData.counter);
360             if (size >= PATH_SIZE) {
361                 ret = -1;
362                 continue;
363             }
364             out = fopen(outPath, "wb");
365             if (out == NULL) {
366                 ret = -1;
367                 continue;
368             }
369 
370             /* Max allocations. */
371             xmlFuzzWriteInt(out, 0, 4);
372 
373             if (xptr) {
374                 xmlFuzzWriteString(out, expr);
375             } else {
376                 char xptrExpr[EXPR_SIZE+100];
377 
378                 /* Wrap XPath expressions as XPointer */
379                 snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr);
380                 xmlFuzzWriteString(out, xptrExpr);
381             }
382 
383             xmlFuzzWriteString(out, data);
384 
385             fclose(out);
386             globalData.counter++;
387         }
388 
389         fclose(in);
390     }
391 
392     globfree(&globbuf);
393 
394     return(ret);
395 }
396 
397 static int
processXPathDir(const char * testDir)398 processXPathDir(const char *testDir) {
399     char pattern[PATH_SIZE];
400     glob_t globbuf;
401     size_t i, size;
402     int ret = 0;
403 
404     globalData.counter = 1;
405     if (processXPath(testDir, "", "expr", "<d></d>", "expr", 0) != 0)
406         ret = -1;
407 
408     size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir);
409     if (size >= PATH_SIZE)
410         return(1);
411     if (glob(pattern, 0, NULL, &globbuf) != 0)
412         return(1);
413 
414     for (i = 0; i < globbuf.gl_pathc; i++) {
415         char *path = globbuf.gl_pathv[i];
416         char *data;
417         const char *docFile;
418 
419         data = xmlSlurpFile(path, NULL);
420         if (data == NULL) {
421             ret = -1;
422             continue;
423         }
424         docFile = basename(path);
425 
426         globalData.counter = 1;
427         if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0)
428             ret = -1;
429         if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0)
430             ret = -1;
431         if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0)
432             ret = -1;
433 
434         xmlFree(data);
435     }
436 
437     globfree(&globbuf);
438 
439     return(ret);
440 }
441 #endif
442 
443 int
main(int argc,const char ** argv)444 main(int argc, const char **argv) {
445     mainFunc processArg = NULL;
446     const char *fuzzer;
447     int ret = 0;
448     int i;
449 
450     if (argc < 3) {
451         fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n");
452         return(1);
453     }
454 
455     fuzzer = argv[1];
456     if (strcmp(fuzzer, "html") == 0) {
457 #ifdef HAVE_HTML_FUZZER
458         processArg = processPattern;
459         globalData.processFile = processHtml;
460 #endif
461     } else if (strcmp(fuzzer, "lint") == 0) {
462 #ifdef HAVE_LINT_FUZZER
463         processArg = processPattern;
464         globalData.flags |= FLAG_LINT;
465         globalData.processFile = processXml;
466 #endif
467     } else if (strcmp(fuzzer, "reader") == 0) {
468 #ifdef HAVE_READER_FUZZER
469         processArg = processPattern;
470         globalData.flags |= FLAG_READER;
471         globalData.processFile = processXml;
472 #endif
473     } else if (strcmp(fuzzer, "schema") == 0) {
474 #ifdef HAVE_SCHEMA_FUZZER
475         processArg = processPattern;
476         globalData.processFile = processSchema;
477 #endif
478     } else if (strcmp(fuzzer, "valid") == 0) {
479 #ifdef HAVE_VALID_FUZZER
480         processArg = processPattern;
481         globalData.processFile = processXml;
482 #endif
483     } else if (strcmp(fuzzer, "xinclude") == 0) {
484 #ifdef HAVE_XINCLUDE_FUZZER
485         processArg = processPattern;
486         globalData.processFile = processXml;
487 #endif
488     } else if (strcmp(fuzzer, "xml") == 0) {
489 #ifdef HAVE_XML_FUZZER
490         processArg = processPattern;
491         globalData.processFile = processXml;
492 #endif
493     } else if (strcmp(fuzzer, "xpath") == 0) {
494 #ifdef HAVE_XPATH_FUZZER
495         processArg = processXPathDir;
496 #endif
497     } else {
498         fprintf(stderr, "unknown fuzzer %s\n", fuzzer);
499         return(1);
500     }
501     globalData.fuzzer = fuzzer;
502 
503     if (getcwd(globalData.cwd, PATH_SIZE) == NULL) {
504         fprintf(stderr, "couldn't get current directory\n");
505         return(1);
506     }
507 
508     if (processArg != NULL)
509         for (i = 2; i < argc; i++)
510             processArg(argv[i]);
511 
512     return(ret);
513 }
514 
515