1 /*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2024 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * Generates a const array from a bc script.
33 *
34 */
35
36 #include <assert.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41
42 #include <errno.h>
43
44 #include <fcntl.h>
45 #include <sys/stat.h>
46
47 #ifndef _WIN32
48 #include <unistd.h>
49 #endif // _WIN32
50
51 // For some reason, Windows can't have this header.
52 #ifndef _WIN32
53 #include <libgen.h>
54 #endif // _WIN32
55
56 // This pulls in cross-platform stuff.
57 #include <status.h>
58
// clang-format off

// Usage message. The single %s is filled in with the program name.
static const char bc_gen_usage[] =
	"usage: %s input output exclude name [label [define [remove_tabs]]]\n";

// Marker lines in the input text. Everything between the start marker and the
// end marker is dropped from the output when extra math is excluded; when it
// is not excluded, only the marker lines themselves are dropped.
static const char bc_gen_ex_start[] = "{{ A H N HN }}";
static const char bc_gen_ex_end[] = "{{ end }}";

// A short license header that is written at the top of the generated C source
// file. The %s is filled in with the name of the input file.
static const char bc_gen_header[] =
	"// Copyright (c) 2018-2024 Gavin D. Howard and contributors.\n"
	"// Licensed under the 2-clause BSD license.\n"
	"// *** AUTOMATICALLY GENERATED FROM %s. DO NOT MODIFY. ***\n\n";
// clang-format on

// Format strings for the individual pieces of the generated C source: the
// label definition and its extern declaration, the preprocessor guard, and the
// char array definition and its extern declaration.
static const char bc_gen_label[] = "const char *%s = \"%s\";\n\n";
static const char bc_gen_label_extern[] = "extern const char *%s;\n\n";
static const char bc_gen_ifdef[] = "#if %s\n";
static const char bc_gen_endif[] = "#endif // %s\n";
static const char bc_gen_name[] = "const char %s[] = {\n";
static const char bc_gen_name_extern[] = "extern const char %s[];\n\n";

// Error codes. These double as exit statuses, so none of them may be 0
// because an exit status of 0 means success.
enum
{
	IO_ERR = 1,
	INVALID_INPUT_FILE = 2,
	INVALID_PARAMS = 3
};

// The width, in printed characters, after which a line of the generated array
// is broken. This keeps generated lines from going much over 80 characters.
enum
{
	MAX_WIDTH = 72
};
93
/**
 * Opens a stdio stream, hiding the difference between fopen() on POSIX and
 * fopen_s() on Windows. On failure, *f is NULL.
 * @param f         Where to store the opened FILE pointer.
 * @param filename  The name of the file to open.
 * @param mode      The stdio mode string to open the file with.
 */
static void
open_file(FILE** f, const char* filename, const char* mode)
{
#ifdef _WIN32

	// Clear the result first: callers test for NULL to detect failure, and
	// fopen_s() is not guaranteed to store anything when it fails.
	*f = NULL;
	fopen_s(f, filename, mode);

#else // _WIN32

	// fopen() already returns NULL on failure.
	*f = fopen(filename, mode);

#endif // _WIN32
}
117
/**
 * Portable wrapper for opening a file descriptor. This is copied from
 * src/read.c. Make sure to update that if this changes.
 * @param path  The path of the file to open.
 * @param mode  The flags to open the file with.
 * @return      The file descriptor; negative if the file could not be opened.
 */
static int
bc_read_open(const char* path, int mode)
{
#ifdef _WIN32

	// NOTE(review): on Windows, open() takes an output parameter here, so it
	// is presumably remapped by a macro from status.h; confirm there. The
	// descriptor starts at -1 so that a failed open is still reported as
	// negative.
	int fd = -1;

	open(&fd, path, mode);

	return fd;

#else // _WIN32

	return open(path, mode);

#endif // _WIN32
}
138
139 /**
140 * Reads a file and returns the file as a string. This has been copied from
141 * src/read.c. Make sure to change that if this changes.
142 * @param path The path to the file.
143 * @return The contents of the file as a string.
144 */
145 static char*
bc_read_file(const char * path)146 bc_read_file(const char* path)
147 {
148 int e = IO_ERR;
149 size_t size, to_read;
150 struct stat pstat;
151 int fd;
152 char* buf;
153 char* buf2;
154
155 // This has been copied from src/read.c. Make sure to change that if this
156 // changes.
157
158 assert(path != NULL);
159
160 #if BC_DEBUG
161 // Need this to quiet MSan.
162 // NOLINTNEXTLINE
163 memset(&pstat, 0, sizeof(struct stat));
164 #endif // BC_DEBUG
165
166 fd = bc_read_open(path, O_RDONLY);
167
168 // If we can't read a file, we just barf.
169 if (BC_ERR(fd < 0))
170 {
171 fprintf(stderr, "Could not open file: %s\n", path);
172 exit(INVALID_INPUT_FILE);
173 }
174
175 // The reason we call fstat is to eliminate TOCTOU race conditions. This
176 // way, we have an open file, so it's not going anywhere.
177 if (BC_ERR(fstat(fd, &pstat) == -1))
178 {
179 fprintf(stderr, "Could not stat file: %s\n", path);
180 exit(INVALID_INPUT_FILE);
181 }
182
183 // Make sure it's not a directory.
184 if (BC_ERR(S_ISDIR(pstat.st_mode)))
185 {
186 fprintf(stderr, "Path is directory: %s\n", path);
187 exit(INVALID_INPUT_FILE);
188 }
189
190 // Get the size of the file and allocate that much.
191 size = (size_t) pstat.st_size;
192 buf = (char*) malloc(size + 1);
193 if (buf == NULL)
194 {
195 fprintf(stderr, "Could not malloc\n");
196 exit(INVALID_INPUT_FILE);
197 }
198 buf2 = buf;
199 to_read = size;
200
201 do
202 {
203 // Read the file. We just bail if a signal interrupts. This is so that
204 // users can interrupt the reading of big files if they want.
205 ssize_t r = read(fd, buf2, to_read);
206 if (BC_ERR(r < 0)) exit(e);
207 to_read -= (size_t) r;
208 buf2 += (size_t) r;
209 }
210 while (to_read);
211
212 // Got to have a nul byte.
213 buf[size] = '\0';
214
215 close(fd);
216
217 return buf;
218 }
219
220 /**
221 * Outputs a label, which is a string literal that the code can use as a name
222 * for the file that is being turned into a string. This is important for the
223 * math libraries because the parse and lex code expects a filename. The label
224 * becomes the filename for the purposes of lexing and parsing.
225 *
226 * The label is generated from bc_gen_label (above). It has the form:
227 *
228 * const char *<label_name> = <label>;
229 *
230 * This function is also needed to smooth out differences between POSIX and
231 * Windows, specifically, the fact that Windows uses backslashes for filenames
232 * and that backslashes have to be escaped in a string literal.
233 *
234 * @param out The file to output to.
235 * @param label The label name.
236 * @param name The actual label text, which is a filename.
237 * @return Positive if no error, negative on error, just like *printf().
238 */
239 static int
output_label(FILE * out,const char * label,const char * name)240 output_label(FILE* out, const char* label, const char* name)
241 {
242 #ifndef _WIN32
243
244 return fprintf(out, bc_gen_label, label, name);
245
246 #else // _WIN32
247
248 size_t i, count = 0, len = strlen(name);
249 char* buf;
250 int ret;
251
252 // This loop counts how many backslashes there are in the label.
253 for (i = 0; i < len; ++i)
254 {
255 count += (name[i] == '\\');
256 }
257
258 buf = (char*) malloc(len + 1 + count);
259 if (buf == NULL) return -1;
260
261 count = 0;
262
263 // This loop is the meat of the Windows version. What it does is copy the
264 // label byte-for-byte, unless it encounters a backslash, in which case, it
265 // copies the backslash twice to have it escaped properly in the string
266 // literal.
267 for (i = 0; i < len; ++i)
268 {
269 buf[i + count] = name[i];
270
271 if (name[i] == '\\')
272 {
273 count += 1;
274 buf[i + count] = name[i];
275 }
276 }
277
278 buf[i + count] = '\0';
279
280 ret = fprintf(out, bc_gen_label, label, buf);
281
282 free(buf);
283
284 return ret;
285
286 #endif // _WIN32
287 }
288
289 /**
290 * This program generates C strings (well, actually, C char arrays) from text
291 * files. It generates 1 C source file. The resulting file has this structure:
292 *
293 * <Copyright Header>
294 *
295 * [<Label Extern>]
296 *
297 * <Char Array Extern>
298 *
299 * [<Preprocessor Guard Begin>]
300 * [<Label Definition>]
301 *
302 * <Char Array Definition>
303 * [<Preprocessor Guard End>]
304 *
305 * Anything surrounded by square brackets may not be in the final generated
306 * source file.
307 *
308 * The required command-line parameters are:
309 *
310 * input Input filename.
311 * output Output filename.
312 * exclude Whether to exclude extra math-only stuff.
313 * name The name of the char array.
314 *
315 * The optional parameters are:
316 *
317 * label If given, a label for the char array. See the comment for the
318 * output_label() function. It is meant as a "filename" for the
319 * text when processed by bc and dc. If label is given, then the
320 * <Label Extern> and <Label Definition> will exist in the
321 * generated source file.
322 * define If given, a preprocessor macro that should be used as a guard
323 * for the char array and its label. If define is given, then
324 * <Preprocessor Guard Begin> will exist in the form
325 * "#if <define>" as part of the generated source file, and
326 * <Preprocessor Guard End> will exist in the form
327 * "endif // <define>".
328 * remove_tabs If this parameter exists, it must be an integer. If it is
329 * non-zero, then tabs are removed from the input file text before
330 * outputting to the output char array.
331 *
332 * All text files that are transformed have license comments. This program finds
333 * the end of that comment and strips it out as well.
334 */
335 int
main(int argc,char * argv[])336 main(int argc, char* argv[])
337 {
338 char* in;
339 FILE* out;
340 const char* label;
341 const char* define;
342 char* name;
343 unsigned int count, slashes, err = IO_ERR;
344 bool has_label, has_define, remove_tabs, exclude_extra_math;
345 size_t i;
346
347 if (argc < 5)
348 {
349 printf(bc_gen_usage, argv[0]);
350 return INVALID_PARAMS;
351 }
352
353 exclude_extra_math = (strtoul(argv[3], NULL, 10) != 0);
354
355 name = argv[4];
356
357 has_label = (argc > 5 && strcmp("", argv[5]) != 0);
358 label = has_label ? argv[5] : "";
359
360 has_define = (argc > 6 && strcmp("", argv[6]) != 0);
361 define = has_define ? argv[6] : "";
362
363 remove_tabs = (argc > 7 && atoi(argv[7]) != 0);
364
365 in = bc_read_file(argv[1]);
366 if (in == NULL) return INVALID_INPUT_FILE;
367
368 open_file(&out, argv[2], "w");
369 if (out == NULL) goto out_err;
370
371 if (fprintf(out, bc_gen_header, argv[1]) < 0) goto err;
372 if (has_label && fprintf(out, bc_gen_label_extern, label) < 0) goto err;
373 if (fprintf(out, bc_gen_name_extern, name) < 0) goto err;
374 if (has_define && fprintf(out, bc_gen_ifdef, define) < 0) goto err;
375 if (has_label && output_label(out, label, argv[1]) < 0) goto err;
376 if (fprintf(out, bc_gen_name, name) < 0) goto err;
377
378 i = count = slashes = 0;
379
380 // This is where the end of the license comment is found.
381 while (slashes < 2 && in[i] > 0)
382 {
383 if (slashes == 1 && in[i] == '*' && in[i + 1] == '/' &&
384 (in[i + 2] == '\n' || in[i + 2] == '\r'))
385 {
386 slashes += 1;
387 i += 2;
388 }
389 else if (!slashes && in[i] == '/' && in[i + 1] == '*')
390 {
391 slashes += 1;
392 i += 1;
393 }
394
395 i += 1;
396 }
397
398 // The file is invalid if the end of the license comment could not be found.
399 if (in[i] == 0)
400 {
401 fprintf(stderr, "Could not find end of license comment\n");
402 err = INVALID_INPUT_FILE;
403 goto err;
404 }
405
406 i += 1;
407
408 // Do not put extra newlines at the beginning of the char array.
409 while (in[i] == '\n' || in[i] == '\r')
410 {
411 i += 1;
412 }
413
414 // This loop is what generates the actual char array. It counts how many
415 // chars it has printed per line in order to insert newlines at appropriate
416 // places. It also skips tabs if they should be removed.
417 while (in[i] != 0)
418 {
419 int val;
420
421 if (in[i] == '\r')
422 {
423 i += 1;
424 continue;
425 }
426
427 if (!remove_tabs || in[i] != '\t')
428 {
429 // Check for excluding something for extra math.
430 if (in[i] == '{')
431 {
432 // If we found the start...
433 if (!strncmp(in + i, bc_gen_ex_start, strlen(bc_gen_ex_start)))
434 {
435 if (exclude_extra_math)
436 {
437 // Get past the braces.
438 i += 2;
439
440 // Find the end of the end.
441 while (in[i] != '{' && strncmp(in + i, bc_gen_ex_end,
442 strlen(bc_gen_ex_end)))
443 {
444 i += 1;
445 }
446
447 i += strlen(bc_gen_ex_end);
448
449 // Skip the last newline.
450 if (in[i] == '\r') i += 1;
451 i += 1;
452 continue;
453 }
454 else
455 {
456 i += strlen(bc_gen_ex_start);
457
458 // Skip the last newline.
459 if (in[i] == '\r') i += 1;
460 i += 1;
461 continue;
462 }
463 }
464 else if (!exclude_extra_math &&
465 !strncmp(in + i, bc_gen_ex_end, strlen(bc_gen_ex_end)))
466 {
467 i += strlen(bc_gen_ex_end);
468
469 // Skip the last newline.
470 if (in[i] == '\r') i += 1;
471 i += 1;
472 continue;
473 }
474 }
475
476 // Print a tab if we are at the beginning of a line.
477 if (!count && fputc('\t', out) == EOF) goto err;
478
479 // Print the character.
480 val = fprintf(out, "%d,", in[i]);
481 if (val < 0) goto err;
482
483 // Adjust the count.
484 count += (unsigned int) val;
485 if (count > MAX_WIDTH)
486 {
487 count = 0;
488 if (fputc('\n', out) == EOF) goto err;
489 }
490 }
491
492 i += 1;
493 }
494
495 // Make sure the end looks nice and insert the NUL byte at the end.
496 if (!count && (fputc(' ', out) == EOF || fputc(' ', out) == EOF)) goto err;
497 if (fprintf(out, "0\n};\n") < 0) goto err;
498
499 err = (has_define && fprintf(out, bc_gen_endif, define) < 0);
500
501 err:
502 fclose(out);
503 out_err:
504 free(in);
505 return (int) err;
506 }
507