xref: /aosp_15_r20/external/toybox/lib/args.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* args.c - Command line argument parsing.
2  *
3  * Copyright 2006 Rob Landley <rob@landley.net>
4  */
5 
6 // NOTE: If option parsing segfaults, switch on TOYBOX_DEBUG in menuconfig to
7 // add syntax checks to option string parsing which aren't needed in the final
8 // code (since get_opt string is hardwired and should be correct when you ship)
9 
10 #include "toys.h"
11 
12 // Design goals:
13 //   Don't use getopt() out of libc.
14 //   Don't permute original arguments (screwing up ps/top output).
15 //   Integrated --long options "(noshort)a(along)b(blong1)(blong2)"
16 
17 /* This uses a getopt-like option string, but not getopt() itself. We call
18  * it the get_opt string.
19  *
20  * Each option in the get_opt string corresponds to a bit position in the
21  * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
22  * and so on. If the option isn't seen in argv[], its bit remains 0.
23  *
24  * Options which have an argument fill in the corresponding slot in the global
25  * union "this" (see generated/globals.h), which it treats as an array of longs
26  * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
27  *
28  * You don't have to free the option strings, which point into the environment
29  * space. List objects should be freed by main() when command_main() returns.
30  *
31  * Example:
32  *   Calling get_optflags() when toys.which->options="ab:c:d" and
33  *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
34  *
35  *     Changes to struct toys:
36  *       toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
37  *       toys.optargs[0] = "walrus" (leftover argument)
38  *       toys.optargs[1] = NULL (end of list)
39  *       toys.optc = 1 (there was 1 leftover argument)
40  *
41  *     Changes to union this:
42  *       this[0]=NULL (because -c didn't get an argument this time)
43  *       this[1]="fruit" (argument to -b)
44  */
45 
46 // What you can put in a get_opt string:
47 //   Any otherwise unused character (all letters, unprefixed numbers) specifies
48 //   an option that sets a flag. The bit value is the same as the binary digit
49 //   if you string the option characters together in order.
50 //   So in "abcdefgh" a = 128, h = 1
51 //
52 //   Suffixes specify that this option takes an argument (stored in GLOBALS):
53 //       Note that pointer and long are always the same size, even on 64 bit.
54 //     : string argument, keep most recent if more than one
55 //     * string argument, appended to a struct arg_list linked list.
56 //     # signed long argument
57 //       <LOW     - die if less than LOW
58 //       >HIGH    - die if greater than HIGH
59 //       =DEFAULT - value if not specified
60 //     - signed long argument defaulting to negative (say + for positive)
61 //     . double precision floating point argument (with CFG_TOYBOX_FLOAT)
62 //       Chop this option out with USE_TOYBOX_FLOAT() in option string
63 //       Same <LOW>HIGH=DEFAULT as #
64 //     @ occurrence counter (which is a long)
65 //     % time offset in milliseconds with optional s/m/h/d suffix
66 //     (longopt)
67 //     | this is required. If more than one marked, only one required.
68 //     ; Option's argument is optional, and must be collated: -aARG or --a=ARG
69 //     ^ Stop parsing after encountering this argument
70 //    " " (space char) the "plus an argument" must be separate
71 //        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
72 //
73 //   At the beginning of the get_opt string (before any options):
74 //     <0 die if less than # leftover arguments (default 0)
75 //     >9 die if > # leftover arguments (default MAX_INT)
76 //     0 Include argv[0] in optargs
77 //     ^ stop at first nonoption argument
78 //     ? Pass unknown arguments through to command (implied when no flags).
79 //     & first arg has imaginary dash (ala tar/ps/ar) which sets FLAGS_NODASH
80 //     ~ Collate following bare longopts (as if under short opt, repeatable)
81 //
82 //   At the end: [groups] of previously seen options
83 //     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
84 //     + Synonyms (switch on all)          [+abc] means -ab=-abc, -c=-abc
85 //     ! More than one in group is error   [!abc] means -ab calls error_exit()
86 //       primarily useful if you can switch things back off again.
87 //
88 //   You may use octal escapes with the high bit (128) set to use a control
89 //   character as an option flag. For example, \300 would be the option -@
90 
91 // Notes from getopt man page
92 //   - and -- cannot be arguments.
93 //     -- force end of arguments
94 //     - is a synonym for stdin in file arguments
95 //   -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)
96 
97 // Linked list of all known options (option string parsed into this).
98 // Hangs off getoptflagstate, freed at end of option parsing.
99 struct opts {
100   struct opts *next;
101   long *arg;         // Pointer into union "this" to store arguments at.
102   int c;             // Argument character to match
103   int flags;         // |=1, ^=2, " "=4, ;=8
104   unsigned long long dex[3]; // bits to disable/enable/exclude in toys.optflags
105   char type;         // Type of arguments to store union "this"
106   union {
107     long l;
108     FLOAT f;
109   } val[3];          // low, high, default - range of allowed values
110 };
111 
// One node per --long option name. The list hangs off struct
// getoptflagstate and is freed at the end of option parsing. Which flag to
// set and which global slot to fill are recorded in the associated struct
// opts; when opt->c is -1 the long option is "bare" (it has no corresponding
// short option).
struct longopts {
  struct longopts *next;
  struct opts *opt;  // Option this name is an alias for
  char *str;         // Start of the name inside the get_opt string
  int len;           // Length of the name (str is not NUL terminated)
};
122 
// State during argument parsing.
struct getoptflagstate
{
  // argc: index into toys.argv of the argument being processed.
  // minargs/maxargs: allowed number of leftover arguments ("<N" / ">N").
  int argc, minargs, maxargs;
  char *arg;                  // Current position inside the argument
  struct opts *opts;          // Known options, rightmost option first
  struct longopts *longopts;  // Known --long option names
  // noerror: pass unknown options through as arguments ("?" or no options).
  // nodash_now: first argument has an implied dash ("&").
  // stopearly: stop option parsing ("^" prefix, "^" option flag, or "--").
  int noerror, nodash_now, stopearly;
  // Masks of toys.optflags bits. These must be as wide as opts.dex[] and
  // toys.optflags, otherwise options at bit position 32 and above would
  // silently lose their exclusion ("[!...]") and required ("|") checks.
  unsigned long long excludes, requires;
};
133 
forget_arg(struct opts * opt)134 static void forget_arg(struct opts *opt)
135 {
136   if (opt->arg) {
137     if (opt->type=='*') llist_traverse((void *)*opt->arg, free);
138     *opt->arg = opt->val[2].l;
139   }
140 }
141 
142 // Use getoptflagstate to parse one command line option from argv
143 // Sets flags, saves/clears opt->arg, advances gof->arg/gof->argc as necessary
gotflag(struct getoptflagstate * gof,struct opts * opt,int longopt)144 static void gotflag(struct getoptflagstate *gof, struct opts *opt, int longopt)
145 {
146   unsigned long long i;
147   struct opts *and;
148   char *arg;
149   int type;
150 
151   // Did we recognize this option?
152   if (!opt) help_exit("Unknown option '%s'", gof->arg);
153 
154   // Might enabling this switch off something else?
155   if (toys.optflags & opt->dex[0]) {
156     // Forget saved argument for flag we switch back off
157     for (and = gof->opts, i = 1; and; and = and->next, i<<=1)
158       if (i & toys.optflags & opt->dex[0]) forget_arg(and);
159     toys.optflags &= ~opt->dex[0];
160   }
161 
162   // Set flags
163   toys.optflags |= opt->dex[1];
164   gof->excludes |= opt->dex[2];
165   if (opt->flags&2) gof->stopearly=2;
166 
167   if (toys.optflags & gof->excludes) {
168     for (and = gof->opts, i = 1; and; and = and->next, i<<=1) {
169       if (opt == and || !(i & toys.optflags)) continue;
170       if (toys.optflags & and->dex[2]) break;
171     }
172     if (and) help_exit("No '%c' with '%c'", opt->c, and->c);
173   }
174 
175   // Are we NOT saving an argument? (Type 0, '@', unattached ';', short ' ')
176   if (*(arg = gof->arg)) gof->arg++;
177   if ((type = opt->type) == '@') {
178     ++*opt->arg;
179     return;
180   }
181   if (!longopt && *gof->arg && (opt->flags & 4)) return forget_arg(opt);
182   if (!type || (!arg[!longopt] && (opt->flags & 8))) return forget_arg(opt);
183 
184   // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
185   // to make "tar xCjfv blah1 blah2 thingy" work like
186   // "tar -x -C blah1 -j -f blah2 -v thingy"
187 
188   if (longopt && *arg) arg++;
189   else arg = (gof->nodash_now||!*gof->arg) ? toys.argv[++gof->argc] : gof->arg;
190   if (!gof->nodash_now) gof->arg = "";
191   if (!arg) {
192     struct longopts *lo;
193 
194     arg = "Missing argument to ";
195     if (opt->c != -1) help_exit("%s-%c", arg, opt->c);
196     for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
197     help_exit("%s--%.*s", arg, lo->len, lo->str);
198   }
199 
200   // Parse argument by type
201   if (type == ':') *(opt->arg) = (long)arg;
202   else if (type == '*') {
203     struct arg_list **list;
204 
205     list = (struct arg_list **)opt->arg;
206     while (*list) list=&((*list)->next);
207     *list = xzalloc(sizeof(struct arg_list));
208     (*list)->arg = arg;
209   } else if (type == '#' || type == '-' || type == '%') {
210     long long l = (type == '%') ? xparsemillitime(arg) : atolx(arg);
211 
212     if (type == '-' && !ispunct(*arg)) l*=-1;
213     arg = (type == '%') ? "ms" : "";
214     if (l < opt->val[0].l) help_exit("-%c < %ld%s", opt->c, opt->val[0].l, arg);
215     if (l > opt->val[1].l) help_exit("-%c > %ld%s", opt->c, opt->val[1].l, arg);
216 
217     *(opt->arg) = l;
218   } else if (CFG_TOYBOX_FLOAT && type == '.') {
219     FLOAT *f = (FLOAT *)(opt->arg);
220 
221     *f = strtod(arg, &arg);
222     if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
223       help_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
224     if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
225       help_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
226   }
227 }
228 
229 // Parse this command's options string into struct getoptflagstate, which
230 // includes a struct opts linked list in reverse order (I.E. right-to-left)
parse_optflaglist(struct getoptflagstate * gof)231 static int parse_optflaglist(struct getoptflagstate *gof)
232 {
233   char *options = toys.which->options;
234   long *nextarg = (long *)&this;
235   struct opts *new = 0;
236   int idx, rc = 0;
237 
238   // Parse option format string
239   memset(gof, 0, sizeof(struct getoptflagstate));
240   gof->maxargs = INT_MAX;
241   if (!options) return 0;
242 
243   // Parse leading special behavior indicators
244   for (;;) {
245     if (*options == '^') gof->stopearly++;
246     else if (*options == '<') gof->minargs=*(++options)-'0';
247     else if (*options == '>') gof->maxargs=*(++options)-'0';
248     else if (*options == '?') gof->noerror++;
249     else if (*options == '&') gof->nodash_now = 1;
250     else if (*options == '0') rc = 1;
251     else break;
252     options++;
253   }
254 
255   // Parse option string into a linked list of options with attributes.
256 
257   if (!*options) gof->noerror++;
258   while (*options) {
259     char *temp;
260 
261     // Option groups come after all options are defined
262     if (*options == '[') break;
263 
264     // Allocate a new list entry when necessary
265     if (!new) {
266       new = xzalloc(sizeof(struct opts));
267       new->next = gof->opts;
268       gof->opts = new;
269       new->val[0].l = LONG_MIN;
270       new->val[1].l = LONG_MAX;
271     }
272     // Each option must start with "(" or an option character. (Bare
273     // longopts only come at the start of the string.)
274     if (*options == '(' && new->c != -1) {
275       char *end;
276       struct longopts *lo;
277 
278       // Find the end of the longopt
279       for (end = ++options; *end && *end != ')'; end++);
280       if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
281 
282       // init a new struct longopts
283       lo = xmalloc(sizeof(struct longopts));
284       lo->next = gof->longopts;
285       lo->opt = new;
286       lo->str = options;
287       lo->len = end-options;
288       gof->longopts = lo;
289       options = ++end;
290 
291       // Mark this struct opt as used, even when no short opt.
292       if (!new->c) new->c = -1;
293 
294       continue;
295 
296     // If this is the start of a new option that wasn't a longopt,
297 
298     } else if (strchr(":*#@.-%", *options)) {
299       if (CFG_TOYBOX_DEBUG && new->type)
300         error_exit("multiple types %c:%c%c", new->c, new->type, *options);
301       new->type = *options;
302     } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
303     // bounds checking
304     else if (-1 != (idx = stridx("<>=", *options))) {
305       if (new->type == '#' || new->type == '%') {
306         long l = strtol(++options, &temp, 10);
307         if (temp != options) new->val[idx].l = l;
308       } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
309         FLOAT f = strtod(++options, &temp);
310         if (temp != options) new->val[idx].f = f;
311       } else error_exit("<>= only after .#%%");
312       options = --temp;
313 
314     // At this point, we've hit the end of the previous option. The
315     // current character is the start of a new option. If we've already
316     // assigned an option to this struct, loop to allocate a new one.
317     // (It'll get back here afterwards and fall through to next else.)
318     } else if (new->c) {
319       new = 0;
320       continue;
321 
322     // Claim this option, loop to see what's after it.
323     } else new->c = *options;
324 
325     options++;
326   }
327 
328   // Initialize enable/disable/exclude masks and pointers to store arguments.
329   // (This goes right to left so we need the whole list before we can start.)
330   idx = 0;
331   for (new = gof->opts; new; new = new->next) {
332     unsigned long long u = 1LL<<idx++;
333 
334     if (new->c == 1 || new->c=='~') new->c = 0;
335     else new->c &= 127;
336     new->dex[1] = u;
337     if (new->flags & 1) gof->requires |= u;
338     if (new->type) {
339       new->arg = (void *)nextarg;
340       *(nextarg++) = new->val[2].l;
341     }
342   }
343 
344   // Parse trailing group indicators
345   while (*options) {
346     unsigned long long bits = 0;
347 
348     if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);
349 
350     idx = stridx("-+!", *++options);
351     if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
352     if (CFG_TOYBOX_DEBUG && (options[1] == ']' || !options[1]))
353       error_exit("empty []");
354 
355     // Don't advance past ] but do process it once in loop.
356     while (*options++ != ']') {
357       struct opts *opt;
358       long long ll;
359 
360       if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
361       // Find this option flag (in previously parsed struct opt)
362       for (ll = 1, opt = gof->opts; ; ll <<= 1, opt = opt->next) {
363         if (*options == ']') {
364           if (!opt) break;
365           if (bits&ll) opt->dex[idx] |= bits&~ll;
366         } else {
367           if (*options==1) break;
368           if (CFG_TOYBOX_DEBUG && !opt)
369             error_exit("[] unknown target %c", *options);
370           if (opt->c == (127&*options)) {
371             bits |= ll;
372             break;
373           }
374         }
375       }
376     }
377   }
378 
379   return rc;
380 }
381 
382 // Fill out toys.optflags, toys.optargs, and this[] from toys.argv
383 
get_optflags(void)384 void get_optflags(void)
385 {
386   struct getoptflagstate gof;
387   struct opts *catch;
388   unsigned long long saveflags;
389   char *letters[]={"s",""}, *ss;
390 
391   // Option parsing is a two stage process: parse the option string into
392   // a struct opts list, then use that list to process argv[];
393 
394   toys.exitval = toys.which->flags >> 24;
395 
396   // Allocate memory for optargs
397   saveflags = toys.optc = parse_optflaglist(&gof);
398   while (toys.argv[saveflags++]);
399   toys.optargs = xzalloc(sizeof(char *)*saveflags);
400   if (toys.optc) *toys.optargs = *toys.argv;
401 
402   if (toys.argv[1] && toys.argv[1][0] == '-') gof.nodash_now = 0;
403 
404   // Iterate through command line arguments, skipping argv[0]
405   for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
406     gof.arg = toys.argv[gof.argc];
407     catch = 0;
408 
409     // Parse this argument
410     if (gof.stopearly>1) goto notflag;
411 
412     if (gof.argc>1 || *gof.arg=='-') gof.nodash_now = 0;
413 
414     // Various things with dashes
415     if (*gof.arg == '-') {
416 
417       // Handle -
418       if (!gof.arg[1]) goto notflag;
419       gof.arg++;
420       if (*gof.arg=='-') {
421         struct longopts *lo;
422         struct arg_list *al = 0, *al2;
423         int ii;
424 
425         gof.arg++;
426         // Handle --
427         if (!*gof.arg) {
428           gof.stopearly += 2;
429           continue;
430         }
431 
432         // unambiguously match the start of a known --longopt?
433         check_help(toys.argv+gof.argc);
434         for (lo = gof.longopts; lo; lo = lo->next) {
435           for (ii = 0; ii<lo->len; ii++) if (gof.arg[ii] != lo->str[ii]) break;
436 
437           // = only terminates when we can take an argument, not type 0 or '@'
438           if (!gof.arg[ii] || (gof.arg[ii]=='=' && !strchr("@", lo->opt->type)))
439           {
440             al2 = xmalloc(sizeof(struct arg_list));
441             al2->next = al;
442             al2->arg = (void *)lo;
443             al = al2;
444 
445             // Exact match is unambigous even when longer options available
446             if (ii==lo->len) {
447               llist_traverse(al, free);
448               al = 0;
449 
450               break;
451             }
452           }
453         }
454         // How many matches?
455         if (al) {
456           *libbuf = 0;
457           if (al->next) for (ss = libbuf, al2 = al; al2; al2 = al2->next) {
458             lo = (void *)al2->arg;
459             ss += sprintf(ss, " %.*s"+(al2==al), lo->len, lo->str);
460           } else lo = (void *)al->arg;
461           llist_traverse(al, free);
462           if (*libbuf) error_exit("bad --%s (%s)", gof.arg, libbuf);
463         }
464 
465         // One unambiguous match?
466         if (lo) {
467           catch = lo->opt;
468           while (!strchr("=", *gof.arg)) gof.arg++;
469         // Should we handle this --longopt as a non-option argument?
470         } else if (gof.noerror) {
471           gof.arg -= 2;
472           goto notflag;
473         }
474 
475         // Long option parsed, handle option.
476         gotflag(&gof, catch, 1);
477         continue;
478       }
479 
480     // Handle things that don't start with a dash.
481     } else {
482       if (gof.nodash_now) toys.optflags |= FLAGS_NODASH;
483       else goto notflag;
484     }
485 
486     // At this point, we have the args part of -args. Loop through
487     // each entry (could be -abc meaning -a -b -c)
488     saveflags = toys.optflags;
489     while (gof.arg && *gof.arg) {
490 
491       // Identify next option char.
492       for (catch = gof.opts; catch; catch = catch->next)
493         if (*gof.arg == catch->c)
494           if (!gof.arg[1] || (catch->flags&(4|8))!=4) break;
495 
496       if (!catch && gof.noerror) {
497         toys.optflags = saveflags;
498         gof.arg = toys.argv[gof.argc];
499         goto notflag;
500       }
501 
502       // Handle option char (advancing past what was used)
503       gotflag(&gof, catch, 0);
504     }
505     continue;
506 
507     // Not a flag, save value in toys.optargs[]
508 notflag:
509     if (gof.stopearly) gof.stopearly++;
510     toys.optargs[toys.optc++] = toys.argv[gof.argc];
511   }
512 
513   // Sanity check
514   if (toys.optc<gof.minargs)
515     help_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
516       gof.minargs, letters[!(gof.minargs-1)]);
517   if (toys.optc>gof.maxargs)
518     help_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
519   if (gof.requires && !(gof.requires & toys.optflags)) {
520     struct opts *req;
521     char needs[32], *s = needs;
522 
523     for (req = gof.opts; req; req = req->next)
524       if (req->flags & 1) *(s++) = req->c;
525     *s = 0;
526 
527     help_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
528   }
529 
530   toys.exitval = 0;
531 
532   if (CFG_TOYBOX_FREE) {
533     llist_traverse(gof.opts, free);
534     llist_traverse(gof.longopts, free);
535   }
536 }
537