xref: /aosp_15_r20/external/toybox/toys/pending/expr.c (revision cf5a6c84e2b8763fc1a7db14496fd4742913b199)
1 /* expr.c - evaluate expression
2  *
3  * Copyright 2016 Google Inc.
4  * Copyright 2013 Daniel Verkamp <[email protected]>
5  *
6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7  *
8  * The web standard is incomplete (precedence grouping missing), see:
9  * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10  *
11  * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12  *
13  * Clarke, Keith. "The top-down parsing of expressions." University of London.
14  * Queen Mary College. Department of Computer Science and Statistics, 1986.
15  *
16  * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17  *
18  * Nice explanation and Python implementation:
19  * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20 
21 USE_EXPR(NEWTOY(expr, 0, TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
22 
23 config EXPR
24   bool "expr"
25   default n
26   help
27     usage: expr ARG1 OPERATOR ARG2...
28 
29     Evaluate expression and print result. For example, "expr 1 + 2" prints "3".
30 
31     The supported operators are (grouped from highest to lowest priority):
32 
33       ( )    :    * / %    + -    != <= < >= > =    &    |
34 
35     Each constant and operator must be a separate command line argument.
36     All operators are infix, requiring a value on each side of the operator.
37     Operators of the same priority are evaluated left to right. Parentheses
38     elevate the priority of expression they contain. The & and | operators
39     are logical (not bitwise).
40 
41     All operators yield integers, and most operators expect integer arguments.
42     Comparisons may alphabetically compare strings, logical operators treat a
43     blank string as false and nonblank as true, and the regex operator
44     (str : pattern) yields the initial number of matching bytes. (So
45     "abc : ab" is 2, but "abc : bc" is 0.)
46 
47     Calling expr from a command shell requires a lot of \( or '*' escaping
48     to avoid interpreting shell control characters, vs the shell's "$((1+6/3))".
49 */
50 
51 // TODO: int overflow checking
52 
53 #define FOR_expr
54 #include "toys.h"
55 
56 GLOBALS(
57   char **tok, *delete;
58 )
59 
60 // If s string, otherwise int.
61 struct value {
62   char *s;
63   long long i;
64 };
65 
66 // Get the value as a string.
get_str(struct value * v)67 char *get_str(struct value *v)
68 {
69   if (v->s) return v->s;
70   else return xmprintf("%lld", v->i);
71 }
72 
73 // Get the value as an integer and return 1, or return 0 on error.
get_int(struct value * v,long long * ret)74 int get_int(struct value *v, long long *ret)
75 {
76   char *end;
77 
78   if (v->s) {
79     *ret = strtoll(v->s, &end, 10); // base 10 or autodetect?
80     if (*end) return 0;
81   } else *ret = v->i;
82 
83   return 1;
84 }
85 
86 // Preserve the invariant that v.s is NULL when the value is an integer.
assign_int(struct value * v,long long i)87 void assign_int(struct value *v, long long i)
88 {
89   v->i = i;
90   v->s = 0;
91 }
92 
93 // Check if v is 0 or the empty string.
is_false(struct value * v)94 static int is_false(struct value *v)
95 {
96   return get_int(v, &v->i) && !v->i;
97 }
98 
99 // 'ret' is filled with a string capture or int match position.
re(char * target,char * pattern,struct value * ret)100 static void re(char *target, char *pattern, struct value *ret)
101 {
102   regex_t pat;
103   regmatch_t m[2];
104 
105   xregcomp(&pat, pattern, 0);
106   // must match at pos 0
107   if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
108     // Return first parenthesized subexpression as string, or length of match
109     if (pat.re_nsub>0) {
110       ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so), target+m[1].rm_so);
111       free(TT.delete);
112       TT.delete = ret->s;
113     } else assign_int(ret, m[0].rm_eo);
114   } else {
115     if (pat.re_nsub>0) ret->s = "";
116     else assign_int(ret, 0);
117   }
118   regfree(&pat);
119 }
120 
121 // 4 different signatures of operators.  S = string, I = int, SI = string or
122 // int.
123 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
124 
125 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
126 
127 // operators grouped by precedence
128 static struct op_def {
129   char *tok;
130   char prec, sig, op; // precedence, signature for type coercion, operator ID
131 } OPS[] = {
132   // logical ops, precedence 1 and 2, signature SI_TO_SI
133   {"|", 1, SI_TO_SI, OR  },
134   {"&", 2, SI_TO_SI, AND },
135   // comparison ops, precedence 3, signature SI_TO_I
136   {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ  }, {"!=", 3, SI_TO_I, NE },
137   {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
138   {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
139   // arithmetic ops, precedence 4 and 5, signature I_TO_I
140   {"+", 4, I_TO_I, ADD }, {"-",  4, I_TO_I, SUB },
141   {"*", 5, I_TO_I, MUL }, {"/",  5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
142   // regex match, precedence 6, signature S_TO_SI
143   {":", 6, S_TO_SI, RE },
144   {NULL, 0, 0, 0}, // sentinel
145 };
146 
eval_op(struct op_def * o,struct value * ret,struct value * rhs)147 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
148 {
149   long long cmp, a, b, x = 0; // x = a OP b for ints.
150   char *s, *t; // string operands
151 
152   switch (o->sig) {
153 
154   case SI_TO_SI:
155     switch (o->op) {
156     case OR:  if (is_false(ret)) *ret = *rhs; break;
157     case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
158     }
159     break;
160 
161   case SI_TO_I:
162     if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
163       cmp = a - b;
164     } else { // otherwise compare both as strings
165       cmp = strcmp(s = get_str(ret), t = get_str(rhs));
166       if (ret->s != s) free(s);
167       if (rhs->s != t) free(t);
168     }
169     switch (o->op) {
170     case EQ:  x = cmp == 0; break;
171     case NE:  x = cmp != 0; break;
172     case GT:  x = cmp >  0; break;
173     case GTE: x = cmp >= 0; break;
174     case LT:  x = cmp <  0; break;
175     case LTE: x = cmp <= 0; break;
176     }
177     assign_int(ret, x);
178     break;
179 
180   case I_TO_I:
181     if (!get_int(ret, &a) || !get_int(rhs, &b))
182       error_exit("non-integer argument");
183     switch (o->op) {
184     case ADD: x = a + b; break;
185     case SUB: x = a - b; break;
186     case MUL: x = a * b; break;
187     case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
188     case MOD:  if (b == 0) error_exit("division by zero"); x = a % b; break;
189     }
190     assign_int(ret, x);
191     break;
192 
193   case S_TO_SI: // op == RE
194     s = get_str(ret);
195     cmp = ret->s!=s; // ret overwritten by re so check now
196     re(s, t = get_str(rhs), ret);
197     if (cmp) free(s);
198     if (rhs->s!=t) free(t);
199     break;
200   }
201 }
202 
203 // Recurive "Precedence Climbing" evaluation of compound expression, setting ret
eval_expr(struct value * ret,int min_prec)204 static void eval_expr(struct value *ret, int min_prec)
205 {
206   struct value rhs;
207 
208   if (!*TT.tok) error_exit("need arg @%td", TT.tok-toys.optargs);
209 
210   // Everything is infix, so set ret to first value, handling parentheses
211   if (!strcmp(*TT.tok, "(")) {
212     TT.tok++;
213     eval_expr(ret, 1);        // We're inside ( ), so min_prec = 1
214     if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
215     if (!*TT.tok || strcmp(*TT.tok, ")"))
216       error_exit("Expected ) @%td", TT.tok-toys.optargs);
217   } else ret->s = *TT.tok;  // simple literal, all values start as strings
218   TT.tok++;
219 
220   // Evaluate RHS and apply operator until precedence is too low.
221   while (*TT.tok) {
222     struct op_def *o = OPS;
223 
224     while (o->tok) { // Look up operator
225       if (!strcmp(*TT.tok, o->tok)) break;
226       o++;
227     }
228     if (!o->tok) break; // Not an operator (extra input will fail later)
229     if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
230     TT.tok++;
231 
232     eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
233     eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
234   }
235 }
236 
expr_main(void)237 void expr_main(void)
238 {
239   struct value ret = {0};
240 
241   TT.tok = toys.optargs; // initialize global token
242   eval_expr(&ret, 1);
243   if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
244 
245   if (ret.s) printf("%s\n", ret.s);
246   else printf("%lld\n", ret.i);
247 
248   toys.exitval = is_false(&ret);
249 
250   if (CFG_TOYBOX_FREE && TT.delete) free(TT.delete);
251 }
252