1 /* expr.c - evaluate expression
2 *
3 * Copyright 2016 Google Inc.
4 * Copyright 2013 Daniel Verkamp <[email protected]>
5 *
6 * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/expr.html
7 *
8 * The web standard is incomplete (precedence grouping missing), see:
9 * http://permalink.gmane.org/gmane.comp.standards.posix.austin.general/10141
10 *
11 * eval_expr() uses the recursive "Precedence Climbing" algorithm:
12 *
13 * Clarke, Keith. "The top-down parsing of expressions." University of London.
14 * Queen Mary College. Department of Computer Science and Statistics, 1986.
15 *
16 * http://www.antlr.org/papers/Clarke-expr-parsing-1986.pdf
17 *
18 * Nice explanation and Python implementation:
19 * http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing
20
21 USE_EXPR(NEWTOY(expr, 0, TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_ARGFAIL(2)))
22
23 config EXPR
24 bool "expr"
25 default n
26 help
27 usage: expr ARG1 OPERATOR ARG2...
28
29 Evaluate expression and print result. For example, "expr 1 + 2" prints "3".
30
31 The supported operators are (grouped from highest to lowest priority):
32
33 ( ) : * / % + - != <= < >= > = & |
34
35 Each constant and operator must be a separate command line argument.
36 All operators are infix, requiring a value on each side of the operator.
37 Operators of the same priority are evaluated left to right. Parentheses
38 elevate the priority of expression they contain. The & and | operators
39 are logical (not bitwise).
40
41 All operators yield integers, and most operators expect integer arguments.
42 Comparisons may alphabetically compare strings, logical operators treat a
43 blank string as false and nonblank as true, and the regex operator
44 (str : pattern) yields the initial number of matching bytes. (So
45 "abc : ab" is 2, but "abc : bc" is 0.)
46
47 Calling expr from a command shell requires a lot of \( or '*' escaping
48 to avoid interpreting shell control characters, vs the shell's "$((1+6/3))".
49 */
50
51 // TODO: int overflow checking
52
53 #define FOR_expr
54 #include "toys.h"
55
56 GLOBALS(
57 char **tok, *delete;
58 )
59
60 // If s string, otherwise int.
61 struct value {
62 char *s;
63 long long i;
64 };
65
66 // Get the value as a string.
get_str(struct value * v)67 char *get_str(struct value *v)
68 {
69 if (v->s) return v->s;
70 else return xmprintf("%lld", v->i);
71 }
72
73 // Get the value as an integer and return 1, or return 0 on error.
get_int(struct value * v,long long * ret)74 int get_int(struct value *v, long long *ret)
75 {
76 char *end;
77
78 if (v->s) {
79 *ret = strtoll(v->s, &end, 10); // base 10 or autodetect?
80 if (*end) return 0;
81 } else *ret = v->i;
82
83 return 1;
84 }
85
86 // Preserve the invariant that v.s is NULL when the value is an integer.
assign_int(struct value * v,long long i)87 void assign_int(struct value *v, long long i)
88 {
89 v->i = i;
90 v->s = 0;
91 }
92
93 // Check if v is 0 or the empty string.
is_false(struct value * v)94 static int is_false(struct value *v)
95 {
96 return get_int(v, &v->i) && !v->i;
97 }
98
99 // 'ret' is filled with a string capture or int match position.
re(char * target,char * pattern,struct value * ret)100 static void re(char *target, char *pattern, struct value *ret)
101 {
102 regex_t pat;
103 regmatch_t m[2];
104
105 xregcomp(&pat, pattern, 0);
106 // must match at pos 0
107 if (!regexec(&pat, target, 2, m, 0) && !m[0].rm_so) {
108 // Return first parenthesized subexpression as string, or length of match
109 if (pat.re_nsub>0) {
110 ret->s = xmprintf("%.*s", (int)(m[1].rm_eo-m[1].rm_so), target+m[1].rm_so);
111 free(TT.delete);
112 TT.delete = ret->s;
113 } else assign_int(ret, m[0].rm_eo);
114 } else {
115 if (pat.re_nsub>0) ret->s = "";
116 else assign_int(ret, 0);
117 }
118 regfree(&pat);
119 }
120
121 // 4 different signatures of operators. S = string, I = int, SI = string or
122 // int.
123 enum { SI_TO_SI = 1, SI_TO_I, I_TO_I, S_TO_SI };
124
125 enum { OR = 1, AND, EQ, NE, GT, GTE, LT, LTE, ADD, SUB, MUL, DIVI, MOD, RE };
126
127 // operators grouped by precedence
128 static struct op_def {
129 char *tok;
130 char prec, sig, op; // precedence, signature for type coercion, operator ID
131 } OPS[] = {
132 // logical ops, precedence 1 and 2, signature SI_TO_SI
133 {"|", 1, SI_TO_SI, OR },
134 {"&", 2, SI_TO_SI, AND },
135 // comparison ops, precedence 3, signature SI_TO_I
136 {"=", 3, SI_TO_I, EQ }, {"==", 3, SI_TO_I, EQ }, {"!=", 3, SI_TO_I, NE },
137 {">", 3, SI_TO_I, GT }, {">=", 3, SI_TO_I, GTE },
138 {"<", 3, SI_TO_I, LT }, {"<=", 3, SI_TO_I, LTE },
139 // arithmetic ops, precedence 4 and 5, signature I_TO_I
140 {"+", 4, I_TO_I, ADD }, {"-", 4, I_TO_I, SUB },
141 {"*", 5, I_TO_I, MUL }, {"/", 5, I_TO_I, DIVI }, {"%", 5, I_TO_I, MOD },
142 // regex match, precedence 6, signature S_TO_SI
143 {":", 6, S_TO_SI, RE },
144 {NULL, 0, 0, 0}, // sentinel
145 };
146
eval_op(struct op_def * o,struct value * ret,struct value * rhs)147 void eval_op(struct op_def *o, struct value *ret, struct value *rhs)
148 {
149 long long cmp, a, b, x = 0; // x = a OP b for ints.
150 char *s, *t; // string operands
151
152 switch (o->sig) {
153
154 case SI_TO_SI:
155 switch (o->op) {
156 case OR: if (is_false(ret)) *ret = *rhs; break;
157 case AND: if (is_false(ret) || is_false(rhs)) assign_int(ret, 0); break;
158 }
159 break;
160
161 case SI_TO_I:
162 if (get_int(ret, &a) && get_int(rhs, &b)) { // both are ints
163 cmp = a - b;
164 } else { // otherwise compare both as strings
165 cmp = strcmp(s = get_str(ret), t = get_str(rhs));
166 if (ret->s != s) free(s);
167 if (rhs->s != t) free(t);
168 }
169 switch (o->op) {
170 case EQ: x = cmp == 0; break;
171 case NE: x = cmp != 0; break;
172 case GT: x = cmp > 0; break;
173 case GTE: x = cmp >= 0; break;
174 case LT: x = cmp < 0; break;
175 case LTE: x = cmp <= 0; break;
176 }
177 assign_int(ret, x);
178 break;
179
180 case I_TO_I:
181 if (!get_int(ret, &a) || !get_int(rhs, &b))
182 error_exit("non-integer argument");
183 switch (o->op) {
184 case ADD: x = a + b; break;
185 case SUB: x = a - b; break;
186 case MUL: x = a * b; break;
187 case DIVI: if (b == 0) error_exit("division by zero"); x = a / b; break;
188 case MOD: if (b == 0) error_exit("division by zero"); x = a % b; break;
189 }
190 assign_int(ret, x);
191 break;
192
193 case S_TO_SI: // op == RE
194 s = get_str(ret);
195 cmp = ret->s!=s; // ret overwritten by re so check now
196 re(s, t = get_str(rhs), ret);
197 if (cmp) free(s);
198 if (rhs->s!=t) free(t);
199 break;
200 }
201 }
202
203 // Recurive "Precedence Climbing" evaluation of compound expression, setting ret
eval_expr(struct value * ret,int min_prec)204 static void eval_expr(struct value *ret, int min_prec)
205 {
206 struct value rhs;
207
208 if (!*TT.tok) error_exit("need arg @%td", TT.tok-toys.optargs);
209
210 // Everything is infix, so set ret to first value, handling parentheses
211 if (!strcmp(*TT.tok, "(")) {
212 TT.tok++;
213 eval_expr(ret, 1); // We're inside ( ), so min_prec = 1
214 if (ret->s && !strcmp(ret->s, ")")) error_exit("empty ( )");
215 if (!*TT.tok || strcmp(*TT.tok, ")"))
216 error_exit("Expected ) @%td", TT.tok-toys.optargs);
217 } else ret->s = *TT.tok; // simple literal, all values start as strings
218 TT.tok++;
219
220 // Evaluate RHS and apply operator until precedence is too low.
221 while (*TT.tok) {
222 struct op_def *o = OPS;
223
224 while (o->tok) { // Look up operator
225 if (!strcmp(*TT.tok, o->tok)) break;
226 o++;
227 }
228 if (!o->tok) break; // Not an operator (extra input will fail later)
229 if (o->prec < min_prec) break; // Precedence too low, pop a stack frame
230 TT.tok++;
231
232 eval_expr(&rhs, o->prec + 1); // Evaluate RHS, with higher min precedence
233 eval_op(o, ret, &rhs); // Apply operator, setting 'ret'
234 }
235 }
236
expr_main(void)237 void expr_main(void)
238 {
239 struct value ret = {0};
240
241 TT.tok = toys.optargs; // initialize global token
242 eval_expr(&ret, 1);
243 if (*TT.tok) error_exit("Unexpected extra input '%s'\n", *TT.tok);
244
245 if (ret.s) printf("%s\n", ret.s);
246 else printf("%lld\n", ret.i);
247
248 toys.exitval = is_false(&ret);
249
250 if (CFG_TOYBOX_FREE && TT.delete) free(TT.delete);
251 }
252