1 /*
2 * *****************************************************************************
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c) 2018-2024 Gavin D. Howard and contributors.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice, this
12 * list of conditions and the following disclaimer.
13 *
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * *****************************************************************************
31 *
32 * The parser for dc.
33 *
34 */
35
36 #if DC_ENABLED
37
38 #include <assert.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <setjmp.h>
42
43 #include <dc.h>
44 #include <program.h>
45 #include <vm.h>
46
47 /**
48 * Parses a register. The lexer should have already lexed the true name of the
49 * register, per extended registers and such.
50 * @param p The parser.
51 * @param var True if the parser is for a variable, false otherwise.
52 */
53 static void
dc_parse_register(BcParse * p,bool var)54 dc_parse_register(BcParse* p, bool var)
55 {
56 bc_lex_next(&p->l);
57 if (p->l.t != BC_LEX_NAME) bc_parse_err(p, BC_ERR_PARSE_TOKEN);
58
59 bc_parse_pushName(p, p->l.str.v, var);
60 }
61
62 /**
63 * Parses a dc string.
64 * @param p The parser.
65 */
66 static inline void
dc_parse_string(BcParse * p)67 dc_parse_string(BcParse* p)
68 {
69 bc_parse_addString(p);
70 bc_lex_next(&p->l);
71 }
72
73 /**
74 * Parses a token that requires a memory operation, like load or store.
75 * @param p The parser.
76 * @param inst The instruction to push for the memory operation.
77 * @param name Whether the load or store is to a variable or array, and not to
78 * a global.
79 * @param store True if the operation is a store, false otherwise.
80 */
81 static void
dc_parse_mem(BcParse * p,uchar inst,bool name,bool store)82 dc_parse_mem(BcParse* p, uchar inst, bool name, bool store)
83 {
84 // Push the instruction.
85 bc_parse_push(p, inst);
86
87 // Parse the register if necessary.
88 if (name) dc_parse_register(p, inst != BC_INST_ARRAY_ELEM);
89
90 // Stores use the bc assign infrastructure, but they need to do a swap
91 // first.
92 if (store)
93 {
94 bc_parse_push(p, BC_INST_SWAP);
95 bc_parse_push(p, BC_INST_ASSIGN_NO_VAL);
96 }
97
98 bc_lex_next(&p->l);
99 }
100
101 /**
102 * Parses a conditional execution instruction.
103 * @param p The parser.
104 * @param inst The instruction for the condition.
105 */
106 static void
dc_parse_cond(BcParse * p,uchar inst)107 dc_parse_cond(BcParse* p, uchar inst)
108 {
109 // Push the instruction for the condition and the conditional execution.
110 bc_parse_push(p, inst);
111 bc_parse_push(p, BC_INST_EXEC_COND);
112
113 // Parse the register.
114 dc_parse_register(p, true);
115
116 bc_lex_next(&p->l);
117
118 // If the next token is an else, parse the else.
119 if (p->l.t == BC_LEX_KW_ELSE)
120 {
121 dc_parse_register(p, true);
122 bc_lex_next(&p->l);
123 }
124 // Otherwise, push a marker for no else.
125 else bc_parse_pushIndex(p, SIZE_MAX);
126 }
127
128 /**
129 * Parses a token for dc.
130 * @param p The parser.
131 * @param t The token to parse.
132 * @param flags The flags that say what is allowed or not.
133 */
134 static void
dc_parse_token(BcParse * p,BcLexType t,uint8_t flags)135 dc_parse_token(BcParse* p, BcLexType t, uint8_t flags)
136 {
137 uchar inst;
138 bool assign, get_token = false;
139
140 switch (t)
141 {
142 case BC_LEX_OP_REL_EQ:
143 case BC_LEX_OP_REL_LE:
144 case BC_LEX_OP_REL_GE:
145 case BC_LEX_OP_REL_NE:
146 case BC_LEX_OP_REL_LT:
147 case BC_LEX_OP_REL_GT:
148 {
149 inst = (uchar) (t - BC_LEX_OP_REL_EQ + BC_INST_REL_EQ);
150 dc_parse_cond(p, inst);
151 break;
152 }
153
154 case BC_LEX_SCOLON:
155 case BC_LEX_COLON:
156 {
157 dc_parse_mem(p, BC_INST_ARRAY_ELEM, true, t == BC_LEX_COLON);
158 break;
159 }
160
161 case BC_LEX_STR:
162 {
163 dc_parse_string(p);
164 break;
165 }
166
167 case BC_LEX_NEG:
168 {
169 // This tells us whether or not the neg is for a command or at the
170 // beginning of a number. If it's a command, push it. Otherwise,
171 // fallthrough and parse the number.
172 if (dc_lex_negCommand(&p->l))
173 {
174 bc_parse_push(p, BC_INST_NEG);
175 get_token = true;
176 break;
177 }
178
179 bc_lex_next(&p->l);
180
181 // Fallthrough.
182 BC_FALLTHROUGH
183 }
184
185 case BC_LEX_NUMBER:
186 {
187 bc_parse_number(p);
188
189 // Push the negative instruction if we fell through from above.
190 if (t == BC_LEX_NEG) bc_parse_push(p, BC_INST_NEG);
191 get_token = true;
192
193 break;
194 }
195
196 case BC_LEX_KW_READ:
197 {
198 // Make sure the read is not recursive.
199 if (BC_ERR(flags & BC_PARSE_NOREAD))
200 {
201 bc_parse_err(p, BC_ERR_EXEC_REC_READ);
202 }
203 else bc_parse_push(p, BC_INST_READ);
204
205 get_token = true;
206
207 break;
208 }
209
210 case BC_LEX_OP_ASSIGN:
211 case BC_LEX_STORE_PUSH:
212 {
213 assign = t == BC_LEX_OP_ASSIGN;
214 inst = assign ? BC_INST_VAR : BC_INST_PUSH_TO_VAR;
215 dc_parse_mem(p, inst, true, assign);
216 break;
217 }
218
219 case BC_LEX_LOAD:
220 case BC_LEX_LOAD_POP:
221 {
222 inst = t == BC_LEX_LOAD_POP ? BC_INST_PUSH_VAR : BC_INST_LOAD;
223 dc_parse_mem(p, inst, true, false);
224 break;
225 }
226
227 case BC_LEX_REG_STACK_LEVEL:
228 {
229 dc_parse_mem(p, BC_INST_REG_STACK_LEN, true, false);
230 break;
231 }
232
233 case BC_LEX_STORE_IBASE:
234 case BC_LEX_STORE_OBASE:
235 case BC_LEX_STORE_SCALE:
236 #if BC_ENABLE_EXTRA_MATH
237 case BC_LEX_STORE_SEED:
238 #endif // BC_ENABLE_EXTRA_MATH
239 {
240 inst = (uchar) (t - BC_LEX_STORE_IBASE + BC_INST_IBASE);
241 dc_parse_mem(p, inst, false, true);
242 break;
243 }
244
245 case BC_LEX_ARRAY_LENGTH:
246 {
247 // Need to push the array first, based on how length is implemented.
248 bc_parse_push(p, BC_INST_ARRAY);
249 dc_parse_register(p, false);
250
251 bc_parse_push(p, BC_INST_LENGTH);
252
253 get_token = true;
254
255 break;
256 }
257
258 case BC_LEX_EOF:
259 case BC_LEX_INVALID:
260 #if BC_ENABLED
261 case BC_LEX_OP_INC:
262 case BC_LEX_OP_DEC:
263 #endif // BC_ENABLED
264 case BC_LEX_OP_BOOL_NOT:
265 #if BC_ENABLE_EXTRA_MATH
266 case BC_LEX_OP_TRUNC:
267 #endif // BC_ENABLE_EXTRA_MATH
268 case BC_LEX_OP_POWER:
269 case BC_LEX_OP_MULTIPLY:
270 case BC_LEX_OP_DIVIDE:
271 case BC_LEX_OP_MODULUS:
272 case BC_LEX_OP_PLUS:
273 case BC_LEX_OP_MINUS:
274 #if BC_ENABLE_EXTRA_MATH
275 case BC_LEX_OP_PLACES:
276 case BC_LEX_OP_LSHIFT:
277 case BC_LEX_OP_RSHIFT:
278 #endif // BC_ENABLE_EXTRA_MATH
279 case BC_LEX_OP_BOOL_OR:
280 case BC_LEX_OP_BOOL_AND:
281 #if BC_ENABLED
282 case BC_LEX_OP_ASSIGN_POWER:
283 case BC_LEX_OP_ASSIGN_MULTIPLY:
284 case BC_LEX_OP_ASSIGN_DIVIDE:
285 case BC_LEX_OP_ASSIGN_MODULUS:
286 case BC_LEX_OP_ASSIGN_PLUS:
287 case BC_LEX_OP_ASSIGN_MINUS:
288 #if BC_ENABLE_EXTRA_MATH
289 case BC_LEX_OP_ASSIGN_PLACES:
290 case BC_LEX_OP_ASSIGN_LSHIFT:
291 case BC_LEX_OP_ASSIGN_RSHIFT:
292 #endif // BC_ENABLE_EXTRA_MATH
293 #endif // BC_ENABLED
294 case BC_LEX_NLINE:
295 case BC_LEX_WHITESPACE:
296 case BC_LEX_LPAREN:
297 case BC_LEX_RPAREN:
298 case BC_LEX_LBRACKET:
299 case BC_LEX_COMMA:
300 case BC_LEX_RBRACKET:
301 case BC_LEX_LBRACE:
302 case BC_LEX_NAME:
303 case BC_LEX_RBRACE:
304 #if BC_ENABLED
305 case BC_LEX_KW_AUTO:
306 case BC_LEX_KW_BREAK:
307 case BC_LEX_KW_CONTINUE:
308 case BC_LEX_KW_DEFINE:
309 case BC_LEX_KW_FOR:
310 case BC_LEX_KW_IF:
311 case BC_LEX_KW_LIMITS:
312 case BC_LEX_KW_RETURN:
313 case BC_LEX_KW_WHILE:
314 case BC_LEX_KW_HALT:
315 case BC_LEX_KW_LAST:
316 #endif // BC_ENABLED
317 case BC_LEX_KW_IBASE:
318 case BC_LEX_KW_OBASE:
319 case BC_LEX_KW_SCALE:
320 #if BC_ENABLE_EXTRA_MATH
321 case BC_LEX_KW_SEED:
322 #endif // BC_ENABLE_EXTRA_MATH
323 case BC_LEX_KW_LENGTH:
324 case BC_LEX_KW_PRINT:
325 case BC_LEX_KW_SQRT:
326 case BC_LEX_KW_ABS:
327 case BC_LEX_KW_IS_NUMBER:
328 case BC_LEX_KW_IS_STRING:
329 #if BC_ENABLE_EXTRA_MATH
330 case BC_LEX_KW_IRAND:
331 #endif // BC_ENABLE_EXTRA_MATH
332 case BC_LEX_KW_ASCIIFY:
333 case BC_LEX_KW_MODEXP:
334 case BC_LEX_KW_DIVMOD:
335 case BC_LEX_KW_QUIT:
336 #if BC_ENABLE_EXTRA_MATH
337 case BC_LEX_KW_RAND:
338 #endif // BC_ENABLE_EXTRA_MATH
339 case BC_LEX_KW_MAXIBASE:
340 case BC_LEX_KW_MAXOBASE:
341 case BC_LEX_KW_MAXSCALE:
342 #if BC_ENABLE_EXTRA_MATH
343 case BC_LEX_KW_MAXRAND:
344 #endif // BC_ENABLE_EXTRA_MATH
345 case BC_LEX_KW_LINE_LENGTH:
346 #if BC_ENABLED
347 case BC_LEX_KW_GLOBAL_STACKS:
348 #endif // BC_ENABLED
349 case BC_LEX_KW_LEADING_ZERO:
350 case BC_LEX_KW_STREAM:
351 case BC_LEX_KW_ELSE:
352 case BC_LEX_EXTENDED_REGISTERS:
353 case BC_LEX_EQ_NO_REG:
354 case BC_LEX_EXECUTE:
355 case BC_LEX_PRINT_STACK:
356 case BC_LEX_CLEAR_STACK:
357 case BC_LEX_STACK_LEVEL:
358 case BC_LEX_DUPLICATE:
359 case BC_LEX_SWAP:
360 case BC_LEX_POP:
361 case BC_LEX_PRINT_POP:
362 case BC_LEX_NQUIT:
363 case BC_LEX_EXEC_STACK_LENGTH:
364 case BC_LEX_SCALE_FACTOR:
365 {
366 // All other tokens should be taken care of by the caller, or they
367 // actually *are* invalid.
368 bc_parse_err(p, BC_ERR_PARSE_TOKEN);
369 }
370 }
371
372 if (get_token) bc_lex_next(&p->l);
373 }
374
375 void
dc_parse_expr(BcParse * p,uint8_t flags)376 dc_parse_expr(BcParse* p, uint8_t flags)
377 {
378 BcInst inst;
379 BcLexType t;
380 bool need_expr, have_expr = false;
381
382 need_expr = ((flags & BC_PARSE_NOREAD) != 0);
383
384 // dc can just keep parsing forever basically, unlike bc, which has to have
385 // a whole bunch of complicated nonsense because its language was horribly
386 // designed.
387
388 // While we don't have EOF...
389 while ((t = p->l.t) != BC_LEX_EOF)
390 {
391 // Eat newline.
392 if (t == BC_LEX_NLINE)
393 {
394 bc_lex_next(&p->l);
395 continue;
396 }
397
398 // Get the instruction that corresponds to the token.
399 inst = dc_parse_insts[t];
400
401 // If the instruction is invalid, that means we have to do some harder
402 // parsing. So if not invalid, just push the instruction; otherwise,
403 // parse the token.
404 if (inst != BC_INST_INVALID)
405 {
406 bc_parse_push(p, inst);
407 bc_lex_next(&p->l);
408 }
409 else dc_parse_token(p, t, flags);
410
411 have_expr = true;
412 }
413
414 // If we don't have an expression and need one, barf. Otherwise, just push a
415 // BC_INST_POP_EXEC if we have EOF and BC_PARSE_NOCALL, which dc uses to
416 // indicate that it is executing a string.
417 if (BC_ERR(need_expr && !have_expr)) bc_err(BC_ERR_EXEC_READ_EXPR);
418 else if (p->l.t == BC_LEX_EOF && (flags & BC_PARSE_NOCALL))
419 {
420 bc_parse_push(p, BC_INST_POP_EXEC);
421 }
422 }
423
424 void
dc_parse_parse(BcParse * p)425 dc_parse_parse(BcParse* p)
426 {
427 assert(p != NULL);
428
429 BC_SETJMP_LOCKED(vm, exit);
430
431 // If we have EOF, someone called this function one too many times.
432 // Otherwise, parse.
433 if (BC_ERR(p->l.t == BC_LEX_EOF)) bc_parse_err(p, BC_ERR_PARSE_EOF);
434 else dc_parse_expr(p, 0);
435
436 exit:
437
438 // Need to reset if there was an error.
439 if (BC_SIG_EXC(vm)) bc_parse_reset(p);
440
441 BC_LONGJMP_CONT(vm);
442 BC_SIG_MAYLOCK;
443 }
444 #endif // DC_ENABLED
445