1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * Lexical analysis for genksyms.
4  * Copyright 1996, 1997 Linux International.
5  *
6  * New implementation contributed by Richard Henderson <[email protected]>
7  * Based on original work by Bjorn Ekwall <[email protected]>
8  *
9  * Taken from Linux modutils 2.4.22.
10  */
11 
12 %{
13 
14 #include <limits.h>
15 #include <stdbool.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 
20 #include "genksyms.h"
21 #include "parse.tab.h"
22 
23 /* We've got a two-level lexer here.  We let flex do basic tokenization
24    and then we categorize those basic tokens in the second stage.  */
25 #define YY_DECL		static int yylex1(void)
26 
27 %}
28 
29 IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
30 
31 O_INT			0[0-7]*
32 D_INT			[1-9][0-9]*
33 X_INT			0[Xx][0-9A-Fa-f]+
34 I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
35 INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
36 
37 FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
38 EXP			[Ee][+-]?[0-9]+
39 F_SUF			[FfLl]
40 REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
41 
42 STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
43 CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
44 
45 MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
46 
47 /* We don't do multiple input files.  */
48 %option noyywrap
49 
50 %option noinput
51 
52 %%
53 
54 u?int(8|16|32|64)x(1|2|4|8|16)_t	return BUILTIN_INT_KEYW;
55 
56  /* Keep track of our location in the original source files.  */
57 ^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
58 ^#.*\n					cur_line++;
59 \n					cur_line++;
60 
61  /* Ignore all other whitespace.  */
62 [ \t\f\v\r]+				;
63 
64 
65 {STRING}				return STRING;
66 {CHAR}					return CHAR;
67 {IDENT}					return IDENT;
68 
69  /* The Pedant requires that the other C multi-character tokens be
70     recognized as tokens.  We don't actually use them since we don't
71     parse expressions, but we do want whitespace to be arranged
72     around them properly.  */
73 {MC_TOKEN}				return OTHER;
74 {INT}					return INT;
75 {REAL}					return REAL;
76 
77 "..."					return DOTS;
78 
79  /* All other tokens are single characters.  */
80 .					return yytext[0];
81 
82 
83 %%
84 
85 /* Bring in the keyword recognizer.  */
86 
87 #include "keywords.c"
88 
89 
90 /* Macros to append to our phrase collection list.  */
91 
92 /*
93  * We mark any token, that that equals to a known enumerator, as
94  * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
95  * the only problem is struct and union members:
96  *    enum e { a, b }; struct s { int a, b; }
97  * but in this case, the only effect will be, that the ABI checksums become
98  * more volatile, which is acceptable. Also, such collisions are quite rare,
99  * so far it was only observed in include/linux/telephony.h.
100  */
101 #define _APP(T,L)	do {						   \
102 			  cur_node = next_node;				   \
103 			  next_node = xmalloc(sizeof(*next_node));	   \
104 			  next_node->next = cur_node;			   \
105 			  cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
106 			  cur_node->tag =				   \
107 			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
108 			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
109 			  cur_node->in_source_file = in_source_file;       \
110 			} while (0)
111 
112 #define APP		_APP(yytext, yyleng)
113 
114 
115 /* The second stage lexer.  Here we incorporate knowledge of the state
116    of the parser to tailor the tokens that are returned.  */
117 
118 /*
119  * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an
120  * IDENT. We need a hint from the parser to handle this accurately.
121  */
122 bool dont_want_type_specifier;
123 
124 int
125 yylex(void)
126 {
127   static enum {
128     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
129     ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
130   } lexstate = ST_NOTSTARTED;
131 
132   static int suppress_type_lookup, dont_want_brace_phrase;
133   static struct string_list *next_node;
134   static char *source_file;
135 
136   int token, count = 0;
137   struct string_list *cur_node;
138 
139   if (lexstate == ST_NOTSTARTED)
140     {
141       next_node = xmalloc(sizeof(*next_node));
142       next_node->next = NULL;
143       lexstate = ST_NORMAL;
144     }
145 
146 repeat:
147   token = yylex1();
148 
149   if (token == 0)
150     return 0;
151   else if (token == FILENAME)
152     {
153       char *file, *e;
154 
155       /* Save the filename and line number for later error messages.  */
156 
157       if (cur_filename)
158 	free(cur_filename);
159 
160       file = strchr(yytext, '\"')+1;
161       e = strchr(file, '\"');
162       *e = '\0';
163       cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
164       cur_line = atoi(yytext+2);
165 
166       if (!source_file) {
167         source_file = xstrdup(cur_filename);
168         in_source_file = 1;
169       } else {
170         in_source_file = (strcmp(cur_filename, source_file) == 0);
171       }
172 
173       goto repeat;
174     }
175 
176   switch (lexstate)
177     {
178     case ST_NORMAL:
179       switch (token)
180 	{
181 	case IDENT:
182 	  APP;
183 	  {
184 	    int r = is_reserved_word(yytext, yyleng);
185 	    if (r >= 0)
186 	      {
187 		switch (token = r)
188 		  {
189 		  case ATTRIBUTE_KEYW:
190 		    lexstate = ST_ATTRIBUTE;
191 		    count = 0;
192 		    goto repeat;
193 		  case ASM_KEYW:
194 		    lexstate = ST_ASM;
195 		    count = 0;
196 		    goto repeat;
197 		  case TYPEOF_KEYW:
198 		    lexstate = ST_TYPEOF;
199 		    count = 0;
200 		    goto repeat;
201 
202 		  case STRUCT_KEYW:
203 		  case UNION_KEYW:
204 		  case ENUM_KEYW:
205 		    dont_want_brace_phrase = 3;
206 		    suppress_type_lookup = 2;
207 		    goto fini;
208 
209 		  case EXPORT_SYMBOL_KEYW:
210 		      goto fini;
211 
212 		  case STATIC_ASSERT_KEYW:
213 		    lexstate = ST_STATIC_ASSERT;
214 		    count = 0;
215 		    goto repeat;
216 		  }
217 	      }
218 	    if (!suppress_type_lookup && !dont_want_type_specifier)
219 	      {
220 		if (find_symbol(yytext, SYM_TYPEDEF, 1))
221 		  token = TYPE;
222 	      }
223 	  }
224 	  break;
225 
226 	case '[':
227 	  APP;
228 	  lexstate = ST_BRACKET;
229 	  count = 1;
230 	  goto repeat;
231 
232 	case '{':
233 	  APP;
234 	  if (dont_want_brace_phrase)
235 	    break;
236 	  lexstate = ST_BRACE;
237 	  count = 1;
238 	  goto repeat;
239 
240 	case '=': case ':':
241 	  APP;
242 	  lexstate = ST_EXPRESSION;
243 	  break;
244 
245 	default:
246 	  APP;
247 	  break;
248 	}
249       break;
250 
251     case ST_ATTRIBUTE:
252       APP;
253       switch (token)
254 	{
255 	case '(':
256 	  ++count;
257 	  goto repeat;
258 	case ')':
259 	  if (--count == 0)
260 	    {
261 	      lexstate = ST_NORMAL;
262 	      token = ATTRIBUTE_PHRASE;
263 	      break;
264 	    }
265 	  goto repeat;
266 	default:
267 	  goto repeat;
268 	}
269       break;
270 
271     case ST_ASM:
272       APP;
273       switch (token)
274 	{
275 	case '(':
276 	  ++count;
277 	  goto repeat;
278 	case ')':
279 	  if (--count == 0)
280 	    {
281 	      lexstate = ST_NORMAL;
282 	      token = ASM_PHRASE;
283 	      break;
284 	    }
285 	  goto repeat;
286 	default:
287 	  goto repeat;
288 	}
289       break;
290 
291     case ST_TYPEOF_1:
292       if (token == IDENT)
293 	{
294 	  if (is_reserved_word(yytext, yyleng) >= 0
295 	      || find_symbol(yytext, SYM_TYPEDEF, 1))
296 	    {
297 	      yyless(0);
298 	      unput('(');
299 	      lexstate = ST_NORMAL;
300 	      token = TYPEOF_KEYW;
301 	      break;
302 	    }
303 	  _APP("(", 1);
304 	}
305 	lexstate = ST_TYPEOF;
306 	/* FALLTHRU */
307 
308     case ST_TYPEOF:
309       switch (token)
310 	{
311 	case '(':
312 	  if ( ++count == 1 )
313 	    lexstate = ST_TYPEOF_1;
314 	  else
315 	    APP;
316 	  goto repeat;
317 	case ')':
318 	  APP;
319 	  if (--count == 0)
320 	    {
321 	      lexstate = ST_NORMAL;
322 	      token = TYPEOF_PHRASE;
323 	      break;
324 	    }
325 	  goto repeat;
326 	default:
327 	  APP;
328 	  goto repeat;
329 	}
330       break;
331 
332     case ST_BRACKET:
333       APP;
334       switch (token)
335 	{
336 	case '[':
337 	  ++count;
338 	  goto repeat;
339 	case ']':
340 	  if (--count == 0)
341 	    {
342 	      lexstate = ST_NORMAL;
343 	      token = BRACKET_PHRASE;
344 	      break;
345 	    }
346 	  goto repeat;
347 	default:
348 	  goto repeat;
349 	}
350       break;
351 
352     case ST_BRACE:
353       APP;
354       switch (token)
355 	{
356 	case '{':
357 	  ++count;
358 	  goto repeat;
359 	case '}':
360 	  if (--count == 0)
361 	    {
362 	      lexstate = ST_NORMAL;
363 	      token = BRACE_PHRASE;
364 	      break;
365 	    }
366 	  goto repeat;
367 	default:
368 	  goto repeat;
369 	}
370       break;
371 
372     case ST_EXPRESSION:
373       switch (token)
374 	{
375 	case '(': case '[': case '{':
376 	  ++count;
377 	  APP;
378 	  goto repeat;
379 	case '}':
380 	  /* is this the last line of an enum declaration? */
381 	  if (count == 0)
382 	    {
383 	      /* Put back the token we just read so's we can find it again
384 		 after registering the expression.  */
385 	      unput(token);
386 
387 	      lexstate = ST_NORMAL;
388 	      token = EXPRESSION_PHRASE;
389 	      break;
390 	    }
391 	  /* FALLTHRU */
392 	case ')': case ']':
393 	  --count;
394 	  APP;
395 	  goto repeat;
396 	case ',': case ';':
397 	  if (count == 0)
398 	    {
399 	      /* Put back the token we just read so's we can find it again
400 		 after registering the expression.  */
401 	      unput(token);
402 
403 	      lexstate = ST_NORMAL;
404 	      token = EXPRESSION_PHRASE;
405 	      break;
406 	    }
407 	  APP;
408 	  goto repeat;
409 	default:
410 	  APP;
411 	  goto repeat;
412 	}
413       break;
414 
415     case ST_STATIC_ASSERT:
416       APP;
417       switch (token)
418 	{
419 	case '(':
420 	  ++count;
421 	  goto repeat;
422 	case ')':
423 	  if (--count == 0)
424 	    {
425 	      lexstate = ST_NORMAL;
426 	      token = STATIC_ASSERT_PHRASE;
427 	      break;
428 	    }
429 	  goto repeat;
430 	default:
431 	  goto repeat;
432 	}
433       break;
434 
435     default:
436       exit(1);
437     }
438 fini:
439 
440   if (suppress_type_lookup > 0)
441     --suppress_type_lookup;
442 
443   /*
444    *  __attribute__() can be placed immediately after the 'struct' keyword.
445    *  e.g.) struct __attribute__((__packed__)) foo { ... };
446    */
447   if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0)
448     --dont_want_brace_phrase;
449 
450   yylval = &next_node->next;
451 
452   return token;
453 }
454