/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <[email protected]>
 * Based on original work by Bjorn Ekwall <[email protected]>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%

u?int(8|16|32|64)x(1|2|4|8|16)_t	return BUILTIN_INT_KEYW;

 /* Keep track of our location in the original source files.  A line of
    the form `# <number> "<file>"' is handed to the second stage as
    FILENAME; any other `#' line and every bare newline just bumps the
    line counter.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) records the token text T (L characters plus its terminating
 * NUL) in the node currently held by next_node, then allocates a fresh
 * next_node whose ->next points back at the node just filled in.  It
 * relies on `cur_node' and `next_node' being in scope at the point of use.
 * Note that L is evaluated twice.
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare;
 * so far it was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)	do {						   \
	  cur_node = next_node;						   \
	  next_node = xmalloc(sizeof(*next_node));			   \
	  next_node->next = cur_node;					   \
	  cur_node->string = memcpy(xmalloc((L) + 1), (T), (L) + 1);	   \
	  cur_node->tag =						   \
	    find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?		   \
	    SYM_ENUM_CONST : SYM_NORMAL;				   \
	  cur_node->in_source_file = in_source_file;			   \
	} while (0)

/* Append the text of the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.  */

/*
 * The lexer cannot distinguish whether a typedef'ed string is a TYPE or an
 * IDENT. We need a hint from the parser to handle this accurately.
121 */ 122 bool dont_want_type_specifier; 123 124 int 125 yylex(void) 126 { 127 static enum { 128 ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1, 129 ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT, 130 } lexstate = ST_NOTSTARTED; 131 132 static int suppress_type_lookup, dont_want_brace_phrase; 133 static struct string_list *next_node; 134 static char *source_file; 135 136 int token, count = 0; 137 struct string_list *cur_node; 138 139 if (lexstate == ST_NOTSTARTED) 140 { 141 next_node = xmalloc(sizeof(*next_node)); 142 next_node->next = NULL; 143 lexstate = ST_NORMAL; 144 } 145 146 repeat: 147 token = yylex1(); 148 149 if (token == 0) 150 return 0; 151 else if (token == FILENAME) 152 { 153 char *file, *e; 154 155 /* Save the filename and line number for later error messages. */ 156 157 if (cur_filename) 158 free(cur_filename); 159 160 file = strchr(yytext, '\"')+1; 161 e = strchr(file, '\"'); 162 *e = '\0'; 163 cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1); 164 cur_line = atoi(yytext+2); 165 166 if (!source_file) { 167 source_file = xstrdup(cur_filename); 168 in_source_file = 1; 169 } else { 170 in_source_file = (strcmp(cur_filename, source_file) == 0); 171 } 172 173 goto repeat; 174 } 175 176 switch (lexstate) 177 { 178 case ST_NORMAL: 179 switch (token) 180 { 181 case IDENT: 182 APP; 183 { 184 int r = is_reserved_word(yytext, yyleng); 185 if (r >= 0) 186 { 187 switch (token = r) 188 { 189 case ATTRIBUTE_KEYW: 190 lexstate = ST_ATTRIBUTE; 191 count = 0; 192 goto repeat; 193 case ASM_KEYW: 194 lexstate = ST_ASM; 195 count = 0; 196 goto repeat; 197 case TYPEOF_KEYW: 198 lexstate = ST_TYPEOF; 199 count = 0; 200 goto repeat; 201 202 case STRUCT_KEYW: 203 case UNION_KEYW: 204 case ENUM_KEYW: 205 dont_want_brace_phrase = 3; 206 suppress_type_lookup = 2; 207 goto fini; 208 209 case EXPORT_SYMBOL_KEYW: 210 goto fini; 211 212 case STATIC_ASSERT_KEYW: 213 lexstate = ST_STATIC_ASSERT; 214 count = 0; 215 goto repeat; 216 } 217 } 218 
if (!suppress_type_lookup && !dont_want_type_specifier) 219 { 220 if (find_symbol(yytext, SYM_TYPEDEF, 1)) 221 token = TYPE; 222 } 223 } 224 break; 225 226 case '[': 227 APP; 228 lexstate = ST_BRACKET; 229 count = 1; 230 goto repeat; 231 232 case '{': 233 APP; 234 if (dont_want_brace_phrase) 235 break; 236 lexstate = ST_BRACE; 237 count = 1; 238 goto repeat; 239 240 case '=': case ':': 241 APP; 242 lexstate = ST_EXPRESSION; 243 break; 244 245 default: 246 APP; 247 break; 248 } 249 break; 250 251 case ST_ATTRIBUTE: 252 APP; 253 switch (token) 254 { 255 case '(': 256 ++count; 257 goto repeat; 258 case ')': 259 if (--count == 0) 260 { 261 lexstate = ST_NORMAL; 262 token = ATTRIBUTE_PHRASE; 263 break; 264 } 265 goto repeat; 266 default: 267 goto repeat; 268 } 269 break; 270 271 case ST_ASM: 272 APP; 273 switch (token) 274 { 275 case '(': 276 ++count; 277 goto repeat; 278 case ')': 279 if (--count == 0) 280 { 281 lexstate = ST_NORMAL; 282 token = ASM_PHRASE; 283 break; 284 } 285 goto repeat; 286 default: 287 goto repeat; 288 } 289 break; 290 291 case ST_TYPEOF_1: 292 if (token == IDENT) 293 { 294 if (is_reserved_word(yytext, yyleng) >= 0 295 || find_symbol(yytext, SYM_TYPEDEF, 1)) 296 { 297 yyless(0); 298 unput('('); 299 lexstate = ST_NORMAL; 300 token = TYPEOF_KEYW; 301 break; 302 } 303 _APP("(", 1); 304 } 305 lexstate = ST_TYPEOF; 306 /* FALLTHRU */ 307 308 case ST_TYPEOF: 309 switch (token) 310 { 311 case '(': 312 if ( ++count == 1 ) 313 lexstate = ST_TYPEOF_1; 314 else 315 APP; 316 goto repeat; 317 case ')': 318 APP; 319 if (--count == 0) 320 { 321 lexstate = ST_NORMAL; 322 token = TYPEOF_PHRASE; 323 break; 324 } 325 goto repeat; 326 default: 327 APP; 328 goto repeat; 329 } 330 break; 331 332 case ST_BRACKET: 333 APP; 334 switch (token) 335 { 336 case '[': 337 ++count; 338 goto repeat; 339 case ']': 340 if (--count == 0) 341 { 342 lexstate = ST_NORMAL; 343 token = BRACKET_PHRASE; 344 break; 345 } 346 goto repeat; 347 default: 348 goto repeat; 349 } 350 break; 351 
352 case ST_BRACE: 353 APP; 354 switch (token) 355 { 356 case '{': 357 ++count; 358 goto repeat; 359 case '}': 360 if (--count == 0) 361 { 362 lexstate = ST_NORMAL; 363 token = BRACE_PHRASE; 364 break; 365 } 366 goto repeat; 367 default: 368 goto repeat; 369 } 370 break; 371 372 case ST_EXPRESSION: 373 switch (token) 374 { 375 case '(': case '[': case '{': 376 ++count; 377 APP; 378 goto repeat; 379 case '}': 380 /* is this the last line of an enum declaration? */ 381 if (count == 0) 382 { 383 /* Put back the token we just read so's we can find it again 384 after registering the expression. */ 385 unput(token); 386 387 lexstate = ST_NORMAL; 388 token = EXPRESSION_PHRASE; 389 break; 390 } 391 /* FALLTHRU */ 392 case ')': case ']': 393 --count; 394 APP; 395 goto repeat; 396 case ',': case ';': 397 if (count == 0) 398 { 399 /* Put back the token we just read so's we can find it again 400 after registering the expression. */ 401 unput(token); 402 403 lexstate = ST_NORMAL; 404 token = EXPRESSION_PHRASE; 405 break; 406 } 407 APP; 408 goto repeat; 409 default: 410 APP; 411 goto repeat; 412 } 413 break; 414 415 case ST_STATIC_ASSERT: 416 APP; 417 switch (token) 418 { 419 case '(': 420 ++count; 421 goto repeat; 422 case ')': 423 if (--count == 0) 424 { 425 lexstate = ST_NORMAL; 426 token = STATIC_ASSERT_PHRASE; 427 break; 428 } 429 goto repeat; 430 default: 431 goto repeat; 432 } 433 break; 434 435 default: 436 exit(1); 437 } 438 fini: 439 440 if (suppress_type_lookup > 0) 441 --suppress_type_lookup; 442 443 /* 444 * __attribute__() can be placed immediately after the 'struct' keyword. 445 * e.g.) struct __attribute__((__packed__)) foo { ... }; 446 */ 447 if (token != ATTRIBUTE_PHRASE && dont_want_brace_phrase > 0) 448 --dont_want_brace_phrase; 449 450 yylval = &next_node->next; 451 452 return token; 453 } 454