xref: /aosp_15_r20/external/one-true-awk/awk.h (revision 9a7741de182b2776d7b30d6355f2585c0780a51b)
1*9a7741deSElliott Hughes /****************************************************************
2*9a7741deSElliott Hughes Copyright (C) Lucent Technologies 1997
3*9a7741deSElliott Hughes All Rights Reserved
4*9a7741deSElliott Hughes 
5*9a7741deSElliott Hughes Permission to use, copy, modify, and distribute this software and
6*9a7741deSElliott Hughes its documentation for any purpose and without fee is hereby
7*9a7741deSElliott Hughes granted, provided that the above copyright notice appear in all
8*9a7741deSElliott Hughes copies and that both that the copyright notice and this
9*9a7741deSElliott Hughes permission notice and warranty disclaimer appear in supporting
10*9a7741deSElliott Hughes documentation, and that the name Lucent Technologies or any of
11*9a7741deSElliott Hughes its entities not be used in advertising or publicity pertaining
12*9a7741deSElliott Hughes to distribution of the software without specific, written prior
13*9a7741deSElliott Hughes permission.
14*9a7741deSElliott Hughes 
15*9a7741deSElliott Hughes LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16*9a7741deSElliott Hughes INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17*9a7741deSElliott Hughes IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18*9a7741deSElliott Hughes SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19*9a7741deSElliott Hughes WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20*9a7741deSElliott Hughes IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21*9a7741deSElliott Hughes ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22*9a7741deSElliott Hughes THIS SOFTWARE.
23*9a7741deSElliott Hughes ****************************************************************/
24*9a7741deSElliott Hughes 
25*9a7741deSElliott Hughes #include <assert.h>
26*9a7741deSElliott Hughes #include <stdint.h>
27*9a7741deSElliott Hughes #include <stdbool.h>
28*9a7741deSElliott Hughes #if __STDC_VERSION__ <= 199901L
29*9a7741deSElliott Hughes #define noreturn
30*9a7741deSElliott Hughes #else
31*9a7741deSElliott Hughes #include <stdnoreturn.h>
32*9a7741deSElliott Hughes #endif
33*9a7741deSElliott Hughes 
/* Awkfloat: the numeric type of this header; Cell.fval and the NR, FNR, NF,
 * RSTART and RLENGTH pointers below are all declared with it. */
34*9a7741deSElliott Hughes typedef double	Awkfloat;
35*9a7741deSElliott Hughes 
36*9a7741deSElliott Hughes /* unsigned char is more trouble than it's worth */
37*9a7741deSElliott Hughes 
/* uschar: shorthand for unsigned char (used for Cell.ctype/csub and for the
 * out/restr members of struct fa later in this header). */
38*9a7741deSElliott Hughes typedef	unsigned char uschar;
39*9a7741deSElliott Hughes 
/*
 * xfree(a): free the storage `a` points to, then reset `a` to NULL so a
 * stale pointer cannot be reused or freed twice.
 *
 * `a` must be a modifiable pointer lvalue; it is evaluated twice, so do not
 * pass an expression with side effects.  The cast goes through intptr_t,
 * presumably so const-qualified pointers can be released without a
 * qualifier warning (confirm against the callers).
 *
 * The body is wrapped in do { } while (0) so that `xfree(p);` is exactly one
 * statement and can be used in an unbraced if/else.
 */
#define	xfree(a)	do { free((void *)(intptr_t)(a)); (a) = NULL; } while (0)
/*
 * We sometimes cheat writing read-only pointers to NUL-terminate them
 * and then put back the original value.
 *
 * setptr(ptr, a): store the char value `a` at address `ptr`.  The cast goes
 * through intptr_t so that a const-qualified `ptr` is accepted.  The whole
 * expansion is parenthesized, so the macro is a single expression whose
 * value is the stored char and it composes safely with surrounding
 * operators.
 */
#define setptr(ptr, a)	((*(char *)(intptr_t)(ptr)) = (a))
46*9a7741deSElliott Hughes 
/*
 * NN(p): yields p, or the literal "(null)" when p is a null pointer, so the
 * result is guaranteed non-null for DPRINTF.  p is evaluated twice.
 */
#define	NN(p)	((p) ? (p) : "(null)")

/*
 * DPRINTF(...): printf-style trace output, emitted only when the global
 * `dbg` is non-zero.  When DEBUG is not defined it expands to an empty
 * statement.
 *
 * Both forms are wrapped in do { } while (0) so that `DPRINTF(...);` is one
 * complete statement: a bare `if (dbg) printf(...)` would capture the `else`
 * of an enclosing unbraced if/else.
 */
#define	DEBUG
#ifdef	DEBUG
#	define	DPRINTF(...)	do { if (dbg) printf(__VA_ARGS__); } while (0)
#else
#	define	DPRINTF(...)	do { } while (0)
#endif
55*9a7741deSElliott Hughes 
/*
 * compile_time: a global that is only declared here (extern) and holds one
 * of the three compile_states values below.
 * NOTE(review): presumably records which phase awk is in (parsing the
 * program, running it, or reporting an error) -- confirm at the definition.
 */
56*9a7741deSElliott Hughes extern enum compile_states {
57*9a7741deSElliott Hughes 	RUNNING,
58*9a7741deSElliott Hughes 	COMPILING,
59*9a7741deSElliott Hughes 	ERROR_PRINTING
60*9a7741deSElliott Hughes } compile_time;
61*9a7741deSElliott Hughes 
/*
 * Global interpreter state.  Everything in this group is only declared here
 * (extern); the definitions live in other translation units.
 */
62*9a7741deSElliott Hughes extern bool	safe;		/* false => unsafe, true => safe */
63*9a7741deSElliott Hughes 
64*9a7741deSElliott Hughes #define	RECSIZE	(8 * 1024)	/* sets limit on records, fields, etc., etc. */
65*9a7741deSElliott Hughes extern int	recsize;	/* size of current record, orig RECSIZE */
66*9a7741deSElliott Hughes 
67*9a7741deSElliott Hughes extern size_t	awk_mb_cur_max;	/* max size of a multi-byte character */
68*9a7741deSElliott Hughes 
69*9a7741deSElliott Hughes extern char	EMPTY[];	/* this avoids -Wwritable-strings issues */
/*
 * Pointers named after awk's built-in variables: char ** for the string
 * ones, Awkfloat * for the numeric ones.
 * NOTE(review): presumably each points into the Cell of the corresponding
 * variable (see the *loc Cell pointers below) -- confirm at the definition.
 */
70*9a7741deSElliott Hughes extern char	**FS;
71*9a7741deSElliott Hughes extern char	**RS;
72*9a7741deSElliott Hughes extern char	**ORS;
73*9a7741deSElliott Hughes extern char	**OFS;
74*9a7741deSElliott Hughes extern char	**OFMT;
75*9a7741deSElliott Hughes extern Awkfloat *NR;
76*9a7741deSElliott Hughes extern Awkfloat *FNR;
77*9a7741deSElliott Hughes extern Awkfloat *NF;
78*9a7741deSElliott Hughes extern char	**FILENAME;
79*9a7741deSElliott Hughes extern char	**SUBSEP;
80*9a7741deSElliott Hughes extern Awkfloat *RSTART;
81*9a7741deSElliott Hughes extern Awkfloat *RLENGTH;
82*9a7741deSElliott Hughes 
83*9a7741deSElliott Hughes extern bool	CSV;		/* true for csv input */
84*9a7741deSElliott Hughes 
85*9a7741deSElliott Hughes extern char	*record;	/* points to $0 */
86*9a7741deSElliott Hughes extern int	lineno;		/* line number in awk program */
87*9a7741deSElliott Hughes extern int	errorflag;	/* 1 if error has occurred */
88*9a7741deSElliott Hughes extern bool	donefld;	/* true if record broken into fields */
89*9a7741deSElliott Hughes extern bool	donerec;	/* true if record is valid (no fld has changed) */
90*9a7741deSElliott Hughes extern int	dbg;		/* non-zero enables DPRINTF output */
91*9a7741deSElliott Hughes 
92*9a7741deSElliott Hughes extern const char *patbeg;	/* beginning of pattern matched */
93*9a7741deSElliott Hughes extern	int	patlen;		/* length of pattern matched.  set in b.c */
94*9a7741deSElliott Hughes 
95*9a7741deSElliott Hughes /* Cell:  all information about a variable or constant */
/*
 * The isjump/isexit/isbreak/istrue/istemp macros in this header test ctype
 * and csub, so a Cell also carries boolean and jump results, not only
 * values.  tval is a bit mask of the NUM/STR/... flags defined below and
 * says which of sval/fval is currently valid.  Cells can be chained through
 * cnext.
 */
96*9a7741deSElliott Hughes 
97*9a7741deSElliott Hughes typedef struct Cell {
98*9a7741deSElliott Hughes 	uschar	ctype;		/* OCELL, OBOOL, OJUMP, etc. */
99*9a7741deSElliott Hughes 	uschar	csub;		/* CCON, CTEMP, CFLD, etc. */
100*9a7741deSElliott Hughes 	char	*nval;		/* name, for variables only */
101*9a7741deSElliott Hughes 	char	*sval;		/* string value */
102*9a7741deSElliott Hughes 	Awkfloat fval;		/* value as number */
103*9a7741deSElliott Hughes 	int	 tval;		/* type info: STR|NUM|ARR|FCN|FLD|REC|CON|DONTFREE|CONVC|CONVO */
104*9a7741deSElliott Hughes 	char	*fmt;		/* CONVFMT/OFMT value used to convert from number */
105*9a7741deSElliott Hughes 	struct Cell *cnext;	/* ptr to next if chained */
106*9a7741deSElliott Hughes } Cell;
107*9a7741deSElliott Hughes 
/*
 * Array: a hash table of Cells.  tab holds `size` Cell pointers and nelem
 * counts the elements currently stored.
 * NOTE(review): presumably each tab slot heads a bucket chain linked through
 * Cell.cnext -- confirm in the table code.
 */
108*9a7741deSElliott Hughes typedef struct Array {		/* symbol table array */
109*9a7741deSElliott Hughes 	int	nelem;		/* elements in table right now */
110*9a7741deSElliott Hughes 	int	size;		/* size of tab */
111*9a7741deSElliott Hughes 	Cell	**tab;		/* hash table pointers */
112*9a7741deSElliott Hughes } Array;
113*9a7741deSElliott Hughes 
/*
 * The global symbol table and the Cells of awk's built-in variables.  All of
 * these are only declared here (extern); the comment on each *loc pointer
 * names the awk variable it refers to.
 */
114*9a7741deSElliott Hughes #define	NSYMTAB	50	/* initial size of a symbol table */
115*9a7741deSElliott Hughes extern Array	*symtab;
116*9a7741deSElliott Hughes 
117*9a7741deSElliott Hughes extern Cell	*nrloc;		/* NR */
118*9a7741deSElliott Hughes extern Cell	*fnrloc;	/* FNR */
119*9a7741deSElliott Hughes extern Cell	*fsloc;		/* FS */
120*9a7741deSElliott Hughes extern Cell	*nfloc;		/* NF */
121*9a7741deSElliott Hughes extern Cell	*ofsloc;	/* OFS */
122*9a7741deSElliott Hughes extern Cell	*orsloc;	/* ORS */
123*9a7741deSElliott Hughes extern Cell	*rsloc;		/* RS */
124*9a7741deSElliott Hughes extern Cell	*rstartloc;	/* RSTART */
125*9a7741deSElliott Hughes extern Cell	*rlengthloc;	/* RLENGTH */
126*9a7741deSElliott Hughes extern Cell	*subseploc;	/* SUBSEP */
127*9a7741deSElliott Hughes extern Cell	*symtabloc;	/* SYMTAB */
128*9a7741deSElliott Hughes 
129*9a7741deSElliott Hughes /* Cell.tval values: */
/*
 * Octal bit flags; each occupies its own bit, so they are OR-ed together in
 * Cell.tval and tested with `&` (see isstr(), isnum(), freeable(), ...).
 */
130*9a7741deSElliott Hughes #define	NUM	01	/* number value is valid */
131*9a7741deSElliott Hughes #define	STR	02	/* string value is valid */
132*9a7741deSElliott Hughes #define DONTFREE 04	/* string space is not freeable */
133*9a7741deSElliott Hughes #define	CON	010	/* this is a constant */
134*9a7741deSElliott Hughes #define	ARR	020	/* this is an array */
135*9a7741deSElliott Hughes #define	FCN	040	/* this is a function name */
136*9a7741deSElliott Hughes #define FLD	0100	/* this is a field $1, $2, ... */
137*9a7741deSElliott Hughes #define	REC	0200	/* this is $0 */
138*9a7741deSElliott Hughes #define CONVC	0400	/* string was converted from number via CONVFMT */
139*9a7741deSElliott Hughes #define CONVO	01000	/* string was converted from number via OFMT */
140*9a7741deSElliott Hughes 
141*9a7741deSElliott Hughes 
142*9a7741deSElliott Hughes /* function types */
/*
 * Consecutive small integer codes, one per name.
 * NOTE(review): presumably these identify awk's built-in functions (length,
 * sqrt, exp, ...) for the code that dispatches on them -- confirm at the
 * use sites; nothing in this header consumes them.
 */
143*9a7741deSElliott Hughes #define	FLENGTH	1
144*9a7741deSElliott Hughes #define	FSQRT	2
145*9a7741deSElliott Hughes #define	FEXP	3
146*9a7741deSElliott Hughes #define	FLOG	4
147*9a7741deSElliott Hughes #define	FINT	5
148*9a7741deSElliott Hughes #define	FSYSTEM	6
149*9a7741deSElliott Hughes #define	FRAND	7
150*9a7741deSElliott Hughes #define	FSRAND	8
151*9a7741deSElliott Hughes #define	FSIN	9
152*9a7741deSElliott Hughes #define	FCOS	10
153*9a7741deSElliott Hughes #define	FATAN	11
154*9a7741deSElliott Hughes #define	FTOUPPER 12
155*9a7741deSElliott Hughes #define	FTOLOWER 13
156*9a7741deSElliott Hughes #define	FFLUSH	14
157*9a7741deSElliott Hughes 
158*9a7741deSElliott Hughes /* Node:  parse tree is made of nodes, with Cell's at bottom */
/*
 * ntype is one of NVALUE/NSTAT/NEXPR (tested by isvalue()/isexpr()), nobj is
 * compared against ARG by isargument(), and nnext links nodes together.
 * narg is declared with one element but the node is over-allocated so that
 * it can hold as many child pointers as needed; do not use sizeof(Node) as
 * the size of an arbitrary node.
 */
159*9a7741deSElliott Hughes 
160*9a7741deSElliott Hughes typedef struct Node {
161*9a7741deSElliott Hughes 	int	ntype;
162*9a7741deSElliott Hughes 	struct	Node *nnext;
163*9a7741deSElliott Hughes 	int	lineno;
164*9a7741deSElliott Hughes 	int	nobj;
165*9a7741deSElliott Hughes 	struct	Node *narg[1];	/* variable: actual size set by calling malloc */
166*9a7741deSElliott Hughes } Node;
167*9a7741deSElliott Hughes 
/* NIL: the null Node pointer. */
168*9a7741deSElliott Hughes #define	NIL	((Node *) 0)
169*9a7741deSElliott Hughes 
/* NOTE(review): winner is presumably the root of the parsed program and
 * nullnode a shared placeholder node -- confirm at their definitions. */
170*9a7741deSElliott Hughes extern Node	*winner;
171*9a7741deSElliott Hughes extern Node	*nullnode;
172*9a7741deSElliott Hughes 
/*
 * Codes stored in Cell.ctype, Cell.csub and Node.ntype.  The cell, bool and
 * jump subtypes all live in the single csub member (istemp(), istrue() and
 * isexit() each compare csub), which is why the three groups use disjoint
 * ranges: 0-7, 11-12 and 21-26.
 */
173*9a7741deSElliott Hughes /* ctypes */
174*9a7741deSElliott Hughes #define OCELL	1
175*9a7741deSElliott Hughes #define OBOOL	2
176*9a7741deSElliott Hughes #define OJUMP	3
177*9a7741deSElliott Hughes 
178*9a7741deSElliott Hughes /* Cell subtypes: csub */
179*9a7741deSElliott Hughes #define	CFREE	7
180*9a7741deSElliott Hughes #define CCOPY	6
181*9a7741deSElliott Hughes #define CCON	5
182*9a7741deSElliott Hughes #define CTEMP	4
183*9a7741deSElliott Hughes #define CNAME	3
184*9a7741deSElliott Hughes #define CVAR	2
185*9a7741deSElliott Hughes #define CFLD	1
186*9a7741deSElliott Hughes #define	CUNK	0
187*9a7741deSElliott Hughes 
188*9a7741deSElliott Hughes /* bool subtypes */
189*9a7741deSElliott Hughes #define BTRUE	11
190*9a7741deSElliott Hughes #define BFALSE	12
191*9a7741deSElliott Hughes 
192*9a7741deSElliott Hughes /* jump subtypes */
193*9a7741deSElliott Hughes #define JEXIT	21
194*9a7741deSElliott Hughes #define JNEXT	22
195*9a7741deSElliott Hughes #define	JBREAK	23
196*9a7741deSElliott Hughes #define	JCONT	24
197*9a7741deSElliott Hughes #define	JRET	25
198*9a7741deSElliott Hughes #define	JNEXTFILE	26
199*9a7741deSElliott Hughes 
200*9a7741deSElliott Hughes /* node types */
201*9a7741deSElliott Hughes #define NVALUE	1
202*9a7741deSElliott Hughes #define NSTAT	2
203*9a7741deSElliott Hughes #define NEXPR	3
204*9a7741deSElliott Hughes 
205*9a7741deSElliott Hughes 
/* pairstack / paircnt: an int array and an int, declared here and defined
 * elsewhere.  NOTE(review): presumably bookkeeping for range patterns
 * (pat1, pat2) -- confirm at the definition. */
206*9a7741deSElliott Hughes extern	int	pairstack[], paircnt;
207*9a7741deSElliott Hughes 
/*
 * Predicates over token numbers, parse-tree Nodes and Cells.
 *
 * notlegal(n) is true when token n lies outside the open interval
 * (FIRSTTOKEN, LASTTOKEN) or its proctab slot is nullproc.  n is fully
 * parenthesized so that an argument such as `x & 0xff` is compared as a
 * whole; note that n may be evaluated up to three times.
 *
 * isvalue/isexpr/isargument take a Node *; every other macro takes a Cell *
 * and inspects ctype, csub or the tval flag bits.
 */
#define notlegal(n)	((n) <= FIRSTTOKEN || (n) >= LASTTOKEN || proctab[(n)-FIRSTTOKEN] == nullproc)
#define isvalue(n)	((n)->ntype == NVALUE)
#define isexpr(n)	((n)->ntype == NEXPR)
#define isjump(n)	((n)->ctype == OJUMP)
#define isexit(n)	((n)->csub == JEXIT)
#define	isbreak(n)	((n)->csub == JBREAK)
#define	iscont(n)	((n)->csub == JCONT)
#define	isnext(n)	((n)->csub == JNEXT || (n)->csub == JNEXTFILE)	/* next or nextfile */
#define	isret(n)	((n)->csub == JRET)
#define isrec(n)	((n)->tval & REC)
#define isfld(n)	((n)->tval & FLD)
#define isstr(n)	((n)->tval & STR)
#define isnum(n)	((n)->tval & NUM)
#define isarr(n)	((n)->tval & ARR)
#define isfcn(n)	((n)->tval & FCN)
#define istrue(n)	((n)->csub == BTRUE)
#define istemp(n)	((n)->csub == CTEMP)
#define	isargument(n)	((n)->nobj == ARG)
/* freeable: the cell holds a string value (STR set) whose storage may be
 * released (DONTFREE clear); a clear DONTFREE bit alone is not enough. */
#define freeable(p)	( ((p)->tval & (STR|DONTFREE)) == STR )
228*9a7741deSElliott Hughes 
229*9a7741deSElliott Hughes /* structures used by regular expression matching machinery, mostly b.c: */
230*9a7741deSElliott Hughes 
/*
 * NOTE(review): the comments on NCHARS talk about 256 (8-bit) and 128
 * (7-bit), but the value actually defined is 1256+3 = 1259.  Treat the
 * comments as historical until confirmed against b.c.  HAT is derived from
 * NCHARS, so it moves whenever NCHARS changes.
 */
231*9a7741deSElliott Hughes #define NCHARS	(1256+3)		/* 256 handles 8-bit chars; 128 does 7-bit */
232*9a7741deSElliott Hughes 				/* BUG: some overflows (caught) if we use 256 */
233*9a7741deSElliott Hughes 				/* watch out in match(), etc. */
234*9a7741deSElliott Hughes #define	HAT	(NCHARS+2)	/* matches ^ in regular expr */
235*9a7741deSElliott Hughes #define NSTATES	32
236*9a7741deSElliott Hughes 
/*
 * rrow: one element of the re[] array at the end of struct fa.  ltype is a
 * type code and lval is the associated value: an int, a Node pointer, a
 * byte-string pointer or an int (rune) array, depending on how the entry is
 * used.  NOTE(review): which union member is live for which ltype is
 * decided in b.c and is not visible from this header.
 */
237*9a7741deSElliott Hughes typedef struct rrow {
238*9a7741deSElliott Hughes 	long	ltype;	/* long avoids pointer warnings on 64-bit */
239*9a7741deSElliott Hughes 	union {
240*9a7741deSElliott Hughes 		int i;
241*9a7741deSElliott Hughes 		Node *np;
242*9a7741deSElliott Hughes 		uschar *up;
243*9a7741deSElliott Hughes 		int *rp; /* rune representation of char class */
244*9a7741deSElliott Hughes 	} lval;		/* because Al stores a pointer in it! */
245*9a7741deSElliott Hughes 	int	*lfollow;
246*9a7741deSElliott Hughes } rrow;
247*9a7741deSElliott Hughes 
/* gtte: one goto-table entry, pairing a character value with a state.
 * NOTE(review): presumably "on ch, go to state" -- confirm in b.c. */
248*9a7741deSElliott Hughes typedef struct gtte { /* gototab entry */
249*9a7741deSElliott Hughes 	unsigned int ch;
250*9a7741deSElliott Hughes 	unsigned int state;
251*9a7741deSElliott Hughes } gtte;
252*9a7741deSElliott Hughes 
/* gtt: a growable array of gtte.  NOTE(review): by their names, allocated is
 * the capacity of entries and inuse the number filled in -- confirm. */
253*9a7741deSElliott Hughes typedef struct gtt {	/* gototab */
254*9a7741deSElliott Hughes 	size_t	allocated;
255*9a7741deSElliott Hughes 	size_t	inuse;
256*9a7741deSElliott Hughes 	gtte	*entries;
257*9a7741deSElliott Hughes } gtt;
258*9a7741deSElliott Hughes 
/*
 * fa: the automaton built for one regular expression.  It owns a goto table
 * (gototab), per-state byte and position arrays (out, posns), the regular
 * expression text (restr) and a trailing array of rrow entries.
 * NOTE(review): the roles of use, initstat, curstat and accept are inferred
 * from their names only; confirm in b.c before relying on them.
 * re is declared with one element but the struct is over-allocated to hold
 * as many entries as needed, so sizeof(fa) is not the size of a real fa.
 */
259*9a7741deSElliott Hughes typedef struct fa {
260*9a7741deSElliott Hughes 	gtt	*gototab;
261*9a7741deSElliott Hughes 	uschar	*out;
262*9a7741deSElliott Hughes 	uschar	*restr;
263*9a7741deSElliott Hughes 	int	**posns;
264*9a7741deSElliott Hughes 	int	state_count;
265*9a7741deSElliott Hughes 	bool	anchor;
266*9a7741deSElliott Hughes 	int	use;
267*9a7741deSElliott Hughes 	int	initstat;
268*9a7741deSElliott Hughes 	int	curstat;
269*9a7741deSElliott Hughes 	int	accept;
270*9a7741deSElliott Hughes 	struct	rrow re[1];	/* variable: actual size set by calling malloc */
271*9a7741deSElliott Hughes } fa;
272*9a7741deSElliott Hughes 
273*9a7741deSElliott Hughes 
274*9a7741deSElliott Hughes #include "proto.h"
275