xref: /aosp_15_r20/external/antlr/runtime/C/include/antlr3intstream.h (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot /** \file
2*16467b97STreehugger Robot  * Defines the class interface for an antlr3 INTSTREAM.
3*16467b97STreehugger Robot  *
4*16467b97STreehugger Robot  * Certain functionality (such as DFAs for instance) abstracts the stream of tokens
5*16467b97STreehugger Robot  * or characters into a stream of integers. Hence this structure should be included
6*16467b97STreehugger Robot  * in any stream that is able to provide the output as a stream of integers (which is anything,
7*16467b97STreehugger Robot  * basically).
8*16467b97STreehugger Robot  *
9*16467b97STreehugger Robot  * There are no specific implementations of the methods in this interface in general. Though
10*16467b97STreehugger Robot  * for purposes of casting and so on, it may be necessary to implement a function with
11*16467b97STreehugger Robot  * the signature in this interface which abstracts the base implementation. In essence though
12*16467b97STreehugger Robot  * the base stream provides a pointer to this interface, within which it installs its
13*16467b97STreehugger Robot  * normal match() functions and so on. Interfaces such as DFA are then passed the pANTLR3_INT_STREAM
14*16467b97STreehugger Robot  * and can treat any input as an int stream.
15*16467b97STreehugger Robot  *
16*16467b97STreehugger Robot  * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM.
17*16467b97STreehugger Robot  * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from
18*16467b97STreehugger Robot  * its normal interface when it was created. This is then pointed at by the pANTLR3_BASE_RECOGNIZER
19*16467b97STreehugger Robot  * when it is initialized with a pANTLR3_INPUT_STREAM.
20*16467b97STreehugger Robot  *
21*16467b97STreehugger Robot  * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the
22*16467b97STreehugger Robot  * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM.
23*16467b97STreehugger Robot  *
24*16467b97STreehugger Robot  * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where
25*16467b97STreehugger Robot  * the pANTLR3_INT_STREAM comes from?
26*16467b97STreehugger Robot  *
27*16467b97STreehugger Robot  * Note that because the context pointer points to the actual interface structure that is providing
28*16467b97STreehugger Robot  * the ANTLR3_INT_STREAM, it is defined as a (void *) in this interface. There is no direct implementation
29*16467b97STreehugger Robot  * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P)
30*16467b97STreehugger Robot  */
31*16467b97STreehugger Robot #ifndef	_ANTLR3_INTSTREAM_H
32*16467b97STreehugger Robot #define	_ANTLR3_INTSTREAM_H
33*16467b97STreehugger Robot 
34*16467b97STreehugger Robot // [The "BSD licence"]
35*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
36*16467b97STreehugger Robot // http://www.temporal-wave.com
37*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle
38*16467b97STreehugger Robot //
39*16467b97STreehugger Robot // All rights reserved.
40*16467b97STreehugger Robot //
41*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without
42*16467b97STreehugger Robot // modification, are permitted provided that the following conditions
43*16467b97STreehugger Robot // are met:
44*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright
45*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer.
46*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright
47*16467b97STreehugger Robot //    notice, this list of conditions and the following disclaimer in the
48*16467b97STreehugger Robot //    documentation and/or other materials provided with the distribution.
49*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products
50*16467b97STreehugger Robot //    derived from this software without specific prior written permission.
51*16467b97STreehugger Robot //
52*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
53*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
54*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
55*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
56*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
57*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
61*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62*16467b97STreehugger Robot 
63*16467b97STreehugger Robot #include    <antlr3defs.h>
64*16467b97STreehugger Robot #include    <antlr3commontoken.h>
65*16467b97STreehugger Robot 
66*16467b97STreehugger Robot /** Type indicator for a character stream
67*16467b97STreehugger Robot  * \remark if a custom stream is created but it can be treated as
68*16467b97STreehugger Robot  * a char stream, then you may OR in this value to your type indicator
69*16467b97STreehugger Robot  */
70*16467b97STreehugger Robot #define	ANTLR3_CHARSTREAM	0x0001
71*16467b97STreehugger Robot 
72*16467b97STreehugger Robot /** Type indicator for a Token stream
73*16467b97STreehugger Robot  * \remark if a custom stream is created but it can be treated as
74*16467b97STreehugger Robot  * a token stream, then you may OR in this value to your type indicator
75*16467b97STreehugger Robot  */
76*16467b97STreehugger Robot #define	ANTLR3_TOKENSTREAM	0x0002
77*16467b97STreehugger Robot 
78*16467b97STreehugger Robot /** Type indicator for a common tree node stream
79*16467b97STreehugger Robot  * \remark if a custom stream is created but it can be treated as
80*16467b97STreehugger Robot  * a common tree node stream, then you may OR in this value to your type indicator
81*16467b97STreehugger Robot  */
82*16467b97STreehugger Robot #define	ANTLR3_COMMONTREENODE	0x0004
83*16467b97STreehugger Robot 
84*16467b97STreehugger Robot /** Type mask for input stream so we can switch in the above types; its value is the bitwise OR of the three indicators above.
85*16467b97STreehugger Robot  *  \remark DO NOT USE 0x0000 as a stream type!
86*16467b97STreehugger Robot  */
87*16467b97STreehugger Robot #define	ANTLR3_INPUT_MASK	0x0007
88*16467b97STreehugger Robot 
89*16467b97STreehugger Robot #ifdef __cplusplus
90*16467b97STreehugger Robot extern "C" {
91*16467b97STreehugger Robot #endif
92*16467b97STreehugger Robot 
93*16467b97STreehugger Robot typedef	struct ANTLR3_INT_STREAM_struct	/* Integer-stream interface: a few state fields plus a table of function pointers. */
94*16467b97STreehugger Robot {
95*16467b97STreehugger Robot     /** Input stream type indicator. Sometimes useful for error reporting etc.
96*16467b97STreehugger Robot      *  NOTE(review): presumably built from the ANTLR3_CHARSTREAM / ANTLR3_TOKENSTREAM / ANTLR3_COMMONTREENODE indicators - confirm. */
97*16467b97STreehugger Robot     ANTLR3_UINT32	    type;
98*16467b97STreehugger Robot 
99*16467b97STreehugger Robot     /** Potentially useful in error reporting and so on, this string is
100*16467b97STreehugger Robot      *  an identification of the input source. It may be NULL, so anything
101*16467b97STreehugger Robot      *  attempting to access it needs to check this and substitute a sensible
102*16467b97STreehugger Robot      *  default.
103*16467b97STreehugger Robot      */
104*16467b97STreehugger Robot     pANTLR3_STRING	      streamName;
105*16467b97STreehugger Robot 
106*16467b97STreehugger Robot     /** Pointer to the super structure that contains this interface. This
107*16467b97STreehugger Robot      *  will usually be a token stream or a tree stream.
108*16467b97STreehugger Robot      */
109*16467b97STreehugger Robot     void		    * super;
110*16467b97STreehugger Robot 
111*16467b97STreehugger Robot     /** Last marker position allocated
112*16467b97STreehugger Robot      */
113*16467b97STreehugger Robot     ANTLR3_MARKER	    lastMarker;
114*16467b97STreehugger Robot 
115*16467b97STreehugger Robot 	// Return a string that identifies the input source.
116*16467b97STreehugger Robot 	// NOTE(review): whether this may return NULL (as streamName may be NULL) is not visible here - confirm.
117*16467b97STreehugger Robot 	pANTLR3_STRING		(*getSourceName)	(struct ANTLR3_INT_STREAM_struct * intStream);
118*16467b97STreehugger Robot 
119*16467b97STreehugger Robot     /** Consume the next 'ANTLR3_UINT32' in the stream
120*16467b97STreehugger Robot      */
121*16467b97STreehugger Robot     void		    (*consume)	    (struct ANTLR3_INT_STREAM_struct * intStream);
122*16467b97STreehugger Robot 
123*16467b97STreehugger Robot     /** Get the ANTLR3_UINT32 at current input pointer + i ahead, where i=1 is the next ANTLR3_UINT32
124*16467b97STreehugger Robot      */
125*16467b97STreehugger Robot     ANTLR3_UINT32	    (*_LA)	    (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_INT32 i);
126*16467b97STreehugger Robot 
127*16467b97STreehugger Robot     /** Tell the stream to start buffering if it hasn't already.  Return
128*16467b97STreehugger Robot      *  current input position, index(), or some other marker so that
129*16467b97STreehugger Robot      *  when passed to rewind() you get back to the same spot.
130*16467b97STreehugger Robot      *  rewind(mark()) should not affect the input cursor.
131*16467b97STreehugger Robot      */
132*16467b97STreehugger Robot     ANTLR3_MARKER	    (*mark)	    (struct ANTLR3_INT_STREAM_struct * intStream);
133*16467b97STreehugger Robot 
134*16467b97STreehugger Robot     /** Return the current input symbol index 0..n where n indicates the
135*16467b97STreehugger Robot      *  last symbol has been read.
136*16467b97STreehugger Robot      */
137*16467b97STreehugger Robot     ANTLR3_MARKER	    (*index)	    (struct ANTLR3_INT_STREAM_struct * intStream);
138*16467b97STreehugger Robot 
139*16467b97STreehugger Robot     /** Reset the stream so that next call to index would return marker.
140*16467b97STreehugger Robot      *  The marker will usually be index() but it doesn't have to be.  It's
141*16467b97STreehugger Robot      *  just a marker to indicate what state the stream was in.  This is
142*16467b97STreehugger Robot      *  essentially calling release() and seek().  If there are markers
143*16467b97STreehugger Robot      *  created after this marker argument, this routine must unroll them
144*16467b97STreehugger Robot      *  like a stack.  Assume the state the stream was in when this marker
145*16467b97STreehugger Robot      *  was created.
146*16467b97STreehugger Robot      */
147*16467b97STreehugger Robot     void		    (*rewind)	    (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_MARKER marker);
148*16467b97STreehugger Robot 
149*16467b97STreehugger Robot     /** Reset the stream to the last marker position, without destroying the
150*16467b97STreehugger Robot      *  last marker position.
151*16467b97STreehugger Robot      */
152*16467b97STreehugger Robot     void		    (*rewindLast)   (struct ANTLR3_INT_STREAM_struct * intStream);
153*16467b97STreehugger Robot 
154*16467b97STreehugger Robot     /** You may want to commit to a backtrack but don't want to force the
155*16467b97STreehugger Robot      *  stream to keep bookkeeping objects around for a marker that is
156*16467b97STreehugger Robot      *  no longer necessary.  This will have the same behavior as
157*16467b97STreehugger Robot      *  rewind() except it releases resources without the backward seek.
158*16467b97STreehugger Robot      */
159*16467b97STreehugger Robot     void		    (*release)	    (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_MARKER mark);
160*16467b97STreehugger Robot 
161*16467b97STreehugger Robot     /** Set the input cursor to the position indicated by index.  This is
162*16467b97STreehugger Robot      *  normally used to seek ahead in the input stream.  No buffering is
163*16467b97STreehugger Robot      *  required to do this unless you know your stream will use seek to
164*16467b97STreehugger Robot      *  move backwards such as when backtracking.
165*16467b97STreehugger Robot      *
166*16467b97STreehugger Robot      *  This is different from rewind in its multi-directional
167*16467b97STreehugger Robot      *  requirement and in that its argument is strictly an input cursor (index).
168*16467b97STreehugger Robot      *
169*16467b97STreehugger Robot      *  For char streams, seeking forward must update the stream state such
170*16467b97STreehugger Robot      *  as line number.  For seeking backwards, you will be presumably
171*16467b97STreehugger Robot      *  backtracking using the mark/rewind mechanism that restores state and
172*16467b97STreehugger Robot      *  so this method does not need to update state when seeking backwards.
173*16467b97STreehugger Robot      *
174*16467b97STreehugger Robot      *  Currently, this method is only used for efficient backtracking, but
175*16467b97STreehugger Robot      *  in the future it may be used for incremental parsing.
176*16467b97STreehugger Robot      */
177*16467b97STreehugger Robot     void		    (*seek)	    (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_MARKER index);
178*16467b97STreehugger Robot 
179*16467b97STreehugger Robot     /** Only makes sense for streams that buffer everything up probably, but
180*16467b97STreehugger Robot      *  might be useful to display the entire stream or for testing.
181*16467b97STreehugger Robot      */
182*16467b97STreehugger Robot     ANTLR3_UINT32	    (*size)	    (struct ANTLR3_INT_STREAM_struct * intStream);
183*16467b97STreehugger Robot 
184*16467b97STreehugger Robot     /** Because the cost of the indirect call, though small in individual cases, can
185*16467b97STreehugger Robot      *  mount up if there are thousands of tokens (very large input streams), callers
186*16467b97STreehugger Robot      *  of size can optionally use this cached size field.
187*16467b97STreehugger Robot      */
188*16467b97STreehugger Robot     ANTLR3_UINT32	    cachedSize;
189*16467b97STreehugger Robot 
190*16467b97STreehugger Robot     /** Frees any resources that were allocated for the implementation of this
191*16467b97STreehugger Robot      *  interface. Usually this is just releasing the memory allocated
192*16467b97STreehugger Robot      *  for the structure itself, but it may of course do anything it needs to
193*16467b97STreehugger Robot      *  so long as it does not stamp on anything else.
194*16467b97STreehugger Robot      */
195*16467b97STreehugger Robot     void		    (*free)	    (struct ANTLR3_INT_STREAM_struct * stream);
196*16467b97STreehugger Robot 
197*16467b97STreehugger Robot }
198*16467b97STreehugger Robot     ANTLR3_INT_STREAM;
199*16467b97STreehugger Robot 
200*16467b97STreehugger Robot #ifdef __cplusplus
201*16467b97STreehugger Robot }
202*16467b97STreehugger Robot #endif
203*16467b97STreehugger Robot 
204*16467b97STreehugger Robot #endif
205*16467b97STreehugger Robot 
206