1*16467b97STreehugger Robot /** \file 2*16467b97STreehugger Robot * Defines the the class interface for an antlr3 INTSTREAM. 3*16467b97STreehugger Robot * 4*16467b97STreehugger Robot * Certain functionality (such as DFAs for instance) abstract the stream of tokens 5*16467b97STreehugger Robot * or characters in to a steam of integers. Hence this structure should be included 6*16467b97STreehugger Robot * in any stream that is able to provide the output as a stream of integers (which is anything 7*16467b97STreehugger Robot * basically. 8*16467b97STreehugger Robot * 9*16467b97STreehugger Robot * There are no specific implementations of the methods in this interface in general. Though 10*16467b97STreehugger Robot * for purposes of casting and so on, it may be necesssary to implement a function with 11*16467b97STreehugger Robot * the signature in this interface which abstracts the base immplementation. In essence though 12*16467b97STreehugger Robot * the base stream provides a pointer to this interface, within which it installs its 13*16467b97STreehugger Robot * normal match() functions and so on. Interaces such as DFA are then passed the pANTLR3_INT_STREAM 14*16467b97STreehugger Robot * and can treat any input as an int stream. 15*16467b97STreehugger Robot * 16*16467b97STreehugger Robot * For instance, a lexer implements a pANTLR3_BASE_RECOGNIZER, within which there is a pANTLR3_INT_STREAM. 17*16467b97STreehugger Robot * However, a pANTLR3_INPUT_STREAM also provides a pANTLR3_INT_STREAM, which it has constructed from 18*16467b97STreehugger Robot * it's normal interface when it was created. This is then pointed at by the pANTLR_BASE_RECOGNIZER 19*16467b97STreehugger Robot * when it is intialized with a pANTLR3_INPUT_STREAM. 20*16467b97STreehugger Robot * 21*16467b97STreehugger Robot * Similarly if a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TOKEN_STREAM, then the 22*16467b97STreehugger Robot * pANTLR3_INT_STREAM is taken from the pANTLR3_TOKEN_STREAM. 23*16467b97STreehugger Robot * 24*16467b97STreehugger Robot * If a pANTLR3_BASE_RECOGNIZER is initialized with a pANTLR3_TREENODE_STREAM, then guess where 25*16467b97STreehugger Robot * the pANTLR3_INT_STREAM comes from? 26*16467b97STreehugger Robot * 27*16467b97STreehugger Robot * Note that because the context pointer points to the actual interface structure that is providing 28*16467b97STreehugger Robot * the ANTLR3_INT_STREAM it is defined as a (void *) in this interface. There is no direct implementation 29*16467b97STreehugger Robot * of an ANTLR3_INT_STREAM (unless someone did not understand what I was doing here =;?P 30*16467b97STreehugger Robot */ 31*16467b97STreehugger Robot #ifndef _ANTLR3_INTSTREAM_H 32*16467b97STreehugger Robot #define _ANTLR3_INTSTREAM_H 33*16467b97STreehugger Robot 34*16467b97STreehugger Robot // [The "BSD licence"] 35*16467b97STreehugger Robot // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 36*16467b97STreehugger Robot // http://www.temporal-wave.com 37*16467b97STreehugger Robot // http://www.linkedin.com/in/jimidle 38*16467b97STreehugger Robot // 39*16467b97STreehugger Robot // All rights reserved. 40*16467b97STreehugger Robot // 41*16467b97STreehugger Robot // Redistribution and use in source and binary forms, with or without 42*16467b97STreehugger Robot // modification, are permitted provided that the following conditions 43*16467b97STreehugger Robot // are met: 44*16467b97STreehugger Robot // 1. Redistributions of source code must retain the above copyright 45*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer. 46*16467b97STreehugger Robot // 2. Redistributions in binary form must reproduce the above copyright 47*16467b97STreehugger Robot // notice, this list of conditions and the following disclaimer in the 48*16467b97STreehugger Robot // documentation and/or other materials provided with the distribution. 49*16467b97STreehugger Robot // 3. The name of the author may not be used to endorse or promote products 50*16467b97STreehugger Robot // derived from this software without specific prior written permission. 51*16467b97STreehugger Robot // 52*16467b97STreehugger Robot // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 53*16467b97STreehugger Robot // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 54*16467b97STreehugger Robot // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 55*16467b97STreehugger Robot // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 56*16467b97STreehugger Robot // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 57*16467b97STreehugger Robot // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 58*16467b97STreehugger Robot // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 59*16467b97STreehugger Robot // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 60*16467b97STreehugger Robot // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 61*16467b97STreehugger Robot // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 62*16467b97STreehugger Robot 63*16467b97STreehugger Robot #include <antlr3defs.h> 64*16467b97STreehugger Robot #include <antlr3commontoken.h> 65*16467b97STreehugger Robot 66*16467b97STreehugger Robot /** Type indicator for a character stream 67*16467b97STreehugger Robot * \remark if a custom stream is created but it can be treated as 68*16467b97STreehugger Robot * a char stream, then you may OR in this value to your type indicator 69*16467b97STreehugger Robot */ 70*16467b97STreehugger Robot #define ANTLR3_CHARSTREAM 0x0001 71*16467b97STreehugger Robot 72*16467b97STreehugger Robot /** Type indicator for a Token stream 73*16467b97STreehugger Robot * \remark if a custom stream is created but it can be treated as 74*16467b97STreehugger Robot * a token stream, then you may OR in this value to your type indicator 75*16467b97STreehugger Robot */ 76*16467b97STreehugger Robot #define ANTLR3_TOKENSTREAM 0x0002 77*16467b97STreehugger Robot 78*16467b97STreehugger Robot /** Type indicator for a common tree node stream 79*16467b97STreehugger Robot * \remark if a custom stream is created but it can be treated as 80*16467b97STreehugger Robot * a common tree node stream, then you may OR in this value to your type indicator 81*16467b97STreehugger Robot */ 82*16467b97STreehugger Robot #define ANTLR3_COMMONTREENODE 0x0004 83*16467b97STreehugger Robot 84*16467b97STreehugger Robot /** Type mask for input stream so we can switch in the above types 85*16467b97STreehugger Robot * \remark DO NOT USE 0x0000 as a stream type! 86*16467b97STreehugger Robot */ 87*16467b97STreehugger Robot #define ANTLR3_INPUT_MASK 0x0007 88*16467b97STreehugger Robot 89*16467b97STreehugger Robot #ifdef __cplusplus 90*16467b97STreehugger Robot extern "C" { 91*16467b97STreehugger Robot #endif 92*16467b97STreehugger Robot 93*16467b97STreehugger Robot typedef struct ANTLR3_INT_STREAM_struct 94*16467b97STreehugger Robot { 95*16467b97STreehugger Robot /** Input stream type indicator. Sometimes useful for error reporting etc. 96*16467b97STreehugger Robot */ 97*16467b97STreehugger Robot ANTLR3_UINT32 type; 98*16467b97STreehugger Robot 99*16467b97STreehugger Robot /** Potentially useful in error reporting and so on, this string is 100*16467b97STreehugger Robot * an identification of the input source. It may be NULL, so anything 101*16467b97STreehugger Robot * attempting to access it needs to check this and substitute a sensible 102*16467b97STreehugger Robot * default. 103*16467b97STreehugger Robot */ 104*16467b97STreehugger Robot pANTLR3_STRING streamName; 105*16467b97STreehugger Robot 106*16467b97STreehugger Robot /** Pointer to the super structure that contains this interface. This 107*16467b97STreehugger Robot * will usually be a token stream or a tree stream. 108*16467b97STreehugger Robot */ 109*16467b97STreehugger Robot void * super; 110*16467b97STreehugger Robot 111*16467b97STreehugger Robot /** Last marker position allocated 112*16467b97STreehugger Robot */ 113*16467b97STreehugger Robot ANTLR3_MARKER lastMarker; 114*16467b97STreehugger Robot 115*16467b97STreehugger Robot // Return a string that identifies the input source 116*16467b97STreehugger Robot // 117*16467b97STreehugger Robot pANTLR3_STRING (*getSourceName) (struct ANTLR3_INT_STREAM_struct * intStream); 118*16467b97STreehugger Robot 119*16467b97STreehugger Robot /** Consume the next 'ANTR3_UINT32' in the stream 120*16467b97STreehugger Robot */ 121*16467b97STreehugger Robot void (*consume) (struct ANTLR3_INT_STREAM_struct * intStream); 122*16467b97STreehugger Robot 123*16467b97STreehugger Robot /** Get ANTLR3_UINT32 at current input pointer + i ahead where i=1 is next ANTLR3_UINT32 124*16467b97STreehugger Robot */ 125*16467b97STreehugger Robot ANTLR3_UINT32 (*_LA) (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_INT32 i); 126*16467b97STreehugger Robot 127*16467b97STreehugger Robot /** Tell the stream to start buffering if it hasn't already. Return 128*16467b97STreehugger Robot * current input position, index(), or some other marker so that 129*16467b97STreehugger Robot * when passed to rewind() you get back to the same spot. 130*16467b97STreehugger Robot * rewind(mark()) should not affect the input cursor. 131*16467b97STreehugger Robot */ 132*16467b97STreehugger Robot ANTLR3_MARKER (*mark) (struct ANTLR3_INT_STREAM_struct * intStream); 133*16467b97STreehugger Robot 134*16467b97STreehugger Robot /** Return the current input symbol index 0..n where n indicates the 135*16467b97STreehugger Robot * last symbol has been read. 136*16467b97STreehugger Robot */ 137*16467b97STreehugger Robot ANTLR3_MARKER (*index) (struct ANTLR3_INT_STREAM_struct * intStream); 138*16467b97STreehugger Robot 139*16467b97STreehugger Robot /** Reset the stream so that next call to index would return marker. 140*16467b97STreehugger Robot * The marker will usually be index() but it doesn't have to be. It's 141*16467b97STreehugger Robot * just a marker to indicate what state the stream was in. This is 142*16467b97STreehugger Robot * essentially calling release() and seek(). If there are markers 143*16467b97STreehugger Robot * created after this marker argument, this routine must unroll them 144*16467b97STreehugger Robot * like a stack. Assume the state the stream was in when this marker 145*16467b97STreehugger Robot * was created. 146*16467b97STreehugger Robot */ 147*16467b97STreehugger Robot void (*rewind) (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_MARKER marker); 148*16467b97STreehugger Robot 149*16467b97STreehugger Robot /** Reset the stream to the last marker position, witouh destryoing the 150*16467b97STreehugger Robot * last marker position. 151*16467b97STreehugger Robot */ 152*16467b97STreehugger Robot void (*rewindLast) (struct ANTLR3_INT_STREAM_struct * intStream); 153*16467b97STreehugger Robot 154*16467b97STreehugger Robot /** You may want to commit to a backtrack but don't want to force the 155*16467b97STreehugger Robot * stream to keep bookkeeping objects around for a marker that is 156*16467b97STreehugger Robot * no longer necessary. This will have the same behavior as 157*16467b97STreehugger Robot * rewind() except it releases resources without the backward seek. 158*16467b97STreehugger Robot */ 159*16467b97STreehugger Robot void (*release) (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_MARKER mark); 160*16467b97STreehugger Robot 161*16467b97STreehugger Robot /** Set the input cursor to the position indicated by index. This is 162*16467b97STreehugger Robot * normally used to seek ahead in the input stream. No buffering is 163*16467b97STreehugger Robot * required to do this unless you know your stream will use seek to 164*16467b97STreehugger Robot * move backwards such as when backtracking. 165*16467b97STreehugger Robot * 166*16467b97STreehugger Robot * This is different from rewind in its multi-directional 167*16467b97STreehugger Robot * requirement and in that its argument is strictly an input cursor (index). 168*16467b97STreehugger Robot * 169*16467b97STreehugger Robot * For char streams, seeking forward must update the stream state such 170*16467b97STreehugger Robot * as line number. For seeking backwards, you will be presumably 171*16467b97STreehugger Robot * backtracking using the mark/rewind mechanism that restores state and 172*16467b97STreehugger Robot * so this method does not need to update state when seeking backwards. 173*16467b97STreehugger Robot * 174*16467b97STreehugger Robot * Currently, this method is only used for efficient backtracking, but 175*16467b97STreehugger Robot * in the future it may be used for incremental parsing. 176*16467b97STreehugger Robot */ 177*16467b97STreehugger Robot void (*seek) (struct ANTLR3_INT_STREAM_struct * intStream, ANTLR3_MARKER index); 178*16467b97STreehugger Robot 179*16467b97STreehugger Robot /** Only makes sense for streams that buffer everything up probably, but 180*16467b97STreehugger Robot * might be useful to display the entire stream or for testing. 181*16467b97STreehugger Robot */ 182*16467b97STreehugger Robot ANTLR3_UINT32 (*size) (struct ANTLR3_INT_STREAM_struct * intStream); 183*16467b97STreehugger Robot 184*16467b97STreehugger Robot /** Because the indirect call, though small in individual cases can 185*16467b97STreehugger Robot * mount up if there are thousands of tokens (very large input streams), callers 186*16467b97STreehugger Robot * of size can optionally use this cached size field. 187*16467b97STreehugger Robot */ 188*16467b97STreehugger Robot ANTLR3_UINT32 cachedSize; 189*16467b97STreehugger Robot 190*16467b97STreehugger Robot /** Frees any resources that were allocated for the implementation of this 191*16467b97STreehugger Robot * interface. Usually this is just releasing the memory allocated 192*16467b97STreehugger Robot * for the structure itself, but it may of course do anything it need to 193*16467b97STreehugger Robot * so long as it does not stamp on anything else. 194*16467b97STreehugger Robot */ 195*16467b97STreehugger Robot void (*free) (struct ANTLR3_INT_STREAM_struct * stream); 196*16467b97STreehugger Robot 197*16467b97STreehugger Robot } 198*16467b97STreehugger Robot ANTLR3_INT_STREAM; 199*16467b97STreehugger Robot 200*16467b97STreehugger Robot #ifdef __cplusplus 201*16467b97STreehugger Robot } 202*16467b97STreehugger Robot #endif 203*16467b97STreehugger Robot 204*16467b97STreehugger Robot #endif 205*16467b97STreehugger Robot 206