xref: /aosp_15_r20/external/antlr/runtime/C/doxygen/buildrec.dox (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot/// \page buildrec How to build Generated C Code
2*16467b97STreehugger Robot///
3*16467b97STreehugger Robot/// \section generated Generated Files
4*16467b97STreehugger Robot///
5*16467b97STreehugger Robot/// The antlr tool jar, run against a grammar file that targets the C language, will generate the following files
6*16467b97STreehugger Robot/// according to whether your grammar file contains a lexer, parser, combined or treeparser specification.
7*16467b97STreehugger Robot/// Your grammar file name and the subject of the grammar line in your file are expected to match. Here the generic name G is used:
8*16467b97STreehugger Robot///
9*16467b97STreehugger Robot/// <table>
10*16467b97STreehugger Robot/// <tr>
11*16467b97STreehugger Robot/// <th> Suffix </th>
12*16467b97STreehugger Robot/// <th> Generated files </th>
13*16467b97STreehugger Robot/// </tr>
14*16467b97STreehugger Robot/// <tr>
15*16467b97STreehugger Robot/// <td> lexer grammar (G.g3l) </td>
16*16467b97STreehugger Robot/// <td> GLexer.c GLexer.h</td>
17*16467b97STreehugger Robot/// </tr>
18*16467b97STreehugger Robot/// <tr>
19*16467b97STreehugger Robot/// <td> parser grammar (G.g3p) </td>
20*16467b97STreehugger Robot/// <td> GParser.c GParser.h </td>
21*16467b97STreehugger Robot/// </tr>
22*16467b97STreehugger Robot/// <tr>
23*16467b97STreehugger Robot/// <td> grammar G (G.g3pl) </td>
24*16467b97STreehugger Robot/// <td> GParser.c GParser.h GLexer.c GLexer.h</td>
25*16467b97STreehugger Robot/// </tr>
26*16467b97STreehugger Robot/// <tr>
27*16467b97STreehugger Robot/// <td> tree grammar G; (G.g3t) </td>
28*16467b97STreehugger Robot/// <td> G.c G.h </td>
29*16467b97STreehugger Robot/// </tr>
30*16467b97STreehugger Robot/// </table>
31*16467b97STreehugger Robot///
32*16467b97STreehugger Robot/// The generated .c files reference the .h files using &lt;G.h&gt;, so you must use <code>-I.</code> on your compiler command line
33*16467b97STreehugger Robot/// (or include the current directory in your include paths in Visual Studio). Additionally, the generated .h files reference
34*16467b97STreehugger Robot/// <code>antlr3.h</code>, so you must use <code>-I/path/to/antlr/include</code> (E.g. <code>-I /usr/local/include</code>) to reference the standard ANTLR include files.
35*16467b97STreehugger Robot///
36*16467b97STreehugger Robot/// In order to reference the library file at compile time (you can/should only reference one) you need to use the
37*16467b97STreehugger Robot/// <code>-L/path/to/antlr/lib</code> (E.g. <code>-L /usr/local/lib</code>) on Unix, or add the path to your "Additional Library Path" in
38*16467b97STreehugger Robot/// Visual Studio. You also need to specify the library using <code>-l</code> on Unix (E.g. <code>-L /usr/local/lib -l antlr3c</code>) or add <code>antlr3c_dll.lib</code>
39*16467b97STreehugger Robot/// to your Additional Library Dependencies in Visual Studio.
40*16467b97STreehugger Robot///
41*16467b97STreehugger Robot/// In case it isn't obvious, the generated files may be used to produce either a library or an executable (.EXE on Windows) file.
42*16467b97STreehugger Robot///
43*16467b97STreehugger Robot/// If you use the shared version of the libraries (DLL or .so/.a), then you must ship the library with your
44*16467b97STreehugger Robot/// application, which must run in an environment where the library can be found by the runtime linker/loader.
45*16467b97STreehugger Robot/// This usually involves specifying the directory in which the library lives to an environment variable.
46*16467b97STreehugger Robot/// On Windows, X:{yourwininstalldir}\\system32 will be searched automatically.
47*16467b97STreehugger Robot///
48*16467b97STreehugger Robot/// \section invoke Invoking Your Generated Recognizer
49*16467b97STreehugger Robot///
50*16467b97STreehugger Robot/// In order to run your lexer/parser/tree parser combination, you will need a small function (or main)
51*16467b97STreehugger Robot/// that controls the sequence of events, from reading the input file or string, through to
52*16467b97STreehugger Robot/// invoking the tree parser(s) and retrieving the results. See "Using the ANTLR3C C Target" for more
53*16467b97STreehugger Robot/// detailed instructions, but if you just want to get going as fast as possible, study the following
54*16467b97STreehugger Robot/// code example.
55*16467b97STreehugger Robot///
56*16467b97STreehugger Robot/// \code
57*16467b97STreehugger Robot///
58*16467b97STreehugger Robot/// // You may adopt your own practices by all means, but in general it is best
59*16467b97STreehugger Robot/// // to create a single include for your project, that will include the ANTLR3 C
60*16467b97STreehugger Robot/// // runtime header files, the generated header files (all of which are safe to include
61*16467b97STreehugger Robot/// // multiple times) and your own project related header files. Use <> to include and
62*16467b97STreehugger Robot/// // -I on the compile line (which vs2005 now handles, where vs2003 did not).
63*16467b97STreehugger Robot/// //
64*16467b97STreehugger Robot/// #include    <treeparser.h>
65*16467b97STreehugger Robot///
66*16467b97STreehugger Robot/// // Main entry point for this example
67*16467b97STreehugger Robot/// //
68*16467b97STreehugger Robot/// int ANTLR3_CDECL
69*16467b97STreehugger Robot/// main	(int argc, char *argv[])
70*16467b97STreehugger Robot/// {
71*16467b97STreehugger Robot///     // Now we declare the ANTLR related local variables we need.
72*16467b97STreehugger Robot///     // Note that unless you are convinced you will never need thread safe
73*16467b97STreehugger Robot///     // versions for your project, then you should always create such things
74*16467b97STreehugger Robot///     // as instance variables for each invocation.
75*16467b97STreehugger Robot///     // -------------------
76*16467b97STreehugger Robot///
77*16467b97STreehugger Robot///     // Name of the input file. Note that we always use the abstract type pANTLR3_UINT8
78*16467b97STreehugger Robot///     // for ASCII/8 bit strings - the runtime library guarantees that this will be
79*16467b97STreehugger Robot///     // good on all platforms. This is a general rule - always use the ANTLR3 supplied
80*16467b97STreehugger Robot///     // typedefs for pointers/types/etc.
81*16467b97STreehugger Robot///     //
82*16467b97STreehugger Robot///     pANTLR3_UINT8	    fName;
83*16467b97STreehugger Robot///
84*16467b97STreehugger Robot///     // The ANTLR3 character input stream, which abstracts the input source such that
85*16467b97STreehugger Robot///     // it is easy to provide input from different sources such as files, or
86*16467b97STreehugger Robot///     // memory strings.
87*16467b97STreehugger Robot///     //
88*16467b97STreehugger Robot///     // For an 8Bit/latin-1/etc memory string use:
89*16467b97STreehugger Robot///     //	    input = antlr3New8BitStringInPlaceStream (stringtouse, (ANTLR3_UINT32) length, NULL);
90*16467b97STreehugger Robot///     //
91*16467b97STreehugger Robot///     // For a UTF16 memory string use:
92*16467b97STreehugger Robot///     //	    input = antlr3NewUTF16StringInPlaceStream (stringtouse, (ANTLR3_UINT32) length, NULL);
93*16467b97STreehugger Robot///     //
94*16467b97STreehugger Robot///     // For input from a file, see code below
95*16467b97STreehugger Robot///     //
96*16467b97STreehugger Robot///     // Note that this is essentially a pointer to a structure containing pointers to functions.
97*16467b97STreehugger Robot///     // You can create your own input stream type (copy one of the existing ones) and override any
98*16467b97STreehugger Robot///     // individual function by installing your own pointer after you have created the standard
99*16467b97STreehugger Robot///     // version.
100*16467b97STreehugger Robot///     //
101*16467b97STreehugger Robot///     pANTLR3_INPUT_STREAM	    input;
102*16467b97STreehugger Robot///
103*16467b97STreehugger Robot///     // The lexer is of course generated by ANTLR, and so the lexer type is not upper case.
104*16467b97STreehugger Robot///     // The lexer is supplied with a pANTLR3_INPUT_STREAM from whence it consumes its
105*16467b97STreehugger Robot///     // input and generates a token stream as output. This is the ctx (CTX macro) pointer
106*16467b97STreehugger Robot///		// for your lexer.
107*16467b97STreehugger Robot///     //
108*16467b97STreehugger Robot///     pLangLexer			    lxr;
109*16467b97STreehugger Robot///
110*16467b97STreehugger Robot///     // The token stream is produced by the ANTLR3 generated lexer. Again it is a structure based
111*16467b97STreehugger Robot///     // API/Object, which you can customise and override methods of as you wish. A token stream is
112*16467b97STreehugger Robot///     // supplied to the generated parser, and you can write your own token stream and pass this in
113*16467b97STreehugger Robot///     // if you wish.
114*16467b97STreehugger Robot///     //
115*16467b97STreehugger Robot///     pANTLR3_COMMON_TOKEN_STREAM	    tstream;
116*16467b97STreehugger Robot///
117*16467b97STreehugger Robot///     // The Lang parser is also generated by ANTLR and accepts a token stream as explained
118*16467b97STreehugger Robot///     // above. The token stream can be any source in fact, so long as it implements the
119*16467b97STreehugger Robot///     // ANTLR3_TOKEN_SOURCE interface. In this case the parser does not return anything
120*16467b97STreehugger Robot///     // but it can of course specify any kind of return type from the rule you invoke
121*16467b97STreehugger Robot///     // when calling it. This is the ctx (CTX macro) pointer for your parser.
122*16467b97STreehugger Robot///     //
123*16467b97STreehugger Robot///     pLangParser			    psr;
124*16467b97STreehugger Robot///
125*16467b97STreehugger Robot///     // The parser produces an AST, which is returned as a member of the return type of
126*16467b97STreehugger Robot///     // the starting rule (any rule can start first of course). This is a generated type
127*16467b97STreehugger Robot///     // based upon the rule we start with.
128*16467b97STreehugger Robot///     //
129*16467b97STreehugger Robot///     LangParser_decl_return	    langAST;
130*16467b97STreehugger Robot///
131*16467b97STreehugger Robot///
132*16467b97STreehugger Robot///     // The tree nodes are managed by a tree adaptor, which doles
133*16467b97STreehugger Robot///     // out the nodes upon request. You can make your own tree types and adaptors
134*16467b97STreehugger Robot///     // and override the built in versions. See runtime source for details and
135*16467b97STreehugger Robot///     // eventually the wiki entry for the C target.
136*16467b97STreehugger Robot///     //
137*16467b97STreehugger Robot///     pANTLR3_COMMON_TREE_NODE_STREAM	nodes;
138*16467b97STreehugger Robot///
139*16467b97STreehugger Robot///     // Finally, when the parser runs, it will produce an AST that can be traversed by the
140*16467b97STreehugger Robot///     // tree parser: c.f. LangDumpDecl.g3t. This is the ctx (CTX macro) pointer for your
141*16467b97STreehugger Robot///		// tree parser.
142*16467b97STreehugger Robot///     //
143*16467b97STreehugger Robot///     pLangDumpDecl		    treePsr;
144*16467b97STreehugger Robot///
145*16467b97STreehugger Robot///     // Create the input stream based upon the argument supplied to us on the command line
146*16467b97STreehugger Robot///     // for this example, the input will always default to ./input if there is no explicit
147*16467b97STreehugger Robot///     // argument.
148*16467b97STreehugger Robot///     //
149*16467b97STreehugger Robot/// 	if (argc < 2 || argv[1] == NULL)
150*16467b97STreehugger Robot/// 	{
151*16467b97STreehugger Robot/// 		fName	=(pANTLR3_UINT8)"./input"; // Note in VS2005 debug, working directory must be configured
152*16467b97STreehugger Robot/// 	}
153*16467b97STreehugger Robot/// 	else
154*16467b97STreehugger Robot/// 	{
155*16467b97STreehugger Robot/// 		fName	= (pANTLR3_UINT8)argv[1];
156*16467b97STreehugger Robot/// 	}
157*16467b97STreehugger Robot///
158*16467b97STreehugger Robot///     // Create the input stream using the supplied file name
159*16467b97STreehugger Robot///     // (antlr38BitFileStreamNew is for 8 bit input; use the UTF16 equivalent for UTF16 input).
160*16467b97STreehugger Robot///     //
161*16467b97STreehugger Robot///     input	= antlr38BitFileStreamNew(fName);
162*16467b97STreehugger Robot///
163*16467b97STreehugger Robot///     // The input will be created successfully, providing that there is enough
164*16467b97STreehugger Robot///     // memory and the file exists etc
165*16467b97STreehugger Robot///     //
166*16467b97STreehugger Robot///     if ( input == NULL )
167*16467b97STreehugger Robot///     {
168*16467b97STreehugger Robot/// 			ANTLR3_FPRINTF(stderr, "Unable to open file %s due to malloc() failure1\n", (char *)fName); exit(ANTLR3_ERR_NOMEM);
169*16467b97STreehugger Robot///     }
170*16467b97STreehugger Robot///
171*16467b97STreehugger Robot///     // Our input stream is now open and all set to go, so we can create a new instance of our
172*16467b97STreehugger Robot///     // lexer and set the lexer input to our input stream:
173*16467b97STreehugger Robot///     //  (file | memory | ?) --> inputstream -> lexer --> tokenstream --> parser ( --> treeparser )?
174*16467b97STreehugger Robot///     //
175*16467b97STreehugger Robot///     lxr	    = LangLexerNew(input);	    // LangLexerNew is generated by ANTLR
176*16467b97STreehugger Robot///
177*16467b97STreehugger Robot///     // Need to check for errors
178*16467b97STreehugger Robot///     //
179*16467b97STreehugger Robot///     if ( lxr == NULL )
180*16467b97STreehugger Robot///     {
181*16467b97STreehugger Robot/// 			ANTLR3_FPRINTF(stderr, "Unable to create the lexer due to malloc() failure1\n");
182*16467b97STreehugger Robot/// 			exit(ANTLR3_ERR_NOMEM);
183*16467b97STreehugger Robot///     }
184*16467b97STreehugger Robot///
185*16467b97STreehugger Robot///     // Our lexer is in place, so we can create the token stream from it
186*16467b97STreehugger Robot///     // NB: Nothing happens yet other than the file has been read. We are just
187*16467b97STreehugger Robot///     // connecting all these things together and they will be invoked when we
188*16467b97STreehugger Robot///     // call the parser rule. ANTLR3_SIZE_HINT can be left at the default usually
189*16467b97STreehugger Robot///     // unless you have a very large token stream/input. Each generated lexer
190*16467b97STreehugger Robot///     // provides a token source interface, which is the second argument to the
191*16467b97STreehugger Robot///     // token stream creator.
192*16467b97STreehugger Robot///     // Note that even if you implement your own token structure, it will always
193*16467b97STreehugger Robot///     // contain a standard common token within it and this is the pointer that
194*16467b97STreehugger Robot///     // you pass around to everything else. A common token has a pointer within
195*16467b97STreehugger Robot///     // it that should point to your own outer token structure.
196*16467b97STreehugger Robot///     //
197*16467b97STreehugger Robot///     tstream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, lxr->pLexer->tokSource);
198*16467b97STreehugger Robot///
199*16467b97STreehugger Robot///     if (tstream == NULL)
200*16467b97STreehugger Robot///     {
201*16467b97STreehugger Robot/// 		ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate token stream\n");
202*16467b97STreehugger Robot/// 		exit(ANTLR3_ERR_NOMEM);
203*16467b97STreehugger Robot///     }
204*16467b97STreehugger Robot///
205*16467b97STreehugger Robot///     // Finally, now that we have our lexer constructed, we can create the parser
206*16467b97STreehugger Robot///     //
207*16467b97STreehugger Robot///     psr	    = LangParserNew(tstream);  // LangParserNew is generated by ANTLR3
208*16467b97STreehugger Robot///
209*16467b97STreehugger Robot///     if (psr == NULL)
210*16467b97STreehugger Robot///     {
211*16467b97STreehugger Robot/// 		ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate parser\n");
212*16467b97STreehugger Robot/// 		exit(ANTLR3_ERR_NOMEM);
213*16467b97STreehugger Robot///     }
214*16467b97STreehugger Robot///
215*16467b97STreehugger Robot///     // We are all ready to go. Though that looked complicated at first glance,
216*16467b97STreehugger Robot///     // I am sure you will see that in fact most of the code above is dealing
217*16467b97STreehugger Robot///     // with errors and there isn't really that much to do (isn't this always the
218*16467b97STreehugger Robot///     // case in C? ;-).
219*16467b97STreehugger Robot///     //
220*16467b97STreehugger Robot///     // So, we now invoke the parser. All elements of ANTLR3 generated C components
221*16467b97STreehugger Robot///     // as well as the ANTLR C runtime library itself are pseudo objects. This means
222*16467b97STreehugger Robot///     // that they are represented as pointers to structures, which contain any
223*16467b97STreehugger Robot///     // instance data they need, and a set of pointers to other interfaces or
224*16467b97STreehugger Robot///     // 'methods'. Note that in general, these few pointers we have created here are
225*16467b97STreehugger Robot///     // the only things you will ever explicitly free() as everything else is created
226*16467b97STreehugger Robot///     // via factories, that allocate memory efficiently and free() everything they use
227*16467b97STreehugger Robot///     // automatically when you close the parser/lexer/etc.
228*16467b97STreehugger Robot///     //
229*16467b97STreehugger Robot///     // Note that this means only that the methods are always called via the object
230*16467b97STreehugger Robot///     // pointer and the first argument to any method, is a pointer to the structure itself.
231*16467b97STreehugger Robot///     // It also has the side advantage, if you are using an IDE such as VS2005 that can do it
232*16467b97STreehugger Robot///     // that when you type ->, you will see a list of all the methods the object supports.
233*16467b97STreehugger Robot///     //
234*16467b97STreehugger Robot///     langAST = psr->decl(psr);
235*16467b97STreehugger Robot///
236*16467b97STreehugger Robot///     // If the parser ran correctly, we will have a tree to parse. In general I recommend
237*16467b97STreehugger Robot///     // keeping your own flags as part of the error trapping, but here is how you can
238*16467b97STreehugger Robot///     // work out if there were errors if you are using the generic error messages
239*16467b97STreehugger Robot///     //
240*16467b97STreehugger Robot/// 	if (psr->pParser->rec->errorCount > 0)
241*16467b97STreehugger Robot/// 	{
242*16467b97STreehugger Robot/// 		ANTLR3_FPRINTF(stderr, "The parser returned %d errors, tree walking aborted.\n", psr->pParser->rec->errorCount);
243*16467b97STreehugger Robot///
244*16467b97STreehugger Robot/// 	}
245*16467b97STreehugger Robot/// 	else
246*16467b97STreehugger Robot/// 	{
247*16467b97STreehugger Robot/// 		nodes	= antlr3CommonTreeNodeStreamNewTree(langAST.tree, ANTLR3_SIZE_HINT); // SIZE HINT WILL SOON BE DEPRECATED!!
248*16467b97STreehugger Robot///
249*16467b97STreehugger Robot/// 		// Tree parsers are given a common tree node stream (or your override)
250*16467b97STreehugger Robot/// 		//
251*16467b97STreehugger Robot/// 		treePsr	= LangDumpDeclNew(nodes);
252*16467b97STreehugger Robot///
253*16467b97STreehugger Robot/// 		treePsr->decl(treePsr);
254*16467b97STreehugger Robot/// 		nodes   ->free  (nodes);	    nodes	= NULL;
255*16467b97STreehugger Robot/// 		treePsr ->free  (treePsr);	    treePsr	= NULL;
256*16467b97STreehugger Robot/// 	}
257*16467b97STreehugger Robot///
258*16467b97STreehugger Robot/// 	// We did not return anything from this parser rule, so we can finish. It only remains
259*16467b97STreehugger Robot/// 	// to close down our open objects, in the reverse order we created them
260*16467b97STreehugger Robot/// 	//
261*16467b97STreehugger Robot/// 	psr	    ->free  (psr);		psr		= NULL;
262*16467b97STreehugger Robot/// 	tstream ->free  (tstream);	tstream	= NULL;
263*16467b97STreehugger Robot/// 	lxr	    ->free  (lxr);	    lxr		= NULL;
264*16467b97STreehugger Robot/// 	input   ->close (input);	input	= NULL;
265*16467b97STreehugger Robot///
266*16467b97STreehugger Robot///     return 0;
267*16467b97STreehugger Robot/// }
268*16467b97STreehugger Robot/// \endcode
269*16467b97STreehugger Robot///
270