1*16467b97STreehugger Robot/// \page buildrec How to build Generated C Code 2*16467b97STreehugger Robot/// 3*16467b97STreehugger Robot/// \section generated Generated Files 4*16467b97STreehugger Robot/// 5*16467b97STreehugger Robot/// The antlr tool jar, run against a grammar file that targets the C language, will generate the following files 6*16467b97STreehugger Robot/// according to whether your grammar file contains a lexer, parser, combined or treeparser specification. 7*16467b97STreehugger Robot/// Your grammar file name and the subject of the grammar line in your file are expected to match. Here the generic name G is used: 8*16467b97STreehugger Robot/// 9*16467b97STreehugger Robot/// <table> 10*16467b97STreehugger Robot/// <tr> 11*16467b97STreehugger Robot/// <th> Suffix </th> 12*16467b97STreehugger Robot/// <th> Generated files </th> 13*16467b97STreehugger Robot/// </tr> 14*16467b97STreehugger Robot/// <tr> 15*16467b97STreehugger Robot/// <td> lexer grammar (G.g3l) </td> 16*16467b97STreehugger Robot/// <td> GLexer.c GLexer.h</td> 17*16467b97STreehugger Robot/// </tr> 18*16467b97STreehugger Robot/// <tr> 19*16467b97STreehugger Robot/// <td> parser grammar (G.g3p) </td> 20*16467b97STreehugger Robot/// <td> GParser.c GParser.h </td> 21*16467b97STreehugger Robot/// </tr> 22*16467b97STreehugger Robot/// <tr> 23*16467b97STreehugger Robot/// <td> grammar G (G.g3pl) </td> 24*16467b97STreehugger Robot/// <td> GParser.c GParser.h GLexer.c GLexer.h</td> 25*16467b97STreehugger Robot/// </tr> 26*16467b97STreehugger Robot/// <tr> 27*16467b97STreehugger Robot/// <td> tree grammar G; (G.g3t) </td> 28*16467b97STreehugger Robot/// <td> G.c G.h </td> 29*16467b97STreehugger Robot/// </tr> 30*16467b97STreehugger Robot/// </table> 31*16467b97STreehugger Robot/// 32*16467b97STreehugger Robot/// The generated .c files reference the .h files using <G.h>, so you must use <code>-I.</code> on your compiler command line 33*16467b97STreehugger Robot/// (or include the current directory 
in your include paths in Visual Studio). Additionally, the generated .h files reference 34*16467b97STreehugger Robot/// <code>antlr3.h</code>, so you must use <code>-I/path/to/antlr/include</code> (E.g. <code>-I /usr/local/include</code>) to reference the standard ANTLR include files. 35*16467b97STreehugger Robot/// 36*16467b97STreehugger Robot/// In order to reference the library file at link time (you can/should only reference one) you need to use the 37*16467b97STreehugger Robot/// <code>-L/path/to/antlr/lib</code> (E.g. <code>-L /usr/local/lib</code>) on Unix, or add the path to your "Additional Library Path" in 38*16467b97STreehugger Robot/// Visual Studio. You also need to specify the library using <code>-l</code> on Unix (E.g. <code>-L /usr/local/lib -l antlr3c</code>) or add <code>antlr3c_dll.lib</code> 39*16467b97STreehugger Robot/// to your Additional Library Dependencies in Visual Studio. 40*16467b97STreehugger Robot/// 41*16467b97STreehugger Robot/// In case it isn't obvious, the generated files may be used to produce either a library or an executable (.EXE on Windows) file. 42*16467b97STreehugger Robot/// 43*16467b97STreehugger Robot/// If you use the shared version of the libraries (DLL or .so/.a), then you must ship the library with your 44*16467b97STreehugger Robot/// application, and the application must run in an environment in which the library can be found by the runtime linker/loader. 45*16467b97STreehugger Robot/// This usually involves adding the directory in which the library lives to an environment variable. 46*16467b97STreehugger Robot/// On Windows, X:{yourwininstalldir}\system32 will be searched automatically. 
47*16467b97STreehugger Robot/// 48*16467b97STreehugger Robot/// \section invoke Invoking Your Generated Recognizer 49*16467b97STreehugger Robot/// 50*16467b97STreehugger Robot/// In order to run your lexer/parser/tree parser combination, you will need a small function (or main) 51*16467b97STreehugger Robot/// that controls the sequence of events, from reading the input file or string, through to 52*16467b97STreehugger Robot/// invoking the tree parser(s) and retrieving the results. See "Using the ANTLR3C C Target" for more 53*16467b97STreehugger Robot/// detailed instructions, but if you just want to get going as fast as possible, study the following 54*16467b97STreehugger Robot/// code example. 55*16467b97STreehugger Robot/// 56*16467b97STreehugger Robot/// \code 57*16467b97STreehugger Robot/// 58*16467b97STreehugger Robot/// // You may adopt your own practices by all means, but in general it is best 59*16467b97STreehugger Robot/// // to create a single include for your project, that will include the ANTLR3 C 60*16467b97STreehugger Robot/// // runtime header files, the generated header files (all of which are safe to include 61*16467b97STreehugger Robot/// // multiple times) and your own project related header files. Use <> to include and 62*16467b97STreehugger Robot/// // -I on the compile line (which vs2005 now handles, where vs2003 did not). 63*16467b97STreehugger Robot/// // 64*16467b97STreehugger Robot/// #include <treeparser.h> 65*16467b97STreehugger Robot/// 66*16467b97STreehugger Robot/// // Main entry point for this example 67*16467b97STreehugger Robot/// // 68*16467b97STreehugger Robot/// int ANTLR3_CDECL 69*16467b97STreehugger Robot/// main (int argc, char *argv[]) 70*16467b97STreehugger Robot/// { 71*16467b97STreehugger Robot/// // Now we declare the ANTLR related local variables we need. 
72*16467b97STreehugger Robot/// // Note that unless you are convinced you will never need thread safe 73*16467b97STreehugger Robot/// // versions for your project, then you should always create such things 74*16467b97STreehugger Robot/// // as instance variables for each invocation. 75*16467b97STreehugger Robot/// // ------------------- 76*16467b97STreehugger Robot/// 77*16467b97STreehugger Robot/// // Name of the input file. Note that we always use the abstract type pANTLR3_UINT8 78*16467b97STreehugger Robot/// // for ASCII/8 bit strings - the runtime library guarantees that this will be 79*16467b97STreehugger Robot/// // good on all platforms. This is a general rule - always use the ANTLR3 supplied 80*16467b97STreehugger Robot/// // typedefs for pointers/types/etc. 81*16467b97STreehugger Robot/// // 82*16467b97STreehugger Robot/// pANTLR3_UINT8 fName; 83*16467b97STreehugger Robot/// 84*16467b97STreehugger Robot/// // The ANTLR3 character input stream, which abstracts the input source such that 85*16467b97STreehugger Robot/// // it is easy to provide input from different sources such as files, or 86*16467b97STreehugger Robot/// // memory strings. 87*16467b97STreehugger Robot/// // 88*16467b97STreehugger Robot/// // For an 8Bit/latin-1/etc memory string use: 89*16467b97STreehugger Robot/// // input = antlr3New8BitStringInPlaceStream (stringtouse, (ANTLR3_UINT32) length, NULL); 90*16467b97STreehugger Robot/// // 91*16467b97STreehugger Robot/// // For a UTF16 memory string use: 92*16467b97STreehugger Robot/// // input = antlr3NewUTF16StringInPlaceStream (stringtouse, (ANTLR3_UINT32) length, NULL); 93*16467b97STreehugger Robot/// // 94*16467b97STreehugger Robot/// // For input from a file, see code below 95*16467b97STreehugger Robot/// // 96*16467b97STreehugger Robot/// // Note that this is essentially a pointer to a structure containing pointers to functions. 
97*16467b97STreehugger Robot/// // You can create your own input stream type (copy one of the existing ones) and override any 98*16467b97STreehugger Robot/// // individual function by installing your own pointer after you have created the standard 99*16467b97STreehugger Robot/// // version. 100*16467b97STreehugger Robot/// // 101*16467b97STreehugger Robot/// pANTLR3_INPUT_STREAM input; 102*16467b97STreehugger Robot/// 103*16467b97STreehugger Robot/// // The lexer is of course generated by ANTLR, and so the lexer type is not upper case. 104*16467b97STreehugger Robot/// // The lexer is supplied with a pANTLR3_INPUT_STREAM from whence it consumes its 105*16467b97STreehugger Robot/// // input and generates a token stream as output. This is the ctx (CTX macro) pointer 106*16467b97STreehugger Robot/// // for your lexer. 107*16467b97STreehugger Robot/// // 108*16467b97STreehugger Robot/// pLangLexer lxr; 109*16467b97STreehugger Robot/// 110*16467b97STreehugger Robot/// // The token stream is produced by the ANTLR3 generated lexer. Again it is a structure based 111*16467b97STreehugger Robot/// // API/Object, which you can customise and override methods of as you wish. a Token stream is 112*16467b97STreehugger Robot/// // supplied to the generated parser, and you can write your own token stream and pass this in 113*16467b97STreehugger Robot/// // if you wish. 114*16467b97STreehugger Robot/// // 115*16467b97STreehugger Robot/// pANTLR3_COMMON_TOKEN_STREAM tstream; 116*16467b97STreehugger Robot/// 117*16467b97STreehugger Robot/// // The Lang parser is also generated by ANTLR and accepts a token stream as explained 118*16467b97STreehugger Robot/// // above. The token stream can be any source in fact, so long as it implements the 119*16467b97STreehugger Robot/// // ANTLR3_TOKEN_SOURCE interface. 
In this case the parser does not return anything 120*16467b97STreehugger Robot/// // but it can of course specify any kind of return type from the rule you invoke 121*16467b97STreehugger Robot/// // when calling it. This is the ctx (CTX macro) pointer for your parser. 122*16467b97STreehugger Robot/// // 123*16467b97STreehugger Robot/// pLangParser psr; 124*16467b97STreehugger Robot/// 125*16467b97STreehugger Robot/// // The parser produces an AST, which is returned as a member of the return type of 126*16467b97STreehugger Robot/// // the starting rule (any rule can start first of course). This is a generated type 127*16467b97STreehugger Robot/// // based upon the rule we start with. 128*16467b97STreehugger Robot/// // 129*16467b97STreehugger Robot/// LangParser_decl_return langAST; 130*16467b97STreehugger Robot/// 131*16467b97STreehugger Robot/// 132*16467b97STreehugger Robot/// // The tree nodes are managed by a tree adaptor, which doles 133*16467b97STreehugger Robot/// // out the nodes upon request. You can make your own tree types and adaptors 134*16467b97STreehugger Robot/// // and override the built in versions. See runtime source for details and 135*16467b97STreehugger Robot/// // eventually the wiki entry for the C target. 136*16467b97STreehugger Robot/// // 137*16467b97STreehugger Robot/// pANTLR3_COMMON_TREE_NODE_STREAM nodes; 138*16467b97STreehugger Robot/// 139*16467b97STreehugger Robot/// // Finally, when the parser runs, it will produce an AST that can be traversed by the 140*16467b97STreehugger Robot/// // tree parser: c.f. LangDumpDecl.g3t. This is the ctx (CTX macro) pointer for your 141*16467b97STreehugger Robot/// // tree parser. 
142*16467b97STreehugger Robot/// // 143*16467b97STreehugger Robot/// pLangDumpDecl treePsr; 144*16467b97STreehugger Robot/// 145*16467b97STreehugger Robot/// // Create the input stream based upon the argument supplied to us on the command line 146*16467b97STreehugger Robot/// // for this example, the input will always default to ./input if there is no explicit 147*16467b97STreehugger Robot/// // argument. 148*16467b97STreehugger Robot/// // 149*16467b97STreehugger Robot/// if (argc < 2 || argv[1] == NULL) 150*16467b97STreehugger Robot/// { 151*16467b97STreehugger Robot/// fName =(pANTLR3_UINT8)"./input"; // Note in VS2005 debug, working directory must be configured 152*16467b97STreehugger Robot/// } 153*16467b97STreehugger Robot/// else 154*16467b97STreehugger Robot/// { 155*16467b97STreehugger Robot/// fName = (pANTLR3_UINT8)argv[1]; 156*16467b97STreehugger Robot/// } 157*16467b97STreehugger Robot/// 158*16467b97STreehugger Robot/// // Create the input stream using the supplied file name 159*16467b97STreehugger Robot/// // (Use antlr38BitFileStreamNew for UTF16 input). 160*16467b97STreehugger Robot/// // 161*16467b97STreehugger Robot/// input = antlr38BitFileStreamNew(fName); 162*16467b97STreehugger Robot/// 163*16467b97STreehugger Robot/// // The input will be created successfully, providing that there is enough 164*16467b97STreehugger Robot/// // memory and the file exists etc 165*16467b97STreehugger Robot/// // 166*16467b97STreehugger Robot/// if ( input == NULL ) 167*16467b97STreehugger Robot/// { 168*16467b97STreehugger Robot/// ANTLR3_FPRINTF(stderr, "Unable to open file %s due to malloc() failure1\n", (char *)fName); 169*16467b97STreehugger Robot/// } 170*16467b97STreehugger Robot/// 171*16467b97STreehugger Robot/// // Our input stream is now open and all set to go, so we can create a new instance of our 172*16467b97STreehugger Robot/// // lexer and set the lexer input to our input stream: 173*16467b97STreehugger Robot/// // (file | memory | ?) 
--> inputstream -> lexer --> tokenstream --> parser ( --> treeparser )? 174*16467b97STreehugger Robot/// // 175*16467b97STreehugger Robot/// lxr = LangLexerNew(input); // LangLexerNew is generated by ANTLR 176*16467b97STreehugger Robot/// 177*16467b97STreehugger Robot/// // Need to check for errors 178*16467b97STreehugger Robot/// // 179*16467b97STreehugger Robot/// if ( lxr == NULL ) 180*16467b97STreehugger Robot/// { 181*16467b97STreehugger Robot/// ANTLR3_FPRINTF(stderr, "Unable to create the lexer due to malloc() failure1\n"); 182*16467b97STreehugger Robot/// exit(ANTLR3_ERR_NOMEM); 183*16467b97STreehugger Robot/// } 184*16467b97STreehugger Robot/// 185*16467b97STreehugger Robot/// // Our lexer is in place, so we can create the token stream from it 186*16467b97STreehugger Robot/// // NB: Nothing happens yet other than the file has been read. We are just 187*16467b97STreehugger Robot/// // connecting all these things together and they will be invoked when we 188*16467b97STreehugger Robot/// // call the parser rule. ANTLR3_SIZE_HINT can be left at the default usually 189*16467b97STreehugger Robot/// // unless you have a very large token stream/input. Each generated lexer 190*16467b97STreehugger Robot/// // provides a token source interface, which is the second argument to the 191*16467b97STreehugger Robot/// // token stream creator. 192*16467b97STreehugger Robot/// // Note that even if you implement your own token structure, it will always 193*16467b97STreehugger Robot/// // contain a standard common token within it and this is the pointer that 194*16467b97STreehugger Robot/// // you pass around to everything else. A common token has a pointer within 195*16467b97STreehugger Robot/// // it that should point to your own outer token structure. 
196*16467b97STreehugger Robot/// // 197*16467b97STreehugger Robot/// tstream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, lxr->pLexer->tokSource); 198*16467b97STreehugger Robot/// 199*16467b97STreehugger Robot/// if (tstream == NULL) 200*16467b97STreehugger Robot/// { 201*16467b97STreehugger Robot/// ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate token stream\n"); 202*16467b97STreehugger Robot/// exit(ANTLR3_ERR_NOMEM); 203*16467b97STreehugger Robot/// } 204*16467b97STreehugger Robot/// 205*16467b97STreehugger Robot/// // Finally, now that we have our lexer constructed, we can create the parser 206*16467b97STreehugger Robot/// // 207*16467b97STreehugger Robot/// psr = LangParserNew(tstream); // LangParserNew is generated by ANTLR3 208*16467b97STreehugger Robot/// 209*16467b97STreehugger Robot/// if (psr == NULL) 210*16467b97STreehugger Robot/// { 211*16467b97STreehugger Robot/// ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate parser\n"); 212*16467b97STreehugger Robot/// exit(ANTLR3_ERR_NOMEM); 213*16467b97STreehugger Robot/// } 214*16467b97STreehugger Robot/// 215*16467b97STreehugger Robot/// // We are all ready to go. Though that looked complicated at first glance, 216*16467b97STreehugger Robot/// // I am sure, you will see that in fact most of the code above is dealing 217*16467b97STreehugger Robot/// // with errors and there isn't really that much to do (isn't this always the 218*16467b97STreehugger Robot/// // case in C? ;-). 219*16467b97STreehugger Robot/// // 220*16467b97STreehugger Robot/// // So, we now invoke the parser. All elements of ANTLR3 generated C components 221*16467b97STreehugger Robot/// // as well as the ANTLR C runtime library itself are pseudo objects. This means 222*16467b97STreehugger Robot/// // that they are represented as pointers to structures, which contain any 223*16467b97STreehugger Robot/// // instance data they need, and a set of pointers to other interfaces or 224*16467b97STreehugger Robot/// // 'methods'. 
Note that in general, these few pointers we have created here are 225*16467b97STreehugger Robot/// // the only things you will ever explicitly free() as everything else is created 226*16467b97STreehugger Robot/// // via factories, that allocate memory efficiently and free() everything they use 227*16467b97STreehugger Robot/// // automatically when you close the parser/lexer/etc. 228*16467b97STreehugger Robot/// // 229*16467b97STreehugger Robot/// // Note that this means only that the methods are always called via the object 230*16467b97STreehugger Robot/// // pointer and the first argument to any method, is a pointer to the structure itself. 231*16467b97STreehugger Robot/// // It also has the side advantage, if you are using an IDE such as VS2005 that can do it 232*16467b97STreehugger Robot/// // that when you type ->, you will see a list of all the methods the object supports. 233*16467b97STreehugger Robot/// // 234*16467b97STreehugger Robot/// langAST = psr->decl(psr); 235*16467b97STreehugger Robot/// 236*16467b97STreehugger Robot/// // If the parser ran correctly, we will have a tree to parse. In general I recommend 237*16467b97STreehugger Robot/// // keeping your own flags as part of the error trapping, but here is how you can 238*16467b97STreehugger Robot/// // work out if there were errors if you are using the generic error messages 239*16467b97STreehugger Robot/// // 240*16467b97STreehugger Robot/// if (psr->pParser->rec->errorCount > 0) 241*16467b97STreehugger Robot/// { 242*16467b97STreehugger Robot/// ANTLR3_FPRINTF(stderr, "The parser returned %d errors, tree walking aborted.\n", psr->pParser->rec->errorCount); 243*16467b97STreehugger Robot/// 244*16467b97STreehugger Robot/// } 245*16467b97STreehugger Robot/// else 246*16467b97STreehugger Robot/// { 247*16467b97STreehugger Robot/// nodes = antlr3CommonTreeNodeStreamNewTree(langAST.tree, ANTLR3_SIZE_HINT); // sIZE HINT WILL SOON BE DEPRECATED!! 
248*16467b97STreehugger Robot/// 249*16467b97STreehugger Robot/// // Tree parsers are given a common tree node stream (or your override) 250*16467b97STreehugger Robot/// // 251*16467b97STreehugger Robot/// treePsr = LangDumpDeclNew(nodes); 252*16467b97STreehugger Robot/// 253*16467b97STreehugger Robot/// treePsr->decl(treePsr); 254*16467b97STreehugger Robot/// nodes ->free (nodes); nodes = NULL; 255*16467b97STreehugger Robot/// treePsr ->free (treePsr); treePsr = NULL; 256*16467b97STreehugger Robot/// } 257*16467b97STreehugger Robot/// 258*16467b97STreehugger Robot/// // We did not return anything from this parser rule, so we can finish. It only remains 259*16467b97STreehugger Robot/// // to close down our open objects, in the reverse order we created them 260*16467b97STreehugger Robot/// // 261*16467b97STreehugger Robot/// psr ->free (psr); psr = NULL; 262*16467b97STreehugger Robot/// tstream ->free (tstream); tstream = NULL; 263*16467b97STreehugger Robot/// lxr ->free (lxr); lxr = NULL; 264*16467b97STreehugger Robot/// input ->close (input); input = NULL; 265*16467b97STreehugger Robot/// 266*16467b97STreehugger Robot/// return 0; 267*16467b97STreehugger Robot/// } 268*16467b97STreehugger Robot/// \endcode 269*16467b97STreehugger Robot/// 270