1*ed573b14SMatthias Ringwald /* Copyright (c) 2013-2014 Yoran Heling
2*ed573b14SMatthias Ringwald
3*ed573b14SMatthias Ringwald Permission is hereby granted, free of charge, to any person obtaining
4*ed573b14SMatthias Ringwald a copy of this software and associated documentation files (the
5*ed573b14SMatthias Ringwald "Software"), to deal in the Software without restriction, including
6*ed573b14SMatthias Ringwald without limitation the rights to use, copy, modify, merge, publish,
7*ed573b14SMatthias Ringwald distribute, sublicense, and/or sell copies of the Software, and to
8*ed573b14SMatthias Ringwald permit persons to whom the Software is furnished to do so, subject to
9*ed573b14SMatthias Ringwald the following conditions:
10*ed573b14SMatthias Ringwald
11*ed573b14SMatthias Ringwald The above copyright notice and this permission notice shall be included
12*ed573b14SMatthias Ringwald in all copies or substantial portions of the Software.
13*ed573b14SMatthias Ringwald
14*ed573b14SMatthias Ringwald THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15*ed573b14SMatthias Ringwald EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16*ed573b14SMatthias Ringwald MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17*ed573b14SMatthias Ringwald IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18*ed573b14SMatthias Ringwald CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19*ed573b14SMatthias Ringwald TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20*ed573b14SMatthias Ringwald SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21*ed573b14SMatthias Ringwald */
22*ed573b14SMatthias Ringwald
23*ed573b14SMatthias Ringwald #ifndef YXML_H
24*ed573b14SMatthias Ringwald #define YXML_H
25*ed573b14SMatthias Ringwald
26*ed573b14SMatthias Ringwald #include <stdint.h>
27*ed573b14SMatthias Ringwald #include <stddef.h>
28*ed573b14SMatthias Ringwald
29*ed573b14SMatthias Ringwald #if defined(_MSC_VER) && !defined(__cplusplus) && !defined(inline)
30*ed573b14SMatthias Ringwald #define inline __inline
31*ed573b14SMatthias Ringwald #endif
32*ed573b14SMatthias Ringwald
33*ed573b14SMatthias Ringwald /* Full API documentation for this library can be found in the "yxml.pod" file
34*ed573b14SMatthias Ringwald * in the yxml git repository, or online at http://dev.yorhel.nl/yxml/man */
35*ed573b14SMatthias Ringwald
/* Result codes returned by the parser. Negative values report errors, zero
 * means "nothing to report", positive values announce a token. */
typedef enum {
	/* --- Errors --- */

	/* The input ended unexpectedly. */
	YXML_EEOF      = -5,
	/* Invalid character reference or entity reference (&whatever;). */
	YXML_EREF      = -4,
	/* The close tag does not match the open tag (<Tag> .. </OtherTag>). */
	YXML_ECLOSE    = -3,
	/* Stack overflow: tags are nested too deeply, or an element/attribute
	 * name is too long. */
	YXML_ESTACK    = -2,
	/* Syntax error: an unexpected byte was encountered. */
	YXML_ESYN      = -1,

	/* --- No token --- */

	/* The character was consumed, but no new token is available. */
	YXML_OK        =  0,

	/* --- Tokens --- */

	/* An element starts: '<Tag ..' */
	YXML_ELEMSTART =  1,
	/* Content of an element. */
	YXML_CONTENT   =  2,
	/* An element ends: '.. />' or '</Tag>' */
	YXML_ELEMEND   =  3,
	/* An attribute starts: 'Name=..' */
	YXML_ATTRSTART =  4,
	/* Value of an attribute. */
	YXML_ATTRVAL   =  5,
	/* An attribute ends: '.."' */
	YXML_ATTREND   =  6,
	/* A processing instruction starts. */
	YXML_PISTART   =  7,
	/* Content of a processing instruction. */
	YXML_PICONTENT =  8,
	/* A processing instruction ends. */
	YXML_PIEND     =  9
} yxml_ret_t;
53*ed573b14SMatthias Ringwald
/* When, exactly, are tokens returned?
 *
 * Each indentation level lists the input that may follow the line above it,
 * together with the token (if any) returned when that input is seen.
 *
 * <TagName
 *   '>' ELEMSTART
 *   '/' ELEMSTART, '>' ELEMEND
 *   ' ' ELEMSTART
 *     '>'
 *     '/', '>' ELEMEND
 *     Attr
 *       '=' ATTRSTART
 *         "X ATTRVAL
 *           'Y'  ATTRVAL
 *             'Z'  ATTRVAL
 *               '"' ATTREND
 *                 '>'
 *                 '/', '>' ELEMEND
 *
 * </TagName
 *   '>' ELEMEND
 */
74*ed573b14SMatthias Ringwald
75*ed573b14SMatthias Ringwald
/* Parser state. The first group of members may be read by the application
 * but must not be written; the second group is internal to the parser. */
typedef struct {
	/* ---- PUBLIC (read-only) ---- */

	/* Name of the current element; an empty string when the parser is not
	 * inside any element. Updated after YXML_ELEMSTART. The pointer itself
	 * stays valid up to and including the next token that is not one of the
	 * YXML_ATTR* tokens; the buffer it points to stays valid up to and
	 * including the YXML_ELEMEND of the corresponding element. */
	char *elem;

	/* Most recently read character(s) of an attribute value (YXML_ATTRVAL),
	 * of element data (YXML_CONTENT), or of a processing instruction
	 * (YXML_PICONTENT). Updated after one of those tokens is returned and
	 * only valid until the next call to yxml_parse(). This string is usually
	 * a single byte; it holds several bytes in these cases:
	 * - "<?SomePI ?x ?>":     the two characters "?x"
	 * - "<![CDATA[ ]x ]]>":   the two characters "]x"
	 * - "<![CDATA[ ]]x ]]>":  the three characters "]]x"
	 * - "&#N;" and "&#xN;" with dec(n) > 127: the referenced Unicode
	 *   character, encoded as multiple UTF-8 bytes.
	 */
	char data[8];

	/* Name of the current attribute. Updated after YXML_ATTRSTART and valid
	 * up to and including the next YXML_ATTREND. */
	char *attr;

	/* Name/target of the current processing instruction; an empty string
	 * when the parser is not inside a PI. Updated after YXML_PISTART and
	 * valid up to (but excluding) the next YXML_PIEND. */
	char *pi;

	/* Position information, useful for debugging and error reporting. All
	 * three describe the position _after_ the last byte that was given to
	 * yxml_parse(): the byte offset within the current line, the total
	 * number of bytes read, and the line number. */
	uint64_t byte;
	uint64_t total;
	uint32_t line;


	/* ---- PRIVATE ---- */

	int state;
	/* Stack of element names followed by the attribute/PI name. Entries are
	 * separated by \0, and the stack also begins with a \0. */
	unsigned char *stack;
	size_t stacksize;
	size_t stacklen;
	unsigned reflen;
	unsigned quote;
	/* Used for '@' state remembering and by the "string" consuming state. */
	int nextstate;
	unsigned ignore;
	unsigned char *string;
} yxml_t;
125*ed573b14SMatthias Ringwald
126*ed573b14SMatthias Ringwald
127*ed573b14SMatthias Ringwald #ifdef __cplusplus
128*ed573b14SMatthias Ringwald extern "C" {
129*ed573b14SMatthias Ringwald #endif
130*ed573b14SMatthias Ringwald
131*ed573b14SMatthias Ringwald void yxml_init(yxml_t *, void *, size_t);
132*ed573b14SMatthias Ringwald
133*ed573b14SMatthias Ringwald
134*ed573b14SMatthias Ringwald yxml_ret_t yxml_parse(yxml_t *, int);
135*ed573b14SMatthias Ringwald
136*ed573b14SMatthias Ringwald
137*ed573b14SMatthias Ringwald /* May be called after the last character has been given to yxml_parse().
138*ed573b14SMatthias Ringwald * Returns YXML_OK if the XML document is valid, YXML_EEOF otherwise. Using
139*ed573b14SMatthias Ringwald * this function isn't really necessary, but can be used to detect documents
140*ed573b14SMatthias Ringwald * that don't end correctly. In particular, an error is returned when the XML
141*ed573b14SMatthias Ringwald * document did not contain a (complete) root element, or when the document
142*ed573b14SMatthias Ringwald * ended while in a comment or processing instruction. */
143*ed573b14SMatthias Ringwald yxml_ret_t yxml_eof(yxml_t *);
144*ed573b14SMatthias Ringwald
145*ed573b14SMatthias Ringwald #ifdef __cplusplus
146*ed573b14SMatthias Ringwald }
147*ed573b14SMatthias Ringwald #endif
148*ed573b14SMatthias Ringwald
149*ed573b14SMatthias Ringwald
150*ed573b14SMatthias Ringwald /* Returns the length of the element name (x->elem), attribute name (x->attr),
151*ed573b14SMatthias Ringwald * or PI name (x->pi). This function should ONLY be used directly after the
152*ed573b14SMatthias Ringwald * YXML_ELEMSTART, YXML_ATTRSTART or YXML_PISTART (respectively) tokens have
153*ed573b14SMatthias Ringwald * been returned by yxml_parse(), calling this at any other time may not give
154*ed573b14SMatthias Ringwald * the correct results. This function should also NOT be used on strings other
155*ed573b14SMatthias Ringwald * than x->elem, x->attr or x->pi. */
yxml_symlen(yxml_t * x,const char * s)156*ed573b14SMatthias Ringwald static inline size_t yxml_symlen(yxml_t *x, const char *s) {
157*ed573b14SMatthias Ringwald return (x->stack + x->stacklen) - (const unsigned char*)s;
158*ed573b14SMatthias Ringwald }
159*ed573b14SMatthias Ringwald
160*ed573b14SMatthias Ringwald #endif
161*ed573b14SMatthias Ringwald
162*ed573b14SMatthias Ringwald /* vim: set noet sw=4 ts=4: */
163