1 /*
2 __ __ _
3 ___\ \/ /_ __ __ _| |_
4 / _ \\ /| '_ \ / _` | __|
5 | __// \| |_) | (_| | |_
6 \___/_/\_\ .__/ \__,_|\__|
7 |_| XML parser
8
9 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
10 Copyright (c) 2000 Clark Cooper <[email protected]>
11 Copyright (c) 2002 Fred L. Drake, Jr. <[email protected]>
12 Copyright (c) 2016-2024 Sebastian Pipping <[email protected]>
13 Licensed under the MIT license:
14
15 Permission is hereby granted, free of charge, to any person obtaining
16 a copy of this software and associated documentation files (the
17 "Software"), to deal in the Software without restriction, including
18 without limitation the rights to use, copy, modify, merge, publish,
19 distribute, sublicense, and/or sell copies of the Software, and to permit
20 persons to whom the Software is furnished to do so, subject to the
21 following conditions:
22
23 The above copyright notice and this permission notice shall be included
24 in all copies or substantial portions of the Software.
25
26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
29 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
30 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
31 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
32 USE OR OTHER DEALINGS IN THE SOFTWARE.
33 */
34
35 #include <string.h>
36 #include <stdio.h>
37 #include <stddef.h>
38
/* One entry of a character-class table: an inclusive span of character
   codes.  An entry whose `end` is zero stands for the single code `start`
   (this is how setTab() interprets it). */
struct range {
  int start; /* first code of the span, or the only code when end == 0 */
  int end;   /* last code of the span (inclusive); 0 marks a single code */
};
43
44 struct range nmstrt[] = {
45 {'_'},
46 {':'},
47 /* BaseChar */
48 {0x0041, 0x005a},
49 {0x0061, 0x007a},
50 {0x00c0, 0x00d6},
51 {0x00d8, 0x00f6},
52 {0x00f8, 0x00ff},
53 {0x0100, 0x0131},
54 {0x0134, 0x013e},
55 {0x0141, 0x0148},
56 {0x014a, 0x017e},
57 {0x0180, 0x01c3},
58 {0x01cd, 0x01f0},
59 {0x01f4, 0x01f5},
60 {0x01fa, 0x0217},
61 {0x0250, 0x02a8},
62 {0x02bb, 0x02c1},
63 {0x0386},
64 {0x0388, 0x038a},
65 {0x038c},
66 {0x038e, 0x03a1},
67 {0x03a3, 0x03ce},
68 {0x03d0, 0x03d6},
69 {0x03da},
70 {0x03dc},
71 {0x03de},
72 {0x03e0},
73 {0x03e2, 0x03f3},
74 {0x0401, 0x040c},
75 {0x040e, 0x044f},
76 {0x0451, 0x045c},
77 {0x045e, 0x0481},
78 {0x0490, 0x04c4},
79 {0x04c7, 0x04c8},
80 {0x04cb, 0x04cc},
81 {0x04d0, 0x04eb},
82 {0x04ee, 0x04f5},
83 {0x04f8, 0x04f9},
84 {0x0531, 0x0556},
85 {0x0559},
86 {0x0561, 0x0586},
87 {0x05d0, 0x05ea},
88 {0x05f0, 0x05f2},
89 {0x0621, 0x063a},
90 {0x0641, 0x064a},
91 {0x0671, 0x06b7},
92 {0x06ba, 0x06be},
93 {0x06c0, 0x06ce},
94 {0x06d0, 0x06d3},
95 {0x06d5},
96 {0x06e5, 0x06e6},
97 {0x0905, 0x0939},
98 {0x093d},
99 {0x0958, 0x0961},
100 {0x0985, 0x098c},
101 {0x098f, 0x0990},
102 {0x0993, 0x09a8},
103 {0x09aa, 0x09b0},
104 {0x09b2},
105 {0x09b6, 0x09b9},
106 {0x09dc, 0x09dd},
107 {0x09df, 0x09e1},
108 {0x09f0, 0x09f1},
109 {0x0a05, 0x0a0a},
110 {0x0a0f, 0x0a10},
111 {0x0a13, 0x0a28},
112 {0x0a2a, 0x0a30},
113 {0x0a32, 0x0a33},
114 {0x0a35, 0x0a36},
115 {0x0a38, 0x0a39},
116 {0x0a59, 0x0a5c},
117 {0x0a5e},
118 {0x0a72, 0x0a74},
119 {0x0a85, 0x0a8b},
120 {0x0a8d},
121 {0x0a8f, 0x0a91},
122 {0x0a93, 0x0aa8},
123 {0x0aaa, 0x0ab0},
124 {0x0ab2, 0x0ab3},
125 {0x0ab5, 0x0ab9},
126 {0x0abd},
127 {0x0ae0},
128 {0x0b05, 0x0b0c},
129 {0x0b0f, 0x0b10},
130 {0x0b13, 0x0b28},
131 {0x0b2a, 0x0b30},
132 {0x0b32, 0x0b33},
133 {0x0b36, 0x0b39},
134 {0x0b3d},
135 {0x0b5c, 0x0b5d},
136 {0x0b5f, 0x0b61},
137 {0x0b85, 0x0b8a},
138 {0x0b8e, 0x0b90},
139 {0x0b92, 0x0b95},
140 {0x0b99, 0x0b9a},
141 {0x0b9c},
142 {0x0b9e, 0x0b9f},
143 {0x0ba3, 0x0ba4},
144 {0x0ba8, 0x0baa},
145 {0x0bae, 0x0bb5},
146 {0x0bb7, 0x0bb9},
147 {0x0c05, 0x0c0c},
148 {0x0c0e, 0x0c10},
149 {0x0c12, 0x0c28},
150 {0x0c2a, 0x0c33},
151 {0x0c35, 0x0c39},
152 {0x0c60, 0x0c61},
153 {0x0c85, 0x0c8c},
154 {0x0c8e, 0x0c90},
155 {0x0c92, 0x0ca8},
156 {0x0caa, 0x0cb3},
157 {0x0cb5, 0x0cb9},
158 {0x0cde},
159 {0x0ce0, 0x0ce1},
160 {0x0d05, 0x0d0c},
161 {0x0d0e, 0x0d10},
162 {0x0d12, 0x0d28},
163 {0x0d2a, 0x0d39},
164 {0x0d60, 0x0d61},
165 {0x0e01, 0x0e2e},
166 {0x0e30},
167 {0x0e32, 0x0e33},
168 {0x0e40, 0x0e45},
169 {0x0e81, 0x0e82},
170 {0x0e84},
171 {0x0e87, 0x0e88},
172 {0x0e8a},
173 {0x0e8d},
174 {0x0e94, 0x0e97},
175 {0x0e99, 0x0e9f},
176 {0x0ea1, 0x0ea3},
177 {0x0ea5},
178 {0x0ea7},
179 {0x0eaa, 0x0eab},
180 {0x0ead, 0x0eae},
181 {0x0eb0},
182 {0x0eb2, 0x0eb3},
183 {0x0ebd},
184 {0x0ec0, 0x0ec4},
185 {0x0f40, 0x0f47},
186 {0x0f49, 0x0f69},
187 {0x10a0, 0x10c5},
188 {0x10d0, 0x10f6},
189 {0x1100},
190 {0x1102, 0x1103},
191 {0x1105, 0x1107},
192 {0x1109},
193 {0x110b, 0x110c},
194 {0x110e, 0x1112},
195 {0x113c},
196 {0x113e},
197 {0x1140},
198 {0x114c},
199 {0x114e},
200 {0x1150},
201 {0x1154, 0x1155},
202 {0x1159},
203 {0x115f, 0x1161},
204 {0x1163},
205 {0x1165},
206 {0x1167},
207 {0x1169},
208 {0x116d, 0x116e},
209 {0x1172, 0x1173},
210 {0x1175},
211 {0x119e},
212 {0x11a8},
213 {0x11ab},
214 {0x11ae, 0x11af},
215 {0x11b7, 0x11b8},
216 {0x11ba},
217 {0x11bc, 0x11c2},
218 {0x11eb},
219 {0x11f0},
220 {0x11f9},
221 {0x1e00, 0x1e9b},
222 {0x1ea0, 0x1ef9},
223 {0x1f00, 0x1f15},
224 {0x1f18, 0x1f1d},
225 {0x1f20, 0x1f45},
226 {0x1f48, 0x1f4d},
227 {0x1f50, 0x1f57},
228 {0x1f59},
229 {0x1f5b},
230 {0x1f5d},
231 {0x1f5f, 0x1f7d},
232 {0x1f80, 0x1fb4},
233 {0x1fb6, 0x1fbc},
234 {0x1fbe},
235 {0x1fc2, 0x1fc4},
236 {0x1fc6, 0x1fcc},
237 {0x1fd0, 0x1fd3},
238 {0x1fd6, 0x1fdb},
239 {0x1fe0, 0x1fec},
240 {0x1ff2, 0x1ff4},
241 {0x1ff6, 0x1ffc},
242 {0x2126},
243 {0x212a, 0x212b},
244 {0x212e},
245 {0x2180, 0x2182},
246 {0x3041, 0x3094},
247 {0x30a1, 0x30fa},
248 {0x3105, 0x312c},
249 {0xac00, 0xd7a3},
250 /* Ideographic */
251 {0x4e00, 0x9fa5},
252 {0x3007},
253 {0x3021, 0x3029},
254 };
255
256 /* name chars that are not name start chars */
257 struct range name[] = {
258 {'.'},
259 {'-'},
260 /* CombiningChar */
261 {0x0300, 0x0345},
262 {0x0360, 0x0361},
263 {0x0483, 0x0486},
264 {0x0591, 0x05a1},
265 {0x05a3, 0x05b9},
266 {0x05bb, 0x05bd},
267 {0x05bf},
268 {0x05c1, 0x05c2},
269 {0x05c4},
270 {0x064b, 0x0652},
271 {0x0670},
272 {0x06d6, 0x06dc},
273 {0x06dd, 0x06df},
274 {0x06e0, 0x06e4},
275 {0x06e7, 0x06e8},
276 {0x06ea, 0x06ed},
277 {0x0901, 0x0903},
278 {0x093c},
279 {0x093e, 0x094c},
280 {0x094d},
281 {0x0951, 0x0954},
282 {0x0962, 0x0963},
283 {0x0981, 0x0983},
284 {0x09bc},
285 {0x09be},
286 {0x09bf},
287 {0x09c0, 0x09c4},
288 {0x09c7, 0x09c8},
289 {0x09cb, 0x09cd},
290 {0x09d7},
291 {0x09e2, 0x09e3},
292 {0x0a02},
293 {0x0a3c},
294 {0x0a3e},
295 {0x0a3f},
296 {0x0a40, 0x0a42},
297 {0x0a47, 0x0a48},
298 {0x0a4b, 0x0a4d},
299 {0x0a70, 0x0a71},
300 {0x0a81, 0x0a83},
301 {0x0abc},
302 {0x0abe, 0x0ac5},
303 {0x0ac7, 0x0ac9},
304 {0x0acb, 0x0acd},
305 {0x0b01, 0x0b03},
306 {0x0b3c},
307 {0x0b3e, 0x0b43},
308 {0x0b47, 0x0b48},
309 {0x0b4b, 0x0b4d},
310 {0x0b56, 0x0b57},
311 {0x0b82, 0x0b83},
312 {0x0bbe, 0x0bc2},
313 {0x0bc6, 0x0bc8},
314 {0x0bca, 0x0bcd},
315 {0x0bd7},
316 {0x0c01, 0x0c03},
317 {0x0c3e, 0x0c44},
318 {0x0c46, 0x0c48},
319 {0x0c4a, 0x0c4d},
320 {0x0c55, 0x0c56},
321 {0x0c82, 0x0c83},
322 {0x0cbe, 0x0cc4},
323 {0x0cc6, 0x0cc8},
324 {0x0cca, 0x0ccd},
325 {0x0cd5, 0x0cd6},
326 {0x0d02, 0x0d03},
327 {0x0d3e, 0x0d43},
328 {0x0d46, 0x0d48},
329 {0x0d4a, 0x0d4d},
330 {0x0d57},
331 {0x0e31},
332 {0x0e34, 0x0e3a},
333 {0x0e47, 0x0e4e},
334 {0x0eb1},
335 {0x0eb4, 0x0eb9},
336 {0x0ebb, 0x0ebc},
337 {0x0ec8, 0x0ecd},
338 {0x0f18, 0x0f19},
339 {0x0f35},
340 {0x0f37},
341 {0x0f39},
342 {0x0f3e},
343 {0x0f3f},
344 {0x0f71, 0x0f84},
345 {0x0f86, 0x0f8b},
346 {0x0f90, 0x0f95},
347 {0x0f97},
348 {0x0f99, 0x0fad},
349 {0x0fb1, 0x0fb7},
350 {0x0fb9},
351 {0x20d0, 0x20dc},
352 {0x20e1},
353 {0x302a, 0x302f},
354 {0x3099},
355 {0x309a},
356 /* Digit */
357 {0x0030, 0x0039},
358 {0x0660, 0x0669},
359 {0x06f0, 0x06f9},
360 {0x0966, 0x096f},
361 {0x09e6, 0x09ef},
362 {0x0a66, 0x0a6f},
363 {0x0ae6, 0x0aef},
364 {0x0b66, 0x0b6f},
365 {0x0be7, 0x0bef},
366 {0x0c66, 0x0c6f},
367 {0x0ce6, 0x0cef},
368 {0x0d66, 0x0d6f},
369 {0x0e50, 0x0e59},
370 {0x0ed0, 0x0ed9},
371 {0x0f20, 0x0f29},
372 /* Extender */
373 {0xb7},
374 {0x02d0},
375 {0x02d1},
376 {0x0387},
377 {0x0640},
378 {0x0e46},
379 {0x0ec6},
380 {0x3005},
381 {0x3031, 0x3035},
382 {0x309d, 0x309e},
383 {0x30fc, 0x30fe},
384 };
385
386 static void
setTab(char * tab,struct range * ranges,size_t nRanges)387 setTab(char *tab, struct range *ranges, size_t nRanges) {
388 size_t i;
389 int j;
390 for (i = 0; i < nRanges; i++) {
391 if (ranges[i].end) {
392 for (j = ranges[i].start; j <= ranges[i].end; j++)
393 tab[j] = 1;
394 } else
395 tab[ranges[i].start] = 1;
396 }
397 }
398
/* Print the generated lookup tables as C source on stdout.

   tab points at 2 * 65536 flags (non-zero = character belongs to the
   class): the first 65536 feed nmstrtPages, the second 65536 feed
   namePages.  The flags are processed as 512 pages of 256 entries each.

   Output:
     - namingBitmap[]: 8 unsigned words (32 bits each) per bitmap.  Bitmap 0
       (all zero) and bitmap 1 (all ones) are always emitted first; one more
       bitmap is appended for every page that mixes set and clear flags.
     - nmstrtPages[] / namePages[]: for each of the 256 pages, the index of
       the bitmap describing it.

   A page in the second half that is byte-identical to the corresponding
   page of the first half reuses that page's bitmap index instead of
   emitting a duplicate bitmap. */
static void
printTabs(char *tab) {
  int nBitmaps = 2; /* bitmaps 0 and 1 are the predefined uniform ones */
  int i, j, k;
  unsigned char pageIndex[512];

  printf("static const unsigned namingBitmap[] = {\n"
         "0x00000000, 0x00000000, 0x00000000, 0x00000000,\n"
         "0x00000000, 0x00000000, 0x00000000, 0x00000000,\n"
         "0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n"
         "0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n");
  for (i = 0; i < 512; i++) {
    /* kind: 0 or 1 when the whole page is uniform, -1 when it is mixed. */
    int kind = tab[i * 256];
    for (j = 1; j < 256; j++) {
      if (tab[i * 256 + j] != kind) {
        kind = -1;
        break;
      }
    }
    if (i >= 256 && memcmp(tab + (i - 256) * 256, tab + i * 256, 256) == 0) {
      /* Same content as the matching page of the first table: share it. */
      pageIndex[i] = pageIndex[i - 256];
    } else if (kind == -1) {
      pageIndex[i] = (unsigned char)nBitmaps++;
      for (j = 0; j < 8; j++) {
        unsigned val = 0;
        for (k = 0; k < 32; k++) {
          if (tab[i * 256 + j * 32 + k]) {
            /* Unsigned constant: 1 << 31 on a signed int is undefined. */
            val |= (1u << k);
          }
        }
        printf("0x%08X,", val);
        putchar((((j + 1) & 3) == 0) ? '\n' : ' ');
      }
    } else {
      /* Uniform page: kind (0 or 1) is the index of the predefined bitmap. */
      pageIndex[i] = (unsigned char)kind;
    }
  }
  printf("};\n");
  printf("static const unsigned char nmstrtPages[] = {\n");
  for (i = 0; i < 512; i++) {
    if (i == 256)
      printf("};\nstatic const unsigned char namePages[] = {\n");
    printf("0x%02X,", pageIndex[i]);
    putchar((((i + 1) & 7) == 0) ? '\n' : ' ');
  }
  printf("};\n");
}
443
444 int
main(void)445 main(void) {
446 char tab[2 * 65536];
447 memset(tab, 0, 65536);
448 setTab(tab, nmstrt, sizeof(nmstrt) / sizeof(nmstrt[0]));
449 memcpy(tab + 65536, tab, 65536);
450 setTab(tab + 65536, name, sizeof(name) / sizeof(name[0]));
451 printTabs(tab);
452 return 0;
453 }
454