1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 //--------------------------------------------------------------------------------- 4 // 5 // Generated Header File. Do not edit by hand. 6 // This file contains the state table for the ICU Regular Expression Pattern Parser 7 // It is generated by the Perl script "regexcst.pl" from 8 // the rule parser state definitions file "regexcst.txt". 9 // 10 // Copyright (C) 2002-2016 International Business Machines Corporation 11 // and others. All rights reserved. 12 // 13 //--------------------------------------------------------------------------------- 14 #ifndef RBBIRPT_H 15 #define RBBIRPT_H 16 17 #include "unicode/utypes.h" 18 19 U_NAMESPACE_BEGIN 20 // 21 // Character classes for regex pattern scanning. 22 // 23 static const uint8_t kRuleSet_digit_char = 128; 24 static const uint8_t kRuleSet_ascii_letter = 129; 25 static const uint8_t kRuleSet_rule_char = 130; 26 constexpr uint32_t kRuleSet_count = 131-128; 27 28 enum Regex_PatternParseAction { 29 doSetBackslash_D, 30 doBackslashh, 31 doBackslashH, 32 doSetLiteralEscaped, 33 doOpenLookAheadNeg, 34 doCompleteNamedBackRef, 35 doPatStart, 36 doBackslashS, 37 doBackslashD, 38 doNGStar, 39 doNOP, 40 doBackslashX, 41 doSetLiteral, 42 doContinueNamedCapture, 43 doBackslashG, 44 doBackslashR, 45 doSetBegin, 46 doSetBackslash_v, 47 doPossessivePlus, 48 doPerlInline, 49 doBackslashZ, 50 doSetAddAmp, 51 doSetBeginDifference1, 52 doIntervalError, 53 doSetNegate, 54 doIntervalInit, 55 doSetIntersection2, 56 doPossessiveInterval, 57 doRuleError, 58 doBackslashW, 59 doContinueNamedBackRef, 60 doOpenNonCaptureParen, 61 doExit, 62 doSetNamedChar, 63 doSetBackslash_V, 64 doConditionalExpr, 65 doEscapeError, 66 doBadOpenParenType, 67 doPossessiveStar, 68 doSetAddDash, 69 doEscapedLiteralChar, 70 doSetBackslash_w, 71 doIntervalUpperDigit, 72 doBackslashv, 73 doSetBackslash_S, 74 doSetNoCloseError, 75 doSetProp, 76 doBackslashB, 77 doSetEnd, 78 doSetRange, 79 doMatchModeParen, 80 doPlus, 81 doBackslashV, 82 doSetMatchMode, 83 doBackslashz, 84 doSetNamedRange, 85 doOpenLookBehindNeg, 86 doInterval, 87 doBadNamedCapture, 88 doBeginMatchMode, 89 doBackslashd, 90 doPatFinish, 91 doNamedChar, 92 doNGPlus, 93 doSetDifference2, 94 doSetBackslash_H, 95 doCloseParen, 96 doDotAny, 97 doOpenCaptureParen, 98 doEnterQuoteMode, 99 doOpenAtomicParen, 100 doBadModeFlag, 101 doSetBackslash_d, 102 doSetFinish, 103 doProperty, 104 doBeginNamedBackRef, 105 doBackRef, 106 doOpt, 107 doDollar, 108 doBeginNamedCapture, 109 doNGInterval, 110 doSetOpError, 111 doSetPosixProp, 112 doSetBeginIntersection1, 113 doBackslashb, 114 doSetBeginUnion, 115 doIntevalLowerDigit, 116 doSetBackslash_h, 117 doStar, 118 doMatchMode, 119 doBackslashA, 120 doOpenLookBehind, 121 doPossessiveOpt, 122 doOrOperator, 123 doBackslashw, 124 doBackslashs, 125 doLiteralChar, 126 doSuppressComments, 127 doCaret, 128 doIntervalSame, 129 doNGOpt, 130 doOpenLookAhead, 131 doSetBackslash_W, 132 doMismatchedParenErr, 133 doSetBackslash_s, 134 rbbiLastAction}; 135 136 //------------------------------------------------------------------------------- 137 // 138 // RegexTableEl represents the structure of a row in the transition table 139 // for the pattern parser state machine. 140 //------------------------------------------------------------------------------- 141 struct RegexTableEl { 142 Regex_PatternParseAction fAction; 143 uint8_t fCharClass; // 0-127: an individual ASCII character 144 // 128-255: character class index 145 uint8_t fNextState; // 0-250: normal next-state numbers 146 // 255: pop next-state from stack. 147 uint8_t fPushState; 148 UBool fNextChar; 149 }; 150 151 static const struct RegexTableEl gRuleParseStateTable[] = { 152 {doNOP, 0, 0, 0, true} 153 , {doPatStart, 255, 2,0, false} // 1 start 154 , {doLiteralChar, 254, 14,0, true} // 2 term 155 , {doLiteralChar, 130, 14,0, true} // 3 156 , {doSetBegin, 91 /* [ */, 123, 205, true} // 4 157 , {doNOP, 40 /* ( */, 27,0, true} // 5 158 , {doDotAny, 46 /* . */, 14,0, true} // 6 159 , {doCaret, 94 /* ^ */, 14,0, true} // 7 160 , {doDollar, 36 /* $ */, 14,0, true} // 8 161 , {doNOP, 92 /* \ */, 89,0, true} // 9 162 , {doOrOperator, 124 /* | */, 2,0, true} // 10 163 , {doCloseParen, 41 /* ) */, 255,0, true} // 11 164 , {doPatFinish, 253, 2,0, false} // 12 165 , {doRuleError, 255, 206,0, false} // 13 166 , {doNOP, 42 /* * */, 68,0, true} // 14 expr-quant 167 , {doNOP, 43 /* + */, 71,0, true} // 15 168 , {doNOP, 63 /* ? */, 74,0, true} // 16 169 , {doIntervalInit, 123 /* { */, 77,0, true} // 17 170 , {doNOP, 40 /* ( */, 23,0, true} // 18 171 , {doNOP, 255, 20,0, false} // 19 172 , {doOrOperator, 124 /* | */, 2,0, true} // 20 expr-cont 173 , {doCloseParen, 41 /* ) */, 255,0, true} // 21 174 , {doNOP, 255, 2,0, false} // 22 175 , {doSuppressComments, 63 /* ? */, 25,0, true} // 23 open-paren-quant 176 , {doNOP, 255, 27,0, false} // 24 177 , {doNOP, 35 /* # */, 50, 14, true} // 25 open-paren-quant2 178 , {doNOP, 255, 29,0, false} // 26 179 , {doSuppressComments, 63 /* ? */, 29,0, true} // 27 open-paren 180 , {doOpenCaptureParen, 255, 2, 14, false} // 28 181 , {doOpenNonCaptureParen, 58 /* : */, 2, 14, true} // 29 open-paren-extended 182 , {doOpenAtomicParen, 62 /* > */, 2, 14, true} // 30 183 , {doOpenLookAhead, 61 /* = */, 2, 20, true} // 31 184 , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, true} // 32 185 , {doNOP, 60 /* < */, 46,0, true} // 33 186 , {doNOP, 35 /* # */, 50, 2, true} // 34 187 , {doBeginMatchMode, 105 /* i */, 53,0, false} // 35 188 , {doBeginMatchMode, 100 /* d */, 53,0, false} // 36 189 , {doBeginMatchMode, 109 /* m */, 53,0, false} // 37 190 , {doBeginMatchMode, 115 /* s */, 53,0, false} // 38 191 , {doBeginMatchMode, 117 /* u */, 53,0, false} // 39 192 , {doBeginMatchMode, 119 /* w */, 53,0, false} // 40 193 , {doBeginMatchMode, 120 /* x */, 53,0, false} // 41 194 , {doBeginMatchMode, 45 /* - */, 53,0, false} // 42 195 , {doConditionalExpr, 40 /* ( */, 206,0, true} // 43 196 , {doPerlInline, 123 /* { */, 206,0, true} // 44 197 , {doBadOpenParenType, 255, 206,0, false} // 45 198 , {doOpenLookBehind, 61 /* = */, 2, 20, true} // 46 open-paren-lookbehind 199 , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, true} // 47 200 , {doBeginNamedCapture, 129, 64,0, false} // 48 201 , {doBadOpenParenType, 255, 206,0, false} // 49 202 , {doNOP, 41 /* ) */, 255,0, true} // 50 paren-comment 203 , {doMismatchedParenErr, 253, 206,0, false} // 51 204 , {doNOP, 255, 50,0, true} // 52 205 , {doMatchMode, 105 /* i */, 53,0, true} // 53 paren-flag 206 , {doMatchMode, 100 /* d */, 53,0, true} // 54 207 , {doMatchMode, 109 /* m */, 53,0, true} // 55 208 , {doMatchMode, 115 /* s */, 53,0, true} // 56 209 , {doMatchMode, 117 /* u */, 53,0, true} // 57 210 , {doMatchMode, 119 /* w */, 53,0, true} // 58 211 , {doMatchMode, 120 /* x */, 53,0, true} // 59 212 , {doMatchMode, 45 /* - */, 53,0, true} // 60 213 , {doSetMatchMode, 41 /* ) */, 2,0, true} // 61 214 , {doMatchModeParen, 58 /* : */, 2, 14, true} // 62 215 , {doBadModeFlag, 255, 206,0, false} // 63 216 , {doContinueNamedCapture, 129, 64,0, true} // 64 named-capture 217 , {doContinueNamedCapture, 128, 64,0, true} // 65 218 , {doOpenCaptureParen, 62 /* > */, 2, 14, true} // 66 219 , {doBadNamedCapture, 255, 206,0, false} // 67 220 , {doNGStar, 63 /* ? */, 20,0, true} // 68 quant-star 221 , {doPossessiveStar, 43 /* + */, 20,0, true} // 69 222 , {doStar, 255, 20,0, false} // 70 223 , {doNGPlus, 63 /* ? */, 20,0, true} // 71 quant-plus 224 , {doPossessivePlus, 43 /* + */, 20,0, true} // 72 225 , {doPlus, 255, 20,0, false} // 73 226 , {doNGOpt, 63 /* ? */, 20,0, true} // 74 quant-opt 227 , {doPossessiveOpt, 43 /* + */, 20,0, true} // 75 228 , {doOpt, 255, 20,0, false} // 76 229 , {doNOP, 128, 79,0, false} // 77 interval-open 230 , {doIntervalError, 255, 206,0, false} // 78 231 , {doIntevalLowerDigit, 128, 79,0, true} // 79 interval-lower 232 , {doNOP, 44 /* , */, 83,0, true} // 80 233 , {doIntervalSame, 125 /* } */, 86,0, true} // 81 234 , {doIntervalError, 255, 206,0, false} // 82 235 , {doIntervalUpperDigit, 128, 83,0, true} // 83 interval-upper 236 , {doNOP, 125 /* } */, 86,0, true} // 84 237 , {doIntervalError, 255, 206,0, false} // 85 238 , {doNGInterval, 63 /* ? */, 20,0, true} // 86 interval-type 239 , {doPossessiveInterval, 43 /* + */, 20,0, true} // 87 240 , {doInterval, 255, 20,0, false} // 88 241 , {doBackslashA, 65 /* A */, 2,0, true} // 89 backslash 242 , {doBackslashB, 66 /* B */, 2,0, true} // 90 243 , {doBackslashb, 98 /* b */, 2,0, true} // 91 244 , {doBackslashd, 100 /* d */, 14,0, true} // 92 245 , {doBackslashD, 68 /* D */, 14,0, true} // 93 246 , {doBackslashG, 71 /* G */, 2,0, true} // 94 247 , {doBackslashh, 104 /* h */, 14,0, true} // 95 248 , {doBackslashH, 72 /* H */, 14,0, true} // 96 249 , {doNOP, 107 /* k */, 115,0, true} // 97 250 , {doNamedChar, 78 /* N */, 14,0, false} // 98 251 , {doProperty, 112 /* p */, 14,0, false} // 99 252 , {doProperty, 80 /* P */, 14,0, false} // 100 253 , {doBackslashR, 82 /* R */, 14,0, true} // 101 254 , {doEnterQuoteMode, 81 /* Q */, 2,0, true} // 102 255 , {doBackslashS, 83 /* S */, 14,0, true} // 103 256 , {doBackslashs, 115 /* s */, 14,0, true} // 104 257 , {doBackslashv, 118 /* v */, 14,0, true} // 105 258 , {doBackslashV, 86 /* V */, 14,0, true} // 106 259 , {doBackslashW, 87 /* W */, 14,0, true} // 107 260 , {doBackslashw, 119 /* w */, 14,0, true} // 108 261 , {doBackslashX, 88 /* X */, 14,0, true} // 109 262 , {doBackslashZ, 90 /* Z */, 2,0, true} // 110 263 , {doBackslashz, 122 /* z */, 2,0, true} // 111 264 , {doBackRef, 128, 14,0, true} // 112 265 , {doEscapeError, 253, 206,0, false} // 113 266 , {doEscapedLiteralChar, 255, 14,0, true} // 114 267 , {doBeginNamedBackRef, 60 /* < */, 117,0, true} // 115 named-backref 268 , {doBadNamedCapture, 255, 206,0, false} // 116 269 , {doContinueNamedBackRef, 129, 119,0, true} // 117 named-backref-2 270 , {doBadNamedCapture, 255, 206,0, false} // 118 271 , {doContinueNamedBackRef, 129, 119,0, true} // 119 named-backref-3 272 , {doContinueNamedBackRef, 128, 119,0, true} // 120 273 , {doCompleteNamedBackRef, 62 /* > */, 14,0, true} // 121 274 , {doBadNamedCapture, 255, 206,0, false} // 122 275 , {doSetNegate, 94 /* ^ */, 126,0, true} // 123 set-open 276 , {doSetPosixProp, 58 /* : */, 128,0, false} // 124 277 , {doNOP, 255, 126,0, false} // 125 278 , {doSetLiteral, 93 /* ] */, 141,0, true} // 126 set-open2 279 , {doNOP, 255, 131,0, false} // 127 280 , {doSetEnd, 93 /* ] */, 255,0, true} // 128 set-posix 281 , {doNOP, 58 /* : */, 131,0, false} // 129 282 , {doRuleError, 255, 206,0, false} // 130 283 , {doSetEnd, 93 /* ] */, 255,0, true} // 131 set-start 284 , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 132 285 , {doNOP, 92 /* \ */, 191,0, true} // 133 286 , {doNOP, 45 /* - */, 137,0, true} // 134 287 , {doNOP, 38 /* & */, 139,0, true} // 135 288 , {doSetLiteral, 255, 141,0, true} // 136 289 , {doRuleError, 45 /* - */, 206,0, false} // 137 set-start-dash 290 , {doSetAddDash, 255, 141,0, false} // 138 291 , {doRuleError, 38 /* & */, 206,0, false} // 139 set-start-amp 292 , {doSetAddAmp, 255, 141,0, false} // 140 293 , {doSetEnd, 93 /* ] */, 255,0, true} // 141 set-after-lit 294 , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 142 295 , {doNOP, 45 /* - */, 178,0, true} // 143 296 , {doNOP, 38 /* & */, 169,0, true} // 144 297 , {doNOP, 92 /* \ */, 191,0, true} // 145 298 , {doSetNoCloseError, 253, 206,0, false} // 146 299 , {doSetLiteral, 255, 141,0, true} // 147 300 , {doSetEnd, 93 /* ] */, 255,0, true} // 148 set-after-set 301 , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 149 302 , {doNOP, 45 /* - */, 171,0, true} // 150 303 , {doNOP, 38 /* & */, 166,0, true} // 151 304 , {doNOP, 92 /* \ */, 191,0, true} // 152 305 , {doSetNoCloseError, 253, 206,0, false} // 153 306 , {doSetLiteral, 255, 141,0, true} // 154 307 , {doSetEnd, 93 /* ] */, 255,0, true} // 155 set-after-range 308 , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 156 309 , {doNOP, 45 /* - */, 174,0, true} // 157 310 , {doNOP, 38 /* & */, 176,0, true} // 158 311 , {doNOP, 92 /* \ */, 191,0, true} // 159 312 , {doSetNoCloseError, 253, 206,0, false} // 160 313 , {doSetLiteral, 255, 141,0, true} // 161 314 , {doSetBeginUnion, 91 /* [ */, 123, 148, true} // 162 set-after-op 315 , {doSetOpError, 93 /* ] */, 206,0, false} // 163 316 , {doNOP, 92 /* \ */, 191,0, true} // 164 317 , {doSetLiteral, 255, 141,0, true} // 165 318 , {doSetBeginIntersection1, 91 /* [ */, 123, 148, true} // 166 set-set-amp 319 , {doSetIntersection2, 38 /* & */, 162,0, true} // 167 320 , {doSetAddAmp, 255, 141,0, false} // 168 321 , {doSetIntersection2, 38 /* & */, 162,0, true} // 169 set-lit-amp 322 , {doSetAddAmp, 255, 141,0, false} // 170 323 , {doSetBeginDifference1, 91 /* [ */, 123, 148, true} // 171 set-set-dash 324 , {doSetDifference2, 45 /* - */, 162,0, true} // 172 325 , {doSetAddDash, 255, 141,0, false} // 173 326 , {doSetDifference2, 45 /* - */, 162,0, true} // 174 set-range-dash 327 , {doSetAddDash, 255, 141,0, false} // 175 328 , {doSetIntersection2, 38 /* & */, 162,0, true} // 176 set-range-amp 329 , {doSetAddAmp, 255, 141,0, false} // 177 330 , {doSetDifference2, 45 /* - */, 162,0, true} // 178 set-lit-dash 331 , {doSetAddDash, 91 /* [ */, 141,0, false} // 179 332 , {doSetAddDash, 93 /* ] */, 141,0, false} // 180 333 , {doNOP, 92 /* \ */, 183,0, true} // 181 334 , {doSetRange, 255, 155,0, true} // 182 335 , {doSetOpError, 115 /* s */, 206,0, false} // 183 set-lit-dash-escape 336 , {doSetOpError, 83 /* S */, 206,0, false} // 184 337 , {doSetOpError, 119 /* w */, 206,0, false} // 185 338 , {doSetOpError, 87 /* W */, 206,0, false} // 186 339 , {doSetOpError, 100 /* d */, 206,0, false} // 187 340 , {doSetOpError, 68 /* D */, 206,0, false} // 188 341 , {doSetNamedRange, 78 /* N */, 155,0, false} // 189 342 , {doSetRange, 255, 155,0, true} // 190 343 , {doSetProp, 112 /* p */, 148,0, false} // 191 set-escape 344 , {doSetProp, 80 /* P */, 148,0, false} // 192 345 , {doSetNamedChar, 78 /* N */, 141,0, false} // 193 346 , {doSetBackslash_s, 115 /* s */, 155,0, true} // 194 347 , {doSetBackslash_S, 83 /* S */, 155,0, true} // 195 348 , {doSetBackslash_w, 119 /* w */, 155,0, true} // 196 349 , {doSetBackslash_W, 87 /* W */, 155,0, true} // 197 350 , {doSetBackslash_d, 100 /* d */, 155,0, true} // 198 351 , {doSetBackslash_D, 68 /* D */, 155,0, true} // 199 352 , {doSetBackslash_h, 104 /* h */, 155,0, true} // 200 353 , {doSetBackslash_H, 72 /* H */, 155,0, true} // 201 354 , {doSetBackslash_v, 118 /* v */, 155,0, true} // 202 355 , {doSetBackslash_V, 86 /* V */, 155,0, true} // 203 356 , {doSetLiteralEscaped, 255, 141,0, true} // 204 357 , {doSetFinish, 255, 14,0, false} // 205 set-finish 358 , {doExit, 255, 206,0, true} // 206 errorDeath 359 }; 360 static const char * const RegexStateNames[] = {nullptr, 361 "start", 362 "term", 363 nullptr, 364 nullptr, 365 nullptr, 366 nullptr, 367 nullptr, 368 nullptr, 369 nullptr, 370 nullptr, 371 nullptr, 372 nullptr, 373 nullptr, 374 "expr-quant", 375 nullptr, 376 nullptr, 377 nullptr, 378 nullptr, 379 nullptr, 380 "expr-cont", 381 nullptr, 382 nullptr, 383 "open-paren-quant", 384 nullptr, 385 "open-paren-quant2", 386 nullptr, 387 "open-paren", 388 nullptr, 389 "open-paren-extended", 390 nullptr, 391 nullptr, 392 nullptr, 393 nullptr, 394 nullptr, 395 nullptr, 396 nullptr, 397 nullptr, 398 nullptr, 399 nullptr, 400 nullptr, 401 nullptr, 402 nullptr, 403 nullptr, 404 nullptr, 405 nullptr, 406 "open-paren-lookbehind", 407 nullptr, 408 nullptr, 409 nullptr, 410 "paren-comment", 411 nullptr, 412 nullptr, 413 "paren-flag", 414 nullptr, 415 nullptr, 416 nullptr, 417 nullptr, 418 nullptr, 419 nullptr, 420 nullptr, 421 nullptr, 422 nullptr, 423 nullptr, 424 "named-capture", 425 nullptr, 426 nullptr, 427 nullptr, 428 "quant-star", 429 nullptr, 430 nullptr, 431 "quant-plus", 432 nullptr, 433 nullptr, 434 "quant-opt", 435 nullptr, 436 nullptr, 437 "interval-open", 438 nullptr, 439 "interval-lower", 440 nullptr, 441 nullptr, 442 nullptr, 443 "interval-upper", 444 nullptr, 445 nullptr, 446 "interval-type", 447 nullptr, 448 nullptr, 449 "backslash", 450 nullptr, 451 nullptr, 452 nullptr, 453 nullptr, 454 nullptr, 455 nullptr, 456 nullptr, 457 nullptr, 458 nullptr, 459 nullptr, 460 nullptr, 461 nullptr, 462 nullptr, 463 nullptr, 464 nullptr, 465 nullptr, 466 nullptr, 467 nullptr, 468 nullptr, 469 nullptr, 470 nullptr, 471 nullptr, 472 nullptr, 473 nullptr, 474 nullptr, 475 "named-backref", 476 nullptr, 477 "named-backref-2", 478 nullptr, 479 "named-backref-3", 480 nullptr, 481 nullptr, 482 nullptr, 483 "set-open", 484 nullptr, 485 nullptr, 486 "set-open2", 487 nullptr, 488 "set-posix", 489 nullptr, 490 nullptr, 491 "set-start", 492 nullptr, 493 nullptr, 494 nullptr, 495 nullptr, 496 nullptr, 497 "set-start-dash", 498 nullptr, 499 "set-start-amp", 500 nullptr, 501 "set-after-lit", 502 nullptr, 503 nullptr, 504 nullptr, 505 nullptr, 506 nullptr, 507 nullptr, 508 "set-after-set", 509 nullptr, 510 nullptr, 511 nullptr, 512 nullptr, 513 nullptr, 514 nullptr, 515 "set-after-range", 516 nullptr, 517 nullptr, 518 nullptr, 519 nullptr, 520 nullptr, 521 nullptr, 522 "set-after-op", 523 nullptr, 524 nullptr, 525 nullptr, 526 "set-set-amp", 527 nullptr, 528 nullptr, 529 "set-lit-amp", 530 nullptr, 531 "set-set-dash", 532 nullptr, 533 nullptr, 534 "set-range-dash", 535 nullptr, 536 "set-range-amp", 537 nullptr, 538 "set-lit-dash", 539 nullptr, 540 nullptr, 541 nullptr, 542 nullptr, 543 "set-lit-dash-escape", 544 nullptr, 545 nullptr, 546 nullptr, 547 nullptr, 548 nullptr, 549 nullptr, 550 nullptr, 551 "set-escape", 552 nullptr, 553 nullptr, 554 nullptr, 555 nullptr, 556 nullptr, 557 nullptr, 558 nullptr, 559 nullptr, 560 nullptr, 561 nullptr, 562 nullptr, 563 nullptr, 564 nullptr, 565 "set-finish", 566 "errorDeath", 567 nullptr}; 568 569 U_NAMESPACE_END 570 #endif 571