This is a dump from Google's source control system of the change
that removed UCS-2 support from RE2.  As the explanation below
says, UCS-2 mode is fundamentally at odds with things like ^ and $,
so it never really worked very well.  But if you are interested in using
it without those operators, it did work for that.  It assumed that the
UCS-2 data was in the native host byte order.

If you are interested in adding UCS-2 mode back, this patch might
be a good starting point.


Change 12780686 by rsc@rsc-re2 on 2009/09/16 15:30:15

    Retire UCS-2 mode.

    I added it as an experiment for V8, but it
    requires 2-byte lookahead to do completely,
    and RE2 has 1-byte lookahead (enough for UTF-8)
    as a fairly deep fundamental assumption,
    so it did not support ^ or $.
21*6777b538SAndroid Build Coastguard Worker 22*6777b538SAndroid Build Coastguard Worker==== re2/bitstate.cc#2 - re2/bitstate.cc#3 ==== 23*6777b538SAndroid Build Coastguard Workerre2/bitstate.cc#2:314,321 - re2/bitstate.cc#3:314,319 24*6777b538SAndroid Build Coastguard Worker cap_[0] = p; 25*6777b538SAndroid Build Coastguard Worker if (TrySearch(prog_->start(), p)) // Match must be leftmost; done. 26*6777b538SAndroid Build Coastguard Worker return true; 27*6777b538SAndroid Build Coastguard Worker- if (prog_->flags() & Regexp::UCS2) 28*6777b538SAndroid Build Coastguard Worker- p++; 29*6777b538SAndroid Build Coastguard Worker } 30*6777b538SAndroid Build Coastguard Worker return false; 31*6777b538SAndroid Build Coastguard Worker } 32*6777b538SAndroid Build Coastguard Worker==== re2/compile.cc#17 - re2/compile.cc#18 ==== 33*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:95,101 - re2/compile.cc#18:95,100 34*6777b538SAndroid Build Coastguard Worker // Input encodings. 35*6777b538SAndroid Build Coastguard Worker enum Encoding { 36*6777b538SAndroid Build Coastguard Worker kEncodingUTF8 = 1, // UTF-8 (0-10FFFF) 37*6777b538SAndroid Build Coastguard Worker- kEncodingUCS2, // UCS-2 (0-FFFF), native byte order 38*6777b538SAndroid Build Coastguard Worker kEncodingLatin1, // Latin1 (0-FF) 39*6777b538SAndroid Build Coastguard Worker }; 40*6777b538SAndroid Build Coastguard Worker 41*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:168,176 - re2/compile.cc#18:167,172 42*6777b538SAndroid Build Coastguard Worker void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase); 43*6777b538SAndroid Build Coastguard Worker void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase); 44*6777b538SAndroid Build Coastguard Worker void Add_80_10ffff(); 45*6777b538SAndroid Build Coastguard Worker- void AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase); 46*6777b538SAndroid Build Coastguard Worker- void AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1, 47*6777b538SAndroid Build Coastguard 
Worker- uint8 lo2, uint8 hi2, bool fold2); 48*6777b538SAndroid Build Coastguard Worker 49*6777b538SAndroid Build Coastguard Worker // New suffix that matches the byte range lo-hi, then goes to next. 50*6777b538SAndroid Build Coastguard Worker Inst* RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, Inst* next); 51*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:475,481 - re2/compile.cc#18:471,477 52*6777b538SAndroid Build Coastguard Worker 53*6777b538SAndroid Build Coastguard Worker // Converts rune range lo-hi into a fragment that recognizes 54*6777b538SAndroid Build Coastguard Worker // the bytes that would make up those runes in the current 55*6777b538SAndroid Build Coastguard Worker- // encoding (Latin 1, UTF-8, or UCS-2). 56*6777b538SAndroid Build Coastguard Worker+ // encoding (Latin 1 or UTF-8). 57*6777b538SAndroid Build Coastguard Worker // This lets the machine work byte-by-byte even when 58*6777b538SAndroid Build Coastguard Worker // using multibyte encodings. 59*6777b538SAndroid Build Coastguard Worker 60*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:488,496 - re2/compile.cc#18:484,489 61*6777b538SAndroid Build Coastguard Worker case kEncodingLatin1: 62*6777b538SAndroid Build Coastguard Worker AddRuneRangeLatin1(lo, hi, foldcase); 63*6777b538SAndroid Build Coastguard Worker break; 64*6777b538SAndroid Build Coastguard Worker- case kEncodingUCS2: 65*6777b538SAndroid Build Coastguard Worker- AddRuneRangeUCS2(lo, hi, foldcase); 66*6777b538SAndroid Build Coastguard Worker- break; 67*6777b538SAndroid Build Coastguard Worker } 68*6777b538SAndroid Build Coastguard Worker } 69*6777b538SAndroid Build Coastguard Worker 70*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:503,581 - re2/compile.cc#18:496,501 71*6777b538SAndroid Build Coastguard Worker AddSuffix(RuneByteSuffix(lo, hi, foldcase, NULL)); 72*6777b538SAndroid Build Coastguard Worker } 73*6777b538SAndroid Build Coastguard Worker 74*6777b538SAndroid Build Coastguard Worker- // 
Test whether 16-bit values are big or little endian. 75*6777b538SAndroid Build Coastguard Worker- static bool BigEndian() { 76*6777b538SAndroid Build Coastguard Worker- union { 77*6777b538SAndroid Build Coastguard Worker- char byte[2]; 78*6777b538SAndroid Build Coastguard Worker- int16 endian; 79*6777b538SAndroid Build Coastguard Worker- } u; 80*6777b538SAndroid Build Coastguard Worker- 81*6777b538SAndroid Build Coastguard Worker- u.byte[0] = 1; 82*6777b538SAndroid Build Coastguard Worker- u.byte[1] = 2; 83*6777b538SAndroid Build Coastguard Worker- return u.endian == 0x0102; 84*6777b538SAndroid Build Coastguard Worker- } 85*6777b538SAndroid Build Coastguard Worker- 86*6777b538SAndroid Build Coastguard Worker- void Compiler::AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1, 87*6777b538SAndroid Build Coastguard Worker- uint8 lo2, uint8 hi2, bool fold2) { 88*6777b538SAndroid Build Coastguard Worker- Inst* ip; 89*6777b538SAndroid Build Coastguard Worker- if (reversed_) { 90*6777b538SAndroid Build Coastguard Worker- ip = RuneByteSuffix(lo1, hi1, fold1, NULL); 91*6777b538SAndroid Build Coastguard Worker- ip = RuneByteSuffix(lo2, hi2, fold2, ip); 92*6777b538SAndroid Build Coastguard Worker- } else { 93*6777b538SAndroid Build Coastguard Worker- ip = RuneByteSuffix(lo2, hi2, fold2, NULL); 94*6777b538SAndroid Build Coastguard Worker- ip = RuneByteSuffix(lo1, hi1, fold1, ip); 95*6777b538SAndroid Build Coastguard Worker- } 96*6777b538SAndroid Build Coastguard Worker- AddSuffix(ip); 97*6777b538SAndroid Build Coastguard Worker- } 98*6777b538SAndroid Build Coastguard Worker- 99*6777b538SAndroid Build Coastguard Worker- void Compiler::AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase) { 100*6777b538SAndroid Build Coastguard Worker- if (lo > hi || lo > 0xFFFF) 101*6777b538SAndroid Build Coastguard Worker- return; 102*6777b538SAndroid Build Coastguard Worker- if (hi > 0xFFFF) 103*6777b538SAndroid Build Coastguard Worker- hi = 0xFFFF; 104*6777b538SAndroid Build Coastguard Worker- 
105*6777b538SAndroid Build Coastguard Worker- // We'll assemble a pattern assuming big endian. 106*6777b538SAndroid Build Coastguard Worker- // If the machine isn't, tell Cat to reverse its arguments. 107*6777b538SAndroid Build Coastguard Worker- bool oldreversed = reversed_; 108*6777b538SAndroid Build Coastguard Worker- if (!BigEndian()) { 109*6777b538SAndroid Build Coastguard Worker- reversed_ = !oldreversed; 110*6777b538SAndroid Build Coastguard Worker- } 111*6777b538SAndroid Build Coastguard Worker- 112*6777b538SAndroid Build Coastguard Worker- // Split into bytes. 113*6777b538SAndroid Build Coastguard Worker- int lo1 = lo >> 8; 114*6777b538SAndroid Build Coastguard Worker- int lo2 = lo & 0xFF; 115*6777b538SAndroid Build Coastguard Worker- int hi1 = hi >> 8; 116*6777b538SAndroid Build Coastguard Worker- int hi2 = hi & 0xFF; 117*6777b538SAndroid Build Coastguard Worker- 118*6777b538SAndroid Build Coastguard Worker- if (lo1 == hi1) { 119*6777b538SAndroid Build Coastguard Worker- // Easy case: high bits are same in both. 120*6777b538SAndroid Build Coastguard Worker- // Only do ASCII case folding on the second byte if the top byte is 00. 121*6777b538SAndroid Build Coastguard Worker- AddUCS2Pair(lo1, lo1, false, lo2, hi2, lo1==0 && foldcase); 122*6777b538SAndroid Build Coastguard Worker- } else { 123*6777b538SAndroid Build Coastguard Worker- // Harder case: different second byte ranges depending on first byte. 124*6777b538SAndroid Build Coastguard Worker- 125*6777b538SAndroid Build Coastguard Worker- // Initial fragment. 126*6777b538SAndroid Build Coastguard Worker- if (lo2 > 0) { 127*6777b538SAndroid Build Coastguard Worker- AddUCS2Pair(lo1, lo1, false, lo2, 0xFF, lo1==0 && foldcase); 128*6777b538SAndroid Build Coastguard Worker- lo1++; 129*6777b538SAndroid Build Coastguard Worker- } 130*6777b538SAndroid Build Coastguard Worker- 131*6777b538SAndroid Build Coastguard Worker- // Trailing fragment. 
132*6777b538SAndroid Build Coastguard Worker- if (hi2 < 0xFF) { 133*6777b538SAndroid Build Coastguard Worker- AddUCS2Pair(hi1, hi1, false, 0, hi2, false); 134*6777b538SAndroid Build Coastguard Worker- hi1--; 135*6777b538SAndroid Build Coastguard Worker- } 136*6777b538SAndroid Build Coastguard Worker- 137*6777b538SAndroid Build Coastguard Worker- // Inner ranges. 138*6777b538SAndroid Build Coastguard Worker- if (lo1 <= hi1) { 139*6777b538SAndroid Build Coastguard Worker- AddUCS2Pair(lo1, hi1, false, 0, 0xFF, false); 140*6777b538SAndroid Build Coastguard Worker- } 141*6777b538SAndroid Build Coastguard Worker- } 142*6777b538SAndroid Build Coastguard Worker- 143*6777b538SAndroid Build Coastguard Worker- // Restore reverse setting. 144*6777b538SAndroid Build Coastguard Worker- reversed_ = oldreversed; 145*6777b538SAndroid Build Coastguard Worker- } 146*6777b538SAndroid Build Coastguard Worker- 147*6777b538SAndroid Build Coastguard Worker // Table describing how to make a UTF-8 matching machine 148*6777b538SAndroid Build Coastguard Worker // for the rune range 80-10FFFF (Runeself-Runemax). 
149*6777b538SAndroid Build Coastguard Worker // This range happens frequently enough (for example /./ and /[^a-z]/) 150*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:707,716 - re2/compile.cc#18:627,634 151*6777b538SAndroid Build Coastguard Worker 152*6777b538SAndroid Build Coastguard Worker Frag Compiler::Literal(Rune r, bool foldcase) { 153*6777b538SAndroid Build Coastguard Worker switch (encoding_) { 154*6777b538SAndroid Build Coastguard Worker- default: // UCS-2 or something new 155*6777b538SAndroid Build Coastguard Worker- BeginRange(); 156*6777b538SAndroid Build Coastguard Worker- AddRuneRange(r, r, foldcase); 157*6777b538SAndroid Build Coastguard Worker- return EndRange(); 158*6777b538SAndroid Build Coastguard Worker+ default: 159*6777b538SAndroid Build Coastguard Worker+ return kNullFrag; 160*6777b538SAndroid Build Coastguard Worker 161*6777b538SAndroid Build Coastguard Worker case kEncodingLatin1: 162*6777b538SAndroid Build Coastguard Worker return ByteRange(r, r, foldcase); 163*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:927,934 - re2/compile.cc#18:845,850 164*6777b538SAndroid Build Coastguard Worker 165*6777b538SAndroid Build Coastguard Worker if (re->parse_flags() & Regexp::Latin1) 166*6777b538SAndroid Build Coastguard Worker c.encoding_ = kEncodingLatin1; 167*6777b538SAndroid Build Coastguard Worker- else if (re->parse_flags() & Regexp::UCS2) 168*6777b538SAndroid Build Coastguard Worker- c.encoding_ = kEncodingUCS2; 169*6777b538SAndroid Build Coastguard Worker c.reversed_ = reversed; 170*6777b538SAndroid Build Coastguard Worker if (max_mem <= 0) { 171*6777b538SAndroid Build Coastguard Worker c.max_inst_ = 100000; // more than enough 172*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:983,993 - re2/compile.cc#18:899,905 173*6777b538SAndroid Build Coastguard Worker c.prog_->set_start_unanchored(c.prog_->start()); 174*6777b538SAndroid Build Coastguard Worker } else { 175*6777b538SAndroid Build Coastguard Worker Frag dot; 
176*6777b538SAndroid Build Coastguard Worker- if (c.encoding_ == kEncodingUCS2) { 177*6777b538SAndroid Build Coastguard Worker- dot = c.Cat(c.ByteRange(0x00, 0xFF, false), c.ByteRange(0x00, 0xFF, false)); 178*6777b538SAndroid Build Coastguard Worker- } else { 179*6777b538SAndroid Build Coastguard Worker- dot = c.ByteRange(0x00, 0xFF, false); 180*6777b538SAndroid Build Coastguard Worker- } 181*6777b538SAndroid Build Coastguard Worker+ dot = c.ByteRange(0x00, 0xFF, false); 182*6777b538SAndroid Build Coastguard Worker Frag dotloop = c.Star(dot, true); 183*6777b538SAndroid Build Coastguard Worker Frag unanchored = c.Cat(dotloop, all); 184*6777b538SAndroid Build Coastguard Worker c.prog_->set_start_unanchored(unanchored.begin); 185*6777b538SAndroid Build Coastguard Worker==== re2/nfa.cc#8 - re2/nfa.cc#9 ==== 186*6777b538SAndroid Build Coastguard Workerre2/nfa.cc#8:426,432 - re2/nfa.cc#9:426,431 187*6777b538SAndroid Build Coastguard Worker const char* bp = context.begin(); 188*6777b538SAndroid Build Coastguard Worker int c = -1; 189*6777b538SAndroid Build Coastguard Worker int wasword = 0; 190*6777b538SAndroid Build Coastguard Worker- bool ucs2 = prog_->flags() & Regexp::UCS2; 191*6777b538SAndroid Build Coastguard Worker 192*6777b538SAndroid Build Coastguard Worker if (text.begin() > context.begin()) { 193*6777b538SAndroid Build Coastguard Worker c = text.begin()[-1] & 0xFF; 194*6777b538SAndroid Build Coastguard Workerre2/nfa.cc#8:492,498 - re2/nfa.cc#9:491,497 195*6777b538SAndroid Build Coastguard Worker // If there's a required first byte for an unanchored search 196*6777b538SAndroid Build Coastguard Worker // and we're not in the middle of any possible matches, 197*6777b538SAndroid Build Coastguard Worker // use memchr to search for the byte quickly. 
198*6777b538SAndroid Build Coastguard Worker- if (!ucs2 && !anchored && first_byte_ >= 0 && runq->size() == 0 && 199*6777b538SAndroid Build Coastguard Worker+ if (!anchored && first_byte_ >= 0 && runq->size() == 0 && 200*6777b538SAndroid Build Coastguard Worker p < text.end() && (p[0] & 0xFF) != first_byte_) { 201*6777b538SAndroid Build Coastguard Worker p = reinterpret_cast<const char*>(memchr(p, first_byte_, 202*6777b538SAndroid Build Coastguard Worker text.end() - p)); 203*6777b538SAndroid Build Coastguard Workerre2/nfa.cc#8:505,526 - re2/nfa.cc#9:504,514 204*6777b538SAndroid Build Coastguard Worker flag = Prog::EmptyFlags(context, p); 205*6777b538SAndroid Build Coastguard Worker } 206*6777b538SAndroid Build Coastguard Worker 207*6777b538SAndroid Build Coastguard Worker- // In UCS-2 mode, if we need to start a new thread, 208*6777b538SAndroid Build Coastguard Worker- // make sure to do it on an even boundary. 209*6777b538SAndroid Build Coastguard Worker- if(ucs2 && runq->size() == 0 && 210*6777b538SAndroid Build Coastguard Worker- (p - context.begin()) % 2 && p < text.end()) { 211*6777b538SAndroid Build Coastguard Worker- p++; 212*6777b538SAndroid Build Coastguard Worker- flag = Prog::EmptyFlags(context, p); 213*6777b538SAndroid Build Coastguard Worker- } 214*6777b538SAndroid Build Coastguard Worker- 215*6777b538SAndroid Build Coastguard Worker // Steal match storage (cleared but unused as of yet) 216*6777b538SAndroid Build Coastguard Worker // temporarily to hold match boundaries for new thread. 217*6777b538SAndroid Build Coastguard Worker- // In UCS-2 mode, only start the thread on a 2-byte boundary. 
218*6777b538SAndroid Build Coastguard Worker- if(!ucs2 || (p - context.begin()) % 2 == 0) { 219*6777b538SAndroid Build Coastguard Worker- match_[0] = p; 220*6777b538SAndroid Build Coastguard Worker- AddToThreadq(runq, start_, flag, p, match_); 221*6777b538SAndroid Build Coastguard Worker- match_[0] = NULL; 222*6777b538SAndroid Build Coastguard Worker- } 223*6777b538SAndroid Build Coastguard Worker+ match_[0] = p; 224*6777b538SAndroid Build Coastguard Worker+ AddToThreadq(runq, start_, flag, p, match_); 225*6777b538SAndroid Build Coastguard Worker+ match_[0] = NULL; 226*6777b538SAndroid Build Coastguard Worker } 227*6777b538SAndroid Build Coastguard Worker 228*6777b538SAndroid Build Coastguard Worker // If all the threads have died, stop early. 229*6777b538SAndroid Build Coastguard Worker==== re2/parse.cc#22 - re2/parse.cc#23 ==== 230*6777b538SAndroid Build Coastguard Workerre2/parse.cc#22:160,167 - re2/parse.cc#23:160,165 231*6777b538SAndroid Build Coastguard Worker status_(status), stacktop_(NULL), ncap_(0) { 232*6777b538SAndroid Build Coastguard Worker if (flags_ & Latin1) 233*6777b538SAndroid Build Coastguard Worker rune_max_ = 0xFF; 234*6777b538SAndroid Build Coastguard Worker- else if (flags & UCS2) 235*6777b538SAndroid Build Coastguard Worker- rune_max_ = 0xFFFF; 236*6777b538SAndroid Build Coastguard Worker else 237*6777b538SAndroid Build Coastguard Worker rune_max_ = Runemax; 238*6777b538SAndroid Build Coastguard Worker } 239*6777b538SAndroid Build Coastguard Workerre2/parse.cc#22:365,387 - re2/parse.cc#23:363,374 240*6777b538SAndroid Build Coastguard Worker bool Regexp::ParseState::PushCarat() { 241*6777b538SAndroid Build Coastguard Worker if (flags_ & OneLine) { 242*6777b538SAndroid Build Coastguard Worker return PushSimpleOp(kRegexpBeginText); 243*6777b538SAndroid Build Coastguard Worker- } else { 244*6777b538SAndroid Build Coastguard Worker- if (flags_ & UCS2) { 245*6777b538SAndroid Build Coastguard Worker- status_->set_code(kRegexpUnsupported); 
246*6777b538SAndroid Build Coastguard Worker- status_->set_error_arg("multiline ^ in UCS-2 mode"); 247*6777b538SAndroid Build Coastguard Worker- return false; 248*6777b538SAndroid Build Coastguard Worker- } 249*6777b538SAndroid Build Coastguard Worker- return PushSimpleOp(kRegexpBeginLine); 250*6777b538SAndroid Build Coastguard Worker } 251*6777b538SAndroid Build Coastguard Worker+ return PushSimpleOp(kRegexpBeginLine); 252*6777b538SAndroid Build Coastguard Worker } 253*6777b538SAndroid Build Coastguard Worker 254*6777b538SAndroid Build Coastguard Worker // Pushes a \b or \B onto the stack. 255*6777b538SAndroid Build Coastguard Worker bool Regexp::ParseState::PushWordBoundary(bool word) { 256*6777b538SAndroid Build Coastguard Worker- if (flags_ & UCS2) { 257*6777b538SAndroid Build Coastguard Worker- status_->set_code(kRegexpUnsupported); 258*6777b538SAndroid Build Coastguard Worker- status_->set_error_arg("\\b or \\B in UCS-2 mode"); 259*6777b538SAndroid Build Coastguard Worker- return false; 260*6777b538SAndroid Build Coastguard Worker- } 261*6777b538SAndroid Build Coastguard Worker if (word) 262*6777b538SAndroid Build Coastguard Worker return PushSimpleOp(kRegexpWordBoundary); 263*6777b538SAndroid Build Coastguard Worker return PushSimpleOp(kRegexpNoWordBoundary); 264*6777b538SAndroid Build Coastguard Workerre2/parse.cc#22:397,407 - re2/parse.cc#23:384,389 265*6777b538SAndroid Build Coastguard Worker bool ret = PushSimpleOp(kRegexpEndText); 266*6777b538SAndroid Build Coastguard Worker flags_ = oflags; 267*6777b538SAndroid Build Coastguard Worker return ret; 268*6777b538SAndroid Build Coastguard Worker- } 269*6777b538SAndroid Build Coastguard Worker- if (flags_ & UCS2) { 270*6777b538SAndroid Build Coastguard Worker- status_->set_code(kRegexpUnsupported); 271*6777b538SAndroid Build Coastguard Worker- status_->set_error_arg("multiline $ in UCS-2 mode"); 272*6777b538SAndroid Build Coastguard Worker- return false; 273*6777b538SAndroid Build Coastguard Worker } 
274*6777b538SAndroid Build Coastguard Worker return PushSimpleOp(kRegexpEndLine); 275*6777b538SAndroid Build Coastguard Worker } 276*6777b538SAndroid Build Coastguard Worker==== re2/re2.cc#34 - re2/re2.cc#35 ==== 277*6777b538SAndroid Build Coastguard Workerre2/re2.cc#34:79,86 - re2/re2.cc#35:79,84 278*6777b538SAndroid Build Coastguard Worker return RE2::ErrorBadUTF8; 279*6777b538SAndroid Build Coastguard Worker case re2::kRegexpBadNamedCapture: 280*6777b538SAndroid Build Coastguard Worker return RE2::ErrorBadNamedCapture; 281*6777b538SAndroid Build Coastguard Worker- case re2::kRegexpUnsupported: 282*6777b538SAndroid Build Coastguard Worker- return RE2::ErrorUnsupported; 283*6777b538SAndroid Build Coastguard Worker } 284*6777b538SAndroid Build Coastguard Worker return RE2::ErrorInternal; 285*6777b538SAndroid Build Coastguard Worker } 286*6777b538SAndroid Build Coastguard Workerre2/re2.cc#34:122,130 - re2/re2.cc#35:120,125 287*6777b538SAndroid Build Coastguard Worker break; 288*6777b538SAndroid Build Coastguard Worker case RE2::Options::EncodingLatin1: 289*6777b538SAndroid Build Coastguard Worker flags |= Regexp::Latin1; 290*6777b538SAndroid Build Coastguard Worker- break; 291*6777b538SAndroid Build Coastguard Worker- case RE2::Options::EncodingUCS2: 292*6777b538SAndroid Build Coastguard Worker- flags |= Regexp::UCS2; 293*6777b538SAndroid Build Coastguard Worker break; 294*6777b538SAndroid Build Coastguard Worker } 295*6777b538SAndroid Build Coastguard Worker 296*6777b538SAndroid Build Coastguard Worker==== re2/re2.h#36 - re2/re2.h#37 ==== 297*6777b538SAndroid Build Coastguard Workerre2/re2.h#36:246,252 - re2/re2.h#37:246,251 298*6777b538SAndroid Build Coastguard Worker ErrorBadUTF8, // invalid UTF-8 in regexp 299*6777b538SAndroid Build Coastguard Worker ErrorBadNamedCapture, // bad named capture group 300*6777b538SAndroid Build Coastguard Worker ErrorPatternTooLarge, // pattern too large (compile failed) 301*6777b538SAndroid Build Coastguard Worker- 
ErrorUnsupported, // unsupported feature (in UCS-2 mode) 302*6777b538SAndroid Build Coastguard Worker }; 303*6777b538SAndroid Build Coastguard Worker 304*6777b538SAndroid Build Coastguard Worker // Predefined common options. 305*6777b538SAndroid Build Coastguard Workerre2/re2.h#36:570,576 - re2/re2.h#37:569,574 306*6777b538SAndroid Build Coastguard Worker 307*6777b538SAndroid Build Coastguard Worker enum Encoding { 308*6777b538SAndroid Build Coastguard Worker EncodingUTF8 = 1, 309*6777b538SAndroid Build Coastguard Worker- EncodingUCS2, // 16-bit Unicode 0-FFFF only 310*6777b538SAndroid Build Coastguard Worker EncodingLatin1 311*6777b538SAndroid Build Coastguard Worker }; 312*6777b538SAndroid Build Coastguard Worker 313*6777b538SAndroid Build Coastguard Worker==== re2/regexp.cc#15 - re2/regexp.cc#16 ==== 314*6777b538SAndroid Build Coastguard Workerre2/regexp.cc#15:324,333 - re2/regexp.cc#16:324,329 315*6777b538SAndroid Build Coastguard Worker // the regexp that remains after the prefix. The prefix might 316*6777b538SAndroid Build Coastguard Worker // be ASCII case-insensitive. 317*6777b538SAndroid Build Coastguard Worker bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) { 318*6777b538SAndroid Build Coastguard Worker- // Don't even bother for UCS-2; it's time to throw that code away. 319*6777b538SAndroid Build Coastguard Worker- if (parse_flags_ & UCS2) 320*6777b538SAndroid Build Coastguard Worker- return false; 321*6777b538SAndroid Build Coastguard Worker- 322*6777b538SAndroid Build Coastguard Worker // No need for a walker: the regexp must be of the form 323*6777b538SAndroid Build Coastguard Worker // 1. some number of ^ anchors 324*6777b538SAndroid Build Coastguard Worker // 2. 
a literal char or string 325*6777b538SAndroid Build Coastguard Worker==== re2/regexp.h#20 - re2/regexp.h#21 ==== 326*6777b538SAndroid Build Coastguard Workerre2/regexp.h#20:187,193 - re2/regexp.h#21:187,192 327*6777b538SAndroid Build Coastguard Worker kRegexpBadPerlOp, // bad perl operator 328*6777b538SAndroid Build Coastguard Worker kRegexpBadUTF8, // invalid UTF-8 in regexp 329*6777b538SAndroid Build Coastguard Worker kRegexpBadNamedCapture, // bad named capture 330*6777b538SAndroid Build Coastguard Worker- kRegexpUnsupported, // unsupported operator 331*6777b538SAndroid Build Coastguard Worker }; 332*6777b538SAndroid Build Coastguard Worker 333*6777b538SAndroid Build Coastguard Worker // Error status for certain operations. 334*6777b538SAndroid Build Coastguard Workerre2/regexp.h#20:307,316 - re2/regexp.h#21:306,314 335*6777b538SAndroid Build Coastguard Worker // \Q and \E to disable/enable metacharacters 336*6777b538SAndroid Build Coastguard Worker // (?P<name>expr) for named captures 337*6777b538SAndroid Build Coastguard Worker // \C to match any single byte 338*6777b538SAndroid Build Coastguard Worker- UCS2 = 1<<10, // Text is in UCS-2, regexp is in UTF-8. 339*6777b538SAndroid Build Coastguard Worker- UnicodeGroups = 1<<11, // Allow \p{Han} for Unicode Han group 340*6777b538SAndroid Build Coastguard Worker+ UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group 341*6777b538SAndroid Build Coastguard Worker // and \P{Han} for its negation. 342*6777b538SAndroid Build Coastguard Worker- NeverNL = 1<<12, // Never match NL, even if the regexp mentions 343*6777b538SAndroid Build Coastguard Worker+ NeverNL = 1<<11, // Never match NL, even if the regexp mentions 344*6777b538SAndroid Build Coastguard Worker // it explicitly. 345*6777b538SAndroid Build Coastguard Worker 346*6777b538SAndroid Build Coastguard Worker // As close to Perl as we can get. 
347*6777b538SAndroid Build Coastguard Worker==== re2/testing/backtrack.cc#4 - re2/testing/backtrack.cc#5 ==== 348*6777b538SAndroid Build Coastguard Workerre2/testing/backtrack.cc#4:134,141 - re2/testing/backtrack.cc#5:134,139 349*6777b538SAndroid Build Coastguard Worker cap_[0] = p; 350*6777b538SAndroid Build Coastguard Worker if (Visit(prog_->start(), p)) // Match must be leftmost; done. 351*6777b538SAndroid Build Coastguard Worker return true; 352*6777b538SAndroid Build Coastguard Worker- if (prog_->flags() & Regexp::UCS2) 353*6777b538SAndroid Build Coastguard Worker- p++; 354*6777b538SAndroid Build Coastguard Worker } 355*6777b538SAndroid Build Coastguard Worker return false; 356*6777b538SAndroid Build Coastguard Worker } 357*6777b538SAndroid Build Coastguard Worker==== re2/testing/tester.cc#12 - re2/testing/tester.cc#13 ==== 358*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:144,154 - re2/testing/tester.cc#13:144,152 359*6777b538SAndroid Build Coastguard Worker static ParseMode parse_modes[] = { 360*6777b538SAndroid Build Coastguard Worker { single_line, "single-line" }, 361*6777b538SAndroid Build Coastguard Worker { single_line|Regexp::Latin1, "single-line, latin1" }, 362*6777b538SAndroid Build Coastguard Worker- { single_line|Regexp::UCS2, "single-line, ucs2" }, 363*6777b538SAndroid Build Coastguard Worker { multi_line, "multiline" }, 364*6777b538SAndroid Build Coastguard Worker { multi_line|Regexp::NonGreedy, "multiline, nongreedy" }, 365*6777b538SAndroid Build Coastguard Worker { multi_line|Regexp::Latin1, "multiline, latin1" }, 366*6777b538SAndroid Build Coastguard Worker- { multi_line|Regexp::UCS2, "multiline, ucs2" }, 367*6777b538SAndroid Build Coastguard Worker }; 368*6777b538SAndroid Build Coastguard Worker 369*6777b538SAndroid Build Coastguard Worker static string FormatMode(Regexp::ParseFlags flags) { 370*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:179,189 - re2/testing/tester.cc#13:177,185 371*6777b538SAndroid 
Build Coastguard Worker RegexpStatus status; 372*6777b538SAndroid Build Coastguard Worker regexp_ = Regexp::Parse(regexp_str, flags, &status); 373*6777b538SAndroid Build Coastguard Worker if (regexp_ == NULL) { 374*6777b538SAndroid Build Coastguard Worker- if (status.code() != kRegexpUnsupported) { 375*6777b538SAndroid Build Coastguard Worker- LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_) 376*6777b538SAndroid Build Coastguard Worker- << " mode: " << FormatMode(flags); 377*6777b538SAndroid Build Coastguard Worker- error_ = true; 378*6777b538SAndroid Build Coastguard Worker- } 379*6777b538SAndroid Build Coastguard Worker+ LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_) 380*6777b538SAndroid Build Coastguard Worker+ << " mode: " << FormatMode(flags); 381*6777b538SAndroid Build Coastguard Worker+ error_ = true; 382*6777b538SAndroid Build Coastguard Worker return; 383*6777b538SAndroid Build Coastguard Worker } 384*6777b538SAndroid Build Coastguard Worker prog_ = regexp_->CompileToProg(0); 385*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:230,237 - re2/testing/tester.cc#13:226,231 386*6777b538SAndroid Build Coastguard Worker RE2::Options options; 387*6777b538SAndroid Build Coastguard Worker if (flags & Regexp::Latin1) 388*6777b538SAndroid Build Coastguard Worker options.set_encoding(RE2::Options::EncodingLatin1); 389*6777b538SAndroid Build Coastguard Worker- else if (flags & Regexp::UCS2) 390*6777b538SAndroid Build Coastguard Worker- options.set_encoding(RE2::Options::EncodingUCS2); 391*6777b538SAndroid Build Coastguard Worker if (kind_ == Prog::kLongestMatch) 392*6777b538SAndroid Build Coastguard Worker options.set_longest_match(true); 393*6777b538SAndroid Build Coastguard Worker re2_ = new RE2(re, options); 394*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:281,379 - re2/testing/tester.cc#13:275,280 395*6777b538SAndroid Build Coastguard Worker delete re2_; 396*6777b538SAndroid Build Coastguard Worker } 
397*6777b538SAndroid Build Coastguard Worker 398*6777b538SAndroid Build Coastguard Worker- // Converts UTF-8 string in text into UCS-2 string in new_text. 399*6777b538SAndroid Build Coastguard Worker- static bool ConvertUTF8ToUCS2(const StringPiece& text, StringPiece* new_text) { 400*6777b538SAndroid Build Coastguard Worker- const char* p = text.begin(); 401*6777b538SAndroid Build Coastguard Worker- const char* ep = text.end(); 402*6777b538SAndroid Build Coastguard Worker- uint16* q = new uint16[ep - p]; 403*6777b538SAndroid Build Coastguard Worker- uint16* q0 = q; 404*6777b538SAndroid Build Coastguard Worker- 405*6777b538SAndroid Build Coastguard Worker- int n; 406*6777b538SAndroid Build Coastguard Worker- Rune r; 407*6777b538SAndroid Build Coastguard Worker- for (; p < ep; p += n) { 408*6777b538SAndroid Build Coastguard Worker- if (!fullrune(p, ep - p)) { 409*6777b538SAndroid Build Coastguard Worker- delete[] q0; 410*6777b538SAndroid Build Coastguard Worker- return false; 411*6777b538SAndroid Build Coastguard Worker- } 412*6777b538SAndroid Build Coastguard Worker- n = chartorune(&r, p); 413*6777b538SAndroid Build Coastguard Worker- if (r > 0xFFFF) { 414*6777b538SAndroid Build Coastguard Worker- delete[] q0; 415*6777b538SAndroid Build Coastguard Worker- return false; 416*6777b538SAndroid Build Coastguard Worker- } 417*6777b538SAndroid Build Coastguard Worker- *q++ = r; 418*6777b538SAndroid Build Coastguard Worker- } 419*6777b538SAndroid Build Coastguard Worker- *new_text = StringPiece(reinterpret_cast<char*>(q0), 2*(q - q0)); 420*6777b538SAndroid Build Coastguard Worker- return true; 421*6777b538SAndroid Build Coastguard Worker- } 422*6777b538SAndroid Build Coastguard Worker- 423*6777b538SAndroid Build Coastguard Worker- // Rewrites *sp from being a pointer into text8 (UTF-8) 424*6777b538SAndroid Build Coastguard Worker- // to being a pointer into text16 (equivalent text but in UCS-2). 
425*6777b538SAndroid Build Coastguard Worker- static void AdjustUTF8ToUCS2(const StringPiece& text8, const StringPiece& text16, 426*6777b538SAndroid Build Coastguard Worker- StringPiece *sp) { 427*6777b538SAndroid Build Coastguard Worker- if (sp->begin() == NULL && text8.begin() != NULL) 428*6777b538SAndroid Build Coastguard Worker- return; 429*6777b538SAndroid Build Coastguard Worker- 430*6777b538SAndroid Build Coastguard Worker- int nrune = 0; 431*6777b538SAndroid Build Coastguard Worker- int n; 432*6777b538SAndroid Build Coastguard Worker- Rune r; 433*6777b538SAndroid Build Coastguard Worker- const char* p = text8.begin(); 434*6777b538SAndroid Build Coastguard Worker- const char* ep = text8.end(); 435*6777b538SAndroid Build Coastguard Worker- const char* spbegin = NULL; 436*6777b538SAndroid Build Coastguard Worker- const char* spend = NULL; 437*6777b538SAndroid Build Coastguard Worker- for (;;) { 438*6777b538SAndroid Build Coastguard Worker- if (p == sp->begin()) 439*6777b538SAndroid Build Coastguard Worker- spbegin = text16.begin() + sizeof(uint16)*nrune; 440*6777b538SAndroid Build Coastguard Worker- if (p == sp->end()) 441*6777b538SAndroid Build Coastguard Worker- spend = text16.begin() + sizeof(uint16)*nrune; 442*6777b538SAndroid Build Coastguard Worker- if (p >= ep) 443*6777b538SAndroid Build Coastguard Worker- break; 444*6777b538SAndroid Build Coastguard Worker- n = chartorune(&r, p); 445*6777b538SAndroid Build Coastguard Worker- p += n; 446*6777b538SAndroid Build Coastguard Worker- nrune++; 447*6777b538SAndroid Build Coastguard Worker- } 448*6777b538SAndroid Build Coastguard Worker- if (spbegin == NULL || spend == NULL) { 449*6777b538SAndroid Build Coastguard Worker- LOG(FATAL) << "Error in AdjustUTF8ToUCS2 " 450*6777b538SAndroid Build Coastguard Worker- << CEscape(text8) << " " 451*6777b538SAndroid Build Coastguard Worker- << (int)(sp->begin() - text8.begin()) << " " 452*6777b538SAndroid Build Coastguard Worker- << (int)(sp->end() - text8.begin()); 
453*6777b538SAndroid Build Coastguard Worker- } 454*6777b538SAndroid Build Coastguard Worker- *sp = StringPiece(spbegin, spend - spbegin); 455*6777b538SAndroid Build Coastguard Worker- } 456*6777b538SAndroid Build Coastguard Worker- 457*6777b538SAndroid Build Coastguard Worker- // Rewrites *sp from begin a pointer into text16 (UCS-2) 458*6777b538SAndroid Build Coastguard Worker- // to being a pointer into text8 (equivalent text but in UTF-8). 459*6777b538SAndroid Build Coastguard Worker- static void AdjustUCS2ToUTF8(const StringPiece& text16, const StringPiece& text8, 460*6777b538SAndroid Build Coastguard Worker- StringPiece* sp) { 461*6777b538SAndroid Build Coastguard Worker- if (sp->begin() == NULL) 462*6777b538SAndroid Build Coastguard Worker- return; 463*6777b538SAndroid Build Coastguard Worker- 464*6777b538SAndroid Build Coastguard Worker- int nrune = 0; 465*6777b538SAndroid Build Coastguard Worker- int n; 466*6777b538SAndroid Build Coastguard Worker- Rune r; 467*6777b538SAndroid Build Coastguard Worker- const char* p = text8.begin(); 468*6777b538SAndroid Build Coastguard Worker- const char* ep = text8.end(); 469*6777b538SAndroid Build Coastguard Worker- const char* spbegin = NULL; 470*6777b538SAndroid Build Coastguard Worker- const char* spend = NULL; 471*6777b538SAndroid Build Coastguard Worker- for (;;) { 472*6777b538SAndroid Build Coastguard Worker- if (nrune == (sp->begin() - text16.begin())/2) 473*6777b538SAndroid Build Coastguard Worker- spbegin = p; 474*6777b538SAndroid Build Coastguard Worker- if (nrune == (sp->end() - text16.begin())/2) 475*6777b538SAndroid Build Coastguard Worker- spend = p; 476*6777b538SAndroid Build Coastguard Worker- if (p >= ep) 477*6777b538SAndroid Build Coastguard Worker- break; 478*6777b538SAndroid Build Coastguard Worker- n = chartorune(&r, p); 479*6777b538SAndroid Build Coastguard Worker- p += n; 480*6777b538SAndroid Build Coastguard Worker- nrune++; 481*6777b538SAndroid Build Coastguard Worker- } 482*6777b538SAndroid Build 
Coastguard Worker- if (text8.begin() != NULL && (spbegin == NULL || spend == NULL)) { 483*6777b538SAndroid Build Coastguard Worker- LOG(FATAL) << "Error in AdjustUCS2ToUTF8 " 484*6777b538SAndroid Build Coastguard Worker- << CEscape(text16) << " " 485*6777b538SAndroid Build Coastguard Worker- << (int)(sp->begin() - text16.begin()) << " " 486*6777b538SAndroid Build Coastguard Worker- << (int)(sp->end() - text16.begin()); 487*6777b538SAndroid Build Coastguard Worker- } 488*6777b538SAndroid Build Coastguard Worker- *sp = StringPiece(spbegin, spend - spbegin); 489*6777b538SAndroid Build Coastguard Worker- } 490*6777b538SAndroid Build Coastguard Worker- 491*6777b538SAndroid Build Coastguard Worker // Runs a single search using the named engine type. 492*6777b538SAndroid Build Coastguard Worker // This interface hides all the irregularities of the various 493*6777b538SAndroid Build Coastguard Worker // engine interfaces from the rest of this file. 494*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:393,411 - re2/testing/tester.cc#13:294,300 495*6777b538SAndroid Build Coastguard Worker 496*6777b538SAndroid Build Coastguard Worker StringPiece text = orig_text; 497*6777b538SAndroid Build Coastguard Worker StringPiece context = orig_context; 498*6777b538SAndroid Build Coastguard Worker- bool ucs2 = false; 499*6777b538SAndroid Build Coastguard Worker 500*6777b538SAndroid Build Coastguard Worker- if ((flags() & Regexp::UCS2) && type != kEnginePCRE) { 501*6777b538SAndroid Build Coastguard Worker- if (!ConvertUTF8ToUCS2(orig_context, &context)) { 502*6777b538SAndroid Build Coastguard Worker- result->skipped = true; 503*6777b538SAndroid Build Coastguard Worker- return; 504*6777b538SAndroid Build Coastguard Worker- } 505*6777b538SAndroid Build Coastguard Worker- 506*6777b538SAndroid Build Coastguard Worker- // Rewrite context to refer to new text. 
507*6777b538SAndroid Build Coastguard Worker- AdjustUTF8ToUCS2(orig_context, context, &text); 508*6777b538SAndroid Build Coastguard Worker- ucs2 = true; 509*6777b538SAndroid Build Coastguard Worker- } 510*6777b538SAndroid Build Coastguard Worker- 511*6777b538SAndroid Build Coastguard Worker switch (type) { 512*6777b538SAndroid Build Coastguard Worker default: 513*6777b538SAndroid Build Coastguard Worker LOG(FATAL) << "Bad RunSearch type: " << (int)type; 514*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:557,577 - re2/testing/tester.cc#13:446,451 515*6777b538SAndroid Build Coastguard Worker } 516*6777b538SAndroid Build Coastguard Worker } 517*6777b538SAndroid Build Coastguard Worker 518*6777b538SAndroid Build Coastguard Worker- // If we did UCS-2 matching, rewrite the matches to refer 519*6777b538SAndroid Build Coastguard Worker- // to the original UTF-8 text. 520*6777b538SAndroid Build Coastguard Worker- if (ucs2) { 521*6777b538SAndroid Build Coastguard Worker- if (result->matched) { 522*6777b538SAndroid Build Coastguard Worker- if (result->have_submatch0) { 523*6777b538SAndroid Build Coastguard Worker- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[0]); 524*6777b538SAndroid Build Coastguard Worker- } else if (result->have_submatch) { 525*6777b538SAndroid Build Coastguard Worker- for (int i = 0; i < nsubmatch; i++) { 526*6777b538SAndroid Build Coastguard Worker- AdjustUCS2ToUTF8(context, orig_context, &result->submatch[i]); 527*6777b538SAndroid Build Coastguard Worker- } 528*6777b538SAndroid Build Coastguard Worker- } 529*6777b538SAndroid Build Coastguard Worker- } 530*6777b538SAndroid Build Coastguard Worker- delete[] context.begin(); 531*6777b538SAndroid Build Coastguard Worker- } 532*6777b538SAndroid Build Coastguard Worker- 533*6777b538SAndroid Build Coastguard Worker if (!result->matched) 534*6777b538SAndroid Build Coastguard Worker memset(result->submatch, 0, sizeof result->submatch); 535*6777b538SAndroid Build Coastguard Worker } 
536*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:596,617 - re2/testing/tester.cc#13:470,475 537*6777b538SAndroid Build Coastguard Worker return true; 538*6777b538SAndroid Build Coastguard Worker } 539*6777b538SAndroid Build Coastguard Worker 540*6777b538SAndroid Build Coastguard Worker- // Check whether text uses only Unicode points <= 0xFFFF 541*6777b538SAndroid Build Coastguard Worker- // (in the BMP). 542*6777b538SAndroid Build Coastguard Worker- static bool IsBMP(const StringPiece& text) { 543*6777b538SAndroid Build Coastguard Worker- const char* p = text.begin(); 544*6777b538SAndroid Build Coastguard Worker- const char* ep = text.end(); 545*6777b538SAndroid Build Coastguard Worker- while (p < ep) { 546*6777b538SAndroid Build Coastguard Worker- if (!fullrune(p, ep - p)) 547*6777b538SAndroid Build Coastguard Worker- return false; 548*6777b538SAndroid Build Coastguard Worker- Rune r; 549*6777b538SAndroid Build Coastguard Worker- p += chartorune(&r, p); 550*6777b538SAndroid Build Coastguard Worker- if (r > 0xFFFF) 551*6777b538SAndroid Build Coastguard Worker- return false; 552*6777b538SAndroid Build Coastguard Worker- } 553*6777b538SAndroid Build Coastguard Worker- return true; 554*6777b538SAndroid Build Coastguard Worker- } 555*6777b538SAndroid Build Coastguard Worker- 556*6777b538SAndroid Build Coastguard Worker // Runs a single test. 
557*6777b538SAndroid Build Coastguard Worker bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context, 558*6777b538SAndroid Build Coastguard Worker Prog::Anchor anchor) { 559*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:619,625 - re2/testing/tester.cc#13:477,483 560*6777b538SAndroid Build Coastguard Worker Result correct; 561*6777b538SAndroid Build Coastguard Worker RunSearch(kEngineBacktrack, text, context, anchor, &correct); 562*6777b538SAndroid Build Coastguard Worker if (correct.skipped) { 563*6777b538SAndroid Build Coastguard Worker- if (regexp_ == NULL || !IsBMP(context)) // okay to skip in UCS-2 mode 564*6777b538SAndroid Build Coastguard Worker+ if (regexp_ == NULL) 565*6777b538SAndroid Build Coastguard Worker return true; 566*6777b538SAndroid Build Coastguard Worker LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_) 567*6777b538SAndroid Build Coastguard Worker << " " << FormatMode(flags_); 568