xref: /aosp_15_r20/external/cronet/third_party/re2/src/ucs2.diff (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
This is a dump from Google's source control system of the change
that removed UCS-2 support from RE2.  As the explanation below
says, UCS-2 mode is fundamentally at odds with things like ^ and $,
so it never really worked very well.  But if you are interested in using
it without those operators, it did work for that.  It assumed that the
UCS-2 data was in the native host byte order.
7*6777b538SAndroid Build Coastguard Worker
If you are interested in adding UCS-2 mode back, this patch might
be a good starting point.
10*6777b538SAndroid Build Coastguard Worker
11*6777b538SAndroid Build Coastguard Worker
Change 12780686 by rsc@rsc-re2 on 2009/09/16 15:30:15

	Retire UCS-2 mode.

	I added it as an experiment for V8, but it
	requires 2-byte lookahead to do completely,
	and RE2 has 1-byte lookahead (enough for UTF-8)
	as a fairly deep fundamental assumption,
	so it did not support ^ or $.
21*6777b538SAndroid Build Coastguard Worker
22*6777b538SAndroid Build Coastguard Worker==== re2/bitstate.cc#2 - re2/bitstate.cc#3 ====
23*6777b538SAndroid Build Coastguard Workerre2/bitstate.cc#2:314,321 - re2/bitstate.cc#3:314,319
24*6777b538SAndroid Build Coastguard Worker      cap_[0] = p;
25*6777b538SAndroid Build Coastguard Worker      if (TrySearch(prog_->start(), p))  // Match must be leftmost; done.
26*6777b538SAndroid Build Coastguard Worker        return true;
27*6777b538SAndroid Build Coastguard Worker-     if (prog_->flags() & Regexp::UCS2)
28*6777b538SAndroid Build Coastguard Worker-       p++;
29*6777b538SAndroid Build Coastguard Worker    }
30*6777b538SAndroid Build Coastguard Worker    return false;
31*6777b538SAndroid Build Coastguard Worker  }
32*6777b538SAndroid Build Coastguard Worker==== re2/compile.cc#17 - re2/compile.cc#18 ====
33*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:95,101 - re2/compile.cc#18:95,100
34*6777b538SAndroid Build Coastguard Worker  // Input encodings.
35*6777b538SAndroid Build Coastguard Worker  enum Encoding {
36*6777b538SAndroid Build Coastguard Worker    kEncodingUTF8 = 1,  // UTF-8 (0-10FFFF)
37*6777b538SAndroid Build Coastguard Worker-   kEncodingUCS2,     // UCS-2 (0-FFFF), native byte order
38*6777b538SAndroid Build Coastguard Worker    kEncodingLatin1,    // Latin1 (0-FF)
39*6777b538SAndroid Build Coastguard Worker  };
40*6777b538SAndroid Build Coastguard Worker
41*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:168,176 - re2/compile.cc#18:167,172
42*6777b538SAndroid Build Coastguard Worker    void AddRuneRangeLatin1(Rune lo, Rune hi, bool foldcase);
43*6777b538SAndroid Build Coastguard Worker    void AddRuneRangeUTF8(Rune lo, Rune hi, bool foldcase);
44*6777b538SAndroid Build Coastguard Worker    void Add_80_10ffff();
45*6777b538SAndroid Build Coastguard Worker-   void AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase);
46*6777b538SAndroid Build Coastguard Worker-   void AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
47*6777b538SAndroid Build Coastguard Worker-                    uint8 lo2, uint8 hi2, bool fold2);
48*6777b538SAndroid Build Coastguard Worker
49*6777b538SAndroid Build Coastguard Worker    // New suffix that matches the byte range lo-hi, then goes to next.
50*6777b538SAndroid Build Coastguard Worker    Inst* RuneByteSuffix(uint8 lo, uint8 hi, bool foldcase, Inst* next);
51*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:475,481 - re2/compile.cc#18:471,477
52*6777b538SAndroid Build Coastguard Worker
53*6777b538SAndroid Build Coastguard Worker  // Converts rune range lo-hi into a fragment that recognizes
54*6777b538SAndroid Build Coastguard Worker  // the bytes that would make up those runes in the current
55*6777b538SAndroid Build Coastguard Worker- // encoding (Latin 1, UTF-8, or UCS-2).
56*6777b538SAndroid Build Coastguard Worker+ // encoding (Latin 1 or UTF-8).
57*6777b538SAndroid Build Coastguard Worker  // This lets the machine work byte-by-byte even when
58*6777b538SAndroid Build Coastguard Worker  // using multibyte encodings.
59*6777b538SAndroid Build Coastguard Worker
60*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:488,496 - re2/compile.cc#18:484,489
61*6777b538SAndroid Build Coastguard Worker      case kEncodingLatin1:
62*6777b538SAndroid Build Coastguard Worker        AddRuneRangeLatin1(lo, hi, foldcase);
63*6777b538SAndroid Build Coastguard Worker        break;
64*6777b538SAndroid Build Coastguard Worker-     case kEncodingUCS2:
65*6777b538SAndroid Build Coastguard Worker-       AddRuneRangeUCS2(lo, hi, foldcase);
66*6777b538SAndroid Build Coastguard Worker-       break;
67*6777b538SAndroid Build Coastguard Worker    }
68*6777b538SAndroid Build Coastguard Worker  }
69*6777b538SAndroid Build Coastguard Worker
70*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:503,581 - re2/compile.cc#18:496,501
71*6777b538SAndroid Build Coastguard Worker    AddSuffix(RuneByteSuffix(lo, hi, foldcase, NULL));
72*6777b538SAndroid Build Coastguard Worker  }
73*6777b538SAndroid Build Coastguard Worker
74*6777b538SAndroid Build Coastguard Worker- // Test whether 16-bit values are big or little endian.
75*6777b538SAndroid Build Coastguard Worker- static bool BigEndian() {
76*6777b538SAndroid Build Coastguard Worker-   union {
77*6777b538SAndroid Build Coastguard Worker-     char byte[2];
78*6777b538SAndroid Build Coastguard Worker-     int16 endian;
79*6777b538SAndroid Build Coastguard Worker-   } u;
80*6777b538SAndroid Build Coastguard Worker-
81*6777b538SAndroid Build Coastguard Worker-   u.byte[0] = 1;
82*6777b538SAndroid Build Coastguard Worker-   u.byte[1] = 2;
83*6777b538SAndroid Build Coastguard Worker-   return u.endian == 0x0102;
84*6777b538SAndroid Build Coastguard Worker- }
85*6777b538SAndroid Build Coastguard Worker-
86*6777b538SAndroid Build Coastguard Worker- void Compiler::AddUCS2Pair(uint8 lo1, uint8 hi1, bool fold1,
87*6777b538SAndroid Build Coastguard Worker-                            uint8 lo2, uint8 hi2, bool fold2) {
88*6777b538SAndroid Build Coastguard Worker-   Inst* ip;
89*6777b538SAndroid Build Coastguard Worker-   if (reversed_) {
90*6777b538SAndroid Build Coastguard Worker-     ip = RuneByteSuffix(lo1, hi1, fold1, NULL);
91*6777b538SAndroid Build Coastguard Worker-     ip = RuneByteSuffix(lo2, hi2, fold2, ip);
92*6777b538SAndroid Build Coastguard Worker-   } else {
93*6777b538SAndroid Build Coastguard Worker-     ip = RuneByteSuffix(lo2, hi2, fold2, NULL);
94*6777b538SAndroid Build Coastguard Worker-     ip = RuneByteSuffix(lo1, hi1, fold1, ip);
95*6777b538SAndroid Build Coastguard Worker-   }
96*6777b538SAndroid Build Coastguard Worker-   AddSuffix(ip);
97*6777b538SAndroid Build Coastguard Worker- }
98*6777b538SAndroid Build Coastguard Worker-
99*6777b538SAndroid Build Coastguard Worker- void Compiler::AddRuneRangeUCS2(Rune lo, Rune hi, bool foldcase) {
100*6777b538SAndroid Build Coastguard Worker-   if (lo > hi || lo > 0xFFFF)
101*6777b538SAndroid Build Coastguard Worker-     return;
102*6777b538SAndroid Build Coastguard Worker-   if (hi > 0xFFFF)
103*6777b538SAndroid Build Coastguard Worker-     hi = 0xFFFF;
104*6777b538SAndroid Build Coastguard Worker-
105*6777b538SAndroid Build Coastguard Worker-   // We'll assemble a pattern assuming big endian.
106*6777b538SAndroid Build Coastguard Worker-   // If the machine isn't, tell Cat to reverse its arguments.
107*6777b538SAndroid Build Coastguard Worker-   bool oldreversed = reversed_;
108*6777b538SAndroid Build Coastguard Worker-   if (!BigEndian()) {
109*6777b538SAndroid Build Coastguard Worker-     reversed_ = !oldreversed;
110*6777b538SAndroid Build Coastguard Worker-   }
111*6777b538SAndroid Build Coastguard Worker-
112*6777b538SAndroid Build Coastguard Worker-   // Split into bytes.
113*6777b538SAndroid Build Coastguard Worker-   int lo1 = lo >> 8;
114*6777b538SAndroid Build Coastguard Worker-   int lo2 = lo & 0xFF;
115*6777b538SAndroid Build Coastguard Worker-   int hi1 = hi >> 8;
116*6777b538SAndroid Build Coastguard Worker-   int hi2 = hi & 0xFF;
117*6777b538SAndroid Build Coastguard Worker-
118*6777b538SAndroid Build Coastguard Worker-   if (lo1 == hi1) {
119*6777b538SAndroid Build Coastguard Worker-     // Easy case: high bits are same in both.
120*6777b538SAndroid Build Coastguard Worker-     // Only do ASCII case folding on the second byte if the top byte is 00.
121*6777b538SAndroid Build Coastguard Worker-     AddUCS2Pair(lo1, lo1, false, lo2, hi2, lo1==0 && foldcase);
122*6777b538SAndroid Build Coastguard Worker-   } else {
123*6777b538SAndroid Build Coastguard Worker-     // Harder case: different second byte ranges depending on first byte.
124*6777b538SAndroid Build Coastguard Worker-
125*6777b538SAndroid Build Coastguard Worker-     // Initial fragment.
126*6777b538SAndroid Build Coastguard Worker-     if (lo2 > 0) {
127*6777b538SAndroid Build Coastguard Worker-       AddUCS2Pair(lo1, lo1, false, lo2, 0xFF, lo1==0 && foldcase);
128*6777b538SAndroid Build Coastguard Worker-       lo1++;
129*6777b538SAndroid Build Coastguard Worker-     }
130*6777b538SAndroid Build Coastguard Worker-
131*6777b538SAndroid Build Coastguard Worker-     // Trailing fragment.
132*6777b538SAndroid Build Coastguard Worker-     if (hi2 < 0xFF) {
133*6777b538SAndroid Build Coastguard Worker-       AddUCS2Pair(hi1, hi1, false, 0, hi2, false);
134*6777b538SAndroid Build Coastguard Worker-       hi1--;
135*6777b538SAndroid Build Coastguard Worker-     }
136*6777b538SAndroid Build Coastguard Worker-
137*6777b538SAndroid Build Coastguard Worker-     // Inner ranges.
138*6777b538SAndroid Build Coastguard Worker-     if (lo1 <= hi1) {
139*6777b538SAndroid Build Coastguard Worker-       AddUCS2Pair(lo1, hi1, false, 0, 0xFF, false);
140*6777b538SAndroid Build Coastguard Worker-     }
141*6777b538SAndroid Build Coastguard Worker-   }
142*6777b538SAndroid Build Coastguard Worker-
143*6777b538SAndroid Build Coastguard Worker-   // Restore reverse setting.
144*6777b538SAndroid Build Coastguard Worker-   reversed_ = oldreversed;
145*6777b538SAndroid Build Coastguard Worker- }
146*6777b538SAndroid Build Coastguard Worker-
147*6777b538SAndroid Build Coastguard Worker  // Table describing how to make a UTF-8 matching machine
148*6777b538SAndroid Build Coastguard Worker  // for the rune range 80-10FFFF (Runeself-Runemax).
149*6777b538SAndroid Build Coastguard Worker  // This range happens frequently enough (for example /./ and /[^a-z]/)
150*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:707,716 - re2/compile.cc#18:627,634
151*6777b538SAndroid Build Coastguard Worker
152*6777b538SAndroid Build Coastguard Worker  Frag Compiler::Literal(Rune r, bool foldcase) {
153*6777b538SAndroid Build Coastguard Worker    switch (encoding_) {
154*6777b538SAndroid Build Coastguard Worker-     default:  // UCS-2 or something new
155*6777b538SAndroid Build Coastguard Worker-       BeginRange();
156*6777b538SAndroid Build Coastguard Worker-       AddRuneRange(r, r, foldcase);
157*6777b538SAndroid Build Coastguard Worker-       return EndRange();
158*6777b538SAndroid Build Coastguard Worker+     default:
159*6777b538SAndroid Build Coastguard Worker+       return kNullFrag;
160*6777b538SAndroid Build Coastguard Worker
161*6777b538SAndroid Build Coastguard Worker      case kEncodingLatin1:
162*6777b538SAndroid Build Coastguard Worker        return ByteRange(r, r, foldcase);
163*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:927,934 - re2/compile.cc#18:845,850
164*6777b538SAndroid Build Coastguard Worker
165*6777b538SAndroid Build Coastguard Worker    if (re->parse_flags() & Regexp::Latin1)
166*6777b538SAndroid Build Coastguard Worker      c.encoding_ = kEncodingLatin1;
167*6777b538SAndroid Build Coastguard Worker-   else if (re->parse_flags() & Regexp::UCS2)
168*6777b538SAndroid Build Coastguard Worker-     c.encoding_ = kEncodingUCS2;
169*6777b538SAndroid Build Coastguard Worker    c.reversed_ = reversed;
170*6777b538SAndroid Build Coastguard Worker    if (max_mem <= 0) {
171*6777b538SAndroid Build Coastguard Worker      c.max_inst_ = 100000;  // more than enough
172*6777b538SAndroid Build Coastguard Workerre2/compile.cc#17:983,993 - re2/compile.cc#18:899,905
173*6777b538SAndroid Build Coastguard Worker      c.prog_->set_start_unanchored(c.prog_->start());
174*6777b538SAndroid Build Coastguard Worker    } else {
175*6777b538SAndroid Build Coastguard Worker      Frag dot;
176*6777b538SAndroid Build Coastguard Worker-     if (c.encoding_ == kEncodingUCS2) {
177*6777b538SAndroid Build Coastguard Worker-       dot = c.Cat(c.ByteRange(0x00, 0xFF, false), c.ByteRange(0x00, 0xFF, false));
178*6777b538SAndroid Build Coastguard Worker-     } else {
179*6777b538SAndroid Build Coastguard Worker-       dot = c.ByteRange(0x00, 0xFF, false);
180*6777b538SAndroid Build Coastguard Worker-     }
181*6777b538SAndroid Build Coastguard Worker+     dot = c.ByteRange(0x00, 0xFF, false);
182*6777b538SAndroid Build Coastguard Worker      Frag dotloop = c.Star(dot, true);
183*6777b538SAndroid Build Coastguard Worker      Frag unanchored = c.Cat(dotloop, all);
184*6777b538SAndroid Build Coastguard Worker      c.prog_->set_start_unanchored(unanchored.begin);
185*6777b538SAndroid Build Coastguard Worker==== re2/nfa.cc#8 - re2/nfa.cc#9 ====
186*6777b538SAndroid Build Coastguard Workerre2/nfa.cc#8:426,432 - re2/nfa.cc#9:426,431
187*6777b538SAndroid Build Coastguard Worker    const char* bp = context.begin();
188*6777b538SAndroid Build Coastguard Worker    int c = -1;
189*6777b538SAndroid Build Coastguard Worker    int wasword = 0;
190*6777b538SAndroid Build Coastguard Worker-   bool ucs2 = prog_->flags() & Regexp::UCS2;
191*6777b538SAndroid Build Coastguard Worker
192*6777b538SAndroid Build Coastguard Worker    if (text.begin() > context.begin()) {
193*6777b538SAndroid Build Coastguard Worker      c = text.begin()[-1] & 0xFF;
194*6777b538SAndroid Build Coastguard Workerre2/nfa.cc#8:492,498 - re2/nfa.cc#9:491,497
195*6777b538SAndroid Build Coastguard Worker        // If there's a required first byte for an unanchored search
196*6777b538SAndroid Build Coastguard Worker        // and we're not in the middle of any possible matches,
197*6777b538SAndroid Build Coastguard Worker        // use memchr to search for the byte quickly.
198*6777b538SAndroid Build Coastguard Worker-       if (!ucs2 && !anchored && first_byte_ >= 0 && runq->size() == 0 &&
199*6777b538SAndroid Build Coastguard Worker+       if (!anchored && first_byte_ >= 0 && runq->size() == 0 &&
200*6777b538SAndroid Build Coastguard Worker            p < text.end() && (p[0] & 0xFF) != first_byte_) {
201*6777b538SAndroid Build Coastguard Worker          p = reinterpret_cast<const char*>(memchr(p, first_byte_,
202*6777b538SAndroid Build Coastguard Worker                                                   text.end() - p));
203*6777b538SAndroid Build Coastguard Workerre2/nfa.cc#8:505,526 - re2/nfa.cc#9:504,514
204*6777b538SAndroid Build Coastguard Worker          flag = Prog::EmptyFlags(context, p);
205*6777b538SAndroid Build Coastguard Worker        }
206*6777b538SAndroid Build Coastguard Worker
207*6777b538SAndroid Build Coastguard Worker-       // In UCS-2 mode, if we need to start a new thread,
208*6777b538SAndroid Build Coastguard Worker-       // make sure to do it on an even boundary.
209*6777b538SAndroid Build Coastguard Worker-       if(ucs2 && runq->size() == 0 &&
210*6777b538SAndroid Build Coastguard Worker-           (p - context.begin()) % 2 && p < text.end()) {
211*6777b538SAndroid Build Coastguard Worker-         p++;
212*6777b538SAndroid Build Coastguard Worker-         flag = Prog::EmptyFlags(context, p);
213*6777b538SAndroid Build Coastguard Worker-       }
214*6777b538SAndroid Build Coastguard Worker-
215*6777b538SAndroid Build Coastguard Worker        // Steal match storage (cleared but unused as of yet)
216*6777b538SAndroid Build Coastguard Worker        // temporarily to hold match boundaries for new thread.
217*6777b538SAndroid Build Coastguard Worker-       // In UCS-2 mode, only start the thread on a 2-byte boundary.
218*6777b538SAndroid Build Coastguard Worker-       if(!ucs2 || (p - context.begin()) % 2 == 0) {
219*6777b538SAndroid Build Coastguard Worker-         match_[0] = p;
220*6777b538SAndroid Build Coastguard Worker-         AddToThreadq(runq, start_, flag, p, match_);
221*6777b538SAndroid Build Coastguard Worker-         match_[0] = NULL;
222*6777b538SAndroid Build Coastguard Worker-       }
223*6777b538SAndroid Build Coastguard Worker+       match_[0] = p;
224*6777b538SAndroid Build Coastguard Worker+       AddToThreadq(runq, start_, flag, p, match_);
225*6777b538SAndroid Build Coastguard Worker+       match_[0] = NULL;
226*6777b538SAndroid Build Coastguard Worker      }
227*6777b538SAndroid Build Coastguard Worker
228*6777b538SAndroid Build Coastguard Worker      // If all the threads have died, stop early.
229*6777b538SAndroid Build Coastguard Worker==== re2/parse.cc#22 - re2/parse.cc#23 ====
230*6777b538SAndroid Build Coastguard Workerre2/parse.cc#22:160,167 - re2/parse.cc#23:160,165
231*6777b538SAndroid Build Coastguard Worker      status_(status), stacktop_(NULL), ncap_(0) {
232*6777b538SAndroid Build Coastguard Worker    if (flags_ & Latin1)
233*6777b538SAndroid Build Coastguard Worker      rune_max_ = 0xFF;
234*6777b538SAndroid Build Coastguard Worker-   else if (flags & UCS2)
235*6777b538SAndroid Build Coastguard Worker-     rune_max_ = 0xFFFF;
236*6777b538SAndroid Build Coastguard Worker    else
237*6777b538SAndroid Build Coastguard Worker      rune_max_ = Runemax;
238*6777b538SAndroid Build Coastguard Worker  }
239*6777b538SAndroid Build Coastguard Workerre2/parse.cc#22:365,387 - re2/parse.cc#23:363,374
240*6777b538SAndroid Build Coastguard Worker  bool Regexp::ParseState::PushCarat() {
241*6777b538SAndroid Build Coastguard Worker    if (flags_ & OneLine) {
242*6777b538SAndroid Build Coastguard Worker      return PushSimpleOp(kRegexpBeginText);
243*6777b538SAndroid Build Coastguard Worker-   } else {
244*6777b538SAndroid Build Coastguard Worker-     if (flags_ & UCS2) {
245*6777b538SAndroid Build Coastguard Worker-       status_->set_code(kRegexpUnsupported);
246*6777b538SAndroid Build Coastguard Worker-       status_->set_error_arg("multiline ^ in UCS-2 mode");
247*6777b538SAndroid Build Coastguard Worker-       return false;
248*6777b538SAndroid Build Coastguard Worker-     }
249*6777b538SAndroid Build Coastguard Worker-     return PushSimpleOp(kRegexpBeginLine);
250*6777b538SAndroid Build Coastguard Worker    }
251*6777b538SAndroid Build Coastguard Worker+   return PushSimpleOp(kRegexpBeginLine);
252*6777b538SAndroid Build Coastguard Worker  }
253*6777b538SAndroid Build Coastguard Worker
254*6777b538SAndroid Build Coastguard Worker  // Pushes a \b or \B onto the stack.
255*6777b538SAndroid Build Coastguard Worker  bool Regexp::ParseState::PushWordBoundary(bool word) {
256*6777b538SAndroid Build Coastguard Worker-   if (flags_ & UCS2) {
257*6777b538SAndroid Build Coastguard Worker-     status_->set_code(kRegexpUnsupported);
258*6777b538SAndroid Build Coastguard Worker-     status_->set_error_arg("\\b or \\B in UCS-2 mode");
259*6777b538SAndroid Build Coastguard Worker-     return false;
260*6777b538SAndroid Build Coastguard Worker-   }
261*6777b538SAndroid Build Coastguard Worker    if (word)
262*6777b538SAndroid Build Coastguard Worker      return PushSimpleOp(kRegexpWordBoundary);
263*6777b538SAndroid Build Coastguard Worker    return PushSimpleOp(kRegexpNoWordBoundary);
264*6777b538SAndroid Build Coastguard Workerre2/parse.cc#22:397,407 - re2/parse.cc#23:384,389
265*6777b538SAndroid Build Coastguard Worker      bool ret = PushSimpleOp(kRegexpEndText);
266*6777b538SAndroid Build Coastguard Worker      flags_ = oflags;
267*6777b538SAndroid Build Coastguard Worker      return ret;
268*6777b538SAndroid Build Coastguard Worker-   }
269*6777b538SAndroid Build Coastguard Worker-   if (flags_ & UCS2) {
270*6777b538SAndroid Build Coastguard Worker-     status_->set_code(kRegexpUnsupported);
271*6777b538SAndroid Build Coastguard Worker-     status_->set_error_arg("multiline $ in UCS-2 mode");
272*6777b538SAndroid Build Coastguard Worker-     return false;
273*6777b538SAndroid Build Coastguard Worker    }
274*6777b538SAndroid Build Coastguard Worker    return PushSimpleOp(kRegexpEndLine);
275*6777b538SAndroid Build Coastguard Worker  }
276*6777b538SAndroid Build Coastguard Worker==== re2/re2.cc#34 - re2/re2.cc#35 ====
277*6777b538SAndroid Build Coastguard Workerre2/re2.cc#34:79,86 - re2/re2.cc#35:79,84
278*6777b538SAndroid Build Coastguard Worker        return RE2::ErrorBadUTF8;
279*6777b538SAndroid Build Coastguard Worker      case re2::kRegexpBadNamedCapture:
280*6777b538SAndroid Build Coastguard Worker        return RE2::ErrorBadNamedCapture;
281*6777b538SAndroid Build Coastguard Worker-     case re2::kRegexpUnsupported:
282*6777b538SAndroid Build Coastguard Worker-       return RE2::ErrorUnsupported;
283*6777b538SAndroid Build Coastguard Worker    }
284*6777b538SAndroid Build Coastguard Worker    return RE2::ErrorInternal;
285*6777b538SAndroid Build Coastguard Worker  }
286*6777b538SAndroid Build Coastguard Workerre2/re2.cc#34:122,130 - re2/re2.cc#35:120,125
287*6777b538SAndroid Build Coastguard Worker        break;
288*6777b538SAndroid Build Coastguard Worker      case RE2::Options::EncodingLatin1:
289*6777b538SAndroid Build Coastguard Worker        flags |= Regexp::Latin1;
290*6777b538SAndroid Build Coastguard Worker-       break;
291*6777b538SAndroid Build Coastguard Worker-     case RE2::Options::EncodingUCS2:
292*6777b538SAndroid Build Coastguard Worker-       flags |= Regexp::UCS2;
293*6777b538SAndroid Build Coastguard Worker        break;
294*6777b538SAndroid Build Coastguard Worker    }
295*6777b538SAndroid Build Coastguard Worker
296*6777b538SAndroid Build Coastguard Worker==== re2/re2.h#36 - re2/re2.h#37 ====
297*6777b538SAndroid Build Coastguard Workerre2/re2.h#36:246,252 - re2/re2.h#37:246,251
298*6777b538SAndroid Build Coastguard Worker      ErrorBadUTF8,            // invalid UTF-8 in regexp
299*6777b538SAndroid Build Coastguard Worker      ErrorBadNamedCapture,    // bad named capture group
300*6777b538SAndroid Build Coastguard Worker      ErrorPatternTooLarge,    // pattern too large (compile failed)
301*6777b538SAndroid Build Coastguard Worker-     ErrorUnsupported,        // unsupported feature (in UCS-2 mode)
302*6777b538SAndroid Build Coastguard Worker    };
303*6777b538SAndroid Build Coastguard Worker
304*6777b538SAndroid Build Coastguard Worker    // Predefined common options.
305*6777b538SAndroid Build Coastguard Workerre2/re2.h#36:570,576 - re2/re2.h#37:569,574
306*6777b538SAndroid Build Coastguard Worker
307*6777b538SAndroid Build Coastguard Worker      enum Encoding {
308*6777b538SAndroid Build Coastguard Worker        EncodingUTF8 = 1,
309*6777b538SAndroid Build Coastguard Worker-       EncodingUCS2,      // 16-bit Unicode 0-FFFF only
310*6777b538SAndroid Build Coastguard Worker        EncodingLatin1
311*6777b538SAndroid Build Coastguard Worker      };
312*6777b538SAndroid Build Coastguard Worker
313*6777b538SAndroid Build Coastguard Worker==== re2/regexp.cc#15 - re2/regexp.cc#16 ====
314*6777b538SAndroid Build Coastguard Workerre2/regexp.cc#15:324,333 - re2/regexp.cc#16:324,329
315*6777b538SAndroid Build Coastguard Worker  // the regexp that remains after the prefix.  The prefix might
316*6777b538SAndroid Build Coastguard Worker  // be ASCII case-insensitive.
317*6777b538SAndroid Build Coastguard Worker  bool Regexp::RequiredPrefix(string *prefix, bool *foldcase, Regexp** suffix) {
318*6777b538SAndroid Build Coastguard Worker-   // Don't even bother for UCS-2; it's time to throw that code away.
319*6777b538SAndroid Build Coastguard Worker-   if (parse_flags_ & UCS2)
320*6777b538SAndroid Build Coastguard Worker-     return false;
321*6777b538SAndroid Build Coastguard Worker-
322*6777b538SAndroid Build Coastguard Worker    // No need for a walker: the regexp must be of the form
323*6777b538SAndroid Build Coastguard Worker    // 1. some number of ^ anchors
324*6777b538SAndroid Build Coastguard Worker    // 2. a literal char or string
325*6777b538SAndroid Build Coastguard Worker==== re2/regexp.h#20 - re2/regexp.h#21 ====
326*6777b538SAndroid Build Coastguard Workerre2/regexp.h#20:187,193 - re2/regexp.h#21:187,192
327*6777b538SAndroid Build Coastguard Worker    kRegexpBadPerlOp,          // bad perl operator
328*6777b538SAndroid Build Coastguard Worker    kRegexpBadUTF8,            // invalid UTF-8 in regexp
329*6777b538SAndroid Build Coastguard Worker    kRegexpBadNamedCapture,    // bad named capture
330*6777b538SAndroid Build Coastguard Worker-   kRegexpUnsupported,        // unsupported operator
331*6777b538SAndroid Build Coastguard Worker  };
332*6777b538SAndroid Build Coastguard Worker
333*6777b538SAndroid Build Coastguard Worker  // Error status for certain operations.
334*6777b538SAndroid Build Coastguard Workerre2/regexp.h#20:307,316 - re2/regexp.h#21:306,314
335*6777b538SAndroid Build Coastguard Worker                             //   \Q and \E to disable/enable metacharacters
336*6777b538SAndroid Build Coastguard Worker                             //   (?P<name>expr) for named captures
337*6777b538SAndroid Build Coastguard Worker                             //   \C to match any single byte
338*6777b538SAndroid Build Coastguard Worker-     UCS2         = 1<<10,  // Text is in UCS-2, regexp is in UTF-8.
339*6777b538SAndroid Build Coastguard Worker-     UnicodeGroups = 1<<11, // Allow \p{Han} for Unicode Han group
340*6777b538SAndroid Build Coastguard Worker+     UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
341*6777b538SAndroid Build Coastguard Worker                             //   and \P{Han} for its negation.
342*6777b538SAndroid Build Coastguard Worker-     NeverNL      = 1<<12,  // Never match NL, even if the regexp mentions
343*6777b538SAndroid Build Coastguard Worker+     NeverNL      = 1<<11,  // Never match NL, even if the regexp mentions
344*6777b538SAndroid Build Coastguard Worker                             //   it explicitly.
345*6777b538SAndroid Build Coastguard Worker
346*6777b538SAndroid Build Coastguard Worker      // As close to Perl as we can get.
347*6777b538SAndroid Build Coastguard Worker==== re2/testing/backtrack.cc#4 - re2/testing/backtrack.cc#5 ====
348*6777b538SAndroid Build Coastguard Workerre2/testing/backtrack.cc#4:134,141 - re2/testing/backtrack.cc#5:134,139
349*6777b538SAndroid Build Coastguard Worker      cap_[0] = p;
350*6777b538SAndroid Build Coastguard Worker      if (Visit(prog_->start(), p))  // Match must be leftmost; done.
351*6777b538SAndroid Build Coastguard Worker        return true;
352*6777b538SAndroid Build Coastguard Worker-     if (prog_->flags() & Regexp::UCS2)
353*6777b538SAndroid Build Coastguard Worker-       p++;
354*6777b538SAndroid Build Coastguard Worker    }
355*6777b538SAndroid Build Coastguard Worker    return false;
356*6777b538SAndroid Build Coastguard Worker  }
357*6777b538SAndroid Build Coastguard Worker==== re2/testing/tester.cc#12 - re2/testing/tester.cc#13 ====
358*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:144,154 - re2/testing/tester.cc#13:144,152
359*6777b538SAndroid Build Coastguard Worker  static ParseMode parse_modes[] = {
360*6777b538SAndroid Build Coastguard Worker    { single_line,                   "single-line"          },
361*6777b538SAndroid Build Coastguard Worker    { single_line|Regexp::Latin1,    "single-line, latin1"  },
362*6777b538SAndroid Build Coastguard Worker-   { single_line|Regexp::UCS2,     "single-line, ucs2"   },
363*6777b538SAndroid Build Coastguard Worker    { multi_line,                    "multiline"            },
364*6777b538SAndroid Build Coastguard Worker    { multi_line|Regexp::NonGreedy,  "multiline, nongreedy" },
365*6777b538SAndroid Build Coastguard Worker    { multi_line|Regexp::Latin1,     "multiline, latin1"    },
366*6777b538SAndroid Build Coastguard Worker-   { multi_line|Regexp::UCS2,      "multiline, ucs2"     },
367*6777b538SAndroid Build Coastguard Worker  };
368*6777b538SAndroid Build Coastguard Worker
369*6777b538SAndroid Build Coastguard Worker  static string FormatMode(Regexp::ParseFlags flags) {
370*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:179,189 - re2/testing/tester.cc#13:177,185
371*6777b538SAndroid Build Coastguard Worker    RegexpStatus status;
372*6777b538SAndroid Build Coastguard Worker    regexp_ = Regexp::Parse(regexp_str, flags, &status);
373*6777b538SAndroid Build Coastguard Worker    if (regexp_ == NULL) {
374*6777b538SAndroid Build Coastguard Worker-     if (status.code() != kRegexpUnsupported) {
375*6777b538SAndroid Build Coastguard Worker-       LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
376*6777b538SAndroid Build Coastguard Worker-                 << " mode: " << FormatMode(flags);
377*6777b538SAndroid Build Coastguard Worker-       error_ = true;
378*6777b538SAndroid Build Coastguard Worker-     }
379*6777b538SAndroid Build Coastguard Worker+     LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
380*6777b538SAndroid Build Coastguard Worker+               << " mode: " << FormatMode(flags);
381*6777b538SAndroid Build Coastguard Worker+     error_ = true;
382*6777b538SAndroid Build Coastguard Worker      return;
383*6777b538SAndroid Build Coastguard Worker    }
384*6777b538SAndroid Build Coastguard Worker    prog_ = regexp_->CompileToProg(0);
385*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:230,237 - re2/testing/tester.cc#13:226,231
386*6777b538SAndroid Build Coastguard Worker      RE2::Options options;
387*6777b538SAndroid Build Coastguard Worker      if (flags & Regexp::Latin1)
388*6777b538SAndroid Build Coastguard Worker        options.set_encoding(RE2::Options::EncodingLatin1);
389*6777b538SAndroid Build Coastguard Worker-     else if (flags & Regexp::UCS2)
390*6777b538SAndroid Build Coastguard Worker-       options.set_encoding(RE2::Options::EncodingUCS2);
391*6777b538SAndroid Build Coastguard Worker      if (kind_ == Prog::kLongestMatch)
392*6777b538SAndroid Build Coastguard Worker        options.set_longest_match(true);
393*6777b538SAndroid Build Coastguard Worker      re2_ = new RE2(re, options);
394*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:281,379 - re2/testing/tester.cc#13:275,280
395*6777b538SAndroid Build Coastguard Worker      delete re2_;
396*6777b538SAndroid Build Coastguard Worker  }
397*6777b538SAndroid Build Coastguard Worker
398*6777b538SAndroid Build Coastguard Worker- // Converts UTF-8 string in text into UCS-2 string in new_text.
399*6777b538SAndroid Build Coastguard Worker- static bool ConvertUTF8ToUCS2(const StringPiece& text, StringPiece* new_text) {
400*6777b538SAndroid Build Coastguard Worker-   const char* p = text.begin();
401*6777b538SAndroid Build Coastguard Worker-   const char* ep = text.end();
402*6777b538SAndroid Build Coastguard Worker-   uint16* q = new uint16[ep - p];
403*6777b538SAndroid Build Coastguard Worker-   uint16* q0 = q;
404*6777b538SAndroid Build Coastguard Worker-
405*6777b538SAndroid Build Coastguard Worker-   int n;
406*6777b538SAndroid Build Coastguard Worker-   Rune r;
407*6777b538SAndroid Build Coastguard Worker-   for (; p < ep; p += n) {
408*6777b538SAndroid Build Coastguard Worker-     if (!fullrune(p, ep - p)) {
409*6777b538SAndroid Build Coastguard Worker-       delete[] q0;
410*6777b538SAndroid Build Coastguard Worker-       return false;
411*6777b538SAndroid Build Coastguard Worker-     }
412*6777b538SAndroid Build Coastguard Worker-     n = chartorune(&r, p);
413*6777b538SAndroid Build Coastguard Worker-     if (r > 0xFFFF) {
414*6777b538SAndroid Build Coastguard Worker-       delete[] q0;
415*6777b538SAndroid Build Coastguard Worker-       return false;
416*6777b538SAndroid Build Coastguard Worker-     }
417*6777b538SAndroid Build Coastguard Worker-     *q++ = r;
418*6777b538SAndroid Build Coastguard Worker-   }
419*6777b538SAndroid Build Coastguard Worker-   *new_text = StringPiece(reinterpret_cast<char*>(q0), 2*(q - q0));
420*6777b538SAndroid Build Coastguard Worker-   return true;
421*6777b538SAndroid Build Coastguard Worker- }
422*6777b538SAndroid Build Coastguard Worker-
423*6777b538SAndroid Build Coastguard Worker- // Rewrites *sp from being a pointer into text8 (UTF-8)
424*6777b538SAndroid Build Coastguard Worker- // to being a pointer into text16 (equivalent text but in UCS-2).
425*6777b538SAndroid Build Coastguard Worker- static void AdjustUTF8ToUCS2(const StringPiece& text8, const StringPiece& text16,
426*6777b538SAndroid Build Coastguard Worker-                               StringPiece *sp) {
427*6777b538SAndroid Build Coastguard Worker-   if (sp->begin() == NULL && text8.begin() != NULL)
428*6777b538SAndroid Build Coastguard Worker-     return;
429*6777b538SAndroid Build Coastguard Worker-
430*6777b538SAndroid Build Coastguard Worker-   int nrune = 0;
431*6777b538SAndroid Build Coastguard Worker-   int n;
432*6777b538SAndroid Build Coastguard Worker-   Rune r;
433*6777b538SAndroid Build Coastguard Worker-   const char* p = text8.begin();
434*6777b538SAndroid Build Coastguard Worker-   const char* ep = text8.end();
435*6777b538SAndroid Build Coastguard Worker-   const char* spbegin = NULL;
436*6777b538SAndroid Build Coastguard Worker-   const char* spend = NULL;
437*6777b538SAndroid Build Coastguard Worker-   for (;;) {
438*6777b538SAndroid Build Coastguard Worker-     if (p == sp->begin())
439*6777b538SAndroid Build Coastguard Worker-       spbegin = text16.begin() + sizeof(uint16)*nrune;
440*6777b538SAndroid Build Coastguard Worker-     if (p == sp->end())
441*6777b538SAndroid Build Coastguard Worker-       spend = text16.begin() + sizeof(uint16)*nrune;
442*6777b538SAndroid Build Coastguard Worker-     if (p >= ep)
443*6777b538SAndroid Build Coastguard Worker-       break;
444*6777b538SAndroid Build Coastguard Worker-     n = chartorune(&r, p);
445*6777b538SAndroid Build Coastguard Worker-     p += n;
446*6777b538SAndroid Build Coastguard Worker-     nrune++;
447*6777b538SAndroid Build Coastguard Worker-   }
448*6777b538SAndroid Build Coastguard Worker-   if (spbegin == NULL || spend == NULL) {
449*6777b538SAndroid Build Coastguard Worker-     LOG(FATAL) << "Error in AdjustUTF8ToUCS2 "
450*6777b538SAndroid Build Coastguard Worker-                << CEscape(text8) << " "
451*6777b538SAndroid Build Coastguard Worker-                << (int)(sp->begin() - text8.begin()) << " "
452*6777b538SAndroid Build Coastguard Worker-                << (int)(sp->end() - text8.begin());
453*6777b538SAndroid Build Coastguard Worker-   }
454*6777b538SAndroid Build Coastguard Worker-   *sp = StringPiece(spbegin, spend - spbegin);
455*6777b538SAndroid Build Coastguard Worker- }
456*6777b538SAndroid Build Coastguard Worker-
457*6777b538SAndroid Build Coastguard Worker- // Rewrites *sp from begin a pointer into text16 (UCS-2)
458*6777b538SAndroid Build Coastguard Worker- // to being a pointer into text8 (equivalent text but in UTF-8).
459*6777b538SAndroid Build Coastguard Worker- static void AdjustUCS2ToUTF8(const StringPiece& text16, const StringPiece& text8,
460*6777b538SAndroid Build Coastguard Worker-                               StringPiece* sp) {
461*6777b538SAndroid Build Coastguard Worker-   if (sp->begin() == NULL)
462*6777b538SAndroid Build Coastguard Worker-     return;
463*6777b538SAndroid Build Coastguard Worker-
464*6777b538SAndroid Build Coastguard Worker-   int nrune = 0;
465*6777b538SAndroid Build Coastguard Worker-   int n;
466*6777b538SAndroid Build Coastguard Worker-   Rune r;
467*6777b538SAndroid Build Coastguard Worker-   const char* p = text8.begin();
468*6777b538SAndroid Build Coastguard Worker-   const char* ep = text8.end();
469*6777b538SAndroid Build Coastguard Worker-   const char* spbegin = NULL;
470*6777b538SAndroid Build Coastguard Worker-   const char* spend = NULL;
471*6777b538SAndroid Build Coastguard Worker-   for (;;) {
472*6777b538SAndroid Build Coastguard Worker-     if (nrune == (sp->begin() - text16.begin())/2)
473*6777b538SAndroid Build Coastguard Worker-       spbegin = p;
474*6777b538SAndroid Build Coastguard Worker-     if (nrune == (sp->end() - text16.begin())/2)
475*6777b538SAndroid Build Coastguard Worker-       spend = p;
476*6777b538SAndroid Build Coastguard Worker-     if (p >= ep)
477*6777b538SAndroid Build Coastguard Worker-       break;
478*6777b538SAndroid Build Coastguard Worker-     n = chartorune(&r, p);
479*6777b538SAndroid Build Coastguard Worker-     p += n;
480*6777b538SAndroid Build Coastguard Worker-     nrune++;
481*6777b538SAndroid Build Coastguard Worker-   }
482*6777b538SAndroid Build Coastguard Worker-   if (text8.begin() != NULL && (spbegin == NULL || spend == NULL)) {
483*6777b538SAndroid Build Coastguard Worker-     LOG(FATAL) << "Error in AdjustUCS2ToUTF8 "
484*6777b538SAndroid Build Coastguard Worker-                << CEscape(text16) << " "
485*6777b538SAndroid Build Coastguard Worker-                << (int)(sp->begin() - text16.begin()) << " "
486*6777b538SAndroid Build Coastguard Worker-                << (int)(sp->end() - text16.begin());
487*6777b538SAndroid Build Coastguard Worker-   }
488*6777b538SAndroid Build Coastguard Worker-   *sp = StringPiece(spbegin, spend - spbegin);
489*6777b538SAndroid Build Coastguard Worker- }
490*6777b538SAndroid Build Coastguard Worker-
491*6777b538SAndroid Build Coastguard Worker  // Runs a single search using the named engine type.
492*6777b538SAndroid Build Coastguard Worker  // This interface hides all the irregularities of the various
493*6777b538SAndroid Build Coastguard Worker  // engine interfaces from the rest of this file.
494*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:393,411 - re2/testing/tester.cc#13:294,300
495*6777b538SAndroid Build Coastguard Worker
496*6777b538SAndroid Build Coastguard Worker    StringPiece text = orig_text;
497*6777b538SAndroid Build Coastguard Worker    StringPiece context = orig_context;
498*6777b538SAndroid Build Coastguard Worker-   bool ucs2 = false;
499*6777b538SAndroid Build Coastguard Worker
500*6777b538SAndroid Build Coastguard Worker-   if ((flags() & Regexp::UCS2) && type != kEnginePCRE) {
501*6777b538SAndroid Build Coastguard Worker-     if (!ConvertUTF8ToUCS2(orig_context, &context)) {
502*6777b538SAndroid Build Coastguard Worker-       result->skipped = true;
503*6777b538SAndroid Build Coastguard Worker-       return;
504*6777b538SAndroid Build Coastguard Worker-     }
505*6777b538SAndroid Build Coastguard Worker-
506*6777b538SAndroid Build Coastguard Worker-     // Rewrite context to refer to new text.
507*6777b538SAndroid Build Coastguard Worker-     AdjustUTF8ToUCS2(orig_context, context, &text);
508*6777b538SAndroid Build Coastguard Worker-     ucs2 = true;
509*6777b538SAndroid Build Coastguard Worker-   }
510*6777b538SAndroid Build Coastguard Worker-
511*6777b538SAndroid Build Coastguard Worker    switch (type) {
512*6777b538SAndroid Build Coastguard Worker      default:
513*6777b538SAndroid Build Coastguard Worker        LOG(FATAL) << "Bad RunSearch type: " << (int)type;
514*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:557,577 - re2/testing/tester.cc#13:446,451
515*6777b538SAndroid Build Coastguard Worker      }
516*6777b538SAndroid Build Coastguard Worker    }
517*6777b538SAndroid Build Coastguard Worker
518*6777b538SAndroid Build Coastguard Worker-   // If we did UCS-2 matching, rewrite the matches to refer
519*6777b538SAndroid Build Coastguard Worker-   // to the original UTF-8 text.
520*6777b538SAndroid Build Coastguard Worker-   if (ucs2) {
521*6777b538SAndroid Build Coastguard Worker-     if (result->matched) {
522*6777b538SAndroid Build Coastguard Worker-       if (result->have_submatch0) {
523*6777b538SAndroid Build Coastguard Worker-         AdjustUCS2ToUTF8(context, orig_context, &result->submatch[0]);
524*6777b538SAndroid Build Coastguard Worker-       } else if (result->have_submatch) {
525*6777b538SAndroid Build Coastguard Worker-         for (int i = 0; i < nsubmatch; i++) {
526*6777b538SAndroid Build Coastguard Worker-           AdjustUCS2ToUTF8(context, orig_context, &result->submatch[i]);
527*6777b538SAndroid Build Coastguard Worker-         }
528*6777b538SAndroid Build Coastguard Worker-       }
529*6777b538SAndroid Build Coastguard Worker-     }
530*6777b538SAndroid Build Coastguard Worker-     delete[] context.begin();
531*6777b538SAndroid Build Coastguard Worker-   }
532*6777b538SAndroid Build Coastguard Worker-
533*6777b538SAndroid Build Coastguard Worker    if (!result->matched)
534*6777b538SAndroid Build Coastguard Worker      memset(result->submatch, 0, sizeof result->submatch);
535*6777b538SAndroid Build Coastguard Worker  }
536*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:596,617 - re2/testing/tester.cc#13:470,475
537*6777b538SAndroid Build Coastguard Worker    return true;
538*6777b538SAndroid Build Coastguard Worker  }
539*6777b538SAndroid Build Coastguard Worker
540*6777b538SAndroid Build Coastguard Worker- // Check whether text uses only Unicode points <= 0xFFFF
541*6777b538SAndroid Build Coastguard Worker- // (in the BMP).
542*6777b538SAndroid Build Coastguard Worker- static bool IsBMP(const StringPiece& text) {
543*6777b538SAndroid Build Coastguard Worker-   const char* p = text.begin();
544*6777b538SAndroid Build Coastguard Worker-   const char* ep = text.end();
545*6777b538SAndroid Build Coastguard Worker-   while (p < ep) {
546*6777b538SAndroid Build Coastguard Worker-     if (!fullrune(p, ep - p))
547*6777b538SAndroid Build Coastguard Worker-       return false;
548*6777b538SAndroid Build Coastguard Worker-     Rune r;
549*6777b538SAndroid Build Coastguard Worker-     p += chartorune(&r, p);
550*6777b538SAndroid Build Coastguard Worker-     if (r > 0xFFFF)
551*6777b538SAndroid Build Coastguard Worker-       return false;
552*6777b538SAndroid Build Coastguard Worker-   }
553*6777b538SAndroid Build Coastguard Worker-   return true;
554*6777b538SAndroid Build Coastguard Worker- }
555*6777b538SAndroid Build Coastguard Worker-
556*6777b538SAndroid Build Coastguard Worker  // Runs a single test.
557*6777b538SAndroid Build Coastguard Worker  bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
558*6777b538SAndroid Build Coastguard Worker                             Prog::Anchor anchor) {
559*6777b538SAndroid Build Coastguard Workerre2/testing/tester.cc#12:619,625 - re2/testing/tester.cc#13:477,483
560*6777b538SAndroid Build Coastguard Worker    Result correct;
561*6777b538SAndroid Build Coastguard Worker    RunSearch(kEngineBacktrack, text, context, anchor, &correct);
562*6777b538SAndroid Build Coastguard Worker    if (correct.skipped) {
563*6777b538SAndroid Build Coastguard Worker-     if (regexp_ == NULL || !IsBMP(context))  // okay to skip in UCS-2 mode
564*6777b538SAndroid Build Coastguard Worker+     if (regexp_ == NULL)
565*6777b538SAndroid Build Coastguard Worker        return true;
566*6777b538SAndroid Build Coastguard Worker      LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
567*6777b538SAndroid Build Coastguard Worker                 << " " << FormatMode(flags_);
568