1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Rewrite POSIX and other features in re
6 // to use simple extended regular expression features.
7 // Also sort and simplify character classes.
8
9 #include <algorithm>
10 #include <string>
11
12 #include "util/logging.h"
13 #include "util/utf.h"
14 #include "re2/pod_array.h"
15 #include "re2/regexp.h"
16 #include "re2/walker-inl.h"
17
18 namespace re2 {
19
20 // Parses the regexp src and then simplifies it and sets *dst to the
21 // string representation of the simplified form. Returns true on success.
22 // Returns false and sets *error (if error != NULL) on error.
SimplifyRegexp(absl::string_view src,ParseFlags flags,std::string * dst,RegexpStatus * status)23 bool Regexp::SimplifyRegexp(absl::string_view src, ParseFlags flags,
24 std::string* dst, RegexpStatus* status) {
25 Regexp* re = Parse(src, flags, status);
26 if (re == NULL)
27 return false;
28 Regexp* sre = re->Simplify();
29 re->Decref();
30 if (sre == NULL) {
31 if (status) {
32 status->set_code(kRegexpInternalError);
33 status->set_error_arg(src);
34 }
35 return false;
36 }
37 *dst = sre->ToString();
38 sre->Decref();
39 return true;
40 }
41
42 // Assuming the simple_ flags on the children are accurate,
43 // is this Regexp* simple?
ComputeSimple()44 bool Regexp::ComputeSimple() {
45 Regexp** subs;
46 switch (op_) {
47 case kRegexpNoMatch:
48 case kRegexpEmptyMatch:
49 case kRegexpLiteral:
50 case kRegexpLiteralString:
51 case kRegexpBeginLine:
52 case kRegexpEndLine:
53 case kRegexpBeginText:
54 case kRegexpWordBoundary:
55 case kRegexpNoWordBoundary:
56 case kRegexpEndText:
57 case kRegexpAnyChar:
58 case kRegexpAnyByte:
59 case kRegexpHaveMatch:
60 return true;
61 case kRegexpConcat:
62 case kRegexpAlternate:
63 // These are simple as long as the subpieces are simple.
64 subs = sub();
65 for (int i = 0; i < nsub_; i++)
66 if (!subs[i]->simple())
67 return false;
68 return true;
69 case kRegexpCharClass:
70 // Simple as long as the char class is not empty, not full.
71 if (ccb_ != NULL)
72 return !ccb_->empty() && !ccb_->full();
73 return !cc_->empty() && !cc_->full();
74 case kRegexpCapture:
75 subs = sub();
76 return subs[0]->simple();
77 case kRegexpStar:
78 case kRegexpPlus:
79 case kRegexpQuest:
80 subs = sub();
81 if (!subs[0]->simple())
82 return false;
83 switch (subs[0]->op_) {
84 case kRegexpStar:
85 case kRegexpPlus:
86 case kRegexpQuest:
87 case kRegexpEmptyMatch:
88 case kRegexpNoMatch:
89 return false;
90 default:
91 break;
92 }
93 return true;
94 case kRegexpRepeat:
95 return false;
96 }
97 LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
98 return false;
99 }
100
101 // Walker subclass used by Simplify.
102 // Coalesces runs of star/plus/quest/repeat of the same literal along with any
103 // occurrences of that literal into repeats of that literal. It also works for
104 // char classes, any char and any byte.
105 // PostVisit creates the coalesced result, which should then be simplified.
106 class CoalesceWalker : public Regexp::Walker<Regexp*> {
107 public:
CoalesceWalker()108 CoalesceWalker() {}
109 virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
110 Regexp** child_args, int nchild_args);
111 virtual Regexp* Copy(Regexp* re);
112 virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
113
114 private:
115 // These functions are declared inside CoalesceWalker so that
116 // they can edit the private fields of the Regexps they construct.
117
118 // Returns true if r1 and r2 can be coalesced. In particular, ensures that
119 // the parse flags are consistent. (They will not be checked again later.)
120 static bool CanCoalesce(Regexp* r1, Regexp* r2);
121
122 // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
123 // will be empty match and the coalesced op. In other cases, where part of a
124 // literal string was removed to be coalesced, the array elements afterwards
125 // will be the coalesced op and the remainder of the literal string.
126 static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
127
128 CoalesceWalker(const CoalesceWalker&) = delete;
129 CoalesceWalker& operator=(const CoalesceWalker&) = delete;
130 };
131
132 // Walker subclass used by Simplify.
133 // The simplify walk is purely post-recursive: given the simplified children,
134 // PostVisit creates the simplified result.
135 // The child_args are simplified Regexp*s.
136 class SimplifyWalker : public Regexp::Walker<Regexp*> {
137 public:
SimplifyWalker()138 SimplifyWalker() {}
139 virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
140 virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
141 Regexp** child_args, int nchild_args);
142 virtual Regexp* Copy(Regexp* re);
143 virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
144
145 private:
146 // These functions are declared inside SimplifyWalker so that
147 // they can edit the private fields of the Regexps they construct.
148
149 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
150 // Caller must Decref return value when done with it.
151 static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
152
153 // Simplifies the expression re{min,max} in terms of *, +, and ?.
154 // Returns a new regexp. Does not edit re. Does not consume reference to re.
155 // Caller must Decref return value when done with it.
156 static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
157 Regexp::ParseFlags parse_flags);
158
159 // Simplifies a character class by expanding any named classes
160 // into rune ranges. Does not edit re. Does not consume ref to re.
161 // Caller must Decref return value when done with it.
162 static Regexp* SimplifyCharClass(Regexp* re);
163
164 SimplifyWalker(const SimplifyWalker&) = delete;
165 SimplifyWalker& operator=(const SimplifyWalker&) = delete;
166 };
167
168 // Simplifies a regular expression, returning a new regexp.
169 // The new regexp uses traditional Unix egrep features only,
170 // plus the Perl (?:) non-capturing parentheses.
171 // Otherwise, no POSIX or Perl additions. The new regexp
172 // captures exactly the same subexpressions (with the same indices)
173 // as the original.
174 // Does not edit current object.
175 // Caller must Decref() return value when done with it.
176
Simplify()177 Regexp* Regexp::Simplify() {
178 CoalesceWalker cw;
179 Regexp* cre = cw.Walk(this, NULL);
180 if (cre == NULL)
181 return NULL;
182 if (cw.stopped_early()) {
183 cre->Decref();
184 return NULL;
185 }
186 SimplifyWalker sw;
187 Regexp* sre = sw.Walk(cre, NULL);
188 cre->Decref();
189 if (sre == NULL)
190 return NULL;
191 if (sw.stopped_early()) {
192 sre->Decref();
193 return NULL;
194 }
195 return sre;
196 }
197
198 #define Simplify DontCallSimplify // Avoid accidental recursion
199
200 // Utility function for PostVisit implementations that compares re->sub() with
201 // child_args to determine whether any child_args changed. In the common case,
202 // where nothing changed, calls Decref() for all child_args and returns false,
203 // so PostVisit must return re->Incref(). Otherwise, returns true.
ChildArgsChanged(Regexp * re,Regexp ** child_args)204 static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
205 for (int i = 0; i < re->nsub(); i++) {
206 Regexp* sub = re->sub()[i];
207 Regexp* newsub = child_args[i];
208 if (newsub != sub)
209 return true;
210 }
211 for (int i = 0; i < re->nsub(); i++) {
212 Regexp* newsub = child_args[i];
213 newsub->Decref();
214 }
215 return false;
216 }
217
Copy(Regexp * re)218 Regexp* CoalesceWalker::Copy(Regexp* re) {
219 return re->Incref();
220 }
221
ShortVisit(Regexp * re,Regexp * parent_arg)222 Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
223 // Should never be called: we use Walk(), not WalkExponential().
224 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
225 LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
226 #endif
227 return re->Incref();
228 }
229
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)230 Regexp* CoalesceWalker::PostVisit(Regexp* re,
231 Regexp* parent_arg,
232 Regexp* pre_arg,
233 Regexp** child_args,
234 int nchild_args) {
235 if (re->nsub() == 0)
236 return re->Incref();
237
238 if (re->op() != kRegexpConcat) {
239 if (!ChildArgsChanged(re, child_args))
240 return re->Incref();
241
242 // Something changed. Build a new op.
243 Regexp* nre = new Regexp(re->op(), re->parse_flags());
244 nre->AllocSub(re->nsub());
245 Regexp** nre_subs = nre->sub();
246 for (int i = 0; i < re->nsub(); i++)
247 nre_subs[i] = child_args[i];
248 // Repeats and Captures have additional data that must be copied.
249 if (re->op() == kRegexpRepeat) {
250 nre->min_ = re->min();
251 nre->max_ = re->max();
252 } else if (re->op() == kRegexpCapture) {
253 nre->cap_ = re->cap();
254 }
255 return nre;
256 }
257
258 bool can_coalesce = false;
259 for (int i = 0; i < re->nsub(); i++) {
260 if (i+1 < re->nsub() &&
261 CanCoalesce(child_args[i], child_args[i+1])) {
262 can_coalesce = true;
263 break;
264 }
265 }
266 if (!can_coalesce) {
267 if (!ChildArgsChanged(re, child_args))
268 return re->Incref();
269
270 // Something changed. Build a new op.
271 Regexp* nre = new Regexp(re->op(), re->parse_flags());
272 nre->AllocSub(re->nsub());
273 Regexp** nre_subs = nre->sub();
274 for (int i = 0; i < re->nsub(); i++)
275 nre_subs[i] = child_args[i];
276 return nre;
277 }
278
279 for (int i = 0; i < re->nsub(); i++) {
280 if (i+1 < re->nsub() &&
281 CanCoalesce(child_args[i], child_args[i+1]))
282 DoCoalesce(&child_args[i], &child_args[i+1]);
283 }
284 // Determine how many empty matches were left by DoCoalesce.
285 int n = 0;
286 for (int i = n; i < re->nsub(); i++) {
287 if (child_args[i]->op() == kRegexpEmptyMatch)
288 n++;
289 }
290 // Build a new op.
291 Regexp* nre = new Regexp(re->op(), re->parse_flags());
292 nre->AllocSub(re->nsub() - n);
293 Regexp** nre_subs = nre->sub();
294 for (int i = 0, j = 0; i < re->nsub(); i++) {
295 if (child_args[i]->op() == kRegexpEmptyMatch) {
296 child_args[i]->Decref();
297 continue;
298 }
299 nre_subs[j] = child_args[i];
300 j++;
301 }
302 return nre;
303 }
304
CanCoalesce(Regexp * r1,Regexp * r2)305 bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
306 // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
307 // any byte.
308 if ((r1->op() == kRegexpStar ||
309 r1->op() == kRegexpPlus ||
310 r1->op() == kRegexpQuest ||
311 r1->op() == kRegexpRepeat) &&
312 (r1->sub()[0]->op() == kRegexpLiteral ||
313 r1->sub()[0]->op() == kRegexpCharClass ||
314 r1->sub()[0]->op() == kRegexpAnyChar ||
315 r1->sub()[0]->op() == kRegexpAnyByte)) {
316 // r2 must be a star/plus/quest/repeat of the same literal, char class,
317 // any char or any byte.
318 if ((r2->op() == kRegexpStar ||
319 r2->op() == kRegexpPlus ||
320 r2->op() == kRegexpQuest ||
321 r2->op() == kRegexpRepeat) &&
322 Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
323 // The parse flags must be consistent.
324 ((r1->parse_flags() & Regexp::NonGreedy) ==
325 (r2->parse_flags() & Regexp::NonGreedy))) {
326 return true;
327 }
328 // ... OR an occurrence of that literal, char class, any char or any byte
329 if (Regexp::Equal(r1->sub()[0], r2)) {
330 return true;
331 }
332 // ... OR a literal string that begins with that literal.
333 if (r1->sub()[0]->op() == kRegexpLiteral &&
334 r2->op() == kRegexpLiteralString &&
335 r2->runes()[0] == r1->sub()[0]->rune() &&
336 // The parse flags must be consistent.
337 ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
338 (r2->parse_flags() & Regexp::FoldCase))) {
339 return true;
340 }
341 }
342 return false;
343 }
344
DoCoalesce(Regexp ** r1ptr,Regexp ** r2ptr)345 void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
346 Regexp* r1 = *r1ptr;
347 Regexp* r2 = *r2ptr;
348
349 Regexp* nre = Regexp::Repeat(
350 r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
351
352 switch (r1->op()) {
353 case kRegexpStar:
354 nre->min_ = 0;
355 nre->max_ = -1;
356 break;
357
358 case kRegexpPlus:
359 nre->min_ = 1;
360 nre->max_ = -1;
361 break;
362
363 case kRegexpQuest:
364 nre->min_ = 0;
365 nre->max_ = 1;
366 break;
367
368 case kRegexpRepeat:
369 nre->min_ = r1->min();
370 nre->max_ = r1->max();
371 break;
372
373 default:
374 nre->Decref();
375 LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
376 return;
377 }
378
379 switch (r2->op()) {
380 case kRegexpStar:
381 nre->max_ = -1;
382 goto LeaveEmpty;
383
384 case kRegexpPlus:
385 nre->min_++;
386 nre->max_ = -1;
387 goto LeaveEmpty;
388
389 case kRegexpQuest:
390 if (nre->max() != -1)
391 nre->max_++;
392 goto LeaveEmpty;
393
394 case kRegexpRepeat:
395 nre->min_ += r2->min();
396 if (r2->max() == -1)
397 nre->max_ = -1;
398 else if (nre->max() != -1)
399 nre->max_ += r2->max();
400 goto LeaveEmpty;
401
402 case kRegexpLiteral:
403 case kRegexpCharClass:
404 case kRegexpAnyChar:
405 case kRegexpAnyByte:
406 nre->min_++;
407 if (nre->max() != -1)
408 nre->max_++;
409 goto LeaveEmpty;
410
411 LeaveEmpty:
412 *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
413 *r2ptr = nre;
414 break;
415
416 case kRegexpLiteralString: {
417 Rune r = r1->sub()[0]->rune();
418 // Determine how much of the literal string is removed.
419 // We know that we have at least one rune. :)
420 int n = 1;
421 while (n < r2->nrunes() && r2->runes()[n] == r)
422 n++;
423 nre->min_ += n;
424 if (nre->max() != -1)
425 nre->max_ += n;
426 if (n == r2->nrunes())
427 goto LeaveEmpty;
428 *r1ptr = nre;
429 *r2ptr = Regexp::LiteralString(
430 &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
431 break;
432 }
433
434 default:
435 nre->Decref();
436 LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
437 return;
438 }
439
440 r1->Decref();
441 r2->Decref();
442 }
443
Copy(Regexp * re)444 Regexp* SimplifyWalker::Copy(Regexp* re) {
445 return re->Incref();
446 }
447
ShortVisit(Regexp * re,Regexp * parent_arg)448 Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
449 // Should never be called: we use Walk(), not WalkExponential().
450 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
451 LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
452 #endif
453 return re->Incref();
454 }
455
PreVisit(Regexp * re,Regexp * parent_arg,bool * stop)456 Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
457 if (re->simple()) {
458 *stop = true;
459 return re->Incref();
460 }
461 return NULL;
462 }
463
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)464 Regexp* SimplifyWalker::PostVisit(Regexp* re,
465 Regexp* parent_arg,
466 Regexp* pre_arg,
467 Regexp** child_args,
468 int nchild_args) {
469 switch (re->op()) {
470 case kRegexpNoMatch:
471 case kRegexpEmptyMatch:
472 case kRegexpLiteral:
473 case kRegexpLiteralString:
474 case kRegexpBeginLine:
475 case kRegexpEndLine:
476 case kRegexpBeginText:
477 case kRegexpWordBoundary:
478 case kRegexpNoWordBoundary:
479 case kRegexpEndText:
480 case kRegexpAnyChar:
481 case kRegexpAnyByte:
482 case kRegexpHaveMatch:
483 // All these are always simple.
484 re->simple_ = true;
485 return re->Incref();
486
487 case kRegexpConcat:
488 case kRegexpAlternate: {
489 // These are simple as long as the subpieces are simple.
490 if (!ChildArgsChanged(re, child_args)) {
491 re->simple_ = true;
492 return re->Incref();
493 }
494 Regexp* nre = new Regexp(re->op(), re->parse_flags());
495 nre->AllocSub(re->nsub());
496 Regexp** nre_subs = nre->sub();
497 for (int i = 0; i < re->nsub(); i++)
498 nre_subs[i] = child_args[i];
499 nre->simple_ = true;
500 return nre;
501 }
502
503 case kRegexpCapture: {
504 Regexp* newsub = child_args[0];
505 if (newsub == re->sub()[0]) {
506 newsub->Decref();
507 re->simple_ = true;
508 return re->Incref();
509 }
510 Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
511 nre->AllocSub(1);
512 nre->sub()[0] = newsub;
513 nre->cap_ = re->cap();
514 nre->simple_ = true;
515 return nre;
516 }
517
518 case kRegexpStar:
519 case kRegexpPlus:
520 case kRegexpQuest: {
521 Regexp* newsub = child_args[0];
522 // Special case: repeat the empty string as much as
523 // you want, but it's still the empty string.
524 if (newsub->op() == kRegexpEmptyMatch)
525 return newsub;
526
527 // These are simple as long as the subpiece is simple.
528 if (newsub == re->sub()[0]) {
529 newsub->Decref();
530 re->simple_ = true;
531 return re->Incref();
532 }
533
534 // These are also idempotent if flags are constant.
535 if (re->op() == newsub->op() &&
536 re->parse_flags() == newsub->parse_flags())
537 return newsub;
538
539 Regexp* nre = new Regexp(re->op(), re->parse_flags());
540 nre->AllocSub(1);
541 nre->sub()[0] = newsub;
542 nre->simple_ = true;
543 return nre;
544 }
545
546 case kRegexpRepeat: {
547 Regexp* newsub = child_args[0];
548 // Special case: repeat the empty string as much as
549 // you want, but it's still the empty string.
550 if (newsub->op() == kRegexpEmptyMatch)
551 return newsub;
552
553 Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
554 re->parse_flags());
555 newsub->Decref();
556 nre->simple_ = true;
557 return nre;
558 }
559
560 case kRegexpCharClass: {
561 Regexp* nre = SimplifyCharClass(re);
562 nre->simple_ = true;
563 return nre;
564 }
565 }
566
567 LOG(ERROR) << "Simplify case not handled: " << re->op();
568 return re->Incref();
569 }
570
571 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
572 // Returns a new Regexp, handing the ref to the caller.
Concat2(Regexp * re1,Regexp * re2,Regexp::ParseFlags parse_flags)573 Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
574 Regexp::ParseFlags parse_flags) {
575 Regexp* re = new Regexp(kRegexpConcat, parse_flags);
576 re->AllocSub(2);
577 Regexp** subs = re->sub();
578 subs[0] = re1;
579 subs[1] = re2;
580 return re;
581 }
582
583 // Returns true if re is an empty-width op.
IsEmptyOp(Regexp * re)584 static bool IsEmptyOp(Regexp* re) {
585 return (re->op() == kRegexpBeginLine ||
586 re->op() == kRegexpEndLine ||
587 re->op() == kRegexpWordBoundary ||
588 re->op() == kRegexpNoWordBoundary ||
589 re->op() == kRegexpBeginText ||
590 re->op() == kRegexpEndText);
591 }
592
593 // Simplifies the expression re{min,max} in terms of *, +, and ?.
594 // Returns a new regexp. Does not edit re. Does not consume reference to re.
595 // Caller must Decref return value when done with it.
596 // The result will *not* necessarily have the right capturing parens
597 // if you call ToString() and re-parse it: (x){2} becomes (x)(x),
598 // but in the Regexp* representation, both (x) are marked as $1.
SimplifyRepeat(Regexp * re,int min,int max,Regexp::ParseFlags f)599 Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
600 Regexp::ParseFlags f) {
601 // For an empty-width op OR a concatenation or alternation of empty-width
602 // ops, cap the repetition count at 1.
603 if (IsEmptyOp(re) ||
604 ((re->op() == kRegexpConcat ||
605 re->op() == kRegexpAlternate) &&
606 std::all_of(re->sub(), re->sub() + re->nsub(), IsEmptyOp))) {
607 min = std::min(min, 1);
608 max = std::min(max, 1);
609 }
610
611 // x{n,} means at least n matches of x.
612 if (max == -1) {
613 // Special case: x{0,} is x*
614 if (min == 0)
615 return Regexp::Star(re->Incref(), f);
616
617 // Special case: x{1,} is x+
618 if (min == 1)
619 return Regexp::Plus(re->Incref(), f);
620
621 // General case: x{4,} is xxxx+
622 PODArray<Regexp*> nre_subs(min);
623 for (int i = 0; i < min-1; i++)
624 nre_subs[i] = re->Incref();
625 nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
626 return Regexp::Concat(nre_subs.data(), min, f);
627 }
628
629 // Special case: (x){0} matches only empty string.
630 if (min == 0 && max == 0)
631 return new Regexp(kRegexpEmptyMatch, f);
632
633 // Special case: x{1} is just x.
634 if (min == 1 && max == 1)
635 return re->Incref();
636
637 // General case: x{n,m} means n copies of x and m copies of x?.
638 // The machine will do less work if we nest the final m copies,
639 // so that x{2,5} = xx(x(x(x)?)?)?
640
641 // Build leading prefix: xx. Capturing only on the last one.
642 Regexp* nre = NULL;
643 if (min > 0) {
644 PODArray<Regexp*> nre_subs(min);
645 for (int i = 0; i < min; i++)
646 nre_subs[i] = re->Incref();
647 nre = Regexp::Concat(nre_subs.data(), min, f);
648 }
649
650 // Build and attach suffix: (x(x(x)?)?)?
651 if (max > min) {
652 Regexp* suf = Regexp::Quest(re->Incref(), f);
653 for (int i = min+1; i < max; i++)
654 suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
655 if (nre == NULL)
656 nre = suf;
657 else
658 nre = Concat2(nre, suf, f);
659 }
660
661 if (nre == NULL) {
662 // Some degenerate case, like min > max, or min < max < 0.
663 // This shouldn't happen, because the parser rejects such regexps.
664 LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
665 return new Regexp(kRegexpNoMatch, f);
666 }
667
668 return nre;
669 }
670
671 // Simplifies a character class.
672 // Caller must Decref return value when done with it.
SimplifyCharClass(Regexp * re)673 Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
674 CharClass* cc = re->cc();
675
676 // Special cases
677 if (cc->empty())
678 return new Regexp(kRegexpNoMatch, re->parse_flags());
679 if (cc->full())
680 return new Regexp(kRegexpAnyChar, re->parse_flags());
681
682 return re->Incref();
683 }
684
685 } // namespace re2
686