1#!/usr/bin/perl 2# Copyright 2008 The RE2 Authors. All Rights Reserved. 3# Use of this source code is governed by a BSD-style 4# license that can be found in the LICENSE file. 5 6# Generate table entries giving character ranges 7# for POSIX/Perl character classes. Rather than 8# figure out what the definition is, it is easier to ask 9# Perl about each letter from 0-128 and write down 10# its answer. 11 12@posixclasses = ( 13 "[:alnum:]", 14 "[:alpha:]", 15 "[:ascii:]", 16 "[:blank:]", 17 "[:cntrl:]", 18 "[:digit:]", 19 "[:graph:]", 20 "[:lower:]", 21 "[:print:]", 22 "[:punct:]", 23 "[:space:]", 24 "[:upper:]", 25 "[:word:]", 26 "[:xdigit:]", 27); 28 29@perlclasses = ( 30 "\\d", 31 "\\s", 32 "\\w", 33); 34 35%overrides = ( 36 # Prior to Perl 5.18, \s did not match vertical tab. 37 # RE2 preserves that original behaviour. 38 "\\s:11" => 0, 39); 40 41sub ComputeClass($) { 42 my ($cname) = @_; 43 my @ranges; 44 my $regexp = qr/[$cname]/; 45 my $start = -1; 46 for (my $i=0; $i<=129; $i++) { 47 if ($i == 129) { $i = 256; } 48 if ($i <= 128 && ($overrides{"$cname:$i"} // chr($i) =~ $regexp)) { 49 if ($start < 0) { 50 $start = $i; 51 } 52 } else { 53 if ($start >= 0) { 54 push @ranges, [$start, $i-1]; 55 } 56 $start = -1; 57 } 58 } 59 return @ranges; 60} 61 62sub PrintClass($$@) { 63 my ($cnum, $cname, @ranges) = @_; 64 print "static const URange16 code${cnum}[] = { /* $cname */\n"; 65 for (my $i=0; $i<@ranges; $i++) { 66 my @a = @{$ranges[$i]}; 67 printf "\t{ 0x%x, 0x%x },\n", $a[0], $a[1]; 68 } 69 print "};\n"; 70 my $n = @ranges; 71 my $escname = $cname; 72 $escname =~ s/\\/\\\\/g; 73 $negname = $escname; 74 if ($negname =~ /:/) { 75 $negname =~ s/:/:^/; 76 } else { 77 $negname =~ y/a-z/A-Z/; 78 } 79 return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }"; 80} 81 82my $cnum = 0; 83 84sub PrintClasses($@) { 85 my ($pname, @classes) = @_; 86 my @entries; 87 foreach my $cname (@classes) { 88 my @ranges = ComputeClass($cname); 89 push @entries, PrintClass(++$cnum, $cname, @ranges); 90 } 91 print "const UGroup ${pname}_groups[] = {\n"; 92 foreach my $e (@entries) { 93 print "\t$e,\n"; 94 } 95 print "};\n"; 96 my $count = @entries; 97 print "const int num_${pname}_groups = $count;\n"; 98} 99 100print <<EOF; 101// GENERATED BY make_perl_groups.pl; DO NOT EDIT. 102// make_perl_groups.pl >perl_groups.cc 103 104#include "re2/unicode_groups.h" 105 106namespace re2 { 107 108EOF 109 110PrintClasses("perl", @perlclasses); 111PrintClasses("posix", @posixclasses); 112 113print <<EOF; 114 115} // namespace re2 116EOF 117