#!/usr/bin/perl
# Copyright 2008 The RE2 Authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

# Generate table entries giving character ranges
# for POSIX/Perl character classes. Rather than
# figure out what the definition is, it is easier to ask
# Perl about each letter from 0-128 and write down
# its answer.

# Only strict and warnings are enabled on purpose. A "use v5.12" (or later)
# bundle also turns on the unicode_strings feature, which can change how
# chr(128) is classified and therefore the generated tables.
use strict;
use warnings;

# POSIX bracket classes, written as they appear inside [...].
my @posixclasses = (
    "[:alnum:]",
    "[:alpha:]",
    "[:ascii:]",
    "[:blank:]",
    "[:cntrl:]",
    "[:digit:]",
    "[:graph:]",
    "[:lower:]",
    "[:print:]",
    "[:punct:]",
    "[:space:]",
    "[:upper:]",
    "[:word:]",
    "[:xdigit:]",
);

# Perl shorthand classes.
my @perlclasses = (
    "\\d",
    "\\s",
    "\\w",
);

# Forced answers, keyed by "<class>:<code point>". A defined value here
# replaces whatever the running Perl would report for that character.
my %overrides = (
    # Prior to Perl 5.18, \s did not match vertical tab.
    # RE2 preserves that original behaviour.
    "\\s:11" => 0,
);

# ComputeClass($cname)
#
# Asks Perl which code points in 0..128 belong to the class $cname (the text
# that goes inside a bracket expression, e.g. "[:alpha:]" or "\\d") and
# returns the members as a list of [lo, hi] array refs, in ascending order,
# with adjacent members merged into one inclusive range.
sub ComputeClass {
    my ($cname) = @_;
    my @ranges;
    my $regexp = qr/[$cname]/;
    my $start  = -1;    # first code point of the range being built, or -1

    # 128 is probed as well so that a range ending at 127 gets closed
    # inside the loop.
    for my $i (0 .. 128) {
        # =~ binds tighter than //, so an override (when defined) wins and
        # the regexp is only consulted otherwise.
        my $is_member = $overrides{"$cname:$i"} // (chr($i) =~ $regexp);
        if ($is_member) {
            $start = $i if $start < 0;
        }
        elsif ($start >= 0) {
            push @ranges, [$start, $i - 1];
            $start = -1;
        }
    }

    # A range still open after 128 is closed at 255. This matches the
    # original loop, which jumped its counter from 129 to 256 to flush.
    push @ranges, [$start, 255] if $start >= 0;

    return @ranges;
}

# PrintClass($cnum, $cname, @ranges)
#
# Prints the C array "code<cnum>" holding @ranges (a list of [lo, hi] array
# refs) to standard output, and returns two strings: the UGroup initializer
# for the class itself (+1) and the one for its negation (-1). Both refer to
# the same array; the negated name is "[:^name:]" for POSIX classes and the
# upper-cased escape (e.g. \D) for Perl classes.
sub PrintClass {
    my ($cnum, $cname, @ranges) = @_;

    print "static const URange16 code${cnum}[] = {  /* $cname */\n";
    foreach my $range (@ranges) {
        my ($lo, $hi) = @{$range};
        printf "\t{ 0x%x, 0x%x },\n", $lo, $hi;
    }
    print "};\n";

    my $n = @ranges;

    # Double the backslashes so the name survives as a C string literal.
    my $escname = $cname;
    $escname =~ s/\\/\\\\/g;

    # Lexical: the original assigned an undeclared package global here.
    my $negname = $escname;
    if ($negname =~ /:/) {
        $negname =~ s/:/:^/;    # first colon only: [:alpha:] -> [:^alpha:]
    }
    else {
        $negname =~ y/a-z/A-Z/;
    }

    return "{ \"$escname\", +1, code$cnum, $n }",
           "{ \"$negname\", -1, code$cnum, $n }";
}

# Running index used to name the codeN arrays; shared by every
# PrintClasses call so the names stay unique across both tables.
my $cnum = 0;

# PrintClasses($pname, @classes)
#
# Prints one codeN array per class in @classes, followed by the
# "<pname>_groups" table that references them and the matching
# "num_<pname>_groups" count. All output goes to standard output.
sub PrintClasses {
    my ($pname, @classes) = @_;
    my @entries;
    foreach my $cname (@classes) {
        my @ranges = ComputeClass($cname);
        push @entries, PrintClass(++$cnum, $cname, @ranges);
    }

    print "const UGroup ${pname}_groups[] = {\n";
    foreach my $e (@entries) {
        print "\t$e,\n";
    }
    print "};\n";

    my $count = @entries;
    print "const int num_${pname}_groups = $count;\n";
    return;
}

print <<EOF;
// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
// make_perl_groups.pl >perl_groups.cc

#include "re2/unicode_groups.h"

namespace re2 {

EOF

PrintClasses("perl", @perlclasses);
PrintClasses("posix", @posixclasses);

# NOTE(review): the copy this was restored from had runs of whitespace
# collapsed; confirm the spacing inside the generated lines (for example
# before "// namespace re2") against the checked-in perl_groups.cc.
print <<EOF;

} // namespace re2
EOF