xref: /aosp_15_r20/external/regex-re2/re2/make_perl_groups.pl (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1#!/usr/bin/perl
2# Copyright 2008 The RE2 Authors.  All Rights Reserved.
3# Use of this source code is governed by a BSD-style
4# license that can be found in the LICENSE file.
5
6# Generate table entries giving character ranges
7# for POSIX/Perl character classes.  Rather than
8# figure out what the definition is, it is easier to ask
9# Perl about each letter from 0-128 and write down
10# its answer.
11
12@posixclasses = (
13	"[:alnum:]",
14	"[:alpha:]",
15	"[:ascii:]",
16	"[:blank:]",
17	"[:cntrl:]",
18	"[:digit:]",
19	"[:graph:]",
20	"[:lower:]",
21	"[:print:]",
22	"[:punct:]",
23	"[:space:]",
24	"[:upper:]",
25	"[:word:]",
26	"[:xdigit:]",
27);
28
29@perlclasses = (
30	"\\d",
31	"\\s",
32	"\\w",
33);
34
35%overrides = (
36	# Prior to Perl 5.18, \s did not match vertical tab.
37	# RE2 preserves that original behaviour.
38	"\\s:11" => 0,
39);
40
41sub ComputeClass($) {
42  my ($cname) = @_;
43  my @ranges;
44  my $regexp = qr/[$cname]/;
45  my $start = -1;
46  for (my $i=0; $i<=129; $i++) {
47    if ($i == 129) { $i = 256; }
48    if ($i <= 128 && ($overrides{"$cname:$i"} // chr($i) =~ $regexp)) {
49      if ($start < 0) {
50        $start = $i;
51      }
52    } else {
53      if ($start >= 0) {
54        push @ranges, [$start, $i-1];
55      }
56      $start = -1;
57    }
58  }
59  return @ranges;
60}
61
62sub PrintClass($$@) {
63  my ($cnum, $cname, @ranges) = @_;
64  print "static const URange16 code${cnum}[] = {  /* $cname */\n";
65  for (my $i=0; $i<@ranges; $i++) {
66    my @a = @{$ranges[$i]};
67    printf "\t{ 0x%x, 0x%x },\n", $a[0], $a[1];
68  }
69  print "};\n";
70  my $n = @ranges;
71  my $escname = $cname;
72  $escname =~ s/\\/\\\\/g;
73  $negname = $escname;
74  if ($negname =~ /:/) {
75    $negname =~ s/:/:^/;
76  } else {
77    $negname =~ y/a-z/A-Z/;
78  }
79  return "{ \"$escname\", +1, code$cnum, $n }", "{ \"$negname\", -1, code$cnum, $n }";
80}
81
82my $cnum = 0;
83
84sub PrintClasses($@) {
85  my ($pname, @classes) = @_;
86  my @entries;
87  foreach my $cname (@classes) {
88    my @ranges = ComputeClass($cname);
89    push @entries, PrintClass(++$cnum, $cname, @ranges);
90  }
91  print "const UGroup ${pname}_groups[] = {\n";
92  foreach my $e (@entries) {
93    print "\t$e,\n";
94  }
95  print "};\n";
96  my $count = @entries;
97  print "const int num_${pname}_groups = $count;\n";
98}
99
100print <<EOF;
101// GENERATED BY make_perl_groups.pl; DO NOT EDIT.
102// make_perl_groups.pl >perl_groups.cc
103
104#include "re2/unicode_groups.h"
105
106namespace re2 {
107
108EOF
109
110PrintClasses("perl", @perlclasses);
111PrintClasses("posix", @posixclasses);
112
113print <<EOF;
114
115}  // namespace re2
116EOF
117