xref: /aosp_15_r20/external/pcre/132html (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
#! /usr/bin/perl -w

# Script to turn PCRE2 man pages into HTML


# Subroutine to convert the in-line escapes of one piece of man page text
# into HTML. The single argument is the text; the converted copy is returned
# and the caller's value is left untouched.

sub do_line {
my $text = shift;

for ($text)
  {
  # Angle brackets in the source text must not be taken as markup, so they
  # are turned into numeric entities before any tags are inserted.
  s/</&#60;/g;
  s/>/&#62;/g;

  # Italic (\fI) and bold (\fB) runs, each closed by \fR or \fP
  s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;
  s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;

  # \e is the man page spelling of a literal backslash
  s{\\e}{\\}g;

  # "(c)" directly after "Copyright " becomes the copyright entity
  s/(?<=Copyright )\(c\)/&copy;/g;
  }

return $text;
}
19*22dc650dSSadaf Ebrahimi
# Subroutine to ensure not in a paragraph. If a paragraph is open, its
# closing tags are written to TEMP (closing an open literal section first).
# In every case the paragraph, literal-section and "text written" flags are
# cleared.

sub end_para {
if ($inpara)
  {
  my $closing = $inpre ? "</PRE>\n</P>\n" : "</P>\n";
  print TEMP $closing;
  }
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
}
31*22dc650dSSadaf Ebrahimi
# Subroutine to start a new paragraph: any paragraph that is still open is
# closed first, then the opening tag is written to TEMP and the "in
# paragraph" flag is set.

sub new_para {
end_para();
print TEMP "<P>\n";
return $inpara = 1;
}
39*22dc650dSSadaf Ebrahimi
40*22dc650dSSadaf Ebrahimi
# Main program

# Conversion state:
#   $innf       set between .nf and .fi
#   $inpara     set while a <P> is open
#   $inpre      set while a <pre> literal section is open
#   $wrotetext  set once text has been written since the last end_para()
#   $toc        set by the -toc option
#   $ref        number used for the next TOC/SEC anchor pair

($innf, $inpara, $inpre, $wrotetext) = (0, 0, 0, 0);
$toc = 0;
$ref = 1;

# Consume leading options; only -toc is recognized, anything else that
# starts with a hyphen is discarded.

while (@ARGV && $ARGV[0] =~ /^-/)
  {
  my $option = shift(@ARGV);
  $toc = 1 if ($option eq "-toc");
  }
55*22dc650dSSadaf Ebrahimi
# Initial output to STDOUT: the fixed page header, with the first remaining
# command line argument used as the page name in the title and heading.

print <<"PAGE_HEAD";
<html>
<head>
<title>$ARGV[0] specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$ARGV[0] man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
PAGE_HEAD
74*22dc650dSSadaf Ebrahimi
print "<ul>\n" if ($toc);

# The table of contents (if requested) is written straight to STDOUT while
# the input is read, so the page body has to be held back until the whole
# contents list is complete. The body is collected through the TEMP handle,
# which end_para() and new_para() also write to. TEMP is opened on an
# in-memory buffer rather than on a predictably named file in /tmp: that
# avoids a symlink/clobber race on the file name and leaves nothing behind
# if processing is abandoned.

my $page_body = "";
open(TEMP, ">", \$page_body) ||
  die "Can't open in-memory buffer for output: $!\n";

# Subroutine to read one lookahead line from STDIN. At end of input an empty
# string is returned instead of undef, so callers never operate on an
# undefined value.

sub read_lookahead {
my $line = <STDIN>;
return defined($line) ? $line : "";
}

while (<STDIN>)
  {
  # Handle lines beginning with a dot

  if (/^\./)
    {
    # Some of the PCRE2 man pages used to contain instances of .br. However,
    # they should have all been removed because they cause trouble in some
    # (other) automated systems that translate man pages to HTML. Complain if
    # we find .br or .in (another macro that is deprecated).

    if (/^\.br/ || /^\.in/)
      {
      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
      print STDERR "*** $_\n";
      die "*** Processing abandoned\n";
      }

    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

    elsif (/^\.nf/)
      {
      $innf = 1;
      }

    elsif (/^\.fi/)
      {
      $innf = 0;
      }

    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows, unless we are inside
    # a .nf/.fi section or about to enter one. The point being that the <pre>
    # and </pre> that delimit literal sections will do the spacing. Always skip
    # if no previous output.

    elsif (/^\.sp/)
      {
      if ($wrotetext)
        {
        $_ = &read_lookahead();
        if ($inpre)
          {
          print TEMP "\n" if (/^[\s.]/);
          }
        else
          {
          print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
          }
        redo;    # Now process the lookahead line we just read
        }
      }
    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
      {
      &new_para();
      }
    elsif (/^\.SH\s*("?)(.*)\1/)
      {
      # Ignore the NAME section: skip the heading and the line after it
      if ($2 =~ /^NAME\b/)
        {
        <STDIN>;
        next;
        }

      &end_para();
      my($title) = &do_line($2);
      if ($toc)
        {
        # The title is passed as a %s argument, not interpolated into the
        # format, so that a "%" in a section title is output literally.
        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
          $ref, $ref, $title);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
          $ref, $title);
        $ref++;
        }
      else
        {
        print TEMP "<br><b>\n$title\n</b><br>\n";
        }
      }
    elsif (/^\.SS\s*("?)(.*)\1/)
      {
      &end_para();
      my($title) = &do_line($2);
      print TEMP "<br><b>\n$title\n</b><br>\n";
      }
    elsif (/^\.B\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;
      print TEMP "<b>$_</b>\n";
      $wrotetext = 1;
      }
    elsif (/^\.I\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;
      print TEMP "<i>$_</i>\n";
      $wrotetext = 1;
      }

    # Remove the "AUTOMATICALLY GENERATED" warning from pcre2demo.3
    elsif (/^\.\\"AUTOMATICALLY GENERATED/) { next; }

    # A comment that starts "HREF" takes the next line as a name that
    # is turned into a hyperlink, using the text given, which might be
    # in a special font. If it ends in () or (digits) or punctuation, they
    # aren't part of the link. If no name can be extracted, the text is
    # output without a link and a warning is given, rather than writing a
    # link with an empty target.

    elsif (/^\.\\"\s*HREF/)
      {
      $_ = &read_lookahead();
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      if ($_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/)
        {
        print TEMP "<a href=\"$1.html\">$_</a>\n";
        }
      else
        {
        print STDERR "*** Cannot derive a link target from \"$_\"\n";
        print TEMP "$_\n";
        }
      }

    # A comment that starts "HTML" inserts literal HTML

    elsif (/^\.\\"\s*HTML\s*(.*)/)
      {
      print TEMP $1;
      }

    # A comment that starts < inserts that HTML at the end of the
    # *next* input line - so as not to get a newline between them.

    elsif (/^\.\\"\s*(<.*>)/)
      {
      my($markup) = $1;
      $_ = &read_lookahead();
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      print TEMP "$_$markup\n";
      }

    # A comment that starts JOIN joins the next two lines together, with one
    # space between them. Then that line is processed. This is used in some
    # displays where two lines are needed for the "man" version. JOINSH works
    # the same, except that it assumes this is a shell command, so removes
    # continuation backslashes.

    elsif (/^\.\\"\s*JOIN(SH)?/)
      {
      my($is_shell) = defined($1);
      my($one,$two);
      $one = &read_lookahead();
      $two = &read_lookahead();
      $one =~ s/\s*\\e\s*$// if ($is_shell);
      chomp($one);
      $two =~ s/^\s+//;
      $_ = "$one $two";
      redo;            # Process the joined lines
      }

    # .EX/.EE are used in the pcre2demo page to bracket the entire program,
    # which is unmodified except for turning backslash into "\e". The lines
    # are copied with HTML special characters escaped, and the <PRE> that is
    # opened here is closed again when .EE (or end of input) is reached.

    elsif (/^\.EX\s*$/)
      {
      print TEMP "<PRE>\n";
      while (<STDIN>)
        {
        last if /^\.EE\s*$/;
        s/\\e/\\/g;
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        print TEMP;
        }
      print TEMP "</PRE>\n";
      }

    # Ignore anything not recognized

    next;
    }

  # Line does not begin with a dot. Replace blank lines with new paragraphs

  if (/^\s*$/)
    {
    &end_para() if ($wrotetext);
    next;
    }

  # Convert fonts changes and output an ordinary line. Ensure that indented
  # lines are marked as literal.

  $_ = &do_line($_);
  &new_para() if (!$inpara);

  if (/^\s/)
    {
    if (!$inpre)
      {
      print TEMP "<pre>\n";
      $inpre = 1;
      }
    }
  elsif ($inpre)
    {
    print TEMP "</pre>\n";
    $inpre = 0;
    }

  # Add <br> to the end of a non-literal line if we are within .nf/.fi

  $_ .= "<br>\n" if (!$inpre && $innf);

  print TEMP;
  $wrotetext = 1;
  }

# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the accumulated page body to the standard output

close(TEMP);
print $page_body;

print <<End ;
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End
316*22dc650dSSadaf Ebrahimi
# End
318