xref: /aosp_15_r20/external/pcre/132html (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
1#! /usr/bin/perl -w
2
3# Script to turn PCRE2 man pages into HTML
4
5
# Subroutine to handle font changes and other escapes.
#
# Takes one line of man page text as its only argument and returns a copy that
# is safe to place in HTML:
#   - the markup characters &, < and > are turned into character references;
#   - \fI...\fR or \fI...\fP becomes <i>...</i>, and likewise \fB becomes <b>;
#   - the troff escape \e becomes a literal backslash;
#   - "(c)" that immediately follows "Copyright " becomes &copy;.
# The caller's string is not modified.

sub do_line {
my($s) = $_[0];

$s =~ s/&/&amp;/g;                   # Must be first: every substitution below
                                     # produces "&" as part of a reference
$s =~ s/</&#60;/g;                   # Deal with < and >
$s =~ s/>/&#62;/g;
$s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
$s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
$s =~ s"\\e"\\"g;
$s =~ s/(?<=Copyright )\(c\)/&copy;/g;
$s;
}
19
# Subroutine to ensure not in a paragraph.
#
# When a paragraph is open, its closing tag is written to TEMP, preceded by
# the closing tag of a literal section if one is still open inside it. The
# paragraph, literal-section and text-written flags are cleared in every case.

sub end_para {
    if ($inpara) {
        my $closing = "</P>\n";
        $closing = "</PRE>\n" . $closing if ($inpre);
        print TEMP $closing;
    }
    $inpre     = 0;
    $inpara    = 0;
    $wrotetext = 0;
}
31
# Subroutine to start a new paragraph.
#
# Any paragraph that is still open is closed first (via end_para), then the
# opening tag is written to TEMP and the in-paragraph flag is set.

sub new_para {
    end_para();
    print TEMP "<P>\n";
    $inpara = 1;
}
39
40
# Main program

# Conversion state, shared with the subroutines above:
#   $innf       set between .nf and .fi
#   $inpara     set while a <P> paragraph is open
#   $inpre      set while a <pre> literal section is open
#   $wrotetext  set once text has been output; cleared by end_para
#   $toc        set when -toc was given on the command line
#   $ref        number used for the next TOC/SEC anchor pair

$innf = 0;
$inpara = 0;
$inpre = 0;
$wrotetext = 0;
$toc = 0;
$ref = 1;

# Consume leading options. Only -toc has an effect; any other argument that
# starts with "-" is discarded.

while (@ARGV && $ARGV[0] =~ /^-/)
  {
  $toc = 1 if $ARGV[0] eq "-toc";
  shift @ARGV;
  }

# The first remaining argument is the page name used in the title and the
# heading. Fall back to an empty string so that a missing argument does not
# interpolate an undefined value.

my($pagename) = defined($ARGV[0]) ? $ARGV[0] : "";

# Initial output to STDOUT

print <<End ;
<html>
<head>
<title>$pagename specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$pagename man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End

print "<ul>\n" if ($toc);

# The body of the page is collected in a temporary file while the table of
# contents entries go straight to STDOUT; the file is copied to STDOUT at the
# end of the script.
# NOTE(review): /tmp/$$ is a predictable name. It is kept because the end of
# the script re-opens and removes this same path; consider File::Temp if the
# script is ever run in a shared /tmp with untrusted users.

open(TEMP, ">", "/tmp/$$") || die "Can't open /tmp/$$ for output: $!\n";
78
# Main loop: read the man page source from STDIN one line at a time. Output
# for the body of the page is written to TEMP; table of contents entries are
# written directly to STDOUT.

while (<STDIN>)
  {
  # Handle lines beginning with a dot

  if (/^\./)
    {
    # Some of the PCRE2 man pages used to contain instances of .br. However,
    # they should have all been removed because they cause trouble in some
    # (other) automated systems that translate man pages to HTML. Complain if
    # we find .br or .in (another macro that is deprecated).

    if (/^\.br/ || /^\.in/)
      {
      print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
      print STDERR "*** $_\n";
      die "*** Processing abandoned\n";
      }

    # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.

    elsif (/^\.nf/)
      {
      $innf = 1;
      }

    elsif (/^\.fi/)
      {
      $innf = 0;
      }

    # Handling .sp is subtle. If it is inside a literal section, do nothing if
    # the next line is a non literal text line; similarly, if not inside a
    # literal section, do nothing if a literal follows, unless we are inside
    # a .nf/.fi section or about to enter one. The point being that the <pre>
    # and </pre> that delimit literal sections will do the spacing. Always skip
    # if no previous output.

    elsif (/^\.sp/)
      {
      if ($wrotetext)
        {
        $_ = <STDIN>;
        $_ = "" if (!defined($_));   # .sp was the last line of the input
        if ($inpre)
          {
          print TEMP "\n" if (/^[\s.]/);
          }
        else
          {
          print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
          }
        redo;    # Now process the lookahead line we just read
        }
      }
    elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
      {
      &new_para();
      }
    elsif (/^\.SH\s*("?)(.*)\1/)
      {
      # Ignore the NAME section: skip the heading and the line that follows it
      if ($2 =~ /^NAME\b/)
        {
        <STDIN>;
        next;
        }

      &end_para();
      my($title) = &do_line($2);
      if ($toc)
        {
        # The title is passed as a %s argument, not interpolated into the
        # format, so that a "%" in a heading is output literally.
        printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
          $ref, $ref, $title);
        printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
          $ref, $title);
        $ref++;
        }
      else
        {
        print TEMP "<br><b>\n$title\n</b><br>\n";
        }
      }
    elsif (/^\.SS\s*("?)(.*)\1/)
      {
      &end_para();
      my($title) = &do_line($2);
      print TEMP "<br><b>\n$title\n</b><br>\n";
      }
    elsif (/^\.B\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;
      print TEMP "<b>$_</b>\n";
      $wrotetext = 1;
      }
    elsif (/^\.I\s*(.*)/)
      {
      &new_para() if (!$inpara);
      $_ = &do_line($1);
      s/"(.*?)"/$1/g;
      print TEMP "<i>$_</i>\n";
      $wrotetext = 1;
      }

    # Remove the "AUTOMATICALLY GENERATED" warning from pcre2demo.3
    elsif (/^\.\\"AUTOMATICALLY GENERATED/) { next; }

    # A comment that starts "HREF" takes the next line as a name that
    # is turned into a hyperlink, using the text given, which might be
    # in a special font. If it ends in () or (digits) or punctuation, they
    # aren't part of the link.

    elsif (/^\.\\"\s*HREF/)
      {
      $_=<STDIN>;
      $_ = "" if (!defined($_));     # HREF was the last line of the input
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      if ($_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/)
        {
        print TEMP "<a href=\"$1.html\">$_</a>\n";
        }
      else
        {
        # No link target can be extracted; write the text without a link
        # instead of producing a link to ".html".
        print STDERR "*** Cannot derive a link target from HREF text: $_\n";
        print TEMP "$_\n";
        }
      }

    # A comment that starts "HTML" inserts literal HTML

    elsif (/^\.\\"\s*HTML\s*(.*)/)
      {
      print TEMP $1;
      }

    # A comment that starts < inserts that HTML at the end of the
    # *next* input line - so as not to get a newline between them.

    elsif (/^\.\\"\s*(<.*>)/)
      {
      my($markup) = $1;
      $_=<STDIN>;
      $_ = "" if (!defined($_));     # The comment was the last line of the input
      chomp;
      $_ = &do_line($_);
      $_ =~ s/\s+$//;
      print TEMP "$_$markup\n";
      }

    # A comment that starts JOIN joins the next two lines together, with one
    # space between them. Then that line is processed. This is used in some
    # displays where two lines are needed for the "man" version. JOINSH works
    # the same, except that it assumes this is a shell command, so removes
    # continuation backslashes.

    elsif (/^\.\\"\s*JOIN(SH)?/)
      {
      my($one,$two);
      $one = <STDIN>;
      $two = <STDIN>;
      $one = "" if (!defined($one)); # Input ended before both lines were read
      $two = "" if (!defined($two));
      $one =~ s/\s*\\e\s*$// if (defined($1));   # $1 is set only for JOINSH
      chomp($one);
      $two =~ s/^\s+//;
      $_ = "$one $two";
      redo;            # Process the joined lines
      }

    # .EX/.EE are used in the pcre2demo page to bracket the entire program,
    # which is unmodified except for turning backslash into "\e".

    elsif (/^\.EX\s*$/)
      {
      print TEMP "<PRE>\n";
      while (<STDIN>)
        {
        last if /^\.EE\s*$/;
        s/\\e/\\/g;
        s/&/&amp;/g;
        s/</&lt;/g;
        s/>/&gt;/g;
        print TEMP;
        }
      print TEMP "</PRE>\n";         # Close the element opened above
      }

    # Ignore anything not recognized

    next;
    }

  # Line does not begin with a dot. Replace blank lines with new paragraphs

  if (/^\s*$/)
    {
    &end_para() if ($wrotetext);
    next;
    }

  # Convert fonts changes and output an ordinary line. Ensure that indented
  # lines are marked as literal.

  $_ = &do_line($_);
  &new_para() if (!$inpara);

  if (/^\s/)
    {
    if (!$inpre)
      {
      print TEMP "<pre>\n";
      $inpre = 1;
      }
    }
  elsif ($inpre)
    {
    print TEMP "</pre>\n";
    $inpre = 0;
    }

  # Add <br> to the end of a non-literal line if we are within .nf/.fi

  $_ .= "<br>\n" if (!$inpre && $innf);

  print TEMP;
  $wrotetext = 1;
  }
296
# The TOC, if present, will have been written - terminate it

print "</ul>\n" if ($toc);

# Copy the remainder to the standard output. The body was collected in the
# temporary file so that the table of contents entries, which are written to
# STDOUT while the input is read, come before it.

# Buffered write errors (a full disk, for example) are only reported by
# close, so check it; otherwise a truncated page would be copied silently.

if (!close(TEMP))
  {
  my($error) = $!;
  unlink("/tmp/$$");
  die "Can't finish writing /tmp/$$: $error\n";
  }

open(TEMP, "<", "/tmp/$$") || die "Can't open /tmp/$$ for input: $!\n";

print while (<TEMP>);

print <<End ;
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End

close(TEMP);
unlink("/tmp/$$");
316
317# End
318