#! /usr/bin/perl -w

# Script to turn PCRE2 man pages into HTML.
#
# Usage: <this-script> [-toc] <page-name>  <man-page-source  >html-output
#
# The man page is read from STDIN and the HTML is written to STDOUT. The first
# non-option argument is used as the page name in the HTML title and heading.
# With -toc, a table of contents (one entry per .SH section other than NAME)
# is written before the body, and each section heading links back to it.
#
# Because the table of contents has to appear before the body, the body is
# accumulated in memory while the input is read and is written out only after
# the whole input has been processed.

use strict;
use warnings;

# Conversion state, shared by the subroutines below.

my $innf      = 0;     # true while inside a .nf/.fi section
my $inpara    = 0;     # true while a <P> is open
my $inpre     = 0;     # true while a <pre> is open in the current paragraph
my $wrotetext = 0;     # true once text has been written since end_para()
my $toc       = 0;     # true if -toc was given
my $ref       = 1;     # number of the next table-of-contents entry
my $body      = '';    # HTML body, held back until the input is exhausted

# Subroutine to append text to the held-back HTML body

sub emit {
    $body .= join('', @_);
    return;
}

# Subroutine to read a lookahead line from STDIN. At end of input an empty
# string is returned instead of undef, so callers can treat it as a blank
# line without triggering "uninitialized value" warnings.

sub read_lookahead {
    my $line = <STDIN>;
    return defined $line ? $line : '';
}

# Subroutine to handle font changes and other escapes. It takes one line of
# man page text and returns the HTML version: < and > become entities, \fI
# and \fB font changes terminated by \fR or \fP become <i> and <b> elements,
# \e becomes a backslash, and "(c)" after "Copyright " becomes &copy;. An
# ampersand is passed through unchanged.

sub do_line {
    my ($s) = @_;

    $s =~ s/</&lt;/g;                     # Deal with < and > first, so that
    $s =~ s/>/&gt;/g;                     # the tags added below survive
    $s =~ s{\\fI(.*?)\\f[RP]}{<i>$1</i>}g;
    $s =~ s{\\fB(.*?)\\f[RP]}{<b>$1</b>}g;
    $s =~ s{\\e}{\\}g;
    $s =~ s/(?<=Copyright )\(c\)/&copy;/g;
    return $s;
}

# Subroutine to ensure not in a paragraph. Any open <pre> and <P> are closed
# and the "wrote text" flag is reset.

sub end_para {
    if ($inpara) {
        emit("</PRE>\n") if $inpre;
        emit("</P>\n");
    }
    $inpara = $inpre = 0;
    $wrotetext = 0;
    return;
}

# Subroutine to start a new paragraph, closing the current one if necessary

sub new_para {
    end_para();
    emit("<P>\n");
    $inpara = 1;
    return;
}

# Main program

sub main {
    local $_;

    # Process options; only -toc is recognized, others are skipped. An
    # explicit @ARGV is needed because a bare shift inside a subroutine
    # would operate on @_.

    while (@ARGV && $ARGV[0] =~ /^-/) {
        $toc = 1 if $ARGV[0] eq '-toc';
        shift @ARGV;
    }

    my $page = defined $ARGV[0] ? $ARGV[0] : '';

    # Initial output to STDOUT

    print <<"End";
<html>
<head>
<title>$page specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>$page man page</h1>
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
<p>
This page is part of the PCRE2 HTML documentation. It was generated
automatically from the original man page. If there is any nonsense in it,
please consult the man page, in case the conversion went wrong.
<br>
End

    print "<ul>\n" if $toc;

    LINE: while (<STDIN>) {

        # Handle lines beginning with a dot

        if (/^\./) {

            # Some of the PCRE2 man pages used to contain instances of .br.
            # However, they should have all been removed because they cause
            # trouble in some (other) automated systems that translate man
            # pages to HTML. Complain if we find .br or .in (another macro
            # that is deprecated).

            if (/^\.br/ || /^\.in/) {
                print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
                print STDERR "*** $_\n";
                die "*** Processing abandoned\n";
            }

            # Instead of .br, relevant "literal" sections are enclosed in
            # .nf/.fi.

            elsif (/^\.nf/) {
                $innf = 1;
            }

            elsif (/^\.fi/) {
                $innf = 0;
            }

            # Handling .sp is subtle. If it is inside a literal section, do
            # nothing if the next line is a non literal text line; similarly,
            # if not inside a literal section, do nothing if a literal
            # follows, unless we are inside a .nf/.fi section or about to
            # enter one. The point being that the <pre> and </pre> that
            # delimit literal sections will do the spacing. Always skip if no
            # previous output.

            elsif (/^\.sp/) {
                if ($wrotetext) {
                    $_ = read_lookahead();
                    if ($inpre) {
                        emit("\n") if /^[\s.]/;
                    }
                    else {
                        emit("<br>\n<br>\n")
                            if ($innf || /^\.nf/ || !/^[\s.]/);
                    }
                    redo LINE;    # Now process the lookahead line we just read
                }
            }

            elsif (/^\.TP/ || /^\.PP/ || /^\.P/) {
                new_para();
            }

            elsif (/^\.SH\s*("?)(.*)\1/) {
                my $heading = $2;

                # Ignore the NAME section, including the line that follows
                # the heading.

                if ($heading =~ /^NAME\b/) {
                    my $skipped = <STDIN>;
                    next LINE;
                }

                end_para();
                my $title = do_line($heading);
                if ($toc) {

                    # The title is passed as a %s argument, never as part of
                    # the format, so that a % in a heading is output as is.

                    printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">%s</a>\n",
                        $ref, $ref, $title);
                    emit(sprintf(
                        "<br><a name=\"SEC%d\" href=\"#TOC1\">%s</a><br>\n",
                        $ref, $title));
                    $ref++;
                }
                else {
                    emit("<br><b>\n$title\n</b><br>\n");
                }
            }

            elsif (/^\.SS\s*("?)(.*)\1/) {
                my $heading = $2;
                end_para();
                my $title = do_line($heading);
                emit("<br><b>\n$title\n</b><br>\n");
            }

            # .B and .I put the rest of the line in bold or italic, with any
            # double quotes around arguments removed.

            elsif (/^\.([BI])\s*(.*)/) {
                my ($tag, $text) = (lc($1), $2);
                new_para() if !$inpara;
                $text = do_line($text);
                $text =~ s/"(.*?)"/$1/g;
                emit("<$tag>$text</$tag>\n");
                $wrotetext = 1;
            }

            # Remove the "AUTOMATICALLY GENERATED" warning from pcre2demo.3

            elsif (/^\.\\"AUTOMATICALLY GENERATED/) {
                next LINE;
            }

            # A comment that starts "HREF" takes the next line as a name that
            # is turned into a hyperlink, using the text given, which might
            # be in a special font. If it ends in () or (digits) or
            # punctuation, they aren't part of the link.

            elsif (/^\.\\"\s*HREF/) {
                my $text = read_lookahead();
                chomp $text;
                $text = do_line($text);
                $text =~ s/\s+$//;
                if ($text =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/) {
                    emit("<a href=\"$1.html\">$text</a>\n");
                }
                else {

                    # No link target can be extracted; output the text
                    # unlinked rather than a link to ".html".

                    print STDERR "*** HREF target not recognized: $text\n";
                    emit("$text\n");
                }
            }

            # A comment that starts "HTML" inserts literal HTML

            elsif (/^\.\\"\s*HTML\s*(.*)/) {
                emit($1);
            }

            # A comment that starts < inserts that HTML at the end of the
            # *next* input line - so as not to get a newline between them.

            elsif (/^\.\\"\s*(<.*>)/) {
                my $markup = $1;
                my $text   = read_lookahead();
                chomp $text;
                $text = do_line($text);
                $text =~ s/\s+$//;
                emit("$text$markup\n");
            }

            # A comment that starts JOIN joins the next two lines together,
            # with one space between them. Then that line is processed. This
            # is used in some displays where two lines are needed for the
            # "man" version. JOINSH works the same, except that it assumes
            # this is a shell command, so removes continuation backslashes.

            elsif (/^\.\\"\s*JOIN(SH)?/) {
                my $is_shell = defined $1;
                my $one      = read_lookahead();
                my $two      = read_lookahead();
                $one =~ s/\s*\\e\s*$// if $is_shell;
                chomp $one;
                $two =~ s/^\s+//;
                $_ = "$one $two";
                redo LINE;    # Process the joined lines
            }

            # .EX/.EE are used in the pcre2demo page to bracket the entire
            # program, which is unmodified except for turning backslash into
            # "\e". Here "\e" is turned back into a backslash and the
            # characters that are special in HTML are escaped.

            elsif (/^\.EX\s*$/) {
                emit("<PRE>\n");
                PROGRAM: while (<STDIN>) {
                    last PROGRAM if /^\.EE\s*$/;
                    s/\\e/\\/g;
                    s/&/&amp;/g;    # Must come before < and > are escaped
                    s/</&lt;/g;
                    s/>/&gt;/g;
                    emit($_);
                }
                emit("</PRE>\n");
            }

            # Ignore anything not recognized

            next LINE;
        }

        # Line does not begin with a dot. Replace blank lines with new
        # paragraphs

        if (/^\s*$/) {
            end_para() if $wrotetext;
            next LINE;
        }

        # Convert fonts changes and output an ordinary line. Ensure that
        # indented lines are marked as literal.

        $_ = do_line($_);
        new_para() if !$inpara;

        if (/^\s/) {
            if (!$inpre) {
                emit("<pre>\n");
                $inpre = 1;
            }
        }
        elsif ($inpre) {
            emit("</pre>\n");
            $inpre = 0;
        }

        # Add <br> to the end of a non-literal line if we are within .nf/.fi

        $_ .= "<br>\n" if (!$inpre && $innf);

        emit($_);
        $wrotetext = 1;
    }

    # The TOC, if present, will have been written - terminate it

    print "</ul>\n" if $toc;

    # Copy the held-back body to the standard output

    print $body;

    print <<"End";
<p>
Return to the <a href="index.html">PCRE2 index page</a>.
</p>
End

    return;
}

# Run the conversion only when executed as a script, so that the subroutines
# can be loaded on their own.

main() unless caller;

1;

# End