xref: /aosp_15_r20/external/pcre/CleanTxt (revision 22dc650d8ae982c6770746019a6f94af92b0f024)
#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.

use strict;
use warnings;

# Input on STDIN; output to STDOUT. The filter runs only when this file is
# executed as a script; if the file is loaded from other Perl code, just the
# subroutines are defined, so clean_text() can be driven with any pair of
# filehandles.

clean_text(\*STDIN, \*STDOUT) unless caller;


# Remove the terminal decorations from one line of nroff output and return
# the cleaned copy. The argument itself is not modified.

sub strip_controls
  {
  my ($text) = @_;
  $text =~ s/\x1b\[\d+m//g;   # Remove screen controls "ESC [ number m"
  $text =~ s/.\x8//g;         # Remove "char, backspace"
  return $text;
  }


# Read the next line from the filehandle $in and return it with the terminal
# decorations removed, or return undef at end of input.

sub read_clean_line
  {
  my ($in) = @_;
  my $raw = <$in>;
  return defined $raw ? strip_controls($raw) : undef;
  }


# Copy the text read from filehandle $in to filehandle $out, dropping
# repeated page headers and page footers and normalising the runs of blank
# lines around them.
#
# Blank lines are never written as they are read; they are only counted, and
# the count is flushed in front of the next non-blank line that is printed.
# As a consequence, blank lines at the very end of the input are dropped.

sub clean_text
  {
  my ($in, $out) = @_;

  my $blankcount = 0;     # Blank lines seen but not yet written
  my $lastwascut = 0;     # True just after a page footer has been removed
  my $firstheader = 1;    # True until the first header line has been seen
  my $lastprinted;        # Last non-blank line written; undef until then
  my $line;               # Declared outside the loop so "redo" keeps it

  LINE: while (defined($line = read_clean_line($in)))
    {
    # Handle header lines. Retain only the first one we encounter, but remove
    # the blank line that follows. Any others (e.g. at end of document) and
    # the following blank line are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/)
      {
      if ($firstheader)
        {
        $firstheader = 0;
        print {$out} $line;
        $lastprinted = $line;
        $lastwascut = 0;
        }

      # Remove a blank that follows. If the following line is not blank it
      # is real content, so process it as the current line rather than
      # silently losing it.

      my $following = read_clean_line($in);
      last LINE if !defined $following;
      next LINE if $following =~ /^\s*$/;
      $line = $following;
      redo LINE;
      }

    # Count runs of empty lines

    if ($line =~ /^\s*$/)
      {
      $blankcount++;
      $lastwascut = 0;
      next LINE;
      }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined($lastprinted))
      {
      my ($previndent) = $lastprinted =~ /^(\s*)/;
      my ($thisindent) = $line =~ /^(\s*)/;
      $blankcount++ if $previndent ne $thisindent;
      }

    # We get here only when we have a non-blank line in hand. If it was
    # preceded by 3 or more blank lines, read the next 3 lines and see if
    # they are blank. If so, remove all 7 lines, and remember that we have
    # just done a cut.

    if ($blankcount >= 3)
      {
      my @next;
      for my $i (0 .. 2)
        {
        my $ahead = read_clean_line($in);
        $next[$i] = defined $ahead ? $ahead : "";   # End of input reads as blank
        }

      # Cut out chunks of the form <3 blanks><non-blank><3 blanks>

      if ($next[0] =~ /^\s*$/ &&
          $next[1] =~ /^\s*$/ &&
          $next[2] =~ /^\s*$/)
        {
        $blankcount -= 3;
        $lastwascut = 1;
        }

      # Otherwise output the saved blanks, the current, and the next three
      # lines. Remember the last non-blank line printed, because that is the
      # line whose indentation must be compared after a later cut.

      else
        {
        print {$out} "\n" x $blankcount;
        print {$out} $line;
        $lastprinted = $line;
        for my $ahead (@next)
          {
          print {$out} $ahead;
          $lastprinted = $ahead if $ahead =~ /\S/;
          }
        $lastwascut = 0;
        $blankcount = 0;
        }
      }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings.

    else
      {
      $blankcount = 2 if $line =~ /^\S/ &&
                         $line !~ /^Last updated/ &&
                         $line !~ /^Copyright/ &&
                         defined($lastprinted);
      print {$out} "\n" x $blankcount;
      print {$out} $line;
      $lastprinted = $line;
      $lastwascut = 0;
      $blankcount = 0;
      }
    }

  return;
  }

1;

# End
114