#! /usr/bin/perl -w

# Script to take the output of nroff -man and remove all the backspacing and
# the page footers and the screen commands etc so that it is more usefully
# readable online. In fact, in the latest nroff, intermediate footers don't
# seem to be generated any more.
#
# Input on STDIN; output to STDOUT.

use strict;
use warnings;

my $blankcount  = 0;    # Blank lines read but not yet written out
my $lastwascut  = 0;    # True right after a <3 blanks><line><3 blanks> cut
my $firstheader = 1;    # True until the first header line has been printed
my $lastprinted;        # Most recent non-blank line written to STDOUT

# Return a copy of the given line with terminal escapes and overstrike
# sequences removed.
sub strip_controls {
    my ($text) = @_;
    $text =~ s/\x1b\[\d+m//g;    # Remove screen controls "ESC [ number m"
    $text =~ s/.\x8//g;          # Remove "char, backspace"
    return $text;
}

while (defined(my $raw = <STDIN>)) {
    my $line = strip_controls($raw);

    # Handle header lines. Retain only the first one we encounter, but remove
    # the line that follows it (expected to be blank; it is discarded without
    # being inspected). Any other header lines (e.g. at end of document) and
    # the line following them are dropped.

    if ($line =~ /^PCRE(\w*)\(([13])\)\s+PCRE\1\(\2\)$/) {
        if ($firstheader) {
            $firstheader = 0;
            print $line;
            $lastprinted = $line;
            $lastwascut  = 0;
        }
        my $discarded = <STDIN>;    # Remove a blank that follows
        next;
    }

    # Count runs of empty lines; they are output lazily, once we know whether
    # they belong to a page footer that is going to be cut.

    if ($line =~ /^\s*$/) {
        $blankcount++;
        $lastwascut = 0;
        next;
    }

    # If a chunk of lines has been cut out (page footer) and the next line
    # has a different indentation, put back one blank line.

    if ($lastwascut && $blankcount < 1 && defined $lastprinted) {
        my ($prev_indent) = $lastprinted =~ /^(\s*)/;
        my ($this_indent) = $line =~ /^(\s*)/;
        $blankcount++ if $prev_indent ne $this_indent;
    }

    # We get here only when we have a non-blank line in hand. If it was
    # preceded by 3 or more blank lines, read the next 3 lines and see if they
    # are blank. If so, remove all 7 lines, and remember that we have just
    # done a cut.

    if ($blankcount >= 3) {
        my @next;
        for my $i (0 .. 2) {
            my $ahead = <STDIN>;

            # At end of input, treat the missing line as empty (i.e. blank).
            $next[$i] = defined $ahead ? strip_controls($ahead) : "";
        }

        # Cut out chunks of the form <3 blanks><non-blank><3 blanks>. Any
        # blanks beyond the three that belong to the chunk stay pending.

        if (   $next[0] =~ /^\s*$/
            && $next[1] =~ /^\s*$/
            && $next[2] =~ /^\s*$/)
        {
            $blankcount -= 3;
            $lastwascut = 1;
        }

        # Otherwise output the saved blanks, the current, and the next three
        # lines. Remember the last non-blank line that was printed, so that a
        # later indentation comparison after a cut uses the line that really
        # precedes the cut rather than a stale one.

        else {
            print "\n" x $blankcount;
            print $line;
            $lastprinted = $line;
            for my $ahead (@next) {
                print $ahead;
                $lastprinted = $ahead if $ahead =~ /\S/;
            }
            $lastwascut = 0;
            $blankcount = 0;
        }
    }

    # This non-blank line is not preceded by 3 or more blank lines. Output
    # any blanks there are, and the line. Remember it. Force two blank lines
    # before headings (lines starting in column one, other than the
    # "Last updated" and "Copyright" lines), except at the very start of the
    # output when nothing has been printed yet.

    else {
        $blankcount = 2
            if $line =~ /^\S/
            && $line !~ /^Last updated/
            && $line !~ /^Copyright/
            && defined $lastprinted;
        print "\n" x $blankcount;
        print $line;
        $lastprinted = $line;
        $lastwascut  = 0;
        $blankcount  = 0;
    }
}

# End