#!/usr/bin/perl =begin metadata Name: col Description: filter reverse line feeds from input Author: Ronald J Kimball, rjk-perl@tamias.net License: perl =end metadata =cut use strict; use File::Basename qw(basename); use Getopt::Std qw(getopts); use constant EX_SUCCESS => 0; use constant EX_FAILURE => 1; my $Program = basename($0); our $VERSION = '1.2'; use vars qw($opt_b $opt_f $opt_h $opt_p $opt_s $opt_t $opt_x); getopts('bfhpxl:st') or usage(); usage() if @ARGV; my @buf = []; # character buffer my $col = 0; # current column number my $row = 0; # current row number my $max_row = -2; # max populated row my $iset = 1; # current input character set (1 or 2) my $oset = 1; # current output character set (1 or 2) my $front = 0; # note whether at front of line in output my $spaces = ''; # save spaces for converting to tabs # prepare regexes for matching linefeeds # any full reverse half reverse half forward my @re = qw( [7-9\x07-\x09] ^[7\x07]$ ^[8\x08]$ ^[9\x09]$ ); # INPUT while (<>) { # chop trailing characters not followed by a linefeed of any kind s/^(.*(?:\x0A|\e$re[0])|).*$/$1/sxo if $opt_t; my @chars = split m//; my $i; for ($i=0; $i<=$#chars; ++$i) { my $c = $chars[$i]; if ($c eq "\x1B" or $c eq "\x0B") { if ($c eq "\x0B" or $chars[++$i] =~ /$re[1]/xo) { # reverse line feed $row -= 2 if $row >= 2; } elsif ($chars[$i] =~ /$re[2]/xo) { # half reverse line feed $row-- if $row >= 1; } elsif ($chars[$i] =~ /$re[3]/xo) { # half forward line feed $row += 1; } else { # unrecognized escape if ($opt_p) { add_char("\x1B"); add_char($chars[$i]); } else { } } } elsif ($c eq "\x08") { # backspace $col--; $col >= 0 or $col = 0; } elsif ($c eq "\x0D") { # carriage return $col = 0; } elsif ($c eq "\x0A") { # line feed $row += 2; $col = 0; } elsif ($c eq "\x0F") { # shift in $iset = 1; } elsif ($c eq "\x0E") { # shift out $iset = 2; } elsif ($c eq "\x20") { # space $col++; } elsif ($c eq "\x09") { # tab $col += (($col % 8) || 8); } elsif ($c gt "\x20") { # printable character add_char($c); } else { # unrecognized ctl char } # start row if necessary $buf[$row] ||= []; } } # add_char # add a character at the current row and column # uses globals @buf, $row, $max_row, $col, $iset, $opt_f, $opt_b sub add_char { my($char) = @_; # move to next full line, if necessary my $r = $row; if (!$opt_f) { $r += $row & 1; # start row if necessary $buf[$r] ||= []; } # start column if necessary $buf[$row][$col] ||= []; if ($opt_b) { # no backspacing - just save last character/set $buf[$r][$col][0] = [$char, $iset]; } else { # backspacing - add character/set to end of list push @{$buf[$r][$col]}, [$char, $iset]; } $col++; $max_row = $r if $r > $max_row; } # OUTPUT # add a blank row if buffer does not end with one if ($max_row == $#buf) { push @buf, []; } # add a blank row if buffer ends on half line if ($#buf & 1) { push @buf, []; } for ($row=0; $row < $max_row; ) { # print the current line print_line($row); # print appropriate line ending if ($buf[$row+1] and @{$buf[$row+1]}) { # half line feed print "\e9"; print "\x0D" if !$front; } else { # full line feed print "\x0A"; # skip half line ++$row; } $front = 1; ++$row; } # print the last populated line ($row == $max_row) print_line($row); # shift in if necessary if (!$opt_s) { if ($oset == 2) { print "\x0F"; $oset = 1; } } # print linefeeds for remaining blank lines for ($row++; $row < $#buf; $row += 2) { print "\x0A"; $front = 1; } # print half line feed if necessary if ($max_row & 1) { print "\e9"; print "\x0D" if !$front; $front = 1; } exit EX_SUCCESS; # print_line # print the current line # return character set of the last character # uses globals @buf, $oset, $front, $opt_x, $opt_s sub print_line { my($row) = @_; if (@{$buf[$row]}) { $front = 0; } my $col; for ($col=0; $col<=$#{$buf[$row]}; ++$col) { if ($buf[$row][$col]) { if ($spaces) { # print saved spaces print $spaces; $spaces = ''; } my $b; for my $char (@{$buf[$row][$col]}) { # print backspace if necessary print "\x08" if $b++; # switch character set if necessary if ($char->[1] != $oset) { $oset = $char->[1]; print ($oset == 1 ? "\x0F" : "\x0E"); } # print character print $char->[0]; } } else { # no characters; space if ($opt_x) { print ' '; } else { if (not (($col+1) % 8)) { # at tab stop print "\t"; $spaces = ''; } else { $spaces .= ' '; } } } } # switch to character set 1 for line ending if ($opt_s) { if ($oset == 2) { print "\x0F"; $oset = 1; } } } sub usage { warn "usage: $Program [-bfhpstx] [-l num]\n"; exit EX_FAILURE; } sub VERSION_MESSAGE { print "$Program version $VERSION\n"; exit EX_SUCCESS; } __END__ =pod =head1 NAME col - filter reverse line feeds from input =head1 SYNOPSIS B<col> [B<-bfhpstx>] [B<-l num>] =head1 DESCRIPTION B<col> filters out reverse (and half-reverse) line feeds so that the output is in the correct order with only forward and half-forward line feeds, and replaces whitespace characters with tabs where possible. This can be useful in processing the output of nroff(1) and tbl(1). B<col> reads from the standard input and writes to the standard output. =head2 OPTIONS B<col> accepts the following standard options: =over 4 =item B<-b> Do not output any backspaces, printing only the last character written to each column position. =item B<-f> Forward half-line feeds are permitted ("fine" mode). Without this option, characters which would be printed on a half-line boundary are printed on the following line. =item B<-h> Compress spaces into tabs. This option is the default behavior. =item B<-p> Output unrecognized escape sequences. Without this option, unrecognized escape sequences are ignored. Because escape sequences may be overprinted from reverse line feeds, the use of this option is highly discouraged unless the user is fully aware of the textual position of the escape sequences. =item B<-x> Output multiple spaces instead of tabs. Tab stops are eight characters apart. This option takes precedence over the B<-h> option. =item B<-l> I<num> This option is ignored for compatibility reasons. =back B<col> also accepts the following options for compatibility with other implementations: =over 4 =item B<-s> Shift in before each line feed when in the alternate character set (as in IRIX B<col>). Without this option, B<col> only shifts in before the final line endings (as in BSD B<col>). =item B<-t> Ignore trailing input that is not followed by a line feed (as in IRIX B<col>). Without this option, a final line feed is not necessary (as in BSD B<col>.) =back The control sequences and their decimal ASCII values that col understands are listed in the following table: ESC-7 Reverse line feed (escape then 7). ESC-8 Half reverse line feed (escape then 8). ESC-9 Half forward line feed (escape then 9). backspace Moves back one column (8); ignored in the first column. carriage return (13) newline Forward line feed (10); also does carriage return. shift in Shift to normal character set (15). shift out Shift to alternate character set (14). space Moves forward one column (32). tab Moves forward to next tab stop (9). vertical tab Reverse line feed (11). =head1 BUGS This implementation of B<col> has no known bugs. =head1 CAVEATS Reverse line feeds and half reverse line feeds which would move past the start of the buffer are ignored. Unrecognized control characters are ignored. Unrecognized escape sequences are ignored, unless the B<-p> option is used. Some versions of B<col> for BSD may convert spaces to tabs incorrectly. This implementation of B<col> does not emulate that bug. The B<col> utilities on other systems may have further differences in behavior. =head1 AUTHOR This implementation of B<col> in Perl was written by Ronald J Kimball, I<rjk-perl@tamias.net> =head1 COPYRIGHT and LICENSE This program is copyright 2000 by Ronald J Kimball. This program is free and open software. You may use, modify, or distribute this program (and any modified variants) in any way you wish, provided you do not restrict others from doing the same. =cut