package HTML::FormatText; # $Id: FormatText.pm,v 1.11 1996/04/09 15:44:12 aas Exp $ =head1 NAME HTML::FormatText - Format HTML as text =head1 SYNOPSIS require HTML::FormatText; $html = parse_htmlfile("test.html"); $formatter = new HTML::FormatText; print $formatter->format($html); =head1 DESCRIPTION The HTML::FormatText is a formatter that output plain latin1 text. All character attributes (bold/italic/underline) are ignored. Formatting of HTML tables and forms is not implemented. =head1 SEE ALSO L<HTML::Formatter> =head1 COPYRIGHT Copyright (c) 1995 Gisle Aas. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =head1 AUTHOR Gisle Aas <aas@oslonett.no> =cut require HTML::Formatter; @ISA = qw(HTML::Formatter); use strict; sub begin { my $self = shift; $self->HTML::Formatter::begin; $self->{lm} = 3; # left margin $self->{rm} = 70; # right margin $self->{curpos} = 0; # current output position. $self->{maxpos} = 0; # highest value of $pos (used by header underliner) } sub end { shift->collect("\n"); } sub header_start { my($self, $level, $node) = @_; $self->vspace(1 + (6-$level) * 0.4); $self->{maxpos} = 0; $self->eat_leading_space; 1; } sub header_end { my($self, $level, $node) = @_; if ($level <= 2) { my $line; $line = '=' if $level == 1; $line = '-' if $level == 2; $self->vspace(0); $self->out($line x ($self->{maxpos} - $self->{lm})); } $self->vspace(1); 1; } sub hr_start { my $self = shift; $self->vspace(1); $self->out('-' x ($self->{rm} - $self->{lm})); $self->vspace(1); } sub pre_out { my $self = shift; # should really handle bold/italic etc. if (defined $self->{vspace}) { if ($self->{out}) { $self->nl() while $self->{vspace}-- > -0.5; $self->{vspace} = undef; } } my $indent = ' ' x $self->{lm}; my $pre = shift; $pre =~ s/\n/\n$indent/g; $self->collect($pre); $self->{out}++; } sub out { my $self = shift; my $text = shift; if (defined $self->{vspace}) { if ($self->{out}) { $self->nl while $self->{vspace}-- >= 0; $self->goto_lm; } else { $self->goto_lm; } $self->{vspace} = undef; } if ($self->{curpos} > $self->{rm}) { # line is too long, break it return if $text =~ /^\s*$/; # white space at eol is ok $self->nl; $self->goto_lm; } if ($self->{pending_space}) { $self->{pending_space} = 0; $self->collect(' '); my $pos = ++$self->{curpos}; $self->{maxpos} = $pos if $self->{maxpos} < $pos; } $self->{pending_space} = 1 if $text =~ s/\s+$//; return unless length $text; $self->collect($text); my $pos = $self->{curpos} += length $text; $self->{maxpos} = $pos if $self->{maxpos} < $pos; $self->{'out'}++; } sub goto_lm { my $self = shift; my $pos = $self->{curpos}; my $lm = $self->{lm}; if ($pos < $lm) { $self->{curpos} = $lm; $self->collect(" " x ($lm - $pos)); } } sub nl { my $self = shift; $self->{'out'}++; $self->{pending_space} = 0; $self->{curpos} = 0; $self->collect("\n"); } sub adjust_lm { my $self = shift; $self->{lm} += $_[0]; $self->goto_lm; } sub adjust_rm { shift->{rm} += $_[0]; } 1;