.\" -*- mode: troff; coding: utf-8 -*-
.\" Automatically generated by Pod::Man 5.0102 (Pod::Simple 3.45)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" \*(C` and \*(C' are quotes in nroff, nothing in troff, for use with C<>.
.ie n \{\
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds C`
.    ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
.    if \nF \{\
.        de IX
.        tm Index:\\$1\t\\n%\t"\\$2"
..
.        if !\nF==2 \{\
.            nr % 0
.            nr F 2
.        \}
.    \}
.\}
.rr rF
.\" ========================================================================
.\"
.IX Title "HTML::Filter 3"
.TH HTML::Filter 3 2024-09-02 "perl v5.40.0" "User Contributed Perl Documentation"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH NAME
HTML::Filter \- Filter HTML text through the parser
.SH NOTE
.IX Header "NOTE"
\&\fBThis module is deprecated.\fR The \f(CW\*(C`HTML::Parser\*(C'\fR now provides the
functionality of \f(CW\*(C`HTML::Filter\*(C'\fR much more efficiently with the
\&\f(CW\*(C`default\*(C'\fR handler.
.SH SYNOPSIS
.IX Header "SYNOPSIS"
.Vb 2
\& require HTML::Filter;
\& $p = HTML::Filter\->new\->parse_file("index.html");
.Ve
.SH DESCRIPTION
.IX Header "DESCRIPTION"
\&\f(CW\*(C`HTML::Filter\*(C'\fR is an HTML parser that by default prints the
original text of each HTML element (a slow version of \fBcat\fR\|(1) basically).
The callback methods may be overridden to modify the filtering for some
HTML elements, and you can override the \fBoutput()\fR method, which is called to
print the HTML text.
.PP
\&\f(CW\*(C`HTML::Filter\*(C'\fR is a subclass of \f(CW\*(C`HTML::Parser\*(C'\fR. This means that
the document should be given to the parser by calling the \f(CW$p\fR\->\fBparse()\fR
or \f(CW$p\fR\->\fBparse_file()\fR methods.
.SH EXAMPLES
.IX Header "EXAMPLES"
The first example is a filter that will remove all comments from an
HTML file.  This is achieved by simply overriding the comment method
to do nothing.
.PP
.Vb 4
\&  package CommentStripper;
\&  require HTML::Filter;
\&  @ISA=qw(HTML::Filter);
\&  sub comment { }  # ignore comments
.Ve
.PP
The second example shows a filter that will remove any <TABLE>s
found in the HTML file.  We specialize the \fBstart()\fR and \fBend()\fR methods
to count table tags and then suppress output while inside a
table.
.PP
.Vb 9
\&  package TableStripper;
\&  require HTML::Filter;
\&  @ISA=qw(HTML::Filter);
\&  sub start
\&  {
\&     my $self = shift;
\&     $self\->{table_seen}++ if $_[0] eq "table";
\&     $self\->SUPER::start(@_);
\&  }
\&
\&  sub end
\&  {
\&     my $self = shift;
\&     $self\->SUPER::end(@_);
\&     $self\->{table_seen}\-\- if $_[0] eq "table";
\&  }
\&
\&  sub output
\&  {
\&      my $self = shift;
\&      unless ($self\->{table_seen}) {
\&          $self\->SUPER::output(@_);
\&      }
\&  }
.Ve
.PP
If you want to collect the parsed text internally you might want to do
something like this:
.PP
.Vb 5
\&  package FilterIntoString;
\&  require HTML::Filter;
\&  @ISA=qw(HTML::Filter);
\&  sub output { push(@{$_[0]\->{fhtml}}, $_[1]) }
\&  sub filtered_html { join("", @{$_[0]\->{fhtml}}) }
.Ve
.SH "SEE ALSO"
.IX Header "SEE ALSO"
HTML::Parser
.SH COPYRIGHT
.IX Header "COPYRIGHT"
Copyright 1997\-1999 Gisle Aas.
.PP
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.