NAME
Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".
SYNOPSIS
use Tag::Reader::Perl;
my $obj = Tag::Reader::Perl->new;
my @tokens = $obj->gettoken;
$obj->set_file($file, $force);
$obj->set_text($text, $force);
METHODS
new()
-
Constructor.
gettoken()
-
Get parsed token. In array context, returns the structure describing the parsed token (see TOKEN STRUCTURE), e.g. <xml> → ('<xml>', 'xml', 1, 1). In scalar context, returns only the parsed token data, e.g. <xml> → '<xml>'.
set_file($file[, $force])
-
Set file for parsing. If $force is present, the file replaces any previously set text or file; without it, setting new data while previous data exists is an error.
set_text($text[, $force])
-
Set text for parsing. If $force is present, the text replaces any previously set text or file; without it, setting new data while previous data exists is an error.
TOKEN STRUCTURE
Structure contains 4 fields in array:
- parsed data
- tag type
- number of line
- number of column in line
Tag types are:
- '[\w:]+' - element name.
- '/[\w:]+' - end of element name.
- '!data' - data
- '![cdata[' - cdata
- '!--' - comment
- '?\w+' - instruction
- '![\w+' - conditional
- '!attlist' - DTD attlist
- '!element' - DTD element
- '!entity' - DTD entity
- '!notation' - DTD notation
ERRORS
new():
From Class::Utils::set_params():
Unknown parameter '%s'.
set_text():
Bad tag.
Bad text.
Cannot set new data if exists data.
set_file():
Bad tag.
Bad file.
Cannot set new data if exists data.
Cannot open file '%s'.
EXAMPLE1
# Pragmas.
use strict;
use warnings;
# Modules.
use Encode qw(decode_utf8 encode_utf8);
use Tag::Reader::Perl;
# Object.
my $obj = Tag::Reader::Perl->new;
# Example data.
my $sgml = <<'END';
<DOKUMENT>
<adresa stát="cs">
<město>
<ulice>Nová</ulice>
<číslo>5</číslo>
</adresa>
</DOKUMENT>
END
# Set data to object.
$obj->set_text(decode_utf8($sgml));
# Tokenize.
while (my @tag = $obj->gettoken) {
print "[\n";
print "\t[0]: '".encode_utf8($tag[0])."'\n";
print "\t[1]: ".encode_utf8($tag[1])."\n";
print "\t[2]: $tag[2]\n";
print "\t[3]: $tag[3]\n";
print "]\n";
}
# Output:
# [
# [0]: '<DOKUMENT>'
# [1]: dokument
# [2]: 1
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 1
# [3]: 11
# ]
# [
# [0]: '<adresa stát="cs">'
# [1]: adresa
# [2]: 2
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 2
# [3]: 21
# ]
# [
# [0]: '<město>'
# [1]: město
# [2]: 3
# [3]: 5
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 3
# [3]: 12
# ]
# [
# [0]: '<ulice>'
# [1]: ulice
# [2]: 4
# [3]: 5
# ]
# [
# [0]: 'Nová'
# [1]: !data
# [2]: 4
# [3]: 12
# ]
# [
# [0]: '</ulice>'
# [1]: /ulice
# [2]: 4
# [3]: 16
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 4
# [3]: 24
# ]
# [
# [0]: '<číslo>'
# [1]: číslo
# [2]: 5
# [3]: 5
# ]
# [
# [0]: '5'
# [1]: !data
# [2]: 5
# [3]: 12
# ]
# [
# [0]: '</číslo>'
# [1]: /číslo
# [2]: 5
# [3]: 13
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 5
# [3]: 21
# ]
# [
# [0]: '</adresa>'
# [1]: /adresa
# [2]: 6
# [3]: 3
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 6
# [3]: 12
# ]
# [
# [0]: '</DOKUMENT>'
# [1]: /dokument
# [2]: 7
# [3]: 1
# ]
# [
# [0]: '
# '
# [1]: !data
# [2]: 7
# [3]: 12
# ]
DEPENDENCIES
Class::Utils, Error::Pure, Readonly.
SEE ALSO
- Tag::Reader
-
Parse SGML/HTML/XML by each "tag".
- HTML::TagReader
-
Perl extension module for reading html/sgml/xml files by tags.
AUTHOR
Michal Špaček mailto:skim@cpan.org
LICENSE AND COPYRIGHT
© Michal Špaček 2005-2016
BSD 2-Clause License
VERSION
0.01