NAME
Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".
SYNOPSIS
use Tag::Reader::Perl;
my $obj = Tag::Reader::Perl->new;
my @tokens = $obj->gettoken;
$obj->set_file($file, $force);
$obj->set_text($text, $force);
METHODS
new()
my $obj = Tag::Reader::Perl->new;
Constructor.
Returns instance of object.
gettoken
my @tokens = $obj->gettoken;
Get parsed token.
Returns the structure describing the parsed token in list context (see TOKEN STRUCTURE), e.g. <xml> → ('<xml>', 'xml', 1, 1).
Returns the parsed token itself in scalar context, e.g. <xml> → '<xml>'.
set_file
$obj->set_file($file, $force);
Set file for parsing. If $force is present, the file is reset for parsing even if a previous text or file exists.
Returns undef.
set_text
$obj->set_text($text, $force);
Set text for parsing. If $force is present, the text is reset for parsing even if a previous text or file exists.
Returns undef.
TOKEN STRUCTURE
The structure is an array with 4 fields:
- parsed data
- tag type
- number of line
- number of column in line
Tag types are:
- '[\w:]+' - element name.
- '/[\w:]+' - end of element name.
- '!data' - data
- '![cdata[' - cdata
- '!--' - comment
- '?\w+' - processing instruction
- '![\w+' - conditional
- '!attlist' - DTD attlist
- '!element' - DTD element
- '!entity' - DTD entity
- '!notation' - DTD notation
ERRORS
new():
        From Class::Utils::set_params():
                Unknown parameter '%s'.
set_text():
        Bad tag.
        Bad text.
        Cannot set new data if exists data.
set_file():
        Bad tag.
        Bad file.
        Cannot set new data if exists data.
        Cannot open file '%s'.
EXAMPLE1
use strict;
use warnings;
use Tag::Reader::Perl;
use Unicode::UTF8 qw(decode_utf8 encode_utf8);
# Object.
my $obj = Tag::Reader::Perl->new;
# Example data.
my $sgml = <<'END';
<DOKUMENT> 
  <adresa stát="cs">
    <město>
    <ulice>Nová</ulice>
    <číslo>5</číslo>
  </adresa>
</DOKUMENT>
END
# Set data to object.
$obj->set_text(decode_utf8($sgml));
# Tokenize.
while (my @tag = $obj->gettoken) {
        print "[\n";
        print "\t[0]: '".encode_utf8($tag[0])."'\n";
        print "\t[1]: ".encode_utf8($tag[1])."\n";
        print "\t[2]: $tag[2]\n";
        print "\t[3]: $tag[3]\n";
        print "]\n";
}
# Output:
# [
# 	[0]: '<DOKUMENT>'
# 	[1]: dokument
# 	[2]: 1
# 	[3]: 1
# ]
# [
# 	[0]: ' 
#   '
# 	[1]: !data
# 	[2]: 1
# 	[3]: 11
# ]
# [
# 	[0]: '<adresa stát="cs">'
# 	[1]: adresa
# 	[2]: 2
# 	[3]: 3
# ]
# [
# 	[0]: '
#     '
# 	[1]: !data
# 	[2]: 2
# 	[3]: 21
# ]
# [
# 	[0]: '<město>'
# 	[1]: město
# 	[2]: 3
# 	[3]: 5
# ]
# [
# 	[0]: '
#     '
# 	[1]: !data
# 	[2]: 3
# 	[3]: 12
# ]
# [
# 	[0]: '<ulice>'
# 	[1]: ulice
# 	[2]: 4
# 	[3]: 5
# ]
# [
# 	[0]: 'Nová'
# 	[1]: !data
# 	[2]: 4
# 	[3]: 12
# ]
# [
# 	[0]: '</ulice>'
# 	[1]: /ulice
# 	[2]: 4
# 	[3]: 16
# ]
# [
# 	[0]: '
#     '
# 	[1]: !data
# 	[2]: 4
# 	[3]: 24
# ]
# [
# 	[0]: '<číslo>'
# 	[1]: číslo
# 	[2]: 5
# 	[3]: 5
# ]
# [
# 	[0]: '5'
# 	[1]: !data
# 	[2]: 5
# 	[3]: 12
# ]
# [
# 	[0]: '</číslo>'
# 	[1]: /číslo
# 	[2]: 5
# 	[3]: 13
# ]
# [
# 	[0]: '
#   '
# 	[1]: !data
# 	[2]: 5
# 	[3]: 21
# ]
# [
# 	[0]: '</adresa>'
# 	[1]: /adresa
# 	[2]: 6
# 	[3]: 3
# ]
# [
# 	[0]: '
# '
# 	[1]: !data
# 	[2]: 6
# 	[3]: 12
# ]
# [
# 	[0]: '</DOKUMENT>'
# 	[1]: /dokument
# 	[2]: 7
# 	[3]: 1
# ]
# [
# 	[0]: '
# '
# 	[1]: !data
# 	[2]: 7
# 	[3]: 12
# ]
DEPENDENCIES
Class::Utils, Error::Pure, Readonly.
SEE ALSO
- Tag::Reader - Parse SGML/HTML/XML by each "tag".
- HTML::TagReader - Perl extension module for reading html/sgml/xml files by tags.
 
REPOSITORY
https://github.com/michal-josef-spacek/Tag-Reader-Perl
AUTHOR
Michal Josef Špaček mailto:skim@cpan.org
LICENSE AND COPYRIGHT
© Michal Josef Špaček 2005-2021
BSD 2-Clause License
VERSION
0.02