NAME

Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag".

SYNOPSIS

use Tag::Reader::Perl;
my $obj = Tag::Reader::Perl->new;
my @tokens = $obj->gettoken;
$obj->set_file($file, $force);
$obj->set_text($text, $force);

METHODS

new()
Constructor.
gettoken()
Get parsed token.
Returns a structure describing the parsed token in array context. See TOKEN STRUCTURE.
e.g. <xml> → ('<xml>', 'xml', 1, 1)
Returns the parsed token in scalar context.
e.g. <xml> → '<xml>'
set_file($file[, $force])
Set file for parsing.
If $force is present, reset the file for parsing if a previous text or file exists.
set_text($text[, $force])
Set text for parsing.
If $force is present, reset the text for parsing if a previous text or file exists.

TOKEN STRUCTURE

Structure contains 4 fields in array:
- parsed data
- tag type
- number of line
- number of column in line

Tag types are:
- '[\w:]+' - element name.
- '/[\w:]+' - end of element name.
- '!data' - data
- '![cdata[' - cdata
- '!--' - comment
- '?\w+' - instruction
- '![\w+' - conditional
- '!attlist' - DTD attlist
- '!element' - DTD element
- '!entity' - DTD entity
- '!notation' - DTD notation

ERRORS

new():
        From Class::Utils::set_params():
                Unknown parameter '%s'.

set_text():
        Bad tag.
        Bad text.
        Cannot set new data if exists data.

set_file():
        Bad tag.
        Bad file.
        Cannot set new data if exists data.
        Cannot open file '%s'.

EXAMPLE1

# Pragmas.
use strict;
use warnings;

# Modules.
use Encode qw(decode_utf8 encode_utf8);
use Tag::Reader::Perl;

# Object.
my $obj = Tag::Reader::Perl->new;

# Example data.
my $sgml = <<'END';
<DOKUMENT> 
  <adresa stát="cs">
    <město>
    <ulice>Nová</ulice>
    <číslo>5</číslo>
  </adresa>
</DOKUMENT>
END

# Set data to object.
$obj->set_text(decode_utf8($sgml));

# Tokenize.
while (my @tag = $obj->gettoken) {
        print "[\n";
        print "\t[0]: '".encode_utf8($tag[0])."'\n";
        print "\t[1]: ".encode_utf8($tag[1])."\n";
        print "\t[2]: $tag[2]\n";
        print "\t[3]: $tag[3]\n";
        print "]\n";
}

# Output:
# [
# 	[0]: '<DOKUMENT>'
# 	[1]: dokument
# 	[2]: 1
# 	[3]: 1
# ]
# [
# 	[0]: ' 
#   '
# 	[1]: !data
# 	[2]: 1
# 	[3]: 11
# ]
# [
# 	[0]: '<adresa stát="cs">'
# 	[1]: adresa
# 	[2]: 2
# 	[3]: 3
# ]
# [
# 	[0]: '
#     '
# 	[1]: !data
# 	[2]: 2
# 	[3]: 21
# ]
# [
# 	[0]: '<město>'
# 	[1]: město
# 	[2]: 3
# 	[3]: 5
# ]
# [
# 	[0]: '
#     '
# 	[1]: !data
# 	[2]: 3
# 	[3]: 12
# ]
# [
# 	[0]: '<ulice>'
# 	[1]: ulice
# 	[2]: 4
# 	[3]: 5
# ]
# [
# 	[0]: 'Nová'
# 	[1]: !data
# 	[2]: 4
# 	[3]: 12
# ]
# [
# 	[0]: '</ulice>'
# 	[1]: /ulice
# 	[2]: 4
# 	[3]: 16
# ]
# [
# 	[0]: '
#     '
# 	[1]: !data
# 	[2]: 4
# 	[3]: 24
# ]
# [
# 	[0]: '<číslo>'
# 	[1]: číslo
# 	[2]: 5
# 	[3]: 5
# ]
# [
# 	[0]: '5'
# 	[1]: !data
# 	[2]: 5
# 	[3]: 12
# ]
# [
# 	[0]: '</číslo>'
# 	[1]: /číslo
# 	[2]: 5
# 	[3]: 13
# ]
# [
# 	[0]: '
#   '
# 	[1]: !data
# 	[2]: 5
# 	[3]: 21
# ]
# [
# 	[0]: '</adresa>'
# 	[1]: /adresa
# 	[2]: 6
# 	[3]: 3
# ]
# [
# 	[0]: '
# '
# 	[1]: !data
# 	[2]: 6
# 	[3]: 12
# ]
# [
# 	[0]: '</DOKUMENT>'
# 	[1]: /dokument
# 	[2]: 7
# 	[3]: 1
# ]
# [
# 	[0]: '
# '
# 	[1]: !data
# 	[2]: 7
# 	[3]: 12
# ]

DEPENDENCIES

Class::Utils, Error::Pure, Readonly.

SEE ALSO

Tag::Reader

Parse SGML/HTML/XML by each "tag".

HTML::TagReader

Perl extension module for reading html/sgml/xml files by tags.

AUTHOR

Michal Špaček mailto:skim@cpan.org

http://skim.cz

LICENSE AND COPYRIGHT

© Michal Špaček 2005-2016
BSD 2-Clause License

VERSION

0.01