$VERSION
=
'0.13'
;
my
@RESULTS
=
qw( PASS FAIL )
;
my
$FIXED
=
$HTML::TokeParser::VERSION
>= 3.69 ? 1 : 0;
my
%declarations
= (
'<!DOCTYPE html>'
=> 3,
'xhtml1-strict.dtd'
=> 2,
'xhtml1-transitional.dtd'
=> 2,
'xhtml1-frameset.dtd'
=> 2,
'html401-strict.dtd'
=> 1,
'html401-loose.dtd'
=> 1,
'html401-frameset.dtd'
=> 1,
);
my
%deprecated
= (
'a'
=> {
2
=> {
attr
=> [
qw(charset coords datafld datasrc methods name rev shape urn)
] } },
'acronym'
=> {
0
=> {
tag
=> [
qw(abbr)
] } },
'applet'
=> {
0
=> {
tag
=> [
qw(object)
] },
1
=> {
attr
=> [
qw(align alt archive code codebase height hspace name object vspace width)
] },
2
=> {
attr
=> [
qw(datafld datasrc)
] } },
'area'
=> {
2
=> {
attr
=> [
qw(nohref)
] } },
'b'
=> {
0
=> {
tag
=> [
qw(strong)
] } },
'basefont'
=> {
0
=> {
css
=> [
qw(font color)
] },
1
=> {
attr
=> [
qw(color face size)
] } },
'big'
=> {
0
=> {
css
=> [
qw(font-size)
] } },
'blockquote'
=> {
0
=> {
css
=> [
qw(margin)
] } },
'body'
=> {
1
=> {
attr
=> [
qw(alink background bgcolor link text vlink)
] },
2
=> {
attr
=> [
qw(alink background bgcolor link marginbottom marginheight marginleft marginright margintop marginwidth text vlink)
] } },
'br'
=> {
1
=> {
attr
=> [
qw(clear)
] } },
'button'
=> {
2
=> {
attr
=> [
qw(datafld dataformatas datasrc)
] } },
'caption'
=> {
1
=> {
attr
=> [
qw(align)
] },
2
=> {
attr
=> [
qw(align)
] } },
'center'
=> {
0
=> {
css
=> [
qw(text-align)
] } },
'col'
=> {
2
=> {
attr
=> [
qw(align char charoff valign width)
] } },
'dir'
=> {
0
=> {
tag
=> [
qw(ul)
] },
1
=> {
attr
=> [
qw(compact)
] } },
'div'
=> {
1
=> {
attr
=> [
qw(align)
] },
2
=> {
attr
=> [
qw(align datafld dataformatas datasrc)
] } },
'dl'
=> {
1
=> {
attr
=> [
qw(compact)
] },
2
=> {
attr
=> [
qw(compact)
] } },
'embed'
=> {
0
=> {
tag
=> [
qw(object)
] },
2
=> {
attr
=> [
qw(align hspace name vspace)
] },
3
=> {
tag
=> [
qw(embed)
] } },
'fieldset'
=> {
2
=> {
attr
=> [
qw(datafld)
] } },
'font'
=> {
0
=> {
css
=> [
qw(font color)
] },
1
=> {
attr
=> [
qw(color face size)
] } },
'form'
=> {
2
=> {
attr
=> [
qw(name)
] } },
'frame'
=> {
0
=> {
tag
=> [
qw(iframe)
] },
2
=> {
attr
=> [
qw(datafld datasrc name)
] } },
'frameset'
=> {
0
=> {
tag
=> [
qw(iframe)
] } },
'h1'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'h2'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'h3'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'h4'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'h5'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'h6'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'head'
=> {
2
=> {
attr
=> [
qw(profile)
] } },
'hr'
=> {
1
=> {
attr
=> [
qw(align noshade size width)
] },
2
=> {
attr
=> [
qw(color)
] } },
'html'
=> {
1
=> {
attr
=> [
qw(version)
] } },
'i'
=> {
0
=> {
css
=> [
qw(font-style)
] } },
'iframe'
=> {
1
=> {
attr
=> [
qw(align)
] },
2
=> {
attr
=> [
qw(align allowtransparency datafld datasrc frameborder hspace longdesc marginheight marginwidth name scrolling vspace)
] } },
'img'
=> {
1
=> {
attr
=> [
qw(align border hspace vspace)
] },
2
=> {
attr
=> [
qw(datafld datasrc longdesc lowsrc name)
] } },
'input'
=> {
1
=> {
attr
=> [
qw(align)
] },
2
=> {
attr
=> [
qw(datafld dataformatas datasrc hspace usemap vspace)
] } },
'isindex'
=> {
0
=> {
tag
=> [
qw(input)
] },
1
=> {
attr
=> [
qw(prompt)
] } },
'label'
=> {
2
=> {
attr
=> [
qw(datafld dataformatas datasrc)
] } },
'layer'
=> {
0
=> {
css
=> [
qw(position)
] } },
'legend'
=> {
1
=> {
attr
=> [
qw(align)
] },
2
=> {
attr
=> [
qw(datafld dataformatas datasrc)
] } },
'li'
=> {
1
=> {
attr
=> [
qw(type value)
] } },
'link'
=> {
2
=> {
attr
=> [
qw(charset methods rev target urn)
] } },
'map'
=> {
2
=> {
attr
=> [
qw(name)
] } },
'marquee'
=> {
2
=> {
attr
=> [
qw(datafld dataformatas datasrc)
] } },
'menu'
=> {
0
=> {
tag
=> [
qw(ul)
] },
1
=> {
attr
=> [
qw(compact)
] } },
'meta'
=> {
2
=> {
attr
=> [
qw(scheme)
] } },
'noframes'
=> {
0
=> {
tag
=> [
qw(iframe)
] } },
'object'
=> {
1
=> {
attr
=> [
qw(align border hspace vspace)
] },
2
=> {
attr
=> [
qw(archive classid code codebase codetype datafld dataformatas datasrc declare standby)
] } },
'ol'
=> {
1
=> {
attr
=> [
qw(compact start type)
] } },
'option'
=> {
2
=> {
attr
=> [
qw(dataformatas datasrc name)
] } },
'param'
=> {
2
=> {
attr
=> [
qw(datafld type valuetype)
] } },
'p'
=> {
1
=> {
attr
=> [
qw(align)
] } },
'pre'
=> {
1
=> {
attr
=> [
qw(width)
] } },
's'
=> {
0
=> {
css
=> [
qw(text-decoration)
] } },
'script'
=> {
1
=> {
attr
=> [
qw(language)
] },
2
=> {
attr
=> [
qw(event for)
] } },
'select'
=> {
2
=> {
attr
=> [
qw(datafld dataformatas datasrc)
] } },
'span'
=> {
2
=> {
attr
=> [
qw(datafld dataformatas datasrc)
] } },
'strike'
=> {
0
=> {
css
=> [
qw(text-decoration)
] } },
'table'
=> {
1
=> {
attr
=> [
qw(align bgcolor)
] },
2
=> {
attr
=> [
qw(background cellpadding cellspacing dataformatas datapagesize datasrc frame rules summary width)
] } },
'tbody'
=> {
2
=> {
attr
=> [
qw(align background char charoff valign)
] } },
'td'
=> {
1
=> {
attr
=> [
qw(bgcolor height nowrap width)
] },
2
=> {
attr
=> [
qw(abbr align axis background char charoff valign)
] } },
'textarea'
=> {
2
=> {
attr
=> [
qw(datafld datasrc)
] } },
'tfoot'
=> {
2
=> {
attr
=> [
qw(align background char charoff valign)
] } },
'th'
=> {
1
=> {
attr
=> [
qw(bgcolor height nowrap width)
] },
2
=> {
attr
=> [
qw(abbr align axis background char charoff valign)
] } },
'thead'
=> {
2
=> {
attr
=> [
qw(align background char charoff valign)
] } },
'tr'
=> {
1
=> {
attr
=> [
qw(bgcolor)
] },
2
=> {
attr
=> [
qw(align background char charoff valign)
] } },
'tt'
=> {
0
=> {
css
=> [
qw(text-decoration)
] } },
'u'
=> {
0
=> {
css
=> [
qw(text-decoration)
] } },
'ul'
=> {
1
=> {
attr
=> [
qw(compact type)
] } },
);
my
@TAGS
= (
'a'
,
'abbr'
,
'acronym'
,
'address'
,
'applet'
,
'area'
,
'b'
,
'base'
,
'basefont'
,
'bdo'
,
'big'
,
'blockquote'
,
'body'
,
'br'
,
'button'
,
'caption'
,
'center'
,
'cite'
,
'code'
,
'col'
,
'colgroup'
,
'dd'
,
'del'
,
'dfn'
,
'dir'
,
'div'
,
'dl'
,
'dt'
,
'em'
,
'embed'
,
'fieldset'
,
'font'
,
'form'
,
'frame'
,
'frameset'
,
'head'
,
'h1'
,
'h2'
,
'h3'
,
'h4'
,
'h5'
,
'h6'
,
'hr'
,
'html'
,
'i'
,
'iframe'
,
'img'
,
'input'
,
'ins'
,
'isindex'
,
'kbd'
,
'label'
,
'layer'
,
'legend'
,
'li'
,
'link'
,
'map'
,
'menu'
,
'meta'
,
'noframes'
,
'noscript'
,
'object'
,
'ol'
,
'optgroup'
,
'option'
,
'p'
,
'param'
,
'pre'
,
'q'
,
's'
,
'samp'
,
'script'
,
'select'
,
'small'
,
'span'
,
'strike'
,
'strong'
,
'style'
,
'summary'
,
'sub'
,
'table'
,
'tbody'
,
'td'
,
'textarea'
,
'tfoot'
,
'th'
,
'thead'
,
'title'
,
'tr'
,
'tt'
,
'u'
,
'ul'
,
'var'
,
'/form'
);
sub
new {
my
$proto
=
shift
;
my
$class
=
ref
(
$proto
) ||
$proto
;
my
$self
= {
dtdtype
=> 0 };
$self
->{RESULTS}{
$_
} = 0
for
(
@RESULTS
);
bless
(
$self
,
$class
);
return
$self
;
}
sub
DESTROY {
my
$self
=
shift
;
}
__PACKAGE__->mk_accessors(
qw( logfile logclean )
);
sub
validate { _process_checks(
@_
); }
sub
results { _process_results(
@_
); }
sub
clear {
my
$self
=
shift
;
$self
->{ERRORS} =
undef
;
$self
->_reset_results(); }
sub
errors {
my
$self
=
shift
;
return
$self
->{ERRORS}; }
sub
errstr {
my
$self
=
shift
;
return
$self
->_print_errors(); }
sub
_process_results {
my
$self
=
shift
;
my
%results
=
map
{
$_
=>
$self
->{RESULTS}{
$_
}}
@RESULTS
;
$self
->_log(
sprintf
"%8s%d\n"
,
"$_:"
,
$results
{
$_
} )
for
(
@RESULTS
);
return
\
%results
;
}
sub
_reset_results {
my
$self
=
shift
;
$self
->{RESULTS}{
$_
} = 0
for
(
@RESULTS
);
}
sub
_print_errors {
my
$self
=
shift
;
my
$str
=
"\nErrors:\n"
;
my
$i
= 1;
for
my
$error
(@{
$self
->{ERRORS}}) {
$str
.=
"$i. $error->{error}: $error->{message}"
;
$str
.=
" [$error->{ref}]"
if
(
$error
->{
ref
});
$str
.=
" [row $error->{row}, column $error->{col}]"
if
(
$FIXED
&& (
$error
->{row} ||
$error
->{col}));
$str
.=
"\n"
;
$i
++;
}
return
$str
;
}
sub
_process_checks {
my
$self
=
shift
;
my
$html
=
shift
;
$self
->{
$_
} =
undef
for
(
qw(input label form links)
);
if
(
$html
) {
my
$p
=
$FIXED
? HTML::TokeParser->new( \
$html
,
start
=>
"'S',tagname,attr,attrseq,text,line,column"
,
end
=>
"'E',tagname,text,line,column"
)
: HTML::TokeParser->new( \
$html
);
my
$token
=
$p
->get_token();
if
(
$token
&&
$token
->[0] eq
'D'
) {
my
$declaration
=
$token
->[1];
$declaration
=~ s/\s+/ /sg;
for
my
$type
(
keys
%declarations
) {
if
(
$declaration
=~ /
$type
/) {
$self
->{dtdtype} =
$declarations
{
$type
};
last
;
}
}
}
else
{
$p
->unget_token(
$token
);
}
while
(
my
$tag
=
$p
->get_tag(
@TAGS
) ) {
if
(
$tag
->[0] eq
uc
$tag
->[0]) {
$self
->_check_case(
$tag
);
$tag
->[0] =
lc
$tag
->[0];
}
$self
->_check_deprecated(
$tag
);
if
(
$tag
->[0] eq
'map'
) {
$self
->_check_name(
$tag
);
}
elsif
(
$tag
->[0] eq
'img'
) {
$self
->_check_name(
$tag
);
$self
->_check_size(
$tag
);
}
elsif
(
$tag
->[0] eq
'a'
) {
$self
->_check_policy1(
$tag
,
$p
);
}
elsif
(
$tag
->[0] eq
'script'
) {
$self
->_check_language(
$tag
);
}
elsif
(
$tag
->[0] eq
'title'
) {
$self
->_check_title(
$tag
,
$p
);
}
}
$self
->_check_policy2();
}
else
{
push
@{
$self
->{ERRORS} }, {
error
=>
"missing content"
,
message
=>
'no XHTML content found'
};
}
if
(
$self
->{ERRORS}) {
$self
->_log(
"FAIL\n"
);
$self
->{RESULTS}{FAIL}++;
}
else
{
$self
->_log(
"PASS\n"
);
$self
->{RESULTS}{PASS}++;
}
}
sub
_check_case {
my
(
$self
,
$tag
) =
@_
;
if
(
$self
->{dtdtype} == 1) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C001"
,
message
=>
"W3C recommends use of lowercase in HTML 4 (<$tag->[0]>)"
,
row
=>
$tag
->[2],
col
=>
$tag
->[3]
};
}
elsif
(
$self
->{dtdtype} == 2) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C002"
,
message
=>
"declaration requires lowercase tags (<$tag->[0]>)"
,
row
=>
$tag
->[2],
col
=>
$tag
->[3]
};
}
}
sub
_check_deprecated {
my
(
$self
,
$tag
) =
@_
;
return
unless
(
$deprecated
{
$tag
->[0] });
my
(
$elem
,
@css
);
for
my
$dtdtype
(
sort
{
$b
<=>
$a
}
keys
%{
$deprecated
{
$tag
->[0]}}) {
$elem
||=
$deprecated
{
$tag
->[0]}{
$dtdtype
}{tag};
push
@css
, @{
$deprecated
{
$tag
->[0]}{
$dtdtype
}{css} }
if
(
$deprecated
{
$tag
->[0]}{
$dtdtype
}{css});
next
unless
(
$self
->{dtdtype} >
$dtdtype
);
next
unless
(
$deprecated
{
$tag
->[0]}{
$dtdtype
}{attr});
for
my
$attr
(@{
$deprecated
{
$tag
->[0]}{
$dtdtype
}{attr} }) {
if
(
$tag
->[1]{
$attr
}) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C010"
,
message
=>
"'$attr' attribute deprecated in <$tag->[0]> tag"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
}
}
if
(
$elem
&&
$elem
->[0] ne
$tag
->[0]) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C011"
,
message
=>
"<$tag->[0]> has been deprecated in favour of <$elem->[0]>"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
elsif
(
@css
) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C012"
,
message
=>
"<$tag->[0]> has been deprecated in favour of CSS elements ("
.
join
(
','
,
@css
).
")"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
}
sub
_check_name {
my
(
$self
,
$tag
) =
@_
;
if
(
$tag
->[1]{name}) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C003"
,
message
=>
"name attribute deprecated in <$tag->[0]> tag"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
}
sub
_check_size {
my
(
$self
,
$tag
) =
@_
;
if
(!
$tag
->[1]{width} || !
$tag
->[1]{height}) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C004"
,
message
=>
"width and height attributes allow for pre-rendering <$tag->[0]> tags ($tag->[1]{src})"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
}
sub
_check_language {
my
(
$self
,
$tag
) =
@_
;
if
(
$tag
->[1]{language}) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C005"
,
message
=>
"language attribute deprecated in <$tag->[0]> tag"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
}
sub
_check_policy1 {
my
(
$self
,
$tag
,
$p
) =
@_
;
my
$x
=
$p
->get_text();
if
(
$x
=~ /privacy policy/i
|| (
$tag
->[1]{title} &&
$tag
->[1]{title} =~ /privacy policy/i)
||
$x
=~ /terms.
*conditions
/i
|| (
$tag
->[1]{title} &&
$tag
->[1]{title} =~ /terms.
*conditions
/i) ) {
$self
->{policy}{privacy} = 1;
}
if
(
$x
=~ /home/i
|| (
$tag
->[1]{title} &&
$tag
->[1]{title} =~ /home/i) ) {
$self
->{policy}{home} = 1;
}
}
sub
_check_policy2 {
my
(
$self
) =
@_
;
if
(!
$self
->{policy}{privacy}) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C006"
,
message
=>
"no link to a privacy policy"
};
}
if
(!
$self
->{policy}{home}) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C007"
,
message
=>
"no home page link"
};
}
}
sub
_check_title {
my
(
$self
,
$tag
,
$p
) =
@_
;
my
$x
=
$p
->get_text();
if
(
length
$x
> 64) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C008"
,
message
=>
"W3C recommend <title> should not be longer than 64 characters ["
.(
substr
(
$x
,0,64)).
"]"
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
if
(
$x
=~ /['"(){}\[\]]/) {
push
@{
$self
->{ERRORS} }, {
error
=>
"C009"
,
message
=>
qq!avoid using the characters '"(){}[] in <title> tag - <$x>!
,
row
=>
$tag
->[4],
col
=>
$tag
->[5]
};
}
}
sub
_log {
my
$self
=
shift
;
my
$log
=
$self
->logfile or
return
;
mkpath(dirname(
$log
))
unless
(-f
$log
);
my
$mode
=
$self
->logclean ?
'w+'
:
'a+'
;
$self
->logclean(0);
my
$fh
= IO::File->new(
$log
,
$mode
) or
die
"Cannot write to log file [$log]: $!\n"
;
print
$fh
@_
;
$fh
->
close
;
}
1;