—package
HTTP::Headers::Util;
use
strict;
use
warnings;
our
$VERSION
=
'7.00'
;
our
@EXPORT_OK
=
qw(split_header_words _split_header_words join_header_words)
;
sub
split_header_words {
my
@res
=
&_split_header_words
;
for
my
$arr
(
@res
) {
for
(
my
$i
=
@$arr
- 2;
$i
>= 0;
$i
-= 2) {
$arr
->[
$i
] =
lc
(
$arr
->[
$i
]);
}
}
return
@res
;
}
sub
_split_header_words
{
my
(
@val
) =
@_
;
my
@res
;
for
(
@val
) {
my
@cur
;
while
(
length
) {
if
(s/^\s*(=*[^\s=;,]+)//) {
# 'token' or parameter 'attribute'
push
(
@cur
, $1);
# a quoted value
if
(s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
my
$val
= $1;
$val
=~ s/\\(.)/$1/g;
push
(
@cur
,
$val
);
# some unquoted value
}
elsif
(s/^\s*=\s*([^;,\s]*)//) {
my
$val
= $1;
$val
=~ s/\s+$//;
push
(
@cur
,
$val
);
# no value, a lone token
}
else
{
push
(
@cur
,
undef
);
}
}
elsif
(s/^\s*,//) {
push
(
@res
, [
@cur
])
if
@cur
;
@cur
= ();
}
elsif
(s/^\s*;// || s/^\s+// || s/^=//) {
# continue
}
else
{
die
"This should not happen: '$_'"
;
}
}
push
(
@res
, \
@cur
)
if
@cur
;
}
@res
;
}
sub
join_header_words
{
@_
= ([
@_
])
if
@_
&& !
ref
(
$_
[0]);
my
@res
;
for
(
@_
) {
my
@cur
=
@$_
;
my
@attr
;
while
(
@cur
) {
my
$k
=
shift
@cur
;
my
$v
=
shift
@cur
;
if
(
defined
$v
) {
if
(
$v
=~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !
length
(
$v
)) {
$v
=~ s/([\
"\\])/\\$1/g; # escape "
and \
$k
.=
qq(="$v")
;
}
else
{
# token
$k
.=
"=$v"
;
}
}
push
(
@attr
,
$k
);
}
push
(
@res
,
join
(
"; "
,
@attr
))
if
@attr
;
}
join
(
", "
,
@res
);
}
1;
=pod
=encoding UTF-8
=head1 NAME
HTTP::Headers::Util - Header value parsing utility functions
=head1 VERSION
version 7.00
=head1 SYNOPSIS
use HTTP::Headers::Util qw(split_header_words);
@values = split_header_words($h->header("Content-Type"));
=head1 DESCRIPTION
This module provides a few functions that helps parsing and
construction of valid HTTP header values. None of the functions are
exported by default.
The following functions are available:
=over 4
=item split_header_words( @header_values )
This function will parse the header values given as argument into a
list of anonymous arrays containing key/value pairs. The function
knows how to deal with ",", ";" and "=" as well as quoted values after
"=". A list of space separated tokens are parsed as if they were
separated by ";".
If the @header_values passed as argument contains multiple values,
then they are treated as if they were a single value separated by
comma ",".
This means that this function is useful for parsing header fields that
follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
the requirement for tokens).
headers = #header
header = (token | parameter) *( [";"] (token | parameter))
token = 1*<any CHAR except CTLs or separators>
separators = "(" | ")" | "<" | ">" | "@"
| "," | ";" | ":" | "\" | <">
| "/" | "[" | "]" | "?" | "="
| "{" | "}" | SP | HT
quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
qdtext = <any TEXT except <">>
quoted-pair = "\" CHAR
parameter = attribute "=" value
attribute = token
value = token | quoted-string
Each I<header> is represented by an anonymous array of key/value
pairs. The keys will be all be forced to lower case.
The value for a simple token (not part of a parameter) is C<undef>.
Syntactically incorrect headers will not necessarily be parsed as you
would want.
This is easier to describe with some examples:
split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
split_header_words('text/html; charset="iso-8859-1"');
split_header_words('Basic realm="\\"foo\\\\bar\\""');
will return
[foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
['text/html' => undef, charset => 'iso-8859-1']
[basic => undef, realm => "\"foo\\bar\""]
If you don't want the function to convert tokens and attribute keys to
lower case you can call it as C<_split_header_words> instead (with a
leading underscore).
=item join_header_words( @arrays )
This will do the opposite of the conversion done by split_header_words().
It takes a list of anonymous arrays as arguments (or a list of
key/value pairs) and produces a single header value. Attribute values
are quoted if needed.
Example:
join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
join_header_words("text/plain" => undef, charset => "iso-8859/1");
will both return the string:
text/plain; charset="iso-8859/1"
=back
=head1 AUTHOR
Gisle Aas <gisle@activestate.com>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 1994 by Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
__END__
#ABSTRACT: Header value parsing utility functions