————package
MIME::AltWords;
# Dat: `package MIME::AltWords0' would be incorrect. We don't need the zero.
# copy of package MIME::Words (version 5.420);
# at Wed Sep 27 07:52:29 CEST 2006
=head1 NAME
MIME::AltWords0 - copy of MIME::Words (not for direct use)
=head1 SYNOPSIS
L<MIME::AltWords0> is an auxilary package used by L<MIME::AltWords>.
If you want to encode or decode MIME words (such as
C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in e-mail message headers (mostly
Subject, From and To), the recommended Perl module to use is
L<MIME::AltWords>. Please don't use L<MIME::AltWords0> (which is
useless by itself anyway) or L<MIME::Words> (version 5.420 has several
serious bugs both with encoding and decoding).
Most users shouldn't read on, but they should read L<MIME::AltWords>
instead.
=head1 DESCRIPTION
The following functions have been moved to L<MIME::AltWords>:
decode_mimewords() (some code stays here as decode_mimewords_wantarray()),
use MIME::AltWords0 qw(:all);
### Split string into array of decoded [DATA,CHARSET] pairs:
@decoded = decode_mimewords_wantarray(
'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>',
);
Fellow Americans, you probably won't know what the hell this module
is for. Europeans, Russians, et al, you probably do. C<:-)>.
For example, here's a valid MIME header you might get:
From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard <PIRARD@vm1.ulg.ac.be>
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
=?US-ASCII?Q?.._cool!?=
The fields basically decode to (sorry, I can only approximate the
Latin characters with 7 bit sequences /o and 'e):
From: Keith Moore <moore@cs.utk.edu>
To: Keld J/orn Simonsen <keld@dkuug.dk>
CC: Andr'e Pirard <PIRARD@vm1.ulg.ac.be>
Subject: If you can read this you understand the example... cool!
=head1 PUBLIC INTERFACE
=over 4
=cut
require
5.001;
### Pragmas:
use
strict;
### Exporting:
use
Exporter;
%EXPORT_TAGS
= (
all
=> [
qw(decode_mimewords
)
]);
Exporter::export_ok_tags(
'all'
);
### Inheritance:
@ISA
=
qw(Exporter)
;
### Other modules:
use
MIME::Base64;
use
MIME::QuotedPrint;
#------------------------------
#
# Globals...
#
#------------------------------
### The package version, both in 1.23 style *and* usable by MakeMaker:
$VERSION
=
"5.420"
;
### Nonprintables (controls + x7F + 8bit):
$NONPRINT
=
"\\x00-\\x1F\\x7F-\\xFF"
;
#------------------------------
# _decode_Q STRING
# Private: used by _decode_header() to decode "Q" encoding, which is
# almost, but not exactly, quoted-printable. :-P
sub
_decode_Q {
my
$str
=
shift
;
$str
=~ s/_/\x20/g;
# RFC-1522, Q rule 2
$str
=~ s/=([\da-fA-F]{2})/
pack
(
"C"
,
hex
($1))/ge;
# RFC-1522, Q rule 1
$str
;
}
# _encode_Q STRING
# Private: used by _encode_header() to decode "Q" encoding, which is
# almost, but not exactly, quoted-printable. :-P
sub
_encode_Q {
my
$str
=
shift
;
$str
=~ s{([_\?\=
$NONPRINT
])}{
sprintf
(
"=%02X"
,
ord
($1))}eog;
$str
;
}
# _decode_B STRING
# Private: used by _decode_header() to decode "B" encoding.
sub
_decode_B {
my
$str
=
shift
;
decode_base64(
$str
);
}
# _encode_B STRING
# Private: used by _decode_header() to decode "B" encoding.
sub
_encode_B {
my
$str
=
shift
;
encode_base64(
$str
,
''
);
}
#------------------------------
=item decode_mimewords_wantarray ENCODED, [OPTS...]
I<Function.>
Go through the string looking for RFC-1522-style "Q"
(quoted-printable, sort of) or "B" (base64) encoding, and decode them.
B<In an array context,> splits the ENCODED string into a list of decoded
C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
data are returned in a 1-element array C<[DATA]>, giving an effective
CHARSET of C<undef>.
$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>';
foreach (decode_mimewords($enc)) {
print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
}
B<In a scalar context,> the result is undocumented, please see
L<MIME::AltWords/decode_mimewords> instead.
In the event of a syntax error, $@ will be set to a description
of the error, but parsing will continue as best as possible (so as to
get I<something> back when decoding headers).
$@ will be false if no error was detected.
Any arguments past the ENCODED string are taken to define a hash of options:
=over 4
=item Field
Name of the mail field this string came from. I<Currently ignored.>
=back
=cut
sub
decode_mimewords_wantarray {
my
$encstr
=
shift
;
my
%params
=
@_
;
my
@tokens
;
$@ =
''
;
### error-return
### Collapse boundaries between adjacent encoded words:
$encstr
=~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
pos
(
$encstr
) = 0;
### print STDOUT "ENC = [", $encstr, "]\n";
### Decode:
my
(
$charset
,
$encoding
,
$enc
,
$dec
);
while
(1) {
last
if
(
pos
(
$encstr
) >=
length
(
$encstr
));
my
$pos
=
pos
(
$encstr
);
### save it
### Case 1: are we looking at "=?..?..?="?
if
(
$encstr
=~ m{\G
# from where we left off..
=\?([^?]*)
# "=?" + charset +
\?([bq])
# "?" + encoding +
\?([^?]+)
# "?" + data maybe with spcs +
\?=
# "?="
}xgi) {
(
$charset
,
$encoding
,
$enc
) = ($1,
lc
($2), $3);
$dec
= ((
$encoding
eq
'q'
) ? _decode_Q(
$enc
) : _decode_B(
$enc
));
push
@tokens
, [
$dec
,
$charset
];
next
;
}
### Case 2: are we looking at a bad "=?..." prefix?
### We need this to detect problems for case 3, which stops at "=?":
pos
(
$encstr
) =
$pos
;
# reset the pointer.
if
(
$encstr
=~ m{\G=\?}xg) {
$@ .= qq|unterminated
"=?..?..?="
in
"$encstr"
(
pos
$pos
)\n|;
push
@tokens
, [
'=?'
];
next
;
}
### Case 3: are we looking at ordinary text?
pos
(
$encstr
) =
$pos
;
# reset the pointer.
if
(
$encstr
=~ m{\G
# from where we left off...
([\x00-\xFF]*?
# shortest possible string,
\n*)
# followed by 0 or more NLs,
(?=(\Z|=\?))
# terminated by "=?" or EOS
}xg) {
length
($1) or
die
"MIME::AltWords0: internal logic err: empty token\n"
;
push
@tokens
, [$1];
next
;
}
if
(
$encstr
=~m{\G([\x00-\xFF]*)[^\x00-\xFF]+}g) {
#### pts ####
$@.=
qq|wide character in encoded string\n|
;
push
@tokens
, [$1]
if
0!=
length
($1);
next
;
}
### Case 4: bug!
die
"MIME::AltWords0: unexpected case:\n($encstr) pos $pos\n\t"
.
"Please alert developer.\n"
;
}
return
(
wantarray
?
@tokens
:
join
(
''
,
map
{
$_
->[0]}
@tokens
));
}
#------------------------------
# vvv buggy implementation of encode_mimeword() commented out,
# see MIME::AltWords
=begin comment
=item encode_mimeword RAW, [ENCODING], [CHARSET]
I<Function.>
Encode a single RAW "word" that has unsafe characters.
The "word" will be encoded in its entirety.
### Encode "<<Franc,ois>>":
$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
You may specify the CHARSET, which defaults to C<iso-8859-1>.
=cut
=begin commentcode
sub encode_mimeword {
my $word = shift;
my $encoding = uc(shift || 'Q');
my $charset = uc(shift || 'ISO-8859-1');
my $encfunc = (($encoding eq 'Q') ? \&_encode_Q : \&_encode_B);
"=?$charset?$encoding?" . &$encfunc($word) . "?=";
}
=cut
#------------------------------
# vvv buggy implementation of encode_mimeword() commented out,
# see MIME::AltWords
=begin comment
=item encode_mimewords RAW, [OPTS]
I<Function.>
Given a RAW string, try to find and encode all "unsafe" sequences
of characters:
### Encode a string with some unsafe "words":
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
Returns the encoded string.
Any arguments past the RAW string are taken to define a hash of options:
=over 4
=item Charset
Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
a.k.a. "Latin-1".
=item Encoding
The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
=item Field
Name of the mail field this string will be used in. I<Currently ignored.>
=back
B<Warning:> this is a quick-and-dirty solution, intended for character
sets which overlap ASCII. B<It does not comply with the RFC-1522
rules regarding the use of encoded words in message headers>.
You may want to roll your own variant,
using C<encoded_mimeword()>, for your application.
I<Thanks to Jan Kasprzak for reminding me about this problem.>
=cut
=begin commentcode
sub encode_mimewords {
my ($rawstr, %params) = @_;
my $charset = $params{Charset} || 'ISO-8859-1';
my $encoding = lc($params{Encoding} || 'q');
### Encode any "words" with unsafe characters.
### We limit such words to 18 characters, to guarantee that the
### worst-case encoding give us no more than 54 + ~10 < 75 characters
my $word;
$rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word"
$word = $1;
(($word !~ /[$NONPRINT]/o)
? $word ### no unsafe chars
: encode_mimeword($word, $encoding, $charset)); ### has unsafe chars
}xeg;
$rawstr;
}
1;
__END__
=cut
=back
=head1 NOTES
Exports its principle functions by default, in keeping with
MIME::Base64 and MIME::QuotedPrint.
=head1 AUTHOR
L<MIME::AltWords0> was copied from L<MIME::Words> (version 5.420) by
Péter Szabó (F<pts@fazekas.hu>) at
Wed Sep 27 07:57:51 CEST 2006.
Here is the original information for L<MIME::Words>.
Eryq (F<eryq@zeegee.com>), ZeeGee Software Inc (F<http://www.zeegee.com>).
David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
All rights reserved. This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
Thanks also to...
Kent Boortz For providing the idea, and the baseline
RFC-1522-decoding code!
KJJ at PrimeNet For requesting that this be split into
its own module.
Stephane Barizien For reporting a nasty bug.
=head1 VERSION
$Revision: 1.14 $ $Date: 2006/03/17 21:03:23 $
=cut
#------------------------------
# Execute simple test if run as a script.
#------------------------------
{
eval
join
(
''
,<main::DATA>) ||
die
"$@ $main::DATA"
unless
caller
();
}
1;
# end the module
__END__
# Dat: these tests have been integrated to MIME::AltWords
### Pick up other MIME stuff, just in case...
BEGIN {
unshift
@INC
,
"."
,
"./etc"
,
"./lib"
};
import
MIME::AltWords0;
my
@encs
= (
'=?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>'
,
'=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>'
,
'=?ISO-8859-1?Q?Andr=E9_?= Pirard <PIRARD@vm1.ulg.ac.be>'
,
(
'=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?='
.
'=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='
.
'=?US-ASCII?Q?.._cool!?='
));
foreach
$enc
(
@encs
) {
$x
= decode_mimewords(
$enc
);
"DEC: "
,
$x
,
"\n"
;
}
### Encode a single unsafe word:
$encoded
= encode_mimeword(
"\xABFran\xE7ois\xBB"
);
"ENC1: "
,
$encoded
,
"\n"
;
### Encode a string, trying to find the unsafe words inside it:
$encoded
= encode_mimewords(
"Me and \xABFran\xE7ois\xBB at the beach"
);
"ENC2: "
,
$encoded
,
"\n"
;
### Encode "<<Franc,ois>>":
my
$unsafe
=
<<EOF;
Me and \xABFran\xE7ois\xBB, down at the beach
with Dave <dave\@ether.net>
EOF
$encoded
= encode_mimewords(
$unsafe
);
"ENC3: "
,
$encoded
,
"\n"
;
"DEC3: "
,
scalar
(decode_mimewords(
$encoded
)),
"\n"
;
### So we know everything went well...
exit
0;
#------------------------------