lib/MIME/AltWords0.pm - metacpan.org


            
              1
2
3
4
—
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
—
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
—
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
—
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
              package MIME::AltWords;
# Dat: `package MIME::AltWords0' would be incorrect. We don't need the zero.
# copy of package MIME::Words (version 5.420);
# at Wed Sep 27 07:52:29 CEST 2006
=head1 NAME
MIME::AltWords0 - copy of MIME::Words (not for direct use)
=head1 SYNOPSIS
L<MIME::AltWords0> is an auxilary package used by L<MIME::AltWords>.
If you want to encode or decode MIME words (such as
C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in e-mail message headers (mostly
Subject, From and To), the recommended Perl module to use is
L<MIME::AltWords>. Please don't use L<MIME::AltWords0> (which is
useless by itself anyway) or L<MIME::Words> (version 5.420 has several
serious bugs both with encoding and decoding).
Most users shouldn't read on, but they should read L<MIME::AltWords>
instead.
=head1 DESCRIPTION
The following functions have been moved to L<MIME::AltWords>:
decode_mimewords() (some code stays here as decode_mimewords_wantarray()),
    use MIME::AltWords0 qw(:all);   
      
    ### Split string into array of decoded [DATA,CHARSET] pairs:
    @decoded = decode_mimewords_wantarray(
          'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>',
          );
      
Fellow Americans, you probably won't know what the hell this module
is for.  Europeans, Russians, et al, you probably do.  C<:-)>. 
For example, here's a valid MIME header you might get:
      From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>
      To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>
      CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard <PIRARD@vm1.ulg.ac.be>
      Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
       =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
       =?US-ASCII?Q?.._cool!?=
The fields basically decode to (sorry, I can only approximate the
Latin characters with 7 bit sequences /o and 'e):
      From: Keith Moore <moore@cs.utk.edu>
      To: Keld J/orn Simonsen <keld@dkuug.dk>
      CC: Andr'e  Pirard <PIRARD@vm1.ulg.ac.be>
      Subject: If you can read this you understand the example... cool!
=head1 PUBLIC INTERFACE
=over 4
=cut
require 5.001;
### Pragmas:
use strict;
use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA);
### Exporting:
use Exporter;
%EXPORT_TAGS = (all => [qw(decode_mimewords
                           )]);
Exporter::export_ok_tags('all');
### Inheritance:
@ISA = qw(Exporter);
### Other modules:
use MIME::Base64;
use MIME::QuotedPrint;
#------------------------------
#
# Globals...
#
#------------------------------
### The package version, both in 1.23 style *and* usable by MakeMaker:
$VERSION = "5.420";
use vars qw($NONPRINT); #### pts ####
### Nonprintables (controls + x7F + 8bit):
$NONPRINT = "\\x00-\\x1F\\x7F-\\xFF"; 
#------------------------------
# _decode_Q STRING
#     Private: used by _decode_header() to decode "Q" encoding, which is
#     almost, but not exactly, quoted-printable.  :-P
sub _decode_Q {
    my $str = shift;
    $str =~ s/_/\x20/g;                                # RFC-1522, Q rule 2
    $str =~ s/=([\da-fA-F]{2})/pack("C", hex($1))/ge;  # RFC-1522, Q rule 1
    $str;
}
# _encode_Q STRING
#     Private: used by _encode_header() to decode "Q" encoding, which is
#     almost, but not exactly, quoted-printable.  :-P
sub _encode_Q {
    my $str = shift;
    $str =~ s{([_\?\=$NONPRINT])}{sprintf("=%02X", ord($1))}eog;
    $str;
}
# _decode_B STRING
#     Private: used by _decode_header() to decode "B" encoding.
sub _decode_B {
    my $str = shift;
    decode_base64($str);
}
# _encode_B STRING
#     Private: used by _decode_header() to decode "B" encoding.
sub _encode_B {
    my $str = shift;
    encode_base64($str, '');
}
#------------------------------
=item decode_mimewords_wantarray ENCODED, [OPTS...]
I<Function.>
Go through the string looking for RFC-1522-style "Q"
(quoted-printable, sort of) or "B" (base64) encoding, and decode them.
B<In an array context,> splits the ENCODED string into a list of decoded 
C<[DATA, CHARSET]> pairs, and returns that list.  Unencoded 
data are returned in a 1-element array C<[DATA]>, giving an effective 
CHARSET of C<undef>.
    $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>';
    foreach (decode_mimewords($enc)) {
        print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
    }
B<In a scalar context,> the result is undocumented, please see
L<MIME::AltWords/decode_mimewords> instead.
In the event of a syntax error, $@ will be set to a description 
of the error, but parsing will continue as best as possible (so as to
get I<something> back when decoding headers).
$@ will be false if no error was detected.
Any arguments past the ENCODED string are taken to define a hash of options:
=over 4
=item Field
Name of the mail field this string came from.  I<Currently ignored.>
=back
=cut
sub decode_mimewords_wantarray {
    my $encstr = shift;
    my %params = @_;
    my @tokens;
    $@ = '';           ### error-return
    ### Collapse boundaries between adjacent encoded words:
    $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
    pos($encstr) = 0;
    ### print STDOUT "ENC = [", $encstr, "]\n";
    ### Decode:
    my ($charset, $encoding, $enc, $dec);
    while (1) {
        last if (pos($encstr) >= length($encstr));
        my $pos = pos($encstr);               ### save it
        ### Case 1: are we looking at "=?..?..?="?
        if ($encstr =~    m{\G             # from where we left off..
                            =\?([^?]*)     # "=?" + charset +
                             \?([bq])      #  "?" + encoding +
                             \?([^?]+)     #  "?" + data maybe with spcs +
                             \?=           #  "?="
                            }xgi) {
            ($charset, $encoding, $enc) = ($1, lc($2), $3);
            $dec = (($encoding eq 'q') ? _decode_Q($enc) : _decode_B($enc));
            push @tokens, [$dec, $charset];
            next;
        }
        ### Case 2: are we looking at a bad "=?..." prefix? 
        ### We need this to detect problems for case 3, which stops at "=?":
        pos($encstr) = $pos;               # reset the pointer.
        if ($encstr =~ m{\G=\?}xg) {
            $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $pos)\n|;
            push @tokens, ['=?'];
            next;
        }
        ### Case 3: are we looking at ordinary text?
        pos($encstr) = $pos;               # reset the pointer.
        if ($encstr =~ m{\G                # from where we left off...
                         ([\x00-\xFF]*?    #   shortest possible string,
                          \n*)             #   followed by 0 or more NLs,
                         (?=(\Z|=\?))      # terminated by "=?" or EOS
                        }xg) {
            length($1) or die "MIME::AltWords0: internal logic err: empty token\n";
            push @tokens, [$1];
            next;
        }
         
        if ($encstr=~m{\G([\x00-\xFF]*)[^\x00-\xFF]+}g) { #### pts ####
            $@.=qq|wide character in encoded string\n|;
            push @tokens, [$1] if 0!=length($1);
            next;
        }
        ### Case 4: bug!
        die "MIME::AltWords0: unexpected case:\n($encstr) pos $pos\n\t".
            "Please alert developer.\n";
    }
    return (wantarray ? @tokens : join('',map {$_->[0]} @tokens));
}
#------------------------------
# vvv buggy implementation of encode_mimeword() commented out,
#     see MIME::AltWords
=begin comment
=item encode_mimeword RAW, [ENCODING], [CHARSET]
I<Function.>
Encode a single RAW "word" that has unsafe characters.
The "word" will be encoded in its entirety.
    ### Encode "<<Franc,ois>>":
    $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
You may specify the CHARSET, which defaults to C<iso-8859-1>.
=cut
=begin commentcode
sub encode_mimeword {
    my $word = shift;
    my $encoding = uc(shift || 'Q');
    my $charset  = uc(shift || 'ISO-8859-1');
    my $encfunc  = (($encoding eq 'Q') ? \&_encode_Q : \&_encode_B);
    "=?$charset?$encoding?" . &$encfunc($word) . "?=";
}
=cut
#------------------------------
# vvv buggy implementation of encode_mimeword() commented out,
#     see MIME::AltWords
=begin comment
=item encode_mimewords RAW, [OPTS]
I<Function.>
Given a RAW string, try to find and encode all "unsafe" sequences 
of characters:
    ### Encode a string with some unsafe "words":
    $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
Returns the encoded string.
Any arguments past the RAW string are taken to define a hash of options:
=over 4
=item Charset
Encode all unsafe stuff with this charset.  Default is 'ISO-8859-1',
a.k.a. "Latin-1".
=item Encoding
The encoding to use, C<"q"> or C<"b">.  The default is C<"q">.
=item Field
Name of the mail field this string will be used in.  I<Currently ignored.>
=back
B<Warning:> this is a quick-and-dirty solution, intended for character
sets which overlap ASCII.  B<It does not comply with the RFC-1522
rules regarding the use of encoded words in message headers>.
You may want to roll your own variant,
using C<encoded_mimeword()>, for your application.
I<Thanks to Jan Kasprzak for reminding me about this problem.>
=cut
=begin commentcode
sub encode_mimewords {
    my ($rawstr, %params) = @_;
    my $charset  = $params{Charset} || 'ISO-8859-1';
    my $encoding = lc($params{Encoding} || 'q');
    ### Encode any "words" with unsafe characters.
    ###    We limit such words to 18 characters, to guarantee that the 
    ###    worst-case encoding give us no more than 54 + ~10 < 75 characters
    my $word;
    $rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{     ### get next "word"
        $word = $1;
        (($word !~ /[$NONPRINT]/o) 
         ? $word                                          ### no unsafe chars
         : encode_mimeword($word, $encoding, $charset));  ### has unsafe chars
    }xeg;
    $rawstr;
}
1;
__END__
=cut
=back
=head1 NOTES
Exports its principle functions by default, in keeping with 
MIME::Base64 and MIME::QuotedPrint.
=head1 AUTHOR
L<MIME::AltWords0> was copied from L<MIME::Words> (version 5.420) by
Péter Szabó (F<pts@fazekas.hu>) at
Wed Sep 27 07:57:51 CEST 2006.
Here is the original information for L<MIME::Words>.
Eryq (F<eryq@zeegee.com>), ZeeGee Software Inc (F<http://www.zeegee.com>).
David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
All rights reserved.  This program is free software; you can redistribute 
it and/or modify it under the same terms as Perl itself.
Thanks also to...
      Kent Boortz        For providing the idea, and the baseline 
                         RFC-1522-decoding code!
      KJJ at PrimeNet    For requesting that this be split into
                         its own module.
      Stephane Barizien  For reporting a nasty bug.
=head1 VERSION
$Revision: 1.14 $ $Date: 2006/03/17 21:03:23 $
=cut
#------------------------------
# Execute simple test if run as a script.
#------------------------------
{ 
  package main; no strict;
  eval join('',<main::DATA>) || die "$@ $main::DATA" unless caller();
}
1;           # end the module
__END__
# Dat: these tests have been integrated to MIME::AltWords
### Pick up other MIME stuff, just in case...
BEGIN { unshift @INC, ".", "./etc", "./lib" };
import MIME::AltWords0;
my @encs = (
            '=?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>',
            '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>',
            '=?ISO-8859-1?Q?Andr=E9_?= Pirard <PIRARD@vm1.ulg.ac.be>',
            ('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?='.
             '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='.
             '=?US-ASCII?Q?.._cool!?='));
foreach $enc (@encs) {
    $x = decode_mimewords($enc);
    print "DEC: ", $x, "\n";
}
### Encode a single unsafe word:
$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
print "ENC1: ", $encoded, "\n";
     
### Encode a string, trying to find the unsafe words inside it: 
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach");
print "ENC2: ", $encoded, "\n";
### Encode "<<Franc,ois>>":
my $unsafe = <<EOF;
Me and \xABFran\xE7ois\xBB, down at the beach
with Dave <dave\@ether.net>
EOF
$encoded = encode_mimewords($unsafe);
print "ENC3: ", $encoded, "\n";
print "DEC3: ", scalar(decode_mimewords($encoded)), "\n";
### So we know everything went well...
exit 0;
#------------------------------
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)