lib/Email/MIME/RFC2047/Decoder.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
—
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
              package Email::MIME::RFC2047::Decoder;
$Email::MIME::RFC2047::Decoder::VERSION = '0.97';
use strict;
use warnings;
# ABSTRACT: Decoding of non-ASCII MIME headers
use Encode ();
use MIME::Base64 ();
# Don't include period "." to correctly handle obs-phrase.
my $rfc_specials = '()<>\[\]:;\@\\,"';
my $rfc_specials_no_quote = '()<>\[\]:;\@\\,';
# Regex for encoded words.
# This also checks the validity of base64 encoded data because MIME::Base64
# silently ignores invalid characters.
# Captures ($encoding, $content_b, $content_q)
my $encoded_word_text_re = qr/
    (?: ^ | (?<= [\r\n\t ] ) )
    = \? ( [A-Za-z0-9_-]++ ) \?
    (?:
        [Bb] \?
        (
            (?:
                [A-Za-z0-9+\/]{2}
                (?: == | [A-Za-z0-9+\/] [A-Za-z0-9+\/=] )
            )++
        ) |
        [Qq] \?
        ( [\x21-\x3E\x40-\x7E]++ )
    )
    \? =
    (?= \z | [\r\n\t ] )
/x;
# Same as $encoded_word_text_re but excluding RFC 2822 special chars
# Also matches after and before special chars (why?).
my $encoded_word_phrase_re = qr/
    (?: ^ | (?<= [\r\n\t $rfc_specials_no_quote] ) )
    = \? ( [A-Za-z0-9_-]++ ) \?
    (?:
        [Bb] \?
        (
            (?:
                [A-Za-z0-9+\/]{2}
                (?: == | [A-Za-z0-9+\/] [A-Za-z0-9+\/=] )
            )++
        ) |
        [Qq] \?
        ( [A-Za-z0-9!*+\/=_-]++ )
    )
    \? =
    (?= \z | [\r\n\t $rfc_specials_no_quote] )
/x;
# Same as $encoded_word_text_re but excluding parens and backslash.
# Also matches after and before parens.
#my $encoded_word_comment_re = qr/
#    (?<= [\r\n\t ()] )
#    = \? ( [A-Za-z0-9_-]++ ) \?
#    (?:
#        [Bb] \?
#        (
#            (?:
#                [A-Za-z0-9+\/]{2}
#                (?: == | [A-Za-z0-9+\/] [A-Za-z0-9+\/=] )
#            )++
#        ) |
#        [Qq] \?
#        ( [\x21-\x27\x2A-\x3E\x40-\x5B\x5D-\x7E]++ )
#    )
#    \? =
#    (?= [\r\n\t ()] )
#/x;
my $quoted_string_re = qr/
    "
    (
        (?:
            [^"\\]++ |
            \\ .
        )*+
    )
    "
/sx;
my $comment_re = qr/
    (
        \(
            (?:
                [^()\\]++ |
                \\ . |
                (?-1)
            )*+
        \)
    )
/sx;
sub new {
    my $package = shift;
    my $self = {};
    return bless($self, $package);
}
sub decode_text {
    my $self = shift;
    return $self->_decode('text', @_);
}
sub decode_phrase {
    my $self = shift;
    return $self->_decode('phrase', @_);
}
sub _decode {
    my ($self, $mode, $encoded) = @_;
    my $encoded_ref = ref($encoded) ? $encoded : \$encoded;
    my $result = '';
    my $enc_flag;
    # use shortest match on any characters we don't want to decode
    my $regex = $mode eq 'phrase' ?
        qr/([^$rfc_specials]*?)($encoded_word_phrase_re|$quoted_string_re|$comment_re)/ :
        qr/(.*?)($encoded_word_text_re)/s;
    while ($$encoded_ref =~ /\G$regex/cg) {
        my ($text, $match,
            $encoding, $b_content, $q_content,
            $qs_content) =
            ($1, $2, $3, $4, $5, $6, $7);
        if (defined($encoding)) {
            # encoded words shouldn't be longer than 75 chars but
            # let's allow up to 255 chars
            if (length($match) > 255) {
                $result .= $text;
                $result .= $match;
                $enc_flag = undef;
                next;
            }
            my $content;
            if (defined($b_content)) {
                # MIME B
                $content = MIME::Base64::decode_base64($b_content);
            }
            else {
                # MIME Q
                $content = $q_content;
                $content =~ tr/_/ /;
                $content =~ s/=([0-9A-Fa-f]{2})/chr(hex($1))/eg;
            }
            my $chunk;
            eval {
                $chunk = Encode::decode(
                    $encoding,
                    $content,
                    Encode::FB_CROAK
                );
            };
            if ($@) {
                warn($@);
                # display raw encoded word in case of errors
                $result .= $text;
                $result .= $match;
                $enc_flag = undef;
                next;
            }
            # ignore whitespace between encoded words
            $result .= $text if !$enc_flag || $text =~ /\S/;
            $result .= $chunk;
            $enc_flag = 1;
        }
        elsif (defined($qs_content)) {
            # quoted string
            $result .= $text;
            # unquote
            $qs_content =~ s/\\(.)/$1/gs;
            $result .= $qs_content;
            $enc_flag = undef;
        }
    }
    $regex = $mode eq 'phrase' ?
        qr/[^$rfc_specials]+/ :
        qr/.+/s;
    $result .= $& if $$encoded_ref =~ /\G$regex/cg;
    # normalize whitespace
    $result =~ s/^[\r\n\t ]+//;
    $result =~ s/[\r\n\t ]+\z//;
    $result =~ s/[\r\n\t ]+/ /g;
    # remove potentially dangerous ASCII control chars
    $result =~ s/[\x00-\x1f\x7f]//g;
    return $result;
}
1;
__END__
=pod
=encoding UTF-8
=head1 NAME
Email::MIME::RFC2047::Decoder - Decoding of non-ASCII MIME headers
=head1 VERSION
version 0.97
=head1 SYNOPSIS
    use Email::MIME::RFC2047::Decoder;
    my $decoder = Email::MIME::RFC2047::Decoder->new();
    my $string = $decoder->decode_text($encoded_text);
    my $string = $decoder->decode_phrase($encoded_phrase);
=head1 DESCRIPTION
This module decodes parts of MIME email message headers containing non-ASCII
text according to RFC 2047.
=head1 CONSTRUCTOR
=head2 new
    my $decoder = Email::MIME::RFC2047::Decoder->new();
Creates a new decoder object.
=head1 METHODS
=head2 decode_text
    my $string = $decoder->decode_text($encoded_text);
Decodes any MIME header field for which the field body is defined as
C<unstructured> (RFC 2822) or C<*text> (RFC 822), for example, any
I<Subject> or I<Comments> header field.
C<$encoded_text> can also be a reference to a scalar. In this case, the
scalar is processed starting from the current search position. See
L<perlfunc/pos>.
The resulting string is trimmed and any whitespace is collapsed. This means
that lines separated by folding whitespace are unfolded. Folding whitespace
is not checked for syntactical correctness. Newlines are treated like
normal whitespace.
=head2 decode_phrase
    my $string = $decoder->decode_phrase($encoded_phrase);
Decodes any C<phrase> token (as defined by RFC 2822) in a MIME header field,
for example, one that precedes an address in a I<From>, I<To>, or I<Cc>
header.
This method works like I<decode_text> but additionally unquotes any
C<quoted-string>s. It also stops at any special character as defined by
RFC 2822, excluding the period character "." and the double quote '"'.
If C<$encoded_phrase> is a reference to a scalar, the current search
position is set accordingly. This is helpful when parsing RFC 2822 address
headers.
=head1 AUTHOR
Nick Wellnhofer <wellnhofer@aevum.de>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2017 by Nick Wellnhofer.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)