lib/Math/Expression/Evaluator/Lexer.pm


            
              1
2
3
4
5
—
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
              package Math::Expression::Evaluator::Lexer;
use warnings;
use strict;
use Carp qw(confess);
use Data::Dumper;
=head1 NAME
Math::Expression::Evaluator::Lexer - Simple Lexer
=head1 SYNOPSIS
    use Math::Expression::Evaluator::Lexer qw(lex);
    # suppose you want to parse simple math expressions
    my @input_tokens = (
        ['Int',             qr/[+-]?\d+/ ],
        ['Op',              qr{[+-/*]}   ],
        ['Brace_Open',      qr/\(/       ],
        ['Brace_Close',     qr/\)/       ],
        ['Whitespace',      qr/\s+/, sub { return; }],
         );
    my $text = "-12 * (3+4)";
    my $out_tokens = lex($text, \@input_tokens);
    for (@$out_tokens){
        my ($name, $text, $pos) = @$_;
        print "Found Token $name: $text (string pos: $pos)\n";
    }
=head1 DESCRIPTION
Math::Expression::Evaluator::Lexer is a simple lexer that breaks up a text 
into tokens, depending on the input tokens you provide
=head1 METHODS
=over 2
=item lex
The only exported routine is lex, which expects input text as its 
first argument and a array ref to list of input tokens.
Each input token consists of a token name (which you can choose freely), 
a regex which matches the desired token, and optionally a reference to 
a functions that takes the matched token text as its argument. The 
token text is replaced by the return value of that function. If the 
function returns undef, that token will not be included in the list 
of output tokens. The regex should either fail or match at least one
character; zero-width matches utterly confuse the lexer, and are disallowed.
lex() returns an array ref to a list of output tokens, each output 
token is a reference to a list which contains the token name, the matched 
text, the string position (in characters, counted from the start of
the input string, zero based) and the line number.
Note that C<lex()> puts parentheses around the entire regex, so if you 
want to use backreferences, the numbering of the capturing group is changed.
=back
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2007 by Moritz Lenz, L<http://moritz.faui2k3.org>, 
L<moritz@faui2k3.org>.
This Program and its Documentation is free software. You may distribute it 
under the same terms as perl itself.
However all code examples are to be public domain, so you can use it in any 
way you want to.
=cut
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(lex);
our %EXPORT_TAGS = (":all" => \@EXPORT_OK);
sub lex {
    my ($text, $tokens) = @_;
    confess("passed undefined value to lex()") unless defined $text;
    my $l = length $text;
    return [] unless $l;
    my ($last_line_number, $last_pos) = (0, 0);
    my $pos_and_line_number = sub {
        my $pos = shift;
        $last_line_number +=
            (substr($text, $last_pos, $pos - $last_pos) =~ tr/\n//);
        $last_pos = $pos;
        return ($pos, $last_line_number + 1);
    };
    my @res;
    # avoid 'Use of uninitialized value in numeric lt (<)' warnings:
    pos($text) = 0;
    while (pos($text) < $l){
        my $matched = 0;
REGEXES:
        for (@$tokens){
            my $re = $_->[1];
            # failed regex matches reset pos() unless the /c modifier
            # is present
            if ($text =~ m/\G($re)/gc){
                $matched = 1;
                my $match = $1;
                if (length $match == 0){
                    confess("Each token has to require at least one "
                            . "character; Rule $_->[0] matched Zero!\n");
                }
                # safe information before callbacks can modify $match
                # and thus length($match)
                my $pos = pos($text) - length($match);
                if ($_->[2]){
                    $match = $_->[2]->($match);
                }
                if (defined $match){
                    push @res, [
                        $_->[0],
                        $match,
                        $pos_and_line_number->($pos),
                    ];
                }
                next REGEXES;
            }
        }
        if ($matched == 0){
            confess("No token matched input text <$text> at position " . pos($text));
        }
    }
    return \@res;
}
1;
# vim: sw=4 ts=4 expandtab
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)