lib/RPerl/Operation/Expression/Operator/RegularExpression.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
—
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
—
60
61
62
63
64
65
66
67
68
69
70
71
72
73
—
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
              # [[[ HEADER ]]]
package RPerl::Operation::Expression::Operator::RegularExpression;
use strict;
use warnings;
use RPerl::AfterSubclass;
our $VERSION = 0.013_000;
# [[[ OO INHERITANCE ]]]
use parent qw(RPerl::Operation::Expression::Operator);
use RPerl::Operation::Expression::Operator;
# [[[ CRITICS ]]]
## no critic qw(ProhibitUselessNoCritic ProhibitMagicNumbers RequireCheckedSyscalls)  # USER DEFAULT 1: allow numeric values & print operator
## no critic qw(RequireInterpolationOfMetachars)  # USER DEFAULT 2: allow single-quoted control characters & sigils
# [[[ OO PROPERTIES ]]]
# this class declares no object properties of its own, the properties hash is intentionally empty
our hashref $properties = {};
# COPYRIGHT NOTICE: modifier descriptions copied from JPCRE2 docs under BSD license    https://github.com/jpcre2/jpcre2
# supported and compliant!  :-)
# compile-time regex modifiers accepted by ast_to_cpp__generate__CPPOPS_CPPTYPES() for both match and substitute patterns;
# keys are the single-character modifiers tested via exists, values are human-readable descriptions shown in diagnostic output;
# the sorted keys are also listed in the error message for an unrecognized modifier
our string_hashref $modifiers_compile = {
    i => 'Case-insensitive. Equivalent to PCRE2_CASELESS option.',
    m => 'Multi-line regex. Equivalent to PCRE2_MULTILINE option.',
    s => 'If this modifier is set, a dot meta-character in the pattern matches all characters, including newlines. Equivalent to PCRE2_DOTALL option.',
    u => 'Enable UTF support.Treat pattern and subjects as UTF strings. It is equivalent to PCRE2_UTF option.',
    x => 'Whitespace data characters in the pattern are totally ignored except when escaped or inside a character class, enables commentary in pattern. Equivalent to PCRE2_EXTENDED option.',
};
=begin DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
# DEV NOTE: there are other unsupported modifiers as well, see the Perl docs    https://perldoc.perl.org/perlre.html#Modifiers
our string_hashref $modifiers_compile_unsupported = {
    xx => 'Whitespace data characters in the pattern are totally ignored except when escaped, EVEN WHEN INSIDE A CHARACTER CLASS.  Requires Perl v5.26 or newer.',
};
# DEV NOTE: some of these noncompliant modifiers may be related to Perl regex assertions, such as 'A'    https://perldoc.perl.org/perlre.html
our string_hashref $modifiers_compile_noncompliant = {
    e => 'Unset back-references in the pattern will match to empty strings. Equivalent to PCRE2_MATCH_UNSET_BACKREF.',
    j => '\u \U \x and unset back-references will act as JavaScript standard. Equivalent to PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF.
\U matches an upper case "U" character (by default it causes a compile error if this option is not set).
\u matches a lower case "u" character unless it is followed by four hexadecimal digits, in which case the hexadecimal number defines the code point to match (by default it causes a compile error if this option is not set).
\x matches a lower case "x" character unless it is followed by two hexadecimal digits, in which case the hexadecimal number defines the code point to match (By default, as in Perl, a hexadecimal number is always expected after \x, but it may have zero, one, or two digits (so, for example, \xz matches a binary zero character followed by z) ).
Unset back-references in the pattern will match to empty strings.',
    n => 'Enable Unicode support for \w \d etc... in pattern. Equivalent to PCRE2_UTF | PCRE2_UCP.',
    A => 'Match only at the first position. It is equivalent to PCRE2_ANCHORED option.',
    D => 'A dollar meta-character in the pattern matches only at the end of the subject string. Without this modifier, a dollar also matches immediately before the final character if it is a newline (but not before any other newlines). This modifier is ignored if m modifier is set. Equivalent to PCRE2_DOLLAR_ENDONLY option.',
    J => 'Allow duplicate names for sub-patterns. Equivalent to PCRE2_DUPNAMES option.',
    S => 'When a pattern is going to be used several times, it is worth spending more time analyzing it in order to speed up the time taken for matching/replacing. It may also be beneficial for a very long subject string or pattern. Equivalent to an extra compilation with JIT_COMPILER with the option PCRE2_JIT_COMPLETE.',
    U => 'This modifier inverts the "greediness" of the quantifiers so that they are not greedy by default, but become greedy if followed by ?. Equivalent to PCRE2_UNGREEDY option.',
};
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
=cut
# match-time modifiers accepted by ast_to_cpp__generate__CPPOPS_CPPTYPES() for m/.../ patterns only;
# a modifier found here is emitted as the modifier argument of the generated .match() call, not of regex()
our string_hashref $modifiers_match = {
    g => 'Global. Will perform global matching or replacement if passed. Equivalent to jpcre2::FIND_ALL for match and PCRE2_SUBSTITUTE_GLOBAL for replace.',
};
=begin DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
our string_hashref $modifiers_match_noncompliant = {
    A => 'Match at start. Equivalent to PCRE2_ANCHORED. Can be used in match operation. Setting this option only at match time (i.e regex was not compiled with this option) will disable optimization during match time.',
};
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
=cut
# substitute-time modifiers accepted by ast_to_cpp__generate__CPPOPS_CPPTYPES() for s/.../.../ patterns only;
# a modifier found here is emitted as the modifier argument of the generated .preplace() call, not of regex()
our string_hashref $modifiers_substitute = {
    g => 'Global. Will perform global matching or replacement if passed. Equivalent to jpcre2::FIND_ALL for match and PCRE2_SUBSTITUTE_GLOBAL for replace.',
};
=begin DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
our string_hashref $modifiers_substitute_noncompliant = {
    e => 'Replaces unset group with empty string. Equivalent to PCRE2_SUBSTITUTE_UNSET_EMPTY.',
    E => 'Extension of e modifier. Sets even unknown groups to empty string. Equivalent to PCRE2_SUBSTITUTE_UNSET_EMPTY | PCRE2_SUBSTITUTE_UNKNOWN_UNSET',
    x => 'Extended replacement operation. Equivalent to PCRE2_SUBSTITUTE_EXTENDED. It enables some Bash like features:
${<n>:-<string>}
${<n>:+<string1>:<string2>}
<n> may be a group number or a name. The first form specifies a default value. If group <n> is set, its value is inserted; if not, <string> is expanded and the result is inserted. The second form specifies strings that are expanded and inserted when group <n> is set or unset, respectively. The first form is just a convenient shorthand for ${<n>:+${<n>}:<string>}.',
};
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
=cut
# [[[ SUBROUTINES & OO METHODS ]]]
# Regenerate RPerl source code for a regular expression bind operation.
# Parameters:
#   $self   AST node; children are [0] subject subexpression, [1] bind operator, [2] regex pattern
#   $modes  passed through unchanged to the subexpression's own generator
# Returns:  hashref whose PMC element holds the subexpression's source followed by ' <bind> <pattern>'
# Dies:     if $self is blessed into any grammar rule other than Operator_110
sub ast_to_rperl__generate {
    { my string_hashref::method $RETURN_TYPE };
    ( my object $self, my string_hashref $modes) = @ARG;

#    RPerl::diag( 'in Operator::RegularExpression->ast_to_rperl__generate(), received $self = ' . "\n" . RPerl::Parser::rperl_ast__dump($self) . "\n" );

    # guard clause: only Operator -> SubExpression OP06_REGEX_BIND OP06_REGEX_PATTERN is handled
    my string $self_class = ref $self;
    if ( $self_class ne 'Operator_110' ) {
        my string $error_message = 'ERROR ECOGEASRP000, CODE GENERATOR, ABSTRACT SYNTAX TO RPERL: Grammar rule ' . $self_class . ' found where Operator_110 expected, dying';
        die RPerl::Parser::rperl_rule__replace($error_message) . "\n";
    }

    # left of the bind operator: let the subject subexpression generate its own source code
    my string_hashref $rperl_source_group = { PMC => q{} };
    my string_hashref $subject_source_group = $self->{children}->[0]->ast_to_rperl__generate($modes);
    RPerl::Generator::source_group_append( $rperl_source_group, $subject_source_group );

    # right of the subject: bind operator and pattern are appended verbatim, each preceded by one space
    my string $bind_and_pattern = q{ } . $self->{children}->[1] . q{ } . $self->{children}->[2];
    $rperl_source_group->{PMC} .= $bind_and_pattern;

    return $rperl_source_group;
}
# Placeholder C++ generator for CPPOPS_PERLTYPES mode; no real code generation is implemented yet.
# Parameters:
#   $self   AST node, unused
#   $modes  generator modes, unused
# Returns:  hashref whose CPP element holds a fixed dummy C++ comment line
sub ast_to_cpp__generate__CPPOPS_PERLTYPES {
    { my string_hashref::method $RETURN_TYPE };
    ( my object $self, my string_hashref $modes) = @ARG;

    my string_hashref $cpp_source_group = {};
    $cpp_source_group->{CPP} = q{// <<< RP::O::E::O::RE __DUMMY_SOURCE_CODE CPPOPS_PERLTYPES >>>} . "\n";

    #...
    return $cpp_source_group;
}
# Generate C++ source code (CPPOPS_CPPTYPES mode) for a regular expression bind operation.
# Only grammar rule Operator_110 (SubExpression OP06_REGEX_BIND OP06_REGEX_PATTERN) is accepted,
# and only the '=~' bind operator combined with an m/.../ match or s/.../.../ substitute pattern.
# Parameters:
#   $self   AST node; children are [0] subject subexpression, [1] bind operator string, [2] pattern string
#   $modes  passed through unchanged to the subexpression's own generator
# Returns:  hashref whose CPP element holds the generated C++ call,
#           '(boolean) regex(...).match(...)' for a match or 'regex(...).preplace(&..., ...)' for a substitute
# Dies:     on any other grammar rule; on a bind operator other than '=~'; on a pattern not book-ended by forward slashes;
#           on a modifier missing from the package-level modifier tables; on a substitute pattern which does not contain
#           exactly one separating forward slash; on a pattern type other than 'm' or 's'
sub ast_to_cpp__generate__CPPOPS_CPPTYPES {
    { my string_hashref::method $RETURN_TYPE };
    ( my object $self, my string_hashref $modes) = @ARG;
    my string_hashref $cpp_source_group = { CPP => q{} };
#    RPerl::diag( 'in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), received $self = ' . "\n" . RPerl::Parser::rperl_ast__dump($self) . "\n" );
    my string $self_class = ref $self;
    if ( $self_class eq 'Operator_110' ) { # Operator -> SubExpression OP06_REGEX_BIND OP06_REGEX_PATTERN
        # generate subexpression, to left of regex bind operator
        my string_hashref $cpp_source_subgroup = $self->{children}->[0]->ast_to_cpp__generate__CPPOPS_CPPTYPES($modes);
        RPerl::Generator::source_group_append( $cpp_source_group, $cpp_source_subgroup );
        # get bind and pattern strings
        my string $bind = $self->{children}->[1];
        my string $pattern = $self->{children}->[2];
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $bind = '} . $bind . q{'} . "\n" );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $pattern = '} . $pattern . q{'} . "\n" );
        # NEED FIX: DIE ON !~ BINDING OPERATOR, should actually be logic & code generation to implement !~ binding operator
        if ($bind ne '=~') {
            die q{ERROR ECOGEASCP0xx: Regular expression binding operator '} . $bind . q{' not yet supported, dying};
        }
        # separate pattern into match/substitute flag, bare pattern, and modifiers
        my character $match_or_substitute = substr $pattern, 0, 1;
        my string $modifiers = q{};
        # walk backward from the end of the pattern, everything after the final forward slash is a modifier
        for (my integer $pattern_index = ((length $pattern) - 1); $pattern_index >= 0; $pattern_index--) {
            my character $modifier = substr $pattern, $pattern_index, 1;
            if ($modifier eq '/') { last; }
            $modifiers = $modifier . $modifiers;
        }
        # drop the leading match/substitute flag and the trailing modifiers, keep the book-end forward slashes for now
        my string $pattern_bare = substr $pattern, 1, ((length $pattern) - ((length $modifiers) + 1));
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $match_or_substitute = '} . $match_or_substitute . q{'} . "\n" );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $pattern_bare = '} . $pattern_bare . q{'} . "\n" );
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers = '} . $modifiers . q{'} . "\n" );
        # test for and remove book-end forward slash characters
        if ((substr $pattern_bare, 0, 1) ne q{/}) {
            die q{ERROR ECOGEASCP081: Regular expression pattern '} . $pattern_bare . q{' does not begin with forward slash '/' character, dying};
        }
        if ((substr $pattern_bare, -1, 1) ne q{/}) {
            die q{ERROR ECOGEASCP082: Regular expression pattern '} . $pattern_bare . q{' does not end with forward slash '/' character, dying};
        }
        $pattern_bare = substr $pattern_bare, 1, ((length $pattern_bare) - 2);
        # must backslash-escape backslashes within bare pattern, character class \w must appear as \\w etc, convert all single backslashes into double backslashes
        # NOTE(review): double-quote and newline characters inside the pattern are not escaped here, and would presumably break the generated C++ string literal; confirm & handle
        $pattern_bare =~ s/\\/\\\\/gxms;
        RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have forward-slash-cleaned and backslash-escaped $pattern_bare = '} . $pattern_bare . q{'} . "\n" );
        my string $modifiers_compile_enabled = q{};
        my string $modifiers_match_enabled = q{};
        my string $modifiers_substitute_enabled = q{};
        my string $modifiers_compile_extra = q{};
        # include S compile modifier for optimization on long (presumably more complex) patterns
        if ((length $pattern_bare) > 20) {
            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), setting S modifier to attempt PCRE2 optimization of pattern over length of 20 characters} . "\n" );
            $modifiers_compile_extra = 'S';
        }
        # match
        if ($match_or_substitute eq 'm') {
            # EXAMPLE C++ CODE
            # // check if string matches the pattern, return true or false
            # regex("(\\d)|(\\w)").match("I am the subject")
            # // match all and get the match count using the action modifier 'g', return count
            # regex("(\\d)|(\\w)","m").match("I am the subject","g")

            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have all valid match modifiers = '} . (join ', ', (sort keys %{{%{$modifiers_compile}, %{$modifiers_match}}})) . q{'} . "\n" );
            # validate & sort modifiers, each one is either a compile modifier for regex() or a match modifier for match()
            foreach my character $modifier (split //, $modifiers) {
                if (exists $modifiers_compile->{$modifier}) {
                    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid compile modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_compile->{$modifier} . "\n" );
                    $modifiers_compile_enabled .= $modifier;
                }
                elsif (exists $modifiers_match->{$modifier}) {
                    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid match modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_match->{$modifier} . "\n" );
                    $modifiers_match_enabled .= $modifier;
                }
                else {
                    die q{ERROR ECOGEASCP083: Non-compliant, unsupported, or unrecognized regular expression modifier '} . $modifier . q{' found, must be one of (} . (join ', ', (sort keys %{{%{$modifiers_compile}, %{$modifiers_match}}})) . q{), dying};
                }
            }
            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_compile_enabled = '} . $modifiers_compile_enabled . q{'} . "\n" );
            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_match_enabled = '} . $modifiers_match_enabled . q{'} . "\n" );
            # compose final C++ code for modifiers, each modifier argument is omitted entirely when no such modifier is enabled
            my $modifiers_compile_CPP = q{};
            if (($modifiers_compile_enabled . $modifiers_compile_extra) ne q{}) {
                $modifiers_compile_CPP = q{, "} . ($modifiers_compile_enabled . $modifiers_compile_extra) . q{"};
            }
            my $modifiers_match_CPP = q{};
            if ($modifiers_match_enabled ne q{}) {
                $modifiers_match_CPP = q{, "} . $modifiers_match_enabled . q{"};
            }
            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_compile_CPP = '} . $modifiers_compile_CPP . q{'} . "\n" );
            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_match_CPP = '} . $modifiers_match_CPP . q{'} . "\n" );
            # DEV NOTE: $cpp_source_group->{CPP} already contains the generated subexpression to be used as the subject of the regex
            # DEV NOTE: Perl vs JPCRE2 inconsistency, must explicitly cast return value change count as boolean true/false value
            $cpp_source_group->{CPP} = '(boolean) regex("' . $pattern_bare . '"' . $modifiers_compile_CPP . ').match(' . $cpp_source_group->{CPP} . $modifiers_match_CPP . ')';
        }
        # substitute
        elsif ($match_or_substitute eq 's') {
            # EXAMPLE C++ CODE
            # // replace first occurrence of a digit with @
            # string foo = (const string) "I am the subject string 44";
            # regex("\\d").preplace(&foo, "@")
            # // replace all occurrences of a digit with @
            # string foo = (const string) "I am the subject string 44";
            # regex("\\d").preplace(&foo, "@", "g")
            # // swap two parts of a string
            # string foo = (const string) "I am the subject\tTo be swapped according to tab";
            # regex("^([^\t]+)\t([^\t]+)$").preplace(&foo, "$2 $1")
            RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have all valid substitute modifiers = '} . (join ', ', (sort keys %{{%{$modifiers_compile}, %{$modifiers_substitute}}})) . q{'} . "\n" );
            # validate & sort modifiers, each one is either a compile modifier for regex() or a substitute modifier for preplace()
            foreach my character $modifier (split //, $modifiers) {
                if (exists $modifiers_compile->{$modifier}) {
                    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid compile modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_compile->{$modifier} . "\n" );
                    $modifiers_compile_enabled .= $modifier;
                }
                elsif (exists $modifiers_substitute->{$modifier}) {
                    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid substitute modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_substitute->{$modifier} . "\n" );
                    $modifiers_substitute_enabled .= $modifier;
                }
                else {
                    die q{ERROR ECOGEASCP084: Non-compliant, unsupported, or unrecognized regular expression modifier '} . $modifier . q{' found, must be one of (} . (join ', ', (sort keys %{{%{$modifiers_compile}, %{$modifiers_substitute}}})) . q{), dying};
                }
            }
            # compose final C++ code for modifiers, each modifier argument is omitted entirely when no such modifier is enabled
            my $modifiers_compile_CPP = q{};
            if (($modifiers_compile_enabled . $modifiers_compile_extra) ne q{}) {
                $modifiers_compile_CPP = q{, "} . ($modifiers_compile_enabled . $modifiers_compile_extra) . q{"};
            }
            my $modifiers_substitute_CPP = q{};
            if ($modifiers_substitute_enabled ne q{}) {
                $modifiers_substitute_CPP = q{, "} . $modifiers_substitute_enabled . q{"};
            }
            # validate substitute pattern
            # DEV NOTE: count via tr/// not m//g, because a m//g match assigned to a scalar yields only true or false, never the number of matches;
            # a pattern with 2 or more forward slashes (such as a backslash-escaped slash) must die here, otherwise the split below silently drops the extra slashes
            my $pattern_forward_slash_count = ( $pattern_bare =~ tr{/}{} );
            if ($pattern_forward_slash_count != 1) {
                die q{ERROR ECOGEASCP085: Substitution regular expression pattern '} . $pattern_bare . q{' does not contain exactly one forward slash '/' character, dying};
            }
            # split find/replace portions of substitute pattern, the single forward slash separator is discarded
            my string $pattern_find = q{};
            my string $pattern_replace = q{};
            my boolean $found_slash = 0;
            foreach my character $pattern_character (split //, $pattern_bare) {
                if ($pattern_character eq '/') {
                    $found_slash = 1;
                    next;
                }
                elsif (not $found_slash) {
                    $pattern_find .= $pattern_character;
                }
                else {
                    $pattern_replace .= $pattern_character;
                }
            }
            # START HERE
            # START HERE
            # START HERE
            # NEED ADD ERROR CHECK OR GRAMMAR CHANGE: regex substitution's LHS subexpression can only be a variable, because we must return assign value back to variable to emulate PERLOPS_PERLTYPES behavior
            # NEED ADD SUPPORT: non-destructive regex substitution using Perl's /r modifier, and NOT setting the original variable to the return value in C++
            # NEED ADD LOGIC: bind not !~ instead of only bind =~, disable die on !~ above !!!
            # DEV NOTE: $cpp_source_group->{CPP} already contains the generated subexpression to be used as the subject of the regex
            # EXAMPLE C++ CODE:  regex("FIND", "MODS_COMP").preplace(&foo, "REPLACE_WITH", "MODS_SUBST")
            $cpp_source_group->{CPP} = 'regex("' . $pattern_find . '"' . $modifiers_compile_CPP . ').preplace(&' . $cpp_source_group->{CPP} . ', "' . $pattern_replace . '"' . $modifiers_substitute_CPP . ')';
        }
        else {
            die q{ERROR ECOGEASCP080: Unrecognized regular expression type '} . $match_or_substitute . q{' found, must be 'm' for match or 's' for substitute, dying};
        }
    }
    else {
        die RPerl::Parser::rperl_rule__replace( 'ERROR ECOGEASCP000, CODE GENERATOR, ABSTRACT SYNTAX TO RPERL: Grammar rule ' . $self_class . ' found where Operator_110 expected, dying' ) . "\n";
    }
    RPerl::diag( 'in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), about to return $cpp_source_group = ' . "\n" . RPerl::Parser::rperl_ast__dump($cpp_source_group) . "\n" );
    return $cpp_source_group;
}
1;    # end of class
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)