i=> 'Case-insensitive. Equivalent to PCRE2_CASELESS option.',
m=> 'Multi-line regex. Equivalent to PCRE2_MULTILINE option.',
s=> 'If this modifier is set, a dot meta-character in the pattern matches all characters, including newlines. Equivalent to PCRE2_DOTALL option.',
u=> 'Enable UTF support.Treat pattern and subjects as UTF strings. It is equivalent to PCRE2_UTF option.',
x=> 'Whitespace data characters in the pattern are totally ignored except when escaped or inside a character class, enables commentary in pattern. Equivalent to PCRE2_EXTENDED option.',
xx => 'Whitespace data characters in the pattern are totally ignored except when escaped, EVEN WHEN INSIDE A CHARACTER CLASS. Requires Perl v5.26 or newer.',
e => 'Unset back-references in the pattern will match to empty strings. Equivalent to PCRE2_MATCH_UNSET_BACKREF.',
j => '\u \U \x and unset back-references will act as JavaScript standard. Equivalent to PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF.
\U matches an upper case "U" character (by default it causes a compile error if this option is not set).
\u matches a lower case "u" character unless it is followed by four hexadecimal digits, in which case the hexadecimal number defines the code point to match (by default it causes a compile error if this option is not set).
\x matches a lower case "x" character unless it is followed by two hexadecimal digits, in which case the hexadecimal number defines the code point to match (By default, as in Perl, a hexadecimal number is always expected after \x, but it may have zero, one, or two digits (so, for example, \xz matches a binary zero character followed by z) ).
Unset back-references in the pattern will match to empty strings.',
n => 'Enable Unicode support for \w \d etc... in pattern. Equivalent to PCRE2_UTF | PCRE2_UCP.',
A => 'Match only at the first position. It is equivalent to PCRE2_ANCHORED option.',
D => 'A dollar meta-character in the pattern matches only at the end of the subject string. Without this modifier, a dollar also matches immediately before the final character if it is a newline (but not before any other newlines). This modifier is ignored if m modifier is set. Equivalent to PCRE2_DOLLAR_ENDONLY option.',
J => 'Allow duplicate names for sub-patterns. Equivalent to PCRE2_DUPNAMES option.',
S => 'When a pattern is going to be used several times, it is worth spending more time analyzing it in order to speed up the time taken for matching/replacing. It may also be beneficial for a very long subject string or pattern. Equivalent to an extra compilation with JIT_COMPILER with the option PCRE2_JIT_COMPLETE.',
U => 'This modifier inverts the "greediness" of the quantifiers so that they are not greedy by default, but become greedy if followed by ?. Equivalent to PCRE2_UNGREEDY option.',
};
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
=cut
# Modifiers recognized at match time, keyed by modifier letter; each value is the
# human-readable description that is printed in diagnostics when the modifier is seen.
our string_hashref $modifiers_match = {
    g => 'Global. Will perform global matching or replacement if passed. Equivalent to jpcre2::FIND_ALL for match and PCRE2_SUBSTITUTE_GLOBAL for replace.',
    A => 'Match at start. Equivalent to PCRE2_ANCHORED. Can be used in match operation. Setting this option only at match time (i.e regex was not compiled with this option) will disable optimization during match time.',
};
=end DISABLED_UNSUPPORTED_OR_NONCOMPLIANT
=cut
ourstring_hashref $modifiers_substitute= {
g=> 'Global. Will perform global matching or replacement if passed. Equivalent to jpcre2::FIND_ALL for match and PCRE2_SUBSTITUTE_GLOBAL for replace.',
e => 'Replaces unset group with empty string. Equivalent to PCRE2_SUBSTITUTE_UNSET_EMPTY.',
E => 'Extension of e modifier. Sets even unknown groups to empty string. Equivalent to PCRE2_SUBSTITUTE_UNSET_EMPTY | PCRE2_SUBSTITUTE_UNKNOWN_UNSET',
x => 'Extended replacement operation. Equivalent to PCRE2_SUBSTITUTE_EXTENDED. It enables some Bash like features:
${<n>:-<string>}
${<n>:+<string1>:<string2>}
<n> may be a group number or a name. The first form specifies a default value. If group <n> is set, its value is inserted; if not, <string> is expanded and the result is inserted. The second form specifies strings that are expanded and inserted when group <n> is set or unset, respectively. The first form is just a convenient shorthand for ${<n>:+${<n>}:<string>}.',
# must backslash-escape backslashes within bare pattern, character class \w must appear as \\w etc, convert all single backslashes into double backslashes
$pattern_bare =~ s/\\/\\\\/gxms;
RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have forward-slash-cleaned and backslash-escaped $pattern_bare = '} . $pattern_bare . q{'} . "\n" );

# accumulators for modifier letters, one string per category; all start out empty
my string $modifiers_compile_enabled    = q{};
my string $modifiers_match_enabled      = q{};
my string $modifiers_substitute_enabled = q{};

# compile modifiers added by the generator itself rather than taken from the input
my string $modifiers_compile_extra = q{};

# include S compile modifier for optimization on long (presumably more complex) patterns
if ( ( length $pattern_bare ) > 20 ) {
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), setting S modifier to attempt PCRE2 optimization of pattern over length of 20 characters} . "\n" );
    $modifiers_compile_extra = 'S';
}
# match
# The visible statements log the union of valid compile and match modifiers, then append
# $modifier to the compile-time or match-time accumulator, dying with ECOGEASCP083 otherwise.
# NOTE(review): lines appear to be missing from this view (the statement that introduces
# $modifier and the 'if' that the 'elsif' below pairs with); the brace structure is kept
# exactly as visible -- confirm the control flow against the complete file.
if ( $match_or_substitute eq 'm' ) {
    # EXAMPLE C++ CODE
    # // check if string matches the pattern, return true or false
    # regex("(\\d)|(\\w)").match("I am the subject")
    # // match all and get the match count using the action modifier 'g', return count
    # regex("(\\d)|(\\w)","m").match("I am the subject","g")
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have all valid match modifiers = '} . ( join ', ', ( sort keys %{{%{$modifiers_compile}, %{$modifiers_match}}} ) ) . q{'} . "\n" );
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid compile modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_compile->{$modifier} . "\n" );
    $modifiers_compile_enabled .= $modifier;
}
elsif ( exists $modifiers_match->{$modifier} ) {
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid match modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_match->{$modifier} . "\n" );
    $modifiers_match_enabled .= $modifier;
}
else {
    # neither a compile modifier nor a match modifier: report the offending letter and the accepted set
    die q{ERROR ECOGEASCP083: Non-compliant, unsupported, or unrecognized regular expression modifier '} . $modifier . q{' found, must be one of (} . ( join ', ', ( sort keys %{{%{$modifiers_compile}, %{$modifiers_match}}} ) ) . q{), dying};
}
}
# report the modifier letters collected so far
RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_compile_enabled = '} . $modifiers_compile_enabled . "\n" );
RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have $modifiers_match_enabled = '} . $modifiers_match_enabled . "\n" );
# compose final C++ code for modifiers
# NOTE(review): lines appear to be missing from this view between the 'if' below and the
# modifier classification that follows it (the statement introducing $modifier is not visible);
# the brace structure is kept exactly as visible -- confirm against the complete file.
my $modifiers_compile_CPP = q{};
if ( ( $modifiers_compile_enabled . $modifiers_compile_extra ) ne q{} ) {
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid compile modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_compile->{$modifier} . "\n" );
    $modifiers_compile_enabled .= $modifier;
}
elsif ( exists $modifiers_substitute->{$modifier} ) {
    RPerl::diag( q{in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), have valid substitute modifier '} . $modifier . q{' with description as follows:} . "\n\t" . $modifiers_substitute->{$modifier} . "\n" );
    $modifiers_substitute_enabled .= $modifier;
}
else {
    # neither a compile modifier nor a substitute modifier: report the offending letter and the accepted set
    die q{ERROR ECOGEASCP084: Non-compliant, unsupported, or unrecognized regular expression modifier '} . $modifier . q{' found, must be one of (} . ( join ', ', ( sort keys %{{%{$modifiers_compile}, %{$modifiers_substitute}}} ) ) . q{), dying};
}
}
# compose final C++ code for modifiers
# NOTE(review): lines appear to be missing from this view (the code that emits the C++ for the
# substitute form, and the conditions guarding the two error exits below); the brace structure
# is kept exactly as visible -- confirm against the complete file.
my $modifiers_compile_CPP = q{};
if ( ( $modifiers_compile_enabled . $modifiers_compile_extra ) ne q{} ) {
    # NEED ADD ERROR CHECK OR GRAMMAR CHANGE: regex substitution's LHS subexpression can only be a variable, because we must return assign value back to variable to emulate PERLOPS_PERLTYPES behavior
    # NEED ADD SUPPORT: non-destructive regex substitution using Perl's /r modifier, and NOT setting the original variable to the return value in C++
    # NEED ADD LOGIC: bind not !~ instead of only bind =~, disable die on !~ above !!!
    # DEV NOTE: $cpp_source_group->{CPP} already contains the generated subexpression to be used as the subject of the regex
    # EXAMPLE C++ CODE: regex("FIND", "MODS_COMP").preplace(&foo, "REPLACE_WITH", "MODS_SUBST")
    die q{ERROR ECOGEASCP080: Unrecognized regular expression type '} . $match_or_substitute . q{' found, must be 'm' for match or 's' for substitute, dying};
}
}
else {
    # the grammar rule named by $self_class is not the expected Operator_110
    die RPerl::Parser::rperl_rule__replace( 'ERROR ECOGEASCP000, CODE GENERATOR, ABSTRACT SYNTAX TO RPERL: Grammar rule ' . $self_class . ' found where Operator_110 expected, dying' ) . "\n";
}
# dump the generated source group for diagnostics, then hand it back to the caller
RPerl::diag( 'in Operator::RegularExpression->ast_to_cpp__generate__CPPOPS_CPPTYPES(), about to return $cpp_source_group = ' . "\n" . RPerl::Parser::rperl_ast__dump($cpp_source_group) . "\n" );
return $cpp_source_group;
}
1; # end of class
Keyboard Shortcuts
Global
s
Focus search bar
?
Bring up this help dialog
GitHub
gp
Go to pull requests
gi
go to github issues (only if github is preferred repository)