—package
Regexp::Parsertron;
use
strict;
use
warnings;
#use warnings qw(FATAL utf8); # Fatalize encoding glitches.
use
Marpa::R2;
use
Moo;
use
Tree;
use
Try::Tiny;
has
ambiguous
=>
(
default
=>
sub
{
return
0},
is
=>
'rw'
,
isa
=> Int,
required
=> 0,
);
has
bnf
=>
(
default
=>
sub
{
return
''
},
is
=>
'rw'
,
isa
=> Any,
required
=> 0,
);
has
current_node
=>
(
default
=>
sub
{
return
''
},
is
=>
'rw'
,
isa
=> Any,
required
=> 0,
);
has
grammar
=>
(
default
=>
sub
{
return
''
},
is
=>
'rw'
,
isa
=> Any,
required
=> 0,
);
has
re
=>
(
default
=>
sub
{
return
''
},
is
=>
'rw'
,
isa
=> Any,
required
=> 0,
);
has
recce
=>
(
default
=>
sub
{
return
''
},
is
=>
'rw'
,
isa
=> Any,
required
=> 0,
);
has
test_count
=>
(
default
=>
sub
{
return
0},
is
=>
'rw'
,
isa
=> Int,
required
=> 0,
);
has
tree
=>
(
default
=>
sub
{
return
Tree -> new(
'Root'
)},
is
=>
'rw'
,
isa
=> Any,
required
=> 0,
);
has
uid
=>
(
default
=>
sub
{
return
0},
is
=>
'rw'
,
isa
=> Int,
required
=> 0,
);
has
verbose
=>
(
default
=>
sub
{
return
0},
is
=>
'rw'
,
isa
=> Int,
required
=> 0,
);
has
warning_str
=>
(
default
=>
sub
{
return
''
},
is
=>
'rw'
,
isa
=> Str,
required
=> 0,
);
our
$VERSION
=
'1.05'
;
# ------------------------------------------------
sub
BUILD
{
my
(
$self
) =
@_
;
my
(
$bnf
) = get_data_section(
'V 5.20'
);
$self
-> bnf(
$bnf
);
$self
-> grammar
(
Marpa::R2::Scanless::G -> new
({
source
=> \
$self
-> bnf
})
);
$self
->
reset
;
}
# End of BUILD.
# ------------------------------------------------
sub
append
{
my
(
$self
,
%opts
) =
@_
;
for
my
$param
(
qw/text uid/
)
{
# The \n stops Perl printing the line number.
die
"Method append() takes a hash with these keys: text, uid\n"
if
(!
defined
(
$opts
{
$param
}) );
}
my
(
$meta
);
my
(
$uid
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
$uid
=
$$meta
{uid};
if
(
$opts
{uid} ==
$uid
)
{
$$meta
{text} .=
$opts
{text};
}
}
}
# End of append.
# ------------------------------------------------
sub
_add_daughter
{
my
(
$self
,
$event_name
,
$attributes
) =
@_
;
$$attributes
{uid} =
$self
-> uid(
$self
-> uid + 1);
my
(
$node
) = Tree -> new(
$event_name
);
$node
-> meta(
$attributes
);
if
(
$event_name
=~ /^close_(?:bracket|parenthesis)$/)
{
$self
-> current_node(
$self
-> current_node -> parent);
}
$self
-> current_node -> add_child(
$node
);
if
( (
$event_name
=~ /^open_(?:bracket|parenthesis)$/) || (
$event_name
=~ /_prefix$/) )
{
$self
-> current_node(
$node
);
}
}
# End of _add_daughter.
# ------------------------------------------------
sub
as_string
{
my
(
$self
) =
@_
;
my
(
$string
) =
''
;
my
(
$meta
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
$string
.=
$$meta
{text};
}
return
$string
;
}
# End of as_string.
# ------------------------------------------------
sub
find
{
my
(
$self
,
$target
) =
@_
;
# The \n stops Perl printing the line number.
die
"Method find() takes a defined value as the parameter\n"
if
(!
defined
$target
);
my
(
@found
);
my
(
$meta
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
if
(
index
(
$$meta
{text},
$target
) >= 0)
{
push
@found
,
$$meta
{uid};
}
}
return
[
@found
];
}
# End of find.
# ------------------------------------------------
sub
get
{
my
(
$self
,
$wanted_uid
) =
@_
;
my
(
$max_uid
) =
$self
-> uid;
if
(!
defined
(
$wanted_uid
) || (
$wanted_uid
< 1) || (
$wanted_uid
>
$self
-> uid) )
{
# The \n stops Perl printing the line number.
die
"Method get() takes a uid parameter in the range 1 .. $max_uid\n"
;
}
my
(
$meta
);
my
(
$text
);
my
(
$uid
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
$uid
=
$$meta
{uid};
if
(
$wanted_uid
==
$uid
)
{
$text
=
$$meta
{text};
}
}
return
$text
;
}
# End of get.
# ------------------------------------------------
sub
_next_few_chars
{
my
(
$self
,
$stringref
,
$offset
) =
@_
;
my
(
$s
) =
substr
(
$$stringref
,
$offset
, 20);
$s
=~
tr
/\n/ /;
$s
=~ s/^\s+//;
$s
=~ s/\s+$//;
return
$s
;
}
# End of _next_few_chars.
# ------------------------------------------------
sub
parse
{
my
(
$self
,
%opts
) =
@_
;
# Emulate parts of new(), which makes things a bit earier for the caller.
$self
-> re(
$opts
{re})
if
(
defined
$opts
{re});
$self
-> verbose(
$opts
{verbose})
if
(
defined
$opts
{verbose});
$self
-> warning_str(
''
);
$self
-> recce
(
Marpa::R2::Scanless::R -> new
({
exhaustion
=>
'event'
,
grammar
=>
$self
-> grammar,
})
);
# Return 0 for success and 1 for failure.
my
(
$result
) = 0;
my
(
$message
);
try
{
if
(
defined
(
my
$value
=
$self
-> _process) )
{
$self
-> print_cooked_tree
if
(
$self
-> verbose > 1);
}
else
{
$result
= 1;
if
(
$self
-> ambiguous)
{
die
"\n"
;
}
else
{
# The \n stops Perl printing the line number.
die
"Error: Marpa parse failed.\n"
;
}
}
}
catch
{
# The \n stops Perl printing the line number.
die
"$_\n"
;
};
# Return 0 for success and 1 for failure.
return
$result
;
}
# End of parse.
# ------------------------------------------------
sub
prepend
{
my
(
$self
,
%opts
) =
@_
;
for
my
$param
(
qw/text uid/
)
{
# The \n stops Perl printing the line number.
die
"Method append() takes a hash with these keys: text, uid\n"
if
(!
defined
(
$opts
{
$param
}) );
}
my
(
$meta
);
my
(
$uid
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
$uid
=
$$meta
{uid};
if
(
$opts
{uid} ==
$uid
)
{
$$meta
{text} =
"$opts{text}$$meta{text}"
;
}
}
}
# End of prepend.
# ------------------------------------------------
sub
_process
{
my
(
$self
) =
@_
;
my
(
$raw_re
) =
$self
-> re;
my
(
$test_count
) =
$self
-> test_count(
$self
-> test_count + 1);
# This line is 'print', not 'say'!
"Test count: $test_count. Parsing (in qr/.../ form): "
if
(
$self
-> verbose);
my
(
$string_re
) =
$self
-> _string2re(
$raw_re
);
if
(
$string_re
eq
''
)
{
"\n"
if
(
$self
-> verbose);
return
undef
;
}
"'$string_re'. \n"
if
(
$self
-> verbose);
if
(
$self
-> verbose > 1)
{
my
(
$format
) =
"%-10s %-5s %-20s %-6s %-30s %s \n"
;
sprintf
(
$format
,
' Location'
,
'Width'
,
'Lexeme'
,
'Events'
,
'Names'
,
'Next few chars'
);
}
my
(
$ref_re
) = \
"$string_re"
;
# Use " in comment for UltraEdit.
my
(
$length
) =
length
(
$string_re
);
my
(
$child
);
my
(
$event_name
);
my
(
$lexeme
);
my
(
$pos
);
my
(
$span
,
$start
);
# We use read()/lexeme_read()/resume() because we pause at each lexeme.
for
(
$pos
=
$self
-> recce ->
read
(
$ref_re
);
(
$pos
<
$length
);
$pos
=
$self
-> recce -> resume(
$pos
)
)
{
(
$start
,
$span
) =
$self
-> recce -> pause_span;
(
$event_name
,
$span
,
$pos
) =
$self
-> _validate_event(
$ref_re
,
$start
,
$span
,
$pos
,);
# If the input is exhausted, we exit immediately so we don't try to use
# the values of $start, $span or $pos. They are ignored upon exit.
last
if
(
$event_name
eq
"'exhausted"
);
# Yes, it has a leading quote.
$lexeme
=
$self
-> recce -> literal(
$start
,
$span
);
$pos
=
$self
-> recce -> lexeme_read(
$event_name
);
# The \n stops Perl printing the line number.
die
"Marpa lexeme_read($event_name) rejected lexeme '$lexeme'\n"
if
(!
defined
$pos
);
$self
-> _add_daughter(
$event_name
, {
text
=>
$lexeme
});
}
my
(
$message
);
if
(
my
$status
=
$self
-> recce -> ambiguous)
{
$self
-> ambiguous(1);
my
(
$terminals
) =
$self
-> recce -> terminals_expected;
$terminals
= [
'(None)'
]
if
(
$#$terminals
< 0); # Next line deliberately omits
'.'
after
$status
, so output lines up.
$message
=
"Marpa error. Parse ambiguous. Status: ${status}Terminals expected: "
.
join
(
', '
,
@$terminals
);
"$message\n"
;
$message
=
''
;
# To stop it being stored just below, and to stop it being printed again.
}
elsif
(
$self
-> recce -> exhausted)
{
# Special case. Sigh. I need to patch the BNF to do this. TODO.
if
( (
$pos
+ 1 ==
$length
) && (
substr
(
$string_re
,
$pos
, 1) eq
')'
) )
{
$self
-> _add_daughter(
'close_parenthesis'
, {
text
=>
')'
});
}
# for why this code is exhaustion-loving. This is not an error. See docs for details.
$message
=
'Marpa parse exhausted'
if
(
$self
-> verbose > 1);
}
if
(
$message
)
{
$self
-> warning_str(
$message
);
"$message\n"
if
(
$self
-> verbose);
}
$self
-> print_raw_tree
if
(
$self
-> verbose);
# Return a defined value for success and undef for failure.
# Note: value() can return undef.
return
$self
-> ambiguous ?
undef
:
$self
-> recce -> value;
}
# End of _process.
# ------------------------------------------------
sub
print_cooked_tree
{
my
(
$self
) =
@_
;
my
(
$format
) =
"%-30s %3s %s \n"
;
sprintf
(
$format
,
'Name'
,
'Uid'
,
'Text'
);
sprintf
(
$format
,
'----'
,
'---'
,
'----'
);
my
(
$meta
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
sprintf
(
$format
,
$node
-> value,
$$meta
{uid},
$$meta
{text});
}
}
# End of print_cooked_tree.
# ------------------------------------------------
sub
print_raw_tree
{
my
(
$self
) =
@_
;
map
(
"$_\n"
, @{
$self
-> tree -> tree2string});
}
# End of print_raw_tree.
# ------------------------------------------------
sub
reset
{
my
(
$self
) =
@_
;
$self
-> ambiguous(0);
$self
-> tree(Tree -> new(
'Root'
) );
$self
-> tree -> meta({
text
=>
'Root'
,
uid
=> 0});
$self
-> current_node(
$self
-> tree);
$self
-> uid(0);
$self
-> warning_str(
''
);
}
# End of reset.
# ------------------------------------------------
sub
search
{
my
(
$self
,
$target
) =
@_
;
# The \n stops Perl printing the line number.
die
"Method search() takes a defined value as the parameter\n"
if
(!
defined
$target
);
my
(
$re
) =
$self
-> _string2re(
$target
);
my
(
@found
);
my
(
$meta
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
if
(
$$meta
{text} =~
$re
)
{
push
@found
,
$$meta
{uid};
}
}
return
[
@found
];
}
# End of search.
# ------------------------------------------------
sub
set
{
my
(
$self
,
%opts
) =
@_
;
for
my
$param
(
qw/text uid/
)
{
# The \n stops Perl printing the line number.
die
"Method set() takes a hash with these keys: text, uid\n"
if
(!
defined
(
$opts
{
$param
}) );
}
my
(
$meta
);
my
(
$uid
);
for
my
$node
(
$self
-> tree -> traverse)
{
next
if
(
$node
-> is_root);
$meta
=
$node
-> meta;
$uid
=
$$meta
{uid};
if
(
$opts
{uid} ==
$uid
)
{
$$meta
{text} =
$opts
{text};
}
}
}
# End of set.
# ------------------------------------------------
sub
_string2re
{
my
(
$self
,
$raw_re
) =
@_
;
my
(
$re
);
try
{
$re
= does(
$raw_re
,
'Regexp'
) ?
$raw_re
:
qr/$raw_re/
;
}
catch
{
# The \n stops Perl printing the line number with 'die'.
die
"Error: Perl cannot convert $raw_re into qr/.../ form\n"
;
};
return
$re
;
}
# End of _string2re.
# ------------------------------------------------
sub
validate
{
my
(
$self
) =
@_
;
my
(
$re
) =
$self
-> as_string;
my
(
$result
);
try
{
$result
= (
'x'
=~
$re
) ? 0 : 0;
# Use any test to force Perl to process the Regexp.
}
catch
{
$result
= 1;
# Failure.
};
# Return 0 for success and 1 for failure.
return
$result
;
}
# End of validate.
# ------------------------------------------------
sub
_validate_event
{
my
(
$self
,
$stringref
,
$start
,
$span
,
$pos
) =
@_
;
my
(
@event
) = @{
$self
-> recce -> events};
my
(
$event_count
) =
scalar
@event
;
my
(
@event_names
) =
sort
map
{
$$_
[0]}
@event
;
my
(
$event_name
) =
$event_names
[0];
# Default.
# Handle some special cases.
if
(
$event_count
> 1)
{
my
(
$event_list
) =
join
(
', '
,
@event_names
);
if
(
$event_list
eq
'caret, string'
)
{
$event_count
= 1;
$event_name
=
'caret'
;
@event_names
=
$event_name
;
$pos
=
$start
;
$span
= 1;
}
elsif
(
$event_list
eq
'query, string'
)
{
$event_count
= 1;
$event_name
=
'query'
;
@event_names
=
$event_name
;
$pos
=
$start
;
$span
= 1;
}
elsif
(
$event_list
eq
'string, vertical_bar'
)
{
$event_count
= 1;
$event_name
=
'vertical_bar'
;
@event_names
=
$event_name
;
$pos
=
$start
;
$span
= 1;
}
else
{
#$self -> print_cooked_tree;
# The \n stops Perl printing the line number.
die
"event_count: $event_count. "
.
$event_list
.
"\n"
;
}
}
# If the input is exhausted, we return immediately so we don't try to use
# the values of $start, $span or $pos. They are ignored upon return.
if
(
$event_name
eq
"'exhausted"
)
# Yes, it has a leading quote.
{
return
(
$event_name
,
$span
,
$pos
);
}
my
(
$lexeme
) =
substr
(
$$stringref
,
$start
,
$span
);
my
(
$line
,
$column
) =
$self
-> recce -> line_column(
$start
);
my
(
$literal
) =
$self
-> _next_few_chars(
$stringref
,
$start
+
$span
);
my
(
$message
) =
"Location: ($line, $column). Lexeme: $lexeme. Events: $event_count. Names: "
;
my
(
$name_list
) =
join
(
', '
,
@event_names
);
$message
.=
". Next few chars: $literal"
;
if
(
$self
-> verbose > 1)
{
my
(
$format
) =
"%4d, %4d %5d %-20s %6d %-30s %s \n"
;
sprintf
(
$format
,
$line
,
$column
,
length
(
$lexeme
),
$lexeme
,
$event_count
,
$name_list
,
$literal
);
}
return
(
$event_name
,
$span
,
$pos
);
}
# End of _validate_event.
# ------------------------------------------------
1;
=pod
=head1 NAME
C<Regexp::Parsertron> - Parse a Perl regexp into a data structure of type L<Tree>
Warning: Development version. See L</Version Numbers> for details.
=head1 Synopsis
=head2 Sample Code
This is scripts/synopsis.pl:
#!/usr/bin/env perl
use v5.10;
use strict;
use warnings;
use Regexp::Parsertron;
# ---------------------
my($re) = qr/Perl|JavaScript/i;
my($parser) = Regexp::Parsertron -> new(verbose => 1);
# Return 0 for success and 1 for failure.
my($result) = $parser -> parse(re => $re);
my($node_id) = 5; # Obtained from displaying and inspecting the tree.
print "Calling append(text => '|C++', uid => $node_id) \n";
$parser -> append(text => '|C++', uid => $node_id);
$parser -> print_raw_tree;
$parser -> print_cooked_tree;
my($as_string) = $parser -> as_string;
print "Original: $re. Result: $result (0 is success) \n";
print "as_string(): $as_string \n";
$result = $parser -> validate;
print "validate(): Result: $result (0 is success) \n";
# Return 0 for success and 1 for failure.
$parser -> reset;
$parser -> verbose(0);
$re = qr/Perl|JavaScript|(?:Flub|BCPL)/i;
$result = $parser -> parse(re => $re);
print "\nAdd complexity to the regexp by parsing a new regexp \n";
$parser -> print_raw_tree;
And its output:
Test count: 1. Parsing (in qr/.../ form): '(?^i:Perl|JavaScript)'.
Root. Attributes: {text => "Root", uid => "0"}
|--- open_parenthesis. Attributes: {text => "(", uid => "1"}
| |--- query_caret. Attributes: {text => "?^", uid => "2"}
| |--- flag_set. Attributes: {text => "i", uid => "3"}
| |--- colon. Attributes: {text => ":", uid => "4"}
| |--- string. Attributes: {text => "Perl|JavaScript", uid => "5"}
|--- close_parenthesis. Attributes: {text => ")", uid => "6"}
Calling append(text => '|C++', uid => 5)
Root. Attributes: {text => "Root", uid => "0"}
|--- open_parenthesis. Attributes: {text => "(", uid => "1"}
| |--- query_caret. Attributes: {text => "?^", uid => "2"}
| |--- flag_set. Attributes: {text => "i", uid => "3"}
| |--- colon. Attributes: {text => ":", uid => "4"}
| |--- string. Attributes: {text => "Perl|JavaScript|C++", uid => "5"}
|--- close_parenthesis. Attributes: {text => ")", uid => "6"}
Name Uid Text
---- --- ----
open_parenthesis 1 (
query_caret 2 ?^
flag_set 3 i
colon 4 :
string 5 Perl|JavaScript|C++
close_parenthesis 6 )
Original: (?^i:Perl|JavaScript). Result: 0 (0 is success)
as_string(): (?^i:Perl|JavaScript|C++)
validate(): Result: 0 (0 is success)
Adding complexity to the regexp by parsing a new regexp:
Root. Attributes: {text => "Root", uid => "0"}
|--- open_parenthesis. Attributes: {text => "(", uid => "1"}
| |--- query_caret. Attributes: {text => "?^", uid => "2"}
| |--- flag_set. Attributes: {text => "i", uid => "3"}
| |--- colon. Attributes: {text => ":", uid => "4"}
| |--- string. Attributes: {text => "Perl|JavaScript|", uid => "5"}
| |--- colon_prefix. Attributes: {text => "(?:", uid => "6"}
| | |--- string. Attributes: {text => "Flub|BCPL", uid => "7"}
| |--- close_parenthesis. Attributes: {text => ")", uid => "8"}
|--- close_parenthesis. Attributes: {text => ")", uid => "9"}
Note: The 1st tree is printed due to verbose => 1 in the call to L</new([%opts])>, while the 2nd
is due to the call to L</print_raw_tree()>. The columnar output is due to the call to
L</print_cooked_tree()>.
=head2 Tutorial
=over 4
=item o Start with a simple program and a simple regexp
This code, scripts/tutorial.pl, is a cut-down version of scripts/synopsis.pl:
#!/usr/bin/env perl
use v5.10;
use strict;
use warnings;
use Regexp::Parsertron;
# ---------------------
my($re) = qr/Perl|JavaScript/i;
my($parser) = Regexp::Parsertron -> new(verbose => 1);
# Return 0 for success and 1 for failure.
my($result) = $parser -> parse(re => $re);
print "Original: $re. Result: $result. (0 is success) \n";
Running it outputs:
Test count: 1. Parsing (in qr/.../ form): '(?^i:Perl|JavaScript)'.
Root. Attributes: {text => "Root", uid => "0"}
|--- open_parenthesis. Attributes: {text => "(", uid => "1"}
| |--- query_caret. Attributes: {text => "?^", uid => "2"}
| |--- flag_set. Attributes: {text => "i", uid => "3"}
| |--- colon. Attributes: {text => ":", uid => "4"}
| |--- string. Attributes: {text => "Perl|JavaScript", uid => "5"}
|--- close_parenthesis. Attributes: {text => ")", uid => "6"}
Original: (?^i:Perl|JavaScript). Result: 0. (0 is success)
=item o Examine the tree and determine which nodes you wish to edit
The nodes are uniquely identified by their uids.
=item o Proceed as does scripts/synopsis.pl
Add these lines to the end of the tutorial code, and re-run:
my($node_id) = 5; # Obtained from displaying and inspecting the tree.
$parser -> append(text => '|C++', uid => $node_id);
$parser -> print_raw_tree;
The extra output, showing the change to node uid == 5, is:
Root. Attributes: {text => "Root", uid => "0"}
|--- open_parenthesis. Attributes: {text => "(", uid => "1"}
| |--- query_caret. Attributes: {text => "?^", uid => "2"}
| |--- flag_set. Attributes: {text => "i", uid => "3"}
| |--- colon. Attributes: {text => ":", uid => "4"}
| |--- string. Attributes: {text => "Perl|JavaScript|C++", uid => "5"}
|--- close_parenthesis. Attributes: {text => ")", uid => "6"}
=item o Test also with L</prepend(%opts)> and L</set(%opts)>
See t/get.set.t for sample code.
=item o Since everything works, make a cup of tea
=back
=head2 The Edit Methods
The I<edit methods> simply means any one or more of these methods, which can all change the text of
a node:
=over 4
=item o L</append(%opts)>
=item o L</prepend(%opts)>
=item o L</set(%opts)>
=back
The edit methods are exercised in t/get.set.t, as well as scripts/synopsis.pl (above).
=head1 Description
Parses a regexp into a tree object managed by the L<Tree> module, and provides various methods for
updating and retrieving that tree's contents.
This module uses L<Marpa::R2> and L<Moo>.
=head1 Distributions
This module is available as a Unix-style distro (*.tgz).
for help on unpacking and installing distros.
=head1 Installation
Install C<Regexp::Parsertron> as you would any C<Perl> module:
Run:
cpanm Regexp::Parsertron
or run:
sudo cpan Regexp::Parsertron
or unpack the distro, and then use:
perl Makefile.PL
make (or dmake or nmake)
make test
make install
=head1 Constructor and Initialization
C<new()> is called as C<< my($parser) = Regexp::Parsertron -> new(k1 => v1, k2 => v2, ...) >>.
It returns a new object of type C<Regexp::Parsertron>.
Key-value pairs accepted in the parameter list (see corresponding methods for details
[e.g. L</re([$regexp])>]):
=over 4
=item o re => $regexp
The C<does()> method of L<Scalar::Does> is called to see what C<re> is. If it's already of the
form C<qr/$re/>, then it's processed as is, but if it's not, then it's transformed using C<qr/$re/>.
Warning: Currently, the input is expected to have been pre-processed by Perl via qr/$regexp/.
Default: ''.
=item o verbose => $integer
Takes values 0, 1 or 2, which print more and more progress reports.
Used for debugging.
Default: 0 (print nothing).
=back
=head1 Methods
=head2 append(%opts)
Append some text to the text of a node.
%opts is a hash with these (key => value) pairs:
=over 4
=item o text => $string
The text to append.
=item o uid => $uid
The uid of the node to update.
=back
The code calls C<die()> if %opts does not have these 2 keys, or if either value is undef.
See scripts/synopsis.pl for sample code.
Note: Calling C<append()> never changes the uids of nodes, so repeated calling of C<append()> with
the same C<uid> will apply more and more updates to the same node.
See also L</prepend(%opts)>, L</set(%opts)> and t/get.set.t.
=head2 as_string()
Returns the parsed regexp as a string. The string contains all edits applied with
L</The Edit Methods>.
=head2 find($target)
Returns an arrayref of node uids whose text contains the given string.
If the arrayref is empty, there were no matches.
The Perl function C<index()> is used here to test for $target being a substring of the text
associated with each node.
The code calls C<die()> if $target is undef.
See t/get.set.t for sample usage of C<find()>.
See L</search($target)> for a regexp-based test. See also L</get($uid)>.
=head2 get($uid)
Get the text of the node with the given $uid.
The code calls C<die()> if $uid is undef, or outside the range 1 .. $self -> uid. The latter value
is the highest uid so far assigned to any node.
Returns undef if the given $uid is not found.
See also L</find($target)>.
=head2 new([%opts])
Here, '[]' indicate an optional parameter.
See L</Constructor and Initialization> for details on the parameters accepted by L</new()>.
=head2 parse([%opts])
Here, '[]' indicate an optional parameter.
Parses the regexp supplied with the parameter C<re> in the call to L</new()> or in the call to
L</re($regexp)>, or in the call to C<< parse(re => $regexp) >> itself. The latter takes precedence.
The hash C<%opts> takes the same (key => value) pairs as L</new()> does.
See L</Constructor and Initialization> for details.
=head2 prepend(%opts)
Prepend some text to the text of a node.
%opts is a hash with these (key => value) pairs:
=over 4
=item o text => $string
The text to prepend.
=item o uid => $uid
The uid of the node to update.
=back
The code calls C<die()> if %opts does not have these 2 keys, or if either value is undef.
Note: Calling C<prepend()> never changes the uids of nodes, so repeated calling of C<prepend()> with
the same C<uid> will apply more and more updates to the same node.
See also L</append(%opts)>, L</set(%opts)>, and t/get.set.t.
=head2 print_cooked_tree()
Prints, in a pretty format, the tree built from parsing.
See the </Synopsis> for sample output.
See also L</print_raw_tree>.
=head2 print_raw_tree()
Prints, in a simple format, the tree built from parsing.
See the </Synopsis> for sample output.
See also L</print_cooked_tree>.
=head2 re([$regexp])
Here, '[]' indicate an optional parameter.
Gets or sets the regexp to be processed.
Note: C<re> is a parameter to L</new([%opts])>.
=head2 reset()
Resets various internal things, except test_count.
Used basically for debugging.
=head2 search($target)
Returns an arrayref of node uids whose text contains the given string.
If the arrayref is empty, there were no matches.
$target is converted to a regexp if a simple string is passed in.
The code calls C<die()> if $target is undef.
See t/search.t for sample usage of C<search()>.
See L</find($target)> for a non-regexp search. See also L</get($uid)>.
=head2 set(%opts)
Set the text of a node to $opt{text}.
%opts is a hash with these (key => value) pairs:
=over 4
=item o text => $string
The text to use to overwrite the text of the node.
=item o uid => $uid
The uid of the node to update.
=back
The code calls C<die()> if %opts does not have these 2 keys, or if either value is undef.
See also L</append(%opts)> and L</prepend(%opts)>.
=head2 tree()
Returns an object of type L<Tree>. Ignore the root node.
Each node's C<meta()> method returns a hashref of information about the node. See the
L</What is the format of the nodes in the tree built by this module?> for details.
See also the source code for L</print_cooked_tree()> and L</print_raw_tree()> for ideas on how to
use this object.
=head2 uid()
Returns the last-used uid.
Each node in the tree is given a uid, which allows methods like L</append(%opts)> to work.
=head2 verbose([$integer])
Here, '[]' indicate an optional parameter.
Gets or sets the verbosity level, within the range 0 .. 2. Higher numbers print more progress
reports.
Used basically for debugging.
Note: C<verbose> is a parameter to L</new([%opts])>.
=head2 warning_str()
Returns the last Marpa warning.
In short, Marpa will always report 'Marpa parse exhausted' in warning_str() if the parse is not
ambiguous, but do not worry - I<this is not an error>.
See L<After calling parse(), warning_str() contains the string '... Parse ambiguous ...'|/FAQ> and
L<Is this a (Marpa) exhaustion-hating or exhaustion-loving app?|/FAQ>.
=head1 FAQ
=head2 Can I add a subtree to the tree?
Not yet.
There is a private method, C<_add_daughter()>, which I could make public, if I felt it was safe to
do so.
=head2 Why does the BNF not accept an empty regexp?
Simple answer: Changing the BNF to handle this creates a massive problem elsewhere in the BNF.
Complex answer:
The BNF contains this countable rule to allow patterns to be juxtaposed without '|', say, to
separate them:
global_sequence ::= pattern_type+
And in turn (further toward the leaves of the tree of BNF), I then use:
pattern_sequence ::= pattern_set+
To allow an empty regexp would mean changing this rule to:
pattern_sequence ::= pattern_set*
But that makes this rule nullable, and Marpa rejects the C<global_sequence> rule on the grounds that
a countable rule is not allowed to be nullable. ATM I cannot see a way of
rewriting the rules to avoid this problem. But I'm hopeful such a rewrite is possible.
=head2 Why does the code sometimes not store '|' - as in qr/(Perl|JavaScript/) - in its own node?
It could be done by, for example, splitting such a string into three nodes, 'Perl', '|',
'Javascript'. But does that offer any benefit?
It makes processing by the user more complex because then if they wish to edit the list of
alternatives, they might have to edit two or three nodes instead of one. Here, editing means perhaps
replacing any existing string with the empty string.
Further, to extend the list of alternatives, the user will be confused by not being sure if they
should change 'Javascript' to 'Javascript|C' or if they have to add two nodes, containing '|' and
'C'. And ATM adding nodes is contraindicated!
Despite this, when the input stream triggers two events, C<string> and C<vertical_bar>,
simultaneously because the '|' is at the start of a string, special code in the private method
C<_validate_event()> does put '|' in its own node. IOW the BNF does not do the work, which is really
what I would prefer.
=head2 Does this module ever use \Q...\E to quote regexp metacharacters?
No.
=head2 What is the format of the nodes in the tree built by this module?
Each node's C<name> is the name of the Marpa-style event which was triggered by detection of
some C<text> within the regexp.
Each node's C<meta()> method returns a hashref with these (key => value) pairs:
=over 4
=item o text => $string
This is the text within the regexp which triggered the event just mentioned.
=item o uid => $integer
This is the unique id of the 'current' node.
This C<uid> is often used by you to specify which node to work on.
See t/get.set.t and t/simple.t for sample code.
The code never changes the uid of a node.
=back
See also the source code for L</print_cooked_tree()> and L</print_raw_tree()> for ideas on how to
use the tree.
See the L</Synopsis> for sample code and a report after parsing a tiny regexp.
=head2 Does the root node in the tree ever hold useful information?
No. Always ignore it.
=head2 Why does the BNF never use the lexeme adverb C<priority>?
Because with Marpa::R2 the priority is only used when lexemes are the same length.
L<See FAQ #140|https://savage.net.au/Perl-modules/html/marpa.faq/faq.html#q140>.
=head2 Does this module interpret regexps in any way?
No. You have to run your own Perl code to do that. This module just parses them into a data
structure.
And that really means this module does not match the regexp against anything. If I appear to do that
while debugging new code, you can't rely on that appearing in production versions of the module.
=head2 Does this module rewrite regexps?
No, unless you call one of L</The Edit Methods>.
=head2 Does this module handle both Perl 5 and Perl 6?
No. It will only handle Perl 5 syntax.
=head2 Does this module handle regexps for various versions of Perl5?
Not yet. Version-dependent regexp syntax will be supported for recent versions of Perl. This is
done by having tokens within the BNF which are replaced at start-up time with version-dependent
details.
There are no such tokens at the moment.
All debugging is done assuming the regexp syntax as documented online. See L</References> for the
urls in question.
=head2 So which version of Perl is supported?
The code is expected to work for Perls back to V 5.14.0, which is when stringification of regexps
changed. See L</References> below for more.
I'm (2018-01-14) using Perl V 5.20.2 and making the BNF match the Perl regexp docs listed in
L</References> below.
The program t/perl-5.21.11.t reads the file 'xt/author/re_tests' which I copied from the source code
of Perl V 5.21.11. This test is the one which currently provides 858 passing tests out of the 1027
tests which pass for me using prove -lv t.
=head2 Could Perl and this module generate different parses of the same regexp?
Absolutely! There is no escape from this fact simply because the code used in each program bears no
relationship to the code in the other one.
The real question is: How do we make the code in each program accept and reject exactly the same
regexps as the code in the other program. I think trial-and-error is all we have available to us for
dealing with this issue.
=head2 After calling parse(), warning_str() contains the string '... Parse ambiguous ...'
This is almost certainly an error with the BNF, although of course it may be an error with an
exceptionally-badly formed regexp.
See examples/ambiguous.pl and
L<this email thread|https://www.nntp.perl.org/group/perl.perl5.porters/2018/07/msg251447.html>.
See examples/commit.pl and
L<this email thread|https://www.nntp.perl.org/group/perl.perl5.porters/2018/07/msg251463.html>.
In such cases the code dies, as of V 1.04.
Please report it via L<https://rt.cpan.org/Public/Dist/Display.html?Name=Regexp-Parsertron>, and
include the regexp in the report. Thanx!
=head2 Is this a (Marpa) exhaustion-hating or exhaustion-loving app?
Exhaustion-loving.
=head2 Will this code be modified to run under Marpa::R3 when the latter is stable?
Yes.
=head2 What is the purpose of this module?
=over 4
=item o To provide a stand-alone parser for regexps
=item o To help me learn more about regexps
=item o To become, I hope, a replacement for the horrendously complex L<Regexp::Assemble>
=back
=head2 Who crafted the BNF?
I did.
=head1 Scripts
This diagram indicates the flow of logic from script to script:
xt/author/re_tests
|
V
xt/author/generate.tests.pl
|
V
xt/authors/perl-5.21.11.tests
|
V
perl -Ilib t/perl-5.21.11.t > xt/author/perl-5.21.11.log 2>&1
If xt/author/perl-5.21.11.log only contains lines starting with 'ok', then all Perl and Marpa
errors have been hidden, so t/perl-5.21.11.t is ready to live in t/. Before that time it lives in
xt/author/.
=head1 TODO
=over 4
=item o How to best define 'code' in the BNF.
=item o I could traverse the tree and store a pointer to each node in an array
This would mean fast access to nodes in random order. But is there any point? Yes, it would speed up
various methods. Specifically, any module which calls C<traverse()> on the tree object would
benefit.
=item o Allow users to add nodes and hence subtrees to the tree
=back
=head1 References
L<https://www.rexegg.com/regex-lookarounds.html>. Mastering Lookahead and Lookbehind.
L<http://www.pcre.org/>. PCRE - Perl Compatible Regular Expressions.
L<http://perldoc.perl.org/perlre.html>. This is the definitive document.
L<http://perldoc.perl.org/perlretut.html>. Samples with commentary.
L<http://perldoc.perl.org/perl5140delta.html#Regular-Expressions>. This is when stringification
changed to return (?^...) rather than (?-xism...).
L<http://www.nntp.perl.org/group/perl.perl5.porters/2016/02/msg234642.html>. Regular Expression
Inconsistencies With Unicode.
=head1 See Also
L<Graph::Regexp>
L<Regexp::Assemble>
L<Regexp::Debugger>
L<Regexp::ERE>
L<Regexp::Keywords>
L<Regexp::Lexer>
L<Regexp::List>
L<Regexp::Optimizer>
L<Regexp::Parser>
L<Regexp::SAR>. This is vaguely a version of L<Set::FA::Element>.
L<Regexp::Stringify>
L<Regexp::Trie>
And many others...
=head1 Machine-Readable Change Log
The file Changes was converted into Changelog.ini by L<Module::Metadata::Changes>.
=head1 Version Numbers
Version numbers < 1.00 represent development versions. From 1.00 up, they are production versions.
=head1 CPAN Tester Results
=head1 Repository
=head1 Support
Email the author, or log a bug on RT:
=head1 Author
L<Regexp::Parsertron> was written by Ron Savage I<E<lt>ron@savage.net.auE<gt>> in 2011.
Marpa's homepage: L<http://savage.net.au/Marpa.html>.
L<My homepage|http://savage.net.au/>.
=head1 Copyright
Australian copyright (c) 2016, Ron Savage.
All Programs of mine are 'OSI Certified Open Source Software';
you can redistribute them and/or modify them under the terms of
The Artistic License 2.0, a copy of which is available at:
=cut
__DATA__
@@ V 5.20
:default ::= action => [values]
lexeme default = latm => 1
:start ::= regexp
# G1 stuff.
regexp ::= open_parenthesis global_prefix global_sequence close_parenthesis
global_prefix ::= query_caret flag_sequence colon
flag_sequence ::= positive_flags negative_flag_set
positive_flags ::=
positive_flags ::= flag_set
negative_flag_set ::=
negative_flag_set ::= minus negative_flags
negative_flags ::= flag_set
# Extended patterns from http://perldoc.perl.org/perlre.html:
global_sequence ::= pattern_type+
pattern_type ::= comment_thingy # 1. Extended patterns.
| flag_thingy # 2.
| colon_thingy # 3.
| vertical_bar_thingy # 4.
| equals_thingy # 5.
| exclamation_mark_thingy # 6.
| less_or_equals_thingy # 7.
| less_exclamation_mark_thingy # 8. See elsewhere for 9 and 10.
| single_brace_thingy # 11.
| double_brace_thingy # 12.
| recursive_subpattern_thingy # 13.
| recurse_thingy # 14.
| conditional_thingy # 15.
| greater_than_thingy # 16.
| extended_bracketed_thingy # 17.
| pattern_sequence # 99.
# 1: (?#text)
comment_thingy ::= comment_prefix comment close_parenthesis
comment ::= non_close_parenthesis*
# 2: (?adlupimnsx-imnsx)
# & (?^alupimnsx)
flag_thingy ::= open_parenthesis query flag_set_1
| open_parenthesis query_caret flag_set_2 close_parenthesis
flag_set_1 ::= flag_sequence # adlupimnsx-imnsx
flag_set_2 ::= flag_sequence # alupimnsx
# 3: (?:pattern) Eg: (?:(?<n>foo)|(?<n>bar))\k<n>
# & (?adluimnsx-imnsx:pattern)
# & (?^aluimnsx:pattern)
colon_thingy ::= colon_prefix pattern_sequence close_parenthesis
| open_parenthesis query flag_set_3 colon pattern_sequence close_parenthesis
| open_parenthesis query_caret positive_flags colon pattern_sequence close_parenthesis
flag_set_3 ::= flag_sequence # adluimnsx-imnsx
# 99. Non-extended patterns.
pattern_sequence ::= pattern_set+
pattern_set ::= pattern_item
| pattern_item '|' pattern_item
pattern_item ::= bracket_pattern
| named_capture_group_thingy # 9.
| parenthesis_pattern
| slash_pattern
| character_sequence
bracket_pattern ::= open_bracket characters_in_set close_bracket
# Perl accepts /()/.
# Perl does not accept /[]/.
characters_in_set ::= character_in_set+
character_in_set ::= escaped_close_bracket
| non_close_bracket
character_sequence ::= simple_character_sequence+
simple_character_sequence ::= escaped_close_parenthesis
| escaped_open_parenthesis
| escaped_slash
| caret
| vertical_bar
| string
parenthesis_pattern ::= open_parenthesis pattern_sequence close_parenthesis
slash_pattern ::= slash pattern_sequence slash
# 4: (?|pattern)
vertical_bar_thingy ::= vertical_bar_prefix pattern_sequence close_parenthesis
# 5: (?=pattern)
equals_thingy ::= equals_prefix pattern_sequence close_parenthesis
# 6: (?!pattern)
exclamation_mark_thingy ::= exclamation_mark_prefix pattern_sequence close_parenthesis
# 7: (?<=pattern
# & \K
less_or_equals_thingy ::= less_or_equals_prefix close_parenthesis
| escaped_K
# 8: (?<!pattern)
less_exclamation_mark_thingy ::= less_exclamation_mark_prefix close_parenthesis
# 9: (?<NAME>pattern)
# & (?'NAME'pattern)
named_capture_group_thingy ::= named_capture_group_prefix named_capture_group_set close_parenthesis named_backreference_thingy
named_capture_group_set ::= named_capture_group
| named_capture_group '|' named_capture_group_set
named_capture_group ::= capture_group_item pattern_sequence
capture_group_item ::= capture_name named_capture_group_suffix
# 10: \k<NAME>
# & \k'NAME'
# & \k{NAME}
# & \g{NAME}
named_backreference_thingy ::=
named_backreference_thingy ::= named_backreference_prefix capture_group_item
# 11: (?{ code })
single_brace_thingy ::= single_brace_prefix code close_brace close_parenthesis
code ::= [[:print:]] # TODO.
# 12: (??{ code })
double_brace_thingy ::= double_brace_prefix code close_brace close_parenthesis
# 13: (?PARNO) || (?-PARNO) || (?+PARNO) || (?R) || (?0)
recursive_subpattern_thingy ::= recursive_subpattern_prefix close_parenthesis
recursive_subpattern_prefix ::= recursive_subpattern_minus
| recursive_subpattern_natural
| recursive_subpattern_plus
| recursive_subpattern_R # Includes 0.
# 14: (?&NAME)
recurse_thingy ::= recurse_prefix capture_name close_parenthesis
| open_parenthesis query_P greater_than capture_name close_parenthesis
# 15: (?(condition)yes-pattern|no-pattern)
# & (?(condition)yes-pattern)
conditional_thingy ::= condition_prefix close_parenthesis
condition_prefix ::= condition_natural
| condition_capture_group
| equals_thingy
| exclamation_mark_thingy
| less_or_equals_thingy # Includes \K.
| less_exclamation_mark_thingy
| single_brace_thingy
| condition_R
| condition_predicate_check
| DEFINE
condition_natural ::= condition_natural_prefix close_parenthesis
condition_capture_group ::= condition_capture_group_prefix capture_group_item close_parenthesis
condition_predicate_check ::= condition_predicate_prefix capture_name close_parenthesis
condition_R ::= condition_R_prefix close_parenthesis
# 16: (?>pattern)
greater_than_thingy ::= greater_than_prefix close_parenthesis
# 17: (?[ ])
extended_bracketed_thingy ::= extended_bracketed_prefix character_classes close_bracket close_parenthesis
character_classes ::= [[:print:]]
# L0 stuff, in alphabetical order.
#
# Policy: Event names are always the same as the name of the corresponding lexeme.
#
# Note: Tokens of the form '_xxx_', if any, are replaced with version-dependent values.
#
###########################################
###########################################
###########################################
###########################################
# Warning. Double warning. Triple warning.#
###########################################
# Lexemes whose names match /_prefix$/ #
# are special in that they are used by #
# add_daughter() to add depth to the tree.#
###########################################
###########################################
###########################################
###########################################
:lexeme ~ capture_name pause => before event => capture_name
capture_name ~ capture_name_head capture_name_tail
capture_name_head ~ [_A-Za-z]
capture_name_tail ~ [_A-Za-z0-9]*
:lexeme ~ caret pause => before event => caret
caret ~ '^'
:lexeme ~ close_brace pause => before event => close_brace
close_brace ~ '}'
:lexeme ~ close_bracket pause => before event => close_bracket
close_bracket ~ ']'
:lexeme ~ close_parenthesis pause => before event => close_parenthesis
close_parenthesis ~ ')'
:lexeme ~ colon pause => before event => colon
colon ~ ':'
:lexeme ~ colon_prefix pause => before event => colon_prefix
colon_prefix ~ '(?:'
:lexeme ~ comment_prefix pause => before event => comment_prefix
comment_prefix ~ '(?#'
:lexeme ~ condition_capture_group_prefix pause => before event => condition_capture_group_prefix
condition_capture_group_prefix ~ '(<'
:lexeme ~ condition_natural_prefix pause => before event => condition_natural_prefix
condition_natural_prefix ~ '(1'
condition_natural_prefix ~ '(2'
condition_natural_prefix ~ '(3'
condition_natural_prefix ~ '(4'
condition_natural_prefix ~ '(5'
condition_natural_prefix ~ '(6'
condition_natural_prefix ~ '(7'
condition_natural_prefix ~ '(8'
condition_natural_prefix ~ '(9'
condition_natural_prefix ~ '(10'
condition_natural_prefix ~ '(11'
condition_natural_prefix ~ '(12'
condition_natural_prefix ~ '(13'
condition_natural_prefix ~ '(14'
condition_natural_prefix ~ '(15'
condition_natural_prefix ~ '(16'
condition_natural_prefix ~ '(17'
condition_natural_prefix ~ '(18'
condition_natural_prefix ~ '(19'
condition_natural_prefix ~ '(20'
:lexeme ~ condition_predicate_prefix pause => before event => condition_predicate_prefix
condition_predicate_prefix ~ '(R&'
:lexeme ~ condition_R_prefix pause => before event => condition_R_prefix
condition_R_prefix ~ '(R'
condition_R_prefix ~ '(R1'
condition_R_prefix ~ '(R2'
condition_R_prefix ~ '(R3'
condition_R_prefix ~ '(R4'
condition_R_prefix ~ '(R5'
condition_R_prefix ~ '(R6'
condition_R_prefix ~ '(R7'
condition_R_prefix ~ '(R8'
condition_R_prefix ~ '(R9'
condition_R_prefix ~ '(R10'
condition_R_prefix ~ '(R11'
condition_R_prefix ~ '(R12'
condition_R_prefix ~ '(R13'
condition_R_prefix ~ '(R14'
condition_R_prefix ~ '(R15'
condition_R_prefix ~ '(R16'
condition_R_prefix ~ '(R17'
condition_R_prefix ~ '(R18'
condition_R_prefix ~ '(R19'
condition_R_prefix ~ '(R20'
:lexeme ~ DEFINE pause => before event => DEFINE
DEFINE ~ 'DEFINE'
:lexeme ~ double_brace_prefix pause => before event => double_brace_prefix
double_brace_prefix ~ '(?{{'
:lexeme ~ equals_prefix pause => before event => equals_prefix
equals_prefix ~ '(?='
:lexeme ~ escaped_close_bracket pause => before event => escaped_close_bracket
escaped_close_bracket ~ '\\' ']'
:lexeme ~ escaped_close_parenthesis pause => before event => escaped_close_parenthesis
escaped_close_parenthesis ~ '\\)'
:lexeme ~ escaped_K pause => before event => escaped_K
escaped_K ~ '\\K'
:lexeme ~ escaped_open_parenthesis pause => before event => escaped_open_parenthesis
escaped_open_parenthesis ~ '\\('
:lexeme ~ escaped_slash pause => before event => escaped_slash
escaped_slash ~ '\\\\'
:lexeme ~ exclamation_mark_prefix pause => before event => exclamation_mark_prefix
exclamation_mark_prefix ~ '(?!'
:lexeme ~ extended_bracketed_prefix pause => before event => extended_bracketed_prefix
extended_bracketed_prefix ~ '(?['
:lexeme ~ flag_set pause => before event => flag_set
flag_set ~ [a-z]+
:lexeme ~ greater_than pause => before event => greater_than
greater_than ~ '>'
:lexeme ~ greater_than_prefix pause => before event => greater_than_prefix
greater_than_prefix ~ '(?>'
:lexeme ~ less_or_equals_prefix pause => before event => less_or_equals_prefix
less_or_equals_prefix ~ '(?<='
:lexeme ~ less_exclamation_mark_prefix pause => before event => less_exclamation_mark_prefix
less_exclamation_mark_prefix ~ '(?<!'
:lexeme ~ minus pause => before event => minus
minus ~ '-'
:lexeme ~ named_backreference_prefix pause => before event => named_backreference_prefix
named_backreference_prefix ~ '\\k' ['] # Use a ' for the Ultraedit syntax hiliter.
named_backreference_prefix ~ '\\k<'
named_backreference_prefix ~ '\\k{'
named_backreference_prefix ~ '\\g{'
:lexeme ~ named_capture_group_prefix pause => before event => named_capture_group_prefix
named_capture_group_prefix ~ '(?<'
named_capture_group_prefix ~ '(?' ['] # Use a ' for the Ultraedit syntax hiliter.
:lexeme ~ named_capture_group_suffix pause => before event => named_capture_group_suffix
named_capture_group_suffix ~ '>'
named_capture_group_suffix ~ ['] # Use a ' for the Ultraedit syntax hiliter.
:lexeme ~ non_close_bracket pause => before event => non_close_bracket
non_close_bracket ~ [^\]]+
:lexeme ~ non_close_parenthesis pause => before event => non_close_parenthesis
non_close_parenthesis ~ [^)]
:lexeme ~ open_bracket pause => before event => open_bracket
open_bracket ~ '['
:lexeme ~ open_parenthesis pause => before event => open_parenthesis
open_parenthesis ~ '('
:lexeme ~ query pause => before event => query
query ~ '?'
:lexeme ~ query_caret pause => before event => query_caret
query_caret ~ '?^'
:lexeme ~ query_P pause => before event => query_P
query_P ~ '?P'
:lexeme ~ recursive_subpattern_minus pause => before event => recursive_subpattern_minus
recursive_subpattern_minus ~ '(?-1)'
recursive_subpattern_minus ~ '(?-2)'
recursive_subpattern_minus ~ '(?-3)'
recursive_subpattern_minus ~ '(?-4)'
recursive_subpattern_minus ~ '(?-5)'
recursive_subpattern_minus ~ '(?-6)'
recursive_subpattern_minus ~ '(?-7)'
recursive_subpattern_minus ~ '(?-8)'
recursive_subpattern_minus ~ '(?-9)'
recursive_subpattern_minus ~ '(?-10)'
recursive_subpattern_minus ~ '(?-11)'
recursive_subpattern_minus ~ '(?-12)'
recursive_subpattern_minus ~ '(?-13)'
recursive_subpattern_minus ~ '(?-14)'
recursive_subpattern_minus ~ '(?-15)'
recursive_subpattern_minus ~ '(?-16)'
recursive_subpattern_minus ~ '(?-17)'
recursive_subpattern_minus ~ '(?-18)'
recursive_subpattern_minus ~ '(?-19)'
recursive_subpattern_minus ~ '(?-20)'
:lexeme ~ recursive_subpattern_natural pause => before event => recursive_subpattern_natural
recursive_subpattern_natural ~ '(?1)'
recursive_subpattern_natural ~ '(?2)'
recursive_subpattern_natural ~ '(?3)'
recursive_subpattern_natural ~ '(?4)'
recursive_subpattern_natural ~ '(?5)'
recursive_subpattern_natural ~ '(?6)'
recursive_subpattern_natural ~ '(?7)'
recursive_subpattern_natural ~ '(?8)'
recursive_subpattern_natural ~ '(?9)'
recursive_subpattern_natural ~ '(?10)'
recursive_subpattern_natural ~ '(?11)'
recursive_subpattern_natural ~ '(?12)'
recursive_subpattern_natural ~ '(?13)'
recursive_subpattern_natural ~ '(?14)'
recursive_subpattern_natural ~ '(?15)'
recursive_subpattern_natural ~ '(?16)'
recursive_subpattern_natural ~ '(?17)'
recursive_subpattern_natural ~ '(?18)'
recursive_subpattern_natural ~ '(?19)'
recursive_subpattern_natural ~ '(?20)'
:lexeme ~ recursive_subpattern_plus pause => before event => recursive_subpattern_plus
recursive_subpattern_plus ~ '(?+1)'
recursive_subpattern_plus ~ '(?+2)'
recursive_subpattern_plus ~ '(?+3)'
recursive_subpattern_plus ~ '(?+4)'
recursive_subpattern_plus ~ '(?+5)'
recursive_subpattern_plus ~ '(?+6)'
recursive_subpattern_plus ~ '(?+7)'
recursive_subpattern_plus ~ '(?+8)'
recursive_subpattern_plus ~ '(?+9)'
recursive_subpattern_plus ~ '(?+10)'
recursive_subpattern_plus ~ '(?+11)'
recursive_subpattern_plus ~ '(?+12)'
recursive_subpattern_plus ~ '(?+13)'
recursive_subpattern_plus ~ '(?+14)'
recursive_subpattern_plus ~ '(?+15)'
recursive_subpattern_plus ~ '(?+16)'
recursive_subpattern_plus ~ '(?+17)'
recursive_subpattern_plus ~ '(?+18)'
recursive_subpattern_plus ~ '(?+19)'
recursive_subpattern_plus ~ '(?+20)'
:lexeme ~ recursive_subpattern_R pause => before event => recursive_subpattern_R
recursive_subpattern_R ~ '(?R)'
recursive_subpattern_R ~ '(?0)'
:lexeme ~ recurse_prefix pause => before event => recurse_prefix
recurse_prefix ~ '(?&'
:lexeme ~ single_brace_prefix pause => before event => single_brace_prefix
single_brace_prefix ~ '(?{'
:lexeme ~ slash pause => before event => slash
slash ~ '/'
:lexeme ~ string pause => before event => string
string ~ [^()/]*
:lexeme ~ vertical_bar pause => before event => vertical_bar
vertical_bar ~ '|'
:lexeme ~ vertical_bar_prefix pause => before event => vertical_bar_prefix
vertical_bar_prefix ~ '(?|'