—package
Regexp::RegGrp;
use
5.008009;
use
warnings;
use
strict;
use
Carp;
use
Regexp::RegGrp::Data;
BEGIN {
if
( $] < 5.010000 ) {
re->
import
(
'eval'
);
}
}
use
constant {
ESCAPE_BRACKETS
=>
qr~(?<!\\)\[[^\]]+(?<!\\)\]|\(\?([\^dlupimsx-]+:|[:=!><])~
,
ESCAPE_CHARS
=>
qr~\\.~
,
BRACKETS
=>
qr~\(~
,
BACK_REF
=>
qr~(?:\\g?(\d\d*)|\\g\{(\d+)\})~
};
# =========================================================================== #
our
$VERSION
=
'2.01'
;
sub
new {
my
(
$class
,
$in_ref
) =
@_
;
my
$self
= {};
bless
(
$self
,
$class
);
if
(
ref
(
$in_ref
) ne
'HASH'
) {
carp(
'First argument must be a hashref!'
);
return
;
}
unless
(
exists
(
$in_ref
->{reggrp} ) ) {
carp(
'Key "reggrp" does not exist in input hashref!'
);
return
;
}
if
(
ref
(
$in_ref
->{reggrp} ) ne
'ARRAY'
) {
carp(
'Value for key "reggrp" must be an arrayref!'
);
return
;
}
if
(
ref
(
$in_ref
->{restore_pattern} ) and
ref
(
$in_ref
->{restore_pattern} ) ne
'Regexp'
) {
carp(
'Value for key "restore_pattern" must be a scalar or regexp!'
);
return
;
}
my
$no
= 0;
map
{
$no
++;
my
$reggrp_data
= Regexp::RegGrp::Data->new(
{
regexp
=>
$_
->{regexp},
replacement
=>
$_
->{replacement},
store
=>
$_
->{store},
modifier
=>
$_
->{modifier},
restore_pattern
=>
$in_ref
->{restore_pattern}
}
);
unless
(
$reggrp_data
) {
carp(
'RegGrp No '
.
$no
.
' in arrayref is malformed!'
);
return
;
}
$self
->reggrp_add(
$reggrp_data
);
} @{
$in_ref
->{reggrp}};
my
$restore_pattern
=
$in_ref
->{restore_pattern} ||
qr~\x01(\d+)\x01~
;
$self
->{_restore_pattern} =
qr/$restore_pattern/
;
my
$offset
= 1;
my
$midx
= 0;
# In perl versions < 5.10 hash %+ doesn't exist, so we have to initialize it
$self
->{_re_str} = ( ( $] < 5.010000 ) ?
'(?{ %+ = (); })'
:
''
) .
join
(
'|'
,
map
{
my
$re
=
$_
->regexp();
# Count backref brackets
$re
=~ s/${\(ESCAPE_CHARS)}//g;
$re
=~ s/${\(ESCAPE_BRACKETS)}//g;
my
@nparen
=
$re
=~ /${\(BRACKETS)}/g;
$re
=
$_
->regexp();
my
$backref_pattern
=
'\\g{%d}'
;
if
( $] < 5.010000 ) {
$backref_pattern
=
'\\%d'
;
}
$re
=~ s/${\(BACK_REF)}/
sprintf
(
$backref_pattern
,
$offset
+ ( $1 || $2 ) )/eg;
my
$ret
;
if
( $] < 5.010000 ) {
# In perl versions < 5.10 we need to fill %+ hash manually
# perl 5.8 doesn't reset the %+ hash correctly if there are zero-length submatches
# so this is also done here
$ret
=
'('
.
$re
.
')'
.
'(?{ %+ = ( \'_'
.
$midx
++ .
'\' => $^N ); })'
;
}
else
{
$ret
=
'(?\'_'
.
$midx
++ .
'\''
.
$re
.
')'
;
}
$offset
+=
scalar
(
@nparen
) + 1;
$ret
;
}
$self
->reggrp_array()
);
return
$self
;
}
# re_str methods
sub
re_str {
my
$self
=
shift
;
return
$self
->{_re_str};
}
# /re_str methods
# restore_pattern methods
sub
restore_pattern {
my
$self
=
shift
;
return
$self
->{_restore_pattern};
}
# /restore_pattern methods
# store_data methods
sub
store_data_add {
my
(
$self
,
$data
) =
@_
;
push
( @{
$self
->{_store_data}},
$data
);
}
sub
store_data_by_idx {
my
(
$self
,
$idx
) =
@_
;
return
$self
->{_store_data}->[
$idx
];
}
sub
store_data_count {
my
$self
=
shift
;
return
scalar
( @{
$self
->{_store_data} || []} );
}
sub
flush_stored {
my
$self
=
shift
;
$self
->{_store_data} = [];
}
# /store_data methods
# reggrp methods
sub
reggrp_add {
my
(
$self
,
$reggrp
) =
@_
;
push
( @{
$self
->{_reggrp}},
$reggrp
);
}
sub
reggrp_array {
my
$self
=
shift
;
return
@{
$self
->{_reggrp}};
}
sub
reggrp_by_idx {
my
(
$self
,
$idx
) =
@_
;
return
$self
->{_reggrp}->[
$idx
];
}
# /reggrp methods
sub
exec
{
my
(
$self
,
$input
,
$opts
) =
@_
;
if
(
ref
(
$input
) ne
'SCALAR'
) {
carp(
'First argument in Regexp::RegGrp->exec must be a scalarref!'
);
return
undef
;
}
$opts
||= {};
if
(
ref
(
$opts
) ne
'HASH'
) {
carp(
'Second argument in Regexp::RegGrp->exec must be a hashref!'
);
return
undef
;
}
my
$to_process
= \
''
;
my
$cont
=
'void'
;
if
(
defined
(
wantarray
) ) {
my
$tmp_input
= ${
$input
};
$to_process
= \
$tmp_input
;
$cont
=
'scalar'
;
}
else
{
$to_process
=
$input
;
}
${
$to_process
} =~ s/${\
$self
->re_str()}/
$self
->_process( {
match_hash
=> \%+,
opts
=>
$opts
} )/eg;
# Return a scalar if requested by context
return
${
$to_process
}
if
(
$cont
eq
'scalar'
);
}
sub
_process {
my
(
$self
,
$in_ref
) =
@_
;
my
%match_hash
= %{
$in_ref
->{match_hash}};
my
$opts
=
$in_ref
->{opts};
my
$match_key
= (
keys
(
%match_hash
) )[0];
my
(
$midx
) =
$match_key
=~ /^_(\d+)$/;
my
$match
=
$match_hash
{
$match_key
};
my
$reggrp
=
$self
->reggrp_by_idx(
$midx
);
my
@submatches
=
$match
=~
$reggrp
->regexp();
map
{
$_
.=
''
; }
@submatches
;
my
$ret
=
$match
;
my
$replacement
=
$reggrp
->replacement();
if
(
defined
(
$replacement
) and
not
ref
(
$replacement
)
) {
$ret
=
$replacement
;
}
elsif
(
ref
(
$replacement
) eq
'CODE'
) {
$ret
=
$replacement
->(
{
match
=>
$match
,
submatches
=> \
@submatches
,
opts
=>
$opts
,
store_index
=>
$self
->store_data_count()
}
);
}
my
$store
=
$reggrp
->store();
if
(
$store
) {
my
$tmp_match
=
$match
;
if
( not
ref
(
$store
) ) {
$tmp_match
=
$store
;
}
elsif
(
ref
(
$store
) eq
'CODE'
) {
$tmp_match
=
$store
->(
{
match
=>
$match
,
submatches
=> \
@submatches
,
opts
=>
$opts
}
);
}
$self
->store_data_add(
$tmp_match
);
}
return
$ret
;
};
sub
restore_stored {
my
(
$self
,
$input
) =
@_
;
if
(
ref
(
$input
) ne
'SCALAR'
) {
carp(
'First argument in Regexp::RegGrp->restore must be a scalarref!'
);
return
undef
;
}
my
$to_process
= \
''
;
my
$cont
=
'void'
;
if
(
defined
(
wantarray
) ) {
my
$tmp_input
= ${
$input
};
$to_process
= \
$tmp_input
;
$cont
=
'scalar'
;
}
else
{
$to_process
=
$input
;
}
# Here is a while loop, because there could be recursive replacements
while
( ${
$to_process
} =~ /${\
$self
->restore_pattern()}/ ) {
${
$to_process
} =~ s/${\
$self
->restore_pattern()}/
$self
->store_data_by_idx( $1 )/egsm;
}
$self
->flush_stored();
# Return a scalar if requested by context
return
${
$to_process
}
if
(
$cont
eq
'scalar'
);
}
1;
__END__
=head1 NAME
Regexp::RegGrp - Groups a regular expressions collection
=for html
<a href='https://travis-ci.org/leejo/regexp-reggrp-perl?branch=master'><img src='https://travis-ci.org/leejo/regexp-reggrp-perl.svg?branch=master' alt='Build Status' /></a>
<a href='https://coveralls.io/r/leejo/regexp-reggrp-perl'><img src='https://coveralls.io/repos/leejo/regexp-reggrp-perl/badge.png?branch=master' alt='Coverage Status' /></a>
=head1 VERSION
Version 2.00
=head1 DESCRIPTION
Groups regular expressions to one regular expression
=head1 SYNOPSIS
use Regexp::RegGrp;
my $reggrp = Regexp::RegGrp->new(
{
reggrp => [
{
regexp => '%name%',
replacement => 'John Doe',
modifier => $modifier
},
{
regexp => '%company%',
replacement => 'ACME',
modifier => $modifier
}
],
restore_pattern => $restore_pattern
}
);
$reggrp->exec( \$scalar );
To return a scalar without changing the input simply use (e.g. example 2):
my $ret = $reggrp->exec( \$scalar );
The first argument must be a hashref. The keys are:
=over 4
=item reggrp (required)
Arrayref of hashrefs. The keys of each hashref are:
=over 8
=item regexp (required)
A regular expression
=item replacement (optional)
Scalar or sub.
A replacement for the regular expression match. If not set, nothing will be replaced except "store" is set.
In this case the match is replaced by something like sprintf("\x01%d\x01", $idx) where $idx is the index
of the stored element in the store_data arrayref. If "store" is set the default is:
sub {
return sprintf( "\x01%d\x01", $_[0]->{store_index} );
}
If a custom restore_pattern is passed to to constructor you MUST also define a replacement. Otherwise
it is undefined.
If you define a subroutine as replacement an hashref is passed to this subroutine. This hashref has
four keys:
=over 12
=item match
Scalar. The match of the regular expression.
=item submatches
Arrayref of submatches.
=item store_index
The next index. You need this if you want to create a placeholder and store the replacement in the
$self->{store_data} arrayref.
=item opts
Hashref of custom options.
=back
=item modifier (optional)
Scalar. The default is 'sm'.
=item store (optional)
Scalar or sub. If you define a subroutine an hashref is passed to this subroutine. This hashref has
three keys:
=over 12
=item match
Scalar. The match of the regular expression.
=item submatches
Arrayref of submatches.
=item opts
Hashref of custom options.
=back
A replacement for the regular expression match. It will not replace the match directly. The replacement
will be stored in the $self->{store_data} arrayref. The placeholders in the text can easily be rereplaced
with the restore_stored method later.
=back
=item restore_pattern (optional)
Scalar or Regexp object. The default restore pattern is
qr~\x01(\d+)\x01~
This means, if you use the restore_stored method it is looking for \x010\x01, \x011\x01, ... and
replaces the matches with $self->{store_data}->[0], $self->{store_data}->[1], ...
=back
=head1 EXAMPLES
=over 4
=item Example 1
Common usage.
#!/usr/bin/perl
use strict;
use warnings;
use Regexp::RegGrp;
my $reggrp = Regexp::RegGrp->new(
{
reggrp => [
{
regexp => '%name%',
replacement => 'John Doe'
},
{
regexp => '%company%',
replacement => 'ACME'
}
]
}
);
open( INFILE, 'unprocessed.txt' );
open( OUTFILE, '>processed.txt' );
my $txt = join( '', <INFILE> );
$reggrp->exec( \$txt );
print OUTFILE $txt;
close(INFILE);
close(OUTFILE);
=item Example 2
A scalar is requested by the context. The input will remain unchanged.
#!/usr/bin/perl
use strict;
use warnings;
use Regexp::RegGrp;
my $reggrp = Regexp::RegGrp->new(
{
reggrp => [
{
regexp => '%name%',
replacement => 'John Doe'
},
{
regexp => '%company%',
replacement => 'ACME'
}
]
}
);
open( INFILE, 'unprocessed.txt' );
open( OUTFILE, '>processed.txt' );
my $unprocessed = join( '', <INFILE> );
my $processed = $reggrp->exec( \$unprocessed );
print OUTFILE $processed;
close(INFILE);
close(OUTFILE);
=back
=head1 AUTHOR
Merten Falk, C<< <nevesenin at cpan.org> >>. Now maintained by LEEJO
=head1 BUGS
Please report any bugs or feature requests through the web interface at
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc Regexp::RegGrp
=head1 COPYRIGHT & LICENSE
Copyright 2010, 2011 Merten Falk, all rights reserved.
This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.
=cut