#ifdef PERL_EXT_RE_BUILD
#include "re_top.h"
#endif
#include "EXTERN.h"
#define PERL_IN_REGEX_ENGINE
#define PERL_IN_REGCOMP_ANY
#define PERL_IN_REGCOMP_STUDY_C
#include "perl.h"
#ifdef PERL_IN_XSUB_RE
# include "re_comp.h"
#else
# include "regcomp.h"
#endif
#include "invlist_inline.h"
#include "unicode_constants.h"
#include "regcomp_internal.h"
/* Allocate a fresh regnode_ssc into the variable 'and_withp' of the
 * enclosing scope and hand it to SAVEFREEPV (presumably so it is released
 * automatically when the current save scope unwinds -- confirm against the
 * SAVEFREEPV documentation).  'and_withp' must be NULL on entry; this is
 * asserted.
 *
 * Every line except the last has to end in a backslash: without the
 * continuation the macro body would stop early and the remaining statements
 * would be left at file scope. */
#define INIT_AND_WITHP \
    assert(!and_withp); \
    Newx(and_withp, 1, regnode_ssc); \
    SAVEFREEPV(and_withp)
/* Free every scan_frame on the singly linked list that starts at 'p',
 * walking the next_frame links until one is NULL.  The first frame is
 * dereferenced unconditionally, so 'p' must point at a valid frame. */
STATIC void
S_unwind_scan_frames(pTHX_ const void *p)
{
    scan_frame *frame;
    scan_frame *following;

    PERL_ARGS_ASSERT_UNWIND_SCAN_FRAMES;

    frame = (scan_frame *)p;
    for (;;) {
        /* Grab the link before the frame that holds it is released. */
        following = frame->next_frame;
        Safefree(frame);
        frame = following;
        if (!frame)
            break;
    }
}
/* Extend the "next" offset of 'node' so that it jumps over any directly
 * following run of NOTHING-type nodes (with a non-zero NEXT_OFF) and LONGJMP
 * nodes (with a non-zero ARG1u), as long as the accumulated offset stays
 * below the maximum the node can store.  CURLYX nodes are left alone.
 * The offset lives in ARG1u(node) or NEXT_OFF(node) depending on
 * REGNODE_OFF_BY_ARG(). */
STATIC void
S_rck_elide_nothing(pTHX_ regnode *node)
{
    PERL_ARGS_ASSERT_RCK_ELIDE_NOTHING;

    if (OP(node) == CURLYX)
        return;

    {
        /* Largest offset representable for this kind of node. */
        const int limit = (REGNODE_OFF_BY_ARG(OP(node))
                           ? I32_MAX
                           : (I32_MAX < U16_MAX ? I32_MAX : U16_MAX));
        int total = (REGNODE_OFF_BY_ARG(OP(node))
                     ? ARG1u(node)
                     : NEXT_OFF(node));
        int step;
        regnode *cursor = node;

        for (;;) {
            cursor = regnext(cursor);
            if (!cursor)
                break;

            if (REGNODE_TYPE(OP(cursor)) == NOTHING
                && (step = NEXT_OFF(cursor)) != 0)
            {
                /* skippable NOTHING-type node; 'step' holds its offset */
            }
            else if (OP(cursor) == LONGJMP
                     && (step = ARG1u(cursor)) != 0)
            {
                /* skippable LONGJMP node; 'step' holds its offset */
            }
            else {
                break;
            }

            /* Stop before the combined offset would reach the limit. */
            if (!(total + step < limit))
                break;

            total += step;
        }

        /* Store the (possibly unchanged) offset back where it came from. */
        if (REGNODE_OFF_BY_ARG(OP(node)))
            ARG1u(node) = total;
        else
            NEXT_OFF(node) = total;
    }
}
/* Build and return a new inversion list holding the code points that the
 * first character of the EXACTF-ish 'node' may match.
 *
 * If the node's string begins with a multi-character fold the function gives
 * up and returns the complete range 0..UV_MAX.  Otherwise the list contains
 * the first (folded) code point plus whatever folds to it, with extra
 * additions for EXACTFL nodes.
 *
 * The list is created here with _new_invlist(); this function does not
 * mortalize it. */
STATIC SV*
S_make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
{
    const U8 *str = (U8*)STRING(node);
    SSize_t nbytes = STR_LEN(node);
    UV first_cp;
    SV *matches = _new_invlist(4);

    PERL_ARGS_ASSERT_MAKE_EXACTF_INVLIST;

    if (UTF) {
        /* ---- UTF-8 pattern ---- */
        U8 fold_buf[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = {
            '\0'
        };
        const U8 *str_end = str + nbytes;
        IV folded_cp;
        unsigned int idx;
        U32 first_inverse;
        const U32 *other_inverses;
        Size_t n_inverses;

        folded_cp = first_cp = utf8_to_uvchr_buf(str, str + nbytes, NULL);

        if (   OP(node) == EXACTFL
            && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(first_cp))
        {
            /* Fold the first few characters into fold_buf so the multi-char
             * fold test below can be run on folded text; remember the fold
             * of the very first character in folded_cp. */
            U8 *out = fold_buf;
            int nfolded;

            folded_cp = -1;
            for (nfolded = 0;
                 nfolded < UTF8_MAX_FOLD_CHAR_EXPAND && str < str_end;
                 nfolded++)
            {
                if (isASCII(*str)) {
                    *(out++) = (U8) toFOLD(*str);
                    if (folded_cp < 0) {
                        folded_cp = *(out - 1);
                    }
                    str++;
                }
                else {
                    STRLEN fold_len;
                    UV fold = toFOLD_utf8_safe(str, str_end, out, &fold_len);

                    if (folded_cp < 0) {
                        folded_cp = fold;
                    }
                    out += fold_len;
                    str += UTF8SKIP(str);
                }
            }

            /* From here on examine the folded copy, not the node's text. */
            str_end = out;
            str = fold_buf;
        }

        /* Starts with a multi-char fold: assume it can match anything. */
        if (is_MULTI_CHAR_FOLD_utf8_safe(str, str_end)) {
            return _add_range_to_invlist(matches, 0, UV_MAX);
        }

        /* It matches itself ... */
        matches = add_cp_to_invlist(matches, folded_cp);

        /* ... and everything _inverse_folds() reports as folding to it. */
        n_inverses = _inverse_folds(folded_cp, &first_inverse,
                                    &other_inverses);
        for (idx = 0; idx < n_inverses; idx++) {
            UV cand = (idx == 0) ? first_inverse : other_inverses[idx - 1];

            /* For the EXACTFAA..EXACTFAA_NO_TRIE node range, skip folds that
             * cross the ASCII / non-ASCII boundary. */
            if (   ! inRANGE(OP(node), EXACTFAA, EXACTFAA_NO_TRIE)
                || isASCII(cand) == isASCII(folded_cp))
            {
                matches = add_cp_to_invlist(matches, cand);
            }
        }

        if (OP(node) == EXACTFL) {
            /* Cross-link 'I'/'i' with dotless i and I-with-dot-above
             * (presumably for Turkic-style locale folding -- confirm). */
            if (isALPHA_FOLD_EQ(fc_guard_unused_never, 'I')) {
                /* unreachable placeholder removed below */
            }
        }

        return matches;
    }

    /* ---- non-UTF-8 pattern ---- */
    first_cp = *str;

    /* Starts with a multi-char fold: assume it can match anything. */
    if (is_MULTI_CHAR_FOLD_latin1_safe(str, str + nbytes)) {
        return _add_range_to_invlist(matches, 0, UV_MAX);
    }

    if (OP(node) == EXACTFL) {
        /* All of PL_Latin1 is added for EXACTFL, plus the two special
         * i-variants when the first character is 'I' or 'i'. */
        _invlist_union(matches, PL_Latin1, &matches);
        if (isALPHA_FOLD_EQ(first_cp, 'I')) {
            matches = add_cp_to_invlist(matches,
                                        LATIN_SMALL_LETTER_DOTLESS_I);
            matches = add_cp_to_invlist(matches,
                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
        }
    }
    else {
        /* Itself, plus its Latin1 fold partner if it has one. */
        matches = add_cp_to_invlist(matches, first_cp);
        if (IS_IN_SOME_FOLD_L1(first_cp))
            matches = add_cp_to_invlist(matches, PL_fold_latin1[first_cp]);
    }

    /* Pull in above-Latin1 fold partners, except for ASCII characters in
     * the EXACTFAA..EXACTFAA_NO_TRIE node range. */
    if (   HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(first_cp)
        && (   ! isASCII(first_cp)
            || ! inRANGE(OP(node), EXACTFAA, EXACTFAA_NO_TRIE)))
    {
        add_above_Latin1_folds(pRExC_state, (U8) first_cp, &matches);
    }

    return matches;
}
/* Compare the substring collected in data->last_found with the current
 * "longest" substring of the active slot (data->cur_is_floating selects
 * substrs[0] or substrs[1]).  When the new one is longer -- or equally long
 * while SF_BEFORE_EOL is set in data->flags -- it replaces the stored one and
 * the slot's offsets, flags, minlenp and lookbehind are refreshed.
 *
 * Afterwards last_found is emptied (including the mg_len of any attached
 * PERL_MAGIC_utf8 magic), last_end is set to -1 and SF_BEFORE_EOL is cleared
 * from data->flags. */
void
Perl_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
                 SSize_t *minlenp, int is_inf)
{
    const STRLEN found_len = CHR_SVLEN(data->last_found);
    SV * const best_sv = data->substrs[data->cur_is_floating].str;
    const STRLEN best_len = CHR_SVLEN(best_sv);
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_SCAN_COMMIT;

    if (   found_len > best_len
        || (found_len == best_len && (data->flags & SF_BEFORE_EOL)))
    {
        const U8 slot = data->cur_is_floating;

        SvSetMagicSV(best_sv, data->last_found);
        data->substrs[slot].min_offset = found_len
                                         ? data->last_start_min
                                         : data->pos_min;

        if (slot) {
            /* slot 1: max_offset is computed separately from min_offset */
            if (is_inf) {
                data->substrs[1].max_offset = OPTIMIZE_INFTY;
            }
            else if (found_len) {
                data->substrs[1].max_offset = data->last_start_max;
            }
            else if (data->pos_delta > OPTIMIZE_INFTY - data->pos_min) {
                /* pos_min + pos_delta would exceed OPTIMIZE_INFTY */
                data->substrs[1].max_offset = OPTIMIZE_INFTY;
            }
            else {
                data->substrs[1].max_offset = data->pos_min + data->pos_delta;
            }
        }
        else {
            /* slot 0: max_offset mirrors min_offset */
            data->substrs[0].max_offset = data->substrs[0].min_offset;
        }

        /* Copy the SF_BEFORE_EOL bit from data->flags into the slot. */
        data->substrs[slot].flags &= ~SF_BEFORE_EOL;
        data->substrs[slot].flags |= data->flags & SF_BEFORE_EOL;
        data->substrs[slot].minlenp = minlenp;
        data->substrs[slot].lookbehind = 0;
    }

    /* Reset the collector for the next substring. */
    SvCUR_set(data->last_found, 0);
    {
        SV * const collector = data->last_found;

        if (SvUTF8(collector) && SvMAGICAL(collector)) {
            MAGIC * const utf8_mg = mg_find(collector, PERL_MAGIC_utf8);

            if (utf8_mg)
                utf8_mg->mg_len = 0;
        }
    }
    data->last_end = -1;
    data->flags &= ~SF_BEFORE_EOL;
    DEBUG_STUDYDATA("commit", data, 0, is_inf, -1, -1, -1);
}
/* Make the synthetic start class 'ssc' match every code point (0..UV_MAX)
 * and flag it as also matching the empty string.  The freshly built
 * inversion list is passed through sv_2mortal() before being stored. */
STATIC void
S_ssc_anything(pTHX_ regnode_ssc *ssc)
{
    SV *all_cps;

    PERL_ARGS_ASSERT_SSC_ANYTHING;
    assert(is_ANYOF_SYNTHETIC(ssc));

    all_cps = _add_range_to_invlist(NULL, 0, UV_MAX);
    ssc->invlist = sv_2mortal(all_cps);
    ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;
}
/* Return TRUE when the synthetic start class 'ssc' matches the empty string
 * and, in addition, either
 *   - the first range of its inversion list is exactly 0..UV_MAX, or
 *   - some POSIXL class and its partner at the next index (i, i+1 for even
 *     i) are both set.
 * Return FALSE otherwise. */
STATIC int
S_ssc_is_anything(const regnode_ssc *ssc)
{
    UV range_lo = 0;
    UV range_hi = 0;
    bool covers_all;

    PERL_ARGS_ASSERT_SSC_IS_ANYTHING;
    assert(is_ANYOF_SYNTHETIC(ssc));

    if (! (ANYOF_FLAGS(ssc) & SSC_MATCHES_EMPTY_STRING))
        return FALSE;

    /* Only the first range of the list is inspected. */
    invlist_iterinit(ssc->invlist);
    covers_all =    invlist_iternext(ssc->invlist, &range_lo, &range_hi)
                 && range_lo == 0
                 && range_hi == UV_MAX;
    invlist_iterfinish(ssc->invlist);

    if (covers_all)
        return TRUE;

    if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
        int cls = 0;

        while (cls < ANYOF_POSIXL_MAX) {
            if (   ANYOF_POSIXL_TEST(ssc, cls)
                && ANYOF_POSIXL_TEST(ssc, cls + 1))
            {
                return TRUE;
            }
            cls += 2;
        }
    }

    return FALSE;
}
/* Put 'ssc' into its initial state: zero the structure, mark it synthetic,
 * set its argument to ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE and make it
 * match everything via ssc_anything().  The POSIXL class bits are all set
 * when RExC_contains_locale is true and all cleared otherwise. */
void
Perl_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
{
    PERL_ARGS_ASSERT_SSC_INIT;

    Zero(ssc, 1, regnode_ssc);
    set_ANYOF_SYNTHETIC(ssc);
    ARG1u_SET(ssc, ANYOF_MATCHES_ALL_OUTSIDE_BITMAP_VALUE);
    ssc_anything(ssc);

    if (! RExC_contains_locale) {
        ANYOF_POSIXL_ZERO(ssc);
    }
    else {
        ANYOF_POSIXL_SETALL(ssc);
    }
}
/* Return TRUE when the code-point list and POSIXL bits of 'ssc' look as
 * Perl_ssc_init() leaves them: the first inversion-list range is 0..UV_MAX
 * and, if RExC_contains_locale is true, every POSIXL class is set.  The
 * flags field is not examined here. */
STATIC int
S_ssc_is_cp_posixl_init(const RExC_state_t *pRExC_state,
                        const regnode_ssc *ssc)
{
    UV range_lo = 0;
    UV range_hi = 0;
    bool covers_all;

    PERL_ARGS_ASSERT_SSC_IS_CP_POSIXL_INIT;
    assert(is_ANYOF_SYNTHETIC(ssc));

    invlist_iterinit(ssc->invlist);
    covers_all =    invlist_iternext(ssc->invlist, &range_lo, &range_hi)
                 && range_lo == 0
                 && range_hi == UV_MAX;
    invlist_iterfinish(ssc->invlist);

    if (! covers_all)
        return FALSE;

    /* With locale in play, all POSIXL classes must still be set. */
    return (! RExC_contains_locale || ANYOF_POSIXL_SSC_TEST_ALL_SET(ssc))
           ? TRUE
           : FALSE;
}
/* Return an inversion list of the code points the ANYOF-type 'node' can
 * match, for use when combining the node into a synthetic start class.
 *
 * The list is assembled from the node's auxiliary data (if any), its bitmap,
 * and several flag-dependent adjustments, and is inverted at the end when
 * ANYOF_INVERT is set.  If the auxiliary array is long enough to reach
 * DEFERRED_USER_DEFINED_INDEX the function returns 0..UV_MAX straight away.
 * Lists created here go through sv_2mortal(). */
STATIC SV*
S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
                            const regnode_charclass* const node)
{
    SV *cps = NULL;
    SV *utf8_locale_only = NULL;
    bool bitmap_added_any = FALSE;
    const U8 node_flags = (REGNODE_TYPE(OP(node)) == ANYOF)
                          ? ANYOF_FLAGS(node)
                          : 0;

    PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;

    /* Step 1: seed the list from outside-bitmap information. */
    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
        cps = sv_2mortal(_new_invlist(1));
        cps = _add_range_to_invlist(cps, NUM_ANYOF_CODE_POINTS, UV_MAX);
    }
    else if (ANYOF_HAS_AUX(node)) {
        const U32 data_ix = ARG1u(node);
        SV * const aux_rv = MUTABLE_SV(RExC_rxi->data->data[data_ix]);
        AV * const aux_av = MUTABLE_AV(SvRV(aux_rv));
        SV ** const aux = AvARRAY(aux_av);

        if (av_tindex_skip_len_mg(aux_av) >= DEFERRED_USER_DEFINED_INDEX) {
            /* Deferred element present: treat the node as matching
             * anything. */
            cps = sv_2mortal(_new_invlist(1));
            return _add_range_to_invlist(cps, 0, UV_MAX);
        }

        if (aux[INVLIST_INDEX]) {
            cps = sv_2mortal(invlist_clone(aux[INVLIST_INDEX], NULL));
        }

        if (   (node_flags & ANYOFL_FOLD)
            && av_tindex_skip_len_mg(aux_av) >= ONLY_LOCALE_MATCHES_INDEX)
        {
            utf8_locale_only = aux[ONLY_LOCALE_MATCHES_INDEX];
        }
    }

    if (! cps) {
        cps = sv_2mortal(_new_invlist(0));
    }

    /* Step 2: when the node is inverted, drop PL_UpperLatin1 from the list
     * before the bitmap is merged in. */
    if (node_flags & ANYOF_INVERT) {
        _invlist_intersection_complement_2nd(cps, PL_UpperLatin1, &cps);
    }

    /* Step 3: add each run of consecutive set bits in the bitmap. */
    if (REGNODE_TYPE(OP(node)) == ANYOF) {
        unsigned bit = 0;

        while (bit < NUM_ANYOF_CODE_POINTS) {
            unsigned int run_start;

            if (! ANYOF_BITMAP_TEST(node, bit)) {
                bit++;
                continue;
            }

            run_start = bit;
            do {
                bit++;
            } while (   bit < NUM_ANYOF_CODE_POINTS
                     && ANYOF_BITMAP_TEST(node, bit));

            cps = _add_range_to_invlist(cps, run_start, bit - 1);
            bitmap_added_any = TRUE;

            /* 'bit' is now either past the bitmap or a bit already known to
             * be clear, so it can be stepped over without testing. */
            bit++;
        }
    }

    /* Step 4: flag-driven additions (not done when inverting). */
    if (   ! (node_flags & ANYOF_INVERT)
        && OP(node) == ANYOFD
        && (node_flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared))
    {
        _invlist_union(cps, PL_UpperLatin1, &cps);
    }

    if (ANYOF_MATCHES_ALL_OUTSIDE_BITMAP(node)) {
        _invlist_union_complement_2nd(cps, PL_InBitmap, &cps);
    }

    /* Step 5: invert, or else apply the ANYOFL_FOLD additions. */
    if (node_flags & ANYOF_INVERT) {
        _invlist_invert(cps);
    }
    else if (node_flags & ANYOFL_FOLD) {
        if (bitmap_added_any) {
            if (_invlist_contains_cp(cps, 'i')) {
                cps = add_cp_to_invlist(cps,
                                        LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
            }
            if (_invlist_contains_cp(cps, 'I')) {
                cps = add_cp_to_invlist(cps, LATIN_SMALL_LETTER_DOTLESS_I);
            }

            /* The whole of PL_Latin1 is added as well. */
            _invlist_union(cps, PL_Latin1, &cps);
        }
        else {
            if (_invlist_contains_cp(cps, LATIN_SMALL_LETTER_DOTLESS_I)) {
                cps = add_cp_to_invlist(cps, 'I');
            }
            if (_invlist_contains_cp(cps,
                                     LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
            {
                cps = add_cp_to_invlist(cps, 'i');
            }
        }
    }

    /* Step 6: finally merge the UTF-8-locale-only list, complemented when
     * the node is inverted. */
    if (utf8_locale_only) {
        _invlist_union_maybe_complement_2nd(cps,
                                            utf8_locale_only,
                                            node_flags & ANYOF_INVERT,
                                            &cps);
    }

    return cps;
}
/* AND the character class 'and_with' into the synthetic start class 'ssc'.
 * 'and_with' may be an ordinary ANYOF-type node or itself a synthetic start
 * class (detected with is_ANYOF_SYNTHETIC()).
 *
 * The flags of 'ssc' are always masked with a flag set derived from
 * 'and_with'.  How the code-point list and POSIXL classes are combined
 * depends on whether 'and_with' is an inverted non-SSC node, and on whether
 * either operand is still in its initial "matches everything" state. */
STATIC void
S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
          const regnode_charclass *and_with)
{
    SV *rhs_cps;
    U8 rhs_flags = (REGNODE_TYPE(OP(and_with)) == ANYOF)
                   ? ANYOF_FLAGS(and_with)
                   : 0;
    U8 keep_flags;

    PERL_ARGS_ASSERT_SSC_AND;
    assert(is_ANYOF_SYNTHETIC(ssc));

    /* Work out the right-hand code-point list and the flag mask. */
    if (! is_ANYOF_SYNTHETIC(and_with)) {
        rhs_cps = get_ANYOF_cp_list_for_ssc(pRExC_state, and_with);
        if (OP(and_with) == ANYOFD) {
            keep_flags = rhs_flags & ANYOF_COMMON_FLAGS;
        }
        else {
            keep_flags = rhs_flags
                         & (  ANYOF_COMMON_FLAGS
                            | ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
                            | ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
            if (rhs_flags & ANYOFL_UTF8_LOCALE_REQD) {
                keep_flags &= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
            }
        }
    }
    else {
        rhs_cps = ((regnode_ssc *)and_with)->invlist;
        keep_flags = rhs_flags;

        /* An "anything" SSC additionally lets ANYOF_WARN_SUPER__shared
         * survive the masking below. */
        if (ssc_is_anything((regnode_ssc *)and_with)) {
            keep_flags |= ANYOF_WARN_SUPER__shared;
        }
    }

    ANYOF_FLAGS(ssc) &= keep_flags;

    if (   (rhs_flags & ANYOF_INVERT)
        && ! is_ANYOF_SYNTHETIC(and_with))
    {
        /* Inverted ordinary node.  rhs_cps is intersected as-is (no further
         * complementing is requested). */
        unsigned int i;

        ssc_intersection(ssc, rhs_cps, FALSE);

        if (! (rhs_flags & ANYOF_MATCHES_POSIXL)) {
            ANYOF_POSIXL_ZERO(ssc);
        }
        else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
            /* Build in 'swapped' the set where each class of 'and_with' is
             * replaced by its partner (index i+1 for even i, i-1 for odd i),
             * then AND that into ssc. */
            regnode_charclass_posixl swapped;
            int add = 1;

            Zero(&swapped, 1, regnode_charclass_posixl);
            ANYOF_POSIXL_ZERO(&swapped);
            for (i = 0; i < ANYOF_MAX; i++) {
                /* A class and its partner are never both set in and_with. */
                assert(i % 2 != 0
                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)
                       || ! ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i + 1));

                if (ANYOF_POSIXL_TEST((regnode_charclass_posixl*) and_with, i)) {
                    ANYOF_POSIXL_SET(&swapped, i + add);
                }
                add = 0 - add;      /* alternates +1, -1, +1, ... */
            }
            ANYOF_POSIXL_AND(&swapped, ssc);
        }
        /* otherwise ssc has no POSIXL classes set; nothing more to do */
    }
    else if (   ! is_ANYOF_SYNTHETIC(and_with)
             || ! ssc_is_cp_posixl_init(pRExC_state, (regnode_ssc *)and_with))
    {
        /* Not inverted, and 'and_with' is not an SSC in its initial state
         * (in that case nothing beyond the flag masking is done). */
        if (ssc_is_cp_posixl_init(pRExC_state, ssc)) {
            /* ssc is still in its initial state: the result is simply
             * 'and_with'. */
            if (is_ANYOF_SYNTHETIC(and_with)) {
                StructCopy(and_with, ssc, regnode_ssc);
            }
            else {
                ssc->invlist = rhs_cps;
                ANYOF_POSIXL_ZERO(ssc);
                if (rhs_flags & ANYOF_MATCHES_POSIXL) {
                    ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
                }
            }
        }
        else if (   ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
                 || (rhs_flags & ANYOF_MATCHES_POSIXL))
        {
            /* At least one side carries POSIXL classes: AND the classes
             * (when and_with has any) but union the code points. */
            if (rhs_flags & ANYOF_MATCHES_POSIXL) {
                ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
            }
            ssc_union(ssc, rhs_cps, FALSE);
        }
        else {
            /* No POSIXL classes on either side: plain intersection. */
            ssc_intersection(ssc, rhs_cps, FALSE);
        }
    }
}
/* OR the character class 'or_with' into the synthetic start class 'ssc'.
 * 'or_with' may be an ordinary ANYOF-type node or another synthetic start
 * class.  Flags derived from 'or_with' are OR-ed into ssc, POSIXL classes
 * are merged unless 'or_with' is an inverted ordinary node, and the
 * code-point lists are always unioned. */
STATIC void
S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
         const regnode_charclass *or_with)
{
    SV *rhs_cps;
    U8 add_flags;
    U8 rhs_flags = (REGNODE_TYPE(OP(or_with)) == ANYOF)
                   ? ANYOF_FLAGS(or_with)
                   : 0;

    PERL_ARGS_ASSERT_SSC_OR;
    assert(is_ANYOF_SYNTHETIC(ssc));

    if (! is_ANYOF_SYNTHETIC(or_with)) {
        rhs_cps = get_ANYOF_cp_list_for_ssc(pRExC_state, or_with);
        add_flags = rhs_flags & ANYOF_COMMON_FLAGS;
        if (OP(or_with) != ANYOFD) {
            add_flags |= rhs_flags
                         & (  ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared
                            | ANYOF_HAS_EXTRA_RUNTIME_MATCHES);
            if (rhs_flags & ANYOFL_UTF8_LOCALE_REQD) {
                add_flags |= ANYOF_HAS_EXTRA_RUNTIME_MATCHES;
            }
        }
    }
    else {
        rhs_cps = ((regnode_ssc*) or_with)->invlist;
        add_flags = rhs_flags;
    }

    ANYOF_FLAGS(ssc) |= add_flags;

    /* The POSIXL classes of an inverted ordinary node are ignored; in every
     * other case they are merged in when present. */
    if (   ! ((rhs_flags & ANYOF_INVERT) && ! is_ANYOF_SYNTHETIC(or_with))
        && (rhs_flags & ANYOF_MATCHES_POSIXL))
    {
        ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
        if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
            unsigned int cls;

            /* A class together with its partner (i, i+1) covers every code
             * point: record that and clear the pair. */
            for (cls = 0; cls < ANYOF_MAX; cls += 2) {
                if (   ANYOF_POSIXL_TEST(ssc, cls)
                    && ANYOF_POSIXL_TEST(ssc, cls + 1))
                {
                    ssc_match_all_cp(ssc);
                    ANYOF_POSIXL_CLEAR(ssc, cls);
                    ANYOF_POSIXL_CLEAR(ssc, cls + 1);
                }
            }
        }
    }

    /* rhs_cps is unioned as-is (no complementing requested). */
    ssc_union(ssc, rhs_cps, FALSE);
}
/* Replace the code-point list of 'ssc' with its union with 'invlist'; when
 * 'invert2nd' is true the complement of 'invlist' is used instead. */
STATIC void
S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, const bool invert2nd)
{
    SV **target;

    PERL_ARGS_ASSERT_SSC_UNION;
    assert(is_ANYOF_SYNTHETIC(ssc));

    /* The ssc's own list is both the first operand and the destination. */
    target = &ssc->invlist;
    _invlist_union_maybe_complement_2nd(*target, invlist, invert2nd, target);
}
/* Replace the code-point list of 'ssc' with its intersection with 'invlist';
 * when 'invert2nd' is true the complement of 'invlist' is used instead. */
STATIC void
S_ssc_intersection(pTHX_ regnode_ssc *ssc, SV* const invlist,
                   const bool invert2nd)
{
    SV **target;

    PERL_ARGS_ASSERT_SSC_INTERSECTION;
    assert(is_ANYOF_SYNTHETIC(ssc));

    /* The ssc's own list is both the first operand and the destination. */
    target = &ssc->invlist;
    _invlist_intersection_maybe_complement_2nd(*target, invlist, invert2nd,
                                               target);
}
/* Add the code points 'start' through 'end' to the code-point list of
 * 'ssc', storing whatever list _add_range_to_invlist() hands back. */
STATIC void
S_ssc_add_range(pTHX_ regnode_ssc *ssc, const UV start, const UV end)
{
    SV *grown;

    PERL_ARGS_ASSERT_SSC_ADD_RANGE;
    assert(is_ANYOF_SYNTHETIC(ssc));

    grown = _add_range_to_invlist(ssc->invlist, start, end);
    ssc->invlist = grown;
}
/* Intersect the code-point list of 'ssc' with the single code point 'cp'.
 * A temporary one-element inversion list is built for the purpose and its
 * reference count is dropped again before returning. */
STATIC void
S_ssc_cp_and(pTHX_ regnode_ssc *ssc, const UV cp)
{
    SV *single = _new_invlist(2);

    PERL_ARGS_ASSERT_SSC_CP_AND;
    assert(is_ANYOF_SYNTHETIC(ssc));

    single = add_cp_to_invlist(single, cp);

    /* The temporary list is used as-is, not complemented. */
    ssc_intersection(ssc, single, FALSE);

    SvREFCNT_dec_NN(single);
}
/* Strip locale information from 'ssc': clear all of its POSIXL classes and
 * remove every bit of ANYOF_LOCALE_FLAGS from its flags. */
STATIC void
S_ssc_clear_locale(regnode_ssc *ssc)
{
    PERL_ARGS_ASSERT_SSC_CLEAR_LOCALE;
    assert(is_ANYOF_SYNTHETIC(ssc));

    ANYOF_POSIXL_ZERO(ssc);

    ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
}
/* Merge the EXACT-type node 'scan' with the nodes that follow it, then
 * analyse the resulting string for multi-character folds.
 *
 * Phase 1 (the first while loop): walk forward from regnext(scan).
 *   - NOTHING-type nodes are skipped by adding their NEXT_OFF to scan's.
 *   - EXACT-type nodes whose opcode is compatible with scan's have their
 *     string appended to scan's string; scan's opcode may be changed to the
 *     combined type.  Joining stops when the combined length would exceed
 *     U8_MAX, when the opcodes are incompatible, or when the loop condition
 *     fails (no next node, zero NEXT_OFF, combined offset >= I16_MAX).
 *
 * Phase 2: a leftover EXACTFU_S_EDGE opcode is turned into EXACTFU, the two
 * out-parameters are reset, and for every opcode other than EXACT,
 * EXACT_REQ8 and EXACTL the string is scanned for multi-char folds.
 *
 * Parameters:
 *   scan                 node to merge into; must be of type EXACT (asserted)
 *   min_subtract         out: set to 0, then increased by (fold length - 1)
 *                        for multi-char folds found in the string
 *   unfolded_multi_char  out: set to FALSE, then TRUE in the cases marked
 *                        below
 *   flags, val           only used when EXPERIMENTAL_INPLACESCAN is defined
 *   depth                only used by the debugging macros
 *
 * Returns 'stopnow', which is only ever set non-zero inside the
 * EXPERIMENTAL_INPLACESCAN section. */
U32
Perl_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
UV *min_subtract,
bool
*unfolded_multi_char,
U32 flags, regnode *val, U32 depth)
{
regnode *n = regnext(scan);
/* Cleared once a TAIL node, or a node beyond 'next', has been seen; string
 * joining is then no longer attempted. */
U32 stringok = 1;
regnode *next = REGNODE_AFTER_varies(scan);
U32 stopnow = 0;
#ifdef DEBUGGING
U32 merged = 0;
/* Last node to be overwritten with OPTIMIZED at the end (debug builds). */
regnode *stop = scan;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
#else
PERL_UNUSED_ARG(depth);
#endif
PERL_ARGS_ASSERT_JOIN_EXACT;
#ifndef EXPERIMENTAL_INPLACESCAN
PERL_UNUSED_ARG(flags);
PERL_UNUSED_ARG(val);
#endif
DEBUG_PEEP(
"join"
, scan, depth, 0);
assert
(REGNODE_TYPE(OP(scan)) == EXACT);
/* Phase 1: absorb following NOTHING-type and compatible EXACT-type nodes. */
while
( n
&& ( REGNODE_TYPE(OP(n)) == NOTHING
|| (stringok && REGNODE_TYPE(OP(n)) == EXACT))
&& NEXT_OFF(n)
&& NEXT_OFF(scan) + NEXT_OFF(n) < I16_MAX)
{
if
(OP(n) == TAIL || n > next)
stringok = 0;
if
(REGNODE_TYPE(OP(n)) == NOTHING) {
DEBUG_PEEP(
"skip:"
, n, depth, 0);
/* Skip the node by extending scan's offset past it. */
NEXT_OFF(scan) += NEXT_OFF(n);
next = n + NODE_STEP_REGNODE;
#ifdef DEBUGGING
if
(stringok)
stop = n;
#endif
n = regnext(n);
}
else
if
(stringok) {
const
unsigned
int
oldl = STR_LEN(scan);
regnode *
const
nnext = regnext(n);
/* Do not join if the combined string would not fit the length field. */
if
(oldl + STR_LEN(n) > U8_MAX)
break
;
/* The chain below decides, per (scan opcode, n opcode) pair, whether the
 * two may be joined and which opcode the merged node gets.  An empty
 * statement means "compatible, scan's opcode stays as it is"; 'break'
 * abandons the join. */
if
(OP(scan) == EXACT && (OP(n) == EXACT_REQ8)) {
OP(scan) = EXACT_REQ8;
}
else
if
(OP(scan) == EXACT_REQ8 && (OP(n) == EXACT)) {
;
}
else
if
((OP(scan) == EXACTFU) && (OP(n) == EXACTFU_REQ8)) {
OP(scan) = EXACTFU_REQ8;
}
else
if
((OP(scan) == EXACTFU_REQ8) && (OP(n) == EXACTFU)) {
;
}
else
if
(OP(scan) == EXACTFU && OP(n) == EXACTFU) {
;
}
else
if
(OP(scan) == EXACTFU && OP(n) == EXACTFU_S_EDGE) {
/* If the joined string would end in 's': leave 'n' unjoined when the node
 * after it is EXACTF, otherwise the result becomes EXACTFU_S_EDGE. */
if
(STRING(n)[STR_LEN(n)-1] ==
's'
) {
if
(OP(nnext) == EXACTF) {
break
;
}
OP(scan) = EXACTFU_S_EDGE;
}
}
else
if
(OP(scan) == EXACTF && OP(n) == EXACTF) {
;
}
else
if
(OP(scan) == EXACTF && OP(n) == EXACTFU_S_EDGE) {
/* Leave 'n' unjoined when the node after it is EXACTFU. */
if
(OP(nnext) == EXACTFU) {
break
;
}
}
else
if
(OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU_S_EDGE) {
/* Joining would create the sequence "ss": the result becomes EXACTF. */
if
( STRING(scan)[STR_LEN(scan)-1] ==
's'
&& STRING(n)[0] ==
's'
)
{
OP(scan) = EXACTF;
}
}
else
if
(OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTFU) {
if
(STRING(n)[0] ==
's'
) {
;
}
else
{
OP(scan) = EXACTFU;
}
}
else
if
(OP(scan) == EXACTFU_S_EDGE && OP(n) == EXACTF) {
OP(scan) = EXACTF;
}
else
if
(OP(scan) != OP(n)) {
/* Any other pairing is joinable only when the opcodes are identical. */
break
;
}
DEBUG_PEEP(
"merg"
, n, depth, 0);
#ifdef DEBUGGING
merged++;
#endif
/* Perform the merge: extend scan's offset and length, then copy n's string
 * directly behind scan's existing 'oldl' bytes. */
next = REGNODE_AFTER_varies(n);
NEXT_OFF(scan) += NEXT_OFF(n);
assert
( ( STR_LEN(scan) + STR_LEN(n) ) < 256 );
setSTR_LEN(scan, (U8)(STR_LEN(scan) + STR_LEN(n)));
Move(STRING(n), STRING(scan) + oldl, STR_LEN(n),
char
);
#ifdef DEBUGGING
stop = next - 1;
#endif
n = nnext;
if
(stopnow)
break
;
}
#ifdef EXPERIMENTAL_INPLACESCAN
/* Experimental: when 'flags' is set and n has no next offset, point n at
 * 'val' and request that joining stop after the next merge. */
if
(flags && !NEXT_OFF(n)) {
DEBUG_PEEP(
"atch"
, val, depth, 0);
if
(REGNODE_OFF_BY_ARG(OP(n))) {
ARG1u_SET(n, val - n);
}
else
{
NEXT_OFF(n) = val - n;
}
stopnow = 1;
}
#endif
}
/* Phase 2.  EXACTFU_S_EDGE is not kept past this point: it is converted to
 * EXACTFU. */
if
(OP(scan) == EXACTFU_S_EDGE) {
OP(scan) = EXACTFU;
}
*min_subtract = 0;
*unfolded_multi_char = FALSE;
/* Fold analysis is skipped for EXACT, EXACT_REQ8 and EXACTL nodes. */
if
(OP(scan) != EXACT && OP(scan) != EXACT_REQ8 && OP(scan) != EXACTL) {
U8* s0 = (U8*) STRING(scan);
U8* s = s0;
U8* s_end = s0 + STR_LEN(scan);
/* Sum over all multi-char folds found of (characters in fold - 1). */
int
total_count_delta = 0;
if
(UTF) {
/* Temporary folded copy; only allocated for EXACTFL, freed below. */
U8* folded = NULL;
if
(OP(scan) == EXACTFL) {
U8 *d;
/* Build a folded copy of the string.  Characters that are not
 * "problematic locale folds" are copied unchanged, as are characters that
 * fold to multiple characters (which also set *unfolded_multi_char); the
 * rest are folded. */
Newx(folded, UTF8_MAX_FOLD_CHAR_EXPAND * STR_LEN(scan) + 1, U8);
d = folded;
while
(s < s_end) {
STRLEN s_len = UTF8SKIP(s);
if
(! is_PROBLEMATIC_LOCALE_FOLD_utf8(s)) {
Copy(s, d, s_len, U8);
d += s_len;
}
else
if
(is_FOLDS_TO_MULTI_utf8(s)) {
*unfolded_multi_char = TRUE;
Copy(s, d, s_len, U8);
d += s_len;
}
else
if
(isASCII(*s)) {
*(d++) = toFOLD(*s);
}
else
{
STRLEN len;
_toFOLD_utf8_flags(s, s_end, d, &len, FOLD_FLAGS_FULL);
d += len;
}
s += s_len;
}
/* The rest of this branch examines the folded copy. */
s = folded;
s_end = d;
}
/* Look for multi-char fold sequences.  The loop stops one byte short of the
 * end (s < s_end - 1). */
while
(s < s_end - 1)
{
int
count = 0;
int
len = is_MULTI_CHAR_FOLD_utf8_safe(s, s_end);
if
(! len) {
s += UTF8SKIP(s);
continue
;
}
{
U8* multi_end = s + len;
/* Count the characters in the fold.  For EXACTFAA / EXACTFAA_NO_TRIE a fold
 * containing an ASCII byte is not counted at all: scanning resumes just
 * after that byte via next_iteration. */
if
(OP(scan) != EXACTFAA && OP(scan) != EXACTFAA_NO_TRIE) {
count = utf8_length(s, multi_end);
s = multi_end;
}
else
{
while
(s < multi_end) {
if
(isASCII(*s)) {
s++;
goto
next_iteration;
}
else
{
s += UTF8SKIP(s);
}
count++;
}
}
}
total_count_delta += count - 1;
next_iteration: ;
}
/* For EXACTFL the delta was computed on the folded copy; cap it at the
 * character count of the node's real string. */
if
(OP(scan) == EXACTFL) {
int
total_chars = utf8_length((U8*) STRING(scan),
(U8*) STRING(scan) + STR_LEN(scan));
if
(total_count_delta > total_chars) {
total_count_delta = total_chars;
}
}
*min_subtract += total_count_delta;
Safefree(folded);
}
else
if
(OP(scan) == EXACTFAA) {
#if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
|| (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
|| UNICODE_DOT_DOT_VERSION > 0)
/* Non-UTF-8 EXACTFAA: the first LATIN_SMALL_LETTER_SHARP_S found turns the
 * node into EXACTFAA_NO_TRIE and sets *unfolded_multi_char. */
while
(s < s_end) {
if
(*s == LATIN_SMALL_LETTER_SHARP_S) {
OP(scan) = EXACTFAA_NO_TRIE;
*unfolded_multi_char = TRUE;
break
;
}
s++;
}
}
else
if
(OP(scan) != EXACTFAA_NO_TRIE) {
/* Non-UTF-8, other node types.  EXACTF and EXACTFL are scanned up to the
 * very end of the string; the others stop one byte earlier. */
const
U8* upper = (OP(scan) == EXACTF || OP(scan) == EXACTFL)
? s_end
: s_end -1;
while
(s < upper) {
int
len = is_MULTI_CHAR_FOLD_latin1_safe(s, s_end);
if
(! len) {
/* Not a multi-char fold; a sharp s in an EXACTF/EXACTFL node is still
 * flagged through *unfolded_multi_char. */
if
(*s == LATIN_SMALL_LETTER_SHARP_S
&& (OP(scan) == EXACTF || OP(scan) == EXACTFL))
{
*unfolded_multi_char = TRUE;
}
s++;
continue
;
}
/* A two-byte fold made of [Ss][Ss] turns the node into EXACTFUP, except
 * for EXACTF and EXACTFL nodes, which keep their opcode. */
if
(len == 2
&& isALPHA_FOLD_EQ(*s,
's'
)
&& isALPHA_FOLD_EQ(*(s+1),
's'
))
{
if
(OP(scan) != EXACTF && OP(scan) != EXACTFL) {
OP(scan) = EXACTFUP;
}
}
*min_subtract += len - 1;
s += len;
}
#endif
}
}
#ifdef DEBUGGING
/* Debug builds: overwrite every node between the end of 'scan' and 'stop'
 * with a zeroed OPTIMIZED node. */
n = REGNODE_AFTER_varies(scan);
while
(n <= stop) {
OP(n) = OPTIMIZED;
FLAGS(n) = 0;
NEXT_OFF(n) = 0;
n++;
}
#endif
DEBUG_OPTIMISE_r(
if
(merged){DEBUG_PEEP(
"finl"
, scan, depth, 0);});
return
stopnow;
}
SSize_t
Perl_study_chunk(pTHX_
RExC_state_t *pRExC_state,
regnode **scanp,
SSize_t *minlenp,
SSize_t *deltap,
regnode *last,
scan_data_t *data,
I32 stopparen,
U32 recursed_depth,
regnode_ssc *and_withp,
U32 flags,
U32 depth,
bool
was_mutate_ok
)
{
regnode *scan = *scanp;
regnode *next = NULL;
regnode *first_non_open = scan;
I32 code = 0;
SSize_t min = 0;
SSize_t stopmin = OPTIMIZE_INFTY;
SSize_t delta = 0;
I32 pars = 0;
I32 is_par = OP(scan) == OPEN ? PARNO(scan) : 0;
int
is_inf = (flags & SCF_DO_SUBSTR) && (data->flags & SF_IS_INF);
int
is_inf_internal = 0;
scan_data_t data_fake;
SV *re_trie_maxbuff = NULL;
scan_frame *frame = NULL;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_STUDY_CHUNK;
RExC_study_started= 1;
Zero(&data_fake, 1, scan_data_t);
if
( depth == 0 ) {
while
(first_non_open && OP(first_non_open) == OPEN)
first_non_open=regnext(first_non_open);
}
fake_study_recurse:
DEBUG_r(
RExC_study_chunk_recursed_count++;
);
DEBUG_OPTIMISE_MORE_r(
{
Perl_re_indentf( aTHX_
"study_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p"
,
depth, (
long
)stopparen,
(unsigned
long
)RExC_study_chunk_recursed_count,
(unsigned
long
)depth, (unsigned
long
)recursed_depth,
scan,
last);
if
(recursed_depth) {
U32 i;
U32 j;
for
( j = 0 ; j < recursed_depth ; j++ ) {
for
( i = 0 ; i < (U32)RExC_total_parens ; i++ ) {
if
(PAREN_TEST(j, i) && (!j || !PAREN_TEST(j - 1, i))) {
Perl_re_printf( aTHX_
" %d"
,(
int
)i);
break
;
}
}
if
( j + 1 < recursed_depth ) {
Perl_re_printf( aTHX_
","
);
}
}
}
Perl_re_printf( aTHX_
"\n"
);
}
);
while
( scan && OP(scan) != END && scan < last ){
UV min_subtract = 0;
bool
unfolded_multi_char = FALSE;
bool
mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
DEBUG_STUDYDATA(
"Peep"
, data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP(
"Peep"
, scan, depth, flags);
if
(REGNODE_TYPE(OP(scan)) == EXACT
&& OP(scan) != LEXACT
&& OP(scan) != LEXACT_REQ8
&& mutate_ok
) {
join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
0, NULL, depth + 1);
}
rck_elide_nothing(scan);
if
( OP(scan) == DEFINEP ) {
SSize_t minlen = 0;
SSize_t deltanext = 0;
SSize_t fake_last_close = 0;
regnode *fake_last_close_op = NULL;
U32 f = SCF_IN_DEFINE | (flags & SCF_TRIE_DOING_RESTUDY);
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
scan = regnext(scan);
assert
( OP(scan) == IFTHEN );
DEBUG_PEEP(
"expect IFTHEN"
, scan, depth, flags);
data_fake.last_closep= &fake_last_close;
data_fake.last_close_opp= &fake_last_close_op;
minlen = *minlenp;
next = regnext(scan);
scan = REGNODE_AFTER_type(scan,tregnode_IFTHEN);
DEBUG_PEEP(
"scan"
, scan, depth, flags);
DEBUG_PEEP(
"next"
, next, depth, flags);
(
void
)study_chunk(pRExC_state, &scan, &minlen,
&deltanext, next, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1, mutate_ok);
scan = next;
}
else
if
(
OP(scan) == BRANCH ||
OP(scan) == BRANCHJ ||
OP(scan) == IFTHEN
) {
next = regnext(scan);
code = OP(scan);
if
(OP(next) == code || code == IFTHEN) {
SSize_t max1 = 0, min1 = OPTIMIZE_INFTY, num = 0;
regnode_ssc accum;
regnode *
const
startbranch=scan;
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
}
if
(flags & SCF_DO_STCLASS)
ssc_init_zero(pRExC_state, &accum);
while
(OP(scan) == code) {
SSize_t deltanext, minnext, fake_last_close = 0;
regnode *fake_last_close_op = NULL;
U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
regnode_ssc this_class;
DEBUG_PEEP(
"Branch"
, scan, depth, flags);
num++;
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
if
(data) {
data_fake.whilem_c = data->whilem_c;
data_fake.last_closep = data->last_closep;
data_fake.last_close_opp = data->last_close_opp;
}
else
{
data_fake.last_closep = &fake_last_close;
data_fake.last_close_opp = &fake_last_close_op;
}
data_fake.pos_delta = delta;
next = regnext(scan);
scan = REGNODE_AFTER_opcode(scan, code);
if
(flags & SCF_DO_STCLASS) {
ssc_init(pRExC_state, &this_class);
data_fake.start_class = &this_class;
f |= SCF_DO_STCLASS_AND;
}
if
(flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, next, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1,
mutate_ok);
if
(min1 > minnext)
min1 = minnext;
if
(deltanext == OPTIMIZE_INFTY) {
is_inf = is_inf_internal = 1;
max1 = OPTIMIZE_INFTY;
}
else
if
(max1 < minnext + deltanext)
max1 = minnext + deltanext;
scan = next;
if
(data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if
(data_fake.flags & SCF_SEEN_ACCEPT) {
if
( stopmin > minnext)
stopmin = min + min1;
flags &= ~SCF_DO_SUBSTR;
if
(data)
data->flags |= SCF_SEEN_ACCEPT;
}
if
(data) {
if
(data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
data->whilem_c = data_fake.whilem_c;
}
if
(flags & SCF_DO_STCLASS)
ssc_or(pRExC_state, &accum, (regnode_charclass*)&this_class);
DEBUG_STUDYDATA(
"end BRANCH"
, data, depth, is_inf, min, stopmin, delta);
}
if
(code == IFTHEN && num < 2)
min1 = 0;
if
(flags & SCF_DO_SUBSTR) {
data->pos_min += min1;
if
(data->pos_delta >= OPTIMIZE_INFTY - (max1 - min1))
data->pos_delta = OPTIMIZE_INFTY;
else
data->pos_delta += max1 - min1;
if
(max1 != min1 || is_inf)
data->cur_is_floating = 1;
}
min += min1;
if
(delta == OPTIMIZE_INFTY
|| OPTIMIZE_INFTY - delta - (max1 - min1) < 0)
delta = OPTIMIZE_INFTY;
else
delta += max1 - min1;
if
(flags & SCF_DO_STCLASS_OR) {
ssc_or(pRExC_state, data->start_class, (regnode_charclass*) &accum);
if
(min1) {
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
flags &= ~SCF_DO_STCLASS;
}
}
else
if
(flags & SCF_DO_STCLASS_AND) {
if
(min1) {
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
flags &= ~SCF_DO_STCLASS;
}
else
{
INIT_AND_WITHP;
StructCopy(data->start_class, and_withp, regnode_ssc);
flags &= ~SCF_DO_STCLASS_AND;
StructCopy(&accum, data->start_class, regnode_ssc);
flags |= SCF_DO_STCLASS_OR;
}
}
DEBUG_STUDYDATA(
"pre TRIE"
, data, depth, is_inf, min, stopmin, delta);
if
(PERL_ENABLE_TRIE_OPTIMISATION
&& OP(startbranch) == BRANCH
&& mutate_ok
) {
int
made=0;
if
(!re_trie_maxbuff) {
re_trie_maxbuff = get_sv(RE_TRIE_MAXBUF_NAME, 1);
if
(!SvIOK(re_trie_maxbuff))
sv_setiv(re_trie_maxbuff, RE_TRIE_MAXBUF_INIT);
}
if
( SvIV(re_trie_maxbuff)>=0 ) {
regnode *cur;
regnode *first = (regnode *)NULL;
regnode *prev = (regnode *)NULL;
regnode *tail = scan;
U8 trietype = 0;
U32 count=0;
while
( OP( tail ) == TAIL ) {
tail = regnext( tail );
}
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
Perl_re_indentf( aTHX_
"%s %"
UVuf
":%s\n"
,
depth+1,
"Looking for TRIE'able sequences. Tail node is "
,
(UV) REGNODE_OFFSET(tail),
SvPV_nolen_const( RExC_mysv )
);
});
#define TRIE_TYPE(X) ( ( NOTHING == (X) ) \
? NOTHING \
: ( EXACT == (X) || EXACT_REQ8 == (X) ) \
? EXACT \
: ( EXACTFU == (X) \
|| EXACTFU_REQ8 == (X) \
|| EXACTFUP == (X) ) \
? EXACTFU \
: ( EXACTFAA == (X) ) \
? EXACTFAA \
: ( EXACTL == (X) ) \
? EXACTL \
: ( EXACTFLU8 == (X) ) \
? EXACTFLU8 \
: 0 )
for
( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
regnode *
const
noper = REGNODE_AFTER( cur );
U8 noper_type = OP( noper );
U8 noper_trietype = TRIE_TYPE( noper_type );
#if defined(DEBUGGING) || defined(NOJUMPTRIE)
regnode *
const
noper_next = regnext( noper );
U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
U8 noper_next_trietype = (noper_next && noper_next < tail) ? TRIE_TYPE( noper_next_type ) :0;
#endif
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
Perl_re_indentf( aTHX_
"- %d:%s (%d)"
,
depth+1,
REG_NODE_NUM(cur), SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
Perl_re_printf( aTHX_
" -> %d:%s"
,
REG_NODE_NUM(noper), SvPV_nolen_const(RExC_mysv));
if
( noper_next ) {
regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
Perl_re_printf( aTHX_
"\t=> %d:%s\t"
,
REG_NODE_NUM(noper_next), SvPV_nolen_const(RExC_mysv));
}
Perl_re_printf( aTHX_
"(First==%d,Last==%d,Cur==%d,tt==%s,ntt==%s,nntt==%s)\n"
,
REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
REGNODE_NAME(trietype), REGNODE_NAME(noper_trietype), REGNODE_NAME(noper_next_trietype)
);
});
if
( noper_trietype
&&
(
( noper_trietype == NOTHING )
|| ( trietype == NOTHING )
|| ( trietype == noper_trietype )
)
#ifdef NOJUMPTRIE
&& noper_next >= tail
#endif
&& count < U16_MAX)
{
if
( !first ) {
first = cur;
if
( noper_trietype == NOTHING ) {
#if !defined(DEBUGGING) && !defined(NOJUMPTRIE)
regnode *
const
noper_next = regnext( noper );
U8 noper_next_type = (noper_next && noper_next < tail) ? OP(noper_next) : 0;
U8 noper_next_trietype = noper_next_type ? TRIE_TYPE( noper_next_type ) :0;
#endif
if
( noper_next_trietype ) {
trietype = noper_next_trietype;
}
else
if
(noper_next_type) {
first = NULL;
}
}
else
{
trietype = noper_trietype;
}
}
else
{
if
( trietype == NOTHING )
trietype = noper_trietype;
prev = cur;
}
if
(first)
count++;
}
else
{
if
( prev ) {
if
( trietype && trietype != NOTHING )
make_trie( pRExC_state,
startbranch, first, cur, tail,
count, trietype, depth+1 );
prev = NULL;
}
if
( noper_trietype
#ifdef NOJUMPTRIE
&& noper_next >= tail
#endif
){
count = 1;
first = cur;
trietype = noper_trietype;
}
else
if
(first) {
count = 0;
first = NULL;
trietype = 0;
}
}
}
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
Perl_re_indentf( aTHX_
"- %s (%d) <SCAN FINISHED> "
,
depth+1, SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
Perl_re_printf( aTHX_
"(First==%d, Last==%d, Cur==%d, tt==%s)\n"
,
REG_NODE_NUM(first), REG_NODE_NUM(prev), REG_NODE_NUM(cur),
REGNODE_NAME(trietype)
);
});
if
( prev && trietype ) {
if
( trietype != NOTHING ) {
made= make_trie( pRExC_state, startbranch,
first, scan, tail, count,
trietype, depth+1 );
#ifdef TRIE_STUDY_OPT
if
( ((made == MADE_EXACT_TRIE &&
startbranch == first)
|| ( first_non_open == first )) &&
depth==0 ) {
flags |= SCF_TRIE_RESTUDY;
if
( startbranch == first
&& scan >= tail )
{
RExC_seen &=~REG_TOP_LEVEL_BRANCHES_SEEN;
}
}
#endif
}
else
{
if
( startbranch == first ) {
regnode *opt;
DEBUG_TRIE_COMPILE_r({
regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
Perl_re_indentf( aTHX_
"- %s (%d) <NOTHING BRANCH SEQUENCE>\n"
,
depth+1,
SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur));
});
OP(startbranch)= NOTHING;
NEXT_OFF(startbranch)= tail - startbranch;
for
( opt= startbranch + 1; opt < tail ; opt++ )
OP(opt)= OPTIMIZED;
}
}
}
}
}
DEBUG_STUDYDATA(
"after TRIE"
, data, depth, is_inf, min, stopmin, delta);
}
else
scan = REGNODE_AFTER_opcode(scan,code);
continue
;
}
else
if
(OP(scan) == SUSPEND || OP(scan) == GOSUB) {
I32 paren = 0;
regnode *start = NULL;
regnode *end = NULL;
U32 my_recursed_depth= recursed_depth;
if
(OP(scan) != SUSPEND) {
paren = ARG1u(scan);
RExC_recurse[ARG2i(scan)] = scan;
start = REGNODE_p(RExC_open_parens[paren]);
end = REGNODE_p(RExC_close_parens[paren]);
if
(
( flags & SCF_IN_DEFINE )
||
(
(is_inf_internal || is_inf || (data && data->flags & SF_IS_INF))
&&
( (flags & (SCF_DO_STCLASS | SCF_DO_SUBSTR)) == 0 )
)
) {
is_inf = is_inf_internal = 1;
scan= regnext(scan);
continue
;
}
if
(
!recursed_depth
|| !PAREN_TEST(recursed_depth - 1, paren)
) {
if
(!recursed_depth) {
Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
}
else
{
Copy(PAREN_OFFSET(recursed_depth - 1),
PAREN_OFFSET(recursed_depth),
RExC_study_chunk_recursed_bytes, U8);
}
DEBUG_STUDYDATA(
"gosub-set"
, data, depth, is_inf, min, stopmin, delta);
PAREN_SET(recursed_depth, paren);
my_recursed_depth= recursed_depth + 1;
}
else
{
DEBUG_STUDYDATA(
"gosub-inf"
, data, depth, is_inf, min, stopmin, delta);
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->cur_is_floating = 1;
}
is_inf = is_inf_internal = 1;
if
(flags & SCF_DO_STCLASS_OR)
ssc_anything(data->start_class);
flags &= ~SCF_DO_STCLASS;
start= NULL;
}
}
else
{
paren = stopparen;
start = scan + 2;
end = regnext(scan);
}
if
(start) {
scan_frame *newframe;
assert
(end);
if
(!RExC_frame_last) {
Newxz(newframe, 1, scan_frame);
SAVEDESTRUCTOR_X(S_unwind_scan_frames, newframe);
RExC_frame_head= newframe;
RExC_frame_count++;
}
else
if
(!RExC_frame_last->next_frame) {
Newxz(newframe, 1, scan_frame);
RExC_frame_last->next_frame= newframe;
newframe->prev_frame= RExC_frame_last;
RExC_frame_count++;
}
else
{
newframe= RExC_frame_last->next_frame;
}
RExC_frame_last= newframe;
newframe->next_regnode = regnext(scan);
newframe->last_regnode = last;
newframe->stopparen = stopparen;
newframe->prev_recursed_depth = recursed_depth;
newframe->this_prev_frame= frame;
newframe->in_gosub = (
(frame && frame->in_gosub) || OP(scan) == GOSUB
);
DEBUG_STUDYDATA(
"frame-new"
, data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP(
"fnew"
, scan, depth, flags);
frame = newframe;
scan = start;
stopparen = paren;
last = end;
depth = depth + 1;
recursed_depth= my_recursed_depth;
continue
;
}
}
else
if
(REGNODE_TYPE(OP(scan)) == EXACT && ! isEXACTFish(OP(scan))) {
SSize_t bytelen = STR_LEN(scan), charlen;
UV uc;
assert
(bytelen);
if
(UTF) {
const
U8 *
const
s = (U8*)STRING(scan);
uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
charlen = utf8_length(s, s + bytelen);
}
else
{
uc = *((U8*)STRING(scan));
charlen = bytelen;
}
min += charlen;
if
(flags & SCF_DO_SUBSTR) {
if
(data->last_end == -1) {
data->last_start_min = data->pos_min;
data->last_start_max =
is_inf ? OPTIMIZE_INFTY
: (data->pos_delta > OPTIMIZE_INFTY - data->pos_min)
? OPTIMIZE_INFTY : data->pos_min + data->pos_delta;
}
sv_catpvn(data->last_found, STRING(scan), bytelen);
if
(UTF)
SvUTF8_on(data->last_found);
{
SV *
const
sv = data->last_found;
MAGIC *
const
mg = SvUTF8(sv) && SvMAGICAL(sv) ?
mg_find(sv, PERL_MAGIC_utf8) : NULL;
if
(mg && mg->mg_len >= 0)
mg->mg_len += charlen;
}
data->last_end = data->pos_min + charlen;
data->pos_min += charlen;
data->flags &= ~SF_BEFORE_EOL;
}
if
(flags & SCF_DO_STCLASS_AND) {
ssc_cp_and(data->start_class, uc);
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
ssc_clear_locale(data->start_class);
}
else
if
(flags & SCF_DO_STCLASS_OR) {
ssc_add_cp(data->start_class, uc);
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
}
flags &= ~SCF_DO_STCLASS;
DEBUG_STUDYDATA(
"end EXACT"
, data, depth, is_inf, min, stopmin, delta);
}
else
if
(REGNODE_TYPE(OP(scan)) == EXACT) {
SSize_t bytelen = STR_LEN(scan), charlen;
const
U8 * s = (U8*)STRING(scan);
if
( bytelen == 1
&& isALPHA_A(*s)
&& ( OP(scan) == EXACTFAA
|| ( OP(scan) == EXACTFU
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
&& mutate_ok
) {
U8 mask = ~ (
'A'
^
'a'
);
OP(scan) = ANYOFM;
ARG1u_SET(scan, *s & mask);
FLAGS(scan) = mask;
continue
;
}
if
(flags & SCF_DO_SUBSTR) {
assert
(data);
scan_commit(pRExC_state, data, minlenp, is_inf);
}
charlen = UTF ? (SSize_t) utf8_length(s, s + bytelen) : bytelen;
if
(unfolded_multi_char) {
RExC_seen |= REG_UNFOLDED_MULTI_SEEN;
}
min += charlen - min_subtract;
assert
(min >= 0);
if
((SSize_t)min_subtract < OPTIMIZE_INFTY
&& delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
) {
delta += min_subtract;
}
else
{
delta = OPTIMIZE_INFTY;
}
if
(flags & SCF_DO_SUBSTR) {
data->pos_min += charlen - min_subtract;
if
(data->pos_min < 0) {
data->pos_min = 0;
}
if
((SSize_t)min_subtract < OPTIMIZE_INFTY
&& data->pos_delta < OPTIMIZE_INFTY - (SSize_t)min_subtract
) {
data->pos_delta += min_subtract;
}
else
{
data->pos_delta = OPTIMIZE_INFTY;
}
if
(min_subtract) {
data->cur_is_floating = 1;
}
}
if
(flags & SCF_DO_STCLASS) {
SV* EXACTF_invlist = make_exactf_invlist(pRExC_state, scan);
assert
(EXACTF_invlist);
if
(flags & SCF_DO_STCLASS_AND) {
if
(OP(scan) != EXACTFL)
ssc_clear_locale(data->start_class);
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
ANYOF_POSIXL_ZERO(data->start_class);
ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
}
else
{
ssc_union(data->start_class, EXACTF_invlist, FALSE);
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
}
flags &= ~SCF_DO_STCLASS;
SvREFCNT_dec(EXACTF_invlist);
}
DEBUG_STUDYDATA(
"end EXACTish"
, data, depth, is_inf, min, stopmin, delta);
}
else
if
(REGNODE_VARIES(OP(scan))) {
SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
I32 fl = 0;
U32 f = flags;
regnode *
const
oscan = scan;
regnode_ssc this_class;
regnode_ssc *oclass = NULL;
I32 next_is_eval = 0;
switch
(REGNODE_TYPE(OP(scan))) {
case
WHILEM:
scan = REGNODE_AFTER(scan);
goto
finish;
case
PLUS:
if
(flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
next = REGNODE_AFTER(scan);
if
( ( REGNODE_TYPE(OP(next)) == EXACT
&& ! isEXACTFish(OP(next)))
|| (flags & SCF_DO_STCLASS))
{
mincount = 1;
maxcount = REG_INFTY;
next = regnext(scan);
scan = REGNODE_AFTER(scan);
goto
do_curly;
}
}
if
(flags & SCF_DO_SUBSTR)
data->pos_min++;
min++;
case
STAR:
next = REGNODE_AFTER(scan);
if
(OP(next) == EXACTFU_S_EDGE && mutate_ok) {
OP(next) = EXACTFU;
}
if
( STR_LEN(next) == 1
&& isALPHA_A(* STRING(next))
&& ( OP(next) == EXACTFAA
|| ( OP(next) == EXACTFU
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
&& mutate_ok
) {
U8 mask = ~ (
'A'
^
'a'
);
assert
(isALPHA_A(* STRING(next)));
OP(next) = ANYOFM;
ARG1u_SET(next, *STRING(next) & mask);
FLAGS(next) = mask;
}
if
(flags & SCF_DO_STCLASS) {
mincount = 0;
maxcount = REG_INFTY;
next = regnext(scan);
scan = REGNODE_AFTER(scan);
goto
do_curly;
}
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->cur_is_floating = 1;
}
is_inf = is_inf_internal = 1;
scan = regnext(scan);
goto
optimize_curly_tail;
case
CURLY:
if
(stopparen>0 && (OP(scan)==CURLYN || OP(scan)==CURLYM)
&& (FLAGS(scan) == stopparen))
{
mincount = 1;
maxcount = 1;
}
else
{
mincount = ARG1i(scan);
maxcount = ARG2i(scan);
}
next = regnext(scan);
if
(OP(scan) == CURLYX) {
I32 lp = (data ? *(data->last_closep) : 0);
FLAGS(scan) = ((lp <= (I32)U8_MAX) ? (U8)lp : U8_MAX);
}
scan = REGNODE_AFTER(scan);
next_is_eval = (OP(scan) == EVAL);
do_curly:
if
(flags & SCF_DO_SUBSTR) {
if
(mincount == 0)
scan_commit(pRExC_state, data, minlenp, is_inf);
pos_before = data->pos_min;
}
if
(data) {
fl = data->flags;
data->flags &= ~(SF_HAS_PAR|SF_IN_PAR|SF_HAS_EVAL);
if
(is_inf)
data->flags |= SF_IS_INF;
}
if
(flags & SCF_DO_STCLASS) {
ssc_init(pRExC_state, &this_class);
oclass = data->start_class;
data->start_class = &this_class;
f |= SCF_DO_STCLASS_AND;
f &= ~SCF_DO_STCLASS_OR;
}
if
((mincount > 1) || (maxcount > 1 && maxcount != REG_INFTY))
f &= ~SCF_WHILEM_VISITED_POS;
minnext = study_chunk(pRExC_state, &scan, minlenp, &deltanext,
last, data, stopparen, recursed_depth, NULL,
(mincount == 0
? (f & ~SCF_DO_SUBSTR)
: f)
, depth+1, mutate_ok);
if
(data && data->flags & SCF_SEEN_ACCEPT) {
if
(mincount > 1)
mincount = 1;
}
if
(flags & SCF_DO_STCLASS)
data->start_class = oclass;
if
(mincount == 0 || minnext == 0) {
if
(flags & SCF_DO_STCLASS_OR) {
ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
}
else
if
(flags & SCF_DO_STCLASS_AND) {
INIT_AND_WITHP;
StructCopy(data->start_class, and_withp, regnode_ssc);
flags &= ~SCF_DO_STCLASS_AND;
StructCopy(&this_class, data->start_class, regnode_ssc);
flags |= SCF_DO_STCLASS_OR;
ANYOF_FLAGS(data->start_class)
|= SSC_MATCHES_EMPTY_STRING;
}
}
else
{
if
(flags & SCF_DO_STCLASS_OR) {
ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
}
else
if
(flags & SCF_DO_STCLASS_AND)
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &this_class);
flags &= ~SCF_DO_STCLASS;
}
if
(!scan)
scan = next;
if
(((flags & (SCF_TRIE_DOING_RESTUDY|SCF_DO_SUBSTR))==SCF_DO_SUBSTR)
&& (next_is_eval || !(mincount == 0 && maxcount == 1))
&& (minnext == 0) && (deltanext == 0)
&& data && !(data->flags & (SF_HAS_PAR|SF_IN_PAR))
&& maxcount <= REG_INFTY/3)
{
_WARN_HELPER(RExC_precomp_end, packWARN(WARN_REGEXP),
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
"Quantifier unexpected on zero-length expression "
"in regex m/%"
UTF8f
"/"
,
UTF8fARG(UTF, RExC_precomp_end - RExC_precomp,
RExC_precomp)));
}
if
( ( minnext > 0 && mincount >= SSize_t_MAX / minnext )
|| min >= SSize_t_MAX - minnext * mincount )
{
FAIL(
"Regexp out of space"
);
}
min += minnext * mincount;
is_inf_internal |= deltanext == OPTIMIZE_INFTY
|| (maxcount == REG_INFTY && minnext + deltanext > 0);
is_inf |= is_inf_internal;
if
(is_inf) {
delta = OPTIMIZE_INFTY;
}
else
{
delta += (minnext + deltanext) * maxcount
- minnext * mincount;
}
if
(data && data->flags & SCF_SEEN_ACCEPT) {
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
flags &= ~SCF_DO_SUBSTR;
}
if
(stopmin > min)
stopmin = min;
DEBUG_STUDYDATA(
"after-whilem accept"
, data, depth, is_inf, min, stopmin, delta);
}
DEBUG_STUDYDATA(
"PRE CURLYX_TO_CURLYN"
, data, depth, is_inf, min, stopmin, delta);
if
( RE_OPTIMIZE_CURLYX_TO_CURLYN
&& OP(oscan) == CURLYX
&& data
&& !(RExC_seen & REG_PESSIMIZE_SEEN)
&& ( data->flags & SF_IN_PAR )
&& !deltanext
&& minnext == 1
&& mutate_ok
) {
DEBUG_STUDYDATA(
"CURLYX_TO_CURLYN"
, data, depth, is_inf, min, stopmin, delta);
regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX);
regnode *
const
nxt1 = nxt;
#ifdef DEBUGGING
regnode *nxt2;
#endif
nxt = regnext(nxt);
if
(!REGNODE_SIMPLE(OP(nxt))
&& !(REGNODE_TYPE(OP(nxt)) == EXACT
&& STR_LEN(nxt) == 1))
goto
nogo;
#ifdef DEBUGGING
nxt2 = nxt;
#endif
nxt = regnext(nxt);
if
(OP(nxt) != CLOSE)
goto
nogo;
if
(RExC_open_parens) {
RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt) + 2;
}
FLAGS(oscan) = (U8)PARNO(nxt);
OP(oscan) = CURLYN;
OP(nxt1) = NOTHING;
#ifdef DEBUGGING
OP(nxt1 + 1) = OPTIMIZED;
NEXT_OFF(nxt1+ 1) = 0;
NEXT_OFF(nxt2) = 0;
OP(nxt) = OPTIMIZED;
OP(nxt + 1) = OPTIMIZED;
NEXT_OFF(nxt+ 1) = 0;
#endif
}
nogo:
DEBUG_STUDYDATA(
"PRE CURLYX_TO_CURLYM"
, data, depth, is_inf, min, stopmin, delta);
if
( RE_OPTIMIZE_CURLYX_TO_CURLYM
&& OP(oscan) == CURLYX
&& data
&& !(RExC_seen & REG_PESSIMIZE_SEEN)
&& !(data->flags & SF_HAS_PAR)
&& !deltanext
&& minnext != 0
&& !(RExC_seen & REG_UNFOLDED_MULTI_SEEN)
&& mutate_ok
) {
DEBUG_STUDYDATA(
"CURLYX_TO_CURLYM"
, data, depth, is_inf, min, stopmin, delta);
regnode *nxt = REGNODE_AFTER_type(oscan, tregnode_CURLYX);
regnode *nxt2;
OP(oscan) = CURLYM;
while
( (nxt2 = regnext(nxt))
&& (OP(nxt2) != WHILEM))
nxt = nxt2;
OP(nxt2) = SUCCEED;
if
((data->flags & SF_IN_PAR) && OP(nxt) == CLOSE) {
regnode *nxt1 = REGNODE_AFTER_type(oscan, tregnode_CURLYM);
FLAGS(oscan) = (U8)PARNO(nxt);
if
(RExC_open_parens) {
RExC_open_parens[PARNO(nxt1)] = REGNODE_OFFSET(oscan);
RExC_close_parens[PARNO(nxt1)] = REGNODE_OFFSET(nxt2)
+ 1;
}
OP(nxt1) = OPTIMIZED;
OP(nxt) = OPTIMIZED;
#ifdef DEBUGGING
OP(nxt1 + 1) = OPTIMIZED;
OP(nxt + 1) = OPTIMIZED;
NEXT_OFF(nxt1 + 1) = 0;
NEXT_OFF(nxt + 1) = 0;
#endif
#if 0
while
( nxt1 && (OP(nxt1) != WHILEM)) {
regnode *nnxt = regnext(nxt1);
if
(nnxt == nxt) {
if
(REGNODE_OFF_BY_ARG(OP(nxt1)))
ARG1u_SET(nxt1, nxt2 - nxt1);
else
if
(nxt2 - nxt1 < U16_MAX)
NEXT_OFF(nxt1) = nxt2 - nxt1;
else
OP(nxt) = NOTHING;
}
nxt1 = nnxt;
}
#endif
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
NULL, stopparen, recursed_depth, NULL, 0,
depth+1, mutate_ok);
}
else
FLAGS(oscan) = 0;
}
else
if
((OP(oscan) == CURLYX)
&& (flags & SCF_WHILEM_VISITED_POS)
&& (maxcount == REG_INFTY)
&& data) {
regnode *nxt = oscan + NEXT_OFF(oscan);
if
(OP(REGNODE_BEFORE(nxt)) == NOTHING)
nxt += ARG1u(nxt);
nxt = REGNODE_BEFORE(nxt);
if
(FLAGS(nxt) & 0xf) {
}
else
if
(++data->whilem_c < 16) {
assert
(data->whilem_c <= RExC_whilem_seen);
FLAGS(nxt) = (U8)(data->whilem_c
| (RExC_whilem_seen << 4));
}
}
if
(data && fl & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if
(flags & SCF_DO_SUBSTR) {
SV *last_str = NULL;
STRLEN last_chrs = 0;
int
counted = mincount != 0;
if
(data->last_end > 0 && mincount != 0) {
SSize_t b = pos_before >= data->last_start_min
? pos_before : data->last_start_min;
STRLEN l;
const
char
*
const
s = SvPV_const(data->last_found, l);
SSize_t old = b - data->last_start_min;
assert
(old >= 0);
if
(UTF)
old = utf8_hop_forward((U8*)s, old,
(U8 *) SvEND(data->last_found))
- (U8*)s;
l -= old;
last_str = newSVpvn_utf8(s + old, l, UTF);
last_chrs = UTF ? utf8_length((U8*)(s + old),
(U8*)(s + old + l)) : l;
if
(deltanext == 0 && pos_before == b) {
if
(mincount > 1) {
SvGROW(last_str, (mincount * l) + 1);
repeatcpy(SvPVX(last_str) + l,
SvPVX_const(last_str), l,
mincount - 1);
SvCUR_set(last_str, SvCUR(last_str) * mincount);
SvCUR_set(data->last_found,
SvCUR(data->last_found) - l);
sv_catsv(data->last_found, last_str);
{
SV * sv = data->last_found;
MAGIC *mg =
SvUTF8(sv) && SvMAGICAL(sv) ?
mg_find(sv, PERL_MAGIC_utf8) : NULL;
if
(mg && mg->mg_len >= 0)
mg->mg_len += last_chrs * (mincount-1);
}
last_chrs *= mincount;
data->last_end += l * (mincount - 1);
}
}
else
{
data->last_start_min += minnext * (mincount - 1);
data->last_start_max =
is_inf
? OPTIMIZE_INFTY
: data->last_start_max +
(maxcount - 1) * (minnext + data->pos_delta);
}
}
data->pos_min += minnext * (mincount - counted);
#if 0
Perl_re_printf( aTHX_
"counted=%"
UVuf
" deltanext=%"
UVuf
" OPTIMIZE_INFTY=%"
UVuf
" minnext=%"
UVuf
" maxcount=%"
UVuf
" mincount=%"
UVuf
" data->pos_delta=%"
UVuf
"\n"
,
(UV)counted, (UV)deltanext, (UV)OPTIMIZE_INFTY, (UV)minnext,
(UV)maxcount, (UV)mincount, (UV)data->pos_delta);
if
(deltanext != OPTIMIZE_INFTY)
Perl_re_printf( aTHX_
"LHS=%"
UVuf
" RHS=%"
UVuf
"\n"
,
(UV)(-counted * deltanext + (minnext + deltanext) * maxcount
- minnext * mincount), (UV)(OPTIMIZE_INFTY - data->pos_delta));
#endif
if
(deltanext == OPTIMIZE_INFTY
|| data->pos_delta == OPTIMIZE_INFTY
|| -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= OPTIMIZE_INFTY - data->pos_delta)
data->pos_delta = OPTIMIZE_INFTY;
else
data->pos_delta += - counted * deltanext +
(minnext + deltanext) * maxcount - minnext * mincount;
if
(mincount != maxcount) {
scan_commit(pRExC_state, data, minlenp, is_inf);
if
(mincount && last_str) {
SV *
const
sv = data->last_found;
MAGIC *
const
mg = SvUTF8(sv) && SvMAGICAL(sv) ?
mg_find(sv, PERL_MAGIC_utf8) : NULL;
if
(mg)
mg->mg_len = -1;
sv_setsv(sv, last_str);
data->last_end = data->pos_min;
data->last_start_min = data->pos_min - last_chrs;
data->last_start_max = is_inf
? OPTIMIZE_INFTY
: data->pos_min + data->pos_delta - last_chrs;
}
data->cur_is_floating = 1;
}
SvREFCNT_dec(last_str);
}
if
(data && (fl & SF_HAS_EVAL))
data->flags |= SF_HAS_EVAL;
optimize_curly_tail:
rck_elide_nothing(oscan);
continue
;
default
:
Perl_croak(aTHX_
"panic: unexpected varying REx opcode %d"
,
OP(scan));
case
REF:
case
CLUMP:
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->cur_is_floating = 1;
}
is_inf = is_inf_internal = 1;
if
(flags & SCF_DO_STCLASS_OR) {
if
(OP(scan) == CLUMP) {
ssc_match_all_cp(data->start_class);
}
else
{
ssc_anything(data->start_class);
}
}
flags &= ~SCF_DO_STCLASS;
break
;
}
}
else
if
(OP(scan) == LNBREAK) {
if
(flags & SCF_DO_STCLASS) {
if
(flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class,
PL_XPosix_ptrs[CC_VERTSPACE_], FALSE);
ssc_clear_locale(data->start_class);
ANYOF_FLAGS(data->start_class)
&= ~SSC_MATCHES_EMPTY_STRING;
}
else
if
(flags & SCF_DO_STCLASS_OR) {
ssc_union(data->start_class,
PL_XPosix_ptrs[CC_VERTSPACE_],
FALSE);
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
ANYOF_FLAGS(data->start_class)
&= ~SSC_MATCHES_EMPTY_STRING;
}
flags &= ~SCF_DO_STCLASS;
}
min++;
if
(delta != OPTIMIZE_INFTY)
delta++;
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->pos_min += 1;
if
(data->pos_delta != OPTIMIZE_INFTY) {
data->pos_delta += 1;
}
data->cur_is_floating = 1;
}
}
else
if
(REGNODE_SIMPLE(OP(scan))) {
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->pos_min++;
}
min++;
if
(flags & SCF_DO_STCLASS) {
bool
invert = 0;
SV* my_invlist = NULL;
U8 namedclass;
ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
switch
(OP(scan)) {
default
:
#ifdef DEBUGGING
Perl_croak(aTHX_
"panic: unexpected simple REx opcode %d"
,
OP(scan));
#endif
case
SANY:
if
(flags & SCF_DO_STCLASS_OR)
ssc_match_all_cp(data->start_class);
break
;
case
REG_ANY:
{
SV* REG_ANY_invlist = _new_invlist(2);
REG_ANY_invlist = add_cp_to_invlist(REG_ANY_invlist,
'\n'
);
if
(flags & SCF_DO_STCLASS_OR) {
ssc_union(data->start_class,
REG_ANY_invlist,
TRUE
);
}
else
if
(flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class,
REG_ANY_invlist,
TRUE
);
ssc_clear_locale(data->start_class);
}
SvREFCNT_dec_NN(REG_ANY_invlist);
}
break
;
case
ANYOFD:
case
ANYOFL:
case
ANYOFPOSIXL:
case
ANYOFH:
case
ANYOFHb:
case
ANYOFHr:
case
ANYOFHs:
case
ANYOF:
if
(flags & SCF_DO_STCLASS_AND)
ssc_and(pRExC_state, data->start_class,
(regnode_charclass *) scan);
else
ssc_or(pRExC_state, data->start_class,
(regnode_charclass *) scan);
break
;
case
ANYOFHbbm:
{
SV* cp_list = get_ANYOFHbbm_contents(scan);
if
(flags & SCF_DO_STCLASS_OR) {
ssc_union(data->start_class, cp_list, invert);
}
else
if
(flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class, cp_list, invert);
}
SvREFCNT_dec_NN(cp_list);
break
;
}
case
NANYOFM:
case
ANYOFM:
{
SV* cp_list = get_ANYOFM_contents(scan);
if
(flags & SCF_DO_STCLASS_OR) {
ssc_union(data->start_class, cp_list, invert);
}
else
if
(flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class, cp_list, invert);
}
SvREFCNT_dec_NN(cp_list);
break
;
}
case
ANYOFR:
case
ANYOFRb:
{
SV* cp_list = NULL;
cp_list = _add_range_to_invlist(cp_list,
ANYOFRbase(scan),
ANYOFRbase(scan) + ANYOFRdelta(scan));
if
(flags & SCF_DO_STCLASS_OR) {
ssc_union(data->start_class, cp_list, invert);
}
else
if
(flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class, cp_list, invert);
}
SvREFCNT_dec_NN(cp_list);
break
;
}
case
NPOSIXL:
invert = 1;
case
POSIXL:
namedclass = classnum_to_namedclass(FLAGS(scan)) + invert;
if
(flags & SCF_DO_STCLASS_AND) {
bool
was_there = cBOOL(
ANYOF_POSIXL_TEST(data->start_class,
namedclass));
ANYOF_POSIXL_ZERO(data->start_class);
if
(was_there) {
ANYOF_POSIXL_SET(data->start_class, namedclass);
}
data->start_class->invlist
= sv_2mortal(_new_invlist(0));
}
else
{
int
complement = namedclass + ((invert) ? -1 : 1);
assert
(flags & SCF_DO_STCLASS_OR);
if
(ANYOF_POSIXL_TEST(data->start_class, complement)) {
ssc_match_all_cp(data->start_class);
ANYOF_POSIXL_CLEAR(data->start_class, namedclass);
ANYOF_POSIXL_CLEAR(data->start_class, complement);
}
else
{
ANYOF_POSIXL_SET(data->start_class, namedclass);
}
}
break
;
case
NPOSIXA:
invert = 1;
case
POSIXA:
my_invlist = invlist_clone(PL_Posix_ptrs[FLAGS(scan)], NULL);
goto
join_posix_and_ascii;
case
NPOSIXD:
case
NPOSIXU:
invert = 1;
case
POSIXD:
case
POSIXU:
my_invlist = invlist_clone(PL_XPosix_ptrs[FLAGS(scan)], NULL);
if
(OP(scan) == NPOSIXD) {
_invlist_subtract(my_invlist, PL_UpperLatin1,
&my_invlist);
}
join_posix_and_ascii:
if
(flags & SCF_DO_STCLASS_AND) {
ssc_intersection(data->start_class, my_invlist, invert);
ssc_clear_locale(data->start_class);
}
else
{
assert
(flags & SCF_DO_STCLASS_OR);
ssc_union(data->start_class, my_invlist, invert);
}
SvREFCNT_dec(my_invlist);
}
if
(flags & SCF_DO_STCLASS_OR)
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
flags &= ~SCF_DO_STCLASS;
}
}
else
if
(REGNODE_TYPE(OP(scan)) == EOL && flags & SCF_DO_SUBSTR) {
data->flags |= (OP(scan) == MEOL
? SF_BEFORE_MEOL
: SF_BEFORE_SEOL);
scan_commit(pRExC_state, data, minlenp, is_inf);
}
else
if
( REGNODE_TYPE(OP(scan)) == BRANCHJ
&& (FLAGS(scan) || data || (flags & SCF_DO_STCLASS))
&& (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
{
if
( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
|| OP(scan) == UNLESSM )
{
bool
is_positive = OP(scan) == IFMATCH ? 1 : 0;
SSize_t deltanext, minnext;
SSize_t fake_last_close = 0;
regnode *fake_last_close_op = NULL;
regnode *cur_last_close_op;
regnode *nscan;
regnode_ssc intrnl;
U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
if
(data) {
data_fake.whilem_c = data->whilem_c;
data_fake.last_closep = data->last_closep;
data_fake.last_close_opp = data->last_close_opp;
}
else
{
data_fake.last_closep = &fake_last_close;
data_fake.last_close_opp = &fake_last_close_op;
}
cur_last_close_op= *(data_fake.last_close_opp);
data_fake.pos_delta = delta;
if
( flags & SCF_DO_STCLASS && !FLAGS(scan)
&& OP(scan) == IFMATCH ) {
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
f |= SCF_DO_STCLASS_AND;
}
if
(flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
next = regnext(scan);
nscan = REGNODE_AFTER(scan);
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
last, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1,
mutate_ok);
if
(FLAGS(scan)) {
if
( deltanext < 0
|| deltanext > (I32) U8_MAX
|| minnext > (I32)U8_MAX
|| minnext + deltanext > (I32)U8_MAX)
{
FAIL2(
"Lookbehind longer than %"
UVuf
" not implemented"
,
(UV)U8_MAX);
}
if
(deltanext) {
NEXT_OFF(scan) = deltanext;
if
(
cur_last_close_op != *(data_fake.last_close_opp)
&& !(flags & SCF_TRIE_DOING_RESTUDY)
) {
ckWARNexperimental_with_arg(RExC_parse,
WARN_EXPERIMENTAL__VLB,
"Variable length %s lookbehind with capturing is experimental"
,
is_positive ?
"positive"
:
"negative"
);
}
}
FLAGS(scan) = (U8)minnext + deltanext;
}
if
(data) {
if
(data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if
(data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
data->whilem_c = data_fake.whilem_c;
}
if
(f & SCF_DO_STCLASS_AND) {
if
(flags & SCF_DO_STCLASS_OR) {
ssc_init(pRExC_state, data->start_class);
}
else
{
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
ANYOF_FLAGS(data->start_class)
|= SSC_MATCHES_EMPTY_STRING;
}
}
DEBUG_STUDYDATA(
"end LOOKAROUND"
, data, depth, is_inf, min, stopmin, delta);
}
#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
else
{
SSize_t deltanext, fake_last_close = 0;
regnode *last_close_op = NULL;
regnode *nscan;
regnode_ssc intrnl;
U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
SSize_t *minnextp;
Newx( minnextp, 1, SSize_t );
SAVEFREEPV(minnextp);
if
(data) {
StructCopy(data, &data_fake, scan_data_t);
if
((flags & SCF_DO_SUBSTR) && data->last_found) {
f |= SCF_DO_SUBSTR;
if
(FLAGS(scan))
scan_commit(pRExC_state, &data_fake, minlenp, is_inf);
data_fake.last_found=newSVsv(data->last_found);
}
}
else
{
data_fake.last_closep = &fake_last_close;
data_fake.last_close_opp = &fake_last_close_opp;
}
data_fake.flags = 0;
data_fake.substrs[0].flags = 0;
data_fake.substrs[1].flags = 0;
data_fake.pos_delta = delta;
if
(is_inf)
data_fake.flags |= SF_IS_INF;
if
( flags & SCF_DO_STCLASS && !FLAGS(scan)
&& OP(scan) == IFMATCH ) {
ssc_init(pRExC_state, &intrnl);
data_fake.start_class = &intrnl;
f |= SCF_DO_STCLASS_AND;
}
if
(flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
next = regnext(scan);
nscan = REGNODE_AFTER(scan);
*minnextp = study_chunk(pRExC_state, &nscan, minnextp,
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
f, depth+1, mutate_ok);
if
(FLAGS(scan)) {
assert
(0);
if
( deltanext < 0
|| deltanext > (I32) U8_MAX
|| *minnextp > (I32)U8_MAX
|| *minnextp + deltanext > (I32)U8_MAX)
{
FAIL2(
"Lookbehind longer than %"
UVuf
" not implemented"
,
(UV)U8_MAX);
}
if
(deltanext) {
NEXT_OFF(scan) = deltanext;
}
FLAGS(scan) = (U8)*minnextp + deltanext;
}
*minnextp += min;
if
(f & SCF_DO_STCLASS_AND) {
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &intrnl);
ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING;
}
if
(data) {
if
(data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if
(data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
data->whilem_c = data_fake.whilem_c;
if
((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
int
i;
if
(RExC_rx->minlen < *minnextp)
RExC_rx->minlen = *minnextp;
scan_commit(pRExC_state, &data_fake, minnextp, is_inf);
SvREFCNT_dec_NN(data_fake.last_found);
for
(i = 0; i < 2; i++) {
if
(data_fake.substrs[i].minlenp != minlenp) {
data->substrs[i].min_offset =
data_fake.substrs[i].min_offset;
data->substrs[i].max_offset =
data_fake.substrs[i].max_offset;
data->substrs[i].minlenp =
data_fake.substrs[i].minlenp;
data->substrs[i].lookbehind += FLAGS(scan);
}
}
}
}
}
#endif
}
else
if
(OP(scan) == OPEN) {
if
(stopparen != (I32)PARNO(scan))
pars++;
}
else
if
(OP(scan) == CLOSE) {
if
(stopparen == (I32)PARNO(scan)) {
break
;
}
if
((I32)PARNO(scan) == is_par) {
next = regnext(scan);
if
( next && (OP(next) != WHILEM) && next < last)
is_par = 0;
}
if
(data) {
*(data->last_closep) = PARNO(scan);
*(data->last_close_opp) = scan;
}
}
else
if
(OP(scan) == EVAL) {
if
(data && !(FLAGS(scan) & EVAL_OPTIMISTIC_FLAG) )
data->flags |= SF_HAS_EVAL;
}
else
if
( REGNODE_TYPE(OP(scan)) == ENDLIKE ) {
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
flags &= ~SCF_DO_SUBSTR;
}
if
(OP(scan)==ACCEPT) {
flags &= ~SCF_DO_STCLASS;
if
(data)
data->flags |= SCF_SEEN_ACCEPT;
if
(stopmin > min)
stopmin = min;
}
}
else
if
(OP(scan) == COMMIT) {
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
flags &= ~SCF_DO_SUBSTR;
}
}
else
if
(OP(scan) == LOGICAL && FLAGS(scan) == 2)
{
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->cur_is_floating = 1;
}
is_inf = is_inf_internal = 1;
if
(flags & SCF_DO_STCLASS_OR)
ssc_anything(data->start_class);
flags &= ~SCF_DO_STCLASS;
}
else
if
(OP(scan) == GPOS) {
if
(!(RExC_rx->intflags & PREGf_GPOS_FLOAT) &&
!(delta || is_inf || (data && data->pos_delta)))
{
if
(!(RExC_rx->intflags & PREGf_ANCH) && (flags & SCF_DO_SUBSTR))
RExC_rx->intflags |= PREGf_ANCH_GPOS;
if
(RExC_rx->gofs < (STRLEN)min)
RExC_rx->gofs = min;
}
else
{
RExC_rx->intflags |= PREGf_GPOS_FLOAT;
RExC_rx->gofs = 0;
}
}
#ifdef TRIE_STUDY_OPT
#ifdef FULL_TRIE_STUDY
else
if
(REGNODE_TYPE(OP(scan)) == TRIE) {
regnode *trie_node= scan;
regnode *tail= regnext(scan);
reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG1u(scan) ];
SSize_t max1 = 0, min1 = OPTIMIZE_INFTY;
regnode_ssc accum;
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
}
if
(flags & SCF_DO_STCLASS)
ssc_init_zero(pRExC_state, &accum);
if
(!trie->jump) {
min1= trie->minlen;
max1= trie->maxlen;
}
else
{
const
regnode *nextbranch= NULL;
U32 word;
for
( word=1 ; word <= trie->wordcount ; word++)
{
SSize_t deltanext = 0, minnext = 0;
U32 f = (flags & SCF_TRIE_DOING_RESTUDY);
SSize_t fake_last_close = 0;
regnode *fake_last_close_op = NULL;
regnode_ssc this_class;
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
if
(data) {
data_fake.whilem_c = data->whilem_c;
data_fake.last_closep = data->last_closep;
data_fake.last_close_opp = data->last_close_opp;
}
else
{
data_fake.last_closep = &fake_last_close;
data_fake.last_close_opp = &fake_last_close_op;
}
data_fake.pos_delta = delta;
if
(flags & SCF_DO_STCLASS) {
ssc_init(pRExC_state, &this_class);
data_fake.start_class = &this_class;
f |= SCF_DO_STCLASS_AND;
}
if
(flags & SCF_WHILEM_VISITED_POS)
f |= SCF_WHILEM_VISITED_POS;
if
(trie->jump[word]) {
if
(!nextbranch)
nextbranch = trie_node + trie->jump[0];
scan= trie_node + trie->jump[word];
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
stopparen, recursed_depth, NULL, f, depth+1,
mutate_ok);
}
if
(nextbranch && REGNODE_TYPE(OP(nextbranch))==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
if
(min1 > (SSize_t)(minnext + trie->minlen))
min1 = minnext + trie->minlen;
if
(deltanext == OPTIMIZE_INFTY) {
is_inf = is_inf_internal = 1;
max1 = OPTIMIZE_INFTY;
}
else
if
(max1 < (SSize_t)(minnext + deltanext + trie->maxlen))
max1 = minnext + deltanext + trie->maxlen;
if
(data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
if
(data_fake.flags & SCF_SEEN_ACCEPT) {
if
( stopmin > min + min1)
stopmin = min + min1;
flags &= ~SCF_DO_SUBSTR;
if
(data)
data->flags |= SCF_SEEN_ACCEPT;
}
if
(data) {
if
(data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
data->whilem_c = data_fake.whilem_c;
}
if
(flags & SCF_DO_STCLASS)
ssc_or(pRExC_state, &accum, (regnode_charclass *) &this_class);
}
DEBUG_STUDYDATA(
"after JUMPTRIE"
, data, depth, is_inf, min, stopmin, delta);
}
if
(flags & SCF_DO_SUBSTR) {
data->pos_min += min1;
data->pos_delta += max1 - min1;
if
(max1 != min1 || is_inf)
data->cur_is_floating = 1;
}
min += min1;
if
(delta != OPTIMIZE_INFTY) {
if
(OPTIMIZE_INFTY - (max1 - min1) >= delta)
delta += max1 - min1;
else
delta = OPTIMIZE_INFTY;
}
if
(flags & SCF_DO_STCLASS_OR) {
ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
if
(min1) {
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
flags &= ~SCF_DO_STCLASS;
}
}
else
if
(flags & SCF_DO_STCLASS_AND) {
if
(min1) {
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) &accum);
flags &= ~SCF_DO_STCLASS;
}
else
{
INIT_AND_WITHP;
StructCopy(data->start_class, and_withp, regnode_ssc);
flags &= ~SCF_DO_STCLASS_AND;
StructCopy(&accum, data->start_class, regnode_ssc);
flags |= SCF_DO_STCLASS_OR;
}
}
scan= tail;
DEBUG_STUDYDATA(
"after TRIE study"
, data, depth, is_inf, min, stopmin, delta);
continue
;
}
#else
else
if
(REGNODE_TYPE(OP(scan)) == TRIE) {
reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG1u(scan) ];
U8*bang=NULL;
min += trie->minlen;
delta += (trie->maxlen - trie->minlen);
flags &= ~SCF_DO_STCLASS;
if
(flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state, data, minlenp, is_inf);
data->pos_min += trie->minlen;
data->pos_delta += (trie->maxlen - trie->minlen);
if
(trie->maxlen != trie->minlen)
data->cur_is_floating = 1;
}
if
(trie->jump)
flags &= ~SCF_DO_SUBSTR;
}
#endif /* old or new */
#endif /* TRIE_STUDY_OPT */
else
if
(OP(scan) == REGEX_SET) {
Perl_croak(aTHX_
"panic: %s regnode should be resolved"
" before optimization"
, REGNODE_NAME(REGEX_SET));
}
scan = regnext(scan);
}
finish:
if
(frame) {
depth = depth - 1;
DEBUG_STUDYDATA(
"frame-end"
, data, depth, is_inf, min, stopmin, delta);
DEBUG_PEEP(
"fend"
, scan, depth, flags);
last = frame->last_regnode;
scan = frame->next_regnode;
stopparen = frame->stopparen;
recursed_depth = frame->prev_recursed_depth;
RExC_frame_last = frame->prev_frame;
frame = frame->this_prev_frame;
goto
fake_study_recurse;
}
assert
(!frame);
DEBUG_STUDYDATA(
"pre-fin"
, data, depth, is_inf, min, stopmin, delta);
if
(is_inf_internal)
delta = OPTIMIZE_INFTY;
if
(min > stopmin) {
if
(OPTIMIZE_INFTY - delta >= min - stopmin)
delta += min - stopmin;
else
delta = OPTIMIZE_INFTY;
min = stopmin;
}
*scanp = scan;
*deltap = delta;
if
(flags & SCF_DO_SUBSTR && is_inf)
data->pos_delta = OPTIMIZE_INFTY - data->pos_min;
if
(is_par > (I32)U8_MAX)
is_par = 0;
if
(is_par && pars==1 && data) {
data->flags |= SF_IN_PAR;
data->flags &= ~SF_HAS_PAR;
}
else
if
(pars && data) {
data->flags |= SF_HAS_PAR;
data->flags &= ~SF_IN_PAR;
}
if
(flags & SCF_DO_STCLASS_OR)
ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
if
(flags & SCF_TRIE_RESTUDY)
data->flags |= SCF_TRIE_RESTUDY;
if
(!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
if
(min > OPTIMIZE_INFTY - delta)
RExC_maxlen = OPTIMIZE_INFTY;
else
if
(RExC_maxlen < min + delta)
RExC_maxlen = min + delta;
}
DEBUG_STUDYDATA(
"post-fin"
, data, depth, is_inf, min, stopmin, delta);
return
min;
}