#ifdef PERL_EXT_RE_BUILD
#include "re_top.h"
#endif
#include "EXTERN.h"
#define PERL_IN_REGEX_ENGINE
#define PERL_IN_REGEXEC_C
#include "perl.h"
#ifdef PERL_IN_XSUB_RE
# include "re_comp.h"
#else
# include "regcomp.h"
#endif
#include "invlist_inline.h"
#include "unicode_constants.h"
/* Text of the locale warning issued by
 * CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND. */
static const char b_utf8_locale_required[] =
    "Use of \\b{} or \\B{} for non-UTF-8 locale is wrong."
    " Assuming a UTF-8 locale";

/* Emit the above warning (category WARN_LOCALE) when IN_UTF8_CTYPE_LOCALE
 * is false.  Every physical line of the body must end in a backslash so
 * that the whole statement stays inside the macro definition. */
#define CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND                       \
    STMT_START {                                                            \
        if (! IN_UTF8_CTYPE_LOCALE) {                                       \
            Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),                     \
                                 b_utf8_locale_required);                   \
        }                                                                   \
    } STMT_END
/* Text of the locale warning issued by
 * CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS. */
static const char sets_utf8_locale_required[] =
    "Use of (?[ ]) for non-UTF-8 locale is wrong. Assuming a UTF-8 locale";

/* Emit the above warning (category WARN_LOCALE) when IN_UTF8_CTYPE_LOCALE
 * is false and node 'n' has ANYOFL_UTF8_LOCALE_REQD set in its flags.
 * Every physical line of the body must end in a backslash so that the
 * whole statement stays inside the macro definition. */
#define CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(n)                     \
    STMT_START {                                                            \
        if (! IN_UTF8_CTYPE_LOCALE && (FLAGS(n) & ANYOFL_UTF8_LOCALE_REQD)){\
            Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),                     \
                                 sets_utf8_locale_required);                \
        }                                                                   \
    } STMT_END
#ifdef DEBUGGING
/* Debug-only message printed by NON_UTF8_TARGET_BUT_UTF8_REQUIRED. */
static const char non_utf8_target_but_utf8_required[]
    = "Can't match, because target string needs to be in UTF-8\n";
#endif

/* Print the message above through DEBUG_EXECUTE_r, then jump to the label
 * 'target'.  The message array only exists under DEBUGGING, so this relies
 * on DEBUG_EXECUTE_r discarding its argument in other builds
 * (NOTE(review): confirm against the DEBUG_EXECUTE_r definition).
 * Every physical line of the body must end in a backslash so that the
 * goto stays inside the macro definition. */
#define NON_UTF8_TARGET_BUT_UTF8_REQUIRED(target) STMT_START {              \
    DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "%s", non_utf8_target_but_utf8_required));\
    goto target;                                                            \
} STMT_END
/* Provide a default for STATIC (plain 'static') when nothing included so
 * far has defined it. */
#ifndef STATIC
#define STATIC static
#endif
/* Length of 'sv': sv_len_utf8() when the expanding scope's 'utf8_target'
 * is true, otherwise SvCUR(). */
#define CHR_SVLEN(sv) (utf8_target ? sv_len_utf8(sv) : SvCUR(sv))

/* The HOP* family moves a position within the target string.  All of them
 * read 'reginfo' from the expanding scope: for a UTF-8 target they defer to
 * reghop3()/reghopmaybe3()/reghop4(); otherwise they use plain pointer
 * arithmetic. */

/* Move 'pos' by 'off'; the UTF-8 branch limits at reginfo->strend for
 * non-negative 'off' and at reginfo->strbeg otherwise.  Yields char*. */
#define HOPc(pos,off) \
        (char *)(reginfo->is_utf8_target \
            ? reghop3((U8*)pos, off, \
                    (U8*)(off >= 0 ? reginfo->strend : reginfo->strbeg)) \
            : (U8*)(pos + off))

/* Move 'pos' backwards by 'off' without passing 'lim'; yields NULL in the
 * byte branch when pos - off would fall below 'lim'. */
#define HOPBACK3(pos, off, lim) \
        (reginfo->is_utf8_target                          \
            ? reghopmaybe3((U8*)pos, (SSize_t)0-off, (U8*)(lim)) \
            : (pos - off >= lim)                                 \
                ? (U8*)pos - off                                 \
                : NULL)

/* HOPBACK3 limited at reginfo->strbeg, cast to char*. */
#define HOPBACKc(pos, off) ((char*)HOPBACK3(pos, off, reginfo->strbeg))

/* Move 'pos' by 'off'.  Only the UTF-8 branch consults 'lim'; the byte
 * branch is unchecked pointer addition. */
#define HOP3(pos,off,lim) (reginfo->is_utf8_target  ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
#define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))

/* Move 'pos' forwards by 'off'; yields NULL in the byte branch when
 * pos + off would exceed 'lim'. */
#define HOPMAYBE3(pos,off,lim) \
        (reginfo->is_utf8_target                        \
            ? reghopmaybe3((U8*)pos, off, (U8*)(lim))   \
            : ((U8*)pos + off <= lim)                   \
                ? (U8*)pos + off                        \
                : NULL)

/* Like HOP3, but the byte branch clamps the result to 'lim'. */
#define HOP3lim(pos,off,lim) (reginfo->is_utf8_target  \
    ? reghop3((U8*)(pos), off, (U8*)(lim)) \
    : (U8*)((pos + off) > lim ? lim : (pos + off)))
#define HOP3clim(pos,off,lim) ((char*)HOP3lim(pos,off,lim))

/* Move 'pos' by 'off' with separate left/right limits for the UTF-8
 * branch; the byte branch is unchecked pointer addition. */
#define HOP4(pos,off,llim, rlim) (reginfo->is_utf8_target \
    ? reghop4((U8*)(pos), off, (U8*)(llim), (U8*)(rlim)) \
    : (U8*)(pos + off))
#define HOP4c(pos,off,llim, rlim) ((char*)HOP4(pos,off,llim, rlim))
/* Expands to nothing. */
#define PLACEHOLDER /* Something for the preprocessor to grab onto */
/* True when node 'rn' is one of the node kinds listed below; used as the
 * loop condition of FIND_NEXT_IMPT.  Reads 'cur_eval' from the expanding
 * scope for the CLOSE test. */
#define JUMPABLE(rn) ( \
OP(rn) == OPEN || \
(OP(rn) == CLOSE && \
!EVAL_CLOSE_PAREN_IS(cur_eval,PARNO(rn)) ) || \
OP(rn) == EVAL || \
OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
OP(rn) == PLUS || OP(rn) == MINMOD || \
OP(rn) == KEEPS || \
(REGNODE_TYPE(OP(rn)) == CURLY && ARG1i(rn) > 0) \
)
/* True when the node's type is EXACT. */
#define IS_EXACT(rn) (REGNODE_TYPE(OP(rn)) == EXACT)
/* True when the node's type is EXACT or REF. */
#define HAS_TEXT(rn) ( IS_EXACT(rn) || REGNODE_TYPE(OP(rn)) == REF )
/* Advance 'rn' (an lvalue) past every JUMPABLE node:
 *   - SUSPEND or CURLY-type nodes: step to the node after, by opcode;
 *   - PLUS: step to the node after a tregnode_PLUS;
 *   - IFMATCH: step to the node after when FLAGS(rn) is 0, otherwise jump
 *     forward by ARG1u(rn);
 *   - anything else: follow NEXT_OFF(rn).
 * Every physical line of the body must end in a backslash so that the
 * whole loop stays inside the macro definition. */
#define FIND_NEXT_IMPT(rn) STMT_START {                                     \
    while (JUMPABLE(rn)) {                                                  \
        const OPCODE type = OP(rn);                                         \
        if (type == SUSPEND || REGNODE_TYPE(type) == CURLY)                 \
            rn = REGNODE_AFTER_opcode(rn,type);                             \
        else if (type == PLUS)                                              \
            rn = REGNODE_AFTER_type(rn,tregnode_PLUS);                      \
        else if (type == IFMATCH)                                           \
            rn = (FLAGS(rn) == 0) ? REGNODE_AFTER_type(rn,tregnode_IFMATCH) : rn + ARG1u(rn); \
        else rn += NEXT_OFF(rn);                                            \
    }                                                                       \
} STMT_END
/* Address of the first / last element of a slab's 'states' array (the
 * array has PERL_REGMATCH_SLAB_SLOTS elements, per SLAB_LAST). */
#define SLAB_FIRST(s) (&(s)->states[0])
#define SLAB_LAST(s) (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
/* Forward declarations of file-local helpers defined later in the file. */
static
void
S_setup_eval_state(pTHX_ regmatch_info *
const
reginfo);
static
void
S_cleanup_regmatch_info_aux(pTHX_
void
*arg);
static
regmatch_state * S_push_slab(pTHX);
/* Savestack bookkeeping used by regcppush()/regcppop():
 * REGCP_OTHER_ELEMS is added to the paren element count to form the total
 * recorded in the frame (regcppush pushes three ints after the paren data);
 * REGCP_FRAME_ELEMS is the extra room requested from SSGROW beyond that
 * total (presumably for the final SAVEt_REGCONTEXT word -- confirm). */
#define REGCP_OTHER_ELEMS 3
#define REGCP_FRAME_ELEMS 1
/* Save capture-group state on the savestack.
 *
 * Copies the offset records for groups parenfloor+1 .. maxopenparen from
 * RXp_OFFSp(rex) onto PL_savestack, then pushes maxopenparen,
 * RXp_LASTPAREN(rex), RXp_LASTCLOSEPAREN(rex) and finally a
 * SAVEt_REGCONTEXT word carrying the total element count.
 *
 * Croaks if the computed element count is negative or does not survive the
 * SAVE_TIGHT_SHIFT round trip.
 *
 * Returns the value PL_savestack_ix had on entry.
 */
STATIC CHECKPOINT
S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen comma_pDEPTH)
{
    const int retval = PL_savestack_ix;
    /* Bytes of offset records to save, and the same rounded up to whole
     * savestack elements. */
    const SSize_t paren_bytes_to_push =
                sizeof(*RXp_OFFSp(rex)) * (maxopenparen - parenfloor);
    const int paren_elems_to_push =
                (paren_bytes_to_push + sizeof(*PL_savestack) - 1) / sizeof(*PL_savestack);
    const UV total_elems = paren_elems_to_push + REGCP_OTHER_ELEMS;
    const UV elems_shifted = total_elems << SAVE_TIGHT_SHIFT;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_REGCPPUSH;

    if (paren_elems_to_push < 0)
        Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i",
                   (int)paren_elems_to_push, (int)maxopenparen,
                   (int)parenfloor);

    /* The count must still be recoverable after being packed above the
     * save-type bits. */
    if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
        Perl_croak(aTHX_ "panic: paren_elems_to_push offset %" UVuf
                   " out of range (%lu-%ld)",
                   total_elems,
                   (unsigned long)maxopenparen,
                   (long)parenfloor);

    DEBUG_BUFFERS_r(
        if ((int)maxopenparen > (int)parenfloor)
            Perl_re_exec_indentf( aTHX_
                "rex=0x%" UVxf " offs=0x%" UVxf ": saving capture indices:\n",
                depth,
                PTR2UV(rex),
                PTR2UV(RXp_OFFSp(rex))
            );
    );

    SSGROW(total_elems + REGCP_FRAME_ELEMS);
    assert((IV)PL_savestack_max > (IV)(total_elems + REGCP_FRAME_ELEMS));

    /* Bulk-copy the offset records, then account for them in whole
     * savestack elements. */
    memcpy(&PL_savestack[PL_savestack_ix], RXp_OFFSp(rex) + parenfloor + 1, paren_bytes_to_push);
    PL_savestack_ix += paren_elems_to_push;

    DEBUG_BUFFERS_r({
        I32 p;
        for (p = parenfloor + 1; p <= (I32)maxopenparen; p++) {
            Perl_re_exec_indentf(aTHX_
                " \\%" UVuf " %" IVdf " (%" IVdf ") .. %" IVdf " (regcppush)\n",
                depth,
                (UV)p,
                (IV)RXp_OFFSp(rex)[p].start,
                (IV)RXp_OFFSp(rex)[p].start_tmp,
                (IV)RXp_OFFSp(rex)[p].end
            );
        }
    });

    SSPUSHINT(maxopenparen);
    SSPUSHINT(RXp_LASTPAREN(rex));
    SSPUSHINT(RXp_LASTCLOSEPAREN(rex));
    SSPUSHUV(SAVEt_REGCONTEXT | elems_shifted);

    DEBUG_BUFFERS_r({
        /* %IVdf consumes an IV from the varargs; 'retval' is an int and
         * PL_savestack_ix is narrower than IV on some builds, so cast. */
        Perl_re_exec_indentf(aTHX_
            "finished regcppush returning %" IVdf " cur: %" IVdf "\n",
            depth, (IV)retval, (IV)PL_savestack_ix);
    });

    return retval;
}
/* Record the current savestack index in 'cp' (an lvalue), with a
 * DEBUG_STATE_r trace.  Expands to two statements with no trailing
 * semicolon; reads 'depth' from the expanding scope.
 * Every physical line of the body must end in a backslash so that the
 * assignment stays inside the macro definition. */
#define REGCP_SET(cp)                                           \
    DEBUG_STATE_r(                                              \
        Perl_re_exec_indentf( aTHX_                             \
            "Setting an EVAL scope, savestack=%" IVdf ",\n",    \
            depth, (IV)PL_savestack_ix                          \
        )                                                       \
    );                                                          \
    cp = PL_savestack_ix

/* Unwind the savestack to 'cp' through regcpblow(), tracing first when
 * 'cp' differs from the current index.  Expands to two statements with no
 * trailing semicolon; reads 'depth' from the expanding scope. */
#define REGCP_UNWIND(cp)                                        \
    DEBUG_STATE_r(                                              \
        if (cp != PL_savestack_ix)                              \
            Perl_re_exec_indentf( aTHX_                         \
                "Clearing an EVAL scope, savestack=%"           \
                IVdf "..%" IVdf "\n",                           \
                depth, (IV)(cp), (IV)PL_savestack_ix            \
            )                                                   \
    );                                                          \
    regcpblow(cp)
/* Store start 's' and end 'e' for capture slot 'ix' of 'rex'.
 * Expands to two statements with no trailing semicolon. */
#define CLOSE_ANY_CAPTURE(rex, ix, s, e)                                    \
    RXp_OFFSp(rex)[(ix)].start = (s);                                       \
    RXp_OFFSp(rex)[(ix)].end = (e)

/* CLOSE_ANY_CAPTURE, plus: raise RXp_LASTPAREN(rex) to 'ix' if 'ix' is
 * larger, set RXp_LASTCLOSEPAREN(rex) to 'ix', and trace through
 * DEBUG_BUFFERS_r.  Expands to several statements with no trailing
 * semicolon; reads 'depth' from the expanding scope.
 * Every physical line of the body must end in a backslash so that all of
 * these statements stay inside the macro definition. */
#define CLOSE_CAPTURE(rex, ix, s, e)                                        \
    CLOSE_ANY_CAPTURE(rex, ix, s, e);                                       \
    if (ix > RXp_LASTPAREN(rex))                                            \
        RXp_LASTPAREN(rex) = (ix);                                          \
    RXp_LASTCLOSEPAREN(rex) = (ix);                                         \
    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_                             \
        "CLOSE: rex=0x%" UVxf " offs=0x%" UVxf ": \\%" UVuf ": set %" IVdf " .. %" IVdf " max: %" UVuf "\n", \
        depth,                                                              \
        PTR2UV(rex),                                                        \
        PTR2UV(RXp_OFFSp(rex)),                                             \
        (UV)(ix),                                                           \
        (IV)RXp_OFFSp(rex)[ix].start,                                       \
        (IV)RXp_OFFSp(rex)[ix].end,                                         \
        (UV)RXp_LASTPAREN(rex)                                              \
    ))
/* Invalidate the captures above 'lp'.
 *
 * Walks down from RXp_LASTPAREN(rex) to lp+1 setting each slot's .end to
 * -1, stores the index where the walk stopped back into
 * RXp_LASTPAREN(rex), and sets RXp_LASTCLOSEPAREN(rex) to 'lcp'.
 */
PERL_STATIC_INLINE void
S_unwind_paren(pTHX_ regexp *rex, U32 lp, U32 lcp comma_pDEPTH) {
    PERL_ARGS_ASSERT_UNWIND_PAREN;
    U32 ix;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
        "UNWIND_PAREN: rex=0x%" UVxf " offs=0x%" UVxf
        ": invalidate (%" UVuf " .. %" UVuf ") set lcp: %" UVuf "\n",
        depth,
        PTR2UV(rex),
        PTR2UV(RXp_OFFSp(rex)),
        (UV)(lp),
        (UV)(RXp_LASTPAREN(rex)),
        (UV)(lcp)
    ));

    ix = RXp_LASTPAREN(rex);
    while (ix > lp) {
        RXp_OFFSp(rex)[ix].end = -1;
        ix--;
    }

    /* 'ix' is now lp, or the old lastparen if that was already <= lp. */
    RXp_LASTPAREN(rex) = ix;
    RXp_LASTCLOSEPAREN(rex) = lcp;
}
/* Shorthand for unwind_paren() on the 'rex' of the expanding scope. */
#define UNWIND_PAREN(lp,lcp) unwind_paren(rex,lp,lcp)
/* Reset capture slots from_ix .. to_ix (inclusive) of 'rex': start,
 * start_tmp and end are all set to -1.  'str' is only used as a tag in the
 * DEBUG_BUFFERS_r trace line.
 */
PERL_STATIC_INLINE void
S_capture_clear(pTHX_ regexp *rex, U16 from_ix, U16 to_ix, const char *str comma_pDEPTH) {
    PERL_ARGS_ASSERT_CAPTURE_CLEAR;
    PERL_UNUSED_ARG(str);
    U16 slot;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    slot = from_ix;
    while (slot <= to_ix) {
        /* Trace the old values before they are overwritten. */
        DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
                "CAPTURE_CLEAR %s \\%" IVdf ": "
                "%" IVdf "(%" IVdf ") .. %" IVdf
                " => "
                "%" IVdf "(%" IVdf ") .. %" IVdf
                "\n",
            depth, str, (IV)slot,
            (IV)RXp_OFFSp(rex)[slot].start,
            (IV)RXp_OFFSp(rex)[slot].start_tmp,
            (IV)RXp_OFFSp(rex)[slot].end,
            (IV)-1, (IV)-1, (IV)-1));

        RXp_OFFSp(rex)[slot].start = -1;
        RXp_OFFSp(rex)[slot].start_tmp = -1;
        RXp_OFFSp(rex)[slot].end = -1;

        slot++;
    }
}
/* Call capture_clear() on the expanding scope's 'rex', but only when
 * 'from_ix' is non-zero.  Expands to a bare 'if' with no trailing
 * semicolon.  The 'if' line must end in a backslash so that the call stays
 * inside the macro definition. */
#define CAPTURE_CLEAR(from_ix, to_ix, str) \
    if (from_ix) capture_clear(rex,from_ix, to_ix, str)
/* Restore capture-group state saved by regcppush().
 *
 * Pops the SAVEt_REGCONTEXT word and the three saved ints
 * (lastcloseparen, lastparen, maxopenparen -- the latter stored through
 * 'maxopenparen_p'), copies the saved offset records back into
 * RXp_OFFSp(rex), and then marks every group above the restored lastparen
 * as not matched (.end = -1, and .start = -1 too for groups above
 * *maxopenparen_p).
 */
STATIC void
S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p comma_pDEPTH)
{
    UV i;
    U32 paren;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_REGCPPOP;

    DEBUG_BUFFERS_r({
        /* %IVdf consumes an IV from the varargs, so cast explicitly. */
        Perl_re_exec_indentf(aTHX_
            "starting regcppop at %" IVdf "\n",
            depth, (IV)PL_savestack_ix);
    });

    /* Pop in the reverse of the order regcppush() pushed. */
    i = SSPOPUV;
    assert((i & SAVE_MASK) == SAVEt_REGCONTEXT);
    i >>= SAVE_TIGHT_SHIFT;     /* total element count of the frame */
    RXp_LASTCLOSEPAREN(rex) = SSPOPINT;
    RXp_LASTPAREN(rex) = SSPOPINT;
    *maxopenparen_p = SSPOPINT;

    i -= REGCP_OTHER_ELEMS;     /* what remains is the paren data */

    DEBUG_BUFFERS_r(
        if (i || RXp_LASTPAREN(rex) + 1 <= rex->nparens)
            Perl_re_exec_indentf( aTHX_
                "rex=0x%" UVxf " offs=0x%" UVxf ": restoring capture indices to:\n",
                depth,
                PTR2UV(rex),
                PTR2UV(RXp_OFFSp(rex))
            );
    );

    PL_savestack_ix -= i;

    /* Convert the count from savestack elements to offset records; the
     * assertion guarantees a record is at least one element wide. */
    STATIC_ASSERT_STMT(sizeof(*RXp_OFFSp(rex)) >= sizeof(*PL_savestack));
    i = (i * sizeof(*PL_savestack)) / sizeof(*RXp_OFFSp(rex));

    paren = *maxopenparen_p - i + 1;
    memcpy(RXp_OFFSp(rex) + paren, &PL_savestack[PL_savestack_ix], i * sizeof(*RXp_OFFSp(rex)));

    DEBUG_BUFFERS_r(
        for (; paren <= *maxopenparen_p; ++paren) {
            Perl_re_exec_indentf(aTHX_
                " \\%" UVuf " %" IVdf "(%" IVdf ") .. %" IVdf " %s (regcppop)\n",
                depth,
                (UV)paren,
                (IV)RXp_OFFSp(rex)[paren].start,
                (IV)RXp_OFFSp(rex)[paren].start_tmp,
                (IV)RXp_OFFSp(rex)[paren].end,
                (paren > RXp_LASTPAREN(rex) ? "(skipped)" : ""));
        }
    );

#if 1
    /* Groups above the restored lastparen are treated as unset. */
    for (i = RXp_LASTPAREN(rex) + 1; i <= rex->nparens; i++) {
        if (i > *maxopenparen_p) {
            RXp_OFFSp(rex)[i].start = -1;
        }
        RXp_OFFSp(rex)[i].end = -1;
        DEBUG_BUFFERS_r( Perl_re_exec_indentf( aTHX_
            " \\%" UVuf ": %s ..-1 undeffing (regcppop)\n",
            depth,
            (UV)i,
            (i > *maxopenparen_p) ? "-1" : " "
        ));
    }
#endif

    DEBUG_BUFFERS_r({
        Perl_re_exec_indentf(aTHX_
            "finished regcppop at %" IVdf "\n",
            depth, (IV)PL_savestack_ix);
    });
}
/* Run regcppop() against the frame that ends at savestack index 'ix',
 * then put PL_savestack_ix back to what it was on entry, so the frame is
 * read without being removed from the savestack.
 */
STATIC void
S_regcp_restore(pTHX_ regexp *rex, I32 ix, U32 *maxopenparen_p comma_pDEPTH)
{
    const I32 saved_ix = PL_savestack_ix;

    PERL_ARGS_ASSERT_REGCP_RESTORE;

    PL_savestack_ix = ix;
    regcppop(rex, maxopenparen_p);
    PL_savestack_ix = saved_ix;
}
/* Unwind the savestack to 'cp' via LEAVE_SCOPE. */
#define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
/* Test whether byte 'character' belongs to character class 'classnum'
 * under the current locale.
 *
 * In a UTF-8 LC_CTYPE locale the answer comes from generic_isCC_();
 * otherwise it is dispatched to the matching isU8_*_LC() macro.  Croaks on
 * a class number that has no case below.
 */
STATIC bool
S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
{
    if (IN_UTF8_CTYPE_LOCALE) {
        return cBOOL(generic_isCC_(character, classnum));
    }

    switch ((char_class_number_) classnum) {
        case CC_ENUM_ALPHANUMERIC_: return isU8_ALPHANUMERIC_LC(character);
        case CC_ENUM_ALPHA_:        return isU8_ALPHA_LC(character);
        case CC_ENUM_ASCII_:        return isU8_ASCII_LC(character);
        case CC_ENUM_BLANK_:        return isU8_BLANK_LC(character);
        case CC_ENUM_CASED_:        return isU8_CASED_LC(character);
        case CC_ENUM_CNTRL_:        return isU8_CNTRL_LC(character);
        case CC_ENUM_DIGIT_:        return isU8_DIGIT_LC(character);
        case CC_ENUM_GRAPH_:        return isU8_GRAPH_LC(character);
        case CC_ENUM_LOWER_:        return isU8_LOWER_LC(character);
        case CC_ENUM_PRINT_:        return isU8_PRINT_LC(character);
        case CC_ENUM_PUNCT_:        return isU8_PUNCT_LC(character);
        case CC_ENUM_SPACE_:        return isU8_SPACE_LC(character);
        case CC_ENUM_UPPER_:        return isU8_UPPER_LC(character);
        case CC_ENUM_WORDCHAR_:     return isU8_WORDCHAR_LC(character);
        case CC_ENUM_XDIGIT_:       return isU8_XDIGIT_LC(character);
        default:
            break;
    }

    /* Only reached for a class number not handled above. */
    Perl_croak(aTHX_ "panic: isFOO_lc() has an unexpected character class '%d'",
               classnum);
    NOT_REACHED;
    return FALSE;
}
/* Compare the first 'len' bytes of s1 and s2 for equality, lowercasing
 * each byte of s1 with toLOWER_L1() and comparing it against s2 as-is.
 * s2 is asserted to contain no isUPPER_L1() bytes.
 *
 * Returns 1 when all 'len' positions match, 0 at the first mismatch.
 */
PERL_STATIC_INLINE I32
S_foldEQ_latin1_s2_folded(pTHX_ const char *s1, const char *s2, I32 len)
{
    const U8 *lhs = (const U8 *)s1;
    const U8 *rhs = (const U8 *)s2;

    PERL_ARGS_ASSERT_FOLDEQ_LATIN1_S2_FOLDED;

    assert(len >= 0);

    for (; len != 0; len--, lhs++, rhs++) {
        assert(! isUPPER_L1(*rhs));
        if (toLOWER_L1(*lhs) != *rhs) {
            return 0;
        }
    }

    return 1;
}
/* Test whether the UTF-8 encoded character starting at 'character' (with
 * 'e' passed as the end bound to the helpers below) belongs to class
 * 'classnum' under locale rules.
 *
 * Characters that are UTF-8 invariant, or whose start byte is
 * downgradeable, are reduced to one byte and handed to isFOO_lc().
 * Anything else goes through _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG() and
 * is then classified by a dedicated is_*_high() macro for a few classes,
 * or by an inversion-list lookup in PL_XPosix_ptrs[classnum].
 */
STATIC bool
S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
{
    PERL_ARGS_ASSERT_ISFOO_UTF8_LC;

    if (UTF8_IS_INVARIANT(*character)) {
        return isFOO_lc(classnum, *character);
    }

    if (UTF8_IS_DOWNGRADEABLE_START(*character)) {
        /* Two-byte sequence: combine start and continuation byte. */
        return isFOO_lc(classnum,
                        EIGHT_BIT_UTF8_TO_NATIVE(*character, *(character + 1)));
    }

    _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(character, e);

    switch ((char_class_number_) classnum) {
        case CC_ENUM_SPACE_:     return is_XPERLSPACE_high(character);
        case CC_ENUM_BLANK_:     return is_HORIZWS_high(character);
        case CC_ENUM_XDIGIT_:    return is_XDIGIT_high(character);
        case CC_ENUM_VERTSPACE_: return is_VERTWS_high(character);
        default:
            return _invlist_contains_cp(PL_XPosix_ptrs[classnum],
                                        utf8_to_uvchr_buf(character, e, NULL));
    }

    NOT_REACHED;
}
/* Return a pointer to the first byte in [s, send) that differs from
 * 'span_byte', or 'send' if every byte equals it.
 *
 * When the range is long enough, bytes are examined one at a time until
 * 's' is word-aligned and then a whole PERL_UINTMAX_T at a time; whatever
 * is left over is finished byte by byte.
 */
STATIC U8 *
S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
{
    PERL_ARGS_ASSERT_FIND_SPAN_END;

    assert(send >= s);

    if ((STRLEN) (send - s) >= PERL_WORDSIZE
                             + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                             - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
    {
        PERL_UINTMAX_T span_word;

        /* Step up to the next word boundary. */
        for (; PTR2nat(s) & PERL_WORD_BOUNDARY_MASK; s++) {
            if (*s != span_byte) {
                return s;
            }
        }

        /* A word in which every byte is 'span_byte'. */
        span_word = PERL_COUNT_MULTIPLIER * span_byte;

        do {
            const PERL_UINTMAX_T current = * (PERL_UINTMAX_T *) s;

            if (current != span_word) {
#ifdef EBCDIC
                /* Leave it to the per-byte loop below. */
                break;
#else
                /* XOR leaves non-zero bits only in differing bytes; the
                 * shifts smear any set bit up to that byte's top bit so
                 * variant_byte_number() can locate the first such byte. */
                PERL_UINTMAX_T differing = span_word ^ current;

                differing |= differing << 1;
                differing |= differing << 2;
                differing |= differing << 4;
                return s + variant_byte_number(differing);
#endif
            }

            s += PERL_WORDSIZE;
        } while (s + PERL_WORDSIZE <= send);
    }

    /* Per-byte scan of whatever remains. */
    while (s < send && *s == span_byte) {
        s++;
    }

    return s;
}
/* Return a pointer to the first byte B in [s, send) for which
 * (B & mask) == byte, or 'send' if there is none.  'byte' must already be
 * a subset of 'mask' (asserted).
 *
 * On non-EBCDIC builds, sufficiently long ranges are scanned a whole
 * PERL_UINTMAX_T at a time once 's' is word-aligned; the remainder (and
 * everything on EBCDIC) is scanned byte by byte.
 */
STATIC U8 *
S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
{
    PERL_ARGS_ASSERT_FIND_NEXT_MASKED;

    assert(send >= s);
    assert((byte & mask) == byte);

#ifndef EBCDIC

    if ((STRLEN) (send - s) >= PERL_WORDSIZE
                             + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                             - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
    {
        PERL_UINTMAX_T word, mask_word;

        /* Step up to the next word boundary. */
        for (; PTR2nat(s) & PERL_WORD_BOUNDARY_MASK; s++) {
            if (((*s) & mask) == byte) {
                return s;
            }
        }

        /* 'byte' and 'mask' replicated into every byte of a word. */
        word      = PERL_COUNT_MULTIPLIER * byte;
        mask_word = PERL_COUNT_MULTIPLIER * mask;

        do {
            PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;

            /* After the XOR a matching byte is all zero; the shifts smear
             * any set bit up to the top bit of its byte. */
            masked ^= word;
            masked |= masked << 1;
            masked |= masked << 2;
            masked |= masked << 4;

            if ((masked & PERL_VARIANTS_WORD_MASK) != PERL_VARIANTS_WORD_MASK) {
                /* Some byte's top bit stayed clear, i.e. it matched:
                 * invert so matches are the set bits, and locate the
                 * first one. */
                masked = ~ masked;
                masked &= PERL_VARIANTS_WORD_MASK;
                s += variant_byte_number(masked);
                return s;
            }

            /* No byte of this word matched. */
            s += PERL_WORDSIZE;
        } while (s + PERL_WORDSIZE <= send);
    }

#endif

    /* Per-byte scan of whatever remains. */
    while (s < send && ((*s) & mask) != byte) {
        s++;
    }

    return s;
}
/* Return a pointer to the first byte B in [s, send) for which
 * (B & mask) != span_byte, or 'send' if every byte satisfies the equality.
 * 'span_byte' must already be a subset of 'mask' (asserted).
 *
 * When the range is long enough, bytes are examined one at a time until
 * 's' is word-aligned and then a whole PERL_UINTMAX_T at a time; whatever
 * is left over is finished byte by byte.
 */
STATIC U8 *
S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
{
    PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;

    assert(send >= s);
    assert((span_byte & mask) == span_byte);

    if ((STRLEN) (send - s) >= PERL_WORDSIZE
                             + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
                             - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
    {
        PERL_UINTMAX_T span_word, mask_word;

        /* Step up to the next word boundary. */
        for (; PTR2nat(s) & PERL_WORD_BOUNDARY_MASK; s++) {
            if (((*s) & mask) != span_byte) {
                return s;
            }
        }

        /* 'span_byte' and 'mask' replicated into every byte of a word. */
        span_word = PERL_COUNT_MULTIPLIER * span_byte;
        mask_word = PERL_COUNT_MULTIPLIER * mask;

        do {
            PERL_UINTMAX_T masked = (* (PERL_UINTMAX_T *) s) & mask_word;

            if (masked != span_word) {
#ifdef EBCDIC
                /* Leave it to the per-byte loop below. */
                break;
#else
                /* XOR leaves non-zero bits only in differing bytes; the
                 * shifts smear any set bit up to that byte's top bit so
                 * variant_byte_number() can locate the first such byte. */
                masked ^= span_word;
                masked |= masked << 1;
                masked |= masked << 2;
                masked |= masked << 4;
                return s + variant_byte_number(masked);
#endif
            }

            s += PERL_WORDSIZE;
        } while (s + PERL_WORDSIZE <= send);
    }

    /* Per-byte scan of whatever remains. */
    while (s < send && ((*s) & mask) == span_byte) {
        s++;
    }

    return s;
}
#ifndef PERL_IN_XSUB_RE
/* Thin wrapper around regexec_flags(): forwards every argument, passes
 * NULL for the 'data' slot, and translates 'nosave' into the flags word
 * (REXEC_COPY_STR when nosave is zero, otherwise no flags).
 */
I32
Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, char *strend,
              char *strbeg, SSize_t minend, SV *screamer, U32 nosave)
{
    PERL_ARGS_ASSERT_PREGEXEC;

    if (nosave) {
        return regexec_flags(prog, stringarg, strend, strbeg, minend,
                             screamer, NULL, 0);
    }

    return regexec_flags(prog, stringarg, strend, strbeg, minend,
                         screamer, NULL, REXEC_COPY_STR);
}
#endif
/* Try to determine, without running the full matcher, the earliest
 * position in the target string at which pattern 'rx' could match.
 *
 * Parameters:
 *   rx      compiled pattern
 *   sv      target SV; only inspected for its UTF-8 flag, may be NULL
 *   strbeg  start of the target string
 *   strpos  position from which matching is to start
 *   strend  end of the target string
 *   flags   unused
 *   data    unused
 *
 * The function locates the pattern's "check" substring with fbm_instr(),
 * cross-checks the position against the "other" (anchored/floating)
 * substring if there is one, applies a /^/m anchor constraint and, when
 * the program has a non-TRIE start class, find_byclass().  Contradictions
 * either restart the search further along or fail.
 *
 * Returns the candidate start position (rx_origin), or NULL when the
 * pattern cannot match.  As a side effect it updates BmUSEFUL() on the
 * check substring and may drop the check/float substrings and clear
 * RXf_USE_INTUIT when the check substring is not paying for itself.
 */
char *
Perl_re_intuit_start(pTHX_
                    REGEXP * const rx,
                    SV *sv,
                    const char * const strbeg,
                    char *strpos,
                    char *strend,
                    const U32 flags,
                    re_scream_pos_data *data)
{
    struct regexp *const prog = ReANY(rx);
    SSize_t start_shift = prog->check_offset_min;
    SSize_t end_shift   = 0;
    /* Current best guess at the earliest possible match start. */
    char *rx_origin = strpos;
    SV *check;
    const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0;
    /* Index of the substring that is NOT the check substring. */
    U8   other_ix = 1 - prog->substrs->check_ix;
    bool ml_anch = 0;
    char *other_last = strpos;
    char *check_at = NULL;
    const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
    RXi_GET_DECL(prog,progi);
    regmatch_info reginfo_buf;  /* create some info to pass to find_byclass */
    regmatch_info *const reginfo = &reginfo_buf;
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_RE_INTUIT_START;
    PERL_UNUSED_ARG(flags);
    PERL_UNUSED_ARG(data);

    DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                "Intuit: trying to determine minimum start position...\n"));

    assert(prog->substrs->data[0].min_offset >= 0);
    assert(prog->substrs->data[0].max_offset >= 0);
    assert(prog->substrs->data[1].min_offset >= 0);
    assert(prog->substrs->data[1].max_offset >= 0);
    assert(prog->substrs->data[2].min_offset >= 0);
    assert(prog->substrs->data[2].max_offset >= 0);

    assert(
            ! (  (prog->anchored_utf8 || prog->anchored_substr)
              && (prog->float_utf8    || prog->float_substr))
           || (prog->float_min_offset >= prog->anchored_offset));

    if (prog->minlen > strend - strpos) {
        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                              " String too short...\n"));
        goto fail;
    }

    RXp_MATCH_UTF8_set(prog, utf8_target);
    reginfo->is_utf8_target = cBOOL(utf8_target);
    reginfo->info_aux = NULL;
    reginfo->strbeg = strbeg;
    reginfo->strend = strend;
    reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
    reginfo->intuit = 1;
    reginfo->poscache_maxiter = 0;

    /* Pick the check substring in the encoding of the target, converting
     * the stored substrings if only the other encoding is available. */
    if (utf8_target) {
        if ((!prog->anchored_utf8 && prog->anchored_substr)
                || (!prog->float_utf8 && prog->float_substr))
            to_utf8_substr(prog);
        check = prog->check_utf8;
    } else {
        if (!prog->check_substr && prog->check_utf8) {
            if (! to_byte_substr(prog)) {
                NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
            }
        }
        check = prog->check_substr;
    }

    DEBUG_OPTIMISE_MORE_r({
        int i;
        for (i=0; i<=2; i++) {
            SV *sv = (utf8_target ? prog->substrs->data[i].utf8_substr
                                  : prog->substrs->data[i].substr);
            if (!sv)
                continue;

            Perl_re_printf( aTHX_
                " substrs[%d]: min=%" IVdf " max=%" IVdf " end shift=%" IVdf
                " useful=%" IVdf " utf8=%d [%s]\n",
                i,
                (IV)prog->substrs->data[i].min_offset,
                (IV)prog->substrs->data[i].max_offset,
                (IV)prog->substrs->data[i].end_shift,
                BmUSEFUL(sv),
                utf8_target ? 1 : 0,
                SvPEEK(sv));
        }
    });

    if (prog->intflags & PREGf_ANCH) {
        ml_anch =      (prog->intflags & PREGf_ANCH_MBOL)
                   && !(prog->intflags & PREGf_IMPLICIT);

        if (!ml_anch && !(prog->intflags & PREGf_IMPLICIT)) {
            if (   strpos != strbeg
                && (prog->intflags & PREGf_ANCH_SBOL))
            {
                DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                                " Not at start...\n"));
                goto fail;
            }

            /* The check substring sits at a fixed offset: compare in
             * place rather than searching. */
            if (prog->check_offset_min == prog->check_offset_max) {
                SSize_t slen = SvCUR(check);
                char *s = HOP3c(strpos, prog->check_offset_min, strend);

                DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                    " Looking for check substr at fixed offset %" IVdf "...\n",
                    (IV)prog->check_offset_min));

                if (SvTAIL(check)) {
                    if (!multiline
                        && (   strend - s > slen
                            || strend - s < slen - 1
                            || (strend - s == slen && strend[-1] != '\n')))
                    {
                        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                                            " String too long...\n"));
                        goto fail_finish;
                    }
                    slen--;
                }
                if (slen && (   strend - s < slen
                             || *SvPVX_const(check) != *s
                             || (slen > 1 && (memNE(SvPVX_const(check), s, slen)))))
                {
                    DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                                    " String not equal...\n"));
                    goto fail_finish;
                }

                check_at = s;
                goto success_at_start;
            }
        }
    }

    end_shift = prog->check_end_shift;

#ifdef DEBUGGING /* 7/99: reports of failure (with the older version) */
    if (end_shift < 0)
        Perl_croak(aTHX_ "panic: end_shift: %" IVdf " pattern:\n%s\n ",
                   (IV)end_shift, RX_PRECOMP(rx));
#endif

  restart:

    /* Search for the check substring in the window derived from
     * rx_origin, start_shift and end_shift. */
    {
        U8* start_point;
        U8* end_point;

        DEBUG_OPTIMISE_MORE_r({
            Perl_re_printf( aTHX_
                " At restart: rx_origin=%" IVdf " Check offset min: %" IVdf
                " Start shift: %" IVdf " End shift %" IVdf
                " Real end Shift: %" IVdf "\n",
                (IV)(rx_origin - strbeg),
                (IV)prog->check_offset_min,
                (IV)start_shift,
                (IV)end_shift,
                (IV)prog->check_end_shift);
        });

        end_point = HOPBACK3(strend, end_shift, rx_origin);
        if (!end_point)
            goto fail_finish;
        start_point = HOPMAYBE3(rx_origin, start_shift, end_point);
        if (!start_point)
            goto fail_finish;

        /* For an anchored (non-/m) pattern with a bounded check offset,
         * pull end_point in so the scan cannot run past the furthest
         * place the check substring could be. */
        if (!ml_anch
            && prog->intflags & PREGf_ANCH
            && prog->check_offset_max != SSize_t_MAX)
        {
            SSize_t check_len = SvCUR(check) - cBOOL(SvTAIL(check));
            const char * const anchor =
                        (prog->intflags & PREGf_ANCH_GPOS ? strpos : strbeg);
            SSize_t targ_len = (char*)end_point - anchor;

            if (check_len > targ_len) {
                DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                              "Target string too short to match required substring...\n"));
                goto fail_finish;
            }

            assert(anchor + check_len <= (char *)end_point);
            if (prog->check_offset_max + check_len < targ_len) {
                end_point = HOP3lim((U8*)anchor,
                                prog->check_offset_max,
                                end_point - check_len
                            )
                            + check_len;
                if (end_point < start_point)
                    goto fail_finish;
            }
        }

        check_at = fbm_instr( start_point, end_point,
                      check, multiline ? FBMrf_MULTILINE : 0);

        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
            " doing 'check' fbm scan, [%" IVdf "..%" IVdf "] gave %" IVdf "\n",
            (IV)((char*)start_point - strbeg),
            (IV)((char*)end_point   - strbeg),
            (IV)(check_at ? check_at - strbeg : -1)
        ));

        DEBUG_EXECUTE_r({
            RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
                SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
            Perl_re_printf( aTHX_  " %s %s substr %s%s%s",
                              (check_at ? "Found" : "Did not find"),
                              (check == (utf8_target ? prog->anchored_utf8 : prog->anchored_substr)
                                  ? "anchored" : "floating"),
                              quoted,
                              RE_SV_TAIL(check),
                              (check_at ? " at offset " : "...\n") );
        });

        if (!check_at)
            goto fail_finish;

        /* The match cannot start more than check_offset_max before the
         * place the check substring was found. */
        if (check_at - rx_origin > prog->check_offset_max)
            rx_origin = HOP3c(check_at, -prog->check_offset_max, rx_origin);

        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
            "%ld (rx_origin now %" IVdf ")...\n",
            (long)(check_at - strbeg),
            (IV)(rx_origin - strbeg)
        ));
    }

    /* If the pattern has a second known substring, verify that it can be
     * found at a position consistent with the check substring. */
    if (prog->substrs->data[other_ix].utf8_substr
        || prog->substrs->data[other_ix].substr)
    {
        char *last, *last1;
        char *s;
        SV* must;
        struct reg_substr_datum *other;

      do_other_substr:
        other = &prog->substrs->data[other_ix];
        if (!utf8_target && !other->substr) {
            if (!to_byte_substr(prog)) {
                NON_UTF8_TARGET_BUT_UTF8_REQUIRED(fail);
            }
        }

        assert(prog->minlen >= other->min_offset);
        last1 = HOP3c(strend,
                        other->min_offset - prog->minlen, strbeg);

        if (other_ix) {
            assert(rx_origin <= last1);
            last =
                (last1 - rx_origin) < other->max_offset
                    ? last1
                    : (char*)HOP3lim(rx_origin, other->max_offset, last1);
        }
        else {
            assert(strpos + start_shift <= check_at);
            last = HOP4c(check_at, other->min_offset - start_shift,
                        strbeg, strend);
        }

        s = HOP3c(rx_origin, other->min_offset, strend);
        if (s < other_last)
            s = other_last;

        must = utf8_target ? other->utf8_substr : other->substr;
        assert(SvPOK(must));
        {
            char *from = s;
            char *to   = last + SvCUR(must) - (SvTAIL(must)!=0);

            if (to > strend)
                to = strend;
            if (from > to) {
                s = NULL;
                DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                    " skipping 'other' fbm scan: %" IVdf " > %" IVdf "\n",
                    (IV)(from - strbeg),
                    (IV)(to   - strbeg)
                ));
            }
            else {
                s = fbm_instr(
                    (unsigned char*)from,
                    (unsigned char*)to,
                    must,
                    multiline ? FBMrf_MULTILINE : 0
                );
                DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                    " doing 'other' fbm scan, [%" IVdf "..%" IVdf "] gave %" IVdf "\n",
                    (IV)(from - strbeg),
                    (IV)(to   - strbeg),
                    (IV)(s ? s - strbeg : -1)
                ));
            }
        }

        DEBUG_EXECUTE_r({
            RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
                SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
            Perl_re_printf( aTHX_  " %s %s substr %s%s",
                s ? "Found" : "Contradicts",
                other_ix ? "floating" : "anchored",
                quoted, RE_SV_TAIL(must));
        });

        if (!s) {
            /* The other substring was not found in the allowed range. */
            if (last >= last1) {
                DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                                        "; giving up...\n"));
                goto fail_finish;
            }

            /* Advance and look for the check substring again. */
            other_last = HOP3c(last, 1, strend);
            rx_origin =
                other_ix
                    ? HOP3c(rx_origin, 1, strend)
                    : HOP4c(last, 1 - other->min_offset, strbeg, strend);
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                "; about to retry %s at offset %ld (rx_origin now %" IVdf ")...\n",
                (other_ix ? "floating" : "anchored"),
                (long)(HOP3c(check_at, 1, strend) - strbeg),
                (IV)(rx_origin - strbeg)
            ));
            goto restart;
        }
        else {
            if (other_ix) {
                other_last = s;
            }
            else {
                rx_origin = HOP3c(s, -other->min_offset, strbeg);
                other_last = HOP3c(s, 1, strend);
            }
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                " at offset %ld (rx_origin now %" IVdf ")...\n",
                  (long)(s - strbeg),
                (IV)(rx_origin - strbeg)
              ));
        }
    }
    else {
        DEBUG_OPTIMISE_MORE_r(
            Perl_re_printf( aTHX_
                " Check-only match: offset min:%" IVdf " max:%" IVdf
                " check_at:%" IVdf " rx_origin:%" IVdf " rx_origin-check_at:%" IVdf
                " strend:%" IVdf "\n",
                (IV)prog->check_offset_min,
                (IV)prog->check_offset_max,
                (IV)(check_at-strbeg),
                (IV)(rx_origin-strbeg),
                (IV)(rx_origin-check_at),
                (IV)(strend-strbeg)
            )
        );
    }

  postprocess_substr_matches:

    /* With a /^/m anchor, rx_origin must be at the string start or just
     * after a newline; otherwise move it to the character after the next
     * newline and re-validate the substrings. */
    if (ml_anch && rx_origin != strbeg && rx_origin[-1] != '\n') {
        char *s;

        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                        " looking for /^/m anchor"));

        s = HOP3c(strend, - prog->minlen, strpos);
        if (s <= rx_origin ||
            ! ( rx_origin = (char *)memchr(rx_origin, '\n', s - rx_origin)))
        {
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                            " Did not find /%s^%s/m...\n",
                            PL_colors[0], PL_colors[1]));
            goto fail_finish;
        }

        /* Step over the newline that was found. */
        rx_origin++;

        if (prog->substrs->check_ix == 0
            || rx_origin >= HOP3c(check_at,  - prog->check_offset_min, strpos))
        {
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                " Found /%s^%s/m, about to restart lookup for check-string with rx_origin %ld...\n",
                PL_colors[0], PL_colors[1], (long)(rx_origin - strbeg)));
            goto restart;
        }

        assert(prog->substrs->check_ix);
        if (utf8_target ? prog->anchored_utf8 : prog->anchored_substr) {
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                " Found /%s^%s/m, rescanning for anchored from offset %" IVdf " (rx_origin now %" IVdf ")...\n",
                PL_colors[0], PL_colors[1],
                (IV)(rx_origin - strbeg + prog->anchored_offset),
                (IV)(rx_origin - strbeg)
            ));
            goto do_other_substr;
        }

        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
            " Found /%s^%s/m with rx_origin %ld...\n",
            PL_colors[0], PL_colors[1], (long)(rx_origin - strbeg)));
    }
    else {
        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
            " (multiline anchor test skipped)\n"));
    }

  success_at_start:

    /* If the program has a start class (other than a TRIE), check that a
     * character satisfying it exists in the allowed range. */
    if (progi->regstclass && REGNODE_TYPE(OP(progi->regstclass))!=TRIE) {
        const U8* const str = (U8*)STRING(progi->regstclass);

        /* Length of the start class in characters: the literal's length
         * for an EXACT-type node, otherwise 1. */
        const SSize_t cl_l = (REGNODE_TYPE(OP(progi->regstclass)) == EXACT
                    ?  (reginfo->is_utf8_pat
                        ? (SSize_t)utf8_distance(str + STR_LEN(progi->regstclass), str)
                        : (SSize_t)STR_LEN(progi->regstclass))
                    : 1);
        char * endpos;
        char *s;
        char *rx_max_float = NULL;

        if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
            endpos = HOP3clim(rx_origin, (prog->minlen ? cl_l : 0), strend);
        else if (prog->float_substr || prog->float_utf8) {
            rx_max_float = HOP3c(check_at, -start_shift, strbeg);
            endpos = HOP3clim(rx_max_float, cl_l, strend);
        }
        else
            endpos= strend;

        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
            " looking for class: start_shift: %" IVdf " check_at: %" IVdf
            " rx_origin: %" IVdf " endpos: %" IVdf "\n",
              (IV)start_shift, (IV)(check_at - strbeg),
              (IV)(rx_origin - strbeg), (IV)(endpos - strbeg)));

        s = find_byclass(prog, progi->regstclass, rx_origin, endpos,
                            reginfo);
        if (!s) {
            if (endpos == strend) {
                DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
                                " Could not match STCLASS...\n") );
                goto fail;
            }
            DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
                               " This position contradicts STCLASS...\n") );
            if ((prog->intflags & PREGf_ANCH) && !ml_anch
                        && !(prog->intflags & PREGf_IMPLICIT))
                goto fail;

            /* Contradict one of the substrings. */
            if (prog->anchored_substr || prog->anchored_utf8) {
                if (prog->substrs->check_ix == 1) { /* check is float */
                    assert(rx_origin + start_shift <= check_at);
                    if (rx_origin + start_shift != check_at) {
                        DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
                            " about to retry anchored at offset %ld (rx_origin now %" IVdf ")...\n",
                                (long)(other_last - strbeg),
                                (IV)(rx_origin - strbeg)
                            ));
                        goto do_other_substr;
                    }
                }
            }
            else {
                if (ml_anch) {
                    rx_origin++;
                    DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
                        " about to look for /%s^%s/m starting at rx_origin %ld...\n",
                        PL_colors[0], PL_colors[1],
                        (long)(rx_origin - strbeg)) );
                    goto postprocess_substr_matches;
                }

                if (!(utf8_target ? prog->float_utf8 : prog->float_substr))
                    goto fail;

                rx_origin = rx_max_float;
            }

            /* Move one character on and look for the check substring
             * again. */
            rx_origin = HOP3c(rx_origin, 1, strend);
            if (rx_origin + start_shift + end_shift > strend) {
                DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
                                       " Could not match STCLASS...\n") );
                goto fail;
            }
            DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
                " about to look for %s substr starting at offset %ld (rx_origin now %" IVdf ")...\n",
                (prog->substrs->check_ix ? "floating" : "anchored"),
                (long)(rx_origin + start_shift - strbeg),
                (IV)(rx_origin - strbeg)
            ));
            goto restart;
        }

        /* Note that 's' is only reported here; rx_origin is left as is. */
        if (rx_origin != s) {
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                        " By STCLASS: moving %ld --> %ld\n",
                                  (long)(rx_origin - strbeg), (long)(s - strbeg))
                   );
        }
        else {
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                                  " Does not contradict STCLASS...\n");
                   );
        }
    }

    /* Bookkeeping: reward the check substring when it moved the start
     * position, penalise it when it did not. */
    if (rx_origin != strpos) {
        DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
                        " try at offset...\n"));
        ++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr);
    }
    else {
        if (!(prog->intflags & PREGf_NAUGHTY)
            && (utf8_target ? (
                prog->check_utf8
                && --BmUSEFUL(prog->check_utf8) < 0
                && (prog->check_utf8 == prog->float_utf8)
            ) : (
                prog->check_substr
                && --BmUSEFUL(prog->check_substr) < 0
                && (prog->check_substr == prog->float_substr)
            )))
        {
            /* The floating check substring has gone negative on
             * usefulness: stop using it. */
            DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  " ... Disabling check substring...\n"));
            SvREFCNT_dec(utf8_target ? prog->check_utf8 : prog->check_substr);
            SvREFCNT_dec(utf8_target ? prog->check_substr : prog->check_utf8);
            prog->check_substr = prog->check_utf8 = NULL;
            prog->float_substr = prog->float_utf8 = NULL;
            check = NULL;
            prog->extflags &= ~RXf_USE_INTUIT;
        }
    }

    DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
            "Intuit: %sSuccessfully guessed:%s match at offset %ld\n",
             PL_colors[4], PL_colors[5], (long)(rx_origin - strbeg)) );

    return rx_origin;

  fail_finish:
    /* A failure that the check substring helped to detect: credit it. */
    if (prog->check_substr || prog->check_utf8)
        BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr) += 5;
  fail:
    DEBUG_EXECUTE_r(Perl_re_printf( aTHX_  "%sMatch rejected by optimizer%s\n",
                          PL_colors[4], PL_colors[5]));
    return NULL;
}
/* Declare a const local 'trie_type' (of an anonymous enum type) chosen
 * from FLAGS(scan) and the expanding scope's 'utf8_target':
 *   EXACT      -> trie_utf8 / trie_plain
 *   EXACTL     -> trie_utf8l / trie_plain
 *   EXACTFAA   -> trie_utf8_exactfa_fold / trie_latin_utf8_exactfa_fold
 *   EXACTFLU8  -> trie_flu8 / trie_flu8_latin
 *   otherwise  -> trie_utf8_fold / trie_latin_utf8_fold
 * Expands to a declaration with no trailing semicolon.  Every physical
 * line must end in a backslash so that the whole declaration stays inside
 * the macro definition. */
#define DECL_TRIE_TYPE(scan)                                                \
    const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold, \
                 trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold,      \
                 trie_utf8l, trie_flu8, trie_flu8_latin }                   \
        trie_type = ((FLAGS(scan) == EXACT)                                 \
                     ? (utf8_target ? trie_utf8 : trie_plain)               \
                     : (FLAGS(scan) == EXACTL)                              \
                       ? (utf8_target ? trie_utf8l : trie_plain)            \
                       : (FLAGS(scan) == EXACTFAA)                          \
                         ? (utf8_target                                     \
                            ? trie_utf8_exactfa_fold                        \
                            : trie_latin_utf8_exactfa_fold)                 \
                         : (FLAGS(scan) == EXACTFLU8                        \
                            ? (utf8_target                                  \
                              ? trie_flu8                                   \
                              : trie_flu8_latin)                            \
                            : (utf8_target                                  \
                              ? trie_utf8_fold                              \
                              : trie_latin_utf8_fold)))
/* Read the next code point for trie matching and map it to a trie char id.
 *
 * Depending on 'trie_type' the code point 'uvc' is taken from the input at
 * 'uc' (plain byte or UTF-8 decode), or is the next code point of a case
 * fold: when 'foldlen' is positive the pending fold in 'uscan' is
 * consumed, otherwise a new fold is computed into 'foldbuf'.  'len' is set
 * to the number of input bytes consumed (0 while draining a pending
 * fold).  The locale-dependent types first run the locale checks.
 *
 * 'charid' is then trie->charmap[uvc] for uvc < 256; otherwise it is
 * looked up in the 'widecharmap' hash (keyed by the raw bytes of the UV),
 * and is 0 if there is no such hash or entry.
 *
 * The trie_utf8l case reads 'utf8_target' from the expanding scope.
 * Every physical line of the body must end in a backslash so that the
 * whole statement stays inside the macro definition.
 */
#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, len, uvc, charid, foldlen, foldbuf, uniflags) \
STMT_START {                                                                \
    STRLEN skiplen;                                                         \
    U8 flags = FOLD_FLAGS_FULL;                                             \
    switch (trie_type) {                                                    \
    case trie_flu8:                                                         \
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;                                 \
        if (UTF8_IS_ABOVE_LATIN1(*uc)) {                                    \
            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end);             \
        }                                                                   \
        goto do_trie_utf8_fold;                                             \
    case trie_utf8_exactfa_fold:                                            \
        flags |= FOLD_FLAGS_NOMIX_ASCII;                                    \
        /* FALLTHROUGH */                                                   \
    case trie_utf8_fold:                                                    \
      do_trie_utf8_fold:                                                    \
        if ( foldlen>0 ) {                                                  \
            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \
            foldlen -= len;                                                 \
            uscan += len;                                                   \
            len=0;                                                          \
        } else {                                                            \
            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, &foldlen, \
                                                                     flags); \
            len = UTF8_SAFE_SKIP(uc, uc_end);                               \
            skiplen = UVCHR_SKIP( uvc );                                    \
            foldlen -= skiplen;                                             \
            uscan = foldbuf + skiplen;                                      \
        }                                                                   \
        break;                                                              \
    case trie_flu8_latin:                                                   \
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;                                 \
        goto do_trie_latin_utf8_fold;                                       \
    case trie_latin_utf8_exactfa_fold:                                      \
        flags |= FOLD_FLAGS_NOMIX_ASCII;                                    \
        /* FALLTHROUGH */                                                   \
    case trie_latin_utf8_fold:                                              \
      do_trie_latin_utf8_fold:                                              \
        if ( foldlen>0 ) {                                                  \
            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags ); \
            foldlen -= len;                                                 \
            uscan += len;                                                   \
            len=0;                                                          \
        } else {                                                            \
            len = 1;                                                        \
            uvc = _to_fold_latin1( (U8) *uc, foldbuf, &foldlen, flags);     \
            skiplen = UVCHR_SKIP( uvc );                                    \
            foldlen -= skiplen;                                             \
            uscan = foldbuf + skiplen;                                      \
        }                                                                   \
        break;                                                              \
    case trie_utf8l:                                                        \
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;                                 \
        if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                     \
            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end);             \
        }                                                                   \
        /* FALLTHROUGH */                                                   \
    case trie_utf8:                                                         \
        uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags ); \
        break;                                                              \
    case trie_plain:                                                        \
        uvc = (UV)*uc;                                                      \
        len = 1;                                                            \
    }                                                                       \
    if (uvc < 256) {                                                        \
        charid = trie->charmap[ uvc ];                                      \
    }                                                                       \
    else {                                                                  \
        charid = 0;                                                         \
        if (widecharmap) {                                                  \
            SV** const svpp = hv_fetch(widecharmap,                         \
                        (char*)&uvc, sizeof(UV), 0);                        \
            if (svpp)                                                       \
                charid = (U16)SvIV(*svpp);                                  \
        }                                                                   \
    }                                                                       \
} STMT_END
/* Convenience wrapper for dump_exec_pos() that fills in the string bounds from
 * 'reginfo' and the start position from 'startpos'; both names must be visible
 * where the macro is expanded. */
#define DUMP_EXEC_POS(li,s,doutf8,depth)                                    \
    dump_exec_pos(li, s, (reginfo->strend), (reginfo->strbeg),              \
                  startpos, doutf8, depth)
/* Fetch the auxiliary data for node 'n' of 'prog' through
 * GET_REGCLASS_AUX_DATA(), passing TRUE, 0 and NULL for the remaining
 * arguments.  The result is used in this file as the inversion list of an
 * ANYOFH-type node. */
#define GET_ANYOFH_INVLIST(prog, n)                                         \
    GET_REGCLASS_AUX_DATA(prog, n, TRUE, 0, NULL, NULL)
/* Execute CODE at every character position from 's' up to, but not including,
 * 'strend', then advance 's' by one UTF-8 character (never past
 * reginfo->strend).  's', 'strend' and 'reginfo' come from the expanding
 * scope; CODE may itself move 's' or jump out of the loop. */
#define REXEC_FBC_UTF8_SCAN(CODE)                                           \
    STMT_START {                                                            \
        while (s < strend) {                                                \
            CODE                                                            \
            s += UTF8_SAFE_SKIP(s, reginfo->strend);                        \
        }                                                                   \
    } STMT_END
/* Execute CODE at every byte position from 's' up to, but not including,
 * 'strend', then advance 's' by one byte.  's' and 'strend' come from the
 * expanding scope; CODE may itself move 's' or jump out of the loop. */
#define REXEC_FBC_NON_UTF8_SCAN(CODE)                                       \
    STMT_START {                                                            \
        while (s < strend) {                                                \
            CODE                                                            \
            s++;                                                            \
        }                                                                   \
    } STMT_END
/* Loop over a UTF-8 target while 's' < 'strend', applying
 * REXEC_FBC_UTF8_CLASS_SCAN_GUTS(COND) at each position.  The GUTS macro is
 * responsible for advancing 's'. */
#define REXEC_FBC_UTF8_CLASS_SCAN(COND)                                     \
    STMT_START {                                                            \
        while (s < strend) {                                                \
            REXEC_FBC_UTF8_CLASS_SCAN_GUTS(COND)                            \
        }                                                                   \
    } STMT_END
/* Loop over a non-UTF-8 target while 's' < 'strend', applying
 * REXEC_FBC_NON_UTF8_CLASS_SCAN_GUTS(COND) at each position.  The GUTS macro
 * is responsible for advancing 's'. */
#define REXEC_FBC_NON_UTF8_CLASS_SCAN(COND)                                 \
    STMT_START {                                                            \
        while (s < strend) {                                                \
            REXEC_FBC_NON_UTF8_CLASS_SCAN_GUTS(COND)                        \
        }                                                                   \
    } STMT_END
/* One step of a UTF-8 class scan.  If COND holds at 's', FBC_CHECK_AND_TRY
 * gets a chance to jump to 'got_it'; if it does not, 's' is moved to the next
 * character and that position is recorded in 'previous_occurrence_end'.  If
 * COND does not hold, 's' is simply moved to the next character.
 *
 * Deliberately a bare if/else (no STMT_START/STMT_END): it is meant to be
 * dropped into a loop body without a trailing semicolon. */
#define REXEC_FBC_UTF8_CLASS_SCAN_GUTS(COND)                                \
    if (COND) {                                                             \
        FBC_CHECK_AND_TRY                                                   \
        s += UTF8_SAFE_SKIP(s, reginfo->strend);                            \
        previous_occurrence_end = s;                                        \
    }                                                                       \
    else {                                                                  \
        s += UTF8SKIP(s);                                                   \
    }
/* One step of a non-UTF-8 class scan.  If COND holds at 's',
 * FBC_CHECK_AND_TRY gets a chance to jump to 'got_it'; if it does not, 's' is
 * moved to the next byte and that position is recorded in
 * 'previous_occurrence_end'.  If COND does not hold, 's' is simply moved to
 * the next byte.
 *
 * Deliberately a bare if/else (no STMT_START/STMT_END): it is meant to be
 * dropped into a loop body without a trailing semicolon. */
#define REXEC_FBC_NON_UTF8_CLASS_SCAN_GUTS(COND)                            \
    if (COND) {                                                             \
        FBC_CHECK_AND_TRY                                                   \
        s++;                                                                \
        previous_occurrence_end = s;                                        \
    }                                                                       \
    else {                                                                  \
        s++;                                                                \
    }
/* Reads the 'cutpoint' member of the given regmatch_info pointer.  The
 * parameter is parenthesized so that any pointer-valued expression may be
 * passed. */
#define LAST_REGTRY_SKIPPED_FORWARD(reginfo) ((reginfo)->cutpoint)
/* Decide whether the candidate position 's' is worth a full match attempt and
 * jump to 'got_it' if that attempt succeeds.
 *
 * A try is made when any of these holds:
 *   - 'doevery' is true;
 *   - 's' is not where the previous occurrence ended;
 *   - LAST_REGTRY_SKIPPED_FORWARD(reginfo) is set.
 * The try itself succeeds immediately when reginfo->intuit is set; otherwise
 * regtry() is called, provided 's' has not gone beyond reginfo->strend.
 *
 * Expands to a bare 'if' statement; uses 'doevery', 's',
 * 'previous_occurrence_end', 'reginfo' and the label 'got_it' from the
 * expanding scope. */
#define FBC_CHECK_AND_TRY                                                   \
    if (   (   doevery                                                      \
            || s != previous_occurrence_end                                 \
            || LAST_REGTRY_SKIPPED_FORWARD(reginfo) )                       \
        && (   reginfo->intuit                                              \
            || (s <= reginfo->strend && regtry(reginfo, &s))))              \
    {                                                                       \
        goto got_it;                                                        \
    }
/* Scan a UTF-8 target using a finder expression.  On each iteration 's' is
 * set to the value of 'f' (cast to char *); the loop ends once that value is
 * at or beyond 'strend'.  Otherwise FBC_CHECK_AND_TRY is given the position,
 * and if it does not jump to 'got_it', 's' moves on by one UTF-8 character
 * and that position is recorded in 'previous_occurrence_end'. */
#define REXEC_FBC_UTF8_FIND_NEXT_SCAN(f)                                    \
    while (s < strend) {                                                    \
        s = (char *) (f);                                                   \
        if (s >= strend) {                                                  \
            break;                                                          \
        }                                                                   \
                                                                            \
        FBC_CHECK_AND_TRY                                                   \
        s += UTF8SKIP(s);                                                   \
        previous_occurrence_end = s;                                        \
    }
/* Scan a non-UTF-8 target using a finder expression.  On each iteration 's'
 * is set to the value of 'f' (cast to char *); the loop ends once that value
 * is at or beyond 'strend'.  Otherwise FBC_CHECK_AND_TRY is given the
 * position, and if it does not jump to 'got_it', 's' moves on by one byte and
 * that position is recorded in 'previous_occurrence_end'. */
#define REXEC_FBC_NON_UTF8_FIND_NEXT_SCAN(f)                                \
    while (s < strend) {                                                    \
        s = (char *) (f);                                                   \
        if (s >= strend) {                                                  \
            break;                                                          \
        }                                                                   \
                                                                            \
        FBC_CHECK_AND_TRY                                                   \
        s++;                                                                \
        previous_occurrence_end = s;                                        \
    }
/* Scan a UTF-8 target using a finder expression that yields NULL when there
 * is nothing further to find, followed by a confirming condition.
 *
 * On each iteration 's' is set to the value of 'f'.  A NULL result sets 's'
 * to 'strend' and ends the loop.  Otherwise, if COND holds at the found
 * position, FBC_CHECK_AND_TRY is given it, and if that does not jump to
 * 'got_it', 's' moves on by one character and the position is recorded in
 * 'previous_occurrence_end'.  If COND does not hold, 's' just moves on by one
 * character. */
#define REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND(f, COND)                         \
    while (s < strend) {                                                    \
        s = (char *) (f);                                                   \
        if (s == NULL) {                                                    \
            s = (char *) strend;                                            \
            break;                                                          \
        }                                                                   \
                                                                            \
        if (COND) {                                                         \
            FBC_CHECK_AND_TRY                                               \
            s += UTF8_SAFE_SKIP(s, reginfo->strend);                        \
            previous_occurrence_end = s;                                    \
        }                                                                   \
        else {                                                              \
            s += UTF8SKIP(s);                                               \
        }                                                                   \
    }
/* REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND() specialised to look for the next
 * occurrence of a single byte with memchr() over the range s .. strend. */
#define REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(byte, COND)                      \
    REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND(                                     \
                                memchr(s, byte, strend - s),                \
                                COND)
/* REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND() specialised to look for the next
 * occurrence of the byte sequence substr .. substr_end with ninstr() over the
 * range s .. strend. */
#define REXEC_FBC_FIND_NEXT_UTF8_STRING_SCAN(substr, substr_end, COND)      \
    REXEC_FBC_FIND_NEXT_UTF8_SCAN_COND(                                     \
                            ninstr(s, strend, substr, substr_end),          \
                            COND)
/* Boundary scan over a UTF-8 target for a test that only needs to look at a
 * single byte (TEST_NON_UTF8).
 *
 * 'tmp' is primed with the test result for the byte before 's', or for '\n'
 * when 's' is at the very start of the string.  Then, at each character
 * position, if the test result for the byte at 's' differs from 'tmp', 'tmp'
 * is flipped and IF_SUCCESS runs; otherwise IF_FAIL runs.
 *
 * The definition must not end in a line continuation, otherwise the
 * directive that follows it would be absorbed into this macro. */
#define FBC_UTF8_A(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL)                      \
    tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n';                   \
    tmp = TEST_NON_UTF8(tmp);                                               \
    REXEC_FBC_UTF8_SCAN(                                                    \
        if (tmp == ! TEST_NON_UTF8((U8) *s)) {                              \
            tmp = !tmp;                                                     \
            IF_SUCCESS;                                                     \
        }                                                                   \
        else {                                                              \
            IF_FAIL;                                                        \
        }                                                                   \
    );
/* Boundary scan over a UTF-8 target for a test that needs the full code
 * point.
 *
 * 'tmp' is primed with TEST_UV applied to the character before 's' (found by
 * hopping back one character, not going before reginfo->strbeg), or to '\n'
 * when 's' is at the very start of the string.  Then, at each character
 * position, if TEST_UTF8 for the character at 's' differs from 'tmp', 'tmp'
 * is flipped and IF_SUCCESS runs; otherwise IF_FAIL runs. */
#define FBC_UTF8(TEST_UV, TEST_UTF8, IF_SUCCESS, IF_FAIL)                   \
    if (s == reginfo->strbeg) {                                             \
        tmp = '\n';                                                         \
    }                                                                       \
    else {                                                                  \
        U8 * const r = reghop3((U8*)s, -1, (U8*)reginfo->strbeg);           \
        tmp = utf8n_to_uvchr(r, (U8*) reginfo->strend - r,                  \
                             0, UTF8_ALLOW_DEFAULT);                        \
    }                                                                       \
    tmp = TEST_UV(tmp);                                                     \
    REXEC_FBC_UTF8_SCAN(                                                    \
        if (tmp == ! (TEST_UTF8((U8 *) s, (U8 *) reginfo->strend))) {       \
            tmp = !tmp;                                                     \
            IF_SUCCESS;                                                     \
        }                                                                   \
        else {                                                              \
            IF_FAIL;                                                        \
        }                                                                   \
    );
/* Shared tail for the UTF-8 boundary scans.  UTF8_CODE (one of the FBC_UTF8*
 * scans) runs first and leaves the test result for the last character in
 * 'tmp'.  The position after the scan is then treated as if it were followed
 * by '\n': IF_SUCCESS runs if the test result for '\n' differs from 'tmp',
 * IF_FAIL otherwise. */
#define FBC_BOUND_COMMON_UTF8(UTF8_CODE, TEST_NON_UTF8, IF_SUCCESS, IF_FAIL) \
    UTF8_CODE;                                                              \
                                                                            \
    if (tmp == ! TEST_NON_UTF8('\n')) {                                     \
        IF_SUCCESS;                                                         \
    }                                                                       \
    else {                                                                  \
        IF_FAIL;                                                            \
    }
/* Boundary scan over a non-UTF-8 target.
 *
 * 'tmp' is primed with the test result for the byte before 's', or for '\n'
 * when 's' is at the very start of the string.  At each byte position, if the
 * test result for the byte at 's' differs from 'tmp', IF_SUCCESS runs and
 * then 'tmp' is flipped; otherwise IF_FAIL runs.  After the scan, the final
 * position is treated as if it were followed by '\n' and IF_SUCCESS or
 * IF_FAIL runs once more on the same criterion. */
#define FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, IF_SUCCESS, IF_FAIL)       \
    tmp = (s != reginfo->strbeg) ? UCHARAT(s - 1) : '\n';                   \
    tmp = TEST_NON_UTF8(tmp);                                               \
    REXEC_FBC_NON_UTF8_SCAN(                                                \
        if (tmp == ! TEST_NON_UTF8(UCHARAT(s))) {                           \
            IF_SUCCESS;                                                     \
            tmp = !tmp;                                                     \
        }                                                                   \
        else {                                                              \
            IF_FAIL;                                                        \
        }                                                                   \
    );                                                                      \
                                                                            \
    if (tmp == ! TEST_NON_UTF8('\n')) {                                     \
        IF_SUCCESS;                                                         \
    }                                                                       \
    else {                                                                  \
        IF_FAIL;                                                            \
    }
/* Attempt a match at 's' and jump to 'got_it' on success.  The attempt
 * succeeds immediately when reginfo->intuit is set; otherwise regtry() is
 * called, provided 's' has not gone beyond reginfo->strend.  No trailing
 * semicolon: the user of the macro supplies it. */
#define REXEC_FBC_TRYIT                                                     \
    if (reginfo->intuit || (s <= reginfo->strend && regtry(reginfo, &s)))   \
        goto got_it
/* Front ends for the boundary scans.  The BOUND forms try a match
 * (REXEC_FBC_TRYIT) where the scan reports a boundary and do PLACEHOLDER
 * elsewhere; the NBOUND forms swap the two actions.  The "_A" UTF-8 forms use
 * the byte-only scan FBC_UTF8_A instead of FBC_UTF8.  For non-UTF-8 targets
 * the "_A" forms are the same scan as the plain forms. */

/* --- match at boundaries ------------------------------------------------ */
#define FBC_BOUND_UTF8(TEST_NON_UTF8, TEST_UV, TEST_UTF8)                   \
    FBC_BOUND_COMMON_UTF8(                                                  \
        FBC_UTF8(TEST_UV, TEST_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER),         \
        TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)

#define FBC_BOUND_NON_UTF8(TEST_NON_UTF8)                                   \
    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)

#define FBC_BOUND_A_UTF8(TEST_NON_UTF8)                                     \
    FBC_BOUND_COMMON_UTF8(                                                  \
        FBC_UTF8_A(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER),            \
        TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)

#define FBC_BOUND_A_NON_UTF8(TEST_NON_UTF8)                                 \
    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, REXEC_FBC_TRYIT, PLACEHOLDER)

/* --- match at non-boundaries -------------------------------------------- */
#define FBC_NBOUND_UTF8(TEST_NON_UTF8, TEST_UV, TEST_UTF8)                  \
    FBC_BOUND_COMMON_UTF8(                                                  \
        FBC_UTF8(TEST_UV, TEST_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT),         \
        TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)

#define FBC_NBOUND_NON_UTF8(TEST_NON_UTF8)                                  \
    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)

#define FBC_NBOUND_A_UTF8(TEST_NON_UTF8)                                    \
    FBC_BOUND_COMMON_UTF8(                                                  \
        FBC_UTF8_A(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT),            \
        TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)

#define FBC_NBOUND_A_NON_UTF8(TEST_NON_UTF8)                                \
    FBC_BOUND_COMMON_NON_UTF8(TEST_NON_UTF8, PLACEHOLDER, REXEC_FBC_TRYIT)
#ifdef DEBUGGING
/* Look up code point 'cp_in' in 'invlist' with _invlist_search() and return
 * the resulting index, asserting that it is not negative. */
static IV
S_get_break_val_cp_checked(SV* const invlist, const UV cp_in)
{
    const IV found_index = _invlist_search(invlist, cp_in);

    assert(found_index >= 0);
    return found_index;
}

/* DEBUGGING builds index the map through the asserting helper above */
#  define _generic_GET_BREAK_VAL_CP_CHECKED(invlist, invmap, cp)            \
        invmap[S_get_break_val_cp_checked(invlist, cp)]
#else
/* Non-DEBUGGING builds index the map with the search result directly */
#  define _generic_GET_BREAK_VAL_CP_CHECKED(invlist, invmap, cp)            \
        invmap[_invlist_search(invlist, cp)]
#endif
/* Map code point 'cp' to its entry in 'invmap', using 'invlist' to find the
 * index. */
#define _generic_GET_BREAK_VAL_CP(invlist, invmap, cp)                      \
    _generic_GET_BREAK_VAL_CP_CHECKED(invlist, invmap, cp)

/* Same lookup for the UTF-8 encoded character at 'pos': the character is
 * decoded with utf8_to_uvchr_buf() (bounded by 'strend') and handed to
 * 'cp_macro'.  'pos' must be before 'strend'; this is checked by
 * __ASSERT_(). */
#define _generic_GET_BREAK_VAL_UTF8(cp_macro, pos, strend)                  \
    (__ASSERT_(pos < strend)                                                \
                                                                            \
     (cp_macro(utf8_to_uvchr_buf((pos), (strend), NULL))))
/* Per-property lookups built on the generic macros.  Each *_VAL_CP form takes
 * a code point and each *_VAL_UTF8 form takes a pointer into a UTF-8 string
 * plus the end of that string.  The four pairs differ only in which
 * inversion list / map they consult: PL_GCB_invlist, PL_LB_invlist,
 * PL_SB_invlist and PL_WB_invlist respectively. */

/* GCB */
#define getGCB_VAL_CP(cp)                                                   \
    _generic_GET_BREAK_VAL_CP(PL_GCB_invlist, _Perl_GCB_invmap, (cp))

#define getGCB_VAL_UTF8(pos, strend)                                        \
    _generic_GET_BREAK_VAL_UTF8(getGCB_VAL_CP, pos, strend)

/* LB */
#define getLB_VAL_CP(cp)                                                    \
    _generic_GET_BREAK_VAL_CP(PL_LB_invlist, _Perl_LB_invmap, (cp))

#define getLB_VAL_UTF8(pos, strend)                                         \
    _generic_GET_BREAK_VAL_UTF8(getLB_VAL_CP, pos, strend)

/* SB */
#define getSB_VAL_CP(cp)                                                    \
    _generic_GET_BREAK_VAL_CP(PL_SB_invlist, _Perl_SB_invmap, (cp))

#define getSB_VAL_UTF8(pos, strend)                                         \
    _generic_GET_BREAK_VAL_UTF8(getSB_VAL_CP, pos, strend)

/* WB */
#define getWB_VAL_CP(cp)                                                    \
    _generic_GET_BREAK_VAL_CP(PL_WB_invlist, _Perl_WB_invmap, (cp))

#define getWB_VAL_UTF8(pos, strend)                                         \
    _generic_GET_BREAK_VAL_UTF8(getWB_VAL_CP, pos, strend)
/* S_find_byclass: scan the target string for a position at which the node
 * 'c' can match, and at which a full match attempt succeeds.
 *
 *   prog     the compiled regexp
 *   c        the node whose type (combined with the UTF-8ness of target and
 *            pattern) selects the scanning strategy
 *   s        where to start scanning
 *   strend   where to stop scanning
 *   reginfo  match state; when reginfo->intuit is set, a candidate position
 *            is accepted without calling regtry()
 *
 * Returns the accepted position, or 0 (NULL) if the scan finishes without
 * one.  Croaks with "panic: unknown regstclass" for a node type that has no
 * case here.
 *
 * Most cases are driven by the REXEC_FBC_* / FBC_* macros, which use the
 * locals 's', 'strend', 'reginfo', 'doevery', 'previous_occurrence_end' and
 * 'tmp', and the label 'got_it'. */
STATIC char *
S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
               const char *strend, regmatch_info *reginfo)
{
    /* TRUE unless the program has PREGf_SKIP set; consulted by
     * FBC_CHECK_AND_TRY */
    const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;

    char *pat_string;   /* The pattern's exactish string */
    char *pat_end;      /* ptr to end char of pat_string */
    re_fold_t folder;   /* Function for computing non-utf8 folds */
    const U8 *fold_array;   /* array for folding ords < 256 */
    STRLEN ln;          /* pattern length in bytes */
    STRLEN lnc;         /* pattern length in characters, then scaled down */
    U8 c1;              /* first pattern byte */
    U8 c2;              /* its fold */
    char *e = NULL;     /* last position at which a match may start */

    /* Written by the scan macros after each candidate; read by
     * FBC_CHECK_AND_TRY */
    char * previous_occurrence_end = 0;

    I32 tmp;            /* scratch for the boundary macros */
    const bool utf8_target = reginfo->is_utf8_target;
    UV utf8_fold_flags = 0;
    const bool is_utf8_pat = reginfo->is_utf8_pat;
    bool to_complement = FALSE; /* Invert the result?  Set for the negated
                                   node types */
    char_class_number_ classnum;

    RXi_GET_DECL(prog,progi);

    PERL_ARGS_ASSERT_FIND_BYCLASS;

    /* The case labels encode the node type plus whether the target (t8/tb)
     * and the pattern (p8/pb) are UTF-8 */
    switch (with_tp_UTF8ness(OP(c), utf8_target, is_utf8_pat)) {
        SV * anyofh_list;

      case ANYOFPOSIXL_t8_pb:
      case ANYOFPOSIXL_t8_p8:
      case ANYOFL_t8_pb:
      case ANYOFL_t8_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(c);

        /* FALLTHROUGH */

      case ANYOFD_t8_pb:
      case ANYOFD_t8_p8:
      case ANYOF_t8_pb:
      case ANYOF_t8_p8:
        REXEC_FBC_UTF8_CLASS_SCAN(
                reginclass(prog, c, (U8*)s, (U8*) strend, 1));
        break;

      case ANYOFPOSIXL_tb_pb:
      case ANYOFPOSIXL_tb_p8:
      case ANYOFL_tb_pb:
      case ANYOFL_tb_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(c);

        /* FALLTHROUGH */

      case ANYOFD_tb_pb:
      case ANYOFD_tb_p8:
      case ANYOF_tb_pb:
      case ANYOF_tb_p8:
        if (! ANYOF_FLAGS(c) && ANYOF_MATCHES_NONE_OUTSIDE_BITMAP(c)) {
            /* Only the bitmap matters: test it directly */
            REXEC_FBC_NON_UTF8_CLASS_SCAN(ANYOF_BITMAP_TEST(c, *((U8*)s)));
        }
        else {
            REXEC_FBC_NON_UTF8_CLASS_SCAN(reginclass(prog,c, (U8*)s, (U8*)s+1,
                                                     0));
        }
        break;

      case ANYOFM_tb_pb:
      case ANYOFM_tb_p8:
        REXEC_FBC_NON_UTF8_FIND_NEXT_SCAN(
            find_next_masked((U8 *) s, (U8 *) strend, (U8) ARG1u(c), FLAGS(c)));
        break;

      case ANYOFM_t8_pb:
      case ANYOFM_t8_p8:
        REXEC_FBC_UTF8_FIND_NEXT_SCAN(
            find_next_masked((U8 *) s, (U8 *) strend, (U8) ARG1u(c), FLAGS(c)));
        break;

      case NANYOFM_tb_pb:
      case NANYOFM_tb_p8:
        REXEC_FBC_NON_UTF8_FIND_NEXT_SCAN(
            find_span_end_mask((U8 *) s, (U8 *) strend, (U8) ARG1u(c), FLAGS(c)));
        break;

      case NANYOFM_t8_pb:
      case NANYOFM_t8_p8:
        REXEC_FBC_UTF8_FIND_NEXT_SCAN(
            (char *) find_span_end_mask((U8 *) s, (U8 *) strend,
                                        (U8) ARG1u(c), FLAGS(c)));
        break;

      /* No scan is performed for these node types when the target is not
       * UTF-8; control leaves the switch and 0 is returned */
      case ANYOFH_tb_pb:
      case ANYOFH_tb_p8:
      case ANYOFHb_tb_pb:
      case ANYOFHb_tb_p8:
      case ANYOFHbbm_tb_pb:
      case ANYOFHbbm_tb_p8:
      case ANYOFHr_tb_pb:
      case ANYOFHr_tb_p8:
      case ANYOFHs_tb_pb:
      case ANYOFHs_tb_p8:
      case EXACTFLU8_tb_pb:
      case EXACTFLU8_tb_p8:
      case EXACTFU_REQ8_tb_pb:
      case EXACTFU_REQ8_tb_p8:
        break;

      case ANYOFH_t8_pb:
      case ANYOFH_t8_p8:
        anyofh_list = GET_ANYOFH_INVLIST(prog, c);
        /* The first-byte comparison is a cheap filter ahead of the inversion
         * list lookup */
        REXEC_FBC_UTF8_CLASS_SCAN(
              (   (U8) NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
               && _invlist_contains_cp(anyofh_list,
                                       utf8_to_uvchr_buf((U8 *) s,
                                                         (U8 *) strend,
                                                         NULL))));
        break;

      case ANYOFHb_t8_pb:
      case ANYOFHb_t8_p8:
        {
            /* FLAGS(c) holds the byte to search for */
            U8 first_byte = FLAGS(c);

            anyofh_list = GET_ANYOFH_INVLIST(prog, c);
            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                   _invlist_contains_cp(anyofh_list,
                                        utf8_to_uvchr_buf((U8 *) s,
                                                          (U8 *) strend,
                                                          NULL)));
        }
        break;

      case ANYOFHbbm_t8_pb:
      case ANYOFHbbm_t8_p8:
        {
            /* FLAGS(c) holds the byte to search for; the byte after it is
             * then checked against the node's bitmap */
            U8 first_byte = FLAGS(c);

            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                   (    s < strend
                    && BITMAP_TEST(((struct regnode_bbm *) c)->bitmap,
                                   (U8) s[1] & UTF_CONTINUATION_MASK)));
        }
        break;

      case ANYOFHr_t8_pb:
      case ANYOFHr_t8_p8:
        anyofh_list = GET_ANYOFH_INVLIST(prog, c);
        /* The first-byte range check is a cheap filter ahead of the inversion
         * list lookup */
        REXEC_FBC_UTF8_CLASS_SCAN(
                (   inRANGE(NATIVE_UTF8_TO_I8(*s),
                            LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)),
                            HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(c)))
                 && _invlist_contains_cp(anyofh_list,
                                         utf8_to_uvchr_buf((U8 *) s,
                                                           (U8 *) strend,
                                                           NULL))));
        break;

      case ANYOFHs_t8_pb:
      case ANYOFHs_t8_p8:
        anyofh_list = GET_ANYOFH_INVLIST(prog, c);
        /* Search for the node's stored string (FLAGS(c) bytes long), then
         * confirm with the inversion list */
        REXEC_FBC_FIND_NEXT_UTF8_STRING_SCAN(
                        ((struct regnode_anyofhs *) c)->string,
                        ((struct regnode_anyofhs *) c)->string + FLAGS(c),
                        _invlist_contains_cp(anyofh_list,
                                             utf8_to_uvchr_buf((U8 *) s,
                                                               (U8 *) strend,
                                                               NULL)));
        break;

      case ANYOFR_tb_pb:
      case ANYOFR_tb_p8:
        REXEC_FBC_NON_UTF8_CLASS_SCAN(withinCOUNT((U8) *s,
                                            ANYOFRbase(c), ANYOFRdelta(c)));
        break;

      case ANYOFR_t8_pb:
      case ANYOFR_t8_p8:
        REXEC_FBC_UTF8_CLASS_SCAN(
                            (   NATIVE_UTF8_TO_I8(*s) >= ANYOF_FLAGS(c)
                             && withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
                                                              (U8 *) strend,
                                                              NULL),
                                            ANYOFRbase(c), ANYOFRdelta(c))));
        break;

      case ANYOFRb_tb_pb:
      case ANYOFRb_tb_p8:
        REXEC_FBC_NON_UTF8_CLASS_SCAN(withinCOUNT((U8) *s,
                                            ANYOFRbase(c), ANYOFRdelta(c)));
        break;

      case ANYOFRb_t8_pb:
      case ANYOFRb_t8_p8:
        {
            /* FLAGS(c) holds the byte to search for */
            U8 first_byte = FLAGS(c);

            REXEC_FBC_FIND_NEXT_UTF8_BYTE_SCAN(first_byte,
                                withinCOUNT(utf8_to_uvchr_buf((U8 *) s,
                                                              (U8 *) strend,
                                                              NULL),
                                            ANYOFRbase(c), ANYOFRdelta(c)));
        }
        break;

      /* The next four cases select a fold table and comparison function and
       * share the byte-oriented search at do_exactf_non_utf8 */
      case EXACTFAA_tb_pb:
        fold_array = PL_fold_latin1;
        folder = S_foldEQ_latin1_s2_folded;
        goto do_exactf_non_utf8;

      case EXACTF_tb_pb:
        fold_array = PL_fold;
        folder = Perl_foldEQ;
        goto do_exactf_non_utf8;

      case EXACTFL_tb_pb:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;

        if (IN_UTF8_CTYPE_LOCALE) {
            utf8_fold_flags = FOLDEQ_LOCALE;
            goto do_exactf_utf8;
        }

        fold_array = PL_fold_locale;
        folder = Perl_foldEQ_locale;
        goto do_exactf_non_utf8;

      case EXACTFU_tb_pb:
        fold_array = PL_fold_latin1;
        folder = S_foldEQ_latin1_s2_folded;

        /* FALLTHROUGH */

      do_exactf_non_utf8:
        pat_string = STRINGs(c);
        ln = STR_LENs(c);

        /* A match needs 'ln' bytes, so none can start after 'e' */
        e = HOP3c(strend, -((SSize_t)ln), s);
        if (e < s)
            break;

        /* Find the first pattern byte (or its fold), then compare the rest
         * of the node with 'folder' */
        c1 = *pat_string;
        c2 = fold_array[c1];
        if (c1 == c2) {     /* Byte and fold are the same: plain memchr */
            while (s <= e) {
                s = (char *) memchr(s, c1, e + 1 - s);
                if (s == NULL) {
                    break;
                }

                if (   (ln == 1 || folder(aTHX_ s + 1, pat_string + 1, ln - 1))
                    && (reginfo->intuit || regtry(reginfo, &s)) )
                {
                    goto got_it;
                }
                s++;
            }
        }
        else {
            U8 bits_differing = c1 ^ c2;

            /* When byte and fold differ in exactly one bit, both can be
             * found with a single masked search */
            if (LIKELY(PL_bitcount[bits_differing] == 1)) {
                bits_differing = ~ bits_differing;
                while (s <= e) {
                    s = (char *) find_next_masked((U8 *) s, (U8 *) e + 1,
                                        (c1 & bits_differing), bits_differing);
                    if (s > e) {
                        break;
                    }

                    if (   (ln == 1 || folder(aTHX_ s + 1, pat_string + 1, ln - 1))
                        && (reginfo->intuit || regtry(reginfo, &s)) )
                    {
                        goto got_it;
                    }
                    s++;
                }
            }
            else {  /* Otherwise check each byte against both values */
                while (s <= e) {
                    if (    (*(U8*)s == c1 || *(U8*)s == c2)
                        && (ln == 1 || folder(aTHX_ s + 1, pat_string + 1, ln - 1))
                        && (reginfo->intuit || regtry(reginfo, &s)) )
                    {
                        goto got_it;
                    }
                    s++;
                }
            }
        }
        break;

      /* The following cases select flags for foldEQ_utf8_flags() and share
       * the search at do_exactf_utf8 */
      case EXACTFAA_tb_p8:
      case EXACTFAA_t8_p8:
        utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII
                         |FOLDEQ_S2_ALREADY_FOLDED
                         |FOLDEQ_S2_FOLDS_SANE;
        goto do_exactf_utf8;

      case EXACTFAA_NO_TRIE_tb_pb:
      case EXACTFAA_NO_TRIE_t8_pb:
      case EXACTFAA_t8_pb:
        utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
        goto do_exactf_utf8;

      case EXACTFL_tb_p8:
      case EXACTFL_t8_pb:
      case EXACTFL_t8_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        utf8_fold_flags = FOLDEQ_LOCALE;
        goto do_exactf_utf8;

      case EXACTFLU8_t8_pb:
      case EXACTFLU8_t8_p8:
        utf8_fold_flags =  FOLDEQ_LOCALE | FOLDEQ_S2_ALREADY_FOLDED
                         | FOLDEQ_S2_FOLDS_SANE;
        goto do_exactf_utf8;

      case EXACTFU_REQ8_t8_p8:
        utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
        goto do_exactf_utf8;

      case EXACTFU_tb_p8:
      case EXACTFU_t8_pb:
      case EXACTFU_t8_p8:
        utf8_fold_flags = FOLDEQ_S2_ALREADY_FOLDED;
        goto do_exactf_utf8;

      /* These reach the shared code with utf8_fold_flags still 0 */
      case EXACTF_t8_pb:
      case EXACTFUP_tb_pb:
      case EXACTFUP_t8_pb:

      do_exactf_utf8:
        {
            unsigned expansion;

            pat_string = STRINGs(c);
            ln = STR_LENs(c);
            pat_end = pat_string + ln;
            lnc = is_utf8_pat
                  ? utf8_length((U8 *) pat_string, (U8 *) pat_end)
                  : ln;

            /* One target character may account for up to 'expansion' pattern
             * characters, so divide (rounding up) to get the minimum number
             * of target characters a match needs */
            expansion = (utf8_target) ? UTF8_MAX_FOLD_CHAR_EXPAND : 2;
            lnc = (lnc + expansion - 1) / expansion;

            /* No match can start after 'e' */
            e = HOP3c(strend, -((SSize_t)lnc), s);

            while (s <= e) {
                char *my_strend= (char *)strend;

                if (   foldEQ_utf8_flags(s, &my_strend, 0, utf8_target,
                                         pat_string, NULL, ln, is_utf8_pat,
                                         utf8_fold_flags)
                    && (reginfo->intuit || regtry(reginfo, &s)) )
                {
                    goto got_it;
                }
                s += (utf8_target) ? UTF8_SAFE_SKIP(s, reginfo->strend) : 1;
            }
        }
        break;

      case BOUNDA_tb_pb:
      case BOUNDA_tb_p8:
      case BOUND_tb_pb:
      case BOUND_tb_p8:
        assert(FLAGS(c) == TRADITIONAL_BOUND);

        FBC_BOUND_A_NON_UTF8(isWORDCHAR_A);
        break;

      case BOUNDA_t8_pb:
      case BOUNDA_t8_p8:
        assert(FLAGS(c) == TRADITIONAL_BOUND);

        FBC_BOUND_A_UTF8(isWORDCHAR_A);
        break;

      case NBOUNDA_tb_pb:
      case NBOUNDA_tb_p8:
      case NBOUND_tb_pb:
      case NBOUND_tb_p8:
        assert(FLAGS(c) == TRADITIONAL_BOUND);

        FBC_NBOUND_A_NON_UTF8(isWORDCHAR_A);
        break;

      case NBOUNDA_t8_pb:
      case NBOUNDA_t8_p8:
        assert(FLAGS(c) == TRADITIONAL_BOUND);

        FBC_NBOUND_A_UTF8(isWORDCHAR_A);
        break;

      case NBOUNDU_tb_pb:
      case NBOUNDU_tb_p8:
        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_NBOUND_NON_UTF8(isWORDCHAR_L1);
            break;
        }

        to_complement = 1;
        goto do_boundu_non_utf8;

      case NBOUNDL_tb_pb:
      case NBOUNDL_tb_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        if (FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_NBOUND_NON_UTF8(isWORDCHAR_LC);
            break;
        }

        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;

        to_complement = 1;
        goto do_boundu_non_utf8;

      case BOUNDL_tb_pb:
      case BOUNDL_tb_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        if (FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_BOUND_NON_UTF8(isWORDCHAR_LC);
            break;
        }

        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;

        goto do_boundu_non_utf8;

      case BOUNDU_tb_pb:
      case BOUNDU_tb_p8:
        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_BOUND_NON_UTF8(isWORDCHAR_L1);
            break;
        }

        /* FALLTHROUGH */

      do_boundu_non_utf8:
        /* The start of the string is tried unconditionally */
        if (s == reginfo->strbeg) {
            if (reginfo->intuit || regtry(reginfo, &s))
            {
                goto got_it;
            }

            /* Didn't match; move on, if there is anywhere to move to */
            s++;
            if (UNLIKELY(s >= reginfo->strend)) {
                break;
            }
        }

        switch ((bound_type) FLAGS(c)) {
          case TRADITIONAL_BOUND: /* Handled by the callers of this label */
            assert(0);
            break;

          case GCB_BOUND:
            /* For a non-UTF-8 target the only non-break is between CR and
             * LF */
            while (s < strend) {
                if ((to_complement ^ (   UCHARAT(s - 1) != '\r'
                                      || UCHARAT(s) != '\n'))
                    && (reginfo->intuit || regtry(reginfo, &s)))
                {
                    goto got_it;
                }
                s++;
            }

            break;

          case LB_BOUND:
            {
                LB_enum before = getLB_VAL_CP((U8) *(s -1));

                while (s < strend) {
                    LB_enum after = getLB_VAL_CP((U8) *s);

                    if ((to_complement ^ isLB(before,
                                              after,
                                              (U8*) reginfo->strbeg,
                                              (U8*) s,
                                              (U8*) reginfo->strend,
                                              0 /* target not utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    before = after;
                    s++;
                }
            }

            break;

          case SB_BOUND:
            {
                SB_enum before = getSB_VAL_CP((U8) *(s -1));

                while (s < strend) {
                    SB_enum after = getSB_VAL_CP((U8) *s);

                    if ((to_complement ^ isSB(before,
                                              after,
                                              (U8*) reginfo->strbeg,
                                              (U8*) s,
                                              (U8*) reginfo->strend,
                                              0 /* target not utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    before = after;
                    s++;
                }
            }

            break;

          case WB_BOUND:
            {
                WB_enum previous = WB_UNKNOWN;
                WB_enum before = getWB_VAL_CP((U8) *(s -1));

                while (s < strend) {
                    WB_enum after = getWB_VAL_CP((U8) *s);

                    if ((to_complement ^ isWB(previous,
                                              before,
                                              after,
                                              (U8*) reginfo->strbeg,
                                              (U8*) s,
                                              (U8*) reginfo->strend,
                                              0 /* target not utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    previous = before;
                    before = after;
                    s++;
                }
            }
        }

        /* The position reached after the scan is tried unconditionally */
        if (   reginfo->intuit
            || (s <= reginfo->strend && regtry(reginfo, &s)))
        {
            goto got_it;
        }
        break;

      case BOUNDL_t8_pb:
      case BOUNDL_t8_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        if (FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_BOUND_UTF8(isWORDCHAR_LC, isWORDCHAR_LC_uvchr,
                           isWORDCHAR_LC_utf8_safe);
            break;
        }

        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;

        /* This is the non-negated node, so 'to_complement' must stay FALSE,
         * exactly as for BOUNDL with a non-UTF-8 target and for BOUNDU
         * below.  Only the NBOUND* cases complement. */
        goto do_boundu_utf8;

      case NBOUNDL_t8_pb:
      case NBOUNDL_t8_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        if (FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_NBOUND_UTF8(isWORDCHAR_LC, isWORDCHAR_LC_uvchr,
                            isWORDCHAR_LC_utf8_safe);
            break;
        }

        CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;

        to_complement = 1;
        goto do_boundu_utf8;

      case NBOUND_t8_pb:
      case NBOUND_t8_p8:
        assert(FLAGS(c) == TRADITIONAL_BOUND);

        /* FALLTHROUGH */

      case NBOUNDU_t8_pb:
      case NBOUNDU_t8_p8:
        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_NBOUND_UTF8(isWORDCHAR_L1, isWORDCHAR_uni,
                            isWORDCHAR_utf8_safe);
            break;
        }

        to_complement = 1;
        goto do_boundu_utf8;

      case BOUND_t8_pb:
      case BOUND_t8_p8:
        assert(FLAGS(c) == TRADITIONAL_BOUND);

        /* FALLTHROUGH */

      case BOUNDU_t8_pb:
      case BOUNDU_t8_p8:
        if ((bound_type) FLAGS(c) == TRADITIONAL_BOUND) {
            FBC_BOUND_UTF8(isWORDCHAR_L1, isWORDCHAR_uni, isWORDCHAR_utf8_safe);
            break;
        }

        /* FALLTHROUGH */

      do_boundu_utf8:
        /* The start of the string is tried unconditionally */
        if (s == reginfo->strbeg) {
            if (reginfo->intuit || regtry(reginfo, &s))
            {
                goto got_it;
            }

            /* Didn't match; move on, if there is anywhere to move to */
            s += UTF8_SAFE_SKIP(s, reginfo->strend);
            if (UNLIKELY(s >= reginfo->strend)) {
                break;
            }
        }

        switch ((bound_type) FLAGS(c)) {
          case TRADITIONAL_BOUND: /* Handled by the callers of this label */
            assert(0);
            break;

          case GCB_BOUND:
            {
                GCB_enum before = getGCB_VAL_UTF8(
                                           reghop3((U8*)s, -1,
                                                   (U8*)(reginfo->strbeg)),
                                           (U8*) reginfo->strend);

                while (s < strend) {
                    GCB_enum after = getGCB_VAL_UTF8((U8*) s,
                                                     (U8*) reginfo->strend);

                    if (   (to_complement ^ isGCB(before,
                                                  after,
                                                  (U8*) reginfo->strbeg,
                                                  (U8*) s,
                                                  1 /* target is utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    before = after;
                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                }
            }
            break;

          case LB_BOUND:
            {
                LB_enum before = getLB_VAL_UTF8(reghop3((U8*)s,
                                                        -1,
                                                        (U8*)(reginfo->strbeg)),
                                                (U8*) reginfo->strend);

                while (s < strend) {
                    LB_enum after = getLB_VAL_UTF8((U8*) s,
                                                   (U8*) reginfo->strend);

                    if ((to_complement ^ isLB(before,
                                              after,
                                              (U8*) reginfo->strbeg,
                                              (U8*) s,
                                              (U8*) reginfo->strend,
                                              1 /* target is utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    before = after;
                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                }
            }

            break;

          case SB_BOUND:
            {
                SB_enum before = getSB_VAL_UTF8(reghop3((U8*)s,
                                                        -1,
                                                        (U8*)(reginfo->strbeg)),
                                                (U8*) reginfo->strend);

                while (s < strend) {
                    SB_enum after = getSB_VAL_UTF8((U8*) s,
                                                   (U8*) reginfo->strend);

                    if ((to_complement ^ isSB(before,
                                              after,
                                              (U8*) reginfo->strbeg,
                                              (U8*) s,
                                              (U8*) reginfo->strend,
                                              1 /* target is utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    before = after;
                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                }
            }

            break;

          case WB_BOUND:
            {
                WB_enum previous = WB_UNKNOWN;
                WB_enum before = getWB_VAL_UTF8(
                                          reghop3((U8*)s,
                                                  -1,
                                                  (U8*)(reginfo->strbeg)),
                                          (U8*) reginfo->strend);

                while (s < strend) {
                    WB_enum after = getWB_VAL_UTF8((U8*) s,
                                                   (U8*) reginfo->strend);

                    if ((to_complement ^ isWB(previous,
                                              before,
                                              after,
                                              (U8*) reginfo->strbeg,
                                              (U8*) s,
                                              (U8*) reginfo->strend,
                                              1 /* target is utf8 */ ))
                        && (reginfo->intuit || regtry(reginfo, &s)))
                    {
                        goto got_it;
                    }
                    previous = before;
                    before = after;
                    s += UTF8_SAFE_SKIP(s, reginfo->strend);
                }
            }
        }

        /* The position reached after the scan is tried unconditionally */
        if (   reginfo->intuit
            || (s <= reginfo->strend && regtry(reginfo, &s)))
        {
            goto got_it;
        }
        break;

      case LNBREAK_t8_pb:
      case LNBREAK_t8_p8:
        REXEC_FBC_UTF8_CLASS_SCAN(is_LNBREAK_utf8_safe(s, strend));
        break;

      case LNBREAK_tb_pb:
      case LNBREAK_tb_p8:
        REXEC_FBC_NON_UTF8_CLASS_SCAN(is_LNBREAK_latin1_safe(s, strend));
        break;

      /* In the POSIX class cases below, the negated node types set
       * 'to_complement' and then share the scan of the non-negated ones */
      case NPOSIXL_t8_pb:
      case NPOSIXL_t8_p8:
        to_complement = 1;
        /* FALLTHROUGH */

      case POSIXL_t8_pb:
      case POSIXL_t8_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        REXEC_FBC_UTF8_CLASS_SCAN(
            to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(c), (U8 *) s,
                                                (U8 *) strend)));
        break;

      case NPOSIXL_tb_pb:
      case NPOSIXL_tb_p8:
        to_complement = 1;
        /* FALLTHROUGH */

      case POSIXL_tb_pb:
      case POSIXL_tb_p8:
        CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
        REXEC_FBC_NON_UTF8_CLASS_SCAN(
                                to_complement ^ cBOOL(isFOO_lc(FLAGS(c), *s)));
        break;

      case NPOSIXA_t8_pb:
      case NPOSIXA_t8_p8:
        /* Matches anything that is not ASCII, plus any ASCII character that
         * is not in the class */
        REXEC_FBC_UTF8_CLASS_SCAN(   ! isASCII_utf8_safe(s, strend)
                                  || ! generic_isCC_A_(*s, FLAGS(c)));
        break;

      case POSIXA_t8_pb:
      case POSIXA_t8_p8:
        REXEC_FBC_UTF8_CLASS_SCAN(generic_isCC_A_(*s, FLAGS(c)));
        break;

      case NPOSIXD_tb_pb:
      case NPOSIXD_tb_p8:
      case NPOSIXA_tb_pb:
      case NPOSIXA_tb_p8:
        to_complement = 1;
        /* FALLTHROUGH */

      case POSIXD_tb_pb:
      case POSIXD_tb_p8:
      case POSIXA_tb_pb:
      case POSIXA_tb_p8:
        REXEC_FBC_NON_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(generic_isCC_A_(*s, FLAGS(c))));
        break;

      case NPOSIXU_tb_pb:
      case NPOSIXU_tb_p8:
        to_complement = 1;
        /* FALLTHROUGH */

      case POSIXU_tb_pb:
      case POSIXU_tb_p8:
        REXEC_FBC_NON_UTF8_CLASS_SCAN(
                                 to_complement ^ cBOOL(generic_isCC_(*s,
                                                                    FLAGS(c))));
        break;

      case NPOSIXD_t8_pb:
      case NPOSIXD_t8_p8:
      case NPOSIXU_t8_pb:
      case NPOSIXU_t8_p8:
        to_complement = 1;
        /* FALLTHROUGH */

      case POSIXD_t8_pb:
      case POSIXD_t8_p8:
      case POSIXU_t8_pb:
      case POSIXU_t8_p8:
        classnum = (char_class_number_) FLAGS(c);
        switch (classnum) {
          default:
            /* General case: look the code point up in the class's inversion
             * list */
            REXEC_FBC_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(_invlist_contains_cp(
                                                PL_XPosix_ptrs[classnum],
                                                utf8_to_uvchr_buf((U8 *) s,
                                                                  (U8 *) strend,
                                                                  NULL))));
            break;

          /* These classes have dedicated test macros */
          case CC_ENUM_SPACE_:
            REXEC_FBC_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(isSPACE_utf8_safe(s, strend)));
            break;

          case CC_ENUM_BLANK_:
            REXEC_FBC_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(isBLANK_utf8_safe(s, strend)));
            break;

          case CC_ENUM_XDIGIT_:
            REXEC_FBC_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(isXDIGIT_utf8_safe(s, strend)));
            break;

          case CC_ENUM_VERTSPACE_:
            REXEC_FBC_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(isVERTWS_utf8_safe(s, strend)));
            break;

          case CC_ENUM_CNTRL_:
            REXEC_FBC_UTF8_CLASS_SCAN(
                        to_complement ^ cBOOL(isCNTRL_utf8_safe(s, strend)));
            break;
        }
        break;

      case AHOCORASICKC_tb_pb:
      case AHOCORASICKC_tb_p8:
      case AHOCORASICKC_t8_pb:
      case AHOCORASICKC_t8_p8:
      case AHOCORASICK_tb_pb:
      case AHOCORASICK_tb_p8:
      case AHOCORASICK_t8_pb:
      case AHOCORASICK_t8_p8:
        {
            DECL_TRIE_TYPE(c);
            reg_ac_data *aho = (reg_ac_data*)progi->data->data[ ARG1u( c ) ];
            reg_trie_data *trie = (reg_trie_data*)progi->data->data[aho->trie];
            HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);

            /* No word can start after this position */
            const char *last_start = strend - trie->minlen;
#ifdef DEBUGGING
            const char *real_start = s;
#endif
            STRLEN maxlen = trie->maxlen;
            SV *sv_points;

            /* Circular buffer of 'maxlen' input positions, one per character
             * read, used to recover where an accepted word began */
            U8 **points;
            U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
            U8 *bitmap=NULL;

            DECLARE_AND_GET_RE_DEBUG_FLAGS;

            /* 'points' lives in a mortal SV, released by FREETMPS below */
            ENTER;
            SAVETMPS;
            sv_points=newSV(maxlen * sizeof(U8 *));
            SvCUR_set(sv_points,
                maxlen * sizeof(U8 *));
            SvPOK_on(sv_points);
            sv_2mortal(sv_points);
            points=(U8**)SvPV_nolen(sv_points );

            /* Pick a start-byte bitmap, if one is usable */
            if (   trie_type != trie_utf8_fold
                && (trie->bitmap || OP(c)==AHOCORASICKC) )
            {
                if (trie->bitmap)
                    bitmap=(U8*)trie->bitmap;
                else
                    bitmap=(U8*)ANYOF_BITMAP(c);
            }

            while (s <= last_start) {
                const U32 uniflags = UTF8_ALLOW_DEFAULT;
                U8 *uc = (U8*)s;
                U16 charid = 0;
                U32 base = 1;
                U32 state = 1;
                UV uvc = 0;
                STRLEN len = 0;
                STRLEN foldlen = 0;
                U8 *uscan = (U8*)NULL;
                U8 *leftmost = NULL;    /* earliest start of an accepted
                                           word so far */
#ifdef DEBUGGING
                U32 accepted_word= 0;
#endif
                U32 pointpos = 0;

                while ( state && uc <= (U8*)strend ) {
                    int failed=0;
                    U32 word = aho->states[ state ].wordnum;

                    if ( state==1 ) {
                        /* In the start state: skip ahead to a byte that the
                         * bitmap allows */
                        if ( bitmap ) {
                            DEBUG_TRIE_EXECUTE_r(
                                if (   uc <= (U8*)last_start
                                    && !BITMAP_TEST(bitmap,*uc) )
                                {
                                    dump_exec_pos( (char *)uc, c, strend,
                                        real_start,
                                        (char *)uc, utf8_target, 0 );
                                    Perl_re_printf( aTHX_
                                        " Scanning for legal start char...\n");
                                }
                            );
                            if (utf8_target) {
                                while (   uc <= (U8*)last_start
                                       && !BITMAP_TEST(bitmap,*uc) )
                                {
                                    uc += UTF8SKIP(uc);
                                }
                            }
                            else {
                                while (   uc <= (U8*)last_start
                                       && ! BITMAP_TEST(bitmap,*uc) )
                                {
                                    uc++;
                                }
                            }
                            s= (char *)uc;
                        }
                        if (uc >(U8*)last_start)
                            break;
                    }

                    if ( word ) {
                        /* This state accepts a word; work out where it
                         * started */
                        U8 *lpos= points[ (pointpos - trie->wordinfo[word].len)
                                                                    % maxlen ];
                        if (!leftmost || lpos < leftmost) {
                            DEBUG_r(accepted_word=word);
                            leftmost= lpos;
                        }
                        if (base==0)
                            break;
                    }

                    points[pointpos++ % maxlen]= uc;
                    if (foldlen || uc < (U8*)strend) {
                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
                                             (U8 *) strend, uscan, len, uvc,
                                             charid, foldlen, foldbuf,
                                             uniflags);
                        DEBUG_TRIE_EXECUTE_r({
                            dump_exec_pos( (char *)uc, c, strend,
                                        real_start, s, utf8_target, 0);
                            Perl_re_printf( aTHX_
                                " Charid:%3u CP:%4" UVxf " ",
                                 charid, uvc);
                        });
                    }
                    else {
                        len = 0;
                        charid = 0;
                    }

                    /* Follow transitions, taking fail links until one fits
                     * or the states run out */
                    do {
#ifdef DEBUGGING
                        word = aho->states[ state ].wordnum;
#endif
                        base = aho->states[ state ].trans.base;

                        DEBUG_TRIE_EXECUTE_r({
                            if (failed)
                                dump_exec_pos((char *)uc, c, strend, real_start,
                                    s, utf8_target, 0 );
                            Perl_re_printf( aTHX_
                                "%sState: %4" UVxf ", word=%" UVxf,
                                failed ? " Fail transition to " : "",
                                (UV)state, (UV)word);
                        });
                        if ( base ) {
                            U32 tmp;
                            I32 offset;

                            if (charid &&
                                 ( ((offset = base + charid
                                    - 1 - trie->uniquecharcount)) >= 0)
                                 && ((U32)offset < trie->lasttrans)
                                 && trie->trans[offset].check == state
                                 && (tmp=trie->trans[offset].next))
                            {
                                DEBUG_TRIE_EXECUTE_r(
                                    Perl_re_printf( aTHX_ " - legal\n"));
                                state = tmp;
                                break;
                            }
                            else {
                                DEBUG_TRIE_EXECUTE_r(
                                    Perl_re_printf( aTHX_ " - fail\n"));
                                failed = 1;
                                state = aho->fail[state];
                            }
                        }
                        else {
                            /* No transitions out of this state */
                            DEBUG_TRIE_EXECUTE_r(
                                Perl_re_printf( aTHX_ " - accepting\n"));
                            failed = 1;
                            break;
                        }
                    } while (state);

                    uc += len;
                    if (failed) {
                        if (leftmost)
                            break;
                        if (!state) state = 1;
                    }
                }

                /* The state the loop ended in may itself accept a word */
                if ( aho->states[ state ].wordnum ) {
                    U8 *lpos = points[ (pointpos
                                      - trie->wordinfo[aho->states[ state ]
                                                    .wordnum].len) % maxlen ];
                    if (!leftmost || lpos < leftmost) {
                        DEBUG_r(accepted_word=aho->states[ state ].wordnum);
                        leftmost = lpos;
                    }
                }

                if (leftmost) {
                    s = (char*)leftmost;
                    DEBUG_TRIE_EXECUTE_r({
                        Perl_re_printf( aTHX_
                            "Matches word #%" UVxf " at position %" IVdf
                            ". Trying full" " pattern...\n",
                            (UV)accepted_word, (IV)(s - real_start)
                        );
                    });
                    if (reginfo->intuit || regtry(reginfo, &s)) {
                        FREETMPS;
                        LEAVE;
                        goto got_it;
                    }
                    if (s < reginfo->strend) {
                        s = HOPc(s,1);
                    }
                    DEBUG_TRIE_EXECUTE_r({
                        Perl_re_printf( aTHX_
                            "Pattern failed. Looking for new start"
                            " point...\n");
                    });
                }
                else {
                    DEBUG_TRIE_EXECUTE_r(
                        Perl_re_printf( aTHX_ "No match.\n"));
                    break;
                }
            }
            FREETMPS;
            LEAVE;
        }
        break;

      /* These combinations are asserted never to occur */
      case EXACTFU_REQ8_t8_pb:
      case EXACTFUP_tb_p8:
      case EXACTFUP_t8_p8:
      case EXACTF_tb_p8:
      case EXACTF_t8_p8:
      case EXACTFAA_NO_TRIE_tb_p8:
      case EXACTFAA_NO_TRIE_t8_p8:
        assert(0);

        /* FALLTHROUGH (when assertions are compiled out) */

      default:
        Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
    }

    return 0;

  got_it:
    return s;
}
/* S_reg_set_capture_string: record in the regexp which string its capture
 * offsets refer to.
 *
 *   rx           the regexp
 *   strbeg       start of the string that was matched against
 *   strend       end of that string
 *   sv           the SV the string came from
 *   flags        REXEC_* flags; REXEC_COPY_STR requests a private copy, and
 *                REXEC_COPY_SKIP_PRE / REXEC_COPY_SKIP_POST allow the copy to
 *                be trimmed
 *   utf8_target  whether the string is UTF-8
 *
 * On return RXp_SUBBEG, RXp_SUBLEN, RXp_SUBOFFSET and RXp_SUBCOFFSET of the
 * regexp are set. */
static void
S_reg_set_capture_string(pTHX_ REGEXP * const rx,
                         char *strbeg,
                         char *strend,
                         SV *sv,
                         U32 flags,
                         bool utf8_target)
{
    struct regexp * const prog = ReANY(rx);

    if (! (flags & REXEC_COPY_STR)) {
        /* No copy wanted: refer straight to the caller's buffer */
        RXp_MATCH_COPY_FREE(prog);
        RXp_SUBBEG(prog) = strbeg;
        RXp_SUBOFFSET(prog) = 0;
        RXp_SUBCOFFSET(prog) = 0;
        RXp_SUBLEN(prog) = strend - strbeg;
        return;
    }

#ifdef PERL_ANY_COW
    if (SvCANCOW(sv)) {
        bool can_reuse_saved_copy;

        DEBUG_C(Perl_re_printf( aTHX_
                          "Copy on write: regexp capture, type %d\n",
                                (int) SvTYPE(sv)));

        /* The existing saved copy will do if it and 'sv' are both COW
         * strings sharing the same buffer */
        can_reuse_saved_copy =
                   RXp_SAVED_COPY(prog)
                && SvIsCOW(RXp_SAVED_COPY(prog))
                && SvPOKp(RXp_SAVED_COPY(prog))
                && SvIsCOW(sv)
                && SvPOKp(sv)
                && SvPVX(sv) == SvPVX(RXp_SAVED_COPY(prog));

        if (can_reuse_saved_copy) {
            /* Drop any separately allocated buffer left from earlier */
            if (RXp_MATCH_COPIED(prog)) {
                Safefree(RXp_SUBBEG(prog));
                RXp_MATCH_COPIED_off(prog);
            }
        }
        else {
            /* Make a new COW copy of 'sv' */
            RXp_MATCH_COPY_FREE(prog);
            RXp_SAVED_COPY(prog) = sv_setsv_cow(RXp_SAVED_COPY(prog), sv);
        }

        RXp_SUBBEG(prog) = (char *)SvPVX_const(RXp_SAVED_COPY(prog));
        assert (SvPOKp(RXp_SAVED_COPY(prog)));
        RXp_SUBLEN(prog) = strend - strbeg;
        RXp_SUBOFFSET(prog) = 0;
        RXp_SUBCOFFSET(prog) = 0;
    }
    else
#endif
    {
        /* Byte range [keep_from, keep_to) of the string to be copied */
        SSize_t keep_from = 0;
        SSize_t keep_to = strend - strbeg;
        SSize_t keep_len;

        if (    (flags & REXEC_COPY_SKIP_POST)
            && !(prog->extflags & RXf_PMf_KEEPCOPY)
            && !(PL_sawampersand & SAWAMPERSAND_RIGHT))
        {
            /* Trim the tail: keep only up to the right-most end of any
             * capture */
            U32 n;

            keep_to = -1;
            for (n = 0; n <= RXp_LASTPAREN(prog); n++) {
                const SSize_t offs_end = RXp_OFFS_END(prog,n);

                if (offs_end > keep_to)
                    keep_to = offs_end;
            }

            if (keep_to == -1) {
                if (PL_sawampersand & SAWAMPERSAND_LEFT)
                    keep_to = RXp_OFFS_START(prog,0);
                else
                    keep_to = 0;
            }
            assert(keep_to >= 0 && keep_to <= strend - strbeg);
        }

        if (    (flags & REXEC_COPY_SKIP_PRE)
            && !(prog->extflags & RXf_PMf_KEEPCOPY)
            && !(PL_sawampersand & SAWAMPERSAND_LEFT))
        {
            /* Trim the head: keep only from the left-most start of any
             * capture; the search stops early once offset 0 is reached */
            U32 n;

            keep_from = keep_to;
            for (n = 0; keep_from && n <= RXp_LASTPAREN(prog); n++) {
                const SSize_t start = RXp_OFFS_START(prog,n);

                if (start != -1 && start < keep_from)
                    keep_from = start;
            }

            if (PL_sawampersand & SAWAMPERSAND_RIGHT) {
                const SSize_t end = RXp_OFFS_END(prog,0);

                if (keep_from > end)
                    keep_from = end;
            }
        }

        assert(   keep_from >= 0
               && keep_from <= keep_to
               && keep_from <= strend - strbeg);
        keep_len = keep_to - keep_from;

        /* Get a buffer: grow the existing one only if it is too small,
         * otherwise allocate afresh */
        if (! RXp_MATCH_COPIED(prog)) {
            RXp_SUBBEG(prog) = (char*)safemalloc(keep_len+1);
        }
        else if (keep_len > RXp_SUBLEN(prog)) {
            RXp_SUBBEG(prog) =
                    (char*)saferealloc(RXp_SUBBEG(prog), keep_len+1);
        }

        Copy(strbeg + keep_from, RXp_SUBBEG(prog), keep_len, char);
        RXp_SUBBEG(prog)[keep_len] = '\0';
        RXp_SUBOFFSET(prog) = keep_from;
        RXp_SUBLEN(prog) = keep_len;
        RXp_MATCH_COPIED_on(prog);
    }

    /* The character offset equals the byte offset unless the string is UTF-8
     * and the offset is non-zero, in which case it has to be converted */
    RXp_SUBCOFFSET(prog) = RXp_SUBOFFSET(prog);
    if (RXp_SUBOFFSET(prog) && utf8_target) {
        if (SvPOKp(sv) && SvPVX(sv) == strbeg) {
            /* The string is the SV's own buffer: convert via the SV */
            RXp_SUBCOFFSET(prog) = sv_pos_b2u_flags(sv, RXp_SUBCOFFSET(prog),
                                            SV_GMAGIC|SV_CONST_RETURN);
        }
        else {
            RXp_SUBCOFFSET(prog) = utf8_length((U8*)strbeg,
                                (U8*)(strbeg+RXp_SUBOFFSET(prog)));
        }
    }
}
I32
Perl_regexec_flags(pTHX_ REGEXP *
const
rx,
char
*stringarg,
char
*strend,
char
*strbeg, SSize_t minend, SV *sv,
void
*data, U32 flags)
{
struct
regexp *
const
prog = ReANY(rx);
char
*s;
regnode *c;
char
*startpos;
SSize_t minlen;
SSize_t dontbother = 0;
const
bool
utf8_target = cBOOL(DO_UTF8(sv));
I32 multiline;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo_buf;
regmatch_info *
const
reginfo = ®info_buf;
regexp_paren_pair *swap = NULL;
I32 oldsave;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
PERL_ARGS_ASSERT_REGEXEC_FLAGS;
PERL_UNUSED_ARG(data);
if
(prog == NULL) {
Perl_croak(aTHX_
"NULL regexp parameter"
);
}
DEBUG_EXECUTE_r(
debug_start_match(rx, utf8_target, stringarg, strend,
"Matching"
);
);
startpos = stringarg;
reginfo->strbeg = strbeg;
reginfo->strend = strend;
reginfo->is_utf8_target = cBOOL(utf8_target);
if
(prog->intflags & PREGf_GPOS_SEEN) {
MAGIC *mg;
reginfo->ganch =
(flags & REXEC_IGNOREPOS)
? stringarg
: ((mg = mg_find_mglob(sv)) && mg->mg_len >= 0)
? strbeg + MgBYTEPOS(mg, sv, strbeg, strend-strbeg)
: strbeg;
DEBUG_GPOS_r(Perl_re_printf( aTHX_
"GPOS ganch set to strbeg[%"
IVdf
"]\n"
, (IV)(reginfo->ganch - strbeg)));
if
(prog->intflags & PREGf_ANCH_GPOS) {
if
(prog->gofs) {
startpos = HOPBACKc(reginfo->ganch, prog->gofs);
if
(!startpos ||
((flags & REXEC_FAIL_ON_UNDERFLOW) && startpos < stringarg))
{
DEBUG_GPOS_r(Perl_re_printf( aTHX_
"fail: ganch-gofs before earliest possible start\n"
));
return
0;
}
}
else
startpos = reginfo->ganch;
}
else
if
(prog->gofs) {
startpos = HOPBACKc(startpos, prog->gofs);
if
(!startpos)
startpos = strbeg;
}
else
if
(prog->intflags & PREGf_GPOS_FLOAT)
startpos = strbeg;
}
minlen = prog->minlen;
if
((startpos + minlen) > strend || startpos < strbeg) {
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"Regex match can't succeed, so not even tried\n"
));
return
0;
}
oldsave = PL_savestack_ix;
s = startpos;
if
((prog->extflags & RXf_USE_INTUIT)
&& !(flags & REXEC_CHECKED))
{
s = re_intuit_start(rx, sv, strbeg, startpos, strend,
flags, NULL);
if
(!s)
return
0;
if
(prog->extflags & RXf_CHECK_ALL) {
assert
(!prog->nparens);
if
( (flags & REXEC_FAIL_ON_UNDERFLOW)
&& (s < stringarg))
{
assert
(prog->intflags & PREGf_GPOS_SEEN);
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"matched, but failing for REXEC_FAIL_ON_UNDERFLOW\n"
));
goto
phooey;
}
RXp_LASTPAREN(prog) = RXp_LASTCLOSEPAREN(prog) = 0;
RXp_MATCH_UTF8_set(prog, utf8_target);
SSize_t match_start = s - strbeg;
SSize_t match_end = utf8_target
? (
char
*)utf8_hop_forward((U8*)s, prog->minlenret, (U8 *) strend) - strbeg
: s - strbeg + prog->minlenret;
CLOSE_ANY_CAPTURE(prog, 0, match_start, match_end);
if
( !(flags & REXEC_NOT_FIRST) )
S_reg_set_capture_string(aTHX_ rx,
strbeg, strend,
sv, flags, utf8_target);
return
1;
}
}
multiline = prog->extflags & RXf_PMf_MULTILINE;
if
(strend - s < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"String too short [regexec_flags]...\n"
));
goto
phooey;
}
if
(UCHARAT(progi->program) != REG_MAGIC) {
Perl_croak(aTHX_
"corrupted regexp program"
);
}
RXp_MATCH_TAINTED_off(prog);
RXp_MATCH_UTF8_set(prog, utf8_target);
reginfo->prog = rx;
reginfo->intuit = 0;
reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx));
reginfo->warned = FALSE;
reginfo->sv = sv;
reginfo->poscache_maxiter = 0;
reginfo->till = stringarg + minend;
if
(prog->extflags & RXf_EVAL_SEEN && SvPADTMP(sv)) {
reginfo->sv = newSV_type(SVt_NULL);
SvSetSV_nosteal(reginfo->sv, sv);
SAVEFREESV(reginfo->sv);
}
{
regmatch_state *old_regmatch_state;
regmatch_slab *old_regmatch_slab;
int
i, max = (prog->extflags & RXf_EVAL_SEEN) ? 2 : 1;
if
(!PL_regmatch_slab) {
Newx(PL_regmatch_slab, 1, regmatch_slab);
PL_regmatch_slab->prev = NULL;
PL_regmatch_slab->next = NULL;
PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
}
old_regmatch_state = PL_regmatch_state;
old_regmatch_slab = PL_regmatch_slab;
for
(i=0; i <= max; i++) {
if
(i == 1)
reginfo->info_aux = &(PL_regmatch_state->u.info_aux);
else
if
(i ==2)
reginfo->info_aux_eval =
reginfo->info_aux->info_aux_eval =
&(PL_regmatch_state->u.info_aux_eval);
if
(++PL_regmatch_state > SLAB_LAST(PL_regmatch_slab))
PL_regmatch_state = S_push_slab(aTHX);
}
reginfo->info_aux->old_regmatch_state = old_regmatch_state;
reginfo->info_aux->old_regmatch_slab = old_regmatch_slab;
reginfo->info_aux->poscache = NULL;
SAVEDESTRUCTOR_X(S_cleanup_regmatch_info_aux, reginfo->info_aux);
if
((prog->extflags & RXf_EVAL_SEEN))
S_setup_eval_state(aTHX_ reginfo);
else
reginfo->info_aux_eval = reginfo->info_aux->info_aux_eval = NULL;
}
if
(PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
swap = RXp_OFFSp(prog);
SAVEFREEPV(swap);
Newxz(RXp_OFFSp(prog), (prog->nparens + 1), regexp_paren_pair);
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
"rex=0x%"
UVxf
" saving offs: orig=0x%"
UVxf
" new=0x%"
UVxf
"\n"
,
0,
PTR2UV(prog),
PTR2UV(swap),
PTR2UV(RXp_OFFSp(prog))
));
}
if
(prog->recurse_locinput)
Zero(prog->recurse_locinput,prog->nparens + 1,
char
*);
if
(prog->intflags & (PREGf_ANCH & ~PREGf_ANCH_GPOS)) {
char
*end;
if
(regtry(reginfo, &s))
goto
got_it;
if
(!(prog->intflags & PREGf_ANCH_MBOL))
goto
phooey;
if
(minlen)
dontbother = minlen - 1;
end = HOP3c(strend, -dontbother, strbeg) - 1;
while
(s <= end) {
if
(*s++ !=
'\n'
)
continue
;
if
(prog->check_substr || prog->check_utf8) {
s = re_intuit_start(rx, sv, strbeg, s, strend, flags, NULL);
if
(!s)
goto
phooey;
}
if
(regtry(reginfo, &s))
goto
got_it;
}
goto
phooey;
}
if
(prog->intflags & PREGf_ANCH_GPOS)
{
assert
(prog->intflags & PREGf_GPOS_SEEN);
assert
(startpos == HOPBACKc(reginfo->ganch, prog->gofs));
if
(s == startpos && regtry(reginfo, &s))
goto
got_it;
goto
phooey;
}
if
((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
char
ch;
#ifdef DEBUGGING
int
did_match = 0;
#endif
if
(utf8_target) {
if
(! prog->anchored_utf8) {
to_utf8_substr(prog);
}
ch = SvPVX_const(prog->anchored_utf8)[0];
REXEC_FBC_UTF8_SCAN(
if
(*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if
(regtry(reginfo, &s))
goto
got_it;
s += UTF8_SAFE_SKIP(s, strend);
while
(s < strend && *s == ch)
s += UTF8SKIP(s);
}
);
}
else
{
if
(! prog->anchored_substr) {
if
(! to_byte_substr(prog)) {
NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
}
}
ch = SvPVX_const(prog->anchored_substr)[0];
REXEC_FBC_NON_UTF8_SCAN(
if
(*s == ch) {
DEBUG_EXECUTE_r( did_match = 1 );
if
(regtry(reginfo, &s))
goto
got_it;
s++;
while
(s < strend && *s == ch)
s++;
}
);
}
DEBUG_EXECUTE_r(
if
(!did_match)
Perl_re_printf( aTHX_
"Did not find anchored character...\n"
)
);
}
else
if
(prog->anchored_substr != NULL
|| prog->anchored_utf8 != NULL
|| ((prog->float_substr != NULL || prog->float_utf8 != NULL)
&& prog->float_max_offset < strend - s)) {
SV *must;
SSize_t back_max;
SSize_t back_min;
char
*last;
char
*last1;
#ifdef DEBUGGING
int
did_match = 0;
#endif
if
(prog->anchored_substr || prog->anchored_utf8) {
if
(utf8_target) {
if
(! prog->anchored_utf8) {
to_utf8_substr(prog);
}
must = prog->anchored_utf8;
}
else
{
if
(! prog->anchored_substr) {
if
(! to_byte_substr(prog)) {
NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
}
}
must = prog->anchored_substr;
}
back_max = back_min = prog->anchored_offset;
}
else
{
if
(utf8_target) {
if
(! prog->float_utf8) {
to_utf8_substr(prog);
}
must = prog->float_utf8;
}
else
{
if
(! prog->float_substr) {
if
(! to_byte_substr(prog)) {
NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
}
}
must = prog->float_substr;
}
back_max = prog->float_max_offset;
back_min = prog->float_min_offset;
}
if
(back_min<0) {
last = strend;
}
else
{
last = HOP3c(strend,
-(SSize_t)(CHR_SVLEN(must)
- (SvTAIL(must) != 0) + back_min), strbeg);
}
if
(s > reginfo->strbeg)
last1 = HOPc(s, -1);
else
last1 = s - 1;
dontbother = 0;
strend = HOPc(strend, -dontbother);
while
( (s <= last) &&
(s = fbm_instr((unsigned
char
*)HOP4c(s, back_min, strbeg, strend),
(unsigned
char
*)strend, must,
multiline ? FBMrf_MULTILINE : 0)) ) {
DEBUG_EXECUTE_r( did_match = 1 );
if
(HOPc(s, -back_max) > last1) {
last1 = HOPc(s, -back_min);
s = HOPc(s, -back_max);
}
else
{
char
*
const
t = (last1 >= reginfo->strbeg)
? HOPc(last1, 1) : last1 + 1;
last1 = HOPc(s, -back_min);
s = t;
}
if
(utf8_target) {
while
(s <= last1) {
if
(regtry(reginfo, &s))
goto
got_it;
if
(s >= last1) {
s++;
break
;
}
s += UTF8SKIP(s);
}
}
else
{
while
(s <= last1) {
if
(regtry(reginfo, &s))
goto
got_it;
s++;
}
}
}
DEBUG_EXECUTE_r(
if
(!did_match) {
RE_PV_QUOTED_DECL(quoted, utf8_target, PERL_DEBUG_PAD_ZERO(0),
SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
Perl_re_printf( aTHX_
"Did not find %s substr %s%s...\n"
,
((must == prog->anchored_substr || must == prog->anchored_utf8)
?
"anchored"
:
"floating"
),
quoted, RE_SV_TAIL(must));
});
goto
phooey;
}
else
if
( (c = progi->regstclass) ) {
if
(minlen) {
const
OPCODE op = OP(progi->regstclass);
if
(REGNODE_TYPE(op) != EXACT && REGNODE_TYPE(op) != TRIE)
strend = HOPc(strend, -(minlen - 1));
}
DEBUG_EXECUTE_r({
SV *
const
prop = sv_newmortal();
regprop(prog, prop, c, reginfo, NULL);
{
RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1),
s,strend-s,PL_dump_re_max_len);
Perl_re_printf( aTHX_
"Matching stclass %.*s against %s (%d bytes)\n"
,
(
int
)SvCUR(prop), SvPVX_const(prop),
quoted, (
int
)(strend - s));
}
});
if
(find_byclass(prog, c, s, strend, reginfo))
goto
got_it;
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"Contradicts stclass... [regexec_flags]\n"
));
}
else
{
dontbother = 0;
if
(prog->float_substr != NULL || prog->float_utf8 != NULL) {
char
*last= NULL;
SV* float_real;
STRLEN len;
const
char
*little;
if
(utf8_target) {
if
(! prog->float_utf8) {
to_utf8_substr(prog);
}
float_real = prog->float_utf8;
}
else
{
if
(! prog->float_substr) {
if
(! to_byte_substr(prog)) {
NON_UTF8_TARGET_BUT_UTF8_REQUIRED(phooey);
}
}
float_real = prog->float_substr;
}
little = SvPV_const(float_real, len);
if
(SvTAIL(float_real)) {
char
*checkpos= strend - len;
DEBUG_OPTIMISE_r(
Perl_re_printf( aTHX_
"%sChecking for float_real.%s\n"
,
PL_colors[4], PL_colors[5]));
if
(checkpos + 1 < strbeg) {
DEBUG_EXECUTE_r(
Perl_re_printf( aTHX_
"%sString shorter than required trailing substring, cannot match.%s\n"
,
PL_colors[4], PL_colors[5]));
goto
phooey;
}
else
if
(memEQ(checkpos + 1, little, len - 1)) {
last = checkpos + 1;
}
else
if
(checkpos < strbeg) {
DEBUG_EXECUTE_r(
Perl_re_printf( aTHX_
"%sString does not contain required trailing substring, cannot match.%s\n"
,
PL_colors[4], PL_colors[5]));
goto
phooey;
}
else
if
(!multiline) {
if
(memEQ(checkpos, little, len)) {
last= checkpos;
}
else
{
DEBUG_EXECUTE_r(
Perl_re_printf( aTHX_
"%sString does not contain required trailing substring, cannot match.%s\n"
,
PL_colors[4], PL_colors[5]));
goto
phooey;
}
}
else
{
goto
find_last;
}
}
else
{
find_last:
if
(len)
last = rninstr(s, strend, little, little + len);
else
last = strend;
}
if
(!last) {
DEBUG_EXECUTE_r(
Perl_re_printf( aTHX_
"%sString does not contain required substring, cannot match.%s\n"
,
PL_colors[4], PL_colors[5]
));
goto
phooey;
}
dontbother = strend - last + prog->float_min_offset;
}
if
(minlen && (dontbother < minlen))
dontbother = minlen - 1;
strend -= dontbother;
if
(utf8_target) {
for
(;;) {
if
(regtry(reginfo, &s))
goto
got_it;
if
(s >= strend)
break
;
s += UTF8SKIP(s);
};
}
else
{
do
{
if
(regtry(reginfo, &s))
goto
got_it;
}
while
(s++ < strend);
}
}
goto
phooey;
got_it:
if
( (flags & REXEC_FAIL_ON_UNDERFLOW)
&& (RXp_OFFS_START(prog,0) < stringarg - strbeg))
{
assert
(prog->intflags & PREGf_GPOS_SEEN);
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"matched, but failing for REXEC_FAIL_ON_UNDERFLOW\n"
));
goto
phooey;
}
LEAVE_SCOPE(oldsave);
if
(RXp_PAREN_NAMES(prog))
(
void
)hv_iterinit(RXp_PAREN_NAMES(prog));
if
( !(flags & REXEC_NOT_FIRST) )
S_reg_set_capture_string(aTHX_ rx,
strbeg, reginfo->strend,
sv, flags, utf8_target);
return
1;
phooey:
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"%sMatch failed%s\n"
,
PL_colors[4], PL_colors[5]));
if
(swap) {
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
"rex=0x%"
UVxf
" rolling back offs: 0x%"
UVxf
" will be freed; restoring data to =0x%"
UVxf
"\n"
,
0,
PTR2UV(prog),
PTR2UV(RXp_OFFSp(prog)),
PTR2UV(swap)
));
Copy(swap, RXp_OFFSp(prog), prog->nparens + 1, regexp_paren_pair);
}
LEAVE_SCOPE(oldsave);
return
0;
}
/* SET_reg_curpm(Re2): only when reginfo->info_aux_eval is non-NULL, make Re2
 * the regex stored in PL_reg_curpm.  A reference is taken on Re2 first, then
 * the reference on the regex currently returned by PM_GETRE(PL_reg_curpm) is
 * dropped, and finally Re2 is stored with PM_SETRE.  Expects a variable
 * named 'reginfo' to be in scope at the point of use. */
#define SET_reg_curpm(Re2) \
if
(reginfo->info_aux_eval) { \
(
void
)ReREFCNT_inc(Re2); \
ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \
PM_SETRE((PL_reg_curpm), (Re2)); \
}
/*
 * S_regtry - try to match the compiled program at one starting position.
 *
 * reginfo    per-match state; its 'prog' field supplies the regex, 'strbeg'
 *            is used to turn the start pointer into an offset, and
 *            'cutpoint' is cleared here before the attempt.
 * startposp  in: the position to try.  out: if the attempt fails and
 *            regmatch() left reginfo->cutpoint set, it is overwritten with
 *            that cutpoint.
 *
 * Before calling regmatch() the function records the start offset of the
 * whole match in offset pair 0, zeroes lastparen/lastcloseparen, and marks
 * every capture pair 1..nparens as unset (-1, -1).
 *
 * Returns 1 on success (after storing regmatch()'s result as the end offset
 * of pair 0); returns 0 on failure.
 */
STATIC bool
S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
{
    CHECKPOINT lastcp;
    REGEXP * const rx = reginfo->prog;
    regexp * const prog = ReANY(rx);
    SSize_t matched_end;
#ifdef DEBUGGING
    U32 depth = 0;
#endif
    RXi_GET_DECL(prog,progi);
    DECLARE_AND_GET_RE_DEBUG_FLAGS;

    PERL_ARGS_ASSERT_REGTRY;

    reginfo->cutpoint = NULL;

    RXp_OFFSp(prog)[0].start = *startposp - reginfo->strbeg;
    RXp_LASTPAREN(prog) = 0;
    RXp_LASTCLOSEPAREN(prog) = 0;

    if (prog->nparens) {
        /* Walk the pairs after pair 0, invalidating one per step */
        regexp_paren_pair *slot = RXp_OFFSp(prog);
        I32 remaining = prog->nparens;

        while (remaining > (I32)RXp_LASTPAREN(prog)) {
            slot++;
            slot->start = -1;
            slot->end = -1;
            remaining--;
        }
    }

    REGCP_SET(lastcp);
    matched_end = regmatch(reginfo, *startposp, progi->program + 1);

    if (matched_end == -1) {
        /* Failed: hand back any cut point, then unwind to the checkpoint */
        if (reginfo->cutpoint) {
            *startposp = reginfo->cutpoint;
        }
        REGCP_UNWIND(lastcp);
        return 0;
    }

    RXp_OFFSp(prog)[0].end = matched_end;
    return 1;
}
/* Field width of the blank "%*s" padding that begins every line printed by
 * Perl_re_exec_indentf(). */
#define REPORT_CODE_OFF 29
/* Width of the depth-based indent: the depth modulo 20, as an int. */
#define INDENT_CHARS(depth) ((int)(depth) % 20)
#ifdef PERL_RE_BUILD_DEBUG
/*
 * Perl_re_exec_indentf - printf-style debug output with a depth prefix.
 *
 * Writes to Perl_debug_log a prefix consisting of REPORT_CODE_OFF blanks,
 * "|", the depth as an unsigned number at least 4 wide, "| ", and
 * INDENT_CHARS(depth) blanks; then writes the caller's formatted message.
 *
 * fmt    printf-style format string for the message
 * depth  number shown in the prefix and used to size the indent; it is also
 *        the last named parameter, so the varargs start after it
 * ...    arguments consumed by 'fmt'
 *
 * Returns what PerlIO_vprintf() returned for the message; the result of
 * printing the prefix is not included.
 */
int
Perl_re_exec_indentf(pTHX_
const
char
*fmt, U32 depth, ...)
{
va_list
ap;
int
result;
PerlIO *f= Perl_debug_log;
PERL_ARGS_ASSERT_RE_EXEC_INDENTF;
va_start
(ap, depth);
/* Prefix first, then the caller's message */
PerlIO_printf(f,
"%*s|%4"
UVuf
"| %*s"
, REPORT_CODE_OFF,
""
, (UV)depth, INDENT_CHARS(depth),
""
);
result = PerlIO_vprintf(f, fmt, ap);
va_end
(ap);
return
result;
}
#endif /* PERL_RE_BUILD_DEBUG */
/*
 * S_push_slab - make the slab after the current one the current slab.
 *
 * If the current slab (PL_regmatch_slab) has no successor, a new slab is
 * allocated and linked in after it (prev points back to the current slab,
 * next is NULL).  PL_regmatch_slab is then advanced to that successor.
 *
 * Returns SLAB_FIRST() of the slab that is now current.
 */
STATIC regmatch_state *
S_push_slab(pTHX)
{
    regmatch_slab * const current = PL_regmatch_slab;
    regmatch_slab *successor = current->next;

    if (successor == NULL) {
        Newx(successor, 1, regmatch_slab);
        successor->prev = current;
        successor->next = NULL;
        current->next = successor;
    }

    PL_regmatch_slab = successor;
    return SLAB_FIRST(successor);
}
#ifdef DEBUGGING
/*
 * S_debug_start_match - print the "<blurb> REx <pattern> against <string>"
 * debug banner.
 *
 * prog         the regex whose precompiled pattern text is shown
 * utf8_target  whether the target string is UTF-8 (affects quoting and the
 *              second output line)
 * start, end   bounds of the target text to display
 * blurb        word(s) placed before " REx" in the banner
 *
 * Initialises the debug colours on first use.  When either the pattern or
 * the target is UTF-8, a second line says which ("pattern", "string", or
 * both joined by " and ").
 */
STATIC void
S_debug_start_match(pTHX_ const REGEXP *prog, const bool utf8_target,
                    const char *start, const char *end, const char *blurb)
{
    const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;

    PERL_ARGS_ASSERT_DEBUG_START_MATCH;

    if (!PL_colorset)
        reginitcolors();

    {
        RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
            RX_PRECOMP_const(prog), RX_PRELEN(prog), PL_dump_re_max_len);
        RE_PV_QUOTED_DECL(s1, utf8_target, PERL_DEBUG_PAD_ZERO(1),
            start, end - start, PL_dump_re_max_len);

        /* Pieces of the optional "UTF-8 ..." line */
        const char * const pattern_word = utf8_pat ? "pattern" : "";
        const char * const joiner = (utf8_pat && utf8_target) ? " and " : "";
        const char * const string_word = utf8_target ? "string" : "";

        Perl_re_printf( aTHX_ "%s%s REx%s %s against %s\n",
                        PL_colors[4], blurb, PL_colors[5], s0, s1);

        if (utf8_pat || utf8_target) {
            Perl_re_printf( aTHX_ "UTF-8 %s%s%s...\n",
                            pattern_word, joiner, string_word);
        }
    }
}
/*
 * S_dump_exec_pos - debug helper: print the current match position with a
 * little of the surrounding target text.
 *
 * locinput          current position in the target string
 * scan              current regnode; not referenced in this body
 * loc_regeol        end of the target string
 * loc_bostr         beginning of the target string
 * loc_reg_starttry  where the current match attempt started
 * utf8_target       whether the target is UTF-8; if so, the displayed window
 *                   is adjusted so it does not begin or end on a
 *                   continuation byte
 * depth             number printed in the trailing "|%4u| " column
 *
 * The output is the offset of locinput from loc_bostr, then three text
 * segments inside "<...>": the text before loc_reg_starttry (colours 4/5),
 * the text from loc_reg_starttry up to locinput (colours 2/3), and the text
 * from locinput onward (colours 0/1), followed by padding and the depth.
 */
STATIC
void
S_dump_exec_pos(pTHX_
const
char
*locinput,
const
regnode *scan,
const
char
*loc_regeol,
const
char
*loc_bostr,
const
char
*loc_reg_starttry,
const
bool
utf8_target,
const
U32 depth
)
{
/* Non-zero when any of colours 0, 2 or 4 is a non-empty string */
const
int
docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
/* Budget for the text shown after locinput */
const
int
taill = (docolor ? 10 : 7);
/* l: number of bytes shown after locinput, capped at taill */
int
l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
/* pref_len: number of bytes shown before locinput; takes whatever part of
 * the (5 + taill) total budget that l did not use, limited by what exists */
int
pref_len = (locinput - loc_bostr) > (5 + taill) - l
? (5 + taill) - l : locinput - loc_bostr;
/* pref0_len: the portion of the prefix that lies before loc_reg_starttry */
int
pref0_len;
PERL_ARGS_ASSERT_DUMP_EXEC_POS;
if
(utf8_target) {
/* Extend the prefix leftwards until it starts on a non-continuation byte */
while
(UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len))) {
pref_len++;
}
}
pref0_len = pref_len - (locinput - loc_reg_starttry);
/* If the prefix did not use its share, let the tail grow into the slack */
if
(l + pref_len < (5 + taill) && l < loc_regeol - locinput)
l = ( loc_regeol - locinput > (5 + taill) - pref_len
? (5 + taill) - pref_len : loc_regeol - locinput);
if
(utf8_target) {
/* Shrink the tail until it ends just before a non-continuation byte */
while
(UTF8_IS_CONTINUATION(*(U8*)(locinput + l))) {
l--;
}
}
/* Clamp pref0_len into [0, pref_len] */
if
(pref0_len < 0)
pref0_len = 0;
if
(pref0_len > pref_len)
pref0_len = pref_len;
{
const
int
is_uni = utf8_target ? 1 : 0;
/* s0: prefix text before the start of this attempt */
RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
(locinput - pref_len),pref0_len, PL_dump_re_max_len, 4, 5);
/* s1: prefix text already covered by this attempt */
RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
(locinput - pref_len + pref0_len),
pref_len - pref0_len, PL_dump_re_max_len, 2, 3);
/* s2: text from the current position onward (max argument is 10 here) */
RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
locinput, loc_regeol - locinput, 10, 0, 1);
/* Total displayed length, used to pad the column out to 19 */
const
STRLEN tlen=len0+len1+len2;
Perl_re_printf( aTHX_
"%4"
IVdf
" <%.*s%.*s%s%.*s>%*s|%4"
UVuf
"| "
,
(IV)(locinput - loc_bostr),
len0, s0,
len1, s1,
(docolor ?
""
:
"> <"
),
len2, s2,
(
int
)(tlen > 19 ? 0 : 19 - tlen),
""
,
(UV)depth);
}
}
#endif
/*
 * S_reg_check_named_buff_matched - find the first capture group, among those
 * listed for a named-buffer node, that has matched.
 *
 * rex   the regex being executed
 * scan  node whose ARG1u() indexes the regex's data array; that entry is an
 *       SV whose string buffer is read as an array of I32 group numbers and
 *       whose IV is the number of entries
 *
 * A group counts as matched when it is not beyond lastparen and its end
 * offset is not -1.
 *
 * Returns the number of the first such group, or 0 if none has matched.
 */
STATIC I32
S_reg_check_named_buff_matched(const regexp *rex, const regnode *scan)
{
    I32 idx;
    RXi_GET_DECL(rex,rexi);
    SV *name_data = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
    I32 *group_nums = (I32*)SvPVX(name_data);

    PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;

    for (idx = 0; idx < SvIVX(name_data); idx++) {
        if ((I32)RXp_LASTPAREN(rex) < group_nums[idx]) {
            continue;       /* group lies beyond the last one opened */
        }
        if (RXp_OFFS_END(rex,group_nums[idx]) == -1) {
            continue;       /* group has no recorded end */
        }
        return group_nums[idx];
    }

    return 0;
}
/*
 * S_setup_EXACTISH_ST - precompute what the first character of an EXACT-ish
 * node can match, for the fast test in S_test_EXACTISH_ST().
 *
 * text_node  the node whose string (STRING/STR_LEN) and opcode are examined
 * m          output: filled with the candidate first characters, encoded to
 *            suit the target (UTF-8 or not).  Fields written here: count,
 *            min_length, max_length, initial_exact, initial_definitive,
 *            first_byte_mask, first_byte_anded, mask32, anded32, matches[],
 *            lengths[]
 * reginfo    supplies is_utf8_target and is_utf8_pat
 *
 * Returns FALSE if nothing can match (either the node requires UTF-8 and
 * the target is not UTF-8, or no candidate was found, in which case
 * m->min_length is set to 0); TRUE otherwise.
 *
 * Outline: (1) for EXACTF/EXACTFL nodes whose handling depends on run-time
 * context, pick an effective op and possibly build a modified copy of the
 * pattern start in mod_pat; (2) record the pattern's own first character;
 * (3) for folding ops, add every character that folds to it; (4) derive
 * per-byte mask/AND values over the bytes common to all candidates; (5) pack
 * the candidates into m->matches with the longest one last.
 */
static
bool
S_setup_EXACTISH_ST(pTHX_
const
regnode *
const
text_node,
struct
next_matchable_info * m,
regmatch_info *reginfo)
{
const
bool
utf8_target = reginfo->is_utf8_target;
/* Not const: may be changed below when the pattern copy is re-encoded */
bool
utf8_pat = reginfo->is_utf8_pat;
PERL_UINT_FAST8_T i;
/* One row per candidate first character, with its byte length alongside */
U8 matches[MAX_MATCHES][UTF8_MAXBYTES_CASE + 1] = { { 0 } };
U8 lengths[MAX_MATCHES] = { 0 };
U8 index_of_longest = 0;
U8 *pat = (U8*)STRING(text_node);
Size_t pat_len = STR_LEN(text_node);
U8 op = OP(text_node);
/* Per-byte mask / AND accumulators for the first (up to) 5 bytes */
U8 byte_mask[5] = {0};
U8 byte_anded[5] = {0};
/* Non-zero: a single code point whose fold is the multi-character sequence
 * that begins the pattern */
UV multi_fold_from = 0;
/* Scratch buffer for a modified copy of the start of the pattern */
U8 mod_pat[UTF8_MAXBYTES_CASE + 1] = {
'\0'
};
m->max_length = 0;
m->min_length = 255;
m->count = 0;
/* A node that requires UTF-8 cannot match a non-UTF-8 target */
if
(! utf8_target && isEXACT_REQ8(op)) {
return
FALSE;
}
/* Function-local op value: reuses PSEUDO to mean "EXACTFL under a UTF-8
 * Turkic locale" */
#define TURKISH PSEUDO
if
( (op == EXACTF && utf8_target)
|| (op == EXACTFL && IN_UTF8_CTYPE_LOCALE))
{
/* Choose the effective op now that the run-time context is known */
if
(op == EXACTFL && IN_UTF8_TURKIC_LOCALE) {
op = TURKISH;
}
else
{
op = EXACTFU;
}
if
(utf8_pat) {
if
(is_PROBLEMATIC_LOCALE_FOLD_utf8(pat)) {
multi_fold_from
= what_MULTI_CHAR_FOLD_utf8_safe(pat, pat + pat_len);
if
(multi_fold_from) {
/* Pattern starts with a multi-char fold: replace it with the fold of
 * the single source code point */
_to_uni_fold_flags(multi_fold_from, mod_pat, &pat_len,
FOLD_FLAGS_FULL);
pat = mod_pat;
}
else
if
( UNLIKELY(op == TURKISH)
&& pat_len >= 3
&& isALPHA_FOLD_EQ(pat[0],
'f'
)
&& ( memBEGINs(pat + 1, pat_len - 1,
LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8)
|| ( pat_len >= 4
&& isALPHA_FOLD_EQ(pat[1],
'f'
)
&& memBEGINs(pat + 2, pat_len - 2,
LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE_UTF8)
))) {
/* Turkic case: 'f' or "ff" (either case) followed by capital I with dot
 * above is replaced by "fi" / "ffi" */
if
(pat[1] ==
'f'
) {
pat_len = 3;
Copy(
"ffi"
, mod_pat, pat_len, U8);
}
else
{
pat_len = 2;
Copy(
"fi"
, mod_pat, pat_len, U8);
}
pat = mod_pat;
}
else
if
( UTF8_IS_DOWNGRADEABLE_START(*pat)
&& LIKELY(memNEs(pat, pat_len, MICRO_SIGN_UTF8))
&& LIKELY(memNEs(pat, pat_len,
LATIN_SMALL_LETTER_SHARP_S_UTF8))
&& (LIKELY(op != TURKISH || *pat !=
'I'
)))
{
/* First character fits in a byte and is not one of the excluded ones:
 * store its lowercase as a single non-UTF-8 byte */
mod_pat[0] = toLOWER_L1(EIGHT_BIT_UTF8_TO_NATIVE(pat[0],
pat[1]));
pat_len = 1;
pat = mod_pat;
utf8_pat = FALSE;
}
else
{
/* Everything else: use the general UTF-8 fold function */
_to_utf8_fold_flags(pat, pat + pat_len,
mod_pat, &pat_len,
FOLD_FLAGS_FULL|FOLD_FLAGS_LOCALE);
pat = mod_pat;
}
}
}
else
if
((multi_fold_from
= what_MULTI_CHAR_FOLD_latin1_safe(pat, pat + pat_len)))
{
/* Non-UTF-8 pattern starting with a multi-char fold */
_to_uni_fold_flags(multi_fold_from, mod_pat, &pat_len,
FOLD_FLAGS_FULL);
pat = mod_pat;
}
else
if
(UNLIKELY(*pat == LATIN_SMALL_LETTER_SHARP_S)) {
/* Sharp s becomes "ss"; both bytes are 's', and the copy takes the
 * target's UTF-8ness */
mod_pat[0] = mod_pat[1] =
's'
;
pat_len = 2;
utf8_pat = utf8_target;
pat = mod_pat;
}
else
if
(LIKELY(op != TURKISH || *pat !=
'I'
)) {
/* Ordinary byte: use its lowercase (skipped for 'I' under TURKISH) */
mod_pat[0] = toLOWER_L1(*pat);
pat_len = 1;
pat = mod_pat;
}
}
else
if
( utf8_target
&& ! utf8_pat
&& op == EXACTFAA_NO_TRIE
&& *pat == LATIN_SMALL_LETTER_SHARP_S)
{
/* EXACTFAA_NO_TRIE sharp s against a UTF-8 target: the pattern copy
 * becomes two LATIN SMALL LETTER LONG S, in UTF-8 */
pat_len = 2 * (
sizeof
(LATIN_SMALL_LETTER_LONG_S_UTF8) - 1);
Copy(LATIN_SMALL_LETTER_LONG_S_UTF8
LATIN_SMALL_LETTER_LONG_S_UTF8, mod_pat, pat_len, U8);
pat = mod_pat;
utf8_pat = TRUE;
}
/* Candidate 0 is the pattern's own first character, converted to the
 * target's encoding where the two differ */
if
(UTF8_IS_INVARIANT(*pat)) {
matches[0][0] = pat[0];
lengths[0] = 1;
m->count++;
}
else
if
(utf8_target) {
if
(utf8_pat) {
lengths[0] = UTF8SKIP(pat);
Copy(pat, matches[0], lengths[0], U8);
m->count++;
}
else
{
/* Byte pattern, UTF-8 target: encode the byte as two UTF-8 bytes */
matches[0][0] = UTF8_EIGHT_BIT_HI(pat[0]);
matches[0][1] = UTF8_EIGHT_BIT_LO(pat[0]);
lengths[0] = 2;
m->count++;
}
}
else
if
(! utf8_pat) {
matches[0][0] = pat[0];
lengths[0] = 1;
m->count++;
}
else
if
(UTF8_IS_DOWNGRADEABLE_START(*pat))
{
/* UTF-8 pattern, byte target: usable only if the character fits a byte;
 * otherwise no candidate is recorded here */
matches[0][0] = EIGHT_BIT_UTF8_TO_NATIVE(pat[0], pat[1]);
lengths[0] = 1;
m->count++;
}
if
(m->count) {
m->max_length = lengths[0];
m->min_length = lengths[0];
}
/* Folding nodes: add the characters that fold to the first character */
if
(UNLIKELY(op == TURKISH) || isEXACTFish(op)) {
/* folded: first code point of the (possibly modified) pattern */
UV folded;
U32 first_fold_from;
const
U32 * remaining_fold_froms;
Size_t folds_to_count;
if
(utf8_pat) {
folded = valid_utf8_to_uvchr(pat, NULL);
multi_fold_from
= what_MULTI_CHAR_FOLD_utf8_safe(pat, pat + pat_len);
}
else
{
folded = *pat;
multi_fold_from
= what_MULTI_CHAR_FOLD_latin1_safe(pat, pat + pat_len);
}
/* The special-case branches below supply exactly one fold source and do
 * not set remaining_fold_froms; with a count of 1 it is never indexed */
folds_to_count = 1;
if
(UNLIKELY(op == EXACTFL) && folded < 256) {
first_fold_from = PL_fold_locale[folded];
}
else
if
( op == EXACTFL && utf8_target && utf8_pat
&& memBEGINs(pat, pat_len, LATIN_SMALL_LETTER_LONG_S_UTF8
LATIN_SMALL_LETTER_LONG_S_UTF8))
{
first_fold_from = LATIN_CAPITAL_LETTER_SHARP_S;
}
else
if
(UNLIKELY( op == TURKISH
&& ( isALPHA_FOLD_EQ(folded,
'i'
)
|| inRANGE(folded,
LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE,
LATIN_SMALL_LETTER_DOTLESS_I))))
{
/* Turkic pairing: 'i' <-> capital I with dot above, 'I' <-> dotless i */
if
(folded ==
'i'
)
first_fold_from = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
else
if
(folded ==
'I'
)
first_fold_from = LATIN_SMALL_LETTER_DOTLESS_I;
else
if
(folded == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)
first_fold_from =
'i'
;
else
first_fold_from =
'I'
;
}
else
{
/* General case; also the re-entry point for the second pass that uses
 * the multi-char fold source as 'folded' (see the goto further down) */
redo_multi:
folds_to_count = _inverse_folds(folded, &first_fold_from,
&remaining_fold_froms);
}
/* One iteration per fold source, plus one extra iteration for
 * multi_fold_from when that is non-zero */
for
(i = 0; i < folds_to_count
+ UNLIKELY(multi_fold_from != 0); i++)
{
UV fold_from = 0;
if
(i >= folds_to_count) {
fold_from = multi_fold_from;
}
else
if
(i == 0) {
fold_from = first_fold_from;
}
else
if
(i < folds_to_count) {
fold_from = remaining_fold_froms[i-1];
}
/* Skip the character itself */
if
(folded == fold_from) {
continue
;
}
/* EXACTF: skip unless both characters are ASCII */
if
(op == EXACTF && (! isASCII(folded) || ! isASCII(fold_from))) {
continue
;
}
/* EXACTFAA..EXACTFAA_NO_TRIE: skip folds that cross the ASCII boundary */
if
( isASCII(folded) != isASCII(fold_from)
&& inRANGE(op, EXACTFAA, EXACTFAA_NO_TRIE))
{
continue
;
}
/* EXACTFL: skip folds that cross the 255/256 boundary */
if
( op == EXACTFL
&& (folded < 256) != (fold_from < 256))
{
continue
;
}
assert
(m->count < MAX_MATCHES);
if
(utf8_target) {
uvchr_to_utf8(matches[(U8) m->count], fold_from);
lengths[m->count] = UVCHR_SKIP(fold_from);
m->count++;
}
else
{
/* A non-UTF-8 target cannot contain a code point above 255 */
if
(fold_from > 255) {
continue
;
}
matches[m->count][0] = fold_from;
lengths[m->count] = 1;
m->count++;
}
/* Track the shortest and longest candidate, remembering the longest's row */
if
(m->min_length > lengths[m->count-1]) {
m->min_length = lengths[m->count-1];
}
if
(m->max_length < lengths[m->count-1]) {
index_of_longest = m->count - 1;
m->max_length = lengths[index_of_longest];
}
}
/* Second pass: look up what folds to the multi-char fold source itself.
 * multi_fold_from is zeroed first, so this jump is taken at most once. */
if
(multi_fold_from) {
folded = multi_fold_from;
multi_fold_from = 0;
goto
redo_multi;
}
}
if
(m->count == 0) {
m->min_length = 0;
return
FALSE;
}
m->initial_exact = 0;
m->initial_definitive = 0;
{
unsigned
int
mask_ones = 0;
unsigned
int
possible_ones = 0;
U8 j;
/* For each byte position present in every candidate (at most 5) */
for
(j = 0; j < MIN(m->min_length, 5); j++) {
byte_mask[j] = 0xFF;
byte_anded[j] = matches[0][j];
/* A mask bit is cleared wherever a candidate's bit differs from the
 * running AND; byte_anded accumulates the AND of all candidates */
for
(i = 1; i < (PERL_UINT_FAST8_T) m->count; i++) {
byte_mask[j] &= ~ (byte_anded[j] ^ matches[i][j]);
byte_anded[j] &= matches[i][j];
}
/* Only when all candidates have the same length: count leading bytes
 * whose cumulative mask is all ones (initial_exact) or has at most one
 * zero bit in total (initial_definitive) */
if
(m->min_length == m->max_length) {
mask_ones += PL_bitcount[byte_mask[j]];
possible_ones += 8;
if
(mask_ones + 1 >= possible_ones) {
m->initial_definitive++;
if
(mask_ones >= possible_ones) {
m->initial_exact++;
}
}
}
}
}
/* Byte 0 is kept on its own; bytes 1..4 are packed into 32-bit words with
 * byte 1 in the low 8 bits */
m->first_byte_mask = byte_mask[0];
m->first_byte_anded = byte_anded[0];
m->mask32 = m->anded32 = 0;
for
(i = 1; i < MIN(m->min_length, 5); i++) {
U8 which = i;
U8 shift = (which - 1) * 8;
m->mask32 |= (U32) byte_mask[i] << shift;
m->anded32 |= (U32) byte_anded[i] << shift;
}
{
/* Flatten the candidates into m->matches back to back, keeping their
 * order except that the longest one is placed last */
U8 cur_pos = 0;
U8 output_index = 0;
if
(m->count > 1) {
for
(i = 0; i < (PERL_UINT_FAST8_T) m->count; i++) {
if
(i != index_of_longest) {
assert
(cur_pos + lengths[i] <= C_ARRAY_LENGTH(m->matches));
Copy(matches[i], m->matches + cur_pos, lengths[i], U8);
cur_pos += lengths[i];
m->lengths[output_index++] = lengths[i];
}
}
}
assert
(cur_pos + lengths[index_of_longest] <= C_ARRAY_LENGTH(m->matches));
Copy(matches[index_of_longest], m->matches + cur_pos,
lengths[index_of_longest], U8);
m->lengths[output_index] = lengths[index_of_longest];
}
return
TRUE;
}
/*
 * S_test_EXACTISH_ST - quick test of whether the bytes at 'loc' could begin
 * a match, using the masks in 'info'.
 *
 * loc   position in the target; bytes loc[0] .. loc[min(info.min_length,5)-1]
 *       are read
 * info  mask data (first_byte_mask/anded, mask32/anded32, min_length)
 *
 * Byte 0 is checked against first_byte_mask/first_byte_anded.  If
 * min_length is 1 that is the whole test.  Otherwise bytes 1..4 (as many as
 * min_length covers) are packed into a 32-bit word, byte 1 lowest, and that
 * word masked with mask32 must equal anded32.  A min_length of 0 is declared
 * impossible via ASSUME(0).
 *
 * Returns TRUE if the bytes pass, FALSE otherwise.
 */
PERL_STATIC_FORCE_INLINE bool
S_test_EXACTISH_ST(const char * loc, struct next_matchable_info info)
{
    const U8 * const bytes = (const U8 *) loc;
    U32 packed = 0;

    if ((bytes[0] & info.first_byte_mask) != info.first_byte_anded) {
        return FALSE;
    }

    if (info.min_length == 1) {
        return TRUE;    /* byte 0 was the only byte to test */
    }

    if (info.min_length == 0) {
        ASSUME(0);
    }
    else {
        /* Byte 1 always; bytes 2, 3 and 4 as the minimum length allows */
        packed |= bytes[1];
        if (info.min_length != 2) {
            packed |= (U32) bytes[2] << 1 * 8;
            if (info.min_length != 3) {
                packed |= (U32) bytes[3] << 2 * 8;
                if (info.min_length != 4) {
                    packed |= (U32) bytes[4] << 3 * 8;
                }
            }
        }
    }

    return (packed & info.mask32) == info.anded32;
}
/*
 * S_isGCB - decide whether there is a grapheme-cluster break between two
 * adjacent characters.
 *
 * before, after  GCB classes of the characters on each side of the position
 * strbeg         start of the string (limit for backward scans)
 * curpos         the position being tested
 * utf8_target    whether the string is UTF-8
 *
 * GCB_table[before][after] settles most pairs directly.  Three table values
 * need earlier context, obtained by stepping backwards with
 * backup_one_GCB():
 *   - GCB_RI_then_RI: count the run of Regional_Indicator characters ending
 *     at 'before'; break only if that count is even.
 *   - GCB_EX_then_EM: skip back over Extend; break unless what precedes is
 *     E_Base or E_Base_GAZ.
 *   - GCB_Maybe_Emoji_NonBreak: skip back over Extend; break unless what
 *     precedes is ExtPict_XX.
 *
 * Returns TRUE for a break, FALSE for none.  An unrecognised table value
 * yields TRUE (and, under DEBUGGING, a message plus a failed assert).
 */
STATIC bool
S_isGCB(pTHX_ const GCB_enum before, const GCB_enum after,
        const U8 * const strbeg, const U8 * const curpos,
        const bool utf8_target)
{
    PERL_ARGS_ASSERT_ISGCB;

    switch (GCB_table[before][after]) {

        case GCB_BREAKABLE:
            return TRUE;

        case GCB_NOBREAK:
            return FALSE;

        case GCB_RI_then_RI:
        {
            U8 * scan_pos = (U8 *) curpos;
            int ri_run = 1;     /* 'before' itself is the first one */

            while (backup_one_GCB(strbeg, &scan_pos, utf8_target)
                                                == GCB_Regional_Indicator)
            {
                ri_run++;
            }

            return ri_run % 2 != 1;
        }

        case GCB_EX_then_EM:
        {
            U8 * scan_pos = (U8 *) curpos;
            GCB_enum earlier = backup_one_GCB(strbeg, &scan_pos, utf8_target);

            while (earlier == GCB_Extend) {
                earlier = backup_one_GCB(strbeg, &scan_pos, utf8_target);
            }

            if (earlier == GCB_E_Base || earlier == GCB_E_Base_GAZ) {
                return FALSE;
            }
            return TRUE;
        }

        case GCB_Maybe_Emoji_NonBreak:
        {
            U8 * scan_pos = (U8 *) curpos;
            GCB_enum earlier = backup_one_GCB(strbeg, &scan_pos, utf8_target);

            while (earlier == GCB_Extend) {
                earlier = backup_one_GCB(strbeg, &scan_pos, utf8_target);
            }

            if (earlier == GCB_ExtPict_XX) {
                return FALSE;
            }
            return TRUE;
        }

        default:
            break;
    }

#ifdef DEBUGGING
    Perl_re_printf( aTHX_ "Unhandled GCB pair: GCB_table[%d, %d] = %d\n",
                    before, after, GCB_table[before][after]);
    assert(0);
#endif
    return TRUE;
}
/*
 * S_backup_one_GCB - step *curpos back one character and return the GCB
 * class of the character that then lies immediately before it.
 *
 * strbeg       start of the string
 * curpos       in/out position; on a successful step it ends up at the start
 *              of the character that was just before it
 * utf8_target  whether the string is UTF-8
 *
 * Returns GCB_EDGE when there are fewer than two characters before *curpos.
 * In that case *curpos is set to strbeg, except on the two earliest exits
 * (*curpos already below strbeg, or the first UTF-8 hop failing), which
 * leave it untouched.
 */
STATIC GCB_enum
S_backup_one_GCB(pTHX_ const U8 * const strbeg, U8 ** curpos,
                 const bool utf8_target)
{
    PERL_ARGS_ASSERT_BACKUP_ONE_GCB;

    if (*curpos < strbeg) {
        return GCB_EDGE;
    }

    if (! utf8_target) {
        /* Bytes: need two of them before the current position */
        if (*curpos - 2 < strbeg) {
            *curpos = (U8 *) strbeg;
            return GCB_EDGE;
        }
        (*curpos)--;
        return getGCB_VAL_CP(*(*curpos - 1));
    }
    else {
        U8 * const char_start = reghopmaybe3(*curpos, -1, strbeg);
        U8 * prior_start;

        if (! char_start) {
            return GCB_EDGE;
        }

        prior_start = reghopmaybe3((U8 *) char_start, -1, strbeg);
        if (! prior_start) {
            *curpos = (U8 *) strbeg;
            return GCB_EDGE;
        }

        *curpos = char_start;
        return getGCB_VAL_UTF8(prior_start, char_start);
    }
}
/* LB_CM_ATTACHES_TO(prev): true unless 'prev' is one of LB_EDGE,
 * LB_Mandatory_Break, LB_Carriage_Return, LB_Line_Feed, LB_Next_Line,
 * LB_Space or LB_ZWSpace.  S_isLB() uses it to decide whether combining
 * marks take on the class of the character before them.  Note that 'prev'
 * is evaluated several times and is not parenthesised in the expansion. */
#define LB_CM_ATTACHES_TO(prev) ( ! ( prev == LB_EDGE \
|| prev == LB_Mandatory_Break \
|| prev == LB_Carriage_Return \
|| prev == LB_Line_Feed \
|| prev == LB_Next_Line \
|| prev == LB_Space \
|| prev == LB_ZWSpace))
/*
 * S_isLB - decide whether a line break is allowed between two adjacent
 * characters.
 *
 * before, after  LB classes of the characters on each side of the position
 * strbeg         start of the string (limit for backward scans)
 * curpos         the position being tested
 * strend         end of the string (limit for forward scans)
 * utf8_target    whether the string is UTF-8
 *
 * LB_table[before][after] answers most pairs directly.  The remaining table
 * values encode "needs context" cases; several are stored as a case base
 * plus the answer (LB_BREAKABLE / LB_NOBREAK / ...) to use when the context
 * test does not apply.  Context is gathered with backup_one_LB() and
 * advance_one_LB() working on a private copy of curpos.
 *
 * Returns TRUE if a break is allowed, FALSE if not.  An unrecognised table
 * value yields TRUE (and, under DEBUGGING, a message plus a failed assert).
 */
STATIC
bool
S_isLB(pTHX_ LB_enum before,
LB_enum after,
const
U8 *
const
strbeg,
const
U8 *
const
curpos,
const
U8 *
const
strend,
const
bool
utf8_target)
{
/* Scratch position, moved by the context scans below */
U8 * temp_pos = (U8 *) curpos;
LB_enum prev = before;
PERL_ARGS_ASSERT_ISLB;
/* Re-entry point after 'prev' has been replaced by an earlier character */
redo:
before = prev;
switch
(LB_table[before][after]) {
case
LB_BREAKABLE:
return
TRUE;
case
LB_NOBREAK:
case
LB_NOBREAK_EVEN_WITH_SP_BETWEEN:
return
FALSE;
/* 'before' is a space and 'after' is not */
case
LB_SP_foo + LB_BREAKABLE:
case
LB_SP_foo + LB_NOBREAK:
case
LB_SP_foo + LB_NOBREAK_EVEN_WITH_SP_BETWEEN:
assert
(after != LB_Space);
/* Find the first non-space character before the run of spaces */
do
{
prev = backup_one_LB(strbeg, &temp_pos, utf8_target);
}
while
(prev == LB_Space);
/* A zero-width space before the run of spaces: break */
if
(prev == LB_ZWSpace) {
return
TRUE;
}
/* The table says space-then-'after' never breaks: no break */
if
(LB_table[LB_Space][after] - LB_SP_foo
== LB_NOBREAK_EVEN_WITH_SP_BETWEEN)
{
return
FALSE;
}
/* Combining marks before the spaces: look through them to their base; if
 * they do not attach to it, treat them as LB_Alphabetic */
if
(prev == LB_Combining_Mark) {
do
{
prev = backup_one_LB(strbeg, &temp_pos, utf8_target);
}
while
(prev == LB_Combining_Mark);
if
(! LB_CM_ATTACHES_TO(prev)) {
prev = LB_Alphabetic;
}
}
/* Break after the spaces unless the pair (prev, after) forbids a break
 * even with spaces in between */
return
LB_table[prev][after] != LB_NOBREAK_EVEN_WITH_SP_BETWEEN;
/* 'before' is a combining mark or ZWJ: redo the lookup using the class of
 * the first earlier character that is neither (or LB_Alphabetic if the
 * marks do not attach to it) */
case
LB_CM_ZWJ_foo:
do
{
prev = backup_one_LB(strbeg, &temp_pos, utf8_target);
}
while
(prev == LB_Combining_Mark || prev == LB_ZWJ);
if
(! LB_CM_ATTACHES_TO(prev)) {
prev = LB_Alphabetic;
}
goto
redo;
/* No break if the character before 'before' is a Hebrew letter; otherwise
 * use the answer stored in the table entry */
case
LB_HY_or_BA_then_foo + LB_BREAKABLE:
case
LB_HY_or_BA_then_foo + LB_NOBREAK:
if
(backup_one_LB(strbeg, &temp_pos, utf8_target)
== LB_Hebrew_Letter)
{
return
FALSE;
}
return
LB_table[prev][after] - LB_HY_or_BA_then_foo == LB_BREAKABLE;
/* No break if the character following 'after' is numeric; otherwise use
 * the answer stored in the table entry */
case
LB_PR_or_PO_then_OP_or_HY + LB_BREAKABLE:
case
LB_PR_or_PO_then_OP_or_HY + LB_NOBREAK:
if
(advance_one_LB(&temp_pos, strend, utf8_target) == LB_Numeric) {
return
FALSE;
}
return
LB_table[prev][after] - LB_PR_or_PO_then_OP_or_HY
== LB_BREAKABLE;
/* Skip back over Break_Symbols / Infix_Numeric; no break if a numeric
 * precedes them; otherwise use the answer stored in the table entry */
case
LB_SY_or_IS_then_various + LB_BREAKABLE:
case
LB_SY_or_IS_then_various + LB_NOBREAK:
{
LB_enum temp = prev;
do
{
temp = backup_one_LB(strbeg, &temp_pos, utf8_target);
}
while
(temp == LB_Break_Symbols || temp == LB_Infix_Numeric);
if
(temp == LB_Numeric) {
return
FALSE;
}
return
LB_table[prev][after] - LB_SY_or_IS_then_various
== LB_BREAKABLE;
}
/* Optionally skip one closing punctuation/parenthesis, then any
 * Break_Symbols / Infix_Numeric; no break if a numeric precedes them */
case
LB_various_then_PO_or_PR + LB_BREAKABLE:
case
LB_various_then_PO_or_PR + LB_NOBREAK:
{
LB_enum temp = prev;
if
(temp == LB_Close_Punctuation || temp == LB_Close_Parenthesis)
{
temp = backup_one_LB(strbeg, &temp_pos, utf8_target);
}
while
(temp == LB_Break_Symbols || temp == LB_Infix_Numeric) {
temp = backup_one_LB(strbeg, &temp_pos, utf8_target);
}
if
(temp == LB_Numeric) {
return
FALSE;
}
/* NOTE(review): unlike the sibling cases, this returns the case-base
 * constant itself rather than comparing the table entry minus the base
 * against LB_BREAKABLE.  Converted to bool it is TRUE whenever the
 * constant is non-zero, regardless of which of the two case labels was
 * hit.  Confirm this is intended. */
return
LB_various_then_PO_or_PR;
}
/* Count the run of Regional_Indicator characters ending at 'before';
 * break only if that count is even */
case
LB_RI_then_RI + LB_NOBREAK:
case
LB_RI_then_RI + LB_BREAKABLE:
{
int
RI_count = 1;
while
(backup_one_LB(strbeg,
&temp_pos,
utf8_target) == LB_Regional_Indicator)
{
RI_count++;
}
return
RI_count % 2 == 0;
}
default
:
break
;
}
/* Reached only for a table value with no case above */
#ifdef DEBUGGING
Perl_re_printf( aTHX_
"Unhandled LB pair: LB_table[%d, %d] = %d\n"
,
before, after, LB_table[before][after]);
assert
(0);
#endif
return
TRUE;
}
/*
 * S_advance_one_LB - move *curpos forward one character and return the LB
 * class of the character now at *curpos.
 *
 * curpos       in/out position
 * strend       end of the string
 * utf8_target  whether the string is UTF-8 (step by UTF8SKIP) or bytes
 *
 * Returns LB_EDGE if *curpos is already at or past strend (position left
 * unchanged), or if the step lands at or past strend.
 */
STATIC LB_enum
S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend,
                 const bool utf8_target)
{
    PERL_ARGS_ASSERT_ADVANCE_ONE_LB;

    if (*curpos >= strend) {
        return LB_EDGE;
    }

    if (utf8_target) {
        *curpos += UTF8SKIP(*curpos);
    }
    else {
        (*curpos)++;
    }

    if (*curpos >= strend) {
        return LB_EDGE;
    }

    if (utf8_target) {
        return getLB_VAL_UTF8(*curpos, strend);
    }
    return getLB_VAL_CP(**curpos);
}
/*
 * S_backup_one_LB - step *curpos back one character and return the LB class
 * of the character that then lies immediately before it.
 *
 * strbeg       start of the string
 * curpos       in/out position; on a successful step it ends up at the start
 *              of the character that was just before it
 * utf8_target  whether the string is UTF-8
 *
 * Returns LB_EDGE when there are fewer than two characters before *curpos.
 * In that case *curpos is set to strbeg, except on the two earliest exits
 * (*curpos already below strbeg, or the first UTF-8 hop failing), which
 * leave it untouched.
 */
STATIC LB_enum
S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos,
                const bool utf8_target)
{
    PERL_ARGS_ASSERT_BACKUP_ONE_LB;

    if (*curpos < strbeg) {
        return LB_EDGE;
    }

    if (! utf8_target) {
        /* Bytes: need two of them before the current position */
        if (*curpos - 2 < strbeg) {
            *curpos = (U8 *) strbeg;
            return LB_EDGE;
        }
        (*curpos)--;
        return getLB_VAL_CP(*(*curpos - 1));
    }
    else {
        U8 * const char_start = reghopmaybe3(*curpos, -1, strbeg);
        U8 * prior_start;

        if (! char_start) {
            return LB_EDGE;
        }

        prior_start = reghopmaybe3((U8 *) char_start, -1, strbeg);
        if (! prior_start) {
            *curpos = (U8 *) strbeg;
            return LB_EDGE;
        }

        *curpos = char_start;
        return getLB_VAL_UTF8(prior_start, char_start);
    }
}
/*
 * S_isSB - decide whether there is a sentence break between two adjacent
 * characters.
 *
 * before, after  SB classes of the characters on each side of the position
 * strbeg         start of the string (limit for backward scans)
 * curpos         the position being tested
 * strend         end of the string (limit for forward scans)
 * utf8_target    whether the string is UTF-8
 *
 * The tests are applied in order and the first that applies decides.
 * Earlier context is read with backup_one_SB() and later context with
 * advance_one_SB(); per their loops, both skip Extend and Format.
 *
 * Returns TRUE for a break, FALSE for none.
 */
STATIC
bool
S_isSB(pTHX_ SB_enum before,
SB_enum after,
const
U8 *
const
strbeg,
const
U8 *
const
curpos,
const
U8 *
const
strend,
const
bool
utf8_target)
{
/* Backward-scan position; starts at the tested position */
U8 * lpos = (U8 *) curpos;
bool
has_para_sep = FALSE;
bool
has_sp = FALSE;
PERL_ARGS_ASSERT_ISSB;
/* At an edge of the text: break unless both sides are edges */
if
(before == SB_EDGE || after == SB_EDGE) {
return
before != after;
}
/* Never between CR and LF */
if
(before == SB_CR && after == SB_LF) {
return
FALSE;
}
/* Always after a separator, CR or LF */
if
(before == SB_Sep || before == SB_CR || before == SB_LF) {
return
TRUE;
}
/* Never before Extend or Format */
if
(after == SB_Extend || after == SB_Format) {
return
FALSE;
}
/* 'before' is Extend/Format: substitute the earlier character it follows,
 * unless that is an edge or a separator-type character */
if
(before == SB_Extend || before == SB_Format) {
U8 * temp_pos = lpos;
const
SB_enum backup = backup_one_SB(strbeg, &temp_pos, utf8_target);
if
( backup != SB_EDGE
&& backup != SB_Sep
&& backup != SB_CR
&& backup != SB_LF)
{
before = backup;
lpos = temp_pos;
}
if
(backup == SB_Extend || backup == SB_Format) {
return
FALSE;
}
}
/* ATerm followed by a numeric: no break */
if
(before == SB_ATerm && after == SB_Numeric) {
return
FALSE;
}
/* (Upper | Lower) ATerm followed by Upper: no break */
if
(before == SB_ATerm && after == SB_Upper) {
U8 * temp_pos = lpos;
SB_enum backup = backup_one_SB(strbeg, &temp_pos, utf8_target);
if
(backup == SB_Upper || backup == SB_Lower) {
return
FALSE;
}
}
/* Back up over an optional separator, then spaces, then Close characters,
 * recording what was skipped.
 * NOTE(review): the separator test below cannot succeed as the code
 * stands: a Sep/CR/LF 'before' already returned TRUE above, and the
 * Extend/Format substitution never assigns one of those classes.  So
 * has_para_sep stays FALSE. */
if
(before == SB_Sep || before == SB_CR || before == SB_LF) {
has_para_sep = TRUE;
before = backup_one_SB(strbeg, &lpos, utf8_target);
}
if
(before == SB_Sp) {
has_sp = TRUE;
do
{
before = backup_one_SB(strbeg, &lpos, utf8_target);
}
while
(before == SB_Sp);
}
while
(before == SB_Close) {
before = backup_one_SB(strbeg, &lpos, utf8_target);
}
/* Only a terminator (STerm or ATerm) reached this way can produce a break */
if
(before == SB_STerm || before == SB_ATerm) {
if
(! has_para_sep) {
/* After ATerm: scan forward to the next letter, separator, terminator or
 * edge; if that is a lowercase letter, no break */
if
(before == SB_ATerm) {
U8 * rpos = (U8 *) curpos;
SB_enum later = after;
while
( later != SB_OLetter
&& later != SB_Upper
&& later != SB_Lower
&& later != SB_Sep
&& later != SB_CR
&& later != SB_LF
&& later != SB_STerm
&& later != SB_ATerm
&& later != SB_EDGE)
{
later = advance_one_SB(&rpos, strend, utf8_target);
}
if
(later == SB_Lower) {
return
FALSE;
}
}
/* No break before continuation punctuation or another terminator */
if
( after == SB_SContinue
|| after == SB_STerm
|| after == SB_ATerm)
{
return
FALSE;
}
/* With no spaces skipped: no break before Close, Sp, or a separator */
if
(! has_sp) {
if
( after == SB_Close
|| after == SB_Sp
|| after == SB_Sep
|| after == SB_CR
|| after == SB_LF)
{
return
FALSE;
}
}
/* In all cases: no break before Sp or a separator */
if
( after == SB_Sp
|| after == SB_Sep
|| after == SB_CR
|| after == SB_LF)
{
return
FALSE;
}
}
return
TRUE;
}
/* Anything else: no break */
return
FALSE;
}
/*
 * S_advance_one_SB - move *curpos forward to the next character whose SB
 * class is neither Extend nor Format, and return that class.
 *
 * curpos       in/out position; always advanced by at least one character
 *              unless it is already at or past strend
 * strend       end of the string
 * utf8_target  whether the string is UTF-8 (step by UTF8SKIP) or bytes
 *
 * Returns SB_EDGE if *curpos starts at or past strend, or if the end of the
 * string is reached while stepping.
 */
STATIC SB_enum
S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend,
                 const bool utf8_target)
{
    SB_enum sb;

    PERL_ARGS_ASSERT_ADVANCE_ONE_SB;

    if (*curpos >= strend) {
        return SB_EDGE;
    }

    for (;;) {
        if (utf8_target) {
            *curpos += UTF8SKIP(*curpos);
        }
        else {
            (*curpos)++;
        }

        if (*curpos >= strend) {
            return SB_EDGE;
        }

        if (utf8_target) {
            sb = getSB_VAL_UTF8(*curpos, strend);
        }
        else {
            sb = getSB_VAL_CP(**curpos);
        }

        if (sb != SB_Extend && sb != SB_Format) {
            break;
        }
    }

    return sb;
}
/*
 * S_backup_one_SB - step *curpos backwards, skipping Extend and Format, and
 * return the SB class of the character that then lies immediately before it.
 *
 * strbeg       start of the string
 * curpos       in/out position; after each successful step it is at the
 *              start of the character that was just before it
 * utf8_target  whether the string is UTF-8
 *
 * Stepping repeats while the class found is Extend or Format.
 *
 * Returns SB_EDGE when the start of the string is reached.  In that case
 * *curpos is set to strbeg, except on the two earliest exits (*curpos
 * already below strbeg, or the first UTF-8 hop failing), which leave it
 * untouched.
 */
STATIC SB_enum
S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos,
                const bool utf8_target)
{
    SB_enum sb;

    PERL_ARGS_ASSERT_BACKUP_ONE_SB;

    if (*curpos < strbeg) {
        return SB_EDGE;
    }

    if (! utf8_target) {
        for (;;) {
            /* Bytes: need two of them before the current position */
            if (*curpos - 2 < strbeg) {
                *curpos = (U8 *) strbeg;
                return SB_EDGE;
            }
            (*curpos)--;
            sb = getSB_VAL_CP(*(*curpos - 1));
            if (sb != SB_Extend && sb != SB_Format) {
                break;
            }
        }
    }
    else {
        U8 * char_start = reghopmaybe3(*curpos, -1, strbeg);

        if (! char_start) {
            return SB_EDGE;
        }

        for (;;) {
            U8 * const prior_start = reghopmaybe3((U8 *) char_start, -1,
                                                  strbeg);
            if (! prior_start) {
                *curpos = (U8 *) strbeg;
                return SB_EDGE;
            }

            sb = getSB_VAL_UTF8(prior_start, char_start);
            *curpos = char_start;
            char_start = prior_start;

            if (sb != SB_Extend && sb != SB_Format) {
                break;
            }
        }
    }

    return sb;
}
/*
 * S_isWB - decide whether there is a word break between two adjacent
 * characters.
 *
 * previous     WB class of the character before 'before' if the caller
 *              already knows it, else WB_UNKNOWN; passed by address to
 *              backup_one_WB(), which consumes and updates it
 * before, after  WB classes of the characters on each side of the position
 * strbeg       start of the string (limit for backward scans)
 * curpos       the position being tested
 * strend       end of the string (limit for forward scans)
 * utf8_target  whether the string is UTF-8
 *
 * WB_table[before][after] answers most pairs directly.  The remaining table
 * values need one more character of context on one side; most are stored
 * as a case base plus the answer (WB_BREAKABLE / WB_NOBREAK) to use when
 * the context test does not apply.
 *
 * Returns TRUE for a break, FALSE for none.  An unrecognised table value
 * yields TRUE (and, under DEBUGGING, a message plus a failed assert).
 */
STATIC
bool
S_isWB(pTHX_ WB_enum previous,
WB_enum before,
WB_enum after,
const
U8 *
const
strbeg,
const
U8 *
const
curpos,
const
U8 *
const
strend,
const
bool
utf8_target)
{
/* Independent scan positions for looking backwards and forwards */
U8 * before_pos = (U8 *) curpos;
U8 * after_pos = (U8 *) curpos;
WB_enum prev = before;
WB_enum next;
PERL_ARGS_ASSERT_ISWB;
/* Re-entry point after 'prev' has been replaced by an earlier character */
redo:
before = prev;
switch
(WB_table[before][after]) {
case
WB_BREAKABLE:
return
TRUE;
case
WB_NOBREAK:
return
FALSE;
/* Break only if the character after 'after' is Extend or Format (looked
 * up without skipping them) */
case
WB_hs_then_hs:
next = advance_one_WB(&after_pos, strend, utf8_target,
FALSE
);
return
next == WB_Extend || next == WB_Format;
/* 'before' is Extend, Format or ZWJ: redo the lookup with the character
 * that backup_one_WB() finds before it */
case
WB_Ex_or_FO_or_ZWJ_then_foo:
prev = backup_one_WB(&previous, strbeg, &before_pos, utf8_target);
goto
redo;
/* No break if a Hebrew letter precedes 'before'; else the stored answer */
case
WB_DQ_then_HL + WB_BREAKABLE:
case
WB_DQ_then_HL + WB_NOBREAK:
if
(backup_one_WB(&previous, strbeg, &before_pos, utf8_target)
== WB_Hebrew_Letter)
{
return
FALSE;
}
return
WB_table[before][after] - WB_DQ_then_HL == WB_BREAKABLE;
/* No break if a Hebrew letter follows 'after' (skipping Extend/Format);
 * else the stored answer */
case
WB_HL_then_DQ + WB_BREAKABLE:
case
WB_HL_then_DQ + WB_NOBREAK:
if
(advance_one_WB(&after_pos, strend, utf8_target,
TRUE
)
== WB_Hebrew_Letter)
{
return
FALSE;
}
return
WB_table[before][after] - WB_HL_then_DQ == WB_BREAKABLE;
/* No break if a letter (ALetter or Hebrew_Letter) follows 'after'; else
 * the stored answer */
case
WB_LE_or_HL_then_MB_or_ML_or_SQ + WB_NOBREAK:
case
WB_LE_or_HL_then_MB_or_ML_or_SQ + WB_BREAKABLE:
next = advance_one_WB(&after_pos, strend, utf8_target,
TRUE
);
if
(next == WB_ALetter || next == WB_Hebrew_Letter)
{
return
FALSE;
}
return
WB_table[before][after]
- WB_LE_or_HL_then_MB_or_ML_or_SQ == WB_BREAKABLE;
/* No break if a letter precedes 'before'; else the stored answer */
case
WB_MB_or_ML_or_SQ_then_LE_or_HL + WB_NOBREAK:
case
WB_MB_or_ML_or_SQ_then_LE_or_HL + WB_BREAKABLE:
prev = backup_one_WB(&previous, strbeg, &before_pos, utf8_target);
if
(prev == WB_ALetter || prev == WB_Hebrew_Letter)
{
return
FALSE;
}
return
WB_table[before][after]
- WB_MB_or_ML_or_SQ_then_LE_or_HL == WB_BREAKABLE;
/* No break if a numeric precedes 'before'; else the stored answer */
case
WB_MB_or_MN_or_SQ_then_NU + WB_NOBREAK:
case
WB_MB_or_MN_or_SQ_then_NU + WB_BREAKABLE:
if
(backup_one_WB(&previous, strbeg, &before_pos, utf8_target)
== WB_Numeric)
{
return
FALSE;
}
return
WB_table[before][after]
- WB_MB_or_MN_or_SQ_then_NU == WB_BREAKABLE;
/* No break if a numeric follows 'after'; else the stored answer */
case
WB_NU_then_MB_or_MN_or_SQ + WB_NOBREAK:
case
WB_NU_then_MB_or_MN_or_SQ + WB_BREAKABLE:
if
(advance_one_WB(&after_pos, strend, utf8_target,
TRUE
)
== WB_Numeric)
{
return
FALSE;
}
return
WB_table[before][after]
- WB_NU_then_MB_or_MN_or_SQ == WB_BREAKABLE;
/* Count the run of Regional_Indicator characters ending at 'before';
 * break only if that count is even */
case
WB_RI_then_RI + WB_NOBREAK:
case
WB_RI_then_RI + WB_BREAKABLE:
{
int
RI_count = 1;
while
(backup_one_WB(&previous,
strbeg,
&before_pos,
utf8_target) == WB_Regional_Indicator)
{
RI_count++;
}
return
RI_count % 2 != 1;
}
default
:
break
;
}
/* Reached only for a table value with no case above */
#ifdef DEBUGGING
Perl_re_printf( aTHX_
"Unhandled WB pair: WB_table[%d, %d] = %d\n"
,
before, after, WB_table[before][after]);
assert
(0);
#endif
return
TRUE;
}
/*
 * S_advance_one_WB - move *curpos forward and return the WB class of the
 * character now at *curpos.
 *
 * curpos              in/out position
 * strend              end of the string
 * utf8_target         whether the string is UTF-8 (step by UTF8SKIP) or
 *                     bytes
 * skip_Extend_Format  if true, keep stepping while the class found is
 *                     Extend or Format; if false, take exactly one step
 *
 * Returns WB_EDGE if *curpos starts at or past strend, or if the end of the
 * string is reached while stepping.
 */
STATIC WB_enum
S_advance_one_WB(pTHX_ U8 ** curpos, const U8 * const strend,
                 const bool utf8_target, const bool skip_Extend_Format)
{
    WB_enum wb;

    PERL_ARGS_ASSERT_ADVANCE_ONE_WB;

    if (*curpos >= strend) {
        return WB_EDGE;
    }

    for (;;) {
        if (utf8_target) {
            *curpos += UTF8SKIP(*curpos);
        }
        else {
            (*curpos)++;
        }

        if (*curpos >= strend) {
            return WB_EDGE;
        }

        if (utf8_target) {
            wb = getWB_VAL_UTF8(*curpos, strend);
        }
        else {
            wb = getWB_VAL_CP(**curpos);
        }

        /* Stop unless asked to skip and this is one of the skipped classes */
        if (! skip_Extend_Format || (wb != WB_Extend && wb != WB_Format)) {
            break;
        }
    }

    return wb;
}
/*
 * S_backup_one_WB - step *curpos backwards and return the WB class of the
 * character that then lies immediately before it, skipping the classes
 * listed below.
 *
 * previous     in/out: if not WB_UNKNOWN on entry, it is taken as the class
 *              of the preceding character, so no lookup is needed for the
 *              first step.  It is then replaced with WB_EDGE or WB_UNKNOWN
 *              for the next call.
 * strbeg       start of the string
 * curpos       in/out position
 * utf8_target  whether the string is UTF-8
 *
 * The cached class is returned at once unless it is Extend, Format or ZWJ,
 * in which case scanning continues.  The UTF-8 scan skips Extend, Format
 * and ZWJ; the byte scan skips Extend and Format only.
 *
 * Returns WB_EDGE when the start of the string is reached.
 */
STATIC WB_enum
S_backup_one_WB(pTHX_ WB_enum * previous,
const
U8 *
const
strbeg, U8 ** curpos,
const
bool
utf8_target)
{
WB_enum wb;
PERL_ARGS_ASSERT_BACKUP_ONE_WB;
/* Fast path: the caller already knows the preceding character's class */
if
(*previous != WB_UNKNOWN) {
wb = *previous;
/* The position still has to move back by one character */
if
(utf8_target) {
*curpos = reghopmaybe3(*curpos, -1, strbeg);
if
(! *curpos) {
/* Could not hop: pin to the start and mark the cache as EDGE */
*previous = WB_EDGE;
*curpos = (U8 *) strbeg;
}
else
{
*previous = WB_UNKNOWN;
}
}
else
{
(*curpos)--;
*previous = (*curpos <= strbeg) ? WB_EDGE : WB_UNKNOWN;
}
/* These three classes are always skipped, so fall through and keep
 * scanning when the cached class is one of them */
if
(wb != WB_Extend && wb != WB_Format && wb != WB_ZWJ) {
return
wb;
}
}
if
(*curpos < strbeg) {
return
WB_EDGE;
}
if
(utf8_target) {
U8 * prev_char_pos = reghopmaybe3(*curpos, -1, strbeg);
if
(! prev_char_pos) {
return
WB_EDGE;
}
/* Each pass classifies the character just before prev_char_pos and moves
 * *curpos to prev_char_pos */
do
{
U8 * prev_prev_char_pos;
if
((prev_prev_char_pos = reghopmaybe3((U8 *) prev_char_pos,
-1,
strbeg)))
{
wb = getWB_VAL_UTF8(prev_prev_char_pos, prev_char_pos);
*curpos = prev_char_pos;
prev_char_pos = prev_prev_char_pos;
}
else
{
*curpos = (U8 *) strbeg;
return
WB_EDGE;
}
}
while
(wb == WB_Extend || wb == WB_Format || wb == WB_ZWJ);
}
else
{
/* NOTE(review): unlike the UTF-8 loop, this one does not test WB_ZWJ;
 * presumably no single-byte character has that class - confirm */
do
{
if
(*curpos - 2 < strbeg) {
*curpos = (U8 *) strbeg;
return
WB_EDGE;
}
(*curpos)--;
wb = getWB_VAL_CP(*(*curpos - 1));
}
while
(wb == WB_Extend || wb == WB_Format);
}
return
wb;
}
/* Sentinel stored in 'nextbyte' by SET_nextchr when 'locinput' is not below
 * reginfo->strend. */
#define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
/* True when 'nextbyte' holds the end-of-string sentinel; the test is simply
 * "negative", so any negative value counts. */
#define NEXTCHR_IS_EOS (nextbyte < 0)
/* Reload 'nextbyte' from the current 'locinput': UCHARAT(locinput) if
 * locinput is before reginfo->strend, otherwise NEXTCHR_EOS.  Relies on
 * 'nextbyte', 'locinput' and 'reginfo' being in scope at the point of use. */
#define SET_nextchr \
nextbyte = ((locinput < reginfo->strend) ? UCHARAT(locinput) : NEXTCHR_EOS)
/* Assign a new 'locinput' and then refresh 'nextbyte' from it.
 * NOTE: this expands to two statements that are not wrapped in a block, so
 * it is not safe as the sole body of an unbraced if/else/loop. */
#define SET_locinput(p) \
locinput = (p); \
SET_nextchr
/* Shorthand jumps to the 'yes', 'no' and 'no_silent' labels (the labels
 * themselves are defined elsewhere in the function that uses these). */
#define sayYES goto yes
#define sayNO goto no
#define sayNO_SILENT goto no_silent
/* If ST.cache_mask is non-zero, OR it into
 * reginfo->info_aux->poscache[ST.cache_offset], and then sayNO in any case.
 * 'ST' is itself a macro that is re-#defined at various points, so which
 * state-union member this touches depends on where it is expanded.
 * NOTE: expands to an if-statement followed by a goto, with no enclosing
 * block. */
#define CACHEsayNO \
if
(ST.cache_mask) \
reginfo->info_aux->poscache[ST.cache_offset] |= ST.cache_mask; \
sayNO
/* Accessors for (st)->u.eval.close_paren.  The field is stored biased by
 * one: EVAL_CLOSE_PAREN_SET stores (expr) + 1 and EVAL_CLOSE_PAREN_CLEAR
 * stores 0, so a stored 0 means "not set". */

/* True if 'st' is non-NULL, its close_paren is set (non-zero), and the stored
 * value corresponds to 'expr' (i.e. equals expr + 1).  Note that 'st' and
 * 'expr' may each be evaluated more than once. */
#define EVAL_CLOSE_PAREN_IS(st,expr) \
( \
( ( st ) ) && \
( ( st )->u.eval.close_paren ) && \
( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) ) \
)
/* Like EVAL_CLOSE_PAREN_IS, but additionally requires 'expr' itself to be
 * non-zero. */
#define EVAL_CLOSE_PAREN_IS_TRUE(st,expr) \
( \
( ( st ) ) && \
( ( st )->u.eval.close_paren ) && \
( ( expr ) ) && \
( ( ( st )->u.eval.close_paren ) == ( (expr) + 1 ) ) \
)
/* Record 'expr' in st's close_paren, stored as expr + 1. */
#define EVAL_CLOSE_PAREN_SET(st,expr) \
(st)->u.eval.close_paren = ( (expr) + 1 )
/* Mark st's close_paren as not set (stores 0). */
#define EVAL_CLOSE_PAREN_CLEAR(st) \
(st)->u.eval.close_paren = 0
/* Stash 'input', 'eol' and 'sr0' in pushinput / pusheol / pushsr0, make
 * 'node' the next node to execute (scan), record 'state' in
 * st->resume_state, and jump to the 'push_state' label.
 * NOTE: this is a sequence of statements with no enclosing block, and the
 * terminating ';' is part of the macro text. */
#define PUSH_STATE_GOTO(state, node, input, eol, sr0) \
pushinput = input; \
pusheol = eol; \
pushsr0 = sr0; \
scan = node; \
st->resume_state = state; \
goto
push_state;
/* Same as PUSH_STATE_GOTO, except that control is transferred to the
 * 'push_yes_state' label instead of 'push_state'. */
#define PUSH_YES_STATE_GOTO(state, node, input, eol, sr0) \
pushinput = input; \
pusheol = eol; \
pushsr0 = sr0; \
scan = node; \
st->resume_state = state; \
goto
push_yes_state;
/* Debugging aid, active only inside DEBUG_STATE_r: dumps the current
 * execution position and then prints one line made of an indent sized by
 * INDENT_CHARS(depth), the caller-supplied string literal 'pp' (it is pasted
 * into the format string, so it must be a literal), and
 * REGNODE_NAME(st->resume_state).  The name is followed by a bracketed tag
 * when 'st' is the current yes_state ("[Y]") and/or mark_state ("[M]");
 * nothing is appended otherwise. */
#define DEBUG_STATE_pp(pp) \
DEBUG_STATE_r({ \
DUMP_EXEC_POS(locinput, scan, utf8_target,depth); \
Perl_re_printf( aTHX_ \
"%*s"
pp
" %s%s%s%s%s\n"
, \
INDENT_CHARS(depth),
""
, \
REGNODE_NAME(st->resume_state), \
((st==yes_state||st==mark_state) ?
"["
:
""
), \
((st==yes_state) ?
"Y"
:
""
), \
((st==mark_state) ?
"M"
:
""
), \
((st==yes_state||st==mark_state) ?
"]"
:
""
) \
); \
});
STATIC SSize_t
S_regmatch(pTHX_ regmatch_info *reginfo,
char
*startpos, regnode *prog)
{
const
bool
utf8_target = reginfo->is_utf8_target;
const
U32 uniflags = UTF8_ALLOW_DEFAULT;
REGEXP *rex_sv = reginfo->prog;
regexp *rex = ReANY(rex_sv);
RXi_GET_DECL(rex,rexi);
regmatch_state *st;
regnode *scan;
regnode *next;
U32 n = 0;
U32 utmp = 0;
SSize_t ln = 0;
SSize_t endref = 0;
char
*locinput = startpos;
char
*loceol = reginfo->strend;
char
*pushinput;
char
*pusheol;
U8 *pushsr0;
PERL_INT_FAST16_T nextbyte;
bool
result = 0;
U32 depth = 0;
U32 nochange_depth = 0;
const
U32 max_nochange_depth =
(3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
regmatch_state *yes_state = NULL;
regmatch_state *mark_state = NULL;
regmatch_state *cur_eval = NULL;
struct
regmatch_state *cur_curlyx = NULL;
U32 state_num;
bool
no_final = 0;
bool
do_cutgroup = 0;
char
*startpoint = locinput;
SV *popmark = NULL;
SV *sv_commit = NULL;
SV *sv_yes_mark = NULL;
U32 lastopen = 0;
bool
has_cutgroup = RXp_HAS_CUTGROUP(rex) ? 1 : 0;
SV*
const
oreplsv = GvSVn(PL_replgv);
bool
sw = 0;
bool
minmod = 0;
int
logical = 0;
PAD* last_pad = NULL;
dMULTICALL;
U8 gimme = G_SCALAR;
CV *caller_cv = NULL;
CV *last_pushed_cv = NULL;
U32 maxopenparen = 0;
int
to_complement;
char_class_number_ classnum;
bool
is_utf8_pat = reginfo->is_utf8_pat;
bool
match = FALSE;
I32 orig_savestack_ix = PL_savestack_ix;
U8 * script_run_begin = NULL;
char
*match_end= NULL;
bool
is_accepted = FALSE;
re_fold_t folder = NULL;
const
U8 * fold_array = NULL;
#if (defined(__SUNPRO_C) && (__SUNPRO_C == 0x5120) && defined(__x86_64) && defined(USE_64_BIT_ALL))
# define SOLARIS_BAD_OPTIMIZER
const
U32 *pl_charclass_dup = PL_charclass;
# define PL_charclass pl_charclass_dup
#endif
#ifdef DEBUGGING
DECLARE_AND_GET_RE_DEBUG_FLAGS;
#endif
SAVEFREESV(SvREFCNT_inc_simple_NN(oreplsv));
multicall_oldcatch = 0;
PERL_UNUSED_VAR(multicall_cop);
PERL_ARGS_ASSERT_REGMATCH;
st = PL_regmatch_state;
SET_nextchr;
scan = prog;
DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
DUMP_EXEC_POS( locinput, scan, utf8_target, depth );
Perl_re_printf( aTHX_
"regmatch start\n"
);
}));
while
(scan != NULL) {
next = scan + NEXT_OFF(scan);
if
(next == scan)
next = NULL;
state_num = OP(scan);
reenter_switch:
DEBUG_EXECUTE_r(
if
(state_num <= REGNODE_MAX) {
SV *
const
prop = sv_newmortal();
regnode *rnext = regnext(scan);
DUMP_EXEC_POS( locinput, scan, utf8_target, depth );
regprop(rex, prop, scan, reginfo, NULL);
Perl_re_printf( aTHX_
"%*s%"
IVdf
":%s(%"
IVdf
")\n"
,
INDENT_CHARS(depth),
""
,
(IV)(scan - rexi->program),
SvPVX_const(prop),
(REGNODE_TYPE(OP(scan)) == END || !rnext) ?
0 : (IV)(rnext - rexi->program));
}
);
to_complement = 0;
SET_nextchr;
assert
(nextbyte < 256 && (nextbyte >= 0 || nextbyte == NEXTCHR_EOS));
switch
(state_num) {
SV * anyofh_list;
case
SBOL:
if
(locinput == reginfo->strbeg)
break
;
sayNO;
case
MBOL:
if
(locinput == reginfo->strbeg ||
(!NEXTCHR_IS_EOS && locinput[-1] ==
'\n'
))
{
break
;
}
sayNO;
case
GPOS:
if
(locinput == reginfo->ganch)
break
;
sayNO;
case
KEEPS:
st->u.keeper.val = RXp_OFFS_START(rex,0);
RXp_OFFSp(rex)[0].start = locinput - reginfo->strbeg;
PUSH_STATE_GOTO(KEEPS_next, next, locinput, loceol,
script_run_begin);
NOT_REACHED;
case
KEEPS_next_fail:
RXp_OFFSp(rex)[0].start = st->u.keeper.val;
sayNO_SILENT;
NOT_REACHED;
case
MEOL:
if
(!NEXTCHR_IS_EOS && nextbyte !=
'\n'
)
sayNO;
break
;
case
SEOL:
if
(!NEXTCHR_IS_EOS && nextbyte !=
'\n'
)
sayNO;
if
(reginfo->strend - locinput > 1)
sayNO;
break
;
case
EOS:
if
(!NEXTCHR_IS_EOS)
sayNO;
break
;
case
SANY:
if
(NEXTCHR_IS_EOS || locinput >= loceol)
sayNO;
goto
increment_locinput;
case
REG_ANY:
if
( NEXTCHR_IS_EOS
|| locinput >= loceol
|| nextbyte ==
'\n'
)
{
sayNO;
}
goto
increment_locinput;
#undef ST
#define ST st->u.trie
case
TRIEC:
if
( ! NEXTCHR_IS_EOS
&& locinput < loceol
&& ! ANYOF_BITMAP_TEST(scan, nextbyte))
{
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sTRIE: failed to match trie start class...%s\n"
,
depth, PL_colors[4], PL_colors[5])
);
sayNO_SILENT;
NOT_REACHED;
}
case
TRIE:
{
DECL_TRIE_TYPE(scan);
reg_trie_data *
const
trie
= (reg_trie_data*)rexi->data->data[ ARG1u( scan ) ];
ST.before_paren = trie->before_paren;
ST.after_paren = trie->after_paren;
assert
(ST.before_paren<=rex->nparens);
assert
(ST.after_paren<=rex->nparens);
HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG1u( scan ) + 1 ]);
U32 state = trie->startstate;
if
(FLAGS(scan) == EXACTL || FLAGS(scan) == EXACTFLU8) {
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if
(utf8_target
&& ! NEXTCHR_IS_EOS
&& UTF8_IS_ABOVE_LATIN1(nextbyte)
&& FLAGS(scan) == EXACTL)
{
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput,
reginfo->strend);
}
}
if
( trie->bitmap
&& ( NEXTCHR_IS_EOS
|| locinput >= loceol
|| ! TRIE_BITMAP_TEST(trie, nextbyte)))
{
if
(trie->states[ state ].wordnum) {
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sTRIE: matched empty string...%s\n"
,
depth, PL_colors[4], PL_colors[5])
);
if
(!trie->jump)
break
;
}
else
{
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sTRIE: failed to match trie start class...%s\n"
,
depth, PL_colors[4], PL_colors[5])
);
sayNO_SILENT;
}
}
{
U8 *uc = ( U8* )locinput;
STRLEN len = 0;
STRLEN foldlen = 0;
U8 *uscan = (U8*)NULL;
U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
U32 charcount = 0;
U32 accepted = 0;
ST.jump = trie->jump;
ST.j_before_paren = trie->j_before_paren;
ST.j_after_paren= trie->j_after_paren;
ST.me = scan;
ST.firstpos = NULL;
ST.longfold = FALSE;
ST.nextword = 0;
while
( state && uc <= (U8*)(loceol) ) {
U32 base = trie->states[ state ].trans.base;
UV uvc = 0;
U16 charid = 0;
U16 wordnum;
wordnum = trie->states[ state ].wordnum;
if
(wordnum) {
if
(!accepted) {
accepted = 1;
if
(ST.longfold) {
ST.firstpos = (U8*)locinput;
ST.firstchars = 0;
}
else
{
ST.firstpos = uc;
ST.firstchars = charcount;
}
}
if
(!ST.nextword || wordnum < ST.nextword)
ST.nextword = wordnum;
ST.topword = wordnum;
}
DEBUG_TRIE_EXECUTE_r({
DUMP_EXEC_POS( (
char
*)uc, scan, utf8_target, depth );
PerlIO_printf( Perl_debug_log,
"%*s%sTRIE: State: %4"
UVxf
" Accepted: %c "
,
INDENT_CHARS(depth),
""
, PL_colors[4],
(UV)state, (accepted ?
'Y'
:
'N'
));
});
if
( base && (foldlen || uc < (U8*)(loceol))) {
I32 offset;
REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
(U8 *) loceol, uscan,
len, uvc, charid, foldlen,
foldbuf, uniflags);
charcount++;
if
(foldlen>0)
ST.longfold = TRUE;
if
(charid &&
( ((offset =
base + charid - 1 - trie->uniquecharcount)) >= 0)
&& ((U32)offset < trie->lasttrans)
&& trie->trans[offset].check == state)
{
state = trie->trans[offset].next;
}
else
{
state = 0;
}
uc += len;
}
else
{
state = 0;
}
DEBUG_TRIE_EXECUTE_r(
Perl_re_printf( aTHX_
"TRIE: Charid:%3x CP:%4"
UVxf
" After State: %4"
UVxf
"%s\n"
,
charid, uvc, (UV)state, PL_colors[5] );
);
}
if
(!accepted)
sayNO;
{
U16 w = ST.topword;
accepted = 0;
while
(w) {
w = trie->wordinfo[w].prev;
accepted++;
}
ST.accepted = accepted;
}
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sTRIE: got %"
IVdf
" possible matches%s\n"
,
depth,
PL_colors[4], (IV)ST.accepted, PL_colors[5] );
);
goto
trie_first_try;
}}
NOT_REACHED;
case
TRIE_next_fail:
{
U8 *uc;
if
(RE_PESSIMISTIC_PARENS) {
REGCP_UNWIND(ST.lastcp);
regcppop(rex,&maxopenparen);
}
if
( ST.jump ) {
REGCP_UNWIND(ST.cp);
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
if
(ST.after_paren) {
assert
(ST.before_paren<=rex->nparens && ST.after_paren<=rex->nparens);
CAPTURE_CLEAR(ST.before_paren+1, ST.after_paren,
"TRIE_next_fail"
);
}
}
if
(!--ST.accepted) {
DEBUG_EXECUTE_r({
Perl_re_exec_indentf( aTHX_
"%sTRIE failed...%s\n"
,
depth,
PL_colors[4],
PL_colors[5] );
});
sayNO_SILENT;
}
{
U16 min = 0;
U16 word;
U16
const
nextword = ST.nextword;
reg_trie_wordinfo *
const
wordinfo
= ((reg_trie_data*)rexi->data->data[ARG1u(ST.me)])->wordinfo;
for
(word=ST.topword; word; word=wordinfo[word].prev) {
if
(word > nextword && (!min || word < min))
min = word;
}
ST.nextword = min;
}
trie_first_try:
if
(do_cutgroup) {
do_cutgroup = 0;
no_final = 0;
}
if
( ST.jump ) {
ST.lastparen = RXp_LASTPAREN(rex);
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
REGCP_SET(ST.cp);
}
{
U32 chars;
reg_trie_data *
const
trie
= (reg_trie_data*)rexi->data->data[ARG1u(ST.me)];
assert
((trie->wordinfo[ST.nextword].len - trie->prefixlen)
>= ST.firstchars);
chars = (trie->wordinfo[ST.nextword].len - trie->prefixlen)
- ST.firstchars;
uc = ST.firstpos;
if
(ST.longfold) {
U8 foldbuf[UTF8_MAXBYTES_CASE + 1];
STRLEN foldlen;
STRLEN len;
UV uvc;
U8 *uscan;
while
(chars) {
if
(utf8_target) {
uvc = utf8n_to_uvchr((U8*)uc, UTF8_MAXLEN, &len,
uniflags);
uc += len;
}
else
{
uvc = *uc;
uc++;
}
uvc = to_uni_fold(uvc, foldbuf, &foldlen);
uscan = foldbuf;
while
(foldlen) {
if
(!--chars)
break
;
uvc = utf8n_to_uvchr(uscan, foldlen, &len,
uniflags);
uscan += len;
foldlen -= len;
}
}
}
else
{
if
(utf8_target)
uc = utf8_hop(uc, chars);
else
uc += chars;
}
}
if
(ST.jump && ST.jump[ST.nextword]) {
scan = ST.me + ST.jump[ST.nextword];
ST.before_paren = ST.j_before_paren[ST.nextword];
assert
(ST.before_paren <= rex->nparens);
ST.after_paren = ST.j_after_paren[ST.nextword];
assert
(ST.after_paren <= rex->nparens);
}
else
{
scan = ST.me + NEXT_OFF(ST.me);
}
DEBUG_EXECUTE_r({
Perl_re_exec_indentf( aTHX_
"%sTRIE matched word #%d, continuing%s\n"
,
depth,
PL_colors[4],
ST.nextword,
PL_colors[5]
);
});
if
( ST.accepted > 1 || has_cutgroup || ST.jump ) {
if
(RE_PESSIMISTIC_PARENS) {
(
void
)regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
}
PUSH_STATE_GOTO(TRIE_next, scan, (
char
*)uc, loceol,
script_run_begin);
NOT_REACHED;
}
DEBUG_EXECUTE_r({
AV *
const
trie_words
= MUTABLE_AV(rexi->data->data[ARG1u(ST.me)+TRIE_WORDS_OFFSET]);
SV **
const
tmp = trie_words
? av_fetch(trie_words, ST.nextword - 1, 0) : NULL;
SV *sv= tmp ? sv_newmortal() : NULL;
Perl_re_exec_indentf( aTHX_
"%sTRIE: only one match left, short-circuiting: #%d <%s>%s\n"
,
depth, PL_colors[4],
ST.nextword,
tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
PL_colors[0], PL_colors[1],
(SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)|PERL_PV_ESCAPE_NONASCII
)
:
"not compiled under -Dr"
,
PL_colors[5] );
});
locinput = (
char
*)uc;
continue
;
}
#undef ST
case
LEXACT_REQ8:
if
(! utf8_target) {
sayNO;
}
case
LEXACT:
{
char
*s;
s = STRINGl(scan);
ln = STR_LENl(scan);
goto
join_short_long_exact;
case
EXACTL:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if
(utf8_target && UTF8_IS_ABOVE_LATIN1(*locinput)) {
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput, reginfo->strend);
}
goto
do_exact;
case
EXACT_REQ8:
if
(! utf8_target) {
sayNO;
}
case
EXACT:
do_exact:
s = STRINGs(scan);
ln = STR_LENs(scan);
join_short_long_exact:
if
(utf8_target != is_utf8_pat) {
char
*l = locinput;
const
char
*
const
e = s + ln;
if
(utf8_target) {
while
(s < e) {
if
( l >= loceol
|| UTF8_IS_ABOVE_LATIN1(* (U8*) l))
{
sayNO;
}
if
(UTF8_IS_INVARIANT(*(U8*)l)) {
if
(*l != *s) {
sayNO;
}
l++;
}
else
{
if
(EIGHT_BIT_UTF8_TO_NATIVE(*l, *(l+1)) != * (U8*) s)
{
sayNO;
}
l += 2;
}
s++;
}
}
else
{
while
(s < e) {
if
( l >= loceol
|| UTF8_IS_ABOVE_LATIN1(* (U8*) s))
{
sayNO;
}
if
(UTF8_IS_INVARIANT(*(U8*)s)) {
if
(*s != *l) {
sayNO;
}
s++;
}
else
{
if
(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s+1)) != * (U8*) l)
{
sayNO;
}
s += 2;
}
l++;
}
}
locinput = l;
}
else
{
if
( loceol - locinput < ln
|| UCHARAT(s) != nextbyte
|| (ln > 1 && memNE(s, locinput, ln)))
{
sayNO;
}
locinput += ln;
}
break
;
}
case
EXACTFL:
{
const
char
* s;
U32 fold_utf8_flags;
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
folder = Perl_foldEQ_locale;
fold_array = PL_fold_locale;
fold_utf8_flags = FOLDEQ_LOCALE;
goto
do_exactf;
case
EXACTFLU8:
if
(! utf8_target) {
sayNO;
}
fold_utf8_flags = FOLDEQ_LOCALE | FOLDEQ_S2_ALREADY_FOLDED
| FOLDEQ_S2_FOLDS_SANE;
folder = S_foldEQ_latin1_s2_folded;
fold_array = PL_fold_latin1;
goto
do_exactf;
case
EXACTFU_REQ8:
if
(! utf8_target) {
sayNO;
}
assert
(is_utf8_pat);
fold_utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
#ifdef DEBUGGING
folder = NULL;
fold_array = NULL;
#endif
goto
do_exactf;
case
EXACTFUP:
assert
(! is_utf8_pat);
folder = Perl_foldEQ_latin1;
fold_array = PL_fold_latin1;
fold_utf8_flags = 0;
goto
do_exactf;
case
EXACTFU:
folder = S_foldEQ_latin1_s2_folded;
fold_array = PL_fold_latin1;
fold_utf8_flags = FOLDEQ_S2_ALREADY_FOLDED;
goto
do_exactf;
case
EXACTFAA_NO_TRIE:
assert
(! is_utf8_pat);
case
EXACTFAA:
folder = S_foldEQ_latin1_s2_folded;
fold_array = PL_fold_latin1;
fold_utf8_flags = FOLDEQ_UTF8_NOMIX_ASCII;
if
(is_utf8_pat || ! utf8_target) {
fold_utf8_flags |= FOLDEQ_S2_ALREADY_FOLDED
|FOLDEQ_S2_FOLDS_SANE;
}
goto
do_exactf;
case
EXACTF:
assert
(! is_utf8_pat);
folder = Perl_foldEQ;
fold_array = PL_fold;
fold_utf8_flags = 0;
do_exactf:
s = STRINGs(scan);
ln = STR_LENs(scan);
if
( utf8_target
|| is_utf8_pat
|| state_num == EXACTFUP
|| (state_num == EXACTFL && IN_UTF8_CTYPE_LOCALE))
{
const
char
*
const
l = locinput;
char
*e = loceol;
if
(! foldEQ_utf8_flags(l, &e, 0, utf8_target,
s, 0, ln, is_utf8_pat,fold_utf8_flags))
{
sayNO;
}
locinput = e;
break
;
}
assert
(fold_array);
if
(UCHARAT(s) != nextbyte
&& !NEXTCHR_IS_EOS
&& UCHARAT(s) != fold_array[nextbyte])
{
sayNO;
}
if
(loceol - locinput < ln)
sayNO;
assert
(folder);
if
(ln > 1 && ! folder(aTHX_ locinput, s, ln))
sayNO;
locinput += ln;
break
;
}
case
NBOUNDL:
to_complement = 1;
case
BOUNDL:
{
bool
b1, b2;
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if
(FLAGS(scan) != TRADITIONAL_BOUND) {
CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_BOUND;
goto
boundu;
}
if
(utf8_target) {
if
(locinput == reginfo->strbeg)
b1 = isWORDCHAR_LC(
'\n'
);
else
{
U8 *p = reghop3((U8*)locinput, -1,
(U8*)(reginfo->strbeg));
b1 = isWORDCHAR_LC_utf8_safe(p, (U8*)(reginfo->strend));
}
b2 = (NEXTCHR_IS_EOS)
? isWORDCHAR_LC(
'\n'
)
: isWORDCHAR_LC_utf8_safe((U8*) locinput,
(U8*) reginfo->strend);
}
else
{
b1 = (locinput == reginfo->strbeg)
? isWORDCHAR_LC(
'\n'
)
: isWORDCHAR_LC(UCHARAT(locinput - 1));
b2 = (NEXTCHR_IS_EOS)
? isWORDCHAR_LC(
'\n'
)
: isWORDCHAR_LC(nextbyte);
}
if
(to_complement ^ (b1 == b2)) {
sayNO;
}
break
;
}
case
NBOUND:
to_complement = 1;
case
BOUND:
if
(utf8_target) {
goto
bound_utf8;
}
goto
bound_ascii_match_only;
case
NBOUNDA:
to_complement = 1;
case
BOUNDA:
{
bool
b1, b2;
bound_ascii_match_only:
b1 = (locinput == reginfo->strbeg)
? isWORDCHAR_A(
'\n'
)
: isWORDCHAR_A(UCHARAT(locinput - 1));
b2 = (NEXTCHR_IS_EOS)
? isWORDCHAR_A(
'\n'
)
: isWORDCHAR_A(nextbyte);
if
(to_complement ^ (b1 == b2)) {
sayNO;
}
break
;
}
case
NBOUNDU:
to_complement = 1;
case
BOUNDU:
boundu:
if
(UNLIKELY(reginfo->strbeg >= reginfo->strend)) {
match = FALSE;
}
else
if
(utf8_target) {
bound_utf8:
switch
((bound_type) FLAGS(scan)) {
case
TRADITIONAL_BOUND:
{
bool
b1, b2;
if
(locinput == reginfo->strbeg) {
b1 = 0
;
}
else
{
U8 *p = reghop3((U8*)locinput, -1,
(U8*)(reginfo->strbeg));
b1 = isWORDCHAR_utf8_safe(p, (U8*) reginfo->strend);
}
b2 = (NEXTCHR_IS_EOS)
? 0
: isWORDCHAR_utf8_safe((U8*)locinput,
(U8*) reginfo->strend);
match = cBOOL(b1 != b2);
break
;
}
case
GCB_BOUND:
if
(locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isGCB(getGCB_VAL_UTF8(
reghop3((U8*)locinput,
-1,
(U8*)(reginfo->strbeg)),
(U8*) reginfo->strend),
getGCB_VAL_UTF8((U8*) locinput,
(U8*) reginfo->strend),
(U8*) reginfo->strbeg,
(U8*) locinput,
utf8_target);
}
break
;
case
LB_BOUND:
if
(locinput == reginfo->strbeg) {
match = FALSE;
}
else
if
(NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isLB(getLB_VAL_UTF8(
reghop3((U8*)locinput,
-1,
(U8*)(reginfo->strbeg)),
(U8*) reginfo->strend),
getLB_VAL_UTF8((U8*) locinput,
(U8*) reginfo->strend),
(U8*) reginfo->strbeg,
(U8*) locinput,
(U8*) reginfo->strend,
utf8_target);
}
break
;
case
SB_BOUND:
if
(locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isSB(getSB_VAL_UTF8(
reghop3((U8*)locinput,
-1,
(U8*)(reginfo->strbeg)),
(U8*) reginfo->strend),
getSB_VAL_UTF8((U8*) locinput,
(U8*) reginfo->strend),
(U8*) reginfo->strbeg,
(U8*) locinput,
(U8*) reginfo->strend,
utf8_target);
}
break
;
case
WB_BOUND:
if
(locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isWB(WB_UNKNOWN,
getWB_VAL_UTF8(
reghop3((U8*)locinput,
-1,
(U8*)(reginfo->strbeg)),
(U8*) reginfo->strend),
getWB_VAL_UTF8((U8*) locinput,
(U8*) reginfo->strend),
(U8*) reginfo->strbeg,
(U8*) locinput,
(U8*) reginfo->strend,
utf8_target);
}
break
;
}
}
else
{
switch
((bound_type) FLAGS(scan)) {
case
TRADITIONAL_BOUND:
{
bool
b1, b2;
b1 = (locinput == reginfo->strbeg)
? 0
: isWORDCHAR_L1(UCHARAT(locinput - 1));
b2 = (NEXTCHR_IS_EOS)
? 0
: isWORDCHAR_L1(nextbyte);
match = cBOOL(b1 != b2);
break
;
}
case
GCB_BOUND:
if
(locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = UCHARAT(locinput - 1) !=
'\r'
|| UCHARAT(locinput) !=
'\n'
;
}
break
;
case
LB_BOUND:
if
(locinput == reginfo->strbeg) {
match = FALSE;
}
else
if
(NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isLB(getLB_VAL_CP(UCHARAT(locinput -1)),
getLB_VAL_CP(UCHARAT(locinput)),
(U8*) reginfo->strbeg,
(U8*) locinput,
(U8*) reginfo->strend,
utf8_target);
}
break
;
case
SB_BOUND:
if
(locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isSB(getSB_VAL_CP(UCHARAT(locinput -1)),
getSB_VAL_CP(UCHARAT(locinput)),
(U8*) reginfo->strbeg,
(U8*) locinput,
(U8*) reginfo->strend,
utf8_target);
}
break
;
case
WB_BOUND:
if
(locinput == reginfo->strbeg || NEXTCHR_IS_EOS) {
match = TRUE;
}
else
{
match = isWB(WB_UNKNOWN,
getWB_VAL_CP(UCHARAT(locinput -1)),
getWB_VAL_CP(UCHARAT(locinput)),
(U8*) reginfo->strbeg,
(U8*) locinput,
(U8*) reginfo->strend,
utf8_target);
}
break
;
}
}
if
(to_complement ^ ! match) {
sayNO;
}
break
;
case
ANYOFPOSIXL:
case
ANYOFL:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(scan);
case
ANYOFD:
case
ANYOF:
if
(NEXTCHR_IS_EOS || locinput >= loceol)
sayNO;
if
( (! utf8_target || UTF8_IS_INVARIANT(*locinput))
&& ! ANYOF_FLAGS(scan)
&& ANYOF_MATCHES_NONE_OUTSIDE_BITMAP(scan))
{
if
(! ANYOF_BITMAP_TEST(scan, * (U8 *) (locinput))) {
sayNO;
}
locinput++;
}
else
{
if
(!reginclass(rex, scan, (U8*)locinput, (U8*) loceol,
utf8_target))
{
sayNO;
}
goto
increment_locinput;
}
break
;
case
ANYOFM:
if
( NEXTCHR_IS_EOS
|| (UCHARAT(locinput) & FLAGS(scan)) != ARG1u(scan)
|| locinput >= loceol)
{
sayNO;
}
locinput++;
break
;
case
NANYOFM:
if
( NEXTCHR_IS_EOS
|| (UCHARAT(locinput) & FLAGS(scan)) == ARG1u(scan)
|| locinput >= loceol)
{
sayNO;
}
goto
increment_locinput;
break
;
case
ANYOFH:
if
( ! utf8_target
|| NEXTCHR_IS_EOS
|| ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
|| ! (anyofh_list = GET_ANYOFH_INVLIST(rex, scan))
|| ! _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) loceol,
NULL)))
{
sayNO;
}
goto
increment_locinput;
break
;
case
ANYOFHb:
if
( ! utf8_target
|| NEXTCHR_IS_EOS
|| ANYOF_FLAGS(scan) != (U8) *locinput
|| ! (anyofh_list = GET_ANYOFH_INVLIST(rex, scan))
|| ! _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) loceol,
NULL)))
{
sayNO;
}
goto
increment_locinput;
break
;
case
ANYOFHbbm:
if
( ! utf8_target
|| NEXTCHR_IS_EOS
|| ANYOF_FLAGS(scan) != (U8) locinput[0]
|| locinput >= reginfo->strend
|| ! BITMAP_TEST(( (
struct
regnode_bbm *) scan)->bitmap,
(U8) locinput[1] & UTF_CONTINUATION_MASK))
{
sayNO;
}
goto
increment_locinput;
break
;
case
ANYOFHr:
if
( ! utf8_target
|| NEXTCHR_IS_EOS
|| ! inRANGE((U8) NATIVE_UTF8_TO_I8(*locinput),
LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)),
HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(scan)))
|| ! (anyofh_list = GET_ANYOFH_INVLIST(rex, scan))
|| ! _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) loceol,
NULL)))
{
sayNO;
}
goto
increment_locinput;
break
;
case
ANYOFHs:
if
( ! utf8_target
|| NEXTCHR_IS_EOS
|| loceol - locinput < FLAGS(scan)
|| memNE(locinput, ((
struct
regnode_anyofhs *) scan)->string, FLAGS(scan))
|| ! (anyofh_list = GET_ANYOFH_INVLIST(rex, scan))
|| ! _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) loceol,
NULL)))
{
sayNO;
}
goto
increment_locinput;
break
;
case
ANYOFR:
if
(NEXTCHR_IS_EOS) {
sayNO;
}
if
(utf8_target) {
if
( ANYOF_FLAGS(scan) > NATIVE_UTF8_TO_I8(*locinput)
|| ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) reginfo->strend,
NULL),
ANYOFRbase(scan), ANYOFRdelta(scan)))
{
sayNO;
}
}
else
{
if
(! withinCOUNT((U8) *locinput,
ANYOFRbase(scan), ANYOFRdelta(scan)))
{
sayNO;
}
}
goto
increment_locinput;
break
;
case
ANYOFRb:
if
(NEXTCHR_IS_EOS) {
sayNO;
}
if
(utf8_target) {
if
( ANYOF_FLAGS(scan) != (U8) *locinput
|| ! withinCOUNT(utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) reginfo->strend,
NULL),
ANYOFRbase(scan), ANYOFRdelta(scan)))
{
sayNO;
}
}
else
{
if
(! withinCOUNT((U8) *locinput,
ANYOFRbase(scan), ANYOFRdelta(scan)))
{
sayNO;
}
}
goto
increment_locinput;
break
;
case
NPOSIXL:
to_complement = 1;
case
POSIXL:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
if
(NEXTCHR_IS_EOS || locinput >= loceol)
sayNO;
if
(UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
if
(! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan), (U8) nextbyte)))) {
sayNO;
}
locinput++;
break
;
}
if
(! UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(locinput,
reginfo->strend);
goto
utf8_posix_above_latin1;
}
if
(! (to_complement ^ cBOOL(isFOO_lc(FLAGS(scan),
EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
*(locinput + 1))))))
{
sayNO;
}
goto
increment_locinput;
case
NPOSIXD:
to_complement = 1;
case
POSIXD:
if
(utf8_target) {
goto
utf8_posix;
}
goto
posixa;
case
NPOSIXA:
if
(NEXTCHR_IS_EOS || locinput >= loceol) {
sayNO;
}
if
(! UTF8_IS_INVARIANT(nextbyte)) {
goto
increment_locinput;
}
to_complement = 1;
goto
join_nposixa;
case
POSIXA:
posixa:
if
(NEXTCHR_IS_EOS || locinput >= loceol) {
sayNO;
}
join_nposixa:
if
(! (to_complement ^ cBOOL(generic_isCC_A_(nextbyte,
FLAGS(scan)))))
{
sayNO;
}
locinput++;
break
;
case
NPOSIXU:
to_complement = 1;
case
POSIXU:
utf8_posix:
if
(NEXTCHR_IS_EOS || locinput >= loceol) {
sayNO;
}
if
(UTF8_IS_INVARIANT(nextbyte) || ! utf8_target) {
if
(! (to_complement ^ cBOOL(generic_isCC_(nextbyte,
FLAGS(scan)))))
{
sayNO;
}
locinput++;
}
else
if
(UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(locinput, reginfo->strend)) {
if
(! (to_complement
^ cBOOL(generic_isCC_(EIGHT_BIT_UTF8_TO_NATIVE(nextbyte,
*(locinput + 1)),
FLAGS(scan)))))
{
sayNO;
}
locinput += 2;
}
else
{
utf8_posix_above_latin1:
classnum = (char_class_number_) FLAGS(scan);
switch
(classnum) {
default
:
if
(! (to_complement
^ cBOOL(_invlist_contains_cp(
PL_XPosix_ptrs[classnum],
utf8_to_uvchr_buf((U8 *) locinput,
(U8 *) reginfo->strend,
NULL)))))
{
sayNO;
}
break
;
case
CC_ENUM_SPACE_:
if
(! (to_complement
^ cBOOL(is_XPERLSPACE_high(locinput))))
{
sayNO;
}
break
;
case
CC_ENUM_BLANK_:
if
(! (to_complement
^ cBOOL(is_HORIZWS_high(locinput))))
{
sayNO;
}
break
;
case
CC_ENUM_XDIGIT_:
if
(! (to_complement
^ cBOOL(is_XDIGIT_high(locinput))))
{
sayNO;
}
break
;
case
CC_ENUM_VERTSPACE_:
if
(! (to_complement
^ cBOOL(is_VERTWS_high(locinput))))
{
sayNO;
}
break
;
case
CC_ENUM_CNTRL_:
case
CC_ENUM_ASCII_:
if
(! to_complement) {
sayNO;
}
break
;
}
locinput += UTF8_SAFE_SKIP(locinput, reginfo->strend);
}
break
;
case
CLUMP:
if
(NEXTCHR_IS_EOS || locinput >= loceol)
sayNO;
if
(! utf8_target) {
locinput++;
if
(nextbyte ==
'\r'
&& locinput < loceol
&& UCHARAT(locinput) ==
'\n'
)
{
locinput++;
}
}
else
{
GCB_enum prev_gcb = getGCB_VAL_UTF8((U8*) locinput,
(U8*) reginfo->strend);
locinput += UTF8SKIP(locinput);
while
(locinput < loceol) {
GCB_enum cur_gcb = getGCB_VAL_UTF8((U8*) locinput,
(U8*) reginfo->strend);
if
(isGCB(prev_gcb, cur_gcb,
(U8*) reginfo->strbeg, (U8*) locinput,
utf8_target))
{
break
;
}
prev_gcb = cur_gcb;
locinput += UTF8SKIP(locinput);
}
}
break
;
case
REFFLN:
{
const
char
*s;
char
type;
re_fold_t folder;
const
U8 *fold_array;
UV utf8_fold_flags;
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
folder = Perl_foldEQ_locale;
fold_array = PL_fold_locale;
type = REFFL;
utf8_fold_flags = FOLDEQ_LOCALE;
goto
do_nref;
case
REFFAN:
folder = Perl_foldEQ_latin1;
fold_array = PL_fold_latin1;
type = REFFA;
utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
goto
do_nref;
case
REFFUN:
folder = Perl_foldEQ_latin1;
fold_array = PL_fold_latin1;
type = REFFU;
utf8_fold_flags = 0;
goto
do_nref;
case
REFFN:
folder = Perl_foldEQ;
fold_array = PL_fold;
type = REFF;
utf8_fold_flags = 0;
goto
do_nref;
case
REFN:
type = REF;
folder = NULL;
fold_array = NULL;
utf8_fold_flags = 0;
do_nref:
n = reg_check_named_buff_matched(rex,scan);
if
( ! n ) {
sayNO;
}
goto
do_nref_ref_common;
case
REFFL:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
folder = Perl_foldEQ_locale;
fold_array = PL_fold_locale;
utf8_fold_flags = FOLDEQ_LOCALE;
goto
do_ref;
case
REFFA:
folder = Perl_foldEQ_latin1;
fold_array = PL_fold_latin1;
utf8_fold_flags = FOLDEQ_UTF8_NOMIX_ASCII;
goto
do_ref;
case
REFFU:
folder = Perl_foldEQ_latin1;
fold_array = PL_fold_latin1;
utf8_fold_flags = 0;
goto
do_ref;
case
REFF:
folder = Perl_foldEQ;
fold_array = PL_fold;
utf8_fold_flags = 0;
goto
do_ref;
#undef ST
#define ST st->u.backref
case
REF:
folder = NULL;
fold_array = NULL;
utf8_fold_flags = 0;
do_ref:
type = OP(scan);
n = ARG1u(scan);
if
(rex->logical_to_parno) {
n = rex->logical_to_parno[n];
do
{
if
( RXp_LASTPAREN(rex) < n ||
RXp_OFFS_START(rex,n) == -1 ||
RXp_OFFS_END(rex,n) == -1
) {
n = rex->parno_to_logical_next[n];
}
else
{
break
;
}
}
while
(n);
if
(!n)
sayNO;
}
do_nref_ref_common:
reginfo->poscache_iter = reginfo->poscache_maxiter;
if
(RXp_LASTPAREN(rex) < n)
sayNO;
ln = RXp_OFFSp(rex)[n].start;
endref = RXp_OFFSp(rex)[n].end;
if
(ln == -1 || endref == -1)
sayNO;
if
(ln == endref)
goto
ref_yes;
s = reginfo->strbeg + ln;
if
(type != REF
&& (utf8_target || type == REFFU || type == REFFL))
{
char
* limit = loceol;
if
(! foldEQ_utf8_flags(s, NULL, endref - ln, utf8_target,
locinput, &limit, 0, utf8_target, utf8_fold_flags))
{
sayNO;
}
locinput = limit;
goto
ref_yes;
}
if
( ! NEXTCHR_IS_EOS
&& locinput < loceol
&& UCHARAT(s) != nextbyte
&& ( type == REF
|| UCHARAT(s) != fold_array[nextbyte]))
{
sayNO;
}
ln = endref - ln;
if
(locinput + ln > loceol)
sayNO;
if
(ln > 1 && (type == REF
? memNE(s, locinput, ln)
: ! folder(aTHX_ locinput, s, ln)))
sayNO;
locinput += ln;
}
ref_yes:
if
(FLAGS(scan)) {
ST.cp = regcppush(rex, ARG2u(scan) - 1, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(REF_next, next, locinput, loceol,
script_run_begin);
}
break
;
NOT_REACHED;
case
REF_next:
sayYES;
break
;
case
REF_next_fail:
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
sayNO;
break
;
case
NOTHING:
break
;
case
TAIL:
break
;
#undef ST
#define ST st->u.eval
#define CUR_EVAL cur_eval->u.eval
{
SV *ret;
REGEXP *re_sv;
regexp *re;
regexp_internal *rei;
regnode *startpoint;
U32 arg;
case
GOSUB:
arg = ARG1u(scan);
if
(cur_eval && cur_eval->locinput == locinput) {
if
( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_
"Pattern subroutine nesting without pos change"
" exceeded limit in regex"
);
}
else
{
nochange_depth = 0;
}
re_sv = rex_sv;
re = rex;
rei = rexi;
startpoint = scan + ARG2i(scan);
EVAL_CLOSE_PAREN_SET( st, arg );
if
( rex->recurse_locinput[arg] == locinput ) {
Perl_croak(aTHX_
"Infinite recursion in regex"
);
}
else
{
ST.prev_recurse_locinput= rex->recurse_locinput[arg];
rex->recurse_locinput[arg]= locinput;
DEBUG_r({
DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_STACK_r({
Perl_re_exec_indentf( aTHX_
"entering GOSUB, prev_recurse_locinput=%p recurse_locinput[%d]=%p\n"
,
depth, ST.prev_recurse_locinput, arg, rex->recurse_locinput[arg]
);
});
});
}
ST.cp = regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
goto
eval_recurse_doit;
case
EVAL:
if
(logical == 2 && cur_eval && cur_eval->locinput==locinput) {
if
( ++nochange_depth > max_nochange_depth )
Perl_croak(aTHX_
"EVAL without pos change exceeded limit in regex"
);
}
else
{
nochange_depth = 0;
}
{
dSP;
IV before;
OP *
const
oop = PL_op;
COP *
const
ocurcop = PL_curcop;
OP *nop;
CV *newcv;
ST.cp = regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
if
(!caller_cv)
caller_cv = find_runcv(NULL);
n = ARG1u(scan);
if
(rexi->data->what[n] ==
'r'
) {
newcv = (ReANY(
(REGEXP*)(rexi->data->data[n])
))->qr_anoncv;
nop = (OP*)rexi->data->data[n+1];
}
else
if
(rexi->data->what[n] ==
'l'
) {
newcv = caller_cv;
nop = (OP*)rexi->data->data[n];
assert
(CvDEPTH(newcv));
}
else
{
assert
(rexi->data->what[n] ==
'L'
);
newcv = rex->qr_anoncv;
nop = (OP*)rexi->data->data[n];
}
if
(newcv != last_pushed_cv || PL_comppad != last_pad)
{
U8 flags = (CXp_SUB_RE |
((newcv == caller_cv) ? CXp_SUB_RE_FAKE : 0));
SAVECOMPPAD();
if
(last_pushed_cv) {
CHANGE_MULTICALL_FLAGS(newcv, flags);
}
else
{
PUSH_MULTICALL_FLAGS(newcv, flags);
}
CX_CUR()->blk_oldsaveix = orig_savestack_ix;
last_pushed_cv = newcv;
}
else
{
multicall_cop = NULL;
}
last_pad = PL_comppad;
{
OP *o = cUNOPx(nop)->op_first;
assert
(o->op_type == OP_NULL);
if
(o->op_targ == OP_SCOPE) {
o = cUNOPo->op_first;
}
else
{
assert
(o->op_targ == OP_LEAVE);
o = cUNOPo->op_first;
assert
(o->op_type == OP_ENTER);
o = OpSIBLING(o);
}
if
(o->op_type != OP_STUB) {
assert
( o->op_type == OP_NEXTSTATE
|| o->op_type == OP_DBSTATE
|| (o->op_type == OP_NULL
&& ( o->op_targ == OP_NEXTSTATE
|| o->op_targ == OP_DBSTATE
)
)
);
PL_curcop = (COP*)o;
}
}
nop = nop->op_next;
DEBUG_STATE_r( Perl_re_printf( aTHX_
" re EVAL PL_op=0x%"
UVxf
"\n"
, PTR2UV(nop)) );
RXp_OFFSp(rex)[0].end = locinput - reginfo->strbeg;
if
(reginfo->info_aux_eval->pos_magic)
MgBYTEPOS_set(reginfo->info_aux_eval->pos_magic,
reginfo->sv, reginfo->strbeg,
locinput - reginfo->strbeg);
if
(sv_yes_mark) {
SV *sv_mrk = get_sv(
"REGMARK"
, 1);
sv_setsv(sv_mrk, sv_yes_mark);
}
before = (IV)(SP-PL_stack_base);
PL_op = nop;
CALLRUNOPS(aTHX);
SPAGAIN;
if
((IV)(SP-PL_stack_base) == before)
ret = &PL_sv_undef;
else
{
ret = POPs;
PUTBACK;
}
PL_op = NULL;
re_sv = NULL;
if
(logical == 0) {
SV *replsv = save_scalar(PL_replgv);
sv_setsv(replsv, ret);
SvSETMAGIC(replsv);
}
else
if
(logical == 1) {
sw = cBOOL(SvTRUE_NN(ret));
logical = 0;
}
else
{
if
(SvGMAGICAL(ret))
ret = sv_mortalcopy(ret);
if
(!SvAMAGIC(ret)) {
SV *sv = ret;
if
(SvROK(sv))
sv = SvRV(sv);
if
(SvTYPE(sv) == SVt_REGEXP)
re_sv = (REGEXP*) sv;
else
if
(SvSMAGICAL(ret)) {
MAGIC *mg = mg_find(ret, PERL_MAGIC_qr);
if
(mg)
re_sv = (REGEXP *) mg->mg_obj;
}
if
(!re_sv && !SvPOK(ret) && !SvNIOK(ret)) {
ret = sv_mortalcopy(ret);
(
void
) SvPV_force_nolen(ret);
}
}
}
PL_op = oop;
PL_curcop = ocurcop;
regcp_restore(rex, ST.lastcp, &maxopenparen);
PL_curpm_under = PL_curpm;
PL_curpm = PL_reg_curpm;
if
(logical != 2) {
PUSH_STATE_GOTO(EVAL_B, next, locinput, loceol,
script_run_begin);
}
}
logical = 0;
{
if
(re_sv) {
re_sv = reg_temp_copy(NULL, re_sv);
}
else
{
U32 pm_flags = 0;
if
(SvUTF8(ret) && IN_BYTES) {
STRLEN len;
const
char
*
const
p = SvPV(ret, len);
ret = newSVpvn_flags(p, len, SVs_TEMP);
}
if
(rex->intflags & PREGf_USE_RE_EVAL)
pm_flags |= PMf_USE_RE_EVAL;
assert
(rex->engine && rex->engine->op_comp);
assert
(!(FLAGS(scan) & ~RXf_PMf_COMPILETIME));
re_sv = rex->engine->op_comp(aTHX_ &ret, 1, NULL,
rex->engine, NULL, NULL,
ARG2i(scan),
pm_flags);
if
(!(SvFLAGS(ret)
& (SVs_TEMP | SVs_GMG | SVf_ROK))
&& (!SvPADTMP(ret) || SvREADONLY(ret))) {
sv_magic(ret, MUTABLE_SV(re_sv), PERL_MAGIC_qr, 0, 0);
}
}
SAVEFREESV(re_sv);
re = ReANY(re_sv);
}
RXp_MATCH_COPIED_off(re);
RXp_SUBBEG(re) = RXp_SUBBEG(rex);
RXp_SUBLEN(re) = RXp_SUBLEN(rex);
RXp_SUBOFFSET(re) = RXp_SUBOFFSET(rex);
RXp_SUBCOFFSET(re) = RXp_SUBCOFFSET(rex);
RXp_LASTPAREN(re) = 0;
RXp_LASTCLOSEPAREN(re) = 0;
rei = RXi_GET(re);
DEBUG_EXECUTE_r(
debug_start_match(re_sv, utf8_target, locinput,
reginfo->strend,
"EVAL/GOSUB: Matching embedded"
);
);
startpoint = rei->program + 1;
EVAL_CLOSE_PAREN_CLEAR(st);
ST.prev_recurse_locinput= NULL;
maxopenparen = 0;
eval_recurse_doit:
reginfo->poscache_maxiter = 0;
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(re_sv));
ST.prev_rex = rex_sv;
ST.prev_curlyx = cur_curlyx;
rex_sv = re_sv;
SET_reg_curpm(rex_sv);
rex = re;
rexi = rei;
cur_curlyx = NULL;
ST.B = next;
ST.prev_eval = cur_eval;
cur_eval = st;
PUSH_YES_STATE_GOTO(EVAL_postponed_AB, startpoint, locinput,
loceol, script_run_begin);
NOT_REACHED;
}
case
EVAL_postponed_AB:
DEBUG_STACK_r({
Perl_re_exec_indentf( aTHX_
"EVAL_AB cur_eval=%p prev_eval=%p\n"
,
depth, cur_eval, ST.prev_eval);
});
/* SET_RECURSE_LOCINPUT(STR, VAL)
 *
 * If there is a current eval frame (cur_eval) and its close_paren field is
 * non-zero, record VAL in rex->recurse_locinput[] at index
 * (CUR_EVAL.close_paren - 1).  Under DEBUG_STACK_r a trace line prefixed
 * with STR is emitted first, showing that index, cur_eval and VAL.
 *
 * STR must be a string literal, as it is pasted onto the format string.
 * VAL is expanded twice (once only in the debug trace), so it should be free
 * of side effects.
 *
 * The body is wrapped in STMT_START/STMT_END so that the macro expands to
 * exactly one statement.  A bare 'if (...) { ... }' expansion followed by
 * the caller's ';' would break when used as the unbraced body of an
 * if/else.
 */
#define SET_RECURSE_LOCINPUT(STR,VAL) \
    STMT_START { \
        if ( cur_eval && CUR_EVAL.close_paren ) { \
            DEBUG_STACK_r({ \
                Perl_re_exec_indentf( aTHX_ STR \
                    " GOSUB%d ce=%p recurse_locinput=%p\n", \
                    depth, \
                    CUR_EVAL.close_paren - 1, \
                    cur_eval, \
                    VAL); \
            }); \
            rex->recurse_locinput[CUR_EVAL.close_paren - 1] = VAL; \
        } \
    } STMT_END
SET_RECURSE_LOCINPUT(
"EVAL_AB[before]"
, CUR_EVAL.prev_recurse_locinput);
rex_sv = ST.prev_rex;
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
SET_reg_curpm(rex_sv);
rex = ReANY(rex_sv);
rexi = RXi_GET(rex);
{
SV *save_sv= GvSV(PL_replgv);
SV *replsv;
SvREFCNT_inc(save_sv);
regcpblow(ST.cp);
replsv = GvSV(PL_replgv);
sv_setsv(replsv, save_sv);
SvSETMAGIC(replsv);
SvREFCNT_dec(save_sv);
}
cur_eval = ST.prev_eval;
cur_curlyx = ST.prev_curlyx;
reginfo->poscache_maxiter = 0;
if
( nochange_depth )
nochange_depth--;
SET_RECURSE_LOCINPUT(
"EVAL_AB[after]"
, cur_eval->locinput);
sayYES;
case
EVAL_B_fail:
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
sayNO;
case
EVAL_postponed_AB_fail:
DEBUG_STACK_r({
Perl_re_exec_indentf( aTHX_
"EVAL_AB_fail cur_eval=%p prev_eval=%p\n"
,
depth, cur_eval, ST.prev_eval);
});
SET_RECURSE_LOCINPUT(
"EVAL_AB_fail[before]"
, CUR_EVAL.prev_recurse_locinput);
rex_sv = ST.prev_rex;
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
SET_reg_curpm(rex_sv);
rex = ReANY(rex_sv);
rexi = RXi_GET(rex);
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
cur_eval = ST.prev_eval;
cur_curlyx = ST.prev_curlyx;
reginfo->poscache_maxiter = 0;
if
( nochange_depth )
nochange_depth--;
SET_RECURSE_LOCINPUT(
"EVAL_AB_fail[after]"
, cur_eval->locinput);
sayNO_SILENT;
#undef ST
case
OPEN:
n = PARNO(scan);
RXp_OFFSp(rex)[n].start_tmp = locinput - reginfo->strbeg;
if
(n > maxopenparen)
maxopenparen = n;
DEBUG_BUFFERS_r(Perl_re_exec_indentf( aTHX_
"OPEN: rex=0x%"
UVxf
" offs=0x%"
UVxf
": \\%"
UVuf
": set %"
IVdf
" tmp; maxopenparen=%"
UVuf
"\n"
,
depth,
PTR2UV(rex),
PTR2UV(RXp_OFFSp(rex)),
(UV)n,
(IV)RXp_OFFSp(rex)[n].start_tmp,
(UV)maxopenparen
));
lastopen = n;
break
;
case
SROPEN:
script_run_begin = (U8 *) locinput;
break
;
case
CLOSE:
n = PARNO(scan);
CLOSE_CAPTURE(rex, n, RXp_OFFSp(rex)[n].start_tmp,
locinput - reginfo->strbeg);
if
( EVAL_CLOSE_PAREN_IS( cur_eval, n ) )
goto
fake_end;
break
;
case
SRCLOSE:
if
(! isSCRIPT_RUN(script_run_begin, (U8 *) locinput, utf8_target))
{
sayNO;
}
break
;
case
ACCEPT:
is_accepted =
true
;
if
(FLAGS(scan))
sv_yes_mark = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
utmp = ARG2u(scan);
if
( utmp ) {
regnode *cursor;
for
(
cursor = scan;
cursor && ( OP(cursor) != END );
cursor = (
REGNODE_TYPE( OP(cursor) ) == END
|| REGNODE_TYPE( OP(cursor) ) == WHILEM
)
? REGNODE_AFTER(cursor)
: regnext(cursor)
){
if
( OP(cursor) != CLOSE )
continue
;
n = PARNO(cursor);
if
( n > lastopen )
continue
;
CLOSE_CAPTURE(rex, n, RXp_OFFSp(rex)[n].start_tmp,
locinput - reginfo->strbeg);
if
( n == utmp || EVAL_CLOSE_PAREN_IS(cur_eval, n) )
break
;
}
}
goto
fake_end;
case
GROUPP:
n = ARG1u(scan);
sw = cBOOL(RXp_LASTPAREN(rex) >= n && RXp_OFFS_END(rex,n) != -1);
break
;
case
GROUPPN:
sw = cBOOL(0 < reg_check_named_buff_matched(rex,scan));
break
;
case
INSUBP:
n = ARG1u(scan);
sw = cur_eval && (n == 0 || CUR_EVAL.close_paren == n);
break
;
case
DEFINEP:
sw = 0;
break
;
case
IFTHEN:
reginfo->poscache_iter = reginfo->poscache_maxiter;
if
(sw)
next = REGNODE_AFTER_type(scan,tregnode_IFTHEN);
else
{
next = scan + ARG1u(scan);
if
(OP(next) == IFTHEN)
next = REGNODE_AFTER_type(next,tregnode_IFTHEN);
}
break
;
case
LOGICAL:
logical = FLAGS(scan) & EVAL_FLAGS_MASK;
break
;
#define ST st->u.curlyx
case
CURLYX:
{
I32 parenfloor = FLAGS(scan);
assert
(next);
if
(OP(REGNODE_BEFORE(next)) == NOTHING)
next += ARG1u(next);
if
(parenfloor > (I32)RXp_LASTPAREN(rex))
parenfloor = RXp_LASTPAREN(rex);
ST.prev_curlyx= cur_curlyx;
cur_curlyx = st;
ST.cp = PL_savestack_ix;
ST.parenfloor = parenfloor;
ST.me = scan;
ST.B = next;
ST.minmod = minmod;
minmod = 0;
ST.count = -1;
ST.lastloc = NULL;
PUSH_YES_STATE_GOTO(CURLYX_end, REGNODE_BEFORE(next), locinput, loceol,
script_run_begin);
NOT_REACHED;
}
case
CURLYX_end:
cur_curlyx = ST.prev_curlyx;
sayYES;
NOT_REACHED;
case
CURLYX_end_fail:
regcpblow(ST.cp);
cur_curlyx = ST.prev_curlyx;
sayNO;
NOT_REACHED;
#undef ST
#define ST st->u.whilem
case
WHILEM:
{
I32 n;
int
min, max;
regnode *A;
assert
(cur_curlyx);
min = ARG1i(cur_curlyx->u.curlyx.me);
max = ARG2i(cur_curlyx->u.curlyx.me);
A = REGNODE_AFTER(cur_curlyx->u.curlyx.me);
n = ++cur_curlyx->u.curlyx.count;
ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
ST.cache_offset = 0;
ST.cache_mask = 0;
DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_
"WHILEM: matched %ld out of %d..%d\n"
,
depth, (
long
)n, min, max)
);
if
(n < min) {
ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, maxopenparen);
cur_curlyx->u.curlyx.lastloc = locinput;
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(WHILEM_A_pre, A, locinput, loceol,
script_run_begin);
NOT_REACHED;
}
if
(locinput == cur_curlyx->u.curlyx.lastloc) {
DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_
"WHILEM: empty match detected, trying continuation...\n"
,
depth)
);
goto
do_whilem_B_max;
}
if
(FLAGS(scan)) {
if
(!reginfo->poscache_maxiter) {
reginfo->poscache_maxiter
= (reginfo->strend - reginfo->strbeg + 1)
* (FLAGS(scan)>>4);
if
(reginfo->poscache_maxiter < 0)
reginfo->poscache_maxiter = I32_MAX;
reginfo->poscache_iter = reginfo->poscache_maxiter;
}
if
(reginfo->poscache_iter-- == 0) {
const
SSize_t size = (reginfo->poscache_maxiter + 7)/8;
regmatch_info_aux *
const
aux = reginfo->info_aux;
if
(aux->poscache) {
if
((SSize_t)reginfo->poscache_size < size) {
Renew(aux->poscache, size,
char
);
reginfo->poscache_size = size;
}
Zero(aux->poscache, size,
char
);
}
else
{
reginfo->poscache_size = size;
Newxz(aux->poscache, size,
char
);
}
DEBUG_EXECUTE_r( Perl_re_printf( aTHX_
"%sWHILEM: Detected a super-linear match, switching on caching%s...\n"
,
PL_colors[4], PL_colors[5])
);
}
if
(reginfo->poscache_iter < 0) {
SSize_t offset, mask;
reginfo->poscache_iter = -1;
offset = (FLAGS(scan) & 0xf) - 1
+ (locinput - reginfo->strbeg)
* (FLAGS(scan)>>4);
mask = 1 << (offset % 8);
offset /= 8;
if
(reginfo->info_aux->poscache[offset] & mask) {
DEBUG_EXECUTE_r( Perl_re_exec_indentf( aTHX_
"WHILEM: (cache) already tried at this position...\n"
,
depth)
);
cur_curlyx->u.curlyx.count--;
sayNO;
}
ST.cache_offset = offset;
ST.cache_mask = mask;
}
}
if
(cur_curlyx->u.curlyx.minmod) {
ST.save_curlyx = cur_curlyx;
cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B,
locinput, loceol, script_run_begin);
NOT_REACHED;
}
if
(n < max) {
ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor,
maxopenparen);
cur_curlyx->u.curlyx.lastloc = locinput;
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(WHILEM_A_max, A, locinput, loceol,
script_run_begin);
NOT_REACHED;
}
goto
do_whilem_B_max;
}
NOT_REACHED;
case
WHILEM_B_min:
case
WHILEM_B_max:
cur_curlyx = ST.save_curlyx;
sayYES;
NOT_REACHED;
case
WHILEM_B_max_fail:
cur_curlyx = ST.save_curlyx;
cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
cur_curlyx->u.curlyx.count--;
CACHEsayNO;
NOT_REACHED;
case
WHILEM_A_min_fail:
case
WHILEM_A_pre_fail:
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
cur_curlyx->u.curlyx.count--;
CACHEsayNO;
NOT_REACHED;
case
WHILEM_A_max_fail:
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_
"WHILEM: failed, trying continuation...\n"
,
depth)
);
do_whilem_B_max:
ST.save_curlyx = cur_curlyx;
cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B,
locinput, loceol, script_run_begin);
NOT_REACHED;
case
WHILEM_B_min_fail:
cur_curlyx = ST.save_curlyx;
if
(cur_curlyx->u.curlyx.count >=
ARG2i(cur_curlyx->u.curlyx.me)) {
cur_curlyx->u.curlyx.count--;
CACHEsayNO;
}
DEBUG_EXECUTE_r(Perl_re_exec_indentf( aTHX_
"WHILEM: B min fail: trying longer...\n"
, depth)
);
cur_curlyx->u.curlyx.lastloc = locinput;
ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor, maxopenparen);
REGCP_SET(ST.lastcp);
PUSH_STATE_GOTO(WHILEM_A_min,
REGNODE_AFTER(ST.save_curlyx->u.curlyx.me),
locinput, loceol, script_run_begin);
NOT_REACHED;
#undef ST
#define ST st->u.branch
case
BRANCHJ:
next = scan + ARG1u(scan);
if
(next == scan)
next = NULL;
ST.before_paren = ARG2a(scan);
ST.after_paren = ARG2b(scan);
goto
branch_logic;
NOT_REACHED;
case
BRANCH:
ST.before_paren = ARG1a(scan);
ST.after_paren = ARG1b(scan);
branch_logic:
scan = REGNODE_AFTER_opcode(scan,state_num);
assert
(scan);
ST.lastparen = RXp_LASTPAREN(rex);
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
ST.next_branch = next;
REGCP_SET(ST.cp);
if
(RE_PESSIMISTIC_PARENS) {
regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
}
if
(has_cutgroup) {
PUSH_YES_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
script_run_begin);
}
else
{
PUSH_STATE_GOTO(BRANCH_next, scan, locinput, loceol,
script_run_begin);
}
NOT_REACHED;
case
CUTGROUP:
sv_yes_mark = st->u.mark.mark_name = FLAGS(scan)
? MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ])
: NULL;
PUSH_STATE_GOTO(CUTGROUP_next, next, locinput, loceol,
script_run_begin);
NOT_REACHED;
case
CUTGROUP_next_fail:
do_cutgroup = 1;
no_final = 1;
if
(st->u.mark.mark_name)
sv_commit = st->u.mark.mark_name;
sayNO;
NOT_REACHED;
case
BRANCH_next:
sayYES;
NOT_REACHED;
case
BRANCH_next_fail:
if
(do_cutgroup) {
do_cutgroup = 0;
no_final = 0;
}
if
(RE_PESSIMISTIC_PARENS) {
REGCP_UNWIND(ST.lastcp);
regcppop(rex,&maxopenparen);
}
REGCP_UNWIND(ST.cp);
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
CAPTURE_CLEAR(ST.before_paren+1, ST.after_paren,
"BRANCH_next_fail"
);
scan = ST.next_branch;
if
(!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
DEBUG_EXECUTE_r({
Perl_re_exec_indentf( aTHX_
"%sBRANCH failed...%s\n"
,
depth,
PL_colors[4],
PL_colors[5] );
});
sayNO_SILENT;
}
continue
;
case
MINMOD:
minmod = 1;
break
;
#undef ST
#define ST st->u.curlym
case
CURLYM:
ST.me = scan;
scan = REGNODE_AFTER_type(scan, tregnode_CURLYM);
ST.lastparen = RXp_LASTPAREN(rex);
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
if
(FLAGS(ST.me)) {
U32 paren = FLAGS(ST.me);
lastopen = paren;
if
(paren > maxopenparen)
maxopenparen = paren;
scan += NEXT_OFF(scan);
}
ST.A = scan;
ST.B = next;
ST.alen = 0;
ST.count = 0;
ST.minmod = minmod;
minmod = 0;
ST.Binfo.count = -1;
REGCP_SET(ST.cp);
if
(!(ST.minmod ? ARG1i(ST.me) : ARG2i(ST.me)))
goto
curlym_do_B;
curlym_do_A:
PUSH_YES_STATE_GOTO(CURLYM_A, ST.A, locinput, loceol,
script_run_begin);
NOT_REACHED;
case
CURLYM_A:
ST.count++;
if
(ST.count == 1) {
if
(reginfo->is_utf8_target) {
char
*s = st->locinput;
while
(s < locinput) {
ST.alen++;
s += UTF8SKIP(s);
}
}
else
{
ST.alen = locinput - st->locinput;
}
if
(ST.alen == 0)
ST.count = ST.minmod ? ARG1i(ST.me) : ARG2i(ST.me);
}
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"CURLYM now matched %"
IVdf
" times, len=%"
IVdf
"...\n"
,
depth, (IV) ST.count, (IV)ST.alen)
);
if
(FLAGS(ST.me)) {
U32 paren = (U32)FLAGS(ST.me);
CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
if
(EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
goto
fake_end;
if
(!is_accepted) {
I32 max = (ST.minmod ? ARG1i(ST.me) : ARG2i(ST.me));
if
( max == REG_INFTY || ST.count < max )
goto
curlym_do_A;
}
goto
curlym_do_B;
case
CURLYM_A_fail:
REGCP_UNWIND(ST.cp);
if
(ST.minmod || ST.count < ARG1i(ST.me)
|| EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
sayNO;
curlym_do_B:
if
(is_accepted)
goto
curlym_close_B;
if
(ST.Binfo.count < 0) {
assert
(ST.B);
if
(HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
regnode *text_node = ST.B;
if
(! HAS_TEXT(text_node))
FIND_NEXT_IMPT(text_node);
if
(REGNODE_TYPE(OP(text_node)) == EXACT) {
if
(! S_setup_EXACTISH_ST(aTHX_ text_node,
&ST.Binfo, reginfo))
{
sayNO;
}
}
}
}
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"CURLYM trying tail with matches=%"
IVdf
"...\n"
,
depth, (IV)ST.count)
);
if
(! NEXTCHR_IS_EOS && ST.Binfo.count >= 0) {
assert
(ST.Binfo.count > 0);
if
( locinput + ST.Binfo.min_length > loceol
|| ! S_test_EXACTISH_ST(locinput, ST.Binfo))
{
DEBUG_OPTIMISE_r(
Perl_re_exec_indentf( aTHX_
"CURLYM Fast bail next target=0x%X anded==0x%X"
" mask=0x%X\n"
,
depth,
(
int
) nextbyte, ST.Binfo.first_byte_anded,
ST.Binfo.first_byte_mask)
);
state_num = CURLYM_B_fail;
goto
reenter_switch;
}
}
curlym_close_B:
if
(FLAGS(ST.me)) {
U32 paren = (U32)FLAGS(ST.me);
if
(ST.count || is_accepted) {
CLOSE_CAPTURE(rex, paren,
HOPc(locinput, -ST.alen) - reginfo->strbeg,
locinput - reginfo->strbeg);
}
else
RXp_OFFSp(rex)[paren].end = -1;
if
(EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)FLAGS(ST.me)))
{
if
(ST.count || is_accepted)
goto
fake_end;
else
sayNO;
}
}
if
(is_accepted)
goto
fake_end;
PUSH_STATE_GOTO(CURLYM_B, ST.B, locinput, loceol,
script_run_begin);
NOT_REACHED;
case
CURLYM_B_fail:
REGCP_UNWIND(ST.cp);
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
if
(ST.minmod) {
I32 max = ARG2i(ST.me);
if
(max != REG_INFTY && ST.count == max)
sayNO;
goto
curlym_do_A;
}
if
(ST.count == ARG1i(ST.me)
)
sayNO;
ST.count--;
SET_locinput(HOPc(locinput, -ST.alen));
goto
curlym_do_B;
#undef ST
#define ST st->u.curly
/* CURLY_SETPAREN(paren, success)
 *
 * Does nothing when 'paren' is zero.  Otherwise:
 *   - if 'success' is true, calls CLOSE_CAPTURE for group 'paren' with the
 *     start offset one hop (HOPc(locinput, -1)) before locinput and the end
 *     offset at locinput, both relative to reginfo->strbeg;
 *   - if 'success' is false, sets the group's end offset to -1 and restores
 *     RXp_LASTPAREN / RXp_LASTCLOSEPAREN from the values saved in ST.
 *
 * 'paren' is expanded more than once, so it should be free of side effects.
 *
 * The body is wrapped in STMT_START/STMT_END so that the macro expands to
 * exactly one statement.  A bare nested if/else expansion could swallow a
 * caller's 'else' or fail to compile as an unbraced if-body.
 */
#define CURLY_SETPAREN(paren, success) \
    STMT_START { \
        if (paren) { \
            if (success) { \
                CLOSE_CAPTURE(rex, paren, \
                              HOPc(locinput, -1) - reginfo->strbeg, \
                              locinput - reginfo->strbeg); \
            } \
            else { \
                RXp_OFFSp(rex)[paren].end = -1; \
                RXp_LASTPAREN(rex) = ST.lastparen; \
                RXp_LASTCLOSEPAREN(rex) = ST.lastcloseparen; \
            } \
        } \
    } STMT_END
case
STAR:
ST.paren = 0;
ST.min = 0;
ST.max = REG_INFTY;
scan = REGNODE_AFTER_type(scan,tregnode_STAR);
goto
repeat;
case
PLUS:
ST.paren = 0;
ST.min = 1;
ST.max = REG_INFTY;
scan = REGNODE_AFTER_type(scan,tregnode_PLUS);
goto
repeat;
case
CURLYN:
ST.paren = FLAGS(scan);
ST.lastparen = RXp_LASTPAREN(rex);
ST.lastcloseparen = RXp_LASTCLOSEPAREN(rex);
if
(ST.paren > maxopenparen)
maxopenparen = ST.paren;
ST.min = ARG1i(scan);
ST.max = ARG2i(scan);
scan = regnext(REGNODE_AFTER_type(scan, tregnode_CURLYN));
if
(EVAL_CLOSE_PAREN_IS_TRUE(cur_eval,(U32)ST.paren))
{
char
*li = locinput;
if
(!regrepeat(rex, &li, scan, loceol, reginfo, 1))
sayNO;
SET_locinput(li);
goto
fake_end;
}
goto
repeat;
case
CURLY:
ST.paren = 0;
ST.min = ARG1i(scan);
ST.max = ARG2i(scan);
scan = REGNODE_AFTER_type(scan, tregnode_CURLY);
repeat:
assert
(ST.min <= ST.max);
if
(! HAS_TEXT(next) && ! JUMPABLE(next)) {
ST.Binfo.count = 0;
}
else
{
regnode *text_node = next;
if
(! HAS_TEXT(text_node))
FIND_NEXT_IMPT(text_node);
if
(! HAS_TEXT(text_node))
ST.Binfo.count = 0;
else
{
if
( REGNODE_TYPE(OP(text_node)) != EXACT ) {
ST.Binfo.count = 0;
}
else
{
if
(! S_setup_EXACTISH_ST(aTHX_ text_node,
&ST.Binfo, reginfo))
{
sayNO;
}
}
}
}
ST.A = scan;
ST.B = next;
if
(minmod) {
char
*li = locinput;
minmod = 0;
if
(ST.min &&
regrepeat(rex, &li, ST.A, loceol, reginfo, ST.min)
< ST.min)
sayNO;
SET_locinput(li);
ST.count = ST.min;
REGCP_SET(ST.cp);
if
(ST.Binfo.count <= 0)
goto
curly_try_B_min;
ST.oldloc = locinput;
if
(ST.max == REG_INFTY) {
ST.maxpos = loceol - 1;
if
(utf8_target)
while
(UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
ST.maxpos--;
}
else
if
(utf8_target) {
int
m = ST.max - ST.min;
for
(ST.maxpos = locinput;
m >0 && ST.maxpos < loceol; m--)
ST.maxpos += UTF8SKIP(ST.maxpos);
}
else
{
ST.maxpos = locinput + ST.max - ST.min;
if
(ST.maxpos >= loceol)
ST.maxpos = loceol - 1;
}
goto
curly_try_B_min_known;
}
else
{
char
*li = locinput;
if
(ST.max)
ST.count = regrepeat(rex, &li, ST.A, loceol, reginfo, ST.max);
else
ST.count = 0;
if
(ST.count < ST.min)
sayNO;
SET_locinput(li);
if
((ST.count > ST.min)
&& (REGNODE_TYPE(OP(ST.B)) == EOL) && (OP(ST.B) != MEOL))
{
ST.min = ST.count;
if
(UCHARAT(locinput - 1) ==
'\n'
&& OP(ST.B) != EOS)
ST.min--;
}
REGCP_SET(ST.cp);
goto
curly_try_B_max;
}
NOT_REACHED;
case
CURLY_B_min_fail:
if
(RE_PESSIMISTIC_PARENS) {
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
}
REGCP_UNWIND(ST.cp);
if
(ST.paren) {
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
}
if
(ST.Binfo.count == 0) {
char
*li = locinput;
if
(!regrepeat(rex, &li, ST.A, loceol, reginfo, 1)) {
sayNO;
}
locinput = li;
ST.count++;
if
(!( ST.count <= ST.max
|| (ST.max == REG_INFTY && ST.count > 0))
)
sayNO;
}
else
{
int
n;
ST.oldloc = locinput;
if
(utf8_target)
locinput += UTF8SKIP(locinput);
else
locinput++;
ST.count++;
curly_try_B_min_known:
if
(locinput + ST.Binfo.initial_exact < loceol) {
if
(ST.Binfo.initial_exact >= ST.Binfo.max_length) {
locinput = ninstr(locinput, loceol,
(
char
*) ST.Binfo.matches,
(
char
*) ST.Binfo.matches
+ ST.Binfo.initial_exact);
if
(locinput == NULL) {
sayNO;
}
}
else
do
{
if
(ST.Binfo.initial_exact > 0) {
locinput = ninstr(locinput, loceol,
(
char
*) ST.Binfo.matches,
(
char
*) ST.Binfo.matches
+ ST.Binfo.initial_exact);
}
else
{
locinput = (
char
*) find_next_masked(
(U8 *) locinput, (U8 *) loceol,
ST.Binfo.first_byte_anded,
ST.Binfo.first_byte_mask);
if
(utf8_target) {
while
( locinput < loceol
&& UTF8_IS_CONTINUATION(*locinput))
{
locinput++;
}
}
}
if
( locinput == NULL
|| locinput + ST.Binfo.min_length > loceol)
{
sayNO;
}
if
(S_test_EXACTISH_ST(locinput, ST.Binfo)) {
break
;
}
locinput += (utf8_target) ? UTF8SKIP(locinput) : 1;
}
while
(locinput <= ST.maxpos);
}
if
(locinput > ST.maxpos)
sayNO;
n = (utf8_target)
? utf8_length((U8 *) ST.oldloc, (U8 *) locinput)
: (STRLEN) (locinput - ST.oldloc);
if
(n) {
char
*li = ST.oldloc;
ST.count += n;
if
(regrepeat(rex, &li, ST.A, loceol, reginfo, n) < n)
sayNO;
assert
(n == REG_INFTY || locinput == li);
}
}
curly_try_B_min:
if
(RE_PESSIMISTIC_PARENS) {
(
void
)regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
}
CURLY_SETPAREN(ST.paren, ST.count);
PUSH_STATE_GOTO(CURLY_B_min, ST.B, locinput, loceol,
script_run_begin);
NOT_REACHED;
curly_try_B_max:
if
( ST.Binfo.count <= 0
|| ( ST.Binfo.count > 0
&& locinput + ST.Binfo.min_length <= loceol
&& S_test_EXACTISH_ST(locinput, ST.Binfo)))
{
if
(RE_PESSIMISTIC_PARENS) {
(
void
)regcppush(rex, 0, maxopenparen);
REGCP_SET(ST.lastcp);
}
CURLY_SETPAREN(ST.paren, ST.count);
PUSH_STATE_GOTO(CURLY_B_max, ST.B, locinput, loceol,
script_run_begin);
NOT_REACHED;
}
goto
CURLY_B_all_failed;
NOT_REACHED;
case
CURLY_B_max_fail:
if
(RE_PESSIMISTIC_PARENS) {
REGCP_UNWIND(ST.lastcp);
regcppop(rex, &maxopenparen);
}
CURLY_B_all_failed:
REGCP_UNWIND(ST.cp);
if
(ST.paren) {
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
}
if
(--ST.count < ST.min)
sayNO;
locinput = HOPc(locinput, -1);
goto
curly_try_B_max;
#undef ST
case
END:
fake_end:
if
(cur_eval) {
is_accepted=
false
;
SET_RECURSE_LOCINPUT(
"FAKE-END[before]"
, CUR_EVAL.prev_recurse_locinput);
st->u.eval.prev_rex = rex_sv;
st->u.eval.cp = regcppush(rex, 0, maxopenparen);
rex_sv = CUR_EVAL.prev_rex;
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(rex_sv));
SET_reg_curpm(rex_sv);
rex = ReANY(rex_sv);
rexi = RXi_GET(rex);
st->u.eval.prev_curlyx = cur_curlyx;
cur_curlyx = CUR_EVAL.prev_curlyx;
REGCP_SET(st->u.eval.lastcp);
regcp_restore(rex, CUR_EVAL.lastcp, &maxopenparen);
st->u.eval.prev_eval = cur_eval;
cur_eval = CUR_EVAL.prev_eval;
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"END: EVAL trying tail ... (cur_eval=%p)\n"
,
depth, cur_eval););
if
( nochange_depth )
nochange_depth--;
SET_RECURSE_LOCINPUT(
"FAKE-END[after]"
, cur_eval->locinput);
PUSH_YES_STATE_GOTO(EVAL_postponed_AB,
st->u.eval.prev_eval->u.eval.B,
locinput, loceol, script_run_begin);
}
if
(locinput < reginfo->till) {
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"%sEND: Match possible, but length=%ld is smaller than requested=%ld, failing!%s\n"
,
PL_colors[4],
(
long
)(locinput - startpos),
(
long
)(reginfo->till - startpos),
PL_colors[5]));
sayNO_SILENT;
}
sayYES;
case
LOOKBEHIND_END:
if
(match_end && locinput != match_end)
{
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sLOOKBEHIND_END: subpattern failed...%s\n"
,
depth, PL_colors[4], PL_colors[5]));
sayNO;
}
case
SUCCEED:
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sSUCCEED: subpattern success...%s\n"
,
depth, PL_colors[4], PL_colors[5]));
sayYES;
#undef ST
#define ST st->u.ifmatch
case
SUSPEND:
ST.wanted = 1;
ST.start = locinput;
ST.end = loceol;
ST.count = 1;
goto
do_ifmatch;
case
UNLESSM:
ST.wanted = 0;
goto
ifmatch_trivial_fail_test;
case
IFMATCH:
ST.wanted = 1;
ifmatch_trivial_fail_test:
ST.prev_match_end= match_end;
ST.count = NEXT_OFF(scan) + 1;
if
(! FLAGS(scan)) {
ST.start = locinput;
ST.end = loceol;
match_end = NULL;
}
else
{
PERL_UINT_FAST8_T back_count = FLAGS(scan);
char
* s;
match_end = locinput;
ST.end = loceol;
for
(; ST.count > 0; ST.count--, back_count--) {
s = HOPBACKc(locinput, back_count);
if
(s) {
ST.start = s;
goto
do_ifmatch;
}
}
match_end = ST.prev_match_end;
if
(logical) {
logical = 0;
sw = 1 - cBOOL(ST.wanted);
}
else
if
(ST.wanted)
sayNO;
next = scan + ARG1u(scan);
if
(next == scan)
next = NULL;
break
;
}
do_ifmatch:
ST.me = scan;
ST.logical = logical;
logical = 0;
PUSH_YES_STATE_GOTO(IFMATCH_A, REGNODE_AFTER(scan), ST.start,
ST.end, script_run_begin);
NOT_REACHED;
{
bool
matched;
case
IFMATCH_A_fail:
if
(! ST.logical && ST.count > 1) {
ST.count--;
ST.start = HOPc(ST.start, 1);
scan = ST.me;
logical = ST.logical;
goto
do_ifmatch;
}
matched = FALSE;
goto
ifmatch_done;
case
IFMATCH_A:
matched = TRUE;
ifmatch_done:
sw = matched == ST.wanted;
match_end = ST.prev_match_end;
if
(! ST.logical && !sw) {
sayNO;
}
if
(OP(ST.me) != SUSPEND) {
locinput = st->locinput;
loceol = st->loceol;
script_run_begin = st->sr0;
}
scan = ST.me + ARG1u(ST.me);
if
(scan == ST.me)
scan = NULL;
continue
;
}
#undef ST
case
LONGJMP:
next = scan + ARG1u(scan);
if
(next == scan)
next = NULL;
break
;
case
COMMIT:
reginfo->cutpoint = loceol;
case
PRUNE:
if
(FLAGS(scan))
sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
PUSH_STATE_GOTO(COMMIT_next, next, locinput, loceol,
script_run_begin);
NOT_REACHED;
case
COMMIT_next_fail:
no_final = 1;
sayNO;
NOT_REACHED;
case
OPFAIL:
if
(FLAGS(scan))
sv_commit = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
if
(logical) {
sw= 0;
break
;
}
else
{
sayNO;
}
NOT_REACHED;
#define ST st->u.mark
case
MARKPOINT:
ST.prev_mark = mark_state;
ST.mark_name = sv_commit = sv_yes_mark
= MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
mark_state = st;
ST.mark_loc = locinput;
PUSH_YES_STATE_GOTO(MARKPOINT_next, next, locinput, loceol,
script_run_begin);
NOT_REACHED;
case
MARKPOINT_next:
mark_state = ST.prev_mark;
sayYES;
NOT_REACHED;
case
MARKPOINT_next_fail:
if
(popmark && sv_eq(ST.mark_name,popmark))
{
if
(ST.mark_loc > startpoint)
reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
popmark = NULL;
sv_commit = ST.mark_name;
DEBUG_EXECUTE_r({
Perl_re_exec_indentf( aTHX_
"%sMARKPOINT: next fail: setting cutpoint to mark:%"
SVf
"...%s\n"
,
depth,
PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
});
}
mark_state = ST.prev_mark;
sv_yes_mark = mark_state ?
mark_state->u.mark.mark_name : NULL;
sayNO;
NOT_REACHED;
case
SKIP:
if
(!FLAGS(scan)) {
ST.mark_name = NULL;
ST.mark_loc = locinput;
PUSH_STATE_GOTO(SKIP_next,next, locinput, loceol,
script_run_begin);
}
else
{
regmatch_state *cur = mark_state;
SV *find = MUTABLE_SV(rexi->data->data[ ARG1u( scan ) ]);
while
(cur) {
if
( sv_eq( cur->u.mark.mark_name,
find ) )
{
ST.mark_name = find;
PUSH_STATE_GOTO( SKIP_next, next, locinput, loceol,
script_run_begin);
}
cur = cur->u.mark.prev_mark;
}
}
break
;
case
SKIP_next_fail:
if
(ST.mark_name) {
popmark = ST.mark_name;
}
else
{
if
(ST.mark_loc > startpoint)
reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
if
(mark_state)
sv_commit=mark_state->u.mark.mark_name;
}
no_final = 1;
sayNO;
NOT_REACHED;
#undef ST
case
LNBREAK:
if
((n=is_LNBREAK_safe(locinput, loceol, utf8_target))) {
locinput += n;
}
else
sayNO;
break
;
default
:
PerlIO_printf(Perl_error_log,
"%"
UVxf
" %d\n"
,
PTR2UV(scan), OP(scan));
Perl_croak(aTHX_
"regexp memory corruption"
);
increment_locinput:
assert
(!NEXTCHR_IS_EOS);
if
(utf8_target) {
locinput += PL_utf8skip[nextbyte];
if
(locinput > loceol)
sayNO;
}
else
locinput++;
break
;
}
scan = next;
continue
;
push_yes_state:
st->u.yes.prev_yes_state = yes_state;
yes_state = st;
push_state:
{
regmatch_state *newst;
DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_r(
if
(DEBUG_v_TEST || RE_DEBUG_FLAG(RE_DEBUG_EXTRA_STACK)) {
regmatch_state *cur = st;
regmatch_state *curyes = yes_state;
U32 i;
regmatch_slab *slab = PL_regmatch_slab;
for
(i = 0; i < 3 && i <= depth; cur--,i++) {
if
(cur < SLAB_FIRST(slab)) {
slab = slab->prev;
cur = SLAB_LAST(slab);
}
Perl_re_exec_indentf( aTHX_
"%4s #%-3d %-10s %s\n"
,
depth,
i ?
" "
:
"push"
,
depth - i, REGNODE_NAME(cur->resume_state),
(curyes == cur) ?
"yes"
:
""
);
if
(curyes == cur)
curyes = cur->u.yes.prev_yes_state;
}
}
else
{
DEBUG_STATE_pp(
"push"
)
});
depth++;
st->locinput = locinput;
st->loceol = loceol;
st->sr0 = script_run_begin;
newst = st+1;
if
(newst > SLAB_LAST(PL_regmatch_slab))
newst = S_push_slab(aTHX);
PL_regmatch_state = newst;
locinput = pushinput;
loceol = pusheol;
script_run_begin = pushsr0;
st = newst;
continue
;
}
}
#ifdef SOLARIS_BAD_OPTIMIZER
# undef PL_charclass
#endif
Perl_croak(aTHX_
"corrupted regexp pointers"
);
NOT_REACHED;
yes:
if
(yes_state) {
assert
(st != yes_state);
#ifdef DEBUGGING
while
(st != yes_state) {
st--;
if
(st < SLAB_FIRST(PL_regmatch_slab)) {
PL_regmatch_slab = PL_regmatch_slab->prev;
st = SLAB_LAST(PL_regmatch_slab);
}
DEBUG_STATE_r({
if
(no_final) {
DEBUG_STATE_pp(
"pop (no final)"
);
}
else
{
DEBUG_STATE_pp(
"pop (yes)"
);
}
});
depth--;
}
#else
while
(yes_state < SLAB_FIRST(PL_regmatch_slab)
|| yes_state > SLAB_LAST(PL_regmatch_slab))
{
depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
PL_regmatch_slab = PL_regmatch_slab->prev;
st = SLAB_LAST(PL_regmatch_slab);
}
depth -= (st - yes_state);
#endif
st = yes_state;
yes_state = st->u.yes.prev_yes_state;
PL_regmatch_state = st;
if
(no_final) {
locinput= st->locinput;
loceol= st->loceol;
script_run_begin = st->sr0;
}
state_num = st->resume_state + no_final;
goto
reenter_switch;
}
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_
"%sMatch successful!%s\n"
,
PL_colors[4], PL_colors[5]));
if
(reginfo->info_aux_eval) {
if
(oreplsv != GvSV(PL_replgv)) {
sv_setsv(oreplsv, GvSV(PL_replgv));
SvSETMAGIC(oreplsv);
}
}
result = 1;
goto
final_exit;
no:
DEBUG_EXECUTE_r(
Perl_re_exec_indentf( aTHX_
"%sfailed...%s\n"
,
depth,
PL_colors[4], PL_colors[5])
);
no_silent:
if
(no_final) {
if
(yes_state) {
goto
yes;
}
else
{
goto
final_exit;
}
}
if
(depth) {
st--;
if
(st < SLAB_FIRST(PL_regmatch_slab)) {
PL_regmatch_slab = PL_regmatch_slab->prev;
st = SLAB_LAST(PL_regmatch_slab);
}
PL_regmatch_state = st;
locinput= st->locinput;
loceol= st->loceol;
script_run_begin = st->sr0;
DEBUG_STATE_pp(
"pop"
);
depth--;
if
(yes_state == st)
yes_state = st->u.yes.prev_yes_state;
state_num = st->resume_state + 1;
PERL_ASYNC_CHECK();
goto
reenter_switch;
}
result = 0;
final_exit:
if
(rex->intflags & PREGf_VERBARG_SEEN) {
SV *sv_err = get_sv(
"REGERROR"
, 1);
SV *sv_mrk = get_sv(
"REGMARK"
, 1);
if
(result) {
sv_commit = &PL_sv_no;
if
(!sv_yes_mark)
sv_yes_mark = &PL_sv_yes;
}
else
{
if
(!sv_commit)
sv_commit = &PL_sv_yes;
sv_yes_mark = &PL_sv_no;
}
assert
(sv_err);
assert
(sv_mrk);
sv_setsv(sv_err, sv_commit);
sv_setsv(sv_mrk, sv_yes_mark);
}
if
(last_pushed_cv) {
dSP;
POP_MULTICALL;
PERL_UNUSED_VAR(SP);
}
else
LEAVE_SCOPE(orig_savestack_ix);
assert
(!result || locinput - reginfo->strbeg >= 0);
return
result ? locinput - reginfo->strbeg : -1;
}
STATIC I32
S_regrepeat(pTHX_ regexp *prog,
char
**startposp,
const
regnode *p,
char
* loceol, regmatch_info *
const
reginfo, I32 max comma_pDEPTH)
{
char
*scan;
I32 c;
char
*this_eol = loceol;
I32 hardcount = 0;
bool
utf8_target = reginfo->is_utf8_target;
unsigned
int
to_complement = 0;
char_class_number_ classnum;
PERL_ARGS_ASSERT_REGREPEAT;
scan = *startposp;
if
(max == REG_INFTY)
max = I32_MAX;
else
if
(! utf8_target && this_eol - scan > max)
this_eol = scan + max;
switch
(with_t_UTF8ness(OP(p), utf8_target)) {
SV * anyofh_list;
case
REG_ANY_t8:
while
(scan < this_eol && hardcount < max && *scan !=
'\n'
) {
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
REG_ANY_tb:
scan = (
char
*)
memchr
(scan,
'\n'
, this_eol - scan);
if
(! scan) {
scan = this_eol;
}
break
;
case
SANY_t8:
while
(scan < this_eol && hardcount < max) {
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
SANY_tb:
scan = this_eol;
break
;
case
EXACT_REQ8_tb:
case
LEXACT_REQ8_tb:
case
EXACTFU_REQ8_tb:
break
;
case
EXACTL_t8:
if
(UTF8_IS_ABOVE_LATIN1(*scan)) {
_CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(scan, loceol);
}
case
EXACTL_tb:
case
EXACTFL_t8:
case
EXACTFL_tb:
case
EXACTFLU8_t8:
case
EXACTFLU8_tb:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
case
EXACT_REQ8_t8:
case
LEXACT_REQ8_t8:
case
EXACTFU_REQ8_t8:
case
LEXACT_t8:
case
LEXACT_tb:
case
EXACT_t8:
case
EXACT_tb:
case
EXACTF_t8:
case
EXACTF_tb:
case
EXACTFAA_NO_TRIE_t8:
case
EXACTFAA_NO_TRIE_tb:
case
EXACTFAA_t8:
case
EXACTFAA_tb:
case
EXACTFU_t8:
case
EXACTFU_tb:
case
EXACTFUP_t8:
case
EXACTFUP_tb:
{
struct
next_matchable_info Binfo;
PERL_UINT_FAST8_T definitive_len;
assert
(STR_LEN(p) == reginfo->is_utf8_pat ? UTF8SKIP(STRING(p)) : 1);
if
( ! S_setup_EXACTISH_ST(aTHX_ p, &Binfo, reginfo)
|| scan + Binfo.min_length > this_eol
|| ! S_test_EXACTISH_ST(scan, Binfo))
{
break
;
}
definitive_len = Binfo.initial_definitive;
if
(definitive_len > 0) {
if
(definitive_len >= Binfo.max_length) {
scan += definitive_len;
hardcount++;
if
(hardcount < max && scan + definitive_len <= this_eol) {
if
(definitive_len == 1) {
const
char
* orig_scan = scan;
if
(this_eol - (scan - hardcount) > max) {
this_eol = scan - hardcount + max;
}
if
(Binfo.initial_exact == 1) {
scan = (
char
*) find_span_end((U8 *) scan,
(U8 *) this_eol,
Binfo.matches[0]);
}
else
{
scan = (
char
*) find_span_end_mask(
(U8 *) scan,
(U8 *) this_eol,
Binfo.first_byte_anded,
Binfo.first_byte_mask);
}
hardcount += scan - orig_scan;
}
else
{
while
( hardcount < max
&& scan + definitive_len <= this_eol
&& S_test_EXACTISH_ST(scan, Binfo))
{
scan += definitive_len;
hardcount++;
}
}
}
break
;
}
do
{
int
i;
U8 * matches = Binfo.matches;
for
(i = 0; i < Binfo.count; i++) {
if
(memEQ(scan + definitive_len,
matches + definitive_len,
Binfo.lengths[i] - definitive_len))
{
goto
found_a_completion;
}
matches += Binfo.lengths[i];
}
break
;
found_a_completion:
hardcount++;
scan += Binfo.lengths[i];
}
while
( hardcount < max
&& scan + definitive_len < this_eol
&& S_test_EXACTISH_ST(scan, Binfo));
break
;
}
while
(scan < this_eol && hardcount < max) {
int
i;
U8 * matches = Binfo.matches;
for
(i = 0; i < Binfo.count; i++) {
if
(memEQ(scan, matches, Binfo.lengths[i])) {
goto
found1;
}
matches += Binfo.lengths[i];
}
break
;
found1:
hardcount++;
scan += Binfo.lengths[i];
}
break
;
}
case
ANYOFPOSIXL_t8:
case
ANYOFL_t8:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(p);
case
ANYOFD_t8:
case
ANYOF_t8:
while
( hardcount < max
&& scan < this_eol
&& reginclass(prog, p, (U8*)scan, (U8*) this_eol, TRUE))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFPOSIXL_tb:
case
ANYOFL_tb:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
CHECK_AND_WARN_NON_UTF8_CTYPE_LOCALE_IN_SETS(p);
case
ANYOFD_tb:
case
ANYOF_tb:
if
(ANYOF_FLAGS(p) || ANYOF_HAS_AUX(p)) {
while
( scan < this_eol
&& reginclass(prog, p, (U8*)scan, (U8*)scan+1, 0))
scan++;
}
else
{
while
(scan < this_eol && ANYOF_BITMAP_TEST(p, *((U8*)scan)))
scan++;
}
break
;
case
ANYOFM_t8:
if
(this_eol - scan > max) {
this_eol = scan + max;
}
case
ANYOFM_tb:
scan = (
char
*) find_span_end_mask((U8 *) scan, (U8 *) this_eol,
(U8) ARG1u(p), FLAGS(p));
break
;
case
NANYOFM_t8:
while
( hardcount < max
&& scan < this_eol
&& (*scan & FLAGS(p)) != ARG1u(p))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
NANYOFM_tb:
scan = (
char
*) find_next_masked((U8 *) scan, (U8 *) this_eol,
(U8) ARG1u(p), FLAGS(p));
break
;
case
ANYOFH_tb:
case
ANYOFHb_tb:
case
ANYOFHbbm_tb:
case
ANYOFHr_tb:
case
ANYOFHs_tb:
break
;
case
ANYOFH_t8:
anyofh_list = GET_ANYOFH_INVLIST(prog, p);
while
( hardcount < max
&& scan < this_eol
&& NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
&& _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) scan,
(U8 *) this_eol,
NULL)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFHb_t8:
anyofh_list = GET_ANYOFH_INVLIST(prog, p);
while
( hardcount < max
&& scan < this_eol
&& (U8) *scan == ANYOF_FLAGS(p)
&& _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) scan,
(U8 *) this_eol,
NULL)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFHbbm_t8:
while
( hardcount < max
&& scan + 1 < this_eol
&& (U8) *scan == ANYOF_FLAGS(p)
&& BITMAP_TEST(( (
struct
regnode_bbm *) p)->bitmap,
(U8) scan[1] & UTF_CONTINUATION_MASK))
{
scan += 2;
hardcount++;
}
break
;
case
ANYOFHr_t8:
anyofh_list = GET_ANYOFH_INVLIST(prog, p);
while
( hardcount < max
&& scan < this_eol
&& inRANGE(NATIVE_UTF8_TO_I8(*scan),
LOWEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)),
HIGHEST_ANYOF_HRx_BYTE(ANYOF_FLAGS(p)))
&& NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
&& _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) scan,
(U8 *) this_eol,
NULL)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFHs_t8:
anyofh_list = GET_ANYOFH_INVLIST(prog, p);
while
( hardcount < max
&& scan + FLAGS(p) < this_eol
&& memEQ(scan, ((
struct
regnode_anyofhs *) p)->string, FLAGS(p))
&& _invlist_contains_cp(anyofh_list,
utf8_to_uvchr_buf((U8 *) scan,
(U8 *) this_eol,
NULL)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFR_t8:
while
( hardcount < max
&& scan < this_eol
&& NATIVE_UTF8_TO_I8(*scan) >= ANYOF_FLAGS(p)
&& withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
(U8 *) this_eol,
NULL),
ANYOFRbase(p), ANYOFRdelta(p)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFR_tb:
while
( hardcount < max
&& scan < this_eol
&& withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
{
scan++;
hardcount++;
}
break
;
case
ANYOFRb_t8:
while
( hardcount < max
&& scan < this_eol
&& (U8) *scan == ANYOF_FLAGS(p)
&& withinCOUNT(utf8_to_uvchr_buf((U8 *) scan,
(U8 *) this_eol,
NULL),
ANYOFRbase(p), ANYOFRdelta(p)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
ANYOFRb_tb:
while
( hardcount < max
&& scan < this_eol
&& withinCOUNT((U8) *scan, ANYOFRbase(p), ANYOFRdelta(p)))
{
scan++;
hardcount++;
}
break
;
case
NPOSIXL_tb:
to_complement = 1;
case
POSIXL_tb:
CHECK_AND_WARN_PROBLEMATIC_LOCALE_;
while
( scan < this_eol
&& to_complement ^ cBOOL(isFOO_lc(FLAGS(p), *scan)))
{
scan++;
}
break
;
case
NPOSIXL_t8:
to_complement = 1;
case
POSIXL_t8:
while
( hardcount < max && scan < this_eol
&& to_complement ^ cBOOL(isFOO_utf8_lc(FLAGS(p),
(U8 *) scan,
(U8 *) this_eol)))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
POSIXD_tb:
case
POSIXA_t8:
if
(this_eol - scan > max) {
this_eol = scan + max;
}
case
POSIXA_tb:
while
(scan < this_eol && generic_isCC_A_((U8) *scan, FLAGS(p))) {
scan++;
}
break
;
case
NPOSIXD_tb:
case
NPOSIXA_tb:
while
(scan < this_eol && ! generic_isCC_A_((U8) *scan, FLAGS(p))) {
scan++;
}
break
;
case
NPOSIXA_t8:
while
( hardcount < max && scan < this_eol
&& ( ! isASCII_utf8_safe(scan, loceol)
|| ! generic_isCC_A_((U8) *scan, FLAGS(p))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
NPOSIXU_tb:
to_complement = 1;
case
POSIXU_tb:
while
( scan < this_eol
&& to_complement ^ cBOOL(generic_isCC_((U8) *scan, FLAGS(p))))
{
scan++;
}
break
;
case
NPOSIXU_t8:
case
NPOSIXD_t8:
to_complement = 1;
case
POSIXD_t8:
case
POSIXU_t8:
classnum = (char_class_number_) FLAGS(p);
switch
(classnum) {
default
:
while
( hardcount < max && scan < this_eol
&& to_complement
^ cBOOL(_invlist_contains_cp(PL_XPosix_ptrs[classnum],
utf8_to_uvchr_buf((U8 *) scan, (U8 *) this_eol, NULL))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
CC_ENUM_SPACE_:
while
( hardcount < max
&& scan < this_eol
&& (to_complement
^ cBOOL(isSPACE_utf8_safe(scan, this_eol))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
CC_ENUM_BLANK_:
while
( hardcount < max
&& scan < this_eol
&& (to_complement
^ cBOOL(isBLANK_utf8_safe(scan, this_eol))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
CC_ENUM_XDIGIT_:
while
( hardcount < max
&& scan < this_eol
&& (to_complement
^ cBOOL(isXDIGIT_utf8_safe(scan, this_eol))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
CC_ENUM_VERTSPACE_:
while
( hardcount < max
&& scan < this_eol
&& (to_complement
^ cBOOL(isVERTWS_utf8_safe(scan, this_eol))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
case
CC_ENUM_CNTRL_:
while
( hardcount < max
&& scan < this_eol
&& (to_complement
^ cBOOL(isCNTRL_utf8_safe(scan, this_eol))))
{
scan += UTF8SKIP(scan);
hardcount++;
}
break
;
}
break
;
case
LNBREAK_t8:
while
( hardcount < max && scan < this_eol
&& (c=is_LNBREAK_utf8_safe(scan, this_eol)))
{
scan += c;
hardcount++;
}
break
;
case
LNBREAK_tb:
while
(
hardcount < max && scan < loceol
&& (c = is_LNBREAK_latin1_safe(scan, loceol))
) {
scan += c;
hardcount++;
}
break
;
default
:
Perl_croak(aTHX_
"panic: regrepeat() called with unrecognized"
" node type %d='%s'"
, OP(p), REGNODE_NAME(OP(p)));
NOT_REACHED;
}
if
(hardcount)
c = hardcount;
else
c = scan - *startposp;
*startposp = scan;
DEBUG_r({
DECLARE_AND_GET_RE_DEBUG_FLAGS;
DEBUG_EXECUTE_r({
SV *
const
prop = sv_newmortal();
regprop(prog, prop, p, reginfo, NULL);
Perl_re_exec_indentf( aTHX_
"%s can match %"
IVdf
" times out of %"
IVdf
"...\n"
,
depth, SvPVX_const(prop),(IV)c,(IV)max);
});
});
return
(c);
}
/*
 * S_reginclass - test the character starting at 'p' against the character
 * class regnode 'n'.
 *
 *   prog         the compiled regexp that owns 'n'; handed to
 *                GET_REGCLASS_AUX_DATA when data outside the bitmap is needed
 *   n            the class node being tested
 *   p, p_end     start of the character and the limit used when decoding it
 *   utf8_target  true when the bytes at 'p' are to be decoded as UTF-8
 *
 * The result is 'match' XORed with the node's ANYOF_INVERT flag, so an
 * inverted class flips the outcome of the tests below.
 */
STATIC
bool
S_reginclass(pTHX_ regexp *
const
prog,
const
regnode *
const
n,
const
U8*
const
p,
const
U8*
const
p_end,
const
bool
utf8_target)
{
/* Nodes whose op lies in the range ANYOFH..ANYOFHs are treated as having no
 * flags at all; every other node type supplies ANYOF_FLAGS(n). */
const
char
flags = (inRANGE(OP(n), ANYOFH, ANYOFHs))
? 0
: ANYOF_FLAGS(n);
bool
match = FALSE;
/* Start with the first byte; replaced by the decoded code point below when
 * the target is UTF-8 and that byte is not an invariant. */
UV c = *p;
PERL_ARGS_ASSERT_REGINCLASS;
if
(! UTF8_IS_INVARIANT(c) && utf8_target) {
STRLEN c_len = 0;
const
U32 utf8n_flags = UTF8_ALLOW_DEFAULT;
c = utf8n_to_uvchr(p, p_end - p, &c_len, utf8n_flags | UTF8_CHECK_ONLY);
/* A length of (STRLEN)-1 signals that decoding failed.  The message helper
 * is then invoked and control is not expected to come back (NOT_REACHED);
 * the trailing 1 presumably requests a fatal error - confirm against the
 * helper's definition. */
if
(c_len == (STRLEN)-1) {
_force_out_malformed_utf8_message(p, p_end,
utf8n_flags,
1
);
NOT_REACHED;
}
/* A code point above 255 matched against a locale node (ANYOFL or
 * ANYOFPOSIXL) that does not carry ANYOFL_UTF8_LOCALE_REQD goes through the
 * wide-character locale check macro. */
if
( c > 255
&& (OP(n) == ANYOFL || OP(n) == ANYOFPOSIXL)
&& ! (flags & ANYOFL_UTF8_LOCALE_REQD))
{
_CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c);
}
}
/* Stage 1: code points small enough for the bitmap, on nodes other than
 * ANYOFH..ANYOFHb, are looked up in the bitmap and, failing that, tried
 * against the flag-driven alternatives. */
if
(c < NUM_ANYOF_CODE_POINTS && ! inRANGE(OP(n), ANYOFH, ANYOFHb)) {
if
(ANYOF_BITMAP_TEST(n, c))
match = TRUE;
else
/* ANYOFD with the "matches all non-ASCII" flag: any non-ASCII code point of
 * a non-UTF-8 target matches. */
if
( (flags & ANYOFD_NON_UTF8_MATCHES_ALL_NON_ASCII__shared)
&& OP(n) == ANYOFD
&& ! utf8_target
&& ! isASCII(c))
{
match = TRUE;
}
else
if
(flags & ANYOF_LOCALE_FLAGS) {
/* Locale folding: retry the bitmap with the locale fold of c. */
if
( (flags & ANYOFL_FOLD)
&& c < 256
&& ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
{
match = TRUE;
}
else
/* Locale POSIX classes: c must fit in a U8 because it is cast to U8 for
 * isFOO_lc(). */
if
( ANYOF_POSIXL_TEST_ANY_SET(n)
&& c <= U8_MAX
) {
U32 posixl_bits = ANYOF_POSIXL_BITMAP(n);
/* Walk the set bits from least significant upward.  For each, bit_pos/2 is
 * the class number passed to isFOO_lc() and bit_pos%2 is XORed with that
 * test's result, so odd bit positions select the complemented class. */
do
{
U8 bit_pos = lsbit_pos32(posixl_bits);
if
(bit_pos % 2 ^ cBOOL(isFOO_lc(bit_pos/2, (U8) c))) {
match = TRUE;
break
;
}
POSIXL_CLEAR(posixl_bits, bit_pos);
}
while
(posixl_bits != 0);
}
}
}
/* Stage 2: nothing matched so far; consult whatever lives outside the
 * bitmap. */
if
(!match) {
/* Node has only a bitmap and no extra runtime matches: for code points
 * beyond the bitmap the answer is the low bit of the node's argument. */
if
( c >= NUM_ANYOF_CODE_POINTS
&& ANYOF_ONLY_HAS_BITMAP(n)
&& ! (flags & ANYOF_HAS_EXTRA_RUNTIME_MATCHES))
{
match = ARG1u(n) & 1;
}
else
/* Otherwise look further when c is beyond the bitmap, or when the node has
 * extra runtime matches and either it is not ANYOFD or the target is UTF-8
 * with a non-ASCII c (additionally below 256 when the bitmap covers more
 * than 256 code points). */
if
( c >= NUM_ANYOF_CODE_POINTS
|| ( (flags & ANYOF_HAS_EXTRA_RUNTIME_MATCHES)
&& ( UNLIKELY(OP(n) != ANYOFD)
|| (utf8_target && ! isASCII_uvchr(c)
# if NUM_ANYOF_CODE_POINTS > 256
&& c < 256
# endif
))))
{
if
(ANYOF_HAS_AUX(n)) {
SV* only_utf8_locale = NULL;
/* Fetch the node's auxiliary data: 'definition' is the list tested with
 * _invlist_contains_cp() below, and 'only_utf8_locale' may be filled in
 * with a second list that is consulted only under a UTF-8 ctype locale. */
SV *
const
definition = GET_REGCLASS_AUX_DATA(prog, n, TRUE, 0,
&only_utf8_locale, NULL);
if
(definition) {
if
(_invlist_contains_cp(definition, c)) {
match = TRUE;
}
else
/* Turkic UTF-8 locale: when the first byte is 'i' or 'I', also try the
 * dotted capital I (for 'i') or the dotless small i (for 'I') against the
 * list. */
if
( UNLIKELY(IN_UTF8_TURKIC_LOCALE)
&& isALPHA_FOLD_EQ(*p,
'i'
))
{
if
(*p ==
'i'
) {
if
(_invlist_contains_cp(definition,
LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
{
match = TRUE;
}
}
else
if
(_invlist_contains_cp(definition,
LATIN_SMALL_LETTER_DOTLESS_I))
{
match = TRUE;
}
}
}
/* The UTF-8-locale-only list is tried last, and only if still unmatched. */
if
( UNLIKELY(only_utf8_locale)
&& UNLIKELY(IN_UTF8_CTYPE_LOCALE)
&& ! match)
{
match = _invlist_contains_cp(only_utf8_locale, c);
}
}
/* Turkic UTF-8 locale, reverse direction: the dotted capital I in the
 * target matches if 'i' is in the bitmap, and the dotless small i matches
 * if 'I' is in the bitmap. */
if
(UNLIKELY(IN_UTF8_TURKIC_LOCALE) && ! match) {
if
(utf8_target) {
if
(c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
if
(ANYOF_BITMAP_TEST(n,
'i'
)) {
match = TRUE;
}
}
else
if
(c == LATIN_SMALL_LETTER_DOTLESS_I) {
if
(ANYOF_BITMAP_TEST(n,
'I'
)) {
match = TRUE;
}
}
}
#if NUM_ANYOF_CODE_POINTS > 256
/* Only compiled when the bitmap covers more than 256 code points: the
 * Turkic partners of 'i' and 'I' are then looked up in the bitmap. */
if
(*p ==
'i'
) {
if
(ANYOF_BITMAP_TEST(n,
LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
{
match = TRUE;
}
}
else
if
(*p ==
'I'
) {
if
(ANYOF_BITMAP_TEST(n, LATIN_SMALL_LETTER_DOTLESS_I)) {
match = TRUE;
}
}
#endif
}
}
/* Warn about an above-Unicode code point when the node asks for it, the
 * node is not ANYOFD, and the WARN_NON_UNICODE category is enabled.  Note
 * that this sits inside the "!match" block, so it is reached only when the
 * bitmap stage did not already match. */
if
( UNICODE_IS_SUPER(c)
&& (flags & ANYOF_WARN_SUPER__shared)
&& OP(n) != ANYOFD
&& ckWARN_d(WARN_NON_UNICODE))
{
Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
"Matched non-Unicode code point 0x%04"
UVXf
" against Unicode property; may not be portable"
, c);
}
}
/* The XOR below relies on ANYOF_INVERT being exactly 1 so that the masked
 * flag is itself 0 or 1; the build fails otherwise. */
#if ANYOF_INVERT != 1
# error ANYOF_INVERT needs to be set to 1, or guarded with cBOOL below,
#endif
return
(flags & ANYOF_INVERT) ^ match;
}
/*
 * S_reghop3 - step 'off' UTF-8 characters away from 's' and return the
 * resulting position: forward when 'off' is non-negative, backward when it
 * is negative.  The walk never crosses 'lim'; when the limit gets in the way
 * the position reached so far is returned.  Croaks if a backward step lands
 * on a byte that is not the start of a character.
 */
STATIC U8 *
S_reghop3(U8 *s, SSize_t off, const U8* lim)
{
    PERL_ARGS_ASSERT_REGHOP3;

    if (off >= 0) {
        for (; off != 0 && s < lim; off--) {
            U8 * const next = s + UTF8SKIP(s);

            /* The character at 's' would straddle 'lim': stay put. */
            if (next > lim)
                break;
            s = next;
        }
        return s;
    }

    for (; off != 0 && s > lim; off++) {
        s--;
        if (! UTF8_IS_CONTINUED(*s))
            continue;

        /* Landed inside a multi-byte character; back up over its
         * continuation bytes to find where it starts. */
        while (s > lim && UTF8_IS_CONTINUATION(*s))
            s--;
        if (! UTF8_IS_START(*s)) {
            Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
        }
    }
    return s;
}
/*
 * S_reghop4 - hop 'off' characters from 's' with both a left limit ('llim')
 * and a right limit ('rlim').  All of the work is delegated to
 * utf8_hop_safe(); its result is returned unchanged.
 */
STATIC U8 *
S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
{
    U8 *hopped;

    PERL_ARGS_ASSERT_REGHOP4;

    hopped = utf8_hop_safe(s, off, llim, rlim);
    return hopped;
}
/*
 * S_reghopmaybe3 - like a bounded hop of 'off' UTF-8 characters from 's',
 * but all-or-nothing: returns the new position only if every requested step
 * could be started before reaching 'lim', and NULL otherwise.  Forward for
 * non-negative 'off', backward for negative 'off'.  Croaks if a backward
 * step lands on a byte that is not the start of a character.
 */
STATIC U8 *
S_reghopmaybe3(U8* s, SSize_t off, const U8* const lim)
{
    PERL_ARGS_ASSERT_REGHOPMAYBE3;

    if (off >= 0) {
        while (off != 0) {
            /* Steps remain but the limit has been reached. */
            if (s >= lim)
                return NULL;
            s += UTF8SKIP(s);
            off--;
        }
        return s;
    }

    while (off != 0) {
        /* Steps remain but the limit has been reached. */
        if (s <= lim)
            return NULL;

        s--;
        if (UTF8_IS_CONTINUED(*s)) {
            /* Back up over continuation bytes to the character's start. */
            while (s > lim && UTF8_IS_CONTINUATION(*s))
                s--;
            if (! UTF8_IS_START(*s)) {
                Perl_croak_nocontext("Malformed UTF-8 character (fatal)");
            }
        }
        off++;
    }
    return s;
}
/*
 * S_setup_eval_state - record, in reginfo->info_aux_eval, the pieces of
 * interpreter and regexp state that are about to be overridden, then
 * override them: the default scalar, the target's pos magic, PL_curpm, and
 * the regexp's subbeg/sublen/offset fields (which are pointed at the string
 * currently being matched).  S_cleanup_regmatch_info_aux() restores what is
 * saved here.
 */
static void
S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
{
    regexp * const rex = ReANY(reginfo->prog);
    regmatch_info_aux_eval * const saved = reginfo->info_aux_eval;
    MAGIC *mg;

    saved->rex = rex;
    saved->sv  = reginfo->sv;

    if (!reginfo->sv) {
        saved->pos_magic = NULL;
    }
    else {
        /* Make the target the default scalar, unless it already is. */
        if (reginfo->sv != DEFSV) {
            SAVE_DEFSV;
            DEFSV_set(reginfo->sv);
        }

        /* Balanced by the SvREFCNT_dec() in the cleanup routine. */
        SvREFCNT_inc_NN(reginfo->sv);

        /* Find the target's pos magic, creating it (with length -1) when
         * it has none yet. */
        mg = mg_find_mglob(reginfo->sv);
        if (!mg) {
            mg = sv_magicext_mglob(reginfo->sv);
            mg->mg_len = -1;
        }
        saved->pos_magic = mg;
        saved->pos       = mg->mg_len;
        saved->pos_flags = mg->mg_flags;
    }

    /* PL_reg_curpm is allocated on first use only. */
    if (!PL_reg_curpm) {
        Newxz(PL_reg_curpm, 1, PMOP);
#ifdef USE_ITHREADS
        {
            /* Reserve a slot in the regex pad for it. */
            SV* const repointer = &PL_sv_undef;

            av_push(PL_regex_padav, repointer);
            PL_reg_curpm->op_pmoffset = av_top_index(PL_regex_padav);
            PL_regex_pad = AvARRAY(PL_regex_padav);
        }
#endif
    }
    SET_reg_curpm(reginfo->prog);

    /* Swap PL_reg_curpm in as the current PMOP, remembering the old one. */
    saved->curpm   = PL_curpm;
    PL_curpm_under = PL_curpm;
    PL_curpm       = PL_reg_curpm;

    /* If the regexp owns a copy of a previous target, stash the fields
     * describing it; a NULL subbeg tells the cleanup there is nothing to
     * put back. */
    if (!RXp_MATCH_COPIED(rex)) {
        saved->subbeg = NULL;
    }
    else {
        saved->subbeg     = RXp_SUBBEG(rex);
        saved->sublen     = RXp_SUBLEN(rex);
        saved->suboffset  = RXp_SUBOFFSET(rex);
        saved->subcoffset = RXp_SUBCOFFSET(rex);
#ifdef PERL_ANY_COW
        saved->saved_copy = RXp_SAVED_COPY(rex);
#endif
        RXp_MATCH_COPIED_off(rex);
    }

    /* Point the regexp at the string being matched right now. */
    RXp_SUBBEG(rex)     = (char *)reginfo->strbeg;
    RXp_SUBOFFSET(rex)  = 0;
    RXp_SUBCOFFSET(rex) = 0;
    RXp_SUBLEN(rex)     = reginfo->strend - reginfo->strbeg;
}
/*
 * S_cleanup_regmatch_info_aux - tear down the auxiliary match state passed
 * in 'arg' (a regmatch_info_aux *): free the position cache, undo what
 * S_setup_eval_state() changed (when eval state exists), restore the saved
 * regmatch state/slab pointers, and free every slab chained after the
 * restored one.
 */
static void
S_cleanup_regmatch_info_aux(pTHX_ void *arg)
{
    regmatch_info_aux * const aux = (regmatch_info_aux *) arg;
    regmatch_info_aux_eval * const saved = aux->info_aux_eval;
    regmatch_slab *slab;

    Safefree(aux->poscache);

    if (saved) {
        /* A non-NULL subbeg means the regexp's copied-target fields were
         * stashed and must be reinstated. */
        if (saved->subbeg) {
            regexp * const rex = saved->rex;

            RXp_SUBBEG(rex)     = saved->subbeg;
            RXp_SUBLEN(rex)     = saved->sublen;
            RXp_SUBOFFSET(rex)  = saved->suboffset;
            RXp_SUBCOFFSET(rex) = saved->subcoffset;
#ifdef PERL_ANY_COW
            RXp_SAVED_COPY(rex) = saved->saved_copy;
#endif
            RXp_MATCH_COPIED_on(rex);
        }

        /* Put back the pos value, and only the MGf_BYTES bit of the saved
         * flags; all other current flag bits are kept. */
        if (saved->pos_magic) {
            MAGIC * const pos_mg = saved->pos_magic;

            pos_mg->mg_len   = saved->pos;
            pos_mg->mg_flags = (pos_mg->mg_flags & ~MGf_BYTES)
                             | (saved->pos_flags & MGf_BYTES);
        }

        PL_curpm = saved->curpm;
        SvREFCNT_dec(saved->sv);
    }

    PL_regmatch_state = aux->old_regmatch_state;
    PL_regmatch_slab  = aux->old_regmatch_slab;

    /* Detach and free the slabs that follow the restored one. */
    slab = PL_regmatch_slab->next;
    if (slab) {
        PL_regmatch_slab->next = NULL;
        do {
            regmatch_slab * const doomed = slab;

            slab = slab->next;
            Safefree(doomed);
        } while (slab);
    }
}
/*
 * S_to_utf8_substr - for substring slots 1 and 0 of 'prog' (in that order),
 * create the UTF-8 counterpart of any byte substring that does not have one
 * yet.  The copy is upgraded to UTF-8, recompiled with fbm_compile() when
 * the source was compiled (SvVALID), and also installed as prog->check_utf8
 * when the source is the check substring.
 */
STATIC void
S_to_utf8_substr(pTHX_ regexp *prog)
{
    int i;

    PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;

    for (i = 1; i >= 0; i--) {
        SV * const bytes = prog->substrs->data[i].substr;
        SV *upgraded;

        if (!bytes || prog->substrs->data[i].utf8_substr)
            continue;

        upgraded = newSVsv(bytes);
        prog->substrs->data[i].utf8_substr = upgraded;
        sv_utf8_upgrade(upgraded);

        if (SvVALID(bytes)) {
            if (!SvTAIL(bytes)) {
                fbm_compile(upgraded, 0);
            }
            else {
                /* Drop the final byte before recompiling with the TAIL
                 * flag. */
                SvCUR_set(upgraded, SvCUR(upgraded) - 1);
                fbm_compile(upgraded, FBMcf_TAIL);
            }
        }

        if (bytes == prog->check_substr)
            prog->check_utf8 = upgraded;
    }
}
/*
 * S_to_byte_substr - for substring slots 1 and 0 of 'prog' (in that order),
 * create the byte counterpart of any UTF-8 substring that does not have one
 * yet.  Returns FALSE as soon as a substring cannot be downgraded (the
 * temporary copy is released first); returns TRUE otherwise.  A new byte
 * substring is also installed as prog->check_substr when its source is the
 * UTF-8 check substring.
 */
STATIC bool
S_to_byte_substr(pTHX_ regexp *prog)
{
    int i;

    PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;

    for (i = 1; i >= 0; i--) {
        SV * const wide = prog->substrs->data[i].utf8_substr;
        SV *narrowed;

        if (!wide || prog->substrs->data[i].substr)
            continue;

        narrowed = newSVsv(wide);
        if (! sv_utf8_downgrade(narrowed, TRUE)) {
            SvREFCNT_dec_NN(narrowed);
            return FALSE;
        }

        if (SvVALID(wide)) {
            if (!SvTAIL(wide)) {
                fbm_compile(narrowed, 0);
            }
            else {
                /* Drop the final byte before recompiling with the TAIL
                 * flag. */
                SvCUR_set(narrowed, SvCUR(narrowed) - 1);
                fbm_compile(narrowed, FBMcf_TAIL);
            }
        }

        prog->substrs->data[i].substr = narrowed;
        if (wide == prog->check_utf8)
            prog->check_substr = narrowed;
    }

    return TRUE;
}
#ifndef PERL_IN_XSUB_RE
/*
 * Perl_is_grapheme - report whether the single character at 's' (whose code
 * point is 'cp'), inside the UTF-8 string [strbeg, strend), stands alone as
 * a grapheme: there must be a grapheme-cluster break both before and after
 * it.
 *
 * Above-Unicode code points and non-characters are accepted outright;
 * code points not found in PL_Assigned_invlist are rejected outright.
 */
bool
Perl_is_grapheme(pTHX_ const U8 * strbeg, const U8 * s, const U8 * strend,
                 const UV cp)
{
    const U8 * before;
    GCB_enum here_gcb, before_gcb, after_gcb;

    PERL_ARGS_ASSERT_IS_GRAPHEME;

    if (UNLIKELY(UNICODE_IS_SUPER(cp)) || UNLIKELY(UNICODE_IS_NONCHAR(cp)))
        return TRUE;

    if (UNLIKELY(! ELEMENT_RANGE_MATCHES_INVLIST(
                            _invlist_search(PL_Assigned_invlist, cp))))
        return FALSE;

    here_gcb = getGCB_VAL_CP(cp);

    /* Break property of the preceding character; if hopping back did not
     * move, 's' is at the start of the string and the edge value is used. */
    before = utf8_hop_back(s, -1, strbeg);
    before_gcb = UNLIKELY(before == s)
                 ? GCB_EDGE
                 : getGCB_VAL_UTF8(before, strend);

    if (! isGCB(before_gcb, here_gcb, strbeg, s, TRUE))
        return FALSE;

    /* Same check on the other side, against the following character (or
     * the edge value when 's' is the last character). */
    s += UTF8SKIP(s);
    after_gcb = (s >= strend)
                ? GCB_EDGE
                : getGCB_VAL_UTF8(s, strend);

    return isGCB(here_gcb, after_gcb, strbeg, s, TRUE);
}
/*
 * Perl_isSCRIPT_RUN - decide whether the characters in [s, send) form a
 * script run: every character must be compatible with one common script,
 * and all decimal digits in the sequence must come from the same set of ten.
 *
 *   s, send      start and end of the sequence (send >= s is asserted)
 *   utf8_target  true when the sequence is UTF-8 encoded
 *
 * Returns TRUE for a script run, FALSE otherwise.
 *
 * 'ret_script' is a local that is always NULL in this version, so the code
 * that would report which script was found is currently inactive.
 */
bool
Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
{
    /* List of decimal digits, and direct access to its array */
    SV * decimals_invlist = PL_XPosix_ptrs[CC_DIGIT_];
    UV * decimals_array = invlist_array(decimals_invlist);

    /* Code point of the digit zero used by this run; 0 while still unknown */
    UV zero_of_run = 0;

    SCX_enum script_of_run  = SCX_INVALID;
    SCX_enum script_of_char = SCX_INVALID;

    /* While the run's script is still one of several candidates, this holds
     * the candidates remaining after intersecting with each character seen */
    SCX_enum * intersection = NULL;
    PERL_UINT_FAST8_T intersection_len = 0;

    bool retval = TRUE;
    SCX_enum * ret_script = NULL;

    assert(send >= s);

    PERL_ARGS_ASSERT_ISSCRIPT_RUN;

    /* A non-empty, non-UTF-8 sequence is always accepted.  Only the choice
     * between Latin and Common would remain, and that is needed only when the
     * caller wants the script reported. */
    if (! utf8_target && LIKELY(send > s)) {
        if (ret_script == NULL) {
            return TRUE;
        }

        /* Any alphabetic character other than the micro sign makes the run
         * Latin. */
        while (s < send) {
            if (isALPHA_L1(*s) && LIKELY(*s != MICRO_SIGN_NATIVE)) {
                *ret_script = SCX_Latin;
                return TRUE;
            }
            s++;    /* advance, otherwise this loop could never terminate */
        }

        *ret_script = SCX_Common;
        return TRUE;
    }

    /* Examine each character in turn */
    while (s < send) {

        /* For a digit, the code point of the zero of its set of ten */
        UV zero_of_char;

        UV cp;

        /* ASCII digits are acceptable in any run except one whose script is
         * Unknown, provided no digit from a different set has been seen. */
        if (UNLIKELY(isDIGIT(*s))) {
            if (UNLIKELY(script_of_run == SCX_Unknown)) {
                retval = FALSE;
                break;
            }
            if (zero_of_run) {
                if (zero_of_run != '0') {
                    retval = FALSE;
                    break;
                }
            }
            else {
                zero_of_run = '0';
            }
            s++;
            continue;
        }

        /* Not an ASCII digit: fetch the code point and step past it */
        if (! UTF8_IS_INVARIANT(*s)) {
            Size_t len;
            cp = valid_utf8_to_uvchr((U8 *) s, &len);
            s += len;
        }
        else {
            cp = *(s++);
        }

        /* Within zero..zero+9 of the run's known zero: a digit of the run's
         * own set, nothing more to check. */
        if (UNLIKELY(zero_of_run && withinCOUNT(cp, zero_of_run, 9))) {
            continue;
        }

        /* Determine the character's script.  Code points below 0x2B9 are
         * classified without a table lookup: those above 255, and alphabetic
         * ones other than the micro sign, are Latin; the rest are Common. */
        if (cp < 0x2B9) {
            if (       cp > 255
                || (   isALPHA_L1(cp)
                    && LIKELY(cp != MICRO_SIGN_NATIVE)))
            {
                script_of_char = SCX_Latin;
            }
            else {
                script_of_char = SCX_Common;
            }
        }
        else {
            script_of_char = _Perl_SCX_invmap[
                                       _invlist_search(PL_SCX_invlist, cp)];
        }

        /* An Unknown-script character is tolerated only as the sole member
         * of the sequence: fail if the run is already Unknown, or if this
         * character is Unknown and the run has already been started. */
        if (   UNLIKELY(script_of_run == SCX_Unknown)
            || UNLIKELY(   script_of_run != SCX_INVALID
                        && script_of_char == SCX_Unknown))
        {
            retval = FALSE;
            break;
        }

        /* First character, or run so far is Inherited: adopt the
         * character's script */
        if (   UNLIKELY(script_of_run == SCX_INVALID)
            || UNLIKELY(script_of_run == SCX_Inherited))
        {
            script_of_run = script_of_char;
        }

        /* Unknown (necessarily the first character, given the test above)
         * and Inherited characters need no further checks */
        if (UNLIKELY(script_of_char == SCX_Unknown)) {
            continue;
        }

        if (UNLIKELY(script_of_char == SCX_Inherited)) {
            continue;
        }

        /* A Common run takes on the script of the first non-Common
         * character */
        if (script_of_run == SCX_Common && script_of_char != SCX_Common) {
            script_of_run = script_of_char;
        }

        if (LIKELY(script_of_char == script_of_run)) {
            goto scripts_match;
        }

        /* Common characters are compatible with any run */
        if (script_of_char == SCX_Common) {
            goto scripts_match;
        }

#ifndef HAS_SCX_AUX_TABLES

        /* Without the auxiliary tables a mismatch is final */
        PERL_UNUSED_VAR(intersection_len);
        retval = FALSE;
        break;

#else

        /* A negative script value stands for a set of scripts, found by
         * indexing the auxiliary tables with its negation. */
        if (LIKELY(script_of_char >= 0)) {
            const SCX_enum * search_in;
            PERL_UINT_FAST8_T search_in_len;
            PERL_UINT_FAST8_T i;

            /* Two different single scripts can never be reconciled */
            if (LIKELY(script_of_run >= 0)) {
                retval = FALSE;
                break;
            }

            /* The run is a set of candidates: prefer the narrowed-down
             * intersection when there is one */
            if (intersection) {
                search_in = intersection;
                search_in_len = intersection_len;
            }
            else {
                search_in = SCX_AUX_TABLE_ptrs[-script_of_run];
                search_in_len = SCX_AUX_TABLE_lengths[-script_of_run];
            }

            /* If the character's script is one of the candidates, it
             * becomes the script of the run */
            for (i = 0; i < search_in_len; i++) {
                if (search_in[i] == script_of_char) {
                    script_of_run = script_of_char;
                    goto scripts_match;
                }
            }

            retval = FALSE;
            break;
        }
        else if (LIKELY(script_of_run >= 0)) {

            /* The character has several candidate scripts and the run has
             * exactly one: it must be among the character's candidates */
            const SCX_enum * search_in = SCX_AUX_TABLE_ptrs[-script_of_char];
            const PERL_UINT_FAST8_T search_in_len
                                     = SCX_AUX_TABLE_lengths[-script_of_char];
            PERL_UINT_FAST8_T i;

            for (i = 0; i < search_in_len; i++) {
                if (search_in[i] == script_of_run) {
                    script_of_char = script_of_run;
                    goto scripts_match;
                }
            }

            retval = FALSE;
            break;
        }
        else {

            /* Both sides are candidate sets: compute their intersection */
            const SCX_enum * search_char = SCX_AUX_TABLE_ptrs[-script_of_char];
            const PERL_UINT_FAST8_T char_len
                                      = SCX_AUX_TABLE_lengths[-script_of_char];
            const SCX_enum * search_run;
            PERL_UINT_FAST8_T run_len;

            SCX_enum * new_overlap = NULL;
            PERL_UINT_FAST8_T i, j;

            if (intersection) {
                search_run = intersection;
                run_len = intersection_len;
            }
            else {
                search_run = SCX_AUX_TABLE_ptrs[-script_of_run];
                run_len = SCX_AUX_TABLE_lengths[-script_of_run];
            }

            intersection_len = 0;

            for (i = 0; i < run_len; i++) {
                for (j = 0; j < char_len; j++) {
                    if (search_run[i] == search_char[j]) {

                        /* Allocate on the first hit, sized by the smaller
                         * count of entries still to be examined in the two
                         * sets */
                        if (intersection_len == 0) {
                            Newx(new_overlap,
                                 MIN(run_len - i, char_len - j),
                                 SCX_enum);
                        }
                        new_overlap[intersection_len++] = search_run[i];
                    }
                }
            }

            /* Nothing in common (and so nothing was allocated) */
            if (intersection_len == 0) {
                retval = FALSE;
                break;
            }

            /* Replace the old intersection.  A single survivor settles the
             * script outright; otherwise keep the new candidate list. */
            Safefree(intersection);
            intersection = NULL;
            if (intersection_len == 1) {
                script_of_run = script_of_char = new_overlap[0];
                Safefree(new_overlap);
                new_overlap = NULL;
            }
            else {
                intersection = new_overlap;
            }
        }

#endif

  scripts_match:

        /* The scripts are compatible; what remains is the digit check.
         * Code points below the first non-ASCII decimal digit cannot be
         * digits that still need checking. */
        if (cp < FIRST_NON_ASCII_DECIMAL_DIGIT) {
            continue;
        }

        /* When the character's script has a known zero, the character is a
         * digit only if it lies within zero..zero+9 */
        if (   script_of_char >= 0
            && (zero_of_char = script_zeros[script_of_char]))
        {
            if (! withinCOUNT(cp, zero_of_char, 9)) {
                continue;
            }
        }
        else {
            /* Otherwise look the character up in the list of decimal
             * digits */
            SSize_t index_of_zero_of_char;
            index_of_zero_of_char = _invlist_search(decimals_invlist, cp);
            if (     UNLIKELY(index_of_zero_of_char < 0)
                || ! ELEMENT_RANGE_MATCHES_INVLIST(index_of_zero_of_char))
            {
                continue;
            }

            zero_of_char = decimals_array[index_of_zero_of_char];
        }

        /* It is a digit: its zero must agree with the run's, or becomes the
         * run's if none has been recorded yet */
        if (zero_of_run) {
            if (zero_of_run != zero_of_char) {
                retval = FALSE;
                break;
            }
        }
        else {
            zero_of_run = zero_of_char;
        }
    }

    Safefree(intersection);

    if (ret_script != NULL) {
        if (retval) {
            *ret_script = script_of_run;
        }
        else {
            *ret_script = SCX_INVALID;
        }
    }

    return retval;
}
#endif /* ifndef PERL_IN_XSUB_RE */
/*
 * Perl_reg_named_buff - dispatch a named-capture-buffer request according to
 * the operation bit set in 'flags':
 *
 *   RXapif_FETCH                   -> reg_named_buff_fetch()
 *   RXapif_STORE/DELETE/CLEAR      -> croaks (buffers are not modifiable)
 *   RXapif_EXISTS                  -> &PL_sv_yes or &PL_sv_no
 *   RXapif_REGNAMES                -> reg_named_buff_all()
 *   RXapif_SCALAR/REGNAMES_COUNT   -> reg_named_buff_scalar()
 *
 * The tests are made in that order; anything else is a panic.  'value' is
 * unused.
 */
SV*
Perl_reg_named_buff(pTHX_ REGEXP * const rx, SV * const key, SV * const value,
                    const U32 flags)
{
    PERL_ARGS_ASSERT_REG_NAMED_BUFF;
    PERL_UNUSED_ARG(value);

    if (flags & RXapif_FETCH)
        return reg_named_buff_fetch(rx, key, flags);

    if (flags & (RXapif_STORE | RXapif_DELETE | RXapif_CLEAR)) {
        Perl_croak_no_modify();
        return NULL;
    }

    if (flags & RXapif_EXISTS) {
        if (reg_named_buff_exists(rx, key, flags))
            return &PL_sv_yes;
        return &PL_sv_no;
    }

    if (flags & RXapif_REGNAMES)
        return reg_named_buff_all(rx, flags);

    if (flags & (RXapif_SCALAR | RXapif_REGNAMES_COUNT))
        return reg_named_buff_scalar(rx, flags);

    Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff", (int)flags);
    return NULL;
}
/*
 * Perl_reg_named_buff_iter - dispatch an iteration request over the named
 * capture buffers: RXapif_FIRSTKEY goes to reg_named_buff_firstkey(),
 * RXapif_NEXTKEY to reg_named_buff_nextkey(); any other value of 'flags' is
 * a panic.  'lastkey' is unused.
 */
SV*
Perl_reg_named_buff_iter(pTHX_ REGEXP * const rx, const SV * const lastkey,
                         const U32 flags)
{
    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ITER;
    PERL_UNUSED_ARG(lastkey);

    if (flags & RXapif_FIRSTKEY) {
        return reg_named_buff_firstkey(rx, flags);
    }

    if (flags & RXapif_NEXTKEY) {
        return reg_named_buff_nextkey(rx, flags);
    }

    Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_iter",
                     (int)flags);
    return NULL;
}
/*
 * Perl_reg_named_buff_fetch - fetch the capture(s) registered under the name
 * 'namesv' in regexp 'r'.
 *
 * Without RXapif_ALL: returns a new SV holding the first group of that name
 * that is within rx->nparens and has valid offsets, or NULL if none does.
 *
 * With RXapif_ALL: returns a new reference to an array with one element per
 * group of that name, in order; groups that did not qualify are represented
 * by a fresh SVt_NULL scalar.
 *
 * Returns NULL when the regexp has no named groups or the name is not found.
 */
SV*
Perl_reg_named_buff_fetch(pTHX_ REGEXP * const r, SV * const namesv,
                          const U32 flags)
{
    struct regexp * const rx = ReANY(r);
    HE *entry;
    SV *nums_sv;
    I32 *parens;
    AV *all;
    IV idx;

    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FETCH;

    if (!rx || !RXp_PAREN_NAMES(rx))
        return NULL;

    entry = hv_fetch_ent( RXp_PAREN_NAMES(rx), namesv, 0, 0 );
    if (!entry)
        return NULL;

    /* The hash value carries the group numbers in its string buffer and
     * their count in its IV slot. */
    nums_sv = HeVAL(entry);
    parens  = (I32*)SvPVX(nums_sv);
    all     = (flags & RXapif_ALL) ? newAV_alloc_x(SvIVX(nums_sv)) : NULL;

    for (idx = 0; idx < SvIVX(nums_sv); idx++) {
        SV *elem;

        if (   (I32)(rx->nparens) >= parens[idx]
            && RXp_OFFS_VALID(rx,parens[idx]))
        {
            elem = newSVpvs("");
            Perl_reg_numbered_buff_fetch_flags(aTHX_ r, parens[idx], elem,
                                               REG_FETCH_ABSOLUTE);
            /* Single-value mode: the first hit is the answer. */
            if (!all)
                return elem;
        }
        else if (all) {
            elem = newSV_type(SVt_NULL);
        }
        else {
            continue;
        }

        av_push_simple(all, elem);
    }

    return all ? newRV_noinc(MUTABLE_SV(all)) : NULL;
}
/*
 * Perl_reg_named_buff_exists - report whether the name 'key' exists among
 * the named groups of regexp 'r'.
 *
 * With RXapif_ALL the answer is simply whether the name is a key of the
 * names hash.  Otherwise the name counts as existing only if fetching it
 * yields a value; that fetched value is released again before returning.
 * FALSE when the regexp has no named groups.
 */
bool
Perl_reg_named_buff_exists(pTHX_ REGEXP * const r, SV * const key,
                           const U32 flags)
{
    struct regexp * const rx = ReANY(r);
    SV *fetched;

    PERL_ARGS_ASSERT_REG_NAMED_BUFF_EXISTS;

    if (!rx || !RXp_PAREN_NAMES(rx))
        return FALSE;

    if (flags & RXapif_ALL)
        return hv_exists_ent(RXp_PAREN_NAMES(rx), key, 0);

    fetched = CALLREG_NAMED_BUFF_FETCH(r, key, flags);
    if (!fetched)
        return FALSE;

    SvREFCNT_dec_NN(fetched);
    return TRUE;
}
/*
 * Perl_reg_named_buff_firstkey - begin iterating over the named groups of
 * regexp 'r' and return the first qualifying name.
 *
 * Resets the iterator of the names hash, then asks for the next key with
 * RXapif_FIRSTKEY cleared from 'flags'.  Returns NULL when the regexp has no
 * named groups.
 */
SV*
Perl_reg_named_buff_firstkey(pTHX_ REGEXP * const r, const U32 flags)
{
    struct regexp *const rx = ReANY(r);

    PERL_ARGS_ASSERT_REG_NAMED_BUFF_FIRSTKEY;

    if ( rx && RXp_PAREN_NAMES(rx) ) {
        (void)hv_iterinit(RXp_PAREN_NAMES(rx));

        return CALLREG_NAMED_BUFF_NEXTKEY(r, NULL, flags & ~RXapif_FIRSTKEY);
    }

    /* This function returns a pointer, so signal "nothing" with NULL rather
     * than with a boolean constant. */
    return NULL;
}
/*
 * Perl_reg_named_buff_nextkey - continue the iteration over the named groups
 * of regexp 'r' and return the next qualifying name as a new SV, or NULL
 * when the iteration is exhausted (or there are no named groups).
 *
 * A name qualifies when RXapif_ALL is set, or when at least one of its group
 * numbers is no greater than the last paren and has valid offsets.
 */
SV*
Perl_reg_named_buff_nextkey(pTHX_ REGEXP * const r, const U32 flags)
{
    struct regexp * const rx = ReANY(r);
    DECLARE_AND_GET_RE_DEBUG_FLAGS;
    HV *names;
    HE *entry;

    PERL_ARGS_ASSERT_REG_NAMED_BUFF_NEXTKEY;

    if (!rx || !RXp_PAREN_NAMES(rx))
        return NULL;

    names = RXp_PAREN_NAMES(rx);

    while ( (entry = hv_iternext_flags(names, 0)) ) {
        SV * const nums_sv = HeVAL(entry);
        I32 * const parens = (I32*)SvPVX(nums_sv);
        IV parno = 0;
        IV idx;

        /* Find the first group number under this name that took part in
         * the match. */
        for (idx = 0; idx < SvIVX(nums_sv); idx++) {
            if (   (I32)(RXp_LASTPAREN(rx)) >= parens[idx]
                && RXp_OFFS_VALID(rx,parens[idx]))
            {
                parno = parens[idx];
                break;
            }
        }

        if (parno || flags & RXapif_ALL)
            return newSVhek(HeKEY_hek(entry));
    }

    return NULL;
}
/*
 * Perl_reg_named_buff_scalar - return a count of the named groups of regexp
 * 'r' as a new integer SV.
 *
 * With RXapif_ALL or RXapif_REGNAMES_COUNT the count is the total number of
 * keys in the names hash.  With RXapif_ONE it is the number of names
 * returned by the "all names" callback (called with RXapif_REGNAMES added to
 * the flags).  Other flags are a panic.  Returns &PL_sv_undef when the
 * regexp has no named groups.
 */
SV*
Perl_reg_named_buff_scalar(pTHX_ REGEXP * const r, const U32 flags)
{
    struct regexp * const rx = ReANY(r);

    PERL_ARGS_ASSERT_REG_NAMED_BUFF_SCALAR;

    if (!rx || !RXp_PAREN_NAMES(rx))
        return &PL_sv_undef;

    if (flags & (RXapif_ALL | RXapif_REGNAMES_COUNT))
        return newSViv(HvTOTALKEYS(RXp_PAREN_NAMES(rx)));

    if (flags & RXapif_ONE) {
        /* Build the list of names, measure it, then throw it away. */
        SV * const names_ref = CALLREG_NAMED_BUFF_ALL(r,
                                                (flags | RXapif_REGNAMES));
        AV * const names = MUTABLE_AV(SvRV(names_ref));
        const SSize_t length = av_count(names);

        SvREFCNT_dec_NN(names_ref);
        return newSViv(length);
    }

    Perl_croak(aTHX_ "panic: Unknown flags %d in named_buff_scalar",
                     (int)flags);
    return NULL;
}
/*
 * Perl_reg_named_buff_all - return a new reference to an array of the
 * qualifying group names of regexp 'r'.
 *
 * A name qualifies when RXapif_ALL is set, or when at least one of its group
 * numbers is no greater than the last paren and has valid offsets.  The
 * array is empty when the regexp has no named groups.
 */
SV*
Perl_reg_named_buff_all(pTHX_ REGEXP * const r, const U32 flags)
{
    struct regexp * const rx = ReANY(r);
    AV * const result = newAV();
    HV *names;

    PERL_ARGS_ASSERT_REG_NAMED_BUFF_ALL;

    names = rx ? RXp_PAREN_NAMES(rx) : NULL;

    if (names) {
        HE *entry;

        (void)hv_iterinit(names);

        while ( (entry = hv_iternext_flags(names, 0)) ) {
            SV * const nums_sv = HeVAL(entry);
            I32 * const parens = (I32*)SvPVX(nums_sv);
            IV parno = 0;
            IV idx;

            /* Find the first group number under this name that took part
             * in the match. */
            for (idx = 0; idx < SvIVX(nums_sv); idx++) {
                if (   (I32)(RXp_LASTPAREN(rx)) >= parens[idx]
                    && RXp_OFFS_VALID(rx,parens[idx]))
                {
                    parno = parens[idx];
                    break;
                }
            }

            if (parno || flags & RXapif_ALL)
                av_push_simple(result, newSVhek(HeKEY_hek(entry)));
        }
    }

    return newRV_noinc(MUTABLE_SV(result));
}
/*
 * Perl_reg_numbered_buff_fetch - store the contents of numbered buffer
 * 'paren' of regexp 're' into 'sv'.  Thin wrapper that forwards to
 * Perl_reg_numbered_buff_fetch_flags() with a flags value of 0.
 */
void
Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const re, const I32 paren,
                             SV * const sv)
{
    const U32 no_flags = 0;

    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;

    Perl_reg_numbered_buff_fetch_flags(aTHX_ re, paren, sv, no_flags);
}
#ifndef PERL_IN_XSUB_RE
/*
 * Perl_reg_numbered_buff_fetch_flags - copy the text of one match buffer of
 * regexp 're' into 'sv', or set 'sv' to undef when there is nothing to copy.
 *
 *   paren  a capture group number, or one of the RX_BUFF_IDX_* values
 *          selecting the prematch, postmatch or full match
 *   sv     destination scalar
 *   flags  when non-zero, 'paren' is compared against rx->nparens and used
 *          as the group number directly; when zero, it is compared against
 *          the logical paren count and translated through
 *          rx->logical_to_parno
 *
 * On success the UTF-8 flag of 'sv' is made to agree with the match, and
 * taintedness is propagated when tainting is enabled.
 */
void
Perl_reg_numbered_buff_fetch_flags(pTHX_ REGEXP *
const
re,
const
I32 paren,
SV *
const
sv, U32 flags)
{
struct
regexp *
const
rx = ReANY(re);
/* 's' points at the text to copy and 'i' is its length in bytes */
char
*s = NULL;
SSize_t i,t = 0;
SSize_t s1, t1;
I32 n = paren;
/* Fall back to the plain paren count when no logical count is recorded */
I32 logical_nparens = rx->logical_nparens ? rx->logical_nparens : rx->nparens;
PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH_FLAGS;
/* The three CARET variants are available only when KEEPCOPY is in effect.
 * The flag is looked for on the regexp first and, failing that, on the
 * current PMOP, provided that PMOP's regexp is this one. */
if
( n == RX_BUFF_IDX_CARET_PREMATCH
|| n == RX_BUFF_IDX_CARET_FULLMATCH
|| n == RX_BUFF_IDX_CARET_POSTMATCH
)
{
bool
keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
if
(!keepcopy) {
if
(PL_curpm && re == PM_GETRE(PL_curpm))
keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
}
if
(!keepcopy)
goto
ret_undef;
}
/* Nothing to copy from if the regexp has no subject buffer */
if
(!RXp_SUBBEG(rx))
goto
ret_undef;
/* From here on the CARET full match is handled exactly like the plain one */
if
(n == RX_BUFF_IDX_CARET_FULLMATCH)
n = RX_BUFF_IDX_FULLMATCH;
/* Prematch: from the start of the buffer, for as many bytes as the start
 * offset of group 0 */
if
((n == RX_BUFF_IDX_PREMATCH || n == RX_BUFF_IDX_CARET_PREMATCH)
&& (i = RXp_OFFS_START(rx,0)) != -1)
{
s = RXp_SUBBEG(rx);
}
else
/* Postmatch: from the end offset of group 0 to the end of the buffer, with
 * both adjusted by the buffer's sub-offset */
if
((n == RX_BUFF_IDX_POSTMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH)
&& (t = RXp_OFFS_END(rx,0)) != -1)
{
s = RXp_SUBBEG(rx) - RXp_SUBOFFSET(rx) + t;
i = RXp_SUBLEN(rx) + RXp_SUBOFFSET(rx) - t;
}
else
/* Full match or a numbered group.  The upper bound of the range check
 * depends on 'flags' (see the header comment). */
if
(inRANGE(n, 0, flags ? (I32)rx->nparens : logical_nparens)) {
/* The logical-to-real map is used only for non-zero group numbers when
 * 'flags' is zero */
I32 *map = (!flags && n) ? rx->logical_to_parno : NULL;
I32 true_parno = map ? map[n] : n;
/* Try this group; when it has no valid offsets and a map is in use, move on
 * to the next group chained to it via parno_to_logical_next, until the
 * chain yields 0 */
do
{
if
(((s1 = RXp_OFFS_START(rx,true_parno)) != -1) &&
((t1 = RXp_OFFS_END(rx,true_parno)) != -1))
{
i = t1 - s1;
s = RXp_SUBBEG(rx) + s1 - RXp_SUBOFFSET(rx);
goto
found_it;
}
else
if
(map) {
true_parno = rx->parno_to_logical_next[true_parno];
}
else
{
break
;
}
}
while
(true_parno);
goto
ret_undef;
}
else
{
goto
ret_undef;
}
/* Reached by falling through from the prematch/postmatch branches as well
 * as by the goto above */
found_it:
assert
(s >= RXp_SUBBEG(rx));
assert
((STRLEN)RXp_SUBLEN(rx) >= (STRLEN)((s - RXp_SUBBEG(rx)) + i) );
if
(i >= 0) {
#ifdef NO_TAINT_SUPPORT
sv_setpvn(sv, s, i);
#else
/* Do the copy with the taint state cleared, then put the previous state
 * back */
const
int
oldtainted = TAINT_get;
TAINT_NOT;
sv_setpvn(sv, s, i);
TAINT_set(oldtainted);
#endif
if
(RXp_MATCH_UTF8(rx))
SvUTF8_on(sv);
else
SvUTF8_off(sv);
if
(TAINTING_get) {
if
(RXp_MATCH_TAINTED(rx)) {
if
(SvTYPE(sv) >= SVt_PVMG) {
/* Temporarily unlink the first magic entry while SvTAINT() runs, then
 * relink it in front of whatever magic chain 'sv' has afterwards.
 * NOTE(review): presumably this keeps the entry that led here at the head
 * of the chain - confirm against the callers. */
MAGIC*
const
mg = SvMAGIC(sv);
MAGIC* mgt;
TAINT;
SvMAGIC_set(sv, mg->mg_moremagic);
SvTAINT(sv);
if
((mgt = SvMAGIC(sv))) {
mg->mg_moremagic = mgt;
SvMAGIC_set(sv, mg);
}
}
else
{
TAINT;
SvTAINT(sv);
}
}
else
SvTAINTED_off(sv);
}
}
else
{
/* Common exit for every "no value" case, also entered by goto from above */
ret_undef:
sv_set_undef(sv);
return
;
}
}
#endif
/*
 * Perl_reg_numbered_buff_store - handler for an attempt to assign to a
 * numbered match buffer.  None of the arguments is used: the attempt croaks
 * with the "no modify" error unless PL_localizing is set, in which case the
 * call is silently ignored.
 */
void
Perl_reg_numbered_buff_store(pTHX_ REGEXP * const rx, const I32 paren,
                             SV const * const value)
{
    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_STORE;
    PERL_UNUSED_ARG(rx);
    PERL_UNUSED_ARG(paren);
    PERL_UNUSED_ARG(value);

    if (PL_localizing) {
        return;
    }

    Perl_croak_no_modify();
}
/*
 * Perl_reg_numbered_buff_length - return the length of one match buffer of
 * regexp 'r'.
 *
 *   sv     passed to report_uninit() when the buffer has no value and
 *          "uninitialized" warnings are enabled
 *   paren  a capture group number, or one of the RX_BUFF_IDX_* values
 *          selecting the prematch, postmatch or full match
 *
 * The length is in bytes, except that when the match is flagged UTF-8 and
 * the text is valid UTF-8 it is the character count.  Returns 0 when the
 * buffer is empty or has no value.
 */
I32
Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
                              const I32 paren)
{
    struct regexp *const rx = ReANY(r);
    I32 i,j;
    I32 s1, t1;
    /* Fall back to the plain paren count when no logical count is recorded */
    I32 logical_nparens = rx->logical_nparens ? rx->logical_nparens : rx->nparens;
    I32 n = paren;

    PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;

    /* The three CARET variants are available only when KEEPCOPY is in
     * effect.  The flag is looked for on the regexp first and, failing that,
     * on the current PMOP, provided that PMOP's regexp is this one. */
    if (   paren == RX_BUFF_IDX_CARET_PREMATCH
        || paren == RX_BUFF_IDX_CARET_FULLMATCH
        || paren == RX_BUFF_IDX_CARET_POSTMATCH
    )
    {
        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
        if (!keepcopy) {
            if (PL_curpm && r == PM_GETRE(PL_curpm))
                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
        }
        if (!keepcopy)
            goto warn_undef;
    }

    switch (paren) {
      case RX_BUFF_IDX_CARET_PREMATCH:
      case RX_BUFF_IDX_PREMATCH:
        /* Prematch: everything before the start of group 0 */
        if ( (i = RXp_OFFS_START(rx,0)) != -1) {
            if (i > 0) {
                s1 = 0;
                t1 = i;
                goto getlen;
            }
        }
        return 0;

      case RX_BUFF_IDX_CARET_POSTMATCH:
      case RX_BUFF_IDX_POSTMATCH:
        /* Postmatch: everything after the end of group 0 */
        if ( (j = RXp_OFFS_END(rx,0)) != -1 ) {
            i = RXp_SUBLEN(rx) - j;
            if (i > 0) {
                s1 = j;
                t1 = RXp_SUBLEN(rx);
                goto getlen;
            }
        }
        return 0;

      default:
        /* Full match or a numbered group.  The CARET full match is the same
         * buffer as the plain full match, so map it onto that index before
         * it is used as an array subscript, just as
         * Perl_reg_numbered_buff_fetch_flags() does. */
        if (n == RX_BUFF_IDX_CARET_FULLMATCH)
            n = RX_BUFF_IDX_FULLMATCH;

        /* Reject negative indices as well as too-large ones: 'n' is about
         * to index the logical-to-real map and the offsets. */
        if (n >= 0 && n <= logical_nparens) {
            I32 true_paren = rx->logical_to_parno
                             ? rx->logical_to_parno[n]
                             : n;

            /* Try this group; when it has no valid offsets, move on to the
             * next group chained to it via parno_to_logical_next (if that
             * table exists), until the chain yields 0. */
            do {
                if (((s1 = RXp_OFFS_START(rx,true_paren)) != -1) &&
                    ((t1 = RXp_OFFS_END(rx,true_paren)) != -1))
                {
                    i = t1 - s1;
                    goto getlen;
                } else if (rx->parno_to_logical_next) {
                    true_paren = rx->parno_to_logical_next[true_paren];
                } else {
                    break;
                }
            } while(true_paren);
        }

      /* Common exit for every "no value" case, also entered by goto from
       * the KEEPCOPY check above */
      warn_undef:
        if (ckWARN(WARN_UNINITIALIZED))
            report_uninit((const SV *)sv);
        return 0;
    }

  getlen:
    /* For a UTF-8 match, convert the byte length to a character count,
     * provided the text validates as UTF-8. */
    if (i > 0 && RXp_MATCH_UTF8(rx)) {
        const char * const s = RXp_SUBBEG(rx) - RXp_SUBOFFSET(rx) + s1;
        const U8 *ep;
        STRLEN el;

        i = t1 - s1;
        if (is_utf8_string_loclen((U8*)s, i, &ep, &el))
            i = el;
    }
    return i;
}