2009-10-20 22:40 UTC+0200 Viktor Szakats (harbour.01 syenar.hu)
* external/pcre/pcretryf.c
* external/pcre/pcre.h
* external/pcre/pcreexec.c
* external/pcre/pcreinal.h
* external/pcre/pcredfa.c
* external/pcre/pcrecomp.c
* external/pcre/config.h
* external/pcre/LICENCE
* external/pcre/pcreucd.c
* external/pcre/pcreprni.h
* external/pcre/pcrestud.c
* external/pcre/pcrefinf.c
+ Updated pcre to 8.0 (from 7.9)
* external/pcre/cnv_o2hb.bat
* external/pcre/cnv_hb2o.bat
! Minor fix.
This commit is contained in:
@@ -17,6 +17,25 @@
|
||||
past entries belonging to author(s): Viktor Szakats.
|
||||
*/
|
||||
|
||||
2009-10-20 22:40 UTC+0200 Viktor Szakats (harbour.01 syenar.hu)
|
||||
* external/pcre/pcretryf.c
|
||||
* external/pcre/pcre.h
|
||||
* external/pcre/pcreexec.c
|
||||
* external/pcre/pcreinal.h
|
||||
* external/pcre/pcredfa.c
|
||||
* external/pcre/pcrecomp.c
|
||||
* external/pcre/config.h
|
||||
* external/pcre/LICENCE
|
||||
* external/pcre/pcreucd.c
|
||||
* external/pcre/pcreprni.h
|
||||
* external/pcre/pcrestud.c
|
||||
* external/pcre/pcrefinf.c
|
||||
+ Updated pcre to 8.0 (from 7.9)
|
||||
|
||||
* external/pcre/cnv_o2hb.bat
|
||||
* external/pcre/cnv_hb2o.bat
|
||||
! Minor fix.
|
||||
|
||||
2009-10-20 21:35 UTC+0200 Viktor Szakats (harbour.01 syenar.hu)
|
||||
* contrib/rddads/adsfunc.c
|
||||
* contrib/rddads/rddads.h
|
||||
|
||||
4
harbour/external/pcre/LICENCE
vendored
4
harbour/external/pcre/LICENCE
vendored
@@ -4,7 +4,7 @@ PCRE LICENCE
|
||||
PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Release 7 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
|
||||
specified below. The documentation for PCRE, supplied in the "doc"
|
||||
directory, is distributed under the same terms as the software itself.
|
||||
|
||||
@@ -22,7 +22,7 @@ Email domain: cam.ac.uk
|
||||
University of Cambridge Computing Service,
|
||||
Cambridge, England.
|
||||
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
Copyright (c) 1997-2009 University of Cambridge
|
||||
All rights reserved.
|
||||
|
||||
|
||||
|
||||
2
harbour/external/pcre/cnv_hb2o.bat
vendored
2
harbour/external/pcre/cnv_hb2o.bat
vendored
@@ -21,7 +21,7 @@ rem don't have to mess with this tool.
|
||||
md ori_dst
|
||||
del ori_dst\*.* /Y
|
||||
|
||||
copy LICENSE ori_dst\LICENSE
|
||||
copy LICENCE ori_dst\LICENCE
|
||||
copy config.h ori_dst\config.h.generic
|
||||
copy pcre.h ori_dst\pcre.h.generic
|
||||
copy pcreinal.h ori_dst\pcre_internal.h
|
||||
|
||||
2
harbour/external/pcre/cnv_o2hb.bat
vendored
2
harbour/external/pcre/cnv_o2hb.bat
vendored
@@ -21,7 +21,7 @@ rem DISCLAIMER: This tool is targeted only to Harbour core
|
||||
rem maintainers. If you're not one of them you
|
||||
rem don't have to mess with this tool.
|
||||
|
||||
copy ori_src\LICENSE LICENSE
|
||||
copy ori_src\LICENCE LICENCE
|
||||
copy ori_src\config.h.generic config.h
|
||||
copy ori_src\pcre.h.generic pcre.h
|
||||
copy ori_src\pcre_internal.h pcreinal.h
|
||||
|
||||
12
harbour/external/pcre/config.h
vendored
12
harbour/external/pcre/config.h
vendored
@@ -175,6 +175,12 @@ them both to 0; an emulation function will be used. */
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* Define to the sub-directory in which libtool stores uninstalled libraries.
|
||||
*/
|
||||
#ifndef LT_OBJDIR
|
||||
#define LT_OBJDIR ".libs/"
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
@@ -241,13 +247,13 @@ them both to 0; an emulation function will be used. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.9"
|
||||
#define PACKAGE_STRING "PCRE 8.00"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.9"
|
||||
#define PACKAGE_VERSION "8.00"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
@@ -303,7 +309,7 @@ them both to 0; an emulation function will be used. */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "7.9"
|
||||
#define VERSION "8.00"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
|
||||
12
harbour/external/pcre/pcre.h
vendored
12
harbour/external/pcre/pcre.h
vendored
@@ -41,10 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 9
|
||||
#define PCRE_MAJOR 8
|
||||
#define PCRE_MINOR 00
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2009-04-11
|
||||
#define PCRE_DATE 2009-10-19
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
@@ -113,7 +113,8 @@ both, so we keep them all distinct. */
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000
|
||||
#define PCRE_PARTIAL 0x00008000
|
||||
#define PCRE_PARTIAL_SOFT 0x00008000
|
||||
#define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
@@ -128,6 +129,8 @@ both, so we keep them all distinct. */
|
||||
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
|
||||
#define PCRE_NO_START_OPTIMIZE 0x04000000
|
||||
#define PCRE_NO_START_OPTIMISE 0x04000000
|
||||
#define PCRE_PARTIAL_HARD 0x08000000
|
||||
#define PCRE_NOTEMPTY_ATSTART 0x10000000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@@ -174,6 +177,7 @@ both, so we keep them all distinct. */
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
#define PCRE_INFO_MINLENGTH 15
|
||||
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
302
harbour/external/pcre/pcrecomp.c
vendored
302
harbour/external/pcre/pcrecomp.c
vendored
@@ -341,7 +341,9 @@ static const char error_texts[] =
|
||||
"number is too big\0"
|
||||
"subpattern name expected\0"
|
||||
"digit expected after (?+\0"
|
||||
"] is an invalid data character in JavaScript compatibility mode";
|
||||
"] is an invalid data character in JavaScript compatibility mode\0"
|
||||
/* 65 */
|
||||
"different names for subpatterns of the same number are not allowed";
|
||||
|
||||
|
||||
/* Table to identify digits and hex digits. This is used when compiling
|
||||
@@ -1100,6 +1102,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS)
|
||||
if (name != NULL && lorn == ptr - thisname &&
|
||||
strncmp((const char *)name, (const char *)thisname, lorn) == 0)
|
||||
return *count;
|
||||
term++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1134,19 +1137,21 @@ for (; *ptr != 0; ptr++)
|
||||
BOOL negate_class = FALSE;
|
||||
for (;;)
|
||||
{
|
||||
int c = *(++ptr);
|
||||
if (c == CHAR_BACKSLASH)
|
||||
if (ptr[1] == CHAR_BACKSLASH)
|
||||
{
|
||||
if (ptr[1] == CHAR_E)
|
||||
ptr++;
|
||||
else if (strncmp((const char *)ptr+1,
|
||||
if (ptr[2] == CHAR_E)
|
||||
ptr+= 2;
|
||||
else if (strncmp((const char *)ptr+2,
|
||||
STR_Q STR_BACKSLASH STR_E, 3) == 0)
|
||||
ptr += 3;
|
||||
ptr += 4;
|
||||
else
|
||||
break;
|
||||
}
|
||||
else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
|
||||
else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
negate_class = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
|
||||
@@ -1312,7 +1317,9 @@ for (;;)
|
||||
|
||||
case OP_CALLOUT:
|
||||
case OP_CREF:
|
||||
case OP_NCREF:
|
||||
case OP_RREF:
|
||||
case OP_NRREF:
|
||||
case OP_DEF:
|
||||
code += _pcre_OP_lengths[*code];
|
||||
break;
|
||||
@@ -1328,23 +1335,34 @@ for (;;)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the fixed length of a pattern *
|
||||
* Find the fixed length of a branch *
|
||||
*************************************************/
|
||||
|
||||
/* Scan a pattern and compute the fixed length of subject that will match it,
|
||||
/* Scan a branch and compute the fixed length of subject that will match it,
|
||||
if the length is fixed. This is needed for dealing with backward assertions.
|
||||
In UTF8 mode, the result is in characters rather than bytes.
|
||||
In UTF8 mode, the result is in characters rather than bytes. The branch is
|
||||
temporarily terminated with OP_END when this function is called.
|
||||
|
||||
This function is called when a backward assertion is encountered, so that if it
|
||||
fails, the error message can point to the correct place in the pattern.
|
||||
However, we cannot do this when the assertion contains subroutine calls,
|
||||
because they can be forward references. We solve this by remembering this case
|
||||
and doing the check at the end; a flag specifies which mode we are running in.
|
||||
|
||||
Arguments:
|
||||
code points to the start of the pattern (the bracket)
|
||||
options the compiling options
|
||||
atend TRUE if called when the pattern is complete
|
||||
cd the "compile data" structure
|
||||
|
||||
Returns: the fixed length, or -1 if there is no fixed length,
|
||||
Returns: the fixed length,
|
||||
or -1 if there is no fixed length,
|
||||
or -2 if \C was encountered
|
||||
or -3 if an OP_RECURSE item was encountered and atend is FALSE
|
||||
*/
|
||||
|
||||
static int
|
||||
find_fixedlength(uschar *code, int options)
|
||||
find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
|
||||
{
|
||||
int length = -1;
|
||||
|
||||
@@ -1357,6 +1375,7 @@ branch, check the length against that of the other branches. */
|
||||
for (;;)
|
||||
{
|
||||
int d;
|
||||
uschar *ce, *cs;
|
||||
register int op = *cc;
|
||||
switch (op)
|
||||
{
|
||||
@@ -1364,7 +1383,7 @@ for (;;)
|
||||
case OP_BRA:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);
|
||||
d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
@@ -1387,6 +1406,21 @@ for (;;)
|
||||
branchlength = 0;
|
||||
break;
|
||||
|
||||
/* A true recursion implies not fixed length, but a subroutine call may
|
||||
be OK. If the subroutine is a forward reference, we can't deal with
|
||||
it until the end of the pattern, so return -3. */
|
||||
|
||||
case OP_RECURSE:
|
||||
if (!atend) return -3;
|
||||
cs = ce = (uschar *)cd->start_code + GET(cc, 1); /* Start subpattern */
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */
|
||||
if (cc > cs && cc < ce) return -1; /* Recursion */
|
||||
d = find_fixedlength(cs + 2, options, atend, cd);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over assertive subpatterns */
|
||||
|
||||
case OP_ASSERT:
|
||||
@@ -1400,7 +1434,9 @@ for (;;)
|
||||
|
||||
case OP_REVERSE:
|
||||
case OP_CREF:
|
||||
case OP_NCREF:
|
||||
case OP_RREF:
|
||||
case OP_NRREF:
|
||||
case OP_DEF:
|
||||
case OP_OPT:
|
||||
case OP_CALLOUT:
|
||||
@@ -1423,10 +1459,8 @@ for (;;)
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if ((options & PCRE_UTF8) != 0)
|
||||
{
|
||||
while ((*cc & 0xc0) == 0x80) cc++;
|
||||
}
|
||||
if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
|
||||
cc += _pcre_utf8_table4[cc[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
@@ -1437,10 +1471,8 @@ for (;;)
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if ((options & PCRE_UTF8) != 0)
|
||||
{
|
||||
while((*cc & 0x80) == 0x80) cc++;
|
||||
}
|
||||
if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
|
||||
cc += _pcre_utf8_table4[cc[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
@@ -1519,22 +1551,25 @@ for (;;)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Scan compiled regex for numbered bracket *
|
||||
* Scan compiled regex for specific bracket *
|
||||
*************************************************/
|
||||
|
||||
/* This little function scans through a compiled pattern until it finds a
|
||||
capturing bracket with the given number.
|
||||
capturing bracket with the given number, or, if the number is negative, an
|
||||
instance of OP_REVERSE for a lookbehind. The function is global in the C sense
|
||||
so that it can be called from pcre_study() when finding the minimum matching
|
||||
length.
|
||||
|
||||
Arguments:
|
||||
code points to start of expression
|
||||
utf8 TRUE in UTF-8 mode
|
||||
number the required bracket number
|
||||
number the required bracket number or negative to find a lookbehind
|
||||
|
||||
Returns: pointer to the opcode for the bracket, or NULL if not found
|
||||
*/
|
||||
|
||||
static const uschar *
|
||||
find_bracket(const uschar *code, BOOL utf8, int number)
|
||||
const uschar *
|
||||
_pcre_find_bracket(const uschar *code, BOOL utf8, int number)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
@@ -1547,6 +1582,14 @@ for (;;)
|
||||
|
||||
if (c == OP_XCLASS) code += GET(code, 1);
|
||||
|
||||
/* Handle lookbehind: a negative number asks for the next OP_REVERSE */
|
||||
|
||||
else if (c == OP_REVERSE)
|
||||
{
|
||||
if (number < 0) return (uschar *)code;
|
||||
code += _pcre_OP_lengths[c];
|
||||
}
|
||||
|
||||
/* Handle capturing bracket */
|
||||
|
||||
else if (c == OP_CBRA)
|
||||
@@ -1912,10 +1955,13 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
|
||||
break;
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
if (utf8) while ((code[2] & 0xc0) == 0x80) code++;
|
||||
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
@@ -3869,10 +3915,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
|
||||
if (repeat_max == 0) goto END_REPEAT;
|
||||
|
||||
/*--------------------------------------------------------------------*/
|
||||
/* This code is obsolete from release 8.00; the restriction was finally
|
||||
removed: */
|
||||
|
||||
/* All real repeats make it impossible to handle partial matching (maybe
|
||||
one day we will be able to remove this restriction). */
|
||||
|
||||
if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
|
||||
/* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
|
||||
/*--------------------------------------------------------------------*/
|
||||
|
||||
/* Combine the op_type with the repeat_type */
|
||||
|
||||
@@ -4019,10 +4070,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
goto END_REPEAT;
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------*/
|
||||
/* This code is obsolete from release 8.00; the restriction was finally
|
||||
removed: */
|
||||
|
||||
/* All real repeats make it impossible to handle partial matching (maybe
|
||||
one day we will be able to remove this restriction). */
|
||||
|
||||
if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
|
||||
/* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
|
||||
/*--------------------------------------------------------------------*/
|
||||
|
||||
if (repeat_min == 0 && repeat_max == -1)
|
||||
*code++ = OP_CRSTAR + repeat_type;
|
||||
@@ -4337,11 +4393,20 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
if (possessive_quantifier)
|
||||
{
|
||||
int len;
|
||||
if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
|
||||
*tempcode == OP_NOTEXACT)
|
||||
|
||||
if (*tempcode == OP_TYPEEXACT)
|
||||
tempcode += _pcre_OP_lengths[*tempcode] +
|
||||
((*tempcode == OP_TYPEEXACT &&
|
||||
(tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
|
||||
((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
|
||||
|
||||
else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
|
||||
{
|
||||
tempcode += _pcre_OP_lengths[*tempcode];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && tempcode[-1] >= 0xc0)
|
||||
tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
|
||||
#endif
|
||||
}
|
||||
|
||||
len = code - tempcode;
|
||||
if (len > 0) switch (*tempcode)
|
||||
{
|
||||
@@ -4419,8 +4484,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
if (namelen == verbs[i].len &&
|
||||
strncmp((char *)name, vn, namelen) == 0)
|
||||
{
|
||||
*code = verbs[i].op;
|
||||
if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
|
||||
/* Check for open captures before ACCEPT */
|
||||
|
||||
if (verbs[i].op == OP_ACCEPT)
|
||||
{
|
||||
open_capitem *oc;
|
||||
cd->had_accept = TRUE;
|
||||
for (oc = cd->open_caps; oc != NULL; oc = oc->next)
|
||||
{
|
||||
*code++ = OP_CLOSE;
|
||||
PUT2INC(code, 0, oc->number);
|
||||
}
|
||||
}
|
||||
*code++ = verbs[i].op;
|
||||
break;
|
||||
}
|
||||
vn += verbs[i].len + 1;
|
||||
@@ -4582,7 +4658,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
}
|
||||
|
||||
/* Otherwise (did not start with "+" or "-"), start by looking for the
|
||||
name. */
|
||||
name. If we find a name, add one to the opcode to change OP_CREF or
|
||||
OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
|
||||
except they record that the reference was originally to a name. The
|
||||
information is used to check duplicate names. */
|
||||
|
||||
slot = cd->name_table;
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
@@ -4597,6 +4676,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
{
|
||||
recno = GET2(slot, 0);
|
||||
PUT2(code, 2+LINK_SIZE, recno);
|
||||
code[1+LINK_SIZE]++;
|
||||
}
|
||||
|
||||
/* Search the pattern for a forward reference */
|
||||
@@ -4605,6 +4685,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
(options & PCRE_EXTENDED) != 0)) > 0)
|
||||
{
|
||||
PUT2(code, 2+LINK_SIZE, i);
|
||||
code[1+LINK_SIZE]++;
|
||||
}
|
||||
|
||||
/* If terminator == 0 it means that the name followed directly after
|
||||
@@ -4797,11 +4878,24 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
}
|
||||
}
|
||||
|
||||
/* In the real compile, create the entry in the table */
|
||||
/* In the real compile, create the entry in the table, maintaining
|
||||
alphabetical order. Duplicate names for different numbers are
|
||||
permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
|
||||
number are always OK. (An existing number can be re-used if (?|
|
||||
appears in the pattern.) In either event, a duplicate name results in
|
||||
a duplicate entry in the table, even if the number is the same. This
|
||||
is because the number of names, and hence the table size, is computed
|
||||
in the pre-compile, and it affects various numbers and pointers which
|
||||
would all have to be modified, and the compiled code moved down, if
|
||||
duplicates with the same number were omitted from the table. This
|
||||
doesn't seem worth the hassle. However, *different* names for the
|
||||
same number are not permitted. */
|
||||
|
||||
else
|
||||
{
|
||||
BOOL dupname = FALSE;
|
||||
slot = cd->name_table;
|
||||
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
{
|
||||
int crc = memcmp(name, slot+2, namelen);
|
||||
@@ -4809,33 +4903,66 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
{
|
||||
if (slot[2+namelen] == 0)
|
||||
{
|
||||
if ((options & PCRE_DUPNAMES) == 0)
|
||||
if (GET2(slot, 0) != cd->bracount + 1 &&
|
||||
(options & PCRE_DUPNAMES) == 0)
|
||||
{
|
||||
*errorcodeptr = ERR43;
|
||||
goto FAILED;
|
||||
}
|
||||
else dupname = TRUE;
|
||||
}
|
||||
else crc = -1; /* Current name is substring */
|
||||
else crc = -1; /* Current name is a substring */
|
||||
}
|
||||
|
||||
/* Make space in the table and break the loop for an earlier
|
||||
name. For a duplicate or later name, carry on. We do this for
|
||||
duplicates so that in the simple case (when (?| is not used) they
|
||||
are in order of their numbers. */
|
||||
|
||||
if (crc < 0)
|
||||
{
|
||||
memmove(slot + cd->name_entry_size, slot,
|
||||
(cd->names_found - i) * cd->name_entry_size);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Continue the loop for a later or duplicate name */
|
||||
|
||||
slot += cd->name_entry_size;
|
||||
}
|
||||
|
||||
/* For non-duplicate names, check for a duplicate number before
|
||||
adding the new name. */
|
||||
|
||||
if (!dupname)
|
||||
{
|
||||
uschar *cslot = cd->name_table;
|
||||
for (i = 0; i < cd->names_found; i++)
|
||||
{
|
||||
if (cslot != slot)
|
||||
{
|
||||
if (GET2(cslot, 0) == cd->bracount + 1)
|
||||
{
|
||||
*errorcodeptr = ERR65;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
else i--;
|
||||
cslot += cd->name_entry_size;
|
||||
}
|
||||
}
|
||||
|
||||
PUT2(slot, 0, cd->bracount + 1);
|
||||
memcpy(slot + 2, name, namelen);
|
||||
slot[2+namelen] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* In both cases, count the number of names we've encountered. */
|
||||
/* In both pre-compile and compile, count the number of names we've
|
||||
encountered. */
|
||||
|
||||
ptr++; /* Move past > or ' */
|
||||
cd->names_found++;
|
||||
ptr++; /* Move past > or ' */
|
||||
goto NUMBERED_GROUP;
|
||||
|
||||
|
||||
@@ -5004,7 +5131,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
if (lengthptr == NULL)
|
||||
{
|
||||
*code = OP_END;
|
||||
if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);
|
||||
if (recno != 0)
|
||||
called = _pcre_find_bracket(cd->start_code, utf8, recno);
|
||||
|
||||
/* Forward reference */
|
||||
|
||||
@@ -5648,6 +5776,8 @@ uschar *code = *codeptr;
|
||||
uschar *last_branch = code;
|
||||
uschar *start_bracket = code;
|
||||
uschar *reverse_count = NULL;
|
||||
open_capitem capitem;
|
||||
int capnumber = 0;
|
||||
int firstbyte, reqbyte;
|
||||
int branchfirstbyte, branchreqbyte;
|
||||
int length;
|
||||
@@ -5674,6 +5804,17 @@ the code that abstracts option settings at the start of the pattern and makes
|
||||
them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
|
||||
pre-compile phase to find out whether anything has yet been compiled or not. */
|
||||
|
||||
/* If this is a capturing subpattern, add to the chain of open capturing items
|
||||
so that we can detect them if (*ACCEPT) is encountered. */
|
||||
|
||||
if (*code == OP_CBRA)
|
||||
{
|
||||
capnumber = GET2(code, 1 + LINK_SIZE);
|
||||
capitem.number = capnumber;
|
||||
capitem.next = cd->open_caps;
|
||||
cd->open_caps = &capitem;
|
||||
}
|
||||
|
||||
/* Offset is set zero to mark that this bracket is still open */
|
||||
|
||||
PUT(code, 1, 0);
|
||||
@@ -5768,21 +5909,29 @@ for (;;)
|
||||
|
||||
/* If lookbehind, check that this branch matches a fixed-length string, and
|
||||
put the length into the OP_REVERSE item. Temporarily mark the end of the
|
||||
branch with OP_END. */
|
||||
branch with OP_END. If the branch contains OP_RECURSE, the result is -3
|
||||
because there may be forward references that we can't check here. Set a
|
||||
flag to cause another lookbehind check at the end. Why not do it all at the
|
||||
end? Because common, erroneous checks are picked up here and the offset of
|
||||
the problem can be shown. */
|
||||
|
||||
if (lookbehind)
|
||||
{
|
||||
int fixed_length;
|
||||
*code = OP_END;
|
||||
fixed_length = find_fixedlength(last_branch, options);
|
||||
fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
|
||||
DPRINTF(("fixed length = %d\n", fixed_length));
|
||||
if (fixed_length < 0)
|
||||
if (fixed_length == -3)
|
||||
{
|
||||
cd->check_lookbehind = TRUE;
|
||||
}
|
||||
else if (fixed_length < 0)
|
||||
{
|
||||
*errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
|
||||
*ptrptr = ptr;
|
||||
return FALSE;
|
||||
}
|
||||
PUT(reverse_count, 0, fixed_length);
|
||||
else { PUT(reverse_count, 0, fixed_length); }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5810,6 +5959,10 @@ for (;;)
|
||||
while (branch_length > 0);
|
||||
}
|
||||
|
||||
/* If it was a capturing subpattern, remove it from the chain. */
|
||||
|
||||
if (capnumber > 0) cd->open_caps = cd->open_caps->next;
|
||||
|
||||
/* Fill in the ket */
|
||||
|
||||
*code = OP_KET;
|
||||
@@ -6012,7 +6165,9 @@ do {
|
||||
switch (*scode)
|
||||
{
|
||||
case OP_CREF:
|
||||
case OP_NCREF:
|
||||
case OP_RREF:
|
||||
case OP_NRREF:
|
||||
case OP_DEF:
|
||||
return FALSE;
|
||||
|
||||
@@ -6181,9 +6336,7 @@ int length = 1; /* For final END opcode */
|
||||
int firstbyte, reqbyte, newline;
|
||||
int errorcode = 0;
|
||||
int skipatstart = 0;
|
||||
#ifdef SUPPORT_UTF8
|
||||
BOOL utf8;
|
||||
#endif
|
||||
BOOL utf8 = (options & PCRE_UTF8) != 0;
|
||||
size_t size;
|
||||
uschar *code;
|
||||
const uschar *codestart;
|
||||
@@ -6280,7 +6433,6 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
||||
/* Can't support UTF8 unless PCRE has been compiled to include the code. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
utf8 = (options & PCRE_UTF8) != 0;
|
||||
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
|
||||
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
|
||||
{
|
||||
@@ -6288,7 +6440,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
|
||||
goto PCRE_EARLY_ERROR_RETURN2;
|
||||
}
|
||||
#else
|
||||
if ((options & PCRE_UTF8) != 0)
|
||||
if (utf8)
|
||||
{
|
||||
errorcode = ERR32;
|
||||
goto PCRE_EARLY_ERROR_RETURN;
|
||||
@@ -6377,6 +6529,7 @@ cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
|
||||
cd->req_varyopt = 0;
|
||||
cd->external_options = options;
|
||||
cd->external_flags = 0;
|
||||
cd->open_caps = NULL;
|
||||
|
||||
/* Now do the pre-compile. On error, errorcode will be set non-zero, so we
|
||||
don't need to look at the result of the function here. The initial options have
|
||||
@@ -6451,6 +6604,8 @@ cd->start_code = codestart;
|
||||
cd->hwm = cworkspace;
|
||||
cd->req_varyopt = 0;
|
||||
cd->had_accept = FALSE;
|
||||
cd->check_lookbehind = FALSE;
|
||||
cd->open_caps = NULL;
|
||||
|
||||
/* Set up a starting, non-extracting bracket, then compile the expression. On
|
||||
error, errorcode will be set non-zero, so we don't need to look at the result
|
||||
@@ -6489,7 +6644,7 @@ while (errorcode == 0 && cd->hwm > cworkspace)
|
||||
cd->hwm -= LINK_SIZE;
|
||||
offset = GET(cd->hwm, 0);
|
||||
recno = GET(codestart, offset);
|
||||
groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);
|
||||
groupptr = _pcre_find_bracket(codestart, utf8, recno);
|
||||
if (groupptr == NULL) errorcode = ERR53;
|
||||
else PUT(((uschar *)codestart), offset, groupptr - codestart);
|
||||
}
|
||||
@@ -6499,6 +6654,47 @@ subpattern. */
|
||||
|
||||
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
|
||||
|
||||
/* If there were any lookbehind assertions that contained OP_RECURSE
|
||||
(recursions or subroutine calls), a flag is set for them to be checked here,
|
||||
because they may contain forward references. Actual recursions can't be fixed
|
||||
length, but subroutine calls can. It is done like this so that those without
|
||||
OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
|
||||
exceptional ones forgo this. We scan the pattern to check that they are fixed
|
||||
length, and set their lengths. */
|
||||
|
||||
if (cd->check_lookbehind)
|
||||
{
|
||||
uschar *cc = (uschar *)codestart;
|
||||
|
||||
/* Loop, searching for OP_REVERSE items, and process those that do not have
|
||||
their length set. (Actually, it will also re-process any that have a length
|
||||
of zero, but that is a pathological case, and it does no harm.) When we find
|
||||
one, we temporarily terminate the branch it is in while we scan it. */
|
||||
|
||||
for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
|
||||
cc != NULL;
|
||||
cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
|
||||
{
|
||||
if (GET(cc, 1) == 0)
|
||||
{
|
||||
int fixed_length;
|
||||
uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
|
||||
int end_op = *be;
|
||||
*be = OP_END;
|
||||
fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
|
||||
*be = end_op;
|
||||
DPRINTF(("fixed length = %d\n", fixed_length));
|
||||
if (fixed_length < 0)
|
||||
{
|
||||
errorcode = (fixed_length == -2)? ERR36 : ERR25;
|
||||
break;
|
||||
}
|
||||
PUT(cc, 1, fixed_length);
|
||||
}
|
||||
cc += 1 + LINK_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Failed to compile, or error while post-processing */
|
||||
|
||||
if (errorcode != 0)
|
||||
|
||||
321
harbour/external/pcre/pcredfa.c
vendored
321
harbour/external/pcre/pcredfa.c
vendored
@@ -45,6 +45,34 @@ FSM). This is NOT Perl- compatible, but it has advantages in certain
|
||||
applications. */
|
||||
|
||||
|
||||
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
|
||||
the performance of his patterns greatly. I could not use it as it stood, as it
|
||||
was not thread safe, and made assumptions about pattern sizes. Also, it caused
|
||||
test 7 to loop, and test 9 to crash with a segfault.
|
||||
|
||||
The issue is the check for duplicate states, which is done by a simple linear
|
||||
search up the state list. (Grep for "duplicate" below to find the code.) For
|
||||
many patterns, there will never be many states active at one time, so a simple
|
||||
linear search is fine. In patterns that have many active states, it might be a
|
||||
bottleneck. The suggested code used an indexing scheme to remember which states
|
||||
had previously been used for each character, and avoided the linear search when
|
||||
it knew there was no chance of a duplicate. This was implemented when adding
|
||||
states to the state lists.
|
||||
|
||||
I wrote some thread-safe, not-limited code to try something similar at the time
|
||||
of checking for duplicates (instead of when adding states), using index vectors
|
||||
on the stack. It did give a 13% improvement with one specially constructed
|
||||
pattern for certain subject strings, but on other strings and on many of the
|
||||
simpler patterns in the test suite it did worse. The major problem, I think,
|
||||
was the extra time to initialize the index. This had to be done for each call
|
||||
of internal_dfa_exec(). (The supplied patch used a static vector, initialized
|
||||
only once - I suspect this was the cause of the problems with the tests.)
|
||||
|
||||
Overall, I concluded that the gains in some cases did not outweigh the losses
|
||||
in others, so I abandoned this code. */
|
||||
|
||||
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
@@ -81,8 +109,9 @@ never stored, so we push them well clear of the normal opcodes. */
|
||||
character that is to be tested in some way. This makes it possible to
|
||||
centralize the loading of these characters. In the case of Type * etc, the
|
||||
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
|
||||
small value. ***NOTE*** If the start of this table is modified, the two tables
|
||||
that follow must also be modified. */
|
||||
small value. Non-zero values in the table are the offsets from the opcode where
|
||||
the character is to be found. ***NOTE*** If the start of this table is
|
||||
modified, the three tables that follow must also be modified. */
|
||||
|
||||
static const uschar coptable[] = {
|
||||
0, /* End */
|
||||
@@ -132,7 +161,63 @@ static const uschar coptable[] = {
|
||||
0, /* DEF */
|
||||
0, 0, /* BRAZERO, BRAMINZERO */
|
||||
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
|
||||
0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */
|
||||
0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
|
||||
};
|
||||
|
||||
/* This table identifies those opcodes that inspect a character. It is used to
|
||||
remember the fact that a character could have been inspected when the end of
|
||||
the subject is reached. ***NOTE*** If the start of this table is modified, the
|
||||
two tables that follow must also be modified. */
|
||||
|
||||
/* poptable is indexed directly by opcode value (poptable[codevalue]), so it
must contain exactly one entry for every opcode, in the same order as the
opcode enum, up to and including OP_SKIPZERO. A non-zero entry means that the
opcode inspects a subject character. OP_NCREF and OP_NRREF sit between
CREF/RREF/DEF in the opcode list and need their own entries here; without
them the table is two entries short and the lookups for the highest opcodes
(CLOSE, SKIPZERO) read past the end of the array. */

static const uschar poptable[] = {
  0,                             /* End                                    */
  0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
  1, 1, 1,                       /* Any, AllAny, Anybyte                   */
  1, 1, 1,                       /* NOTPROP, PROP, EXTUNI                  */
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
  1,                             /* Char                                   */
  1,                             /* Charnc                                 */
  1,                             /* not                                    */
  /* Positive single-char repeats                                          */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  1, 1, 1,                       /* upto, minupto, exact                   */
  1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
  /* Negative single-char repeats - only for chars < 256                   */
  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
  1, 1, 1,                       /* NOT upto, minupto, exact               */
  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
  /* Positive type repeats                                                 */
  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
  1, 1, 1,                       /* Type upto, minupto, exact              */
  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
  /* Character class & ref repeats                                         */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  1, 1,                          /* CRRANGE, CRMINRANGE                    */
  1,                             /* CLASS                                  */
  1,                             /* NCLASS                                 */
  1,                             /* XCLASS - variable length               */
  0,                             /* REF                                    */
  0,                             /* RECURSE                                */
  0,                             /* CALLOUT                                */
  0,                             /* Alt                                    */
  0,                             /* Ket                                    */
  0,                             /* KetRmax                                */
  0,                             /* KetRmin                                */
  0,                             /* Assert                                 */
  0,                             /* Assert not                             */
  0,                             /* Assert behind                          */
  0,                             /* Assert behind not                      */
  0,                             /* Reverse                                */
  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
  0, 0,                          /* CREF, NCREF                            */
  0, 0,                          /* RREF, NRREF                            */
  0,                             /* DEF                                    */
  0, 0,                          /* BRAZERO, BRAMINZERO                    */
  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
};
|
||||
|
||||
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
|
||||
@@ -390,6 +475,11 @@ if (*first_op == OP_REVERSE)
|
||||
current_subject -= gone_back;
|
||||
}
|
||||
|
||||
/* Save the earliest consulted character */
|
||||
|
||||
if (current_subject < md->start_used_ptr)
|
||||
md->start_used_ptr = current_subject;
|
||||
|
||||
/* Now we can process the individual branches. */
|
||||
|
||||
end_code = this_start_code;
|
||||
@@ -454,6 +544,8 @@ for (;;)
|
||||
int i, j;
|
||||
int clen, dlen;
|
||||
unsigned int c, d;
|
||||
int forced_fail = 0;
|
||||
BOOL could_continue = FALSE;
|
||||
|
||||
/* Make the new state list into the active state list and empty the
|
||||
new state list. */
|
||||
@@ -543,7 +635,9 @@ for (;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for a duplicate state with the same count, and skip if found. */
|
||||
/* Check for a duplicate state with the same count, and skip if found.
|
||||
See the note at the head of this module about the possibility of improving
|
||||
performance here. */
|
||||
|
||||
for (j = 0; j < i; j++)
|
||||
{
|
||||
@@ -560,6 +654,12 @@ for (;;)
|
||||
code = start_code + state_offset;
|
||||
codevalue = *code;
|
||||
|
||||
/* If this opcode inspects a character, but we are at the end of the
|
||||
subject, remember the fact for use when testing for a partial match. */
|
||||
|
||||
if (clen == 0 && poptable[codevalue] != 0)
|
||||
could_continue = TRUE;
|
||||
|
||||
/* If this opcode is followed by an inline character, load it. It is
|
||||
tempting to test for the presence of a subject character here, but that
|
||||
is wrong, because sometimes zero repetitions of the subject are
|
||||
@@ -610,7 +710,8 @@ for (;;)
|
||||
/* ========================================================================== */
|
||||
/* Reached a closing bracket. If not at the end of the pattern, carry
|
||||
on with the next opcode. Otherwise, unless we have an empty string and
|
||||
PCRE_NOTEMPTY is set, save the match data, shifting up all previous
|
||||
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
|
||||
start of the subject, save the match data, shifting up all previous
|
||||
matches so we always have the longest first. */
|
||||
|
||||
case OP_KET:
|
||||
@@ -624,26 +725,32 @@ for (;;)
|
||||
ADD_ACTIVE(state_offset - GET(code, 1), 0);
|
||||
}
|
||||
}
|
||||
else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
|
||||
else
|
||||
{
|
||||
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
|
||||
else if (match_count > 0 && ++match_count * 2 >= offsetcount)
|
||||
match_count = 0;
|
||||
count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
|
||||
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
|
||||
if (offsetcount >= 2)
|
||||
if (ptr > current_subject ||
|
||||
((md->moptions & PCRE_NOTEMPTY) == 0 &&
|
||||
((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
|
||||
current_subject > start_subject + md->start_offset)))
|
||||
{
|
||||
offsets[0] = current_subject - start_subject;
|
||||
offsets[1] = ptr - start_subject;
|
||||
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
|
||||
offsets[1] - offsets[0], current_subject));
|
||||
}
|
||||
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
|
||||
{
|
||||
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
|
||||
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
|
||||
match_count, rlevel*2-2, SP));
|
||||
return match_count;
|
||||
if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
|
||||
else if (match_count > 0 && ++match_count * 2 >= offsetcount)
|
||||
match_count = 0;
|
||||
count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
|
||||
if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
|
||||
if (offsetcount >= 2)
|
||||
{
|
||||
offsets[0] = current_subject - start_subject;
|
||||
offsets[1] = ptr - start_subject;
|
||||
DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
|
||||
offsets[1] - offsets[0], current_subject));
|
||||
}
|
||||
if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
|
||||
{
|
||||
DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
|
||||
"%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
|
||||
match_count, rlevel*2-2, SP));
|
||||
return match_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -794,6 +901,7 @@ for (;;)
|
||||
if (ptr > start_subject)
|
||||
{
|
||||
const uschar *temp = ptr - 1;
|
||||
if (temp < md->start_used_ptr) md->start_used_ptr = temp;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) BACKCHAR(temp);
|
||||
#endif
|
||||
@@ -802,8 +910,9 @@ for (;;)
|
||||
}
|
||||
else left_word = 0;
|
||||
|
||||
if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
|
||||
else right_word = 0;
|
||||
if (clen > 0)
|
||||
right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
|
||||
else right_word = 0;
|
||||
|
||||
if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
|
||||
{ ADD_ACTIVE(state_offset + 1, 0); }
|
||||
@@ -2157,11 +2266,12 @@ for (;;)
|
||||
|
||||
/* ========================================================================== */
|
||||
/* These are the opcodes for fancy brackets of various kinds. We have
|
||||
to use recursion in order to handle them. The "always failing" assersion
|
||||
(?!) is optimised when compiling to OP_FAIL, so we have to support that,
|
||||
to use recursion in order to handle them. The "always failing" assertion
|
||||
(?!) is optimised to OP_FAIL when compiling, so we have to support that,
|
||||
though the other "backtracking verbs" are not supported. */
|
||||
|
||||
case OP_FAIL:
|
||||
forced_fail++; /* Count FAILs for multiple states */
|
||||
break;
|
||||
|
||||
case OP_ASSERT:
|
||||
@@ -2235,7 +2345,8 @@ for (;;)
|
||||
|
||||
/* Back reference conditions are not supported */
|
||||
|
||||
if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
|
||||
if (condcode == OP_CREF || condcode == OP_NCREF)
|
||||
return PCRE_ERROR_DFA_UCOND;
|
||||
|
||||
/* The DEFINE condition is always false */
|
||||
|
||||
@@ -2246,7 +2357,7 @@ for (;;)
|
||||
which means "test if in any recursion". We can't test for specifically
|
||||
recursed groups. */
|
||||
|
||||
else if (condcode == OP_RREF)
|
||||
else if (condcode == OP_RREF || condcode == OP_NRREF)
|
||||
{
|
||||
int value = GET2(code, LINK_SIZE+2);
|
||||
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
|
||||
@@ -2469,19 +2580,33 @@ for (;;)
|
||||
/* We have finished the processing at the current subject character. If no
|
||||
new states have been set for the next character, we have found all the
|
||||
matches that we are going to find. If we are at the top level and partial
|
||||
matching has been requested, check for appropriate conditions. */
|
||||
matching has been requested, check for appropriate conditions.
|
||||
|
||||
The "forced_fail" variable counts the number of (*F) encountered for the
|
||||
character. If it is equal to the original active_count (saved in
|
||||
workspace[1]) it means that (*F) was found on every active state. In this
|
||||
case we don't want to give a partial match.
|
||||
|
||||
The "could_continue" variable is true if a state could have continued but
|
||||
for the fact that the end of the subject was reached. */
|
||||
|
||||
if (new_count <= 0)
|
||||
{
|
||||
if (match_count < 0 && /* No matches found */
|
||||
rlevel == 1 && /* Top level match function */
|
||||
(md->moptions & PCRE_PARTIAL) != 0 && /* Want partial matching */
|
||||
ptr >= end_subject && /* Reached end of subject */
|
||||
ptr > current_subject) /* Matched non-empty string */
|
||||
if (rlevel == 1 && /* Top level, and */
|
||||
could_continue && /* Some could go on */
|
||||
forced_fail != workspace[1] && /* Not all forced fail & */
|
||||
( /* either... */
|
||||
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
|
||||
|| /* or... */
|
||||
((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
|
||||
match_count < 0) /* no matches */
|
||||
) && /* And... */
|
||||
ptr >= end_subject && /* Reached end of subject */
|
||||
ptr > current_subject) /* Matched non-empty string */
|
||||
{
|
||||
if (offsetcount >= 2)
|
||||
{
|
||||
offsets[0] = current_subject - start_subject;
|
||||
offsets[0] = md->start_used_ptr - start_subject;
|
||||
offsets[1] = end_subject - start_subject;
|
||||
}
|
||||
match_count = PCRE_ERROR_PARTIAL;
|
||||
@@ -2623,6 +2748,7 @@ md->start_code = (const uschar *)argument_re +
|
||||
re->name_table_offset + re->name_count * re->name_entry_size;
|
||||
md->start_subject = (const unsigned char *)subject;
|
||||
md->end_subject = end_subject;
|
||||
md->start_offset = start_offset;
|
||||
md->moptions = options;
|
||||
md->poptions = re->options;
|
||||
|
||||
@@ -2727,8 +2853,8 @@ if (!anchored)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (startline && study != NULL &&
|
||||
(study->options & PCRE_STUDY_MAPPED) != 0)
|
||||
if (!startline && study != NULL &&
|
||||
(study->flags & PCRE_STUDY_MAPPED) != 0)
|
||||
start_bits = study->start_bits;
|
||||
}
|
||||
}
|
||||
@@ -2779,13 +2905,11 @@ for (;;)
|
||||
}
|
||||
|
||||
/* There are some optimizations that avoid running the match if a known
|
||||
starting point is not found, or if a known later character is not present.
|
||||
However, there is an option that disables these, for testing and for
|
||||
ensuring that all callouts do actually occur. */
|
||||
starting point is not found. However, there is an option that disables
|
||||
these, for testing and for ensuring that all callouts do actually occur. */
|
||||
|
||||
if ((options & PCRE_NO_START_OPTIMIZE) == 0)
|
||||
{
|
||||
|
||||
/* Advance to a known first byte. */
|
||||
|
||||
if (first_byte >= 0)
|
||||
@@ -2851,67 +2975,80 @@ for (;;)
|
||||
/* Restore fudged end_subject */
|
||||
|
||||
end_subject = save_end_subject;
|
||||
}
|
||||
|
||||
/* If req_byte is set, we know that that character must appear in the subject
|
||||
for the match to succeed. If the first character is set, req_byte must be
|
||||
later in the subject; otherwise the test starts at the match point. This
|
||||
optimization can save a huge amount of work in patterns with nested unlimited
|
||||
repeats that aren't going to match. Writing separate code for cased/caseless
|
||||
versions makes it go faster, as does using an autoincrement and backing off
|
||||
on a match.
|
||||
/* The following two optimizations are disabled for partial matching or if
|
||||
disabling is explicitly requested (and of course, by the test above, this
|
||||
code is not obeyed when restarting after a partial match). */
|
||||
|
||||
HOWEVER: when the subject string is very, very long, searching to its end can
|
||||
take a long time, and give bad performance on quite ordinary patterns. This
|
||||
showed up when somebody was matching /^C/ on a 32-megabyte string... so we
|
||||
don't do this when the string is sufficiently long.
|
||||
|
||||
ALSO: this processing is disabled when partial matching is requested, and can
|
||||
also be explicitly deactivated. */
|
||||
|
||||
if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
|
||||
req_byte >= 0 &&
|
||||
end_subject - current_subject < REQ_BYTE_MAX &&
|
||||
(options & PCRE_PARTIAL) == 0)
|
||||
{
|
||||
register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
|
||||
|
||||
/* We don't need to repeat the search if we haven't yet reached the
|
||||
place we found it at last time. */
|
||||
|
||||
if (p > req_byte_ptr)
|
||||
if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
|
||||
(options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
|
||||
{
|
||||
if (req_byte_caseless)
|
||||
/* If the pattern was studied, a minimum subject length may be set. This
|
||||
is a lower bound; no actual string of that length may actually match the
|
||||
pattern. Although the value is, strictly, in characters, we treat it as
|
||||
bytes to avoid spending too much time in this optimization. */
|
||||
|
||||
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
|
||||
end_subject - current_subject < study->minlength)
|
||||
return PCRE_ERROR_NOMATCH;
|
||||
|
||||
/* If req_byte is set, we know that that character must appear in the
|
||||
subject for the match to succeed. If the first character is set, req_byte
|
||||
must be later in the subject; otherwise the test starts at the match
|
||||
point. This optimization can save a huge amount of work in patterns with
|
||||
nested unlimited repeats that aren't going to match. Writing separate
|
||||
code for cased/caseless versions makes it go faster, as does using an
|
||||
autoincrement and backing off on a match.
|
||||
|
||||
HOWEVER: when the subject string is very, very long, searching to its end
|
||||
can take a long time, and give bad performance on quite ordinary
|
||||
patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
|
||||
string... so we don't do this when the string is sufficiently long. */
|
||||
|
||||
if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
|
||||
{
|
||||
while (p < end_subject)
|
||||
register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
|
||||
|
||||
/* We don't need to repeat the search if we haven't yet reached the
|
||||
place we found it at last time. */
|
||||
|
||||
if (p > req_byte_ptr)
|
||||
{
|
||||
register int pp = *p++;
|
||||
if (pp == req_byte || pp == req_byte2) { p--; break; }
|
||||
if (req_byte_caseless)
|
||||
{
|
||||
while (p < end_subject)
|
||||
{
|
||||
register int pp = *p++;
|
||||
if (pp == req_byte || pp == req_byte2) { p--; break; }
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (p < end_subject)
|
||||
{
|
||||
if (*p++ == req_byte) { p--; break; }
|
||||
}
|
||||
}
|
||||
|
||||
/* If we can't find the required character, break the matching loop,
|
||||
which will cause a return of PCRE_ERROR_NOMATCH. */
|
||||
|
||||
if (p >= end_subject) break;
|
||||
|
||||
/* If we have found the required character, save the point where we
|
||||
found it, so that we don't search again next time round the loop if
|
||||
the start hasn't passed this character yet. */
|
||||
|
||||
req_byte_ptr = p;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (p < end_subject)
|
||||
{
|
||||
if (*p++ == req_byte) { p--; break; }
|
||||
}
|
||||
}
|
||||
|
||||
/* If we can't find the required character, break the matching loop,
|
||||
which will cause a return of PCRE_ERROR_NOMATCH. */
|
||||
|
||||
if (p >= end_subject) break;
|
||||
|
||||
/* If we have found the required character, save the point where we
|
||||
found it, so that we don't search again next time round the loop if
|
||||
the start hasn't passed this character yet. */
|
||||
|
||||
req_byte_ptr = p;
|
||||
}
|
||||
}
|
||||
} /* End of optimizations that are done when not restarting */
|
||||
|
||||
/* OK, now we can do the business */
|
||||
|
||||
md->start_used_ptr = current_subject;
|
||||
|
||||
rc = internal_dfa_exec(
|
||||
md, /* fixed match data */
|
||||
md->start_code, /* this subexpression's code */
|
||||
|
||||
1316
harbour/external/pcre/pcreexec.c
vendored
1316
harbour/external/pcre/pcreexec.c
vendored
File diff suppressed because it is too large
Load Diff
11
harbour/external/pcre/pcrefinf.c
vendored
11
harbour/external/pcre/pcrefinf.c
vendored
@@ -119,10 +119,16 @@ switch (what)
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const uschar **)where) =
|
||||
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_MINLENGTH:
|
||||
*((int *)where) =
|
||||
(study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
|
||||
study->minlength : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
@@ -144,6 +150,9 @@ switch (what)
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
/* From release 8.00 this will always return TRUE because NOPARTIAL is
|
||||
no longer ever set (the restrictions have been removed). */
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
119
harbour/external/pcre/pcreinal.h
vendored
119
harbour/external/pcre/pcreinal.h
vendored
@@ -535,7 +535,9 @@ Standard C system should have one. */
|
||||
|
||||
/* Private flags containing information about the compiled regex. They used to
|
||||
live at the top end of the options word, but that got almost full, so now they
|
||||
are in a 16-bit flags word. */
|
||||
are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
|
||||
the restrictions on partial matching have been lifted. It remains for backwards
|
||||
compatibility. */
|
||||
|
||||
#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */
|
||||
#define PCRE_FIRSTSET 0x0002 /* first_byte is set */
|
||||
@@ -547,6 +549,7 @@ are in a 16-bit flags word. */
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
|
||||
#define PCRE_STUDY_MINLEN 0x02 /* a minimum length field exists */
|
||||
|
||||
/* Masks for identifying the public options that are permitted at compile
|
||||
time, run time, or study time, respectively. */
|
||||
@@ -562,14 +565,15 @@ time, run time, or study time, respectively. */
|
||||
PCRE_JAVASCRIPT_COMPAT)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
||||
PCRE_NO_START_OPTIMIZE)
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
|
||||
PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
|
||||
PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
|
||||
|
||||
#define PUBLIC_DFA_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
|
||||
PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
|
||||
PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
|
||||
PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
|
||||
PCRE_NO_START_OPTIMIZE)
|
||||
|
||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||
|
||||
@@ -598,7 +602,6 @@ variable-length repeat, or a anything other than literal characters. */
|
||||
environments where these macros are defined elsewhere. Unfortunately, there
|
||||
is no way to do the same for the typedef. */
|
||||
|
||||
|
||||
#ifndef FALSE
|
||||
typedef int BOOL;
|
||||
#define FALSE 0
|
||||
@@ -1206,8 +1209,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
|
||||
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
|
||||
that follow must also be updated to match. There is also a table called
|
||||
"coptable" in pcre_dfa_exec.c that must be updated. */
|
||||
that follow must also be updated to match. There are also tables called
|
||||
"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
|
||||
|
||||
enum {
|
||||
OP_END, /* 0 End of pattern */
|
||||
@@ -1343,30 +1346,39 @@ enum {
|
||||
OP_SCBRA, /* 98 Start of capturing bracket, check empty */
|
||||
OP_SCOND, /* 99 Conditional group, check empty */
|
||||
|
||||
OP_CREF, /* 100 Used to hold a capture number as condition */
|
||||
OP_RREF, /* 101 Used to hold a recursion number as condition */
|
||||
OP_DEF, /* 102 The DEFINE condition */
|
||||
/* The next two pairs must (respectively) be kept together. */
|
||||
|
||||
OP_BRAZERO, /* 103 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 104 order. */
|
||||
OP_CREF, /* 100 Used to hold a capture number as condition */
|
||||
OP_NCREF, /* 101 Same, but generated by a name reference */
|
||||
OP_RREF, /* 102 Used to hold a recursion number as condition */
|
||||
OP_NRREF, /* 103 Same, but generated by a name reference */
|
||||
OP_DEF, /* 104 The DEFINE condition */
|
||||
|
||||
OP_BRAZERO, /* 105 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 106 order. */
|
||||
|
||||
/* These are backtracking control verbs */
|
||||
|
||||
OP_PRUNE, /* 105 */
|
||||
OP_SKIP, /* 106 */
|
||||
OP_THEN, /* 107 */
|
||||
OP_COMMIT, /* 108 */
|
||||
OP_PRUNE, /* 107 */
|
||||
OP_SKIP, /* 108 */
|
||||
OP_THEN, /* 109 */
|
||||
OP_COMMIT, /* 110 */
|
||||
|
||||
/* These are forced failure and success verbs */
|
||||
|
||||
OP_FAIL, /* 109 */
|
||||
OP_ACCEPT, /* 110 */
|
||||
OP_FAIL, /* 111 */
|
||||
OP_ACCEPT, /* 112 */
|
||||
OP_CLOSE, /* 113 Used before OP_ACCEPT to close open captures */
|
||||
|
||||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO /* 111 */
|
||||
OP_SKIPZERO /* 114 */
|
||||
};
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
definitions that follow must also be updated to match. There are also tables
|
||||
called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
@@ -1388,9 +1400,10 @@ for debugging. The macro is referenced only in pcre_printint.c. */
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
||||
"AssertB", "AssertB not", "Reverse", \
|
||||
"Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond", \
|
||||
"Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero", \
|
||||
"Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def", \
|
||||
"Brazero", "Braminzero", \
|
||||
"*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT", \
|
||||
"Skip zero"
|
||||
"Close", "Skip zero"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
@@ -1450,15 +1463,16 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
1+LINK_SIZE, /* SBRA */ \
|
||||
3+LINK_SIZE, /* SCBRA */ \
|
||||
1+LINK_SIZE, /* SCOND */ \
|
||||
3, /* CREF */ \
|
||||
3, /* RREF */ \
|
||||
3, 3, /* CREF, NCREF */ \
|
||||
3, 3, /* RREF, NRREF */ \
|
||||
1, /* DEF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
1, 1, 1, 1, /* PRUNE, SKIP, THEN, COMMIT, */ \
|
||||
1, 1, 1 /* FAIL, ACCEPT, SKIPZERO */
|
||||
1, 1, 3, 1 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
|
||||
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
|
||||
condition. */
|
||||
|
||||
#define RREF_ANY 0xffff
|
||||
|
||||
@@ -1471,7 +1485,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
|
||||
ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64 };
|
||||
ERR60, ERR61, ERR62, ERR63, ERR64, ERR65 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
@@ -1487,7 +1501,7 @@ Because people can now save and re-use compiled patterns, any additions to this
|
||||
structure should be made at the end, and something earlier (e.g. a new
|
||||
flag in the options or one of the dummy fields) should indicate that the new
|
||||
fields are present. Currently PCRE always sets the dummy fields to zero.
|
||||
NOTE NOTE NOTE:
|
||||
NOTE NOTE NOTE
|
||||
*/
|
||||
|
||||
typedef struct real_pcre {
|
||||
@@ -1514,10 +1528,20 @@ remark (see NOTE above) about extending this structure applies. */
|
||||
|
||||
typedef struct pcre_study_data {
|
||||
pcre_uint32 size; /* Total that was malloced */
|
||||
pcre_uint32 options;
|
||||
uschar start_bits[32];
|
||||
pcre_uint32 flags; /* Private flags */
|
||||
uschar start_bits[32]; /* Starting char bits */
|
||||
pcre_uint32 minlength; /* Minimum subject length */
|
||||
} pcre_study_data;
|
||||
|
||||
/* Structure for building a chain of open capturing subpatterns during
|
||||
compiling, so that instructions to close them can be compiled when (*ACCEPT) is
|
||||
encountered. */
|
||||
|
||||
/* One node in a singly linked chain of open capturing subpatterns. Each node
records the capture number of one open subpattern and links to another node
in the chain. */
typedef struct open_capitem {
|
||||
struct open_capitem *next; /* Chain link: another node in the chain */
|
||||
pcre_uint16 number; /* Capture number of the open subpattern */
|
||||
} open_capitem;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
@@ -1530,6 +1554,7 @@ typedef struct compile_data {
|
||||
const uschar *start_code; /* The start of the compiled code */
|
||||
const uschar *start_pattern; /* The start of the pattern */
|
||||
const uschar *end_pattern; /* The end of the pattern */
|
||||
open_capitem *open_caps; /* Chain of open capture items */
|
||||
uschar *hwm; /* High watermark of workspace */
|
||||
uschar *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
@@ -1542,6 +1567,7 @@ typedef struct compile_data {
|
||||
int external_flags; /* External flag bits to be set */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL had_accept; /* (*ACCEPT) encountered */
|
||||
BOOL check_lookbehind; /* Lookbehinds need later checking */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
uschar nl[4]; /* Newline string when fixed length */
|
||||
@@ -1565,6 +1591,7 @@ typedef struct recursion_info {
|
||||
USPTR save_start; /* Old value of mstart */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
int save_offset_top; /* Current value of offset_top */
|
||||
} recursion_info;
|
||||
|
||||
/* Structure for building a chain of data for holding the values of the subject
|
||||
@@ -1589,6 +1616,9 @@ typedef struct match_data {
|
||||
int offset_max; /* The maximum usable for return data */
|
||||
int nltype; /* Newline type */
|
||||
int nllen; /* Newline string length */
|
||||
int name_count; /* Number of names in name table */
|
||||
int name_entry_size; /* Size of entry in names table */
|
||||
uschar *name_table; /* Table of names */
|
||||
uschar nl[4]; /* Newline string when fixed */
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
@@ -1599,7 +1629,7 @@ typedef struct match_data {
|
||||
BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */
|
||||
BOOL endonly; /* Dollar not before final \n */
|
||||
BOOL notempty; /* Empty string match not wanted */
|
||||
BOOL partial; /* PARTIAL flag */
|
||||
BOOL notempty_atstart; /* Empty string match at start not wanted */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
@@ -1607,6 +1637,8 @@ typedef struct match_data {
|
||||
USPTR end_subject; /* End of the subject string */
|
||||
USPTR start_match_ptr; /* Start of matched string */
|
||||
USPTR end_match_ptr; /* Subject position at end match */
|
||||
USPTR start_used_ptr; /* Earliest consulted character */
|
||||
int partial; /* PARTIAL options */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
int start_offset; /* The start offset value */
|
||||
@@ -1623,7 +1655,9 @@ typedef struct dfa_match_data {
|
||||
const uschar *start_code; /* Start of the compiled pattern */
|
||||
const uschar *start_subject; /* Start of the subject string */
|
||||
const uschar *end_subject; /* End of subject string */
|
||||
const uschar *start_used_ptr; /* Earliest consulted character */
|
||||
const uschar *tables; /* Character tables */
|
||||
int start_offset; /* The start offset value */
|
||||
int moptions; /* Match options */
|
||||
int poptions; /* Pattern options */
|
||||
int nltype; /* Newline type */
|
||||
@@ -1702,15 +1736,16 @@ extern const uschar _pcre_OP_lengths[];
|
||||
one of the exported public functions. They have to be "external" in the C
|
||||
sense, but are not part of the PCRE public API. */
|
||||
|
||||
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
|
||||
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
||||
|
||||
/* Unicode character database (UCD) */
|
||||
|
||||
13
harbour/external/pcre/pcreprni.h
vendored
13
harbour/external/pcre/pcreprni.h
vendored
@@ -246,7 +246,12 @@ for(;;)
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CLOSE:
|
||||
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
case OP_NCREF:
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
@@ -258,6 +263,14 @@ for(;;)
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_NRREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond nrecurse any");
|
||||
else
|
||||
fprintf(f, " Cond nrecurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DEF:
|
||||
fprintf(f, " Cond def");
|
||||
break;
|
||||
|
||||
424
harbour/external/pcre/pcrestud.c
vendored
424
harbour/external/pcre/pcrestud.c
vendored
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
Copyright (c) 1997-2009 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -54,6 +54,364 @@ supporting functions. */
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find the minimum subject length for a group *
|
||||
*************************************************/
|
||||
|
||||
/* Scan a parenthesized group and compute the minimum length of subject that
|
||||
is needed to match it. This is a lower bound; it does not mean there is a
|
||||
string of that length that matches. In UTF8 mode, the result is in characters
|
||||
rather than bytes.
|
||||
|
||||
Arguments:
|
||||
code pointer to start of group (the bracket)
|
||||
startcode pointer to start of the whole pattern
|
||||
options the compiling options
|
||||
|
||||
Returns: the minimum length
|
||||
-1 if \C was encountered
|
||||
-2 internal error (missing capturing bracket)
|
||||
*/
|
||||
|
||||
static int
|
||||
find_minlength(const uschar *code, const uschar *startcode, int options)
|
||||
{
|
||||
int length = -1;
|
||||
BOOL utf8 = (options & PCRE_UTF8) != 0;
|
||||
BOOL had_recurse = FALSE;
|
||||
register int branchlength = 0;
|
||||
register uschar *cc = (uschar *)code + 1 + LINK_SIZE;
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_SCBRA) cc += 2;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the
|
||||
branch, check the length against that of the other branches. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
int d, min;
|
||||
uschar *cs, *ce;
|
||||
register int op = *cc;
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
d = find_minlength(cc, startcode, options);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Reached end of a branch; if it's a ket it is the end of a nested
|
||||
call. If it's ALT it is an alternation in a nested call. If it is
|
||||
END it's the end of the outer call. All can be handled by the same code. */
|
||||
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_END:
|
||||
if (length < 0 || (!had_recurse && branchlength < length))
|
||||
length = branchlength;
|
||||
if (*cc != OP_ALT) return length;
|
||||
cc += 1 + LINK_SIZE;
|
||||
branchlength = 0;
|
||||
had_recurse = FALSE;
|
||||
break;
|
||||
|
||||
/* Skip over assertive subpatterns */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
/* Fall through */
|
||||
|
||||
/* Skip over things that don't match chars */
|
||||
|
||||
case OP_REVERSE:
|
||||
case OP_CREF:
|
||||
case OP_NCREF:
|
||||
case OP_RREF:
|
||||
case OP_NRREF:
|
||||
case OP_DEF:
|
||||
case OP_OPT:
|
||||
case OP_CALLOUT:
|
||||
case OP_SOD:
|
||||
case OP_SOM:
|
||||
case OP_EOD:
|
||||
case OP_EODN:
|
||||
case OP_CIRC:
|
||||
case OP_DOLL:
|
||||
case OP_NOT_WORD_BOUNDARY:
|
||||
case OP_WORD_BOUNDARY:
|
||||
cc += _pcre_OP_lengths[*cc];
|
||||
break;
|
||||
|
||||
/* Skip over a subpattern that has a {0} or {0,x} quantifier */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_SKIPZERO:
|
||||
cc += _pcre_OP_lengths[*cc];
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Handle literal characters and + repetitions */
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARNC:
|
||||
case OP_NOT:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
branchlength++;
|
||||
cc += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
branchlength++;
|
||||
cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
|
||||
break;
|
||||
|
||||
/* Handle exact repetitions. The count is already in characters, but we
|
||||
need to skip over a multibyte character in UTF8 mode. */
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_NOTEXACT:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;
|
||||
break;
|
||||
|
||||
/* Handle single-char non-literal matchers */
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
cc += 2;
|
||||
/* Fall through */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
case OP_DIGIT:
|
||||
case OP_NOT_WHITESPACE:
|
||||
case OP_WHITESPACE:
|
||||
case OP_NOT_WORDCHAR:
|
||||
case OP_WORDCHAR:
|
||||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
case OP_EXTUNI:
|
||||
case OP_HSPACE:
|
||||
case OP_NOT_HSPACE:
|
||||
case OP_VSPACE:
|
||||
case OP_NOT_VSPACE:
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* "Any newline" might match two characters */
|
||||
|
||||
case OP_ANYNL:
|
||||
branchlength += 2;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* The single-byte matcher means we can't proceed in UTF-8 mode */
|
||||
|
||||
case OP_ANYBYTE:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) return -1;
|
||||
#endif
|
||||
branchlength++;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
/* For repeated character types, we have to test for \p and \P, which have
|
||||
an extra two bytes of parameters. */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPOSQUERY:
|
||||
if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
|
||||
cc += _pcre_OP_lengths[op];
|
||||
break;
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
|
||||
cc += _pcre_OP_lengths[op];
|
||||
break;
|
||||
|
||||
/* Check a class for variable quantification */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
case OP_XCLASS:
|
||||
cc += GET(cc, 1) - 33;
|
||||
/* Fall through */
|
||||
#endif
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
cc += 33;
|
||||
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
branchlength++;
|
||||
/* Fall through */
|
||||
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
branchlength += GET2(cc,1);
|
||||
cc += 5;
|
||||
break;
|
||||
|
||||
default:
|
||||
branchlength++;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
/* Backreferences and subroutine calls are treated in the same way: we find
|
||||
the minimum length for the subpattern. A recursion, however, causes an
|
||||
a flag to be set that causes the length of this branch to be ignored. The
|
||||
logic is that a recursion can only make sense if there is another
|
||||
alternation that stops the recursing. That will provide the minimum length
|
||||
(when no recursion happens). A backreference within the group that it is
|
||||
referencing behaves in the same way.
|
||||
|
||||
If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
|
||||
matches an empty string (by default it causes a matching failure), so in
|
||||
that case we must set the minimum length to zero. */
|
||||
|
||||
case OP_REF:
|
||||
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
|
||||
{
|
||||
ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce)
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else d = find_minlength(cs, startcode, options);
|
||||
}
|
||||
else d = 0;
|
||||
cc += 3;
|
||||
|
||||
/* Handle repeated back references */
|
||||
|
||||
switch (*cc)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
min = 0;
|
||||
cc++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
min = GET2(cc, 1);
|
||||
cc += 5;
|
||||
break;
|
||||
|
||||
default:
|
||||
min = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
branchlength += min * d;
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
cs = ce = (uschar *)startcode + GET(cc, 1);
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if (cc > cs && cc < ce)
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
branchlength += find_minlength(cs, startcode, options);
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Anything else does not or need not match a character. We can get the
|
||||
item's length from the table, but for those that can match zero occurrences
|
||||
of a character, we must take special action for UTF-8 characters. */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_NOTUPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
case OP_POSUPTO:
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_NOTPOSQUERY:
|
||||
cc += _pcre_OP_lengths[op];
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* For the record, these are the opcodes that are matched by "default":
|
||||
OP_ACCEPT, OP_CLOSE, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_SET_SOM, OP_SKIP,
|
||||
OP_THEN. */
|
||||
|
||||
default:
|
||||
cc += _pcre_OP_lengths[op];
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Control never gets here */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
@@ -500,13 +858,15 @@ Arguments:
|
||||
set NULL unless error
|
||||
|
||||
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
appropriate flag set;
|
||||
appropriate flags set;
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
int min;
|
||||
BOOL bits_set = FALSE;
|
||||
uschar start_bits[32];
|
||||
pcre_extra *extra;
|
||||
pcre_study_data *study;
|
||||
@@ -533,30 +893,39 @@ code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
a multiline pattern that matches only at "line starts", there is no point in
|
||||
seeking a list of starting bytes. */
|
||||
|
||||
if ((re->options & PCRE_ANCHORED) != 0 ||
|
||||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
return NULL;
|
||||
if ((re->options & PCRE_ANCHORED) == 0 &&
|
||||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
|
||||
{
|
||||
/* Set the character tables in the block that is passed around */
|
||||
|
||||
/* Set the character tables in the block that is passed around */
|
||||
tables = re->tables;
|
||||
if (tables == NULL)
|
||||
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
|
||||
(void *)(&tables));
|
||||
|
||||
tables = re->tables;
|
||||
if (tables == NULL)
|
||||
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
|
||||
(void *)(&tables));
|
||||
compile_block.lcc = tables + lcc_offset;
|
||||
compile_block.fcc = tables + fcc_offset;
|
||||
compile_block.cbits = tables + cbits_offset;
|
||||
compile_block.ctypes = tables + ctypes_offset;
|
||||
|
||||
compile_block.lcc = tables + lcc_offset;
|
||||
compile_block.fcc = tables + fcc_offset;
|
||||
compile_block.cbits = tables + cbits_offset;
|
||||
compile_block.ctypes = tables + ctypes_offset;
|
||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||
|
||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
||||
bits_set = set_start_bits(code, start_bits,
|
||||
(re->options & PCRE_CASELESS) != 0, (re->options & PCRE_UTF8) != 0,
|
||||
&compile_block) == SSB_DONE;
|
||||
}
|
||||
|
||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
||||
if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
(re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
|
||||
/* Find the minimum length of subject string. */
|
||||
|
||||
min = find_minlength(code, code, re->options);
|
||||
|
||||
/* Return NULL if no optimization is possible. */
|
||||
|
||||
if (!bits_set && min < 0) return NULL;
|
||||
|
||||
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
||||
the latter, which is pointed to by the former, which may also get additional
|
||||
@@ -579,8 +948,19 @@ extra->flags = PCRE_EXTRA_STUDY_DATA;
|
||||
extra->study_data = study;
|
||||
|
||||
study->size = sizeof(pcre_study_data);
|
||||
study->options = PCRE_STUDY_MAPPED;
|
||||
memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
||||
study->flags = 0;
|
||||
|
||||
if (bits_set)
|
||||
{
|
||||
study->flags |= PCRE_STUDY_MAPPED;
|
||||
memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
||||
}
|
||||
|
||||
if (min >= 0)
|
||||
{
|
||||
study->flags |= PCRE_STUDY_MINLEN;
|
||||
study->minlength = min;
|
||||
}
|
||||
|
||||
return extra;
|
||||
}
|
||||
|
||||
6
harbour/external/pcre/pcretryf.c
vendored
6
harbour/external/pcre/pcretryf.c
vendored
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
Copyright (c) 1997-2009 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -128,7 +128,9 @@ if (study != NULL)
|
||||
{
|
||||
*internal_study = *study; /* To copy other fields */
|
||||
internal_study->size = byteflip(study->size, sizeof(study->size));
|
||||
internal_study->options = byteflip(study->options, sizeof(study->options));
|
||||
internal_study->flags = byteflip(study->flags, sizeof(study->flags));
|
||||
internal_study->minlength = byteflip(study->minlength,
|
||||
sizeof(study->minlength));
|
||||
}
|
||||
|
||||
return internal_re;
|
||||
|
||||
18
harbour/external/pcre/pcreucd.c
vendored
18
harbour/external/pcre/pcreucd.c
vendored
@@ -1,11 +1,28 @@
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
/* Unicode character database. */
|
||||
/* This file was autogenerated by the MultiStage2.py script. */
|
||||
/* Total size: 52808 bytes, block size: 128. */
|
||||
|
||||
/* The tables herein are needed only when UCP support is built */
|
||||
/* into PCRE. This module should not be referenced otherwise, so */
|
||||
/* it should not matter whether it is compiled or not. However */
|
||||
/* a comment was received about space saving - maybe the guy linked */
|
||||
/* all the modules rather than using a library - so we include a */
|
||||
/* condition to cut out the tables when not needed. But don't leave */
|
||||
/* a totally empty module because some compilers barf at that. */
|
||||
/* Instead, just supply small dummy tables. */
|
||||
|
||||
#ifndef SUPPORT_UCP
|
||||
const ucd_record _pcre_ucd_records[] = {{0,0,0 }};
|
||||
const uschar _pcre_ucd_stage1[] = {0};
|
||||
const pcre_uint16 _pcre_ucd_stage2[] = {0};
|
||||
#else
|
||||
|
||||
/* When recompiling tables with a new Unicode version,
|
||||
please check types in the structure definition from pcre_internal.h:
|
||||
typedef struct {
|
||||
@@ -2608,3 +2625,4 @@ const pcre_uint16 _pcre_ucd_stage2[] = { /* 40448 bytes, block = 128 */
|
||||
#if UCD_BLOCK_SIZE != 128
|
||||
#error Please correct UCD_BLOCK_SIZE in pcre_internal.h
|
||||
#endif
|
||||
#endif /* SUPPORT_UCP */
|
||||
|
||||
Reference in New Issue
Block a user