From 1d483175f5098aa756cc4e5ba725ca64ab9ccf5e Mon Sep 17 00:00:00 2001 From: Viktor Szakats Date: Wed, 8 Feb 2012 00:58:21 +0000 Subject: [PATCH] 2012-02-08 01:51 UTC+0100 Viktor Szakats (harbour syenar.net) * src/3rd/pcre/pcre.dif * deleted patch applied upstream * src/3rd/pcre/Makefile - src/3rd/pcre/pcreinfo.c - src/3rd/pcre/pcretryf.c - src/3rd/pcre/pcreprni.h + src/3rd/pcre/pcreprni.c * src/3rd/pcre/* * 8.21 -> 8.30 (using hb3rdpat) ; supports 16-bit char API flavour (pcre16_*()). not enabled in Harbour ATM. --- harbour/ChangeLog | 14 + harbour/src/3rd/pcre/LICENCE | 8 +- harbour/src/3rd/pcre/Makefile | 8 +- harbour/src/3rd/pcre/chartabs.c | 2 +- harbour/src/3rd/pcre/config.h | 26 +- harbour/src/3rd/pcre/pcre.dif | 74 +- harbour/src/3rd/pcre/pcre.h | 203 +- harbour/src/3rd/pcre/pcrecomp.c | 1988 ++++++++++------- harbour/src/3rd/pcre/pcreconf.c | 40 +- harbour/src/3rd/pcre/pcredfa.c | 519 +++-- harbour/src/3rd/pcre/pcreexec.c | 1056 +++++---- harbour/src/3rd/pcre/pcrefinf.c | 47 +- harbour/src/3rd/pcre/pcreget.c | 196 +- harbour/src/3rd/pcre/pcreglob.c | 28 +- harbour/src/3rd/pcre/pcreinal.h | 777 +++++-- harbour/src/3rd/pcre/pcreinfo.c | 93 - harbour/src/3rd/pcre/pcrejitc.c | 1380 +++++++----- harbour/src/3rd/pcre/pcremktb.c | 13 +- harbour/src/3rd/pcre/pcrenewl.c | 50 +- harbour/src/3rd/pcre/pcreoutf.c | 30 +- .../src/3rd/pcre/{pcreprni.h => pcreprni.c} | 252 ++- harbour/src/3rd/pcre/pcrerefc.c | 11 +- harbour/src/3rd/pcre/pcrestud.c | 351 ++- harbour/src/3rd/pcre/pcretabs.c | 50 +- harbour/src/3rd/pcre/pcretryf.c | 139 -- harbour/src/3rd/pcre/pcreucd.c | 16 +- harbour/src/3rd/pcre/pcrever.c | 7 +- harbour/src/3rd/pcre/pcrevutf.c | 12 +- harbour/src/3rd/pcre/pcrexcls.c | 54 +- harbour/src/3rd/pcre/sjarmth2.c | 154 +- harbour/src/3rd/pcre/sjarmv5.c | 138 +- harbour/src/3rd/pcre/sjconf.h | 2 +- harbour/src/3rd/pcre/sjconfi.h | 33 +- harbour/src/3rd/pcre/sjexeca.c | 5 +- harbour/src/3rd/pcre/sjlir.c | 254 ++- harbour/src/3rd/pcre/sjlir.h | 277 ++- 
harbour/src/3rd/pcre/sjmips32.c | 2 +- harbour/src/3rd/pcre/sjmipsc.c | 197 +- harbour/src/3rd/pcre/sjppc32.c | 34 +- harbour/src/3rd/pcre/sjppc64.c | 37 +- harbour/src/3rd/pcre/sjppcc.c | 265 ++- harbour/src/3rd/pcre/sjutils.c | 2 +- harbour/src/3rd/pcre/sjx8632.c | 92 +- harbour/src/3rd/pcre/sjx8664.c | 169 +- harbour/src/3rd/pcre/sjx86c.c | 255 ++- 45 files changed, 5928 insertions(+), 3432 deletions(-) delete mode 100644 harbour/src/3rd/pcre/pcreinfo.c rename harbour/src/3rd/pcre/{pcreprni.h => pcreprni.c} (69%) delete mode 100644 harbour/src/3rd/pcre/pcretryf.c diff --git a/harbour/ChangeLog b/harbour/ChangeLog index 833dbfa241..4b97417b6b 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -16,6 +16,20 @@ The license applies to all entries newer than 2009-04-28. */ +2012-02-08 01:51 UTC+0100 Viktor Szakats (harbour syenar.net) + * src/3rd/pcre/pcre.dif + * deleted patch applied upstream + + * src/3rd/pcre/Makefile + - src/3rd/pcre/pcreinfo.c + - src/3rd/pcre/pcretryf.c + - src/3rd/pcre/pcreprni.h + + src/3rd/pcre/pcreprni.c + * src/3rd/pcre/* + * 8.21 -> 8.30 (using hb3rdpat) + ; supports 16-bit char API flavour (pcre16_*()). + not enabled in Harbour ATM. + 2012-02-08 01:25 UTC+0100 Viktor Szakats (harbour syenar.net) * src/3rd/pcre/Makefile + added translation for JIT files from PCRE package diff --git a/harbour/src/3rd/pcre/LICENCE b/harbour/src/3rd/pcre/LICENCE index ae7bbcf8cf..5ce31a828d 100644 --- a/harbour/src/3rd/pcre/LICENCE +++ b/harbour/src/3rd/pcre/LICENCE @@ -24,7 +24,7 @@ Email domain: cam.ac.uk University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2011 University of Cambridge +Copyright (c) 1997-2012 University of Cambridge All rights reserved. @@ -35,7 +35,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2011 Zoltan Herczeg +Copyright(c) 2010-2012 Zoltan Herczeg All rights reserved. 
@@ -46,7 +46,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2011 Zoltan Herczeg +Copyright(c) 2009-2012 Zoltan Herczeg All rights reserved. @@ -55,7 +55,7 @@ THE C++ WRAPPER FUNCTIONS Contributed by: Google Inc. -Copyright (c) 2007-2011, Google Inc. +Copyright (c) 2007-2012, Google Inc. All rights reserved. diff --git a/harbour/src/3rd/pcre/Makefile b/harbour/src/3rd/pcre/Makefile index 6845c3c5ec..c08dc37f6f 100644 --- a/harbour/src/3rd/pcre/Makefile +++ b/harbour/src/3rd/pcre/Makefile @@ -81,8 +81,8 @@ else endif # ORIGIN http://www.pcre.org/ -# VER 8.21 -# URL ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.21.tar.gz +# VER 8.30 +# URL ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.30.tar.gz # DIFF pcre.dif # # MAP LICENCE @@ -98,16 +98,14 @@ endif # MAP pcre_fullinfo.c pcrefinf.c # MAP pcre_get.c pcreget.c # MAP pcre_globals.c pcreglob.c -# MAP pcre_info.c pcreinfo.c # MAP pcre_jit_compile.c pcrejitc.c # MAP pcre_maketables.c pcremktb.c # MAP pcre_newline.c pcrenewl.c # MAP pcre_ord2utf8.c pcreoutf.c -# MAP pcre_printint.src pcreprni.h +# MAP pcre_printint.c pcreprni.c # MAP pcre_refcount.c pcrerefc.c # MAP pcre_study.c pcrestud.c # MAP pcre_tables.c pcretabs.c -# MAP pcre_try_flipped.c pcretryf.c # MAP pcre_ucd.c pcreucd.c # MAP pcre_valid_utf8.c pcrevutf.c # MAP pcre_version.c pcrever.c diff --git a/harbour/src/3rd/pcre/chartabs.c b/harbour/src/3rd/pcre/chartabs.c index 1d037e0d69..57a6c311a6 100644 --- a/harbour/src/3rd/pcre/chartabs.c +++ b/harbour/src/3rd/pcre/chartabs.c @@ -26,7 +26,7 @@ unit might reference this" and so it will always be supplied to the linker. */ #include "pcreinal.h" -const unsigned char _pcre_default_tables[] = { +const pcre_uint8 PRIV(default_tables)[] = { /* This table is a lower casing table. 
*/ diff --git a/harbour/src/3rd/pcre/config.h b/harbour/src/3rd/pcre/config.h index 7282d1e7cc..df48a8ed51 100644 --- a/harbour/src/3rd/pcre/config.h +++ b/harbour/src/3rd/pcre/config.h @@ -31,8 +31,8 @@ them both to 0; an emulation function will be used. */ character codes, define this macro as 1. On systems that can use "configure", this can be done via --enable-ebcdic. PCRE will then assume that all input strings are in EBCDIC. If you do not define this macro, PCRE - will assume input strings are ASCII or UTF-8 Unicode. It is not possible to - build a version of PCRE that supports both EBCDIC and UTF-8. */ + will assume input strings are ASCII or UTF-8/16 Unicode. It is not possible + to build a version of PCRE that supports both EBCDIC and UTF-8/16. */ /* #undef EBCDIC */ /* Define to 1 if you have the `bcopy' function. */ @@ -250,7 +250,7 @@ them both to 0; an emulation function will be used. */ #define PACKAGE_NAME "PCRE" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "PCRE 8.21" +#define PACKAGE_STRING "PCRE 8.30" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "pcre" @@ -259,7 +259,7 @@ them both to 0; an emulation function will be used. */ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "8.21" +#define PACKAGE_VERSION "8.30" /* The value of PCREGREP_BUFSIZE determines the size of buffer used by pcregrep to hold parts of the file it is searching. On systems that support @@ -317,21 +317,29 @@ them both to 0; an emulation function will be used. */ handle .gz files. */ /* #undef SUPPORT_LIBZ */ +/* Define to enable the 16 bit PCRE library. */ +/* #undef SUPPORT_PCRE16 */ + +/* Define to enable the 8 bit PCRE library. */ +#ifndef SUPPORT_PCRE8 +#define SUPPORT_PCRE8 /**/ +#endif + /* Define to enable JIT support in pcregrep. */ /* #undef SUPPORT_PCREGREP_JIT */ /* Define to enable support for Unicode properties. 
*/ /* #undef SUPPORT_UCP */ -/* Define to enable support for the UTF-8 Unicode encoding. This will work +/* Define to enable support for the UTF-8/16 Unicode encoding. This will work even in an EBCDIC environment, but it is incompatible with the EBCDIC - macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8, but - not both at once. */ -/* #undef SUPPORT_UTF8 */ + macro. That is, PCRE can support *either* EBCDIC code *or* ASCII/UTF-8/16, + but not both at once. */ +/* #undef SUPPORT_UTF */ /* Version number of package */ #ifndef VERSION -#define VERSION "8.21" +#define VERSION "8.30" #endif /* Define to empty if `const' does not conform to ANSI C. */ diff --git a/harbour/src/3rd/pcre/pcre.dif b/harbour/src/3rd/pcre/pcre.dif index dbd6105697..e2e997314f 100644 --- a/harbour/src/3rd/pcre/pcre.dif +++ b/harbour/src/3rd/pcre/pcre.dif @@ -1,39 +1,41 @@ -diff -urN pcre.orig\pcrefinf.c pcre\pcrefinf.c ---- pcre.orig\pcrefinf.c Wed Feb 08 01:29:30 2012 -+++ pcre\pcrefinf.c Wed Feb 08 01:29:30 2012 -@@ -139,7 +139,7 @@ - case PCRE_INFO_MINLENGTH: - *((int *)where) = - (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)? 
-- study->minlength : -1; -+ (int)study->minlength : -1; - break; - - case PCRE_INFO_JIT: -diff -urN pcre.orig\pcreglob.c pcre\pcreglob.c ---- pcre.orig\pcreglob.c Wed Feb 08 01:29:30 2012 -+++ pcre\pcreglob.c Wed Feb 08 01:29:30 2012 -@@ -74,11 +74,17 @@ - PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; - - #elif !defined VPCOMPAT -+#if defined( __cplusplus ) && !defined( __IBMCPP__ ) -+extern "C" { -+#endif - PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc; - PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free; - PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc; - PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free; - PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; -+#if defined( __cplusplus ) && !defined( __IBMCPP__ ) -+} -+#endif - #endif - - /* End of pcre_globals.c */ +diff -urN pcre.orig\pcreglob.c.rej pcre\pcreglob.c.rej +--- pcre.orig\pcreglob.c.rej Thu Jan 01 01:00:00 1970 ++++ pcre\pcreglob.c.rej Wed Feb 08 01:46:37 2012 +@@ -0,0 +1,31 @@ ++*************** ++*** 74,84 **** ++ PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; ++ ++ #elif !defined VPCOMPAT ++ PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc; ++ PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free; ++ PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc; ++ PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free; ++ PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; ++ #endif ++ ++ /* End of pcre_globals.c */ ++--- 74,90 ---- ++ PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; ++ ++ #elif !defined VPCOMPAT +++ #if defined( __cplusplus ) && !defined( __IBMCPP__ ) +++ extern "C" { +++ #endif ++ PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc; ++ PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free; ++ PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc; ++ PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free; ++ 
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; +++ #if defined( __cplusplus ) && !defined( __IBMCPP__ ) +++ } +++ #endif ++ #endif ++ ++ /* End of pcre_globals.c */ diff -urN pcre.orig\pcrejitc.c pcre\pcrejitc.c ---- pcre.orig\pcrejitc.c Wed Feb 08 01:29:30 2012 -+++ pcre\pcrejitc.c Wed Feb 08 01:29:30 2012 +--- pcre.orig\pcrejitc.c Wed Feb 08 01:46:37 2012 ++++ pcre\pcrejitc.c Wed Feb 08 01:46:37 2012 @@ -59,7 +59,7 @@ #define SLJIT_VERBOSE 0 #define SLJIT_DEBUG 0 @@ -42,4 +44,4 @@ diff -urN pcre.orig\pcrejitc.c pcre\pcrejitc.c +#include "sjlir.c" #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED - #error "Unsupported architecture" + #error Unsupported architecture diff --git a/harbour/src/3rd/pcre/pcre.h b/harbour/src/3rd/pcre/pcre.h index 58ea327e9b..712bd3d714 100644 --- a/harbour/src/3rd/pcre/pcre.h +++ b/harbour/src/3rd/pcre/pcre.h @@ -5,7 +5,7 @@ /* This is the public header file for the PCRE library, to be #included by applications that call the PCRE functions. - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE_MAJOR 8 -#define PCRE_MINOR 21 +#define PCRE_MINOR 30 #define PCRE_PRERELEASE -#define PCRE_DATE 2011-12-12 +#define PCRE_DATE 2012-02-04 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE, the appropriate @@ -116,9 +116,13 @@ compiling). 
*/ #define PCRE_NOTEOL 0x00000100 /* Exec, DFA exec */ #define PCRE_UNGREEDY 0x00000200 /* Compile */ #define PCRE_NOTEMPTY 0x00000400 /* Exec, DFA exec */ -#define PCRE_UTF8 0x00000800 /* Compile, used in exec, DFA exec */ +/* The next two are also used in exec and DFA exec */ +#define PCRE_UTF8 0x00000800 /* Compile (same as PCRE_UTF16) */ +#define PCRE_UTF16 0x00000800 /* Compile (same as PCRE_UTF8) */ #define PCRE_NO_AUTO_CAPTURE 0x00001000 /* Compile */ -#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile, exec, DFA exec */ +/* The next two are also used in exec and DFA exec */ +#define PCRE_NO_UTF8_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF16_CHECK) */ +#define PCRE_NO_UTF16_CHECK 0x00002000 /* Compile (same as PCRE_NO_UTF8_CHECK) */ #define PCRE_AUTO_CALLOUT 0x00004000 /* Compile */ #define PCRE_PARTIAL_SOFT 0x00008000 /* Exec, DFA exec */ #define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */ @@ -142,34 +146,39 @@ compiling). */ /* Exec-time and get/set-time error codes */ -#define PCRE_ERROR_NOMATCH (-1) -#define PCRE_ERROR_NULL (-2) -#define PCRE_ERROR_BADOPTION (-3) -#define PCRE_ERROR_BADMAGIC (-4) -#define PCRE_ERROR_UNKNOWN_OPCODE (-5) -#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */ -#define PCRE_ERROR_NOMEMORY (-6) -#define PCRE_ERROR_NOSUBSTRING (-7) -#define PCRE_ERROR_MATCHLIMIT (-8) -#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ -#define PCRE_ERROR_BADUTF8 (-10) -#define PCRE_ERROR_BADUTF8_OFFSET (-11) -#define PCRE_ERROR_PARTIAL (-12) -#define PCRE_ERROR_BADPARTIAL (-13) -#define PCRE_ERROR_INTERNAL (-14) -#define PCRE_ERROR_BADCOUNT (-15) -#define PCRE_ERROR_DFA_UITEM (-16) -#define PCRE_ERROR_DFA_UCOND (-17) -#define PCRE_ERROR_DFA_UMLIMIT (-18) -#define PCRE_ERROR_DFA_WSSIZE (-19) -#define PCRE_ERROR_DFA_RECURSE (-20) -#define PCRE_ERROR_RECURSIONLIMIT (-21) -#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */ -#define PCRE_ERROR_BADNEWLINE (-23) -#define 
PCRE_ERROR_BADOFFSET (-24) -#define PCRE_ERROR_SHORTUTF8 (-25) -#define PCRE_ERROR_RECURSELOOP (-26) -#define PCRE_ERROR_JIT_STACKLIMIT (-27) +#define PCRE_ERROR_NOMATCH (-1) +#define PCRE_ERROR_NULL (-2) +#define PCRE_ERROR_BADOPTION (-3) +#define PCRE_ERROR_BADMAGIC (-4) +#define PCRE_ERROR_UNKNOWN_OPCODE (-5) +#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */ +#define PCRE_ERROR_NOMEMORY (-6) +#define PCRE_ERROR_NOSUBSTRING (-7) +#define PCRE_ERROR_MATCHLIMIT (-8) +#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ +#define PCRE_ERROR_BADUTF8 (-10) /* Same for 8/16 */ +#define PCRE_ERROR_BADUTF16 (-10) /* Same for 8/16 */ +#define PCRE_ERROR_BADUTF8_OFFSET (-11) /* Same for 8/16 */ +#define PCRE_ERROR_BADUTF16_OFFSET (-11) /* Same for 8/16 */ +#define PCRE_ERROR_PARTIAL (-12) +#define PCRE_ERROR_BADPARTIAL (-13) +#define PCRE_ERROR_INTERNAL (-14) +#define PCRE_ERROR_BADCOUNT (-15) +#define PCRE_ERROR_DFA_UITEM (-16) +#define PCRE_ERROR_DFA_UCOND (-17) +#define PCRE_ERROR_DFA_UMLIMIT (-18) +#define PCRE_ERROR_DFA_WSSIZE (-19) +#define PCRE_ERROR_DFA_RECURSE (-20) +#define PCRE_ERROR_RECURSIONLIMIT (-21) +#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */ +#define PCRE_ERROR_BADNEWLINE (-23) +#define PCRE_ERROR_BADOFFSET (-24) +#define PCRE_ERROR_SHORTUTF8 (-25) +#define PCRE_ERROR_SHORTUTF16 (-25) /* Same for 8/16 */ +#define PCRE_ERROR_RECURSELOOP (-26) +#define PCRE_ERROR_JIT_STACKLIMIT (-27) +#define PCRE_ERROR_BADMODE (-28) +#define PCRE_ERROR_BADENDIANNESS (-29) /* Specific error codes for UTF-8 validity checks */ @@ -196,6 +205,14 @@ compiling). */ #define PCRE_UTF8_ERR20 20 #define PCRE_UTF8_ERR21 21 +/* Specific error codes for UTF-16 validity checks */ + +#define PCRE_UTF16_ERR0 0 +#define PCRE_UTF16_ERR1 1 +#define PCRE_UTF16_ERR2 2 +#define PCRE_UTF16_ERR3 3 +#define PCRE_UTF16_ERR4 4 + /* Request types for pcre_fullinfo() */ #define PCRE_INFO_OPTIONS 0 @@ -231,13 +248,15 @@ compatible. 
*/ #define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7 #define PCRE_CONFIG_BSR 8 #define PCRE_CONFIG_JIT 9 +#define PCRE_CONFIG_UTF16 10 +#define PCRE_CONFIG_JITTARGET 11 /* Request types for pcre_study(). Do not re-arrange, in order to remain compatible. */ #define PCRE_STUDY_JIT_COMPILE 0x0001 -/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine +/* Bit flags for the pcre[16]_extra structure. Do not re-arrange or redefine these bits, just add new ones on the end, in order to remain compatible. */ #define PCRE_EXTRA_STUDY_DATA 0x0001 @@ -253,9 +272,26 @@ these bits, just add new ones on the end, in order to remain compatible. */ struct real_pcre; /* declaration; the definition is private */ typedef struct real_pcre pcre; +struct real_pcre16; /* declaration; the definition is private */ +typedef struct real_pcre16 pcre16; + struct real_pcre_jit_stack; /* declaration; the definition is private */ typedef struct real_pcre_jit_stack pcre_jit_stack; +struct real_pcre16_jit_stack; /* declaration; the definition is private */ +typedef struct real_pcre16_jit_stack pcre16_jit_stack; + +/* If PCRE is compiled with 16 bit character support, PCRE_UCHAR16 must contain +a 16 bit wide unsigned data type. Otherwise it can be a dummy data type since +pcre16 functions are not implemented. There is a check for this in pcre_internal.h. */ +#ifndef PCRE_UCHAR16 +#define PCRE_UCHAR16 unsigned short +#endif + +#ifndef PCRE_SPTR16 +#define PCRE_SPTR16 const PCRE_UCHAR16 * +#endif + /* When PCRE is compiled as a C++ library, the subject pointer type can be replaced with a custom type. For conventional use, the public interface is a const char *. */ @@ -279,6 +315,19 @@ typedef struct pcre_extra { void *executable_jit; /* Contains a pointer to a compiled jit code */ } pcre_extra; +/* Same structure as above, but with 16 bit char pointers.
*/ + +typedef struct pcre16_extra { + unsigned long int flags; /* Bits for which fields are set */ + void *study_data; /* Opaque data from pcre_study() */ + unsigned long int match_limit; /* Maximum number of calls to match() */ + void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ + unsigned long int match_limit_recursion; /* Max recursive calls to match() */ + PCRE_UCHAR16 **mark; /* For passing back a mark pointer */ + void *executable_jit; /* Contains a pointer to a compiled jit code */ +} pcre16_extra; + /* The structure for passing out data via the pcre_callout_function. We use a structure so that new fields can be added on the end in future versions, without changing the API of the function, thereby allowing old clients to work @@ -304,6 +353,28 @@ typedef struct pcre_callout_block { /* ------------------------------------------------------------------ */ } pcre_callout_block; +/* Same structure as above, but with 16 bit char pointers. 
*/ + +typedef struct pcre16_callout_block { + int version; /* Identifies version of block */ + /* ------------------------ Version 0 ------------------------------- */ + int callout_number; /* Number compiled into pattern */ + int *offset_vector; /* The offset vector */ + PCRE_SPTR16 subject; /* The subject being matched */ + int subject_length; /* The length of the subject */ + int start_match; /* Offset to start of this match attempt */ + int current_position; /* Where we currently are in the subject */ + int capture_top; /* Max current capture */ + int capture_last; /* Most recently closed capture */ + void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ + /* ------------------- Added for Version 2 -------------------------- */ + const PCRE_UCHAR16 *mark; /* Pointer to current mark or NULL */ + /* ------------------------------------------------------------------ */ +} pcre16_callout_block; + /* Indirection for store get and free functions. These can be set to alternative malloc/free functions if required. Special ones are used in the non-recursive case for "frames". 
There is also an optional callout function @@ -316,58 +387,114 @@ PCRE_EXP_DECL void (*pcre_free)(void *); PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t); PCRE_EXP_DECL void (*pcre_stack_free)(void *); PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *); + +PCRE_EXP_DECL void *(*pcre16_malloc)(size_t); +PCRE_EXP_DECL void (*pcre16_free)(void *); +PCRE_EXP_DECL void *(*pcre16_stack_malloc)(size_t); +PCRE_EXP_DECL void (*pcre16_stack_free)(void *); +PCRE_EXP_DECL int (*pcre16_callout)(pcre16_callout_block *); #else /* VPCOMPAT */ PCRE_EXP_DECL void *pcre_malloc(size_t); PCRE_EXP_DECL void pcre_free(void *); PCRE_EXP_DECL void *pcre_stack_malloc(size_t); PCRE_EXP_DECL void pcre_stack_free(void *); PCRE_EXP_DECL int pcre_callout(pcre_callout_block *); + +PCRE_EXP_DECL void *pcre16_malloc(size_t); +PCRE_EXP_DECL void pcre16_free(void *); +PCRE_EXP_DECL void *pcre16_stack_malloc(size_t); +PCRE_EXP_DECL void pcre16_stack_free(void *); +PCRE_EXP_DECL int pcre16_callout(pcre16_callout_block *); #endif /* VPCOMPAT */ /* User defined callback which provides a stack just before the match starts. 
*/ typedef pcre_jit_stack *(*pcre_jit_callback)(void *); +typedef pcre16_jit_stack *(*pcre16_jit_callback)(void *); /* Exported PCRE functions */ PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *, const unsigned char *); +PCRE_EXP_DECL pcre16 *pcre16_compile(PCRE_SPTR16, int, const char **, int *, + const unsigned char *); PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **, int *, const unsigned char *); +PCRE_EXP_DECL pcre16 *pcre16_compile2(PCRE_SPTR16, int, int *, const char **, + int *, const unsigned char *); PCRE_EXP_DECL int pcre_config(int, void *); +PCRE_EXP_DECL int pcre16_config(int, void *); PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *, int *, int, const char *, char *, int); -PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *, - int); +PCRE_EXP_DECL int pcre16_copy_named_substring(const pcre16 *, PCRE_SPTR16, + int *, int, PCRE_SPTR16, PCRE_UCHAR16 *, int); +PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, + char *, int); +PCRE_EXP_DECL int pcre16_copy_substring(PCRE_SPTR16, int *, int, int, + PCRE_UCHAR16 *, int); PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *, const char *, int, int, int, int *, int , int *, int); +PCRE_EXP_DECL int pcre16_dfa_exec(const pcre16 *, const pcre16_extra *, + PCRE_SPTR16, int, int, int, int *, int , int *, int); PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR, int, int, int, int *, int); +PCRE_EXP_DECL int pcre16_exec(const pcre16 *, const pcre16_extra *, + PCRE_SPTR16, int, int, int, int *, int); PCRE_EXP_DECL void pcre_free_substring(const char *); +PCRE_EXP_DECL void pcre16_free_substring(PCRE_SPTR16); PCRE_EXP_DECL void pcre_free_substring_list(const char **); +PCRE_EXP_DECL void pcre16_free_substring_list(PCRE_SPTR16 *); PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *); +PCRE_EXP_DECL int pcre16_fullinfo(const pcre16 *, const 
pcre16_extra *, int, + void *); PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *, int *, int, const char *, const char **); +PCRE_EXP_DECL int pcre16_get_named_substring(const pcre16 *, PCRE_SPTR16, + int *, int, PCRE_SPTR16, PCRE_SPTR16 *); PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *); +PCRE_EXP_DECL int pcre16_get_stringnumber(const pcre16 *, PCRE_SPTR16); PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *, char **, char **); +PCRE_EXP_DECL int pcre16_get_stringtable_entries(const pcre16 *, PCRE_SPTR16, + PCRE_UCHAR16 **, PCRE_UCHAR16 **); PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int, const char **); +PCRE_EXP_DECL int pcre16_get_substring(PCRE_SPTR16, int *, int, int, + PCRE_SPTR16 *); PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int, const char ***); -PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *); +PCRE_EXP_DECL int pcre16_get_substring_list(PCRE_SPTR16, int *, int, + PCRE_SPTR16 **); PCRE_EXP_DECL const unsigned char *pcre_maketables(void); +PCRE_EXP_DECL const unsigned char *pcre16_maketables(void); PCRE_EXP_DECL int pcre_refcount(pcre *, int); +PCRE_EXP_DECL int pcre16_refcount(pcre16 *, int); PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **); +PCRE_EXP_DECL pcre16_extra *pcre16_study(const pcre16 *, int, const char **); PCRE_EXP_DECL void pcre_free_study(pcre_extra *); +PCRE_EXP_DECL void pcre16_free_study(pcre16_extra *); PCRE_EXP_DECL const char *pcre_version(void); +PCRE_EXP_DECL const char *pcre16_version(void); + +/* Utility functions for byte order swaps. */ +PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *, pcre_extra *, + const unsigned char *); +PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *, pcre16_extra *, + const unsigned char *); +PCRE_EXP_DECL int pcre16_utf16_to_host_byte_order(PCRE_UCHAR16 *, + PCRE_SPTR16, int, int *, int); /* JIT compiler related functions. 
*/ PCRE_EXP_DECL pcre_jit_stack *pcre_jit_stack_alloc(int, int); +PCRE_EXP_DECL pcre16_jit_stack *pcre16_jit_stack_alloc(int, int); PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *); -PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, pcre_jit_callback, void *); +PCRE_EXP_DECL void pcre16_jit_stack_free(pcre16_jit_stack *); +PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *, + pcre_jit_callback, void *); +PCRE_EXP_DECL void pcre16_assign_jit_stack(pcre16_extra *, + pcre16_jit_callback, void *); #ifdef __cplusplus } /* extern "C" */ diff --git a/harbour/src/3rd/pcre/pcrecomp.c b/harbour/src/3rd/pcre/pcrecomp.c index 3d33f06f45..cd34fca1d4 100644 --- a/harbour/src/3rd/pcre/pcrecomp.c +++ b/harbour/src/3rd/pcre/pcrecomp.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -53,12 +53,16 @@ supporting internal functions that are not used by other modules. */ #include "pcreinal.h" -/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is -also used by pcretest. PCRE_DEBUG is not defined when building a production -library. */ +/* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which +is also used by pcretest. PCRE_DEBUG is not defined when building a production +library. We do not need to select pcre16_printint.c specially, because the +COMPILE_PCREx macro will already be appropriately set. */ #ifdef PCRE_DEBUG -#include "pcreprni.h" +/* pcre_printint.c should not include any headers */ +#define PCRE_INCLUDED +#include "pcreprni.c" +#undef PCRE_INCLUDED #endif @@ -104,6 +108,14 @@ overrun before it actually does run off the end of the data block. 
*/ #define WORK_SIZE_SAFETY_MARGIN (100) +/* Private flags added to firstchar and reqchar. */ + +#define REQ_CASELESS 0x10000000l /* Indicates caselessness */ +#define REQ_VARY 0x20000000l /* Reqchar followed non-literal item */ + +/* Repeated character flags. */ + +#define UTF_LENGTH 0x10000000l /* The char contains its length. */ /* Table for handling escaped characters in the range '0'-'z'. Positive returns are simple data values; negative values are for special things like \d and so @@ -238,7 +250,7 @@ static const char posix_names[] = STRING_graph0 STRING_print0 STRING_punct0 STRING_space0 STRING_word0 STRING_xdigit; -static const uschar posix_name_lengths[] = { +static const pcre_uint8 posix_name_lengths[] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; /* Table of class bit maps for each POSIX class. Each class is formed from a @@ -273,47 +285,101 @@ substitutes must be in the order of the names, defined above, and there are both positive and negative cases. NULL means no substitute. 
*/ #ifdef SUPPORT_UCP -static const uschar *substitutes[] = { - (uschar *)"\\P{Nd}", /* \D */ - (uschar *)"\\p{Nd}", /* \d */ - (uschar *)"\\P{Xsp}", /* \S */ /* NOTE: Xsp is Perl space */ - (uschar *)"\\p{Xsp}", /* \s */ - (uschar *)"\\P{Xwd}", /* \W */ - (uschar *)"\\p{Xwd}" /* \w */ +static const pcre_uchar string_PNd[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_pNd[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_N, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_PXsp[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_pXsp[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_s, CHAR_p, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_PXwd[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_pXwd[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_w, CHAR_d, CHAR_RIGHT_CURLY_BRACKET, '\0' }; + +static const pcre_uchar *substitutes[] = { + string_PNd, /* \D */ + string_pNd, /* \d */ + string_PXsp, /* \S */ /* NOTE: Xsp is Perl space */ + string_pXsp, /* \s */ + string_PXwd, /* \W */ + string_pXwd /* \w */ }; -static const uschar *posix_substitutes[] = { - (uschar *)"\\p{L}", /* alpha */ - (uschar *)"\\p{Ll}", /* lower */ - (uschar *)"\\p{Lu}", /* upper */ - (uschar *)"\\p{Xan}", /* alnum */ - NULL, /* ascii */ - (uschar *)"\\h", /* blank */ - NULL, /* cntrl */ - (uschar *)"\\p{Nd}", /* digit */ - NULL, /* graph */ - NULL, /* print */ - NULL, /* punct */ - (uschar *)"\\p{Xps}", /* space */ /* NOTE: Xps is POSIX space */ - (uschar *)"\\p{Xwd}", /* word */ - NULL, /* xdigit */ +static const pcre_uchar string_pL[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, 
CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_pLl[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_pLu[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_pXan[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_h[] = { + CHAR_BACKSLASH, CHAR_h, '\0' }; +static const pcre_uchar string_pXps[] = { + CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_PL[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_PLl[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_l, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_PLu[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_L, CHAR_u, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_PXan[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_a, CHAR_n, CHAR_RIGHT_CURLY_BRACKET, '\0' }; +static const pcre_uchar string_H[] = { + CHAR_BACKSLASH, CHAR_H, '\0' }; +static const pcre_uchar string_PXps[] = { + CHAR_BACKSLASH, CHAR_P, CHAR_LEFT_CURLY_BRACKET, + CHAR_X, CHAR_p, CHAR_s, CHAR_RIGHT_CURLY_BRACKET, '\0' }; + +static const pcre_uchar *posix_substitutes[] = { + string_pL, /* alpha */ + string_pLl, /* lower */ + string_pLu, /* upper */ + string_pXan, /* alnum */ + NULL, /* ascii */ + string_h, /* blank */ + NULL, /* cntrl */ + string_pNd, /* digit */ + NULL, /* graph */ + NULL, /* print */ + NULL, /* punct */ + string_pXps, /* space */ /* NOTE: Xps is POSIX space */ + string_pXwd, /* word */ + NULL, /* xdigit */ /* Negated cases */ - (uschar *)"\\P{L}", /* ^alpha */ - (uschar 
*)"\\P{Ll}", /* ^lower */ - (uschar *)"\\P{Lu}", /* ^upper */ - (uschar *)"\\P{Xan}", /* ^alnum */ - NULL, /* ^ascii */ - (uschar *)"\\H", /* ^blank */ - NULL, /* ^cntrl */ - (uschar *)"\\P{Nd}", /* ^digit */ - NULL, /* ^graph */ - NULL, /* ^print */ - NULL, /* ^punct */ - (uschar *)"\\P{Xps}", /* ^space */ /* NOTE: Xps is POSIX space */ - (uschar *)"\\P{Xwd}", /* ^word */ - NULL /* ^xdigit */ + string_PL, /* ^alpha */ + string_PLl, /* ^lower */ + string_PLu, /* ^upper */ + string_PXan, /* ^alnum */ + NULL, /* ^ascii */ + string_H, /* ^blank */ + NULL, /* ^cntrl */ + string_PNd, /* ^digit */ + NULL, /* ^graph */ + NULL, /* ^print */ + NULL, /* ^punct */ + string_PXps, /* ^space */ /* NOTE: Xps is POSIX space */ + string_PXwd, /* ^word */ + NULL /* ^xdigit */ }; -#define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *)) +#define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *)) #endif #define STRING(a) # a @@ -372,7 +438,7 @@ static const char error_texts[] = /* 30 */ "unknown POSIX class name\0" "POSIX collating elements are not supported\0" - "this version of PCRE is not compiled with PCRE_UTF8 support\0" + "this version of PCRE is compiled without UTF support\0" "spare error\0" /** DEAD **/ "character value in \\x{...} sequence is too large\0" /* 35 */ @@ -395,7 +461,7 @@ static const char error_texts[] = "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" /* 50 */ "repeated subpattern is too long\0" /** DEAD **/ - "octal value is greater than \\377 (not in UTF-8 mode)\0" + "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" "internal error: overran compiling workspace\0" "internal error: previously-checked referenced subpattern not found\0" "DEFINE group contains more than one branch\0" @@ -414,13 +480,15 @@ static const char error_texts[] = /* 65 */ "different names for subpatterns of the same number are not allowed\0" "(*MARK) must have an argument\0" - "this version of PCRE is not compiled with PCRE_UCP 
support\0" + "this version of PCRE is not compiled with Unicode property support\0" "\\c must be followed by an ASCII character\0" "\\k is not followed by a braced, angle-bracketed, or quoted name\0" /* 70 */ "internal error: unknown opcode in find_fixedlength()\0" "\\N is not supported in a class\0" "too many forward references\0" + "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" + "invalid UTF-16 string\0" ; /* Table to identify digits and hex digits. This is used when compiling @@ -439,12 +507,18 @@ For convenience, we use the same bit definitions as in chartables: Then we can use ctype_digit and ctype_xdigit in the code. */ +/* Using a simple comparison for decimal numbers rather than a memory read +is much faster, and the resulting code is simpler (the compiler turns it +into a subtraction and unsigned comparison). */ + +#define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9) + #ifndef EBCDIC /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in UTF-8 mode. */ -static const unsigned char digitab[] = +static const pcre_uint8 digitab[] = { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ @@ -483,7 +557,7 @@ static const unsigned char digitab[] = /* This is the "abnormal" case, for EBCDIC systems not running in UTF-8 mode. 
*/ -static const unsigned char digitab[] = +static const pcre_uint8 digitab[] = { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ @@ -518,7 +592,7 @@ static const unsigned char digitab[] = 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ -static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */ +static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */ 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ @@ -557,7 +631,7 @@ static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */ /* Definition to allow mutual recursion */ static BOOL - compile_regex(int, uschar **, const uschar **, int *, BOOL, BOOL, int, int, + compile_regex(int, pcre_uchar **, const pcre_uchar **, int *, BOOL, BOOL, int, int, int *, int *, branch_chain *, compile_data *, int *); @@ -604,7 +678,7 @@ Returns: 0 if all went well, else an error number static int expand_workspace(compile_data *cd) { -uschar *newspace; +pcre_uchar *newspace; int newsize = cd->workspace_size * 2; if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX; @@ -612,13 +686,12 @@ if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX || newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN) return ERR72; -newspace = (pcre_malloc)(newsize); +newspace = (PUBL(malloc))(IN_UCHARS(newsize)); if (newspace == NULL) return ERR21; - -memcpy(newspace, cd->start_workspace, cd->workspace_size); -cd->hwm = (uschar *)newspace + (cd->hwm - cd->start_workspace); +memcpy(newspace, cd->start_workspace, cd->workspace_size * sizeof(pcre_uchar)); +cd->hwm = (pcre_uchar *)newspace + (cd->hwm - cd->start_workspace); if (cd->workspace_size > COMPILE_WORK_SIZE) - (pcre_free)((void *)cd->start_workspace); + (PUBL(free))((void *)cd->start_workspace); cd->start_workspace = 
newspace; cd->workspace_size = newsize; return 0; @@ -642,17 +715,19 @@ Returns: TRUE or FALSE */ static BOOL -is_counted_repeat(const uschar *p) +is_counted_repeat(const pcre_uchar *p) { -if ((digitab[*p++] & ctype_digit) == 0) return FALSE; -while ((digitab[*p] & ctype_digit) != 0) p++; +if (!IS_DIGIT(*p)) return FALSE; +p++; +while (IS_DIGIT(*p)) p++; if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; if (*p++ != CHAR_COMMA) return FALSE; if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE; -if ((digitab[*p++] & ctype_digit) == 0) return FALSE; -while ((digitab[*p] & ctype_digit) != 0) p++; +if (!IS_DIGIT(*p)) return FALSE; +p++; +while (IS_DIGIT(*p)) p++; return (*p == CHAR_RIGHT_CURLY_BRACKET); } @@ -684,12 +759,14 @@ Returns: zero or positive => a data character */ static int -check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount, +check_escape(const pcre_uchar **ptrptr, int *errorcodeptr, int bracount, int options, BOOL isclass) { -BOOL utf8 = (options & PCRE_UTF8) != 0; -const uschar *ptr = *ptrptr + 1; -int c, i; +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +BOOL utf = (options & PCRE_UTF8) != 0; +const pcre_uchar *ptr = *ptrptr + 1; +pcre_int32 c; +int i; GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ ptr--; /* Set pointer back to the last byte */ @@ -703,11 +780,13 @@ in a table. A non-zero result is something that can be returned immediately. Otherwise further processing may be required. 
*/ #ifndef EBCDIC /* ASCII/UTF-8 coding */ -else if (c < CHAR_0 || c > CHAR_z) {} /* Not alphanumeric */ +/* Not alphanumeric */ +else if (c < CHAR_0 || c > CHAR_z) {} else if ((i = escapes[c - CHAR_0]) != 0) c = i; #else /* EBCDIC coding */ -else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphanumeric */ +/* Not alphanumeric */ +else if (c < 'a' || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {} else if ((i = escapes[c - 0x48]) != 0) c = i; #endif @@ -715,7 +794,7 @@ else if ((i = escapes[c - 0x48]) != 0) c = i; else { - const uschar *oldptr; + const pcre_uchar *oldptr; BOOL braced, negated; switch (c) @@ -733,8 +812,10 @@ else { /* In JavaScript, \u must be followed by four hexadecimal numbers. Otherwise it is a lowercase u letter. */ - if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0 - && (digitab[ptr[3]] & ctype_xdigit) != 0 && (digitab[ptr[4]] & ctype_xdigit) != 0) + if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0 + && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0 + && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0 + && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0) { c = 0; for (i = 0; i < 4; ++i) @@ -788,9 +869,9 @@ else if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) { - const uschar *p; + const pcre_uchar *p; for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++) - if (*p != CHAR_MINUS && (digitab[*p] & ctype_digit) == 0) break; + if (*p != CHAR_MINUS && !IS_DIGIT(*p)) break; if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET) { c = -ESC_k; @@ -808,12 +889,21 @@ else } else negated = FALSE; + /* The integer range is limited by the machine's int representation. 
*/ c = 0; - while ((digitab[ptr[1]] & ctype_digit) != 0) - c = c * 10 + *(++ptr) - CHAR_0; - - if (c < 0) /* Integer overflow */ + while (IS_DIGIT(ptr[1])) { + if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */ + { + c = -1; + break; + } + c = c * 10 + *(++ptr) - CHAR_0; + } + if (((unsigned int)c) > INT_MAX) /* Integer overflow */ + { + while (IS_DIGIT(ptr[1])) + ptr++; *errorcodeptr = ERR61; break; } @@ -861,11 +951,21 @@ else if (!isclass) { oldptr = ptr; + /* The integer range is limited by the machine's int representation. */ c -= CHAR_0; - while ((digitab[ptr[1]] & ctype_digit) != 0) - c = c * 10 + *(++ptr) - CHAR_0; - if (c < 0) /* Integer overflow */ + while (IS_DIGIT(ptr[1])) { + if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */ + { + c = -1; + break; + } + c = c * 10 + *(++ptr) - CHAR_0; + } + if (((unsigned int)c) > INT_MAX) /* Integer overflow */ + { + while (IS_DIGIT(ptr[1])) + ptr++; *errorcodeptr = ERR61; break; } @@ -891,26 +991,29 @@ else /* \0 always starts an octal number, but we may drop through to here with a larger first octal digit. The original code used just to take the least significant 8 bits of octal numbers (I think this is what early Perls used - to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more - than 3 octal digits. */ + to do). Nowadays we allow for larger numbers in UTF-8 mode and 16-bit mode, + but no more than 3 octal digits. */ case CHAR_0: c -= CHAR_0; while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7) c = c * 8 + *(++ptr) - CHAR_0; - if (!utf8 && c > 255) *errorcodeptr = ERR51; +#ifdef COMPILE_PCRE8 + if (!utf && c > 0xff) *errorcodeptr = ERR51; +#endif break; /* \x is complicated. \x{ddd} is a character number which can be greater - than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is - treated as a data character. */ + than 0xff in utf or non-8bit mode, but only if the ddd are hex digits. + If not, { is treated as a data character. 
*/ case CHAR_x: if ((options & PCRE_JAVASCRIPT_COMPAT) != 0) { /* In JavaScript, \x must be followed by two hexadecimal numbers. Otherwise it is a lowercase x letter. */ - if ((digitab[ptr[1]] & ctype_xdigit) != 0 && (digitab[ptr[2]] & ctype_xdigit) != 0) + if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0 + && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0) { c = 0; for (i = 0; i < 2; ++i) @@ -930,15 +1033,13 @@ else if (ptr[1] == CHAR_LEFT_CURLY_BRACKET) { - const uschar *pt = ptr + 2; - int count = 0; + const pcre_uchar *pt = ptr + 2; c = 0; - while ((digitab[*pt] & ctype_xdigit) != 0) + while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) { register int cc = *pt++; if (c == 0 && cc == CHAR_0) continue; /* Leading zeroes */ - count++; #ifndef EBCDIC /* ASCII/UTF-8 coding */ if (cc >= CHAR_a) cc -= 32; /* Convert to upper case */ @@ -947,11 +1048,25 @@ else if (cc >= CHAR_a && cc <= CHAR_z) cc += 64; /* Convert to upper case */ c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10)); #endif + +#ifdef COMPILE_PCRE8 + if (c > (utf ? 0x10ffff : 0xff)) { c = -1; break; } +#else +#ifdef COMPILE_PCRE16 + if (c > (utf ? 0x10ffff : 0xffff)) { c = -1; break; } +#endif +#endif + } + + if (c < 0) + { + while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++; + *errorcodeptr = ERR34; } if (*pt == CHAR_RIGHT_CURLY_BRACKET) { - if (c < 0 || count > (utf8? 
8 : 2)) *errorcodeptr = ERR34; + if (utf && c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73; ptr = pt; break; } @@ -963,7 +1078,7 @@ else /* Read just a single-byte hex-defined char */ c = 0; - while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) + while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0) { int cc; /* Some compilers don't like */ cc = *(++ptr); /* ++ in initializers */ @@ -1061,11 +1176,11 @@ Returns: type value from ucp_type_table, or -1 for an invalid type */ static int -get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr) +get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr) { int c, i, bot, top; -const uschar *ptr = *ptrptr; -char name[32]; +const pcre_uchar *ptr = *ptrptr; +pcre_uchar name[32]; c = *(++ptr); if (c == 0) goto ERROR_RETURN; @@ -1082,7 +1197,7 @@ if (c == CHAR_LEFT_CURLY_BRACKET) *negptr = TRUE; ptr++; } - for (i = 0; i < (int)sizeof(name) - 1; i++) + for (i = 0; i < (int)(sizeof(name) / sizeof(pcre_uchar)) - 1; i++) { c = *(++ptr); if (c == 0) goto ERROR_RETURN; @@ -1106,16 +1221,16 @@ else /* Search for a recognized property name using binary chop */ bot = 0; -top = _pcre_utt_size; +top = PRIV(utt_size); while (bot < top) { i = (bot + top) >> 1; - c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset); + c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset); if (c == 0) { - *dptr = _pcre_utt[i].value; - return _pcre_utt[i].type; + *dptr = PRIV(utt)[i].value; + return PRIV(utt)[i].type; } if (c > 0) bot = i + 1; else top = i; } @@ -1153,8 +1268,8 @@ Returns: pointer to '}' on success; current ptr on error, with errorcodeptr set non-zero */ -static const uschar * -read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr) +static const pcre_uchar * +read_repeat_counts(const pcre_uchar *p, int *minp, int *maxp, int *errorcodeptr) { int min = 0; int max = -1; @@ -1162,7 +1277,7 @@ int max = -1; /* Read the minimum value and 
do a paranoid check: a negative value indicates an integer overflow. */ -while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - CHAR_0; +while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0; if (min < 0 || min > 65535) { *errorcodeptr = ERR5; @@ -1177,7 +1292,7 @@ if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) { max = 0; - while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - CHAR_0; + while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0; if (max < 0 || max > 65535) { *errorcodeptr = ERR5; @@ -1232,17 +1347,17 @@ Arguments: name name to seek, or NULL if seeking a numbered subpattern lorn name length, or subpattern number if name is NULL xmode TRUE if we are in /x mode - utf8 TRUE if we are in UTF-8 mode + utf TRUE if we are in UTF-8 / UTF-16 mode count pointer to the current capturing subpattern number (updated) Returns: the number of the named subpattern, or -1 if not found */ static int -find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn, - BOOL xmode, BOOL utf8, int *count) +find_parens_sub(pcre_uchar **ptrptr, compile_data *cd, const pcre_uchar *name, int lorn, + BOOL xmode, BOOL utf, int *count) { -uschar *ptr = *ptrptr; +pcre_uchar *ptr = *ptrptr; int start_count = *count; int hwm_count = start_count; BOOL dup_parens = FALSE; @@ -1309,7 +1424,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS) ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) { int term; - const uschar *thisname; + const pcre_uchar *thisname; *count += 1; if (name == NULL && *count == lorn) return *count; term = *ptr++; @@ -1317,7 +1432,7 @@ if (ptr[0] == CHAR_LEFT_PARENTHESIS) thisname = ptr; while (*ptr != term) ptr++; if (name != NULL && lorn == ptr - thisname && - strncmp((const char *)name, (const char *)thisname, lorn) == 0) + STRNCMP_UC_UC(name, thisname, lorn) == 0) return *count; term++; } @@ -1360,7 +1475,7 @@ for (; ptr < cd->end_pattern; ptr++) { if (ptr[2] == CHAR_E) ptr+= 2; - else if 
(strncmp((const char *)ptr+2, + else if (STRNCMP_UC_C8(ptr + 2, STR_Q STR_BACKSLASH STR_E, 3) == 0) ptr += 4; else @@ -1408,8 +1523,8 @@ for (; ptr < cd->end_pattern; ptr++) { if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } ptr++; -#ifdef SUPPORT_UTF8 - if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; +#ifdef SUPPORT_UTF + if (utf) FORWARDCHAR(ptr); #endif } if (*ptr == 0) goto FAIL_EXIT; @@ -1420,7 +1535,7 @@ for (; ptr < cd->end_pattern; ptr++) if (*ptr == CHAR_LEFT_PARENTHESIS) { - int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, count); + int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, count); if (rc > 0) return rc; if (*ptr == 0) goto FAIL_EXIT; } @@ -1466,16 +1581,16 @@ Arguments: name name to seek, or NULL if seeking a numbered subpattern lorn name length, or subpattern number if name is NULL xmode TRUE if we are in /x mode - utf8 TRUE if we are in UTF-8 mode + utf TRUE if we are in UTF-8 / UTF-16 mode Returns: the number of the found subpattern, or -1 if not found */ static int -find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode, - BOOL utf8) +find_parens(compile_data *cd, const pcre_uchar *name, int lorn, BOOL xmode, + BOOL utf) { -uschar *ptr = (uschar *)cd->start_pattern; +pcre_uchar *ptr = (pcre_uchar *)cd->start_pattern; int count = 0; int rc; @@ -1486,7 +1601,7 @@ matching closing parens. That is why we have to have a loop. 
*/ for (;;) { - rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, &count); + rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf, &count); if (rc > 0 || *ptr++ == 0) break; } @@ -1513,8 +1628,8 @@ Arguments: Returns: pointer to the first significant opcode */ -static const uschar* -first_significant_code(const uschar *code, BOOL skipassert) +static const pcre_uchar* +first_significant_code(const pcre_uchar *code, BOOL skipassert) { for (;;) { @@ -1525,7 +1640,7 @@ for (;;) case OP_ASSERTBACK_NOT: if (!skipassert) return code; do code += GET(code, 1); while (*code == OP_ALT); - code += _pcre_OP_lengths[*code]; + code += PRIV(OP_lengths)[*code]; break; case OP_WORD_BOUNDARY: @@ -1539,7 +1654,7 @@ for (;;) case OP_RREF: case OP_NRREF: case OP_DEF: - code += _pcre_OP_lengths[*code]; + code += PRIV(OP_lengths)[*code]; break; default: @@ -1569,7 +1684,7 @@ and doing the check at the end; a flag specifies which mode we are running in. Arguments: code points to the start of the pattern (the bracket) - utf8 TRUE in UTF-8 mode + utf TRUE in UTF-8 / UTF-16 mode atend TRUE if called when the pattern is complete cd the "compile data" structure @@ -1581,12 +1696,12 @@ Returns: the fixed length, */ static int -find_fixedlength(uschar *code, BOOL utf8, BOOL atend, compile_data *cd) +find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd) { int length = -1; register int branchlength = 0; -register uschar *cc = code + 1 + LINK_SIZE; +register pcre_uchar *cc = code + 1 + LINK_SIZE; /* Scan along the opcodes for this branch. If we get to the end of the branch, check the length against that of the other branches. */ @@ -1594,8 +1709,9 @@ branch, check the length against that of the other branches. 
*/ for (;;) { int d; - uschar *ce, *cs; + pcre_uchar *ce, *cs; register int op = *cc; + switch (op) { /* We only need to continue for OP_CBRA (normal capturing bracket) and @@ -1608,7 +1724,7 @@ for (;;) case OP_ONCE: case OP_ONCE_NC: case OP_COND: - d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), utf8, atend, cd); + d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd); if (d < 0) return d; branchlength += d; do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -1639,10 +1755,10 @@ for (;;) case OP_RECURSE: if (!atend) return -3; - cs = ce = (uschar *)cd->start_code + GET(cc, 1); /* Start subpattern */ - do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ - if (cc > cs && cc < ce) return -1; /* Recursion */ - d = find_fixedlength(cs + 2, utf8, atend, cd); + cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1); /* Start subpattern */ + do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ + if (cc > cs && cc < ce) return -1; /* Recursion */ + d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd); if (d < 0) return d; branchlength += d; cc += 1 + LINK_SIZE; @@ -1655,7 +1771,8 @@ for (;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: do cc += GET(cc, 1); while (*cc == OP_ALT); - /* Fall through */ + cc += PRIV(OP_lengths)[*cc]; + break; /* Skip over things that don't match chars */ @@ -1663,7 +1780,7 @@ for (;;) case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: - cc += cc[1] + _pcre_OP_lengths[*cc]; + cc += cc[1] + PRIV(OP_lengths)[*cc]; break; case OP_CALLOUT: @@ -1690,7 +1807,7 @@ for (;;) case OP_SOM: case OP_THEN: case OP_WORD_BOUNDARY: - cc += _pcre_OP_lengths[*cc]; + cc += PRIV(OP_lengths)[*cc]; break; /* Handle literal characters */ @@ -1701,8 +1818,8 @@ for (;;) case OP_NOTI: branchlength++; cc += 2; -#ifdef SUPPORT_UTF8 - if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif break; @@ -1714,16 +1831,16 @@ for (;;) 
case OP_NOTEXACT: case OP_NOTEXACTI: branchlength += GET2(cc,1); - cc += 4; -#ifdef SUPPORT_UTF8 - if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif break; case OP_TYPEEXACT: branchlength += GET2(cc,1); - if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; - cc += 4; + if (cc[1 + IMM2_SIZE] == OP_PROP || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; + cc += 1 + IMM2_SIZE + 1; break; /* Handle single-char matchers */ @@ -1757,15 +1874,15 @@ for (;;) /* Check a class for variable quantification */ -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 case OP_XCLASS: - cc += GET(cc, 1) - 33; + cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; /* Fall through */ #endif case OP_CLASS: case OP_NCLASS: - cc += 33; + cc += PRIV(OP_lengths)[OP_CLASS]; switch (*cc) { @@ -1779,9 +1896,9 @@ for (;;) case OP_CRRANGE: case OP_CRMINRANGE: - if (GET2(cc,1) != GET2(cc,3)) return -1; + if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1; branchlength += GET2(cc,1); - cc += 5; + cc += 1 + 2 * IMM2_SIZE; break; default: @@ -1896,14 +2013,14 @@ length. 
Arguments: code points to start of expression - utf8 TRUE in UTF-8 mode + utf TRUE in UTF-8 / UTF-16 mode number the required bracket number or negative to find a lookbehind Returns: pointer to the opcode for the bracket, or NULL if not found */ -const uschar * -_pcre_find_bracket(const uschar *code, BOOL utf8, int number) +const pcre_uchar * +PRIV(find_bracket)(const pcre_uchar *code, BOOL utf, int number) { for (;;) { @@ -1921,8 +2038,8 @@ for (;;) else if (c == OP_REVERSE) { - if (number < 0) return (uschar *)code; - code += _pcre_OP_lengths[c]; + if (number < 0) return (pcre_uchar *)code; + code += PRIV(OP_lengths)[c]; } /* Handle capturing bracket */ @@ -1931,8 +2048,8 @@ for (;;) c == OP_CBRAPOS || c == OP_SCBRAPOS) { int n = GET2(code, 1+LINK_SIZE); - if (n == number) return (uschar *)code; - code += _pcre_OP_lengths[c]; + if (n == number) return (pcre_uchar *)code; + code += PRIV(OP_lengths)[c]; } /* Otherwise, we can get the item's length from the table, except that for @@ -1960,7 +2077,8 @@ for (;;) case OP_TYPEMINUPTO: case OP_TYPEEXACT: case OP_TYPEPOSUPTO: - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; + if (code[1 + IMM2_SIZE] == OP_PROP + || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; break; case OP_MARK: @@ -1976,14 +2094,14 @@ for (;;) /* Add in the fixed length from the table */ - code += _pcre_OP_lengths[c]; + code += PRIV(OP_lengths)[c]; /* In UTF-8 mode, opcodes that are followed by a character may be followed by a multi-byte character. The length in the table is a minimum, so we have to arrange to skip the extra bytes. 
*/ -#ifdef SUPPORT_UTF8 - if (utf8) switch(c) +#ifdef SUPPORT_UTF + if (utf) switch(c) { case OP_CHAR: case OP_CHARI: @@ -2013,11 +2131,11 @@ for (;;) case OP_MINQUERYI: case OP_POSQUERY: case OP_POSQUERYI: - if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; + if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); break; } #else - (void)(utf8); /* Keep compiler happy by referencing function argument */ + (void)(utf); /* Keep compiler happy by referencing function argument */ #endif } } @@ -2034,13 +2152,13 @@ instance of OP_RECURSE. Arguments: code points to start of expression - utf8 TRUE in UTF-8 mode + utf TRUE in UTF-8 / UTF-16 mode Returns: pointer to the opcode for OP_RECURSE, or NULL if not found */ -static const uschar * -find_recurse(const uschar *code, BOOL utf8) +static const pcre_uchar * +find_recurse(const pcre_uchar *code, BOOL utf) { for (;;) { @@ -2079,7 +2197,8 @@ for (;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; + if (code[1 + IMM2_SIZE] == OP_PROP + || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; break; case OP_MARK: @@ -2095,14 +2214,14 @@ for (;;) /* Add in the fixed length from the table */ - code += _pcre_OP_lengths[c]; + code += PRIV(OP_lengths)[c]; /* In UTF-8 mode, opcodes that are followed by a character may be followed by a multi-byte character. The length in the table is a minimum, so we have to arrange to skip the extra bytes. 
*/ -#ifdef SUPPORT_UTF8 - if (utf8) switch(c) +#ifdef SUPPORT_UTF + if (utf) switch(c) { case OP_CHAR: case OP_CHARI: @@ -2132,11 +2251,11 @@ for (;;) case OP_MINQUERYI: case OP_POSQUERY: case OP_POSQUERYI: - if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; + if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]); break; } #else - (void)(utf8); /* Keep compiler happy by referencing function argument */ + (void)(utf); /* Keep compiler happy by referencing function argument */ #endif } } @@ -2159,22 +2278,22 @@ bracket whose current branch will already have been scanned. Arguments: code points to start of search endcode points to where to stop - utf8 TRUE if in UTF8 mode + utf TRUE if in UTF-8 / UTF-16 mode cd contains pointers to tables etc. Returns: TRUE if what is matched could be empty */ static BOOL -could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8, - compile_data *cd) +could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode, + BOOL utf, compile_data *cd) { register int c; -for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); +for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); code < endcode; - code = first_significant_code(code + _pcre_OP_lengths[c], TRUE)) + code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE)) { - const uschar *ccode; + const pcre_uchar *ccode; c = *code; @@ -2197,7 +2316,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); if (c == OP_RECURSE) { - const uschar *scode; + const pcre_uchar *scode; BOOL empty_branch; /* Test for forward reference */ @@ -2215,7 +2334,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); do { - if (could_be_empty_branch(scode, endcode, utf8, cd)) + if (could_be_empty_branch(scode, endcode, utf, cd)) { empty_branch = TRUE; break; @@ -2233,7 +2352,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); if (c == OP_BRAZERO || c 
== OP_BRAMINZERO || c == OP_SKIPZERO || c == OP_BRAPOSZERO) { - code += _pcre_OP_lengths[c]; + code += PRIV(OP_lengths)[c]; do code += GET(code, 1); while (*code == OP_ALT); c = *code; continue; @@ -2271,7 +2390,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); empty_branch = FALSE; do { - if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd)) + if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd)) empty_branch = TRUE; code += GET(code, 1); } @@ -2289,11 +2408,11 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); { /* Check for quantifiers after a class. XCLASS is used for classes that cannot be represented just by a bit map. This includes negated single - high-valued characters. The length in _pcre_OP_lengths[] is zero; the + high-valued characters. The length in PRIV(OP_lengths)[] is zero; the actual length is stored in the compiled code, so we must update "code" here. */ -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: ccode = code += GET(code, 1); goto CHECK_CLASS_REPEAT; @@ -2301,9 +2420,9 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); case OP_CLASS: case OP_NCLASS: - ccode = code + 33; + ccode = code + PRIV(OP_lengths)[OP_CLASS]; -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 CHECK_CLASS_REPEAT: #endif @@ -2376,7 +2495,8 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; + if (code[1 + IMM2_SIZE] == OP_PROP + || code[1 + IMM2_SIZE] == OP_NOTPROP) code += 2; break; /* End of branch */ @@ -2391,7 +2511,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO, MINUPTO, and POSUPTO may be followed by a multibyte character */ -#ifdef SUPPORT_UTF8 
+#ifdef SUPPORT_UTF case OP_STAR: case OP_STARI: case OP_MINSTAR: @@ -2404,7 +2524,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); case OP_MINQUERYI: case OP_POSQUERY: case OP_POSQUERYI: - if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f]; + if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]); break; case OP_UPTO: @@ -2413,7 +2533,7 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], TRUE); case OP_MINUPTOI: case OP_POSUPTO: case OP_POSUPTOI: - if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; + if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]); break; #endif @@ -2457,19 +2577,19 @@ Arguments: code points to start of the recursion endcode points to where to stop (current RECURSE item) bcptr points to the chain of current (unclosed) branch starts - utf8 TRUE if in UTF-8 mode + utf TRUE if in UTF-8 / UTF-16 mode cd pointers to tables etc Returns: TRUE if what is matched could be empty */ static BOOL -could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, - BOOL utf8, compile_data *cd) +could_be_empty(const pcre_uchar *code, const pcre_uchar *endcode, + branch_chain *bcptr, BOOL utf, compile_data *cd) { while (bcptr != NULL && bcptr->current_branch >= code) { - if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd)) + if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd)) return FALSE; bcptr = bcptr->outer; } @@ -2521,7 +2641,7 @@ Returns: TRUE or FALSE */ static BOOL -check_posix_syntax(const uschar *ptr, const uschar **endptr) +check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr) { int terminator; /* Don't combine these lines; the Solaris cc */ terminator = *(++ptr); /* compiler warns about "non-constant" initializer. 
*/ @@ -2565,14 +2685,14 @@ Returns: a value representing the name, or -1 if unknown */ static int -check_posix_name(const uschar *ptr, int len) +check_posix_name(const pcre_uchar *ptr, int len) { const char *pn = posix_names; register int yield = 0; while (posix_name_lengths[yield] != 0) { if (len == posix_name_lengths[yield] && - strncmp((const char *)ptr, pn, len) == 0) return yield; + STRNCMP_UC_C8(ptr, pn, len) == 0) return yield; pn += posix_name_lengths[yield] + 1; yield++; } @@ -2604,7 +2724,7 @@ value in the reference (which is a group number). Arguments: group points to the start of the group adjust the amount by which the group is to be moved - utf8 TRUE in UTF-8 mode + utf TRUE in UTF-8 / UTF-16 mode cd contains pointers to tables etc. save_hwm the hwm forward reference pointer at the start of the group @@ -2612,15 +2732,15 @@ Returns: nothing */ static void -adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd, - uschar *save_hwm) +adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd, + pcre_uchar *save_hwm) { -uschar *ptr = group; +pcre_uchar *ptr = group; -while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL) +while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) { int offset; - uschar *hc; + pcre_uchar *hc; /* See if this recursion is on the forward reference list. If so, adjust the reference. 
*/ @@ -2665,14 +2785,14 @@ Arguments: Returns: new code pointer */ -static uschar * -auto_callout(uschar *code, const uschar *ptr, compile_data *cd) +static pcre_uchar * +auto_callout(pcre_uchar *code, const pcre_uchar *ptr, compile_data *cd) { *code++ = OP_CALLOUT; *code++ = 255; PUT(code, 0, (int)(ptr - cd->start_pattern)); /* Pattern offset */ PUT(code, LINK_SIZE, 0); /* Default length */ -return code + 2*LINK_SIZE; +return code + 2 * LINK_SIZE; } @@ -2694,7 +2814,7 @@ Returns: nothing */ static void -complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd) +complete_callout(pcre_uchar *previous_callout, const pcre_uchar *ptr, compile_data *cd) { int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2)); PUT(previous_callout, 2 + LINK_SIZE, length); @@ -2777,7 +2897,7 @@ switch(ptype) prop->chartype == ucp_Lt) == negated; case PT_GC: - return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated; + return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated; case PT_PC: return (pdata == prop->chartype) == negated; @@ -2788,23 +2908,23 @@ switch(ptype) /* These are specials */ case PT_ALNUM: - return (_pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated; + return (PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated; case PT_SPACE: /* Perl space */ - return (_pcre_ucp_gentype[prop->chartype] == ucp_Z || + return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) == negated; case PT_PXSPACE: /* POSIX space */ - return (_pcre_ucp_gentype[prop->chartype] == ucp_Z || + return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) == negated; case PT_WORD: - return (_pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || + return (PRIV(ucp_gentype)[prop->chartype] == 
ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) == negated; } return FALSE; @@ -2823,7 +2943,7 @@ sense to automatically possessify the repeated item. Arguments: previous pointer to the repeated opcode - utf8 TRUE in UTF-8 mode + utf TRUE in UTF-8 / UTF-16 mode ptr next character in pattern options options bits cd contains pointers to tables etc. @@ -2832,10 +2952,10 @@ Returns: TRUE if possessifying is wanted */ static BOOL -check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr, - int options, compile_data *cd) +check_auto_possessive(const pcre_uchar *previous, BOOL utf, + const pcre_uchar *ptr, int options, compile_data *cd) { -int c, next; +pcre_int32 c, next; int op_code = *previous++; /* Skip whitespace and comments in extended mode */ @@ -2844,7 +2964,7 @@ if ((options & PCRE_EXTENDED) != 0) { for (;;) { - while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++; if (*ptr == CHAR_NUMBER_SIGN) { ptr++; @@ -2852,8 +2972,8 @@ if ((options & PCRE_EXTENDED) != 0) { if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } ptr++; -#ifdef SUPPORT_UTF8 - if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; +#ifdef SUPPORT_UTF + if (utf) FORWARDCHAR(ptr); #endif } } @@ -2871,15 +2991,13 @@ if (*ptr == CHAR_BACKSLASH) if (temperrorcode != 0) return FALSE; ptr++; /* Point after the escape sequence */ } - -else if ((cd->ctypes[*ptr] & ctype_meta) == 0) +else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0) { -#ifdef SUPPORT_UTF8 - if (utf8) { GETCHARINC(next, ptr); } else +#ifdef SUPPORT_UTF + if (utf) { GETCHARINC(next, ptr); } else #endif next = *ptr++; } - else return FALSE; /* Skip whitespace and comments in extended mode */ @@ -2888,7 +3006,7 @@ if ((options & PCRE_EXTENDED) != 0) { for (;;) { - while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_space) != 0) ptr++; if (*ptr == CHAR_NUMBER_SIGN) { 
ptr++; @@ -2896,8 +3014,8 @@ if ((options & PCRE_EXTENDED) != 0) { if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } ptr++; -#ifdef SUPPORT_UTF8 - if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; +#ifdef SUPPORT_UTF + if (utf) FORWARDCHAR(ptr); #endif } } @@ -2908,7 +3026,7 @@ if ((options & PCRE_EXTENDED) != 0) /* If the next thing is itself optional, we have to give up. */ if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || - strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) + STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) return FALSE; /* Now compare the next item with the previous opcode. First, handle cases when @@ -2917,7 +3035,7 @@ the next item is a character. */ if (next >= 0) switch(op_code) { case OP_CHAR: -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF GETCHARTEST(c, previous); #else c = *previous; @@ -2929,14 +3047,14 @@ if (next >= 0) switch(op_code) high-valued characters. */ case OP_CHARI: -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF GETCHARTEST(c, previous); #else c = *previous; #endif if (c == next) return FALSE; -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { unsigned int othercase; if (next < 128) othercase = cd->fcc[next]; else @@ -2948,8 +3066,8 @@ if (next >= 0) switch(op_code) return (unsigned int)c != othercase; } else -#endif /* SUPPORT_UTF8 */ - return (c != cd->fcc[next]); /* Non-UTF-8 mode */ +#endif /* SUPPORT_UTF */ + return (c != TABLE_GET((unsigned int)next, cd->fcc, next)); /* Non-UTF-8 mode */ /* For OP_NOT and OP_NOTI, the data is always a single-byte character. 
These opcodes are not used for multi-byte characters, because they are coded using @@ -2960,8 +3078,8 @@ if (next >= 0) switch(op_code) case OP_NOTI: if ((c = *previous) == next) return TRUE; -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { unsigned int othercase; if (next < 128) othercase = cd->fcc[next]; else @@ -2973,8 +3091,8 @@ if (next >= 0) switch(op_code) return (unsigned int)c == othercase; } else -#endif /* SUPPORT_UTF8 */ - return (c == cd->fcc[next]); /* Non-UTF-8 mode */ +#endif /* SUPPORT_UTF */ + return (c == (int)(TABLE_GET((unsigned int)next, cd->fcc, next))); /* Non-UTF-8 mode */ /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */ @@ -3065,7 +3183,7 @@ switch(op_code) { case OP_CHAR: case OP_CHARI: -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF GETCHARTEST(c, previous); #else c = *previous; @@ -3170,7 +3288,7 @@ switch(op_code) to the original \d etc. At this point, ptr will point to a zero byte. */ if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK || - strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) + STRNCMP_UC_C8(ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0) return FALSE; /* Do the property check. */ @@ -3248,8 +3366,8 @@ Arguments: codeptr points to the pointer to the current code point ptrptr points to the current pattern pointer errorcodeptr points to error code variable - firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) - reqbyteptr set to the last literal character required, else < 0 + firstcharptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) + reqcharptr set to the last literal character required, else < 0 bcptr points to current branch chain cond_depth conditional nesting depth cd contains pointers to tables etc. 
@@ -3261,47 +3379,54 @@ Returns: TRUE on success */ static BOOL -compile_branch(int *optionsptr, uschar **codeptr, const uschar **ptrptr, - int *errorcodeptr, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, - int cond_depth, compile_data *cd, int *lengthptr) +compile_branch(int *optionsptr, pcre_uchar **codeptr, + const pcre_uchar **ptrptr, int *errorcodeptr, pcre_int32 *firstcharptr, + pcre_int32 *reqcharptr, branch_chain *bcptr, int cond_depth, + compile_data *cd, int *lengthptr) { int repeat_type, op_type; int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ int bravalue = 0; int greedy_default, greedy_non_default; -int firstbyte, reqbyte; -int zeroreqbyte, zerofirstbyte; -int req_caseopt, reqvary, tempreqvary; +pcre_int32 firstchar, reqchar; +pcre_int32 zeroreqchar, zerofirstchar; +pcre_int32 req_caseopt, reqvary, tempreqvary; int options = *optionsptr; /* May change dynamically */ int after_manual_callout = 0; int length_prevgroup = 0; register int c; -register uschar *code = *codeptr; -uschar *last_code = code; -uschar *orig_code = code; -uschar *tempcode; +register pcre_uchar *code = *codeptr; +pcre_uchar *last_code = code; +pcre_uchar *orig_code = code; +pcre_uchar *tempcode; BOOL inescq = FALSE; -BOOL groupsetfirstbyte = FALSE; -const uschar *ptr = *ptrptr; -const uschar *tempptr; -const uschar *nestptr = NULL; -uschar *previous = NULL; -uschar *previous_callout = NULL; -uschar *save_hwm = NULL; -uschar classbits[32]; +BOOL groupsetfirstchar = FALSE; +const pcre_uchar *ptr = *ptrptr; +const pcre_uchar *tempptr; +const pcre_uchar *nestptr = NULL; +pcre_uchar *previous = NULL; +pcre_uchar *previous_callout = NULL; +pcre_uchar *save_hwm = NULL; +pcre_uint8 classbits[32]; /* We can fish out the UTF-8 setting once and for all into a BOOL, but we must not do this for other options (e.g. PCRE_EXTENDED) because they may change dynamically as we process the pattern. 
*/ -#ifdef SUPPORT_UTF8 -BOOL class_utf8; -BOOL utf8 = (options & PCRE_UTF8) != 0; -uschar *class_utf8data; -uschar *class_utf8data_base; -uschar utf8_char[6]; +#ifdef SUPPORT_UTF +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +BOOL utf = (options & PCRE_UTF8) != 0; +pcre_uchar utf_chars[6]; #else -BOOL utf8 = FALSE; +BOOL utf = FALSE; +#endif + +/* Helper variables for OP_XCLASS opcode (for characters > 255). */ + +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 +BOOL xclass; +pcre_uchar *class_uchardata; +pcre_uchar *class_uchardata_base; #endif #ifdef PCRE_DEBUG @@ -3315,22 +3440,23 @@ greedy_non_default = greedy_default ^ 1; /* Initialize no first byte, no required byte. REQ_UNSET means "no char matching encountered yet". It gets changed to REQ_NONE if we hit something that -matches a non-fixed char first char; reqbyte just remains unset if we never +matches a non-fixed char first char; reqchar just remains unset if we never find one. When we hit a repeat whose minimum is zero, we may have to adjust these values to take the zero repeat into account. This is implemented by setting them to -zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual +zerofirstbyte and zeroreqchar when such a repeat is encountered. The individual item types that can be repeated set these backoff variables appropriately. */ -firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET; +firstchar = reqchar = zerofirstchar = zeroreqchar = REQ_UNSET; -/* The variable req_caseopt contains either the REQ_CASELESS value or zero, -according to the current setting of the caseless flag. REQ_CASELESS is a bit -value > 255. It is added into the firstbyte or reqbyte variables to record the -case status of the value. This is used only for ASCII characters. */ +/* The variable req_caseopt contains either the REQ_CASELESS value +or zero, according to the current setting of the caseless flag. The +REQ_CASELESS leaves the lower 28 bit empty. 
It is added into the +firstchar or reqchar variables to record the case status of the +value. This is used only for ASCII characters. */ -req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; +req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS:0; /* Switch on next character until the end of the branch */ @@ -3342,20 +3468,20 @@ for (;; ptr++) BOOL is_quantifier; BOOL is_recurse; BOOL reset_bracount; - int class_charcount; - int class_lastchar; + int class_has_8bitchar; + int class_single_char; int newoptions; int recno; int refsign; int skipbytes; - int subreqbyte; - int subfirstbyte; + int subreqchar; + int subfirstchar; int terminator; int mclength; int tempbracount; - uschar mcbuffer[8]; + pcre_uchar mcbuffer[8]; - /* Get next byte in the pattern */ + /* Get next character in the pattern */ c = *ptr; @@ -3401,8 +3527,8 @@ for (;; ptr++) } *lengthptr += (int)(code - last_code); - DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code), - c)); + DPRINTF(("length=%d added %d c=%c (0x%x)\n", *lengthptr, + (int)(code - last_code), c, c)); /* If "previous" is set and it is not at the start of the work space, move it back to there, in order to avoid filling up the work space. 
Otherwise, @@ -3412,7 +3538,7 @@ for (;; ptr++) { if (previous > orig_code) { - memmove(orig_code, previous, code - previous); + memmove(orig_code, previous, IN_UCHARS(code - previous)); code -= previous - orig_code; previous = orig_code; } @@ -3481,7 +3607,7 @@ for (;; ptr++) if ((options & PCRE_EXTENDED) != 0) { - if ((cd->ctypes[c] & ctype_space) != 0) continue; + if (MAX_255(*ptr) && (cd->ctypes[c] & ctype_space) != 0) continue; if (c == CHAR_NUMBER_SIGN) { ptr++; @@ -3489,8 +3615,8 @@ for (;; ptr++) { if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } ptr++; -#ifdef SUPPORT_UTF8 - if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; +#ifdef SUPPORT_UTF + if (utf) FORWARDCHAR(ptr); #endif } if (*ptr != 0) continue; @@ -3514,8 +3640,8 @@ for (;; ptr++) case 0: /* The branch terminates at string end */ case CHAR_VERTICAL_LINE: /* or | or ) */ case CHAR_RIGHT_PARENTHESIS: - *firstbyteptr = firstbyte; - *reqbyteptr = reqbyte; + *firstcharptr = firstchar; + *reqcharptr = reqchar; *codeptr = code; *ptrptr = ptr; if (lengthptr != NULL) @@ -3539,7 +3665,7 @@ for (;; ptr++) previous = NULL; if ((options & PCRE_MULTILINE) != 0) { - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; *code++ = OP_CIRCM; } else *code++ = OP_CIRC; @@ -3551,12 +3677,12 @@ for (;; ptr++) break; /* There can never be a first char if '.' is first, whatever happens about - repeats. The value of reqbyte doesn't change either. */ + repeats. The value of reqchar doesn't change either. */ case CHAR_DOT: - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; + zerofirstchar = firstchar; + zeroreqchar = reqchar; previous = code; *code++ = ((options & PCRE_DOTALL) != 0)? 
OP_ALLANY: OP_ANY; break; @@ -3611,8 +3737,7 @@ for (;; ptr++) { if (ptr[1] == CHAR_E) ptr++; - else if (strncmp((const char *)ptr+1, - STR_Q STR_BACKSLASH STR_E, 3) == 0) + else if (STRNCMP_UC_C8(ptr + 1, STR_Q STR_BACKSLASH STR_E, 3) == 0) ptr += 3; else break; @@ -3631,8 +3756,8 @@ for (;; ptr++) (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) { *code++ = negate_class? OP_ALLANY : OP_FAIL; - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; + zerofirstchar = firstchar; break; } @@ -3642,24 +3767,25 @@ for (;; ptr++) should_flip_negation = FALSE; - /* Keep a count of chars with values < 256 so that we can optimize the case - of just a single character (as long as it's < 256). However, For higher - valued UTF-8 characters, we don't yet do any optimization. */ + /* For optimization purposes, we track some properties of the class. + class_has_8bitchar will be non-zero, if the class contains at least one + < 256 character. class_single_char will be 1 if the class contains only + a single character. */ - class_charcount = 0; - class_lastchar = -1; + class_has_8bitchar = 0; + class_single_char = 0; /* Initialize the 32-char bit map to all zeros. We build the map in a temporary bit of memory, in case the class contains only 1 character (less than 256), because in that case the compiled code doesn't use the bit map. */ - memset(classbits, 0, 32 * sizeof(uschar)); + memset(classbits, 0, 32 * sizeof(pcre_uint8)); -#ifdef SUPPORT_UTF8 - class_utf8 = FALSE; /* No chars >= 256 */ - class_utf8data = code + LINK_SIZE + 2; /* For UTF-8 items */ - class_utf8data_base = class_utf8data; /* For resetting in pass 1 */ +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + xclass = FALSE; /* No chars >= 256 */ + class_uchardata = code + LINK_SIZE + 2; /* For UTF-8 items */ + class_uchardata_base = class_uchardata; /* For resetting in pass 1 */ #endif /* Process characters until ] is reached. 
By writing this as a "do" it @@ -3668,25 +3794,26 @@ for (;; ptr++) if (c != 0) do { - const uschar *oldptr; + const pcre_uchar *oldptr; -#ifdef SUPPORT_UTF8 - if (utf8 && c > 127) +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(c)) { /* Braces are required because the */ GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ } +#endif - /* In the pre-compile phase, accumulate the length of any UTF-8 extra +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + /* In the pre-compile phase, accumulate the length of any extra data and reset the pointer. This is so that very large classes that - contain a zillion UTF-8 characters no longer overwrite the work space + contain a zillion > 255 characters no longer overwrite the work space (which is on the stack). */ if (lengthptr != NULL) { - *lengthptr += (int)(class_utf8data - class_utf8data_base); - class_utf8data = class_utf8data_base; + *lengthptr += class_uchardata - class_uchardata_base; + class_uchardata = class_uchardata_base; } - #endif /* Inside \Q...\E everything is literal except \E */ @@ -3714,8 +3841,8 @@ for (;; ptr++) { BOOL local_negate = FALSE; int posix_class, taboffset, tabopt; - register const uschar *cbits = cd->cbits; - uschar pbits[32]; + register const pcre_uint8 *cbits = cd->cbits; + pcre_uint8 pbits[32]; if (ptr[1] != CHAR_COLON) { @@ -3770,7 +3897,7 @@ for (;; ptr++) /* Copy in the first table (always present) */ memcpy(pbits, cbits + posix_class_maps[posix_class], - 32 * sizeof(uschar)); + 32 * sizeof(pcre_uint8)); /* If there is a second table, add or remove it as required. */ @@ -3801,16 +3928,20 @@ for (;; ptr++) for (c = 0; c < 32; c++) classbits[c] |= pbits[c]; ptr = tempptr + 1; - class_charcount = 10; /* Set > 1; assumes more than 1 per class */ + /* Every class contains at least one < 256 characters. */ + class_has_8bitchar = 1; + /* Every class contains at least two characters. 
*/ + class_single_char = 2; continue; /* End of POSIX syntax handling */ } /* Backslash may introduce a single character, or it may introduce one of the specials, which just set a flag. The sequence \b is a special case. Inside a class (and only there) it is treated as backspace. We - assume that other escapes have more than one character in them, so set - class_charcount bigger than one. Unrecognized escapes fall through and - are either treated as literal characters (by default), or are faulted if + assume that other escapes have more than one character in them, so + speculatively set both class_has_8bitchar and class_single_char bigger + than one. Unrecognized escapes fall through and are either treated + as literal characters (by default), or are faulted if PCRE_EXTRA is set. */ if (c == CHAR_BACKSLASH) @@ -3837,8 +3968,11 @@ for (;; ptr++) if (c < 0) { - register const uschar *cbits = cd->cbits; - class_charcount += 2; /* Greater than 1 is what matters */ + register const pcre_uint8 *cbits = cd->cbits; + /* Every class contains at least two < 256 characters. */ + class_has_8bitchar++; + /* Every class contains at least two characters. */ + class_single_char += 2; switch (-c) { @@ -3851,7 +3985,7 @@ for (;; ptr++) case ESC_SU: nestptr = ptr; ptr = substitutes[-c - ESC_DU] - 1; /* Just before substitute */ - class_charcount -= 2; /* Undo! */ + class_has_8bitchar--; /* Undo! 
*/ continue; #endif case ESC_d: @@ -3892,23 +4026,38 @@ for (;; ptr++) SETBIT(classbits, 0x09); /* VT */ SETBIT(classbits, 0x20); /* SPACE */ SETBIT(classbits, 0xa0); /* NSBP */ -#ifdef SUPPORT_UTF8 - if (utf8) +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x1680; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x180e; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2000; + *class_uchardata++ = 0x200a; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x202f; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x205f; + *class_uchardata++ = XCL_SINGLE; + *class_uchardata++ = 0x3000; +#elif defined SUPPORT_UTF + if (utf) { - class_utf8 = TRUE; - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(0x1680, class_utf8data); - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(0x180e, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x2000, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x200A, class_utf8data); - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(0x202f, class_utf8data); - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(0x205f, class_utf8data); - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(0x3000, class_utf8data); + xclass = TRUE; + *class_uchardata++ = XCL_SINGLE; + class_uchardata += PRIV(ord2utf)(0x1680, class_uchardata); + *class_uchardata++ = XCL_SINGLE; + class_uchardata += PRIV(ord2utf)(0x180e, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x2000, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x200a, class_uchardata); + *class_uchardata++ = XCL_SINGLE; + class_uchardata += PRIV(ord2utf)(0x202f, class_uchardata); + *class_uchardata++ = XCL_SINGLE; + class_uchardata += PRIV(ord2utf)(0x205f, class_uchardata); + *class_uchardata++ = XCL_SINGLE; + class_uchardata += PRIV(ord2utf)(0x3000, 
class_uchardata); } #endif continue; @@ -3926,32 +4075,59 @@ for (;; ptr++) } classbits[c] |= x; } - -#ifdef SUPPORT_UTF8 - if (utf8) +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x0100; + *class_uchardata++ = 0x167f; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x1681; + *class_uchardata++ = 0x180d; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x180f; + *class_uchardata++ = 0x1fff; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x200b; + *class_uchardata++ = 0x202e; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2030; + *class_uchardata++ = 0x205e; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2060; + *class_uchardata++ = 0x2fff; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x3001; +#ifdef SUPPORT_UTF + if (utf) + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + else +#endif + *class_uchardata++ = 0xffff; +#elif defined SUPPORT_UTF + if (utf) { - class_utf8 = TRUE; - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x167f, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x1681, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x180d, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x180f, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x1fff, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x200B, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x202e, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x2030, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x205e, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x2060, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x2fff, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += 
_pcre_ord2utf8(0x3001, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data); + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x167f, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x1681, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x180d, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x180f, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x1fff, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x200b, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x202e, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x2030, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x205e, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x2060, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x2fff, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x3001, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); } #endif continue; @@ -3962,13 +4138,18 @@ for (;; ptr++) SETBIT(classbits, 0x0c); /* FF */ SETBIT(classbits, 0x0d); /* CR */ SETBIT(classbits, 0x85); /* NEL */ -#ifdef SUPPORT_UTF8 - if (utf8) +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x2028; + *class_uchardata++ = 0x2029; +#elif defined SUPPORT_UTF + if (utf) { - class_utf8 = TRUE; - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x2028, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data); + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x2028, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x2029, class_uchardata); } #endif continue; @@ -3990,16 +4171,29 @@ for (;; ptr++) 
classbits[c] |= x; } -#ifdef SUPPORT_UTF8 - if (utf8) +#ifndef COMPILE_PCRE8 + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x0100; + *class_uchardata++ = 0x2027; + *class_uchardata++ = XCL_RANGE; + *class_uchardata++ = 0x202a; +#ifdef SUPPORT_UTF + if (utf) + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); + else +#endif + *class_uchardata++ = 0xffff; +#elif defined SUPPORT_UTF + if (utf) { - class_utf8 = TRUE; - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x0100, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x2027, class_utf8data); - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(0x2029, class_utf8data); - class_utf8data += _pcre_ord2utf8(0x7fffffff, class_utf8data); + xclass = TRUE; + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x0100, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x2027, class_uchardata); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(0x202a, class_uchardata); + class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata); } #endif continue; @@ -4012,12 +4206,12 @@ for (;; ptr++) int pdata; int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr); if (ptype < 0) goto FAILED; - class_utf8 = TRUE; - *class_utf8data++ = ((-c == ESC_p) != negated)? + xclass = TRUE; + *class_uchardata++ = ((-c == ESC_p) != negated)? XCL_PROP : XCL_NOTPROP; - *class_utf8data++ = ptype; - *class_utf8data++ = pdata; - class_charcount -= 2; /* Not a < 256 character */ + *class_uchardata++ = ptype; + *class_uchardata++ = pdata; + class_has_8bitchar--; /* Undo! */ continue; } #endif @@ -4031,14 +4225,15 @@ for (;; ptr++) *errorcodeptr = ERR7; goto FAILED; } - class_charcount -= 2; /* Undo the default count from above */ - c = *ptr; /* Get the final character and fall through */ + class_has_8bitchar--; /* Undo the speculative increase. */ + class_single_char -= 2; /* Undo the speculative increase. 
*/ + c = *ptr; /* Get the final character and fall through */ break; } } /* Fall through if we have a single character (c >= 0). This may be - greater than 256 in UTF-8 mode. */ + greater than 256. */ } /* End of backslash handling */ @@ -4086,8 +4281,8 @@ for (;; ptr++) goto LONE_SINGLE_CHARACTER; } -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { /* Braces are required because the */ GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ } @@ -4131,22 +4326,36 @@ for (;; ptr++) if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF; + /* Since we found a character range, single character optimizations + cannot be done anymore. */ + class_single_char = 2; + /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless matching, we have to use an XCLASS with extra data items. Caseless matching for characters > 127 is available only if UCP support is available. */ -#ifdef SUPPORT_UTF8 - if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127))) +#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8) + if ((d > 255) || (utf && ((options & PCRE_CASELESS) != 0 && d > 127))) +#elif defined SUPPORT_UTF + if (utf && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127))) +#elif !(defined COMPILE_PCRE8) + if (d > 255) +#endif +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) { - class_utf8 = TRUE; + xclass = TRUE; /* With UCP support, we can find the other case equivalents of the relevant characters. There may be several ranges. Optimize how they fit with the basic range. 
*/ #ifdef SUPPORT_UCP +#ifndef COMPILE_PCRE8 + if (utf && (options & PCRE_CASELESS) != 0) +#else if ((options & PCRE_CASELESS) != 0) +#endif { unsigned int occ, ocd; unsigned int cc = c; @@ -4172,14 +4381,14 @@ for (;; ptr++) if (occ == ocd) { - *class_utf8data++ = XCL_SINGLE; + *class_uchardata++ = XCL_SINGLE; } else { - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(occ, class_utf8data); + *class_uchardata++ = XCL_RANGE; + class_uchardata += PRIV(ord2utf)(occ, class_uchardata); } - class_utf8data += _pcre_ord2utf8(ocd, class_utf8data); + class_uchardata += PRIV(ord2utf)(ocd, class_uchardata); } } #endif /* SUPPORT_UCP */ @@ -4187,33 +4396,69 @@ for (;; ptr++) /* Now record the original range, possibly modified for UCP caseless overlapping ranges. */ - *class_utf8data++ = XCL_RANGE; - class_utf8data += _pcre_ord2utf8(c, class_utf8data); - class_utf8data += _pcre_ord2utf8(d, class_utf8data); + *class_uchardata++ = XCL_RANGE; +#ifdef SUPPORT_UTF +#ifndef COMPILE_PCRE8 + if (utf) + { + class_uchardata += PRIV(ord2utf)(c, class_uchardata); + class_uchardata += PRIV(ord2utf)(d, class_uchardata); + } + else + { + *class_uchardata++ = c; + *class_uchardata++ = d; + } +#else + class_uchardata += PRIV(ord2utf)(c, class_uchardata); + class_uchardata += PRIV(ord2utf)(d, class_uchardata); +#endif +#else /* SUPPORT_UTF */ + *class_uchardata++ = c; + *class_uchardata++ = d; +#endif /* SUPPORT_UTF */ /* With UCP support, we are done. Without UCP support, there is no - caseless matching for UTF-8 characters > 127; we can use the bit map - for the smaller ones. */ + caseless matching for UTF characters > 127; we can use the bit map + for the smaller ones. 
As for 16 bit characters without UTF, we + can still use */ #ifdef SUPPORT_UCP - continue; /* With next character in the class */ -#else - if ((options & PCRE_CASELESS) == 0 || c > 127) continue; - - /* Adjust upper limit and fall through to set up the map */ - - d = 127; - +#ifndef COMPILE_PCRE8 + if (utf) +#endif + continue; /* With next character in the class */ #endif /* SUPPORT_UCP */ + +#if defined SUPPORT_UTF && !defined(SUPPORT_UCP) && !(defined COMPILE_PCRE8) + if (utf) + { + if ((options & PCRE_CASELESS) == 0 || c > 127) continue; + /* Adjust upper limit and fall through to set up the map */ + d = 127; + } + else + { + if (c > 255) continue; + /* Adjust upper limit and fall through to set up the map */ + d = 255; + } +#elif defined SUPPORT_UTF && !defined(SUPPORT_UCP) + if ((options & PCRE_CASELESS) == 0 || c > 127) continue; + /* Adjust upper limit and fall through to set up the map */ + d = 127; +#else + if (c > 255) continue; + /* Adjust upper limit and fall through to set up the map */ + d = 255; +#endif /* SUPPORT_UTF && !SUPPORT_UCP && !COMPILE_PCRE8 */ } -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ - /* We use the bit map for all cases when not in UTF-8 mode; else - ranges that lie entirely within 0-127 when there is UCP support; else - for partial ranges without UCP support. */ + /* We use the bit map for 8 bit mode, or when the characters fall + partially or entirely to [0-255] ([0-127] for UCP) ranges. */ - class_charcount += d - c + 1; - class_lastchar = d; + class_has_8bitchar = 1; /* We can save a bit of time by skipping this in the pre-compile. 
*/ @@ -4222,7 +4467,7 @@ for (;; ptr++) classbits[c/8] |= (1 << (c&7)); if ((options & PCRE_CASELESS) != 0) { - int uc = cd->fcc[c]; /* flip case */ + int uc = cd->fcc[c]; /* flip case */ classbits[uc/8] |= (1 << (uc&7)); } } @@ -4236,41 +4481,117 @@ for (;; ptr++) LONE_SINGLE_CHARACTER: - /* Handle a character that cannot go in the bit map */ + /* Only the value of 1 matters for class_single_char. */ + if (class_single_char < 2) class_single_char++; -#ifdef SUPPORT_UTF8 - if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127))) + /* If class_charcount is 1, we saw precisely one character. As long as + there were no negated characters >= 128 and there was no use of \p or \P, + in other words, no use of any XCLASS features, we can optimize. + + In UTF-8 mode, we can optimize the negative case only if there were no + characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR + operate on single-bytes characters only. This is an historical hangover. + Maybe one day we can tidy these opcodes to handle multi-byte characters. + + The optimization throws away the bit map. We turn the item into a + 1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. + Note that OP_NOT[I] does not support multibyte characters. In the positive + case, it can cause firstchar to be set. Otherwise, there can be no first + char if this item is first, whatever repeat count may follow. In the case + of reqchar, save the previous value for reinstating. */ + +#ifdef SUPPORT_UTF + if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET + && (!utf || !negate_class || c < (MAX_VALUE_FOR_SINGLE_CHAR + 1))) +#else + if (class_single_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) +#endif { - class_utf8 = TRUE; - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(c, class_utf8data); + ptr++; + zeroreqchar = reqchar; + + /* The OP_NOT[I] opcodes work on single characters only. 
*/ + + if (negate_class) + { + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; + zerofirstchar = firstchar; + *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; + *code++ = c; + goto NOT_CHAR; + } + + /* For a single, positive character, get the value into mcbuffer, and + then we can handle this with the normal one-character code. */ + +#ifdef SUPPORT_UTF + if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) + mclength = PRIV(ord2utf)(c, mcbuffer); + else +#endif + { + mcbuffer[0] = c; + mclength = 1; + } + goto ONE_CHAR; + } /* End of 1-char optimization */ + + /* Handle a character that cannot go in the bit map. */ + +#if defined SUPPORT_UTF && !(defined COMPILE_PCRE8) + if ((c > 255) || (utf && ((options & PCRE_CASELESS) != 0 && c > 127))) +#elif defined SUPPORT_UTF + if (utf && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127))) +#elif !(defined COMPILE_PCRE8) + if (c > 255) +#endif + +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) + { + xclass = TRUE; + *class_uchardata++ = XCL_SINGLE; +#ifdef SUPPORT_UTF +#ifndef COMPILE_PCRE8 + /* In non 8 bit mode, we can get here even if we are not in UTF mode. */ + if (!utf) + *class_uchardata++ = c; + else +#endif + class_uchardata += PRIV(ord2utf)(c, class_uchardata); +#else /* SUPPORT_UTF */ + *class_uchardata++ = c; +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP +#ifdef COMPILE_PCRE8 if ((options & PCRE_CASELESS) != 0) +#else + /* In non 8 bit mode, we can get here even if we are not in UTF mode. 
*/ + if (utf && (options & PCRE_CASELESS) != 0) +#endif { unsigned int othercase; - if ((othercase = UCD_OTHERCASE(c)) != c) + if ((int)(othercase = UCD_OTHERCASE(c)) != c) { - *class_utf8data++ = XCL_SINGLE; - class_utf8data += _pcre_ord2utf8(othercase, class_utf8data); + *class_uchardata++ = XCL_SINGLE; + class_uchardata += PRIV(ord2utf)(othercase, class_uchardata); } } #endif /* SUPPORT_UCP */ } else -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ /* Handle a single-byte character */ { + class_has_8bitchar = 1; classbits[c/8] |= (1 << (c&7)); if ((options & PCRE_CASELESS) != 0) { - c = cd->fcc[c]; /* flip case */ + c = cd->fcc[c]; /* flip case */ classbits[c/8] |= (1 << (c&7)); } - class_charcount++; - class_lastchar = c; } } @@ -4291,66 +4612,13 @@ for (;; ptr++) goto FAILED; } - /* If class_charcount is 1, we saw precisely one character whose value is - less than 256. As long as there were no characters >= 128 and there was no - use of \p or \P, in other words, no use of any XCLASS features, we can - optimize. + /* If this is the first thing in the branch, there can be no first char + setting, whatever the repeat count. Any reqchar setting must remain + unchanged after any kind of repeat. */ - In UTF-8 mode, we can optimize the negative case only if there were no - characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR - operate on single-bytes characters only. This is an historical hangover. - Maybe one day we can tidy these opcodes to handle multi-byte characters. - - The optimization throws away the bit map. We turn the item into a - 1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. - Note that OP_NOT[I] does not support multibyte characters. In the positive - case, it can cause firstbyte to be set. Otherwise, there can be no first - char if this item is first, whatever repeat count may follow. In the case - of reqbyte, save the previous value for reinstating. 
*/ - -#ifdef SUPPORT_UTF8 - if (class_charcount == 1 && !class_utf8 && - (!utf8 || !negate_class || class_lastchar < 128)) -#else - if (class_charcount == 1) -#endif - { - zeroreqbyte = reqbyte; - - /* The OP_NOT[I] opcodes work on one-byte characters only. */ - - if (negate_class) - { - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; - *code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; - *code++ = class_lastchar; - break; - } - - /* For a single, positive character, get the value into mcbuffer, and - then we can handle this with the normal one-character code. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && class_lastchar > 127) - mclength = _pcre_ord2utf8(class_lastchar, mcbuffer); - else -#endif - { - mcbuffer[0] = class_lastchar; - mclength = 1; - } - goto ONE_CHAR; - } /* End of 1-char optimization */ - - /* The general case - not the one-char optimization. If this is the first - thing in the branch, there can be no first char setting, whatever the - repeat count. Any reqbyte setting must remain unchanged after any kind of - repeat. */ - - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; + zerofirstchar = firstchar; + zeroreqchar = reqchar; /* If there are characters with values > 255, we have to compile an extended class, with its own opcode, unless there was a negated special @@ -4360,25 +4628,30 @@ for (;; ptr++) be listed) there are no characters < 256, we can omit the bitmap in the actual compiled code. 
*/ -#ifdef SUPPORT_UTF8 - if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0)) +#ifdef SUPPORT_UTF + if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0)) +#elif !defined COMPILE_PCRE8 + if (xclass && !should_flip_negation) +#endif +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 { - *class_utf8data++ = XCL_END; /* Marks the end of extra data */ + *class_uchardata++ = XCL_END; /* Marks the end of extra data */ *code++ = OP_XCLASS; code += LINK_SIZE; - *code = negate_class? XCL_NOT : 0; + *code = negate_class? XCL_NOT:0; /* If the map is required, move up the extra data to make room for it; otherwise just move the code pointer to the end of the extra data. */ - if (class_charcount > 0) + if (class_has_8bitchar > 0) { *code++ |= XCL_MAP; - memmove(code + 32, code, class_utf8data - code); + memmove(code + (32 / sizeof(pcre_uchar)), code, + IN_UCHARS(class_uchardata - code)); memcpy(code, classbits, 32); - code = class_utf8data + 32; + code = class_uchardata + (32 / sizeof(pcre_uchar)); } - else code = class_utf8data; + else code = class_uchardata; /* Now fill in the complete length of the item */ @@ -4394,16 +4667,14 @@ for (;; ptr++) negating it if necessary. */ *code++ = (negate_class == should_flip_negation) ? 
OP_CLASS : OP_NCLASS; - if (negate_class) - { - if (lengthptr == NULL) /* Save time in the pre-compile phase */ - for (c = 0; c < 32; c++) code[c] = ~classbits[c]; - } - else + if (lengthptr == NULL) /* Save time in the pre-compile phase */ { + if (negate_class) + for (c = 0; c < 32; c++) classbits[c] = ~classbits[c]; memcpy(code, classbits, 32); } - code += 32; + code += 32 / sizeof(pcre_uchar); + NOT_CHAR: break; @@ -4440,8 +4711,8 @@ for (;; ptr++) if (repeat_min == 0) { - firstbyte = zerofirstbyte; /* Adjust for zero repeat */ - reqbyte = zeroreqbyte; /* Ditto */ + firstchar = zerofirstchar; /* Adjust for zero repeat */ + reqchar = zeroreqchar; /* Ditto */ } /* Remember whether this is a variable length repeat */ @@ -4483,7 +4754,7 @@ for (;; ptr++) if (*previous == OP_RECURSE) { - memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); + memmove(previous + 1 + LINK_SIZE, previous, IN_UCHARS(1 + LINK_SIZE)); *previous = OP_ONCE; PUT(previous, 1, 2 + 2*LINK_SIZE); previous[2 + 2*LINK_SIZE] = OP_KET; @@ -4506,37 +4777,36 @@ for (;; ptr++) /* If previous was a character match, abolish the item and generate a repeat item instead. If a char item has a minumum of more than one, ensure - that it is set in reqbyte - it might not be if a sequence such as x{3} is - the first thing in a branch because the x will have gone into firstbyte + that it is set in reqchar - it might not be if a sequence such as x{3} is + the first thing in a branch because the x will have gone into firstchar instead. */ if (*previous == OP_CHAR || *previous == OP_CHARI) { op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; - /* Deal with UTF-8 characters that take up more than one byte. It's + /* Deal with UTF characters that take up more than one character. It's easier to write this out separately than try to macrify it. Use c to - hold the length of the character in bytes, plus 0x80 to flag that it's a - length rather than a small character. 
*/ + hold the length of the character in bytes, plus UTF_LENGTH to flag that + it's a length rather than a small character. */ -#ifdef SUPPORT_UTF8 - if (utf8 && (code[-1] & 0x80) != 0) +#ifdef SUPPORT_UTF + if (utf && NOT_FIRSTCHAR(code[-1])) { - uschar *lastchar = code - 1; - while((*lastchar & 0xc0) == 0x80) lastchar--; + pcre_uchar *lastchar = code - 1; + BACKCHAR(lastchar); c = (int)(code - lastchar); /* Length of UTF-8 character */ - memcpy(utf8_char, lastchar, c); /* Save the char */ - c |= 0x80; /* Flag c as a length */ + memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */ + c |= UTF_LENGTH; /* Flag c as a length */ } else -#endif - - /* Handle the case of a single byte - either with no UTF8 support, or - with UTF-8 disabled, or for a UTF-8 character < 128. */ +#endif /* SUPPORT_UTF */ + /* Handle the case of a single charater - either with no UTF support, or + with UTF disabled, or for a single character UTF character. */ { c = code[-1]; - if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt; + if (repeat_min > 1) reqchar = c | req_caseopt | cd->req_varyopt; } /* If the repetition is unlimited, it pays to see if the next thing on @@ -4546,7 +4816,7 @@ for (;; ptr++) if (!possessive_quantifier && repeat_max < 0 && - check_auto_possessive(previous, utf8, ptr + 1, options, cd)) + check_auto_possessive(previous, utf, ptr + 1, options, cd)) { repeat_type = 0; /* Force greedy */ possessive_quantifier = TRUE; @@ -4567,7 +4837,7 @@ for (;; ptr++) c = previous[1]; if (!possessive_quantifier && repeat_max < 0 && - check_auto_possessive(previous, utf8, ptr + 1, options, cd)) + check_auto_possessive(previous, utf, ptr + 1, options, cd)) { repeat_type = 0; /* Force greedy */ possessive_quantifier = TRUE; @@ -4584,14 +4854,14 @@ for (;; ptr++) else if (*previous < OP_EODN) { - uschar *oldcode; + pcre_uchar *oldcode; int prop_type, prop_value; op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ c = *previous; if (!possessive_quantifier && 
repeat_max < 0 && - check_auto_possessive(previous, utf8, ptr + 1, options, cd)) + check_auto_possessive(previous, utf, ptr + 1, options, cd)) { repeat_type = 0; /* Force greedy */ possessive_quantifier = TRUE; @@ -4671,14 +4941,14 @@ for (;; ptr++) we have to insert the character for the previous code. For a repeated Unicode property match, there are two extra bytes that define the required property. In UTF-8 mode, long characters have their length in - c, with the 0x80 bit as a flag. */ + c, with the UTF_LENGTH bit as a flag. */ if (repeat_max < 0) { -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) +#ifdef SUPPORT_UTF + if (utf && (c & UTF_LENGTH) != 0) { - memcpy(code, utf8_char, c & 7); + memcpy(code, utf_chars, IN_UCHARS(c & 7)); code += c & 7; } else @@ -4700,10 +4970,10 @@ for (;; ptr++) else if (repeat_max != repeat_min) { -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) +#ifdef SUPPORT_UTF + if (utf && (c & UTF_LENGTH) != 0) { - memcpy(code, utf8_char, c & 7); + memcpy(code, utf_chars, IN_UCHARS(c & 7)); code += c & 7; } else @@ -4730,10 +5000,10 @@ for (;; ptr++) /* The character or character type itself comes last in all cases. */ -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) +#ifdef SUPPORT_UTF + if (utf && (c & UTF_LENGTH) != 0) { - memcpy(code, utf8_char, c & 7); + memcpy(code, utf_chars, IN_UCHARS(c & 7)); code += c & 7; } else @@ -4757,7 +5027,7 @@ for (;; ptr++) else if (*previous == OP_CLASS || *previous == OP_NCLASS || -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 *previous == OP_XCLASS || #endif *previous == OP_REF || @@ -4806,8 +5076,8 @@ for (;; ptr++) { register int i; int len = (int)(code - previous); - uschar *bralink = NULL; - uschar *brazeroptr = NULL; + pcre_uchar *bralink = NULL; + pcre_uchar *brazeroptr = NULL; /* Repeating a DEFINE group is pointless, but Perl allows the syntax, so we just ignore the repeat. 
*/ @@ -4860,8 +5130,8 @@ for (;; ptr++) if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ { *code = OP_END; - adjust_recurse(previous, 1, utf8, cd, save_hwm); - memmove(previous+1, previous, len); + adjust_recurse(previous, 1, utf, cd, save_hwm); + memmove(previous + 1, previous, IN_UCHARS(len)); code++; if (repeat_max == 0) { @@ -4884,8 +5154,8 @@ for (;; ptr++) { int offset; *code = OP_END; - adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd, save_hwm); - memmove(previous + 2 + LINK_SIZE, previous, len); + adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm); + memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); code += 2 + LINK_SIZE; *previous++ = OP_BRAZERO + repeat_type; *previous++ = OP_BRA; @@ -4938,13 +5208,13 @@ for (;; ptr++) else { - if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte; + if (groupsetfirstchar && reqchar < 0) reqchar = firstchar; for (i = 1; i < repeat_min; i++) { - uschar *hc; - uschar *this_hwm = cd->hwm; - memcpy(code, previous, len); + pcre_uchar *hc; + pcre_uchar *this_hwm = cd->hwm; + memcpy(code, previous, IN_UCHARS(len)); while (cd->hwm > cd->start_workspace + cd->workspace_size - WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) @@ -4953,8 +5223,8 @@ for (;; ptr++) int this_offset = this_hwm - cd->start_workspace; *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; - save_hwm = (uschar *)cd->start_workspace + save_offset; - this_hwm = (uschar *)cd->start_workspace + this_offset; + save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; + this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; } for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) @@ -5006,8 +5276,8 @@ for (;; ptr++) else for (i = repeat_max - 1; i >= 0; i--) { - uschar *hc; - uschar *this_hwm = cd->hwm; + pcre_uchar *hc; + pcre_uchar *this_hwm = cd->hwm; *code++ = OP_BRAZERO + repeat_type; @@ -5023,7 +5293,7 @@ for (;; ptr++) PUTINC(code, 0, offset); } - memcpy(code, previous, len); + memcpy(code, previous, 
IN_UCHARS(len)); /* Ensure there is enough workspace for forward references before copying them. */ @@ -5035,8 +5305,8 @@ for (;; ptr++) int this_offset = this_hwm - cd->start_workspace; *errorcodeptr = expand_workspace(cd); if (*errorcodeptr != 0) goto FAILED; - save_hwm = (uschar *)cd->start_workspace + save_offset; - this_hwm = (uschar *)cd->start_workspace + this_offset; + save_hwm = (pcre_uchar *)cd->start_workspace + save_offset; + this_hwm = (pcre_uchar *)cd->start_workspace + this_offset; } for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) @@ -5055,7 +5325,7 @@ for (;; ptr++) { int oldlinkoffset; int offset = (int)(code - bralink + 1); - uschar *bra = code - offset; + pcre_uchar *bra = code - offset; oldlinkoffset = GET(bra, 1); bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; *code++ = OP_KET; @@ -5091,8 +5361,8 @@ for (;; ptr++) else { - uschar *ketcode = code - 1 - LINK_SIZE; - uschar *bracode = ketcode - GET(ketcode, 1); + pcre_uchar *ketcode = code - 1 - LINK_SIZE; + pcre_uchar *bracode = ketcode - GET(ketcode, 1); /* Convert possessive ONCE brackets to non-capturing */ @@ -5114,10 +5384,10 @@ for (;; ptr++) if (lengthptr == NULL) { - uschar *scode = bracode; + pcre_uchar *scode = bracode; do { - if (could_be_empty_branch(scode, ketcode, utf8, cd)) + if (could_be_empty_branch(scode, ketcode, utf, cd)) { *bracode += OP_SBRA - OP_BRA; break; @@ -5140,8 +5410,8 @@ for (;; ptr++) { int nlen = (int)(code - bracode); *code = OP_END; - adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm); - memmove(bracode + 1+LINK_SIZE, bracode, nlen); + adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm); + memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen)); code += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE; *bracode = OP_BRAPOS; @@ -5210,15 +5480,16 @@ for (;; ptr++) int len; if (*tempcode == OP_TYPEEXACT) - tempcode += _pcre_OP_lengths[*tempcode] + - ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 
2 : 0); + tempcode += PRIV(OP_lengths)[*tempcode] + + ((tempcode[1 + IMM2_SIZE] == OP_PROP + || tempcode[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0); else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT) { - tempcode += _pcre_OP_lengths[*tempcode]; -#ifdef SUPPORT_UTF8 - if (utf8 && tempcode[-1] >= 0xc0) - tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f]; + tempcode += PRIV(OP_lengths)[*tempcode]; +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(tempcode[-1])) + tempcode += GET_EXTRALEN(tempcode[-1]); #endif } @@ -5255,8 +5526,8 @@ for (;; ptr++) default: *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm); - memmove(tempcode + 1+LINK_SIZE, tempcode, len); + adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm); + memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; tempcode[0] = OP_ONCE; @@ -5268,7 +5539,7 @@ for (;; ptr++) } /* In all case we no longer have a previous item. We also set the - "follows varying string" flag for subsequently encountered reqbytes if + "follows varying string" flag for subsequently encountered reqchars if it isn't already set and we have just passed a varying length item. */ END_REPEAT: @@ -5291,16 +5562,18 @@ for (;; ptr++) /* First deal with various "verbs" that can be introduced by '*'. 
*/ - if (*(++ptr) == CHAR_ASTERISK && - ((cd->ctypes[ptr[1]] & ctype_letter) != 0 || ptr[1] == ':')) + ptr++; + if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' + || (MAX_255(ptr[1]) && ((cd->ctypes[ptr[1]] & ctype_letter) != 0)))) { int i, namelen; int arglen = 0; const char *vn = verbnames; - const uschar *name = ptr + 1; - const uschar *arg = NULL; + const pcre_uchar *name = ptr + 1; + const pcre_uchar *arg = NULL; previous = NULL; - while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {}; + ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; namelen = (int)(ptr - name); /* It appears that Perl allows any characters whatsoever, other than @@ -5325,7 +5598,7 @@ for (;; ptr++) for (i = 0; i < verbcount; i++) { if (namelen == verbs[i].len && - strncmp((char *)name, vn, namelen) == 0) + STRNCMP_UC_C8(name, vn, namelen) == 0) { /* Check for open captures before ACCEPT and convert it to ASSERT_ACCEPT if in an assertion. */ @@ -5346,8 +5619,8 @@ for (;; ptr++) } *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; - /* Do not set firstbyte after *ACCEPT */ - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + /* Do not set firstchar after *ACCEPT */ + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; } /* Handle other cases with/without an argument */ @@ -5373,7 +5646,7 @@ for (;; ptr++) *code = verbs[i].op_arg; if (*code++ == OP_THEN_ARG) cd->external_flags |= PCRE_HASTHEN; *code++ = arglen; - memcpy(code, arg, arglen); + memcpy(code, arg, IN_UCHARS(arglen)); code += arglen; *code++ = 0; } @@ -5396,8 +5669,8 @@ for (;; ptr++) { int i, set, unset, namelen; int *optset; - const uschar *name; - uschar *slot; + const pcre_uchar *name; + pcre_uchar *slot; switch (*(++ptr)) { @@ -5450,10 +5723,10 @@ for (;; ptr++) break; /* Most other conditions use OP_CREF (a couple change to OP_RREF - below), and all need to skip 3 bytes at the start of the group. */ + below), and all need to skip 1+IMM2_SIZE bytes at the start of the group. 
*/ code[1+LINK_SIZE] = OP_CREF; - skipbytes = 3; + skipbytes = 1+IMM2_SIZE; refsign = -1; /* Check for a test for recursion in a named group. */ @@ -5486,7 +5759,7 @@ for (;; ptr++) /* We now expect to read a name; any thing else is an error */ - if ((cd->ctypes[ptr[1]] & ctype_word) == 0) + if (!MAX_255(ptr[1]) || (cd->ctypes[ptr[1]] & ctype_word) == 0) { ptr += 1; /* To get the right offset */ *errorcodeptr = ERR28; @@ -5497,11 +5770,10 @@ for (;; ptr++) recno = 0; name = ++ptr; - while ((cd->ctypes[*ptr] & ctype_word) != 0) + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) { if (recno >= 0) - recno = ((digitab[*ptr] & ctype_digit) != 0)? - recno * 10 + *ptr - CHAR_0 : -1; + recno = (IS_DIGIT(*ptr))? recno * 10 + *ptr - CHAR_0 : -1; ptr++; } namelen = (int)(ptr - name); @@ -5549,7 +5821,7 @@ for (;; ptr++) slot = cd->name_table; for (i = 0; i < cd->names_found; i++) { - if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break; + if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0) break; slot += cd->name_entry_size; } @@ -5565,7 +5837,7 @@ for (;; ptr++) /* Search the pattern for a forward reference */ else if ((i = find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf8)) > 0) + (options & PCRE_EXTENDED) != 0, utf)) > 0) { PUT2(code, 2+LINK_SIZE, i); code[1+LINK_SIZE]++; @@ -5591,7 +5863,7 @@ for (;; ptr++) recno = 0; for (i = 1; i < namelen; i++) { - if ((digitab[name[i]] & ctype_digit) == 0) + if (!IS_DIGIT(name[i])) { *errorcodeptr = ERR15; goto FAILED; @@ -5606,7 +5878,7 @@ for (;; ptr++) /* Similarly, check for the (?(DEFINE) "condition", which is always false. 
*/ - else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0) + else if (namelen == 6 && STRNCMP_UC_C8(name, STRING_DEFINE, 6) == 0) { code[1+LINK_SIZE] = OP_DEF; skipbytes = 1; @@ -5669,7 +5941,8 @@ for (;; ptr++) break; default: /* Could be name define, else bad */ - if ((cd->ctypes[ptr[1]] & ctype_word) != 0) goto DEFINE_NAME; + if (MAX_255(ptr[1]) && (cd->ctypes[ptr[1]] & ctype_word) != 0) + goto DEFINE_NAME; ptr++; /* Correct offset for error */ *errorcodeptr = ERR24; goto FAILED; @@ -5691,8 +5964,9 @@ for (;; ptr++) *code++ = OP_CALLOUT; { int n = 0; - while ((digitab[*(++ptr)] & ctype_digit) != 0) - n = n * 10 + *ptr - CHAR_0; + ptr++; + while(IS_DIGIT(*ptr)) + n = n * 10 + *ptr++ - CHAR_0; if (*ptr != CHAR_RIGHT_PARENTHESIS) { *errorcodeptr = ERR39; @@ -5737,7 +6011,7 @@ for (;; ptr++) CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; name = ++ptr; - while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; namelen = (int)(ptr - name); /* In the pre-compile phase, just do a syntax check. 
*/ @@ -5754,9 +6028,9 @@ for (;; ptr++) *errorcodeptr = ERR49; goto FAILED; } - if (namelen + 3 > cd->name_entry_size) + if (namelen + IMM2_SIZE + 1 > cd->name_entry_size) { - cd->name_entry_size = namelen + 3; + cd->name_entry_size = namelen + IMM2_SIZE + 1; if (namelen > MAX_NAME_SIZE) { *errorcodeptr = ERR48; @@ -5785,10 +6059,10 @@ for (;; ptr++) for (i = 0; i < cd->names_found; i++) { - int crc = memcmp(name, slot+2, namelen); + int crc = memcmp(name, slot+IMM2_SIZE, IN_UCHARS(namelen)); if (crc == 0) { - if (slot[2+namelen] == 0) + if (slot[IMM2_SIZE+namelen] == 0) { if (GET2(slot, 0) != cd->bracount + 1 && (options & PCRE_DUPNAMES) == 0) @@ -5809,7 +6083,7 @@ for (;; ptr++) if (crc < 0) { memmove(slot + cd->name_entry_size, slot, - (cd->names_found - i) * cd->name_entry_size); + IN_UCHARS((cd->names_found - i) * cd->name_entry_size)); break; } @@ -5823,7 +6097,7 @@ for (;; ptr++) if (!dupname) { - uschar *cslot = cd->name_table; + pcre_uchar *cslot = cd->name_table; for (i = 0; i < cd->names_found; i++) { if (cslot != slot) @@ -5840,8 +6114,8 @@ for (;; ptr++) } PUT2(slot, 0, cd->bracount + 1); - memcpy(slot + 2, name, namelen); - slot[2+namelen] = 0; + memcpy(slot + IMM2_SIZE, name, IN_UCHARS(namelen)); + slot[IMM2_SIZE + namelen] = 0; } } @@ -5867,7 +6141,7 @@ for (;; ptr++) NAMED_REF_OR_RECURSE: name = ++ptr; - while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; + while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_word) != 0) ptr++; namelen = (int)(ptr - name); /* In the pre-compile phase, do a syntax check. 
We used to just set @@ -5879,7 +6153,7 @@ for (;; ptr++) if (lengthptr != NULL) { - const uschar *temp; + const pcre_uchar *temp; if (namelen == 0) { @@ -5909,7 +6183,7 @@ for (;; ptr++) temp = cd->end_pattern; cd->end_pattern = ptr; recno = find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf8); + (options & PCRE_EXTENDED) != 0, utf); cd->end_pattern = temp; if (recno < 0) recno = 0; /* Forward ref; set dummy number */ } @@ -5924,8 +6198,8 @@ for (;; ptr++) slot = cd->name_table; for (i = 0; i < cd->names_found; i++) { - if (strncmp((char *)name, (char *)slot+2, namelen) == 0 && - slot[2+namelen] == 0) + if (STRNCMP_UC_UC(name, slot+IMM2_SIZE, namelen) == 0 && + slot[IMM2_SIZE+namelen] == 0) break; slot += cd->name_entry_size; } @@ -5936,7 +6210,7 @@ for (;; ptr++) } else if ((recno = /* Forward back reference */ find_parens(cd, name, namelen, - (options & PCRE_EXTENDED) != 0, utf8)) <= 0) + (options & PCRE_EXTENDED) != 0, utf)) <= 0) { *errorcodeptr = ERR15; goto FAILED; @@ -5961,7 +6235,7 @@ for (;; ptr++) case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9: { - const uschar *called; + const pcre_uchar *called; terminator = CHAR_RIGHT_PARENTHESIS; /* Come here from the \g<...> and \g'...' 
code (Oniguruma @@ -5975,7 +6249,7 @@ for (;; ptr++) if ((refsign = *ptr) == CHAR_PLUS) { ptr++; - if ((digitab[*ptr] & ctype_digit) == 0) + if (!IS_DIGIT(*ptr)) { *errorcodeptr = ERR63; goto FAILED; @@ -5983,13 +6257,13 @@ for (;; ptr++) } else if (refsign == CHAR_MINUS) { - if ((digitab[ptr[1]] & ctype_digit) == 0) + if (!IS_DIGIT(ptr[1])) goto OTHER_CHAR_AFTER_QUERY; ptr++; } recno = 0; - while((digitab[*ptr] & ctype_digit) != 0) + while(IS_DIGIT(*ptr)) recno = recno * 10 + *ptr++ - CHAR_0; if (*ptr != terminator) @@ -6040,14 +6314,14 @@ for (;; ptr++) { *code = OP_END; if (recno != 0) - called = _pcre_find_bracket(cd->start_code, utf8, recno); + called = PRIV(find_bracket)(cd->start_code, utf, recno); /* Forward reference */ if (called == NULL) { if (find_parens(cd, NULL, recno, - (options & PCRE_EXTENDED) != 0, utf8) < 0) + (options & PCRE_EXTENDED) != 0, utf) < 0) { *errorcodeptr = ERR15; goto FAILED; @@ -6077,7 +6351,7 @@ for (;; ptr++) conditional subpatterns will be picked up then. */ else if (GET(called, 1) == 0 && cond_depth <= 0 && - could_be_empty(called, code, bcptr, utf8, cd)) + could_be_empty(called, code, bcptr, utf, cd)) { *errorcodeptr = ERR40; goto FAILED; @@ -6085,18 +6359,18 @@ for (;; ptr++) } /* Insert the recursion/subroutine item. It does not have a set first - byte (relevant if it is repeated, because it will then be wrapped - with ONCE brackets). */ + character (relevant if it is repeated, because it will then be + wrapped with ONCE brackets). */ *code = OP_RECURSE; PUT(code, 1, (int)(called - cd->start_code)); code += 1 + LINK_SIZE; - groupsetfirstbyte = FALSE; + groupsetfirstchar = FALSE; } /* Can't determine a first byte now */ - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; continue; @@ -6153,7 +6427,7 @@ for (;; ptr++) both phases. If we are not at the pattern start, reset the greedy defaults and the - case value for firstbyte and reqbyte. 
*/ + case value for firstchar and reqchar. */ if (*ptr == CHAR_RIGHT_PARENTHESIS) { @@ -6166,7 +6440,7 @@ for (;; ptr++) { greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); greedy_non_default = greedy_default ^ 1; - req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; + req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS:0; } /* Change options at this level, and pass them back for use @@ -6203,7 +6477,7 @@ for (;; ptr++) NUMBERED_GROUP: cd->bracount += 1; PUT2(code, 1+LINK_SIZE, cd->bracount); - skipbytes = 2; + skipbytes = IMM2_SIZE; } /* Process nested bracketed regex. Assertions used not to be repeatable, @@ -6229,8 +6503,8 @@ for (;; ptr++) skipbytes, /* Skip over bracket number */ cond_depth + ((bravalue == OP_COND)?1:0), /* Depth of condition subpatterns */ - &subfirstbyte, /* For possible first char */ - &subreqbyte, /* For possible last char */ + &subfirstchar, /* For possible first char */ + &subreqchar, /* For possible last char */ bcptr, /* Current branch chain */ cd, /* Tables block */ (lengthptr == NULL)? NULL : /* Actual compile phase */ @@ -6258,7 +6532,7 @@ for (;; ptr++) if (bravalue == OP_COND && lengthptr == NULL) { - uschar *tc = code; + pcre_uchar *tc = code; int condcount = 0; do { @@ -6281,7 +6555,7 @@ for (;; ptr++) } /* A "normal" conditional group. If there is just one branch, we must not - make use of its firstbyte or reqbyte, because this is equivalent to an + make use of its firstchar or reqchar, because this is equivalent to an empty second branch. */ else @@ -6291,7 +6565,7 @@ for (;; ptr++) *errorcodeptr = ERR27; goto FAILED; } - if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE; + if (condcount == 1) subfirstchar = subreqchar = REQ_NONE; } } @@ -6335,55 +6609,55 @@ for (;; ptr++) /* Handle updating of the required and first characters for other types of group. Update for normal brackets of all kinds, and conditions with two branches (see code above). 
If the bracket is followed by a quantifier with - zero repeat, we have to back off. Hence the definition of zeroreqbyte and - zerofirstbyte outside the main loop so that they can be accessed for the + zero repeat, we have to back off. Hence the definition of zeroreqchar and + zerofirstchar outside the main loop so that they can be accessed for the back off. */ - zeroreqbyte = reqbyte; - zerofirstbyte = firstbyte; - groupsetfirstbyte = FALSE; + zeroreqchar = reqchar; + zerofirstchar = firstchar; + groupsetfirstchar = FALSE; if (bravalue >= OP_ONCE) { - /* If we have not yet set a firstbyte in this branch, take it from the + /* If we have not yet set a firstchar in this branch, take it from the subpattern, remembering that it was set here so that a repeat of more - than one can replicate it as reqbyte if necessary. If the subpattern has - no firstbyte, set "none" for the whole branch. In both cases, a zero - repeat forces firstbyte to "none". */ + than one can replicate it as reqchar if necessary. If the subpattern has + no firstchar, set "none" for the whole branch. In both cases, a zero + repeat forces firstchar to "none". */ - if (firstbyte == REQ_UNSET) + if (firstchar == REQ_UNSET) { - if (subfirstbyte >= 0) + if (subfirstchar >= 0) { - firstbyte = subfirstbyte; - groupsetfirstbyte = TRUE; + firstchar = subfirstchar; + groupsetfirstchar = TRUE; } - else firstbyte = REQ_NONE; - zerofirstbyte = REQ_NONE; + else firstchar = REQ_NONE; + zerofirstchar = REQ_NONE; } - /* If firstbyte was previously set, convert the subpattern's firstbyte - into reqbyte if there wasn't one, using the vary flag that was in + /* If firstchar was previously set, convert the subpattern's firstchar + into reqchar if there wasn't one, using the vary flag that was in existence beforehand. 
*/ - else if (subfirstbyte >= 0 && subreqbyte < 0) - subreqbyte = subfirstbyte | tempreqvary; + else if (subfirstchar >= 0 && subreqchar < 0) + subreqchar = subfirstchar | tempreqvary; /* If the subpattern set a required byte (or set a first byte that isn't really the first byte - see above), set it. */ - if (subreqbyte >= 0) reqbyte = subreqbyte; + if (subreqchar >= 0) reqchar = subreqchar; } - /* For a forward assertion, we take the reqbyte, if set. This can be + /* For a forward assertion, we take the reqchar, if set. This can be helpful if the pattern that follows the assertion doesn't set a different - char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte + char. For example, it's useful for /(?=abcde).+/. We can't set firstchar for an assertion, however because it leads to incorrect effect for patterns - such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead - of a firstbyte. This is overcome by a scan at the end if there's no - firstbyte, looking for an asserted first char. */ + such as /(?=a)a.+/ when the "real" "a" would then become a reqchar instead + of a firstchar. This is overcome by a scan at the end if there's no + firstchar, looking for an asserted first char. */ - else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte; + else if (bravalue == OP_ASSERT && subreqchar >= 0) reqchar = subreqchar; break; /* End of processing '(' */ @@ -6416,13 +6690,13 @@ for (;; ptr++) /* For metasequences that actually match a character, we disable the setting of a first character if it hasn't already been set. */ - if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z) - firstbyte = REQ_NONE; + if (firstchar == REQ_UNSET && -c > ESC_b && -c < ESC_Z) + firstchar = REQ_NONE; /* Set values to reset to if this is followed by a zero repeat. 
*/ - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; + zerofirstchar = firstchar; + zeroreqchar = reqchar; /* \g or \g'name' is a subroutine call by name and \g or \g'n' is a subroutine call by number (Oniguruma syntax). In fact, the value @@ -6433,7 +6707,7 @@ for (;; ptr++) if (-c == ESC_g) { - const uschar *p; + const pcre_uchar *p; save_hwm = cd->hwm; /* Normally this is set when '(' is read */ terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; @@ -6450,10 +6724,11 @@ for (;; ptr++) if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS) { - BOOL isnumber = TRUE; + BOOL is_a_number = TRUE; for (p = ptr + 1; *p != 0 && *p != terminator; p++) { - if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE; + if (!MAX_255(*p)) { is_a_number = FALSE; break; } + if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE; if ((cd->ctypes[*p] & ctype_word) == 0) break; } if (*p != terminator) @@ -6461,7 +6736,7 @@ for (;; ptr++) *errorcodeptr = ERR57; break; } - if (isnumber) + if (is_a_number) { ptr++; goto HANDLE_NUMERICAL_RECURSION; @@ -6473,7 +6748,7 @@ for (;; ptr++) /* Test a signed number in angle brackets or quotes. */ p = ptr + 2; - while ((digitab[*p] & ctype_digit) != 0) p++; + while (IS_DIGIT(*p)) p++; if (*p != terminator) { *errorcodeptr = ERR57; @@ -6501,7 +6776,7 @@ for (;; ptr++) goto NAMED_REF_OR_RECURSE; } - /* Back references are handled specially; must disable firstbyte if + /* Back references are handled specially; must disable firstchar if not set to cope with cases like (?=(\w+))\1: which would otherwise set ':' later. */ @@ -6511,7 +6786,7 @@ for (;; ptr++) recno = -c - ESC_REF; HANDLE_REFERENCE: /* Come here from named backref handling */ - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + if (firstchar == REQ_UNSET) firstchar = REQ_NONE; previous = code; *code++ = ((options & PCRE_CASELESS) != 0)? 
OP_REFI : OP_REF; PUT2INC(code, 0, recno); @@ -6578,7 +6853,7 @@ for (;; ptr++) { previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; - *code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c; + *code++ = (!utf && c == -ESC_C)? OP_ALLANY : -c; } } continue; @@ -6588,9 +6863,9 @@ for (;; ptr++) a value > 127. We set its representation in the length/buffer, and then handle it as a data character. */ -#ifdef SUPPORT_UTF8 - if (utf8 && c > 127) - mclength = _pcre_ord2utf8(c, mcbuffer); +#ifdef SUPPORT_UTF + if (utf && c > MAX_VALUE_FOR_SINGLE_CHAR) + mclength = PRIV(ord2utf)(c, mcbuffer); else #endif @@ -6611,12 +6886,9 @@ for (;; ptr++) mclength = 1; mcbuffer[0] = c; -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 0xc0) - { - while ((ptr[1] & 0xc0) == 0x80) - mcbuffer[mclength++] = *(++ptr); - } +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(c)) + ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr)); #endif /* At this point we have the character's bytes in mcbuffer, and the length @@ -6634,34 +6906,34 @@ for (;; ptr++) /* Set the first and required bytes appropriately. If no previous first byte, set it from this character, but revert to none on a zero repeat. - Otherwise, leave the firstbyte value alone, and don't change it on a zero + Otherwise, leave the firstchar value alone, and don't change it on a zero repeat. */ - if (firstbyte == REQ_UNSET) + if (firstchar == REQ_UNSET) { - zerofirstbyte = REQ_NONE; - zeroreqbyte = reqbyte; + zerofirstchar = REQ_NONE; + zeroreqchar = reqchar; - /* If the character is more than one byte long, we can set firstbyte + /* If the character is more than one byte long, we can set firstchar only if it is not to be matched caselessly. 
*/ if (mclength == 1 || req_caseopt == 0) { - firstbyte = mcbuffer[0] | req_caseopt; - if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt; + firstchar = mcbuffer[0] | req_caseopt; + if (mclength != 1) reqchar = code[-1] | cd->req_varyopt; } - else firstbyte = reqbyte = REQ_NONE; + else firstchar = reqchar = REQ_NONE; } - /* firstbyte was previously set; we can set reqbyte only if the length is + /* firstchar was previously set; we can set reqchar only if the length is 1 or the matching is caseful. */ else { - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; + zerofirstchar = firstchar; + zeroreqchar = reqchar; if (mclength == 1 || req_caseopt == 0) - reqbyte = code[-1] | req_caseopt | cd->req_varyopt; + reqchar = code[-1] | req_caseopt | cd->req_varyopt; } break; /* End of literal character handling */ @@ -6701,8 +6973,8 @@ Arguments: reset_bracount TRUE to reset the count for each branch skipbytes skip this many bytes at start (for brackets and OP_COND) cond_depth depth of nesting for conditional subpatterns - firstbyteptr place to put the first required character, or a negative number - reqbyteptr place to put the last required character, or a negative number + firstcharptr place to put the first required character, or a negative number + reqcharptr place to put the last required character, or a negative number bcptr pointer to the chain of currently open branches cd points to the data block with tables pointers etc. 
lengthptr NULL during the real compile phase @@ -6712,20 +6984,20 @@ Returns: TRUE on success */ static BOOL -compile_regex(int options, uschar **codeptr, const uschar **ptrptr, +compile_regex(int options, pcre_uchar **codeptr, const pcre_uchar **ptrptr, int *errorcodeptr, BOOL lookbehind, BOOL reset_bracount, int skipbytes, - int cond_depth, int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, - compile_data *cd, int *lengthptr) + int cond_depth, pcre_int32 *firstcharptr, pcre_int32 *reqcharptr, + branch_chain *bcptr, compile_data *cd, int *lengthptr) { -const uschar *ptr = *ptrptr; -uschar *code = *codeptr; -uschar *last_branch = code; -uschar *start_bracket = code; -uschar *reverse_count = NULL; +const pcre_uchar *ptr = *ptrptr; +pcre_uchar *code = *codeptr; +pcre_uchar *last_branch = code; +pcre_uchar *start_bracket = code; +pcre_uchar *reverse_count = NULL; open_capitem capitem; int capnumber = 0; -int firstbyte, reqbyte; -int branchfirstbyte, branchreqbyte; +pcre_int32 firstchar, reqchar; +pcre_int32 branchfirstchar, branchreqchar; int length; int orig_bracount; int max_bracount; @@ -6734,7 +7006,7 @@ branch_chain bc; bc.outer = bcptr; bc.current_branch = code; -firstbyte = reqbyte = REQ_UNSET; +firstchar = reqchar = REQ_UNSET; /* Accumulate the length for use in the pre-compile phase. Start with the length of the BRA and KET and any extra bytes that are required at the @@ -6793,8 +7065,8 @@ for (;;) /* Now compile the branch; in the pre-compile phase its length gets added into the length. */ - if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstbyte, - &branchreqbyte, &bc, cond_depth, cd, + if (!compile_branch(&options, &code, &ptr, errorcodeptr, &branchfirstchar, + &branchreqchar, &bc, cond_depth, cd, (lengthptr == NULL)? 
NULL : &length)) { *ptrptr = ptr; @@ -6810,43 +7082,43 @@ for (;;) if (lengthptr == NULL) { - /* If this is the first branch, the firstbyte and reqbyte values for the + /* If this is the first branch, the firstchar and reqchar values for the branch become the values for the regex. */ if (*last_branch != OP_ALT) { - firstbyte = branchfirstbyte; - reqbyte = branchreqbyte; + firstchar = branchfirstchar; + reqchar = branchreqchar; } - /* If this is not the first branch, the first char and reqbyte have to + /* If this is not the first branch, the first char and reqchar have to match the values from all the previous branches, except that if the - previous value for reqbyte didn't have REQ_VARY set, it can still match, + previous value for reqchar didn't have REQ_VARY set, it can still match, and we set REQ_VARY for the regex. */ else { - /* If we previously had a firstbyte, but it doesn't match the new branch, - we have to abandon the firstbyte for the regex, but if there was - previously no reqbyte, it takes on the value of the old firstbyte. */ + /* If we previously had a firstchar, but it doesn't match the new branch, + we have to abandon the firstchar for the regex, but if there was + previously no reqchar, it takes on the value of the old firstchar. */ - if (firstbyte >= 0 && firstbyte != branchfirstbyte) + if (firstchar >= 0 && firstchar != branchfirstchar) { - if (reqbyte < 0) reqbyte = firstbyte; - firstbyte = REQ_NONE; + if (reqchar < 0) reqchar = firstchar; + firstchar = REQ_NONE; } - /* If we (now or from before) have no firstbyte, a firstbyte from the - branch becomes a reqbyte if there isn't a branch reqbyte. */ + /* If we (now or from before) have no firstchar, a firstchar from the + branch becomes a reqchar if there isn't a branch reqchar. 
*/ - if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0) - branchreqbyte = branchfirstbyte; + if (firstchar < 0 && branchfirstchar >= 0 && branchreqchar < 0) + branchreqchar = branchfirstchar; - /* Now ensure that the reqbytes match */ + /* Now ensure that the reqchars match */ - if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) - reqbyte = REQ_NONE; - else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ + if ((reqchar & ~REQ_VARY) != (branchreqchar & ~REQ_VARY)) + reqchar = REQ_NONE; + else reqchar |= branchreqchar; /* To "or" REQ_VARY */ } /* If lookbehind, check that this branch matches a fixed-length string, and @@ -6916,7 +7188,7 @@ for (;;) if (cd->open_caps->flag) { memmove(start_bracket + 1 + LINK_SIZE, start_bracket, - code - start_bracket); + IN_UCHARS(code - start_bracket)); *start_bracket = OP_ONCE; code += 1 + LINK_SIZE; PUT(start_bracket, 1, (int)(code - start_bracket)); @@ -6936,8 +7208,8 @@ for (;;) *codeptr = code; *ptrptr = ptr; - *firstbyteptr = firstbyte; - *reqbyteptr = reqbyte; + *firstcharptr = firstchar; + *reqcharptr = reqchar; if (lengthptr != NULL) { if (OFLOW_MAX - *lengthptr < length) @@ -7018,12 +7290,12 @@ Returns: TRUE or FALSE */ static BOOL -is_anchored(register const uschar *code, unsigned int bracket_map, +is_anchored(register const pcre_uchar *code, unsigned int bracket_map, unsigned int backref_map) { do { - const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code], - FALSE); + const pcre_uchar *scode = first_significant_code( + code + PRIV(OP_lengths)[*code], FALSE); register int op = *scode; /* Non-capturing brackets */ @@ -7095,12 +7367,12 @@ Returns: TRUE or FALSE */ static BOOL -is_startline(const uschar *code, unsigned int bracket_map, +is_startline(const pcre_uchar *code, unsigned int bracket_map, unsigned int backref_map) { do { - const uschar *scode = first_significant_code(code + _pcre_OP_lengths[*code], - FALSE); + const pcre_uchar *scode = first_significant_code( + code + 
PRIV(OP_lengths)[*code], FALSE); register int op = *scode; /* If we are at the start of a conditional assertion group, *both* the @@ -7111,7 +7383,7 @@ do { if (op == OP_COND) { scode += 1 + LINK_SIZE; - if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT]; + if (*scode == OP_CALLOUT) scode += PRIV(OP_lengths)[OP_CALLOUT]; switch (*scode) { case OP_CREF: @@ -7198,14 +7470,15 @@ Returns: -1 or the fixed first char */ static int -find_firstassertedchar(const uschar *code, BOOL inassert) +find_firstassertedchar(const pcre_uchar *code, BOOL inassert) { register int c = -1; do { int d; int xl = (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? 2:0; - const uschar *scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE); + *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0; + const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl, + TRUE); register int op = *scode; switch(op) @@ -7229,7 +7502,7 @@ do { break; case OP_EXACT: - scode += 2; + scode += IMM2_SIZE; /* Fall through */ case OP_CHAR: @@ -7242,7 +7515,7 @@ do { break; case OP_EXACTI: - scode += 2; + scode += IMM2_SIZE; /* Fall through */ case OP_CHARI: @@ -7285,28 +7558,45 @@ Returns: pointer to compiled data block, or NULL on error, with errorptr and erroroffset set */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION pcre_compile(const char *pattern, int options, const char **errorptr, int *erroroffset, const unsigned char *tables) +#else +PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION +pcre16_compile(PCRE_SPTR16 pattern, int options, const char **errorptr, + int *erroroffset, const unsigned char *tables) +#endif { +#ifdef COMPILE_PCRE8 return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); +#else +return pcre16_compile2(pattern, options, NULL, errorptr, erroroffset, tables); +#endif } +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION pcre_compile2(const char *pattern, int options, int 
*errorcodeptr, const char **errorptr, int *erroroffset, const unsigned char *tables) +#else +PCRE_EXP_DEFN pcre16 * PCRE_CALL_CONVENTION +pcre16_compile2(PCRE_SPTR16 pattern, int options, int *errorcodeptr, + const char **errorptr, int *erroroffset, const unsigned char *tables) +#endif { -real_pcre *re; +REAL_PCRE *re; int length = 1; /* For final END opcode */ -int firstbyte, reqbyte, newline; +pcre_int32 firstchar, reqchar; +int newline; int errorcode = 0; int skipatstart = 0; -BOOL utf8; +BOOL utf; size_t size; -uschar *code; -const uschar *codestart; -const uschar *ptr; +pcre_uchar *code; +const pcre_uchar *codestart; +const pcre_uchar *ptr; compile_data compile_block; compile_data *cd = &compile_block; @@ -7317,11 +7607,11 @@ this purpose. The same space is used in the second phase for remembering where to fill in forward references to subpatterns. That may overflow, in which case new memory is obtained from malloc(). */ -uschar cworkspace[COMPILE_WORK_SIZE]; +pcre_uchar cworkspace[COMPILE_WORK_SIZE]; /* Set this early so that early errors get offset 0. 
*/ -ptr = (const uschar *)pattern; +ptr = (const pcre_uchar *)pattern; /* We can't pass back an error message if errorptr is NULL; I guess the best we can do is just return NULL, but we can set a code value if there is a code @@ -7348,7 +7638,7 @@ if (erroroffset == NULL) /* Set up pointers to the individual character tables */ -if (tables == NULL) tables = _pcre_default_tables; +if (tables == NULL) tables = PRIV(default_tables); cd->lcc = tables + lcc_offset; cd->fcc = tables + fcc_offset; cd->cbits = tables + cbits_offset; @@ -7371,27 +7661,33 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && int newnl = 0; int newbsr = 0; - if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0) +#ifdef COMPILE_PCRE8 + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 5) == 0) { skipatstart += 7; options |= PCRE_UTF8; continue; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0) +#endif +#ifdef COMPILE_PCRE16 + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UTF_RIGHTPAR, 6) == 0) + { skipatstart += 8; options |= PCRE_UTF16; continue; } +#endif + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_UCP_RIGHTPAR, 4) == 0) { skipatstart += 6; options |= PCRE_UCP; continue; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0) { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; } - if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0) + if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0) { skipatstart += 5; newnl = PCRE_NEWLINE_CR; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3) == 0) { skipatstart += 5; newnl = PCRE_NEWLINE_LF; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CRLF_RIGHTPAR, 5) 
== 0) { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANY_RIGHTPAR, 4) == 0) { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_ANYCRLF_RIGHTPAR, 8) == 0) { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0) { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; } - else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0) + else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_BSR_UNICODE_RIGHTPAR, 12) == 0) { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; } if (newnl != 0) @@ -7401,22 +7697,27 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS && else break; } -utf8 = (options & PCRE_UTF8) != 0; +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +utf = (options & PCRE_UTF8) != 0; -/* Can't support UTF8 unless PCRE has been compiled to include the code. The -return of an error code from _pcre_valid_utf8() is a new feature, introduced in +/* Can't support UTF unless PCRE has been compiled to include the code. The +return of an error code from PRIV(valid_utf)() is a new feature, introduced in release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is not used here. 
*/ -#ifdef SUPPORT_UTF8 -if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && - (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0) +#ifdef SUPPORT_UTF +if (utf && (options & PCRE_NO_UTF8_CHECK) == 0 && + (errorcode = PRIV(valid_utf)((PCRE_PUCHAR)pattern, -1, erroroffset)) != 0) { +#ifdef COMPILE_PCRE8 errorcode = ERR44; +#else + errorcode = ERR74; +#endif goto PCRE_EARLY_ERROR_RETURN2; } #else -if (utf8) +if (utf) { errorcode = ERR32; goto PCRE_EARLY_ERROR_RETURN; @@ -7492,7 +7793,10 @@ cd->backref_map = 0; /* Reflect pattern for debugging output */ DPRINTF(("------------------------------------------------------------------\n")); -DPRINTF(("%s\n", pattern)); +#ifdef PCRE_DEBUG +print_puchar(stdout, (PCRE_PUCHAR)pattern); +#endif +DPRINTF(("\n")); /* Pretend to compile the pattern while actually just accumulating the length of memory required. This behaviour is triggered by passing a non-NULL final @@ -7509,9 +7813,10 @@ cd->start_code = cworkspace; cd->hwm = cworkspace; cd->start_workspace = cworkspace; cd->workspace_size = COMPILE_WORK_SIZE; -cd->start_pattern = (const uschar *)pattern; -cd->end_pattern = (const uschar *)(pattern + strlen(pattern)); +cd->start_pattern = (const pcre_uchar *)pattern; +cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern)); cd->req_varyopt = 0; +cd->assert_depth = 0; cd->external_options = options; cd->external_flags = 0; cd->open_caps = NULL; @@ -7526,11 +7831,11 @@ ptr += skipatstart; code = cworkspace; *code = OP_BRA; (void)compile_regex(cd->external_options, &code, &ptr, &errorcode, FALSE, - FALSE, 0, 0, &firstbyte, &reqbyte, NULL, cd, &length); + FALSE, 0, 0, &firstchar, &reqchar, NULL, cd, &length); if (errorcode != 0) goto PCRE_EARLY_ERROR_RETURN; DPRINTF(("end pre-compile: length=%d workspace=%d\n", length, - cd->hwm - cworkspace)); + (int)(cd->hwm - cworkspace))); if (length > MAX_PATTERN_SIZE) { @@ -7543,8 +7848,8 @@ externally provided function. 
Integer overflow should no longer be possible because nowadays we limit the maximum value of cd->names_found and cd->name_entry_size. */ -size = length + sizeof(real_pcre) + cd->names_found * cd->name_entry_size; -re = (real_pcre *)(pcre_malloc)(size); +size = sizeof(REAL_PCRE) + (length + cd->names_found * cd->name_entry_size) * sizeof(pcre_uchar); +re = (REAL_PCRE *)(PUBL(malloc))(size); if (re == NULL) { @@ -7563,13 +7868,13 @@ re->size = (int)size; re->options = cd->external_options; re->flags = cd->external_flags; re->dummy1 = 0; -re->first_byte = 0; -re->req_byte = 0; -re->name_table_offset = sizeof(real_pcre); +re->first_char = 0; +re->req_char = 0; +re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar); re->name_entry_size = cd->name_entry_size; re->name_count = cd->names_found; re->ref_count = 0; -re->tables = (tables == _pcre_default_tables)? NULL : tables; +re->tables = (tables == PRIV(default_tables))? NULL : tables; re->nullpad = NULL; /* The starting points of the name/number translation table and of the code are @@ -7583,10 +7888,10 @@ cd->final_bracount = cd->bracount; /* Save for checking forward references */ cd->assert_depth = 0; cd->bracount = 0; cd->names_found = 0; -cd->name_table = (uschar *)re + re->name_table_offset; +cd->name_table = (pcre_uchar *)re + re->name_table_offset; codestart = cd->name_table + re->name_entry_size * re->name_count; cd->start_code = codestart; -cd->hwm = (uschar *)(cd->start_workspace); +cd->hwm = (pcre_uchar *)(cd->start_workspace); cd->req_varyopt = 0; cd->had_accept = FALSE; cd->check_lookbehind = FALSE; @@ -7596,16 +7901,16 @@ cd->open_caps = NULL; error, errorcode will be set non-zero, so we don't need to look at the result of the function here. 
*/ -ptr = (const uschar *)pattern + skipatstart; -code = (uschar *)codestart; +ptr = (const pcre_uchar *)pattern + skipatstart; +code = (pcre_uchar *)codestart; *code = OP_BRA; (void)compile_regex(re->options, &code, &ptr, &errorcode, FALSE, FALSE, 0, 0, - &firstbyte, &reqbyte, NULL, cd, NULL); + &firstchar, &reqchar, NULL, cd, NULL); re->top_bracket = cd->bracount; re->top_backref = cd->top_backref; -re->flags = cd->external_flags; +re->flags = cd->external_flags | PCRE_MODE; -if (cd->had_accept) reqbyte = REQ_NONE; /* Must disable after (*ACCEPT) */ +if (cd->had_accept) reqchar = REQ_NONE; /* Must disable after (*ACCEPT) */ /* If not reached end of pattern on success, there's an excess bracket. */ @@ -7626,7 +7931,7 @@ references; optimize for them, as searching a large regex takes time. */ if (cd->hwm > cd->start_workspace) { int prev_recno = -1; - const uschar *groupptr = NULL; + const pcre_uchar *groupptr = NULL; while (errorcode == 0 && cd->hwm > cd->start_workspace) { int offset, recno; @@ -7635,18 +7940,18 @@ if (cd->hwm > cd->start_workspace) recno = GET(codestart, offset); if (recno != prev_recno) { - groupptr = _pcre_find_bracket(codestart, utf8, recno); + groupptr = PRIV(find_bracket)(codestart, utf, recno); prev_recno = recno; } if (groupptr == NULL) errorcode = ERR53; - else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart)); + else PUT(((pcre_uchar *)codestart), offset, (int)(groupptr - codestart)); } } /* If the workspace had to be expanded, free the new memory. */ if (cd->workspace_size > COMPILE_WORK_SIZE) - (pcre_free)((void *)cd->start_workspace); + (PUBL(free))((void *)cd->start_workspace); /* Give an error if there's back reference to a non-existent capturing subpattern. */ @@ -7663,21 +7968,21 @@ length, and set their lengths. */ if (cd->check_lookbehind) { - uschar *cc = (uschar *)codestart; + pcre_uchar *cc = (pcre_uchar *)codestart; /* Loop, searching for OP_REVERSE items, and process those that do not have their length set. 
(Actually, it will also re-process any that have a length of zero, but that is a pathological case, and it does no harm.) When we find one, we temporarily terminate the branch it is in while we scan it. */ - for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1); + for (cc = (pcre_uchar *)PRIV(find_bracket)(codestart, utf, -1); cc != NULL; - cc = (uschar *)_pcre_find_bracket(cc, utf8, -1)) + cc = (pcre_uchar *)PRIV(find_bracket)(cc, utf, -1)) { if (GET(cc, 1) == 0) { int fixed_length; - uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); + pcre_uchar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); int end_op = *be; *be = OP_END; fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE, @@ -7700,9 +8005,9 @@ if (cd->check_lookbehind) if (errorcode != 0) { - (pcre_free)(re); + (PUBL(free))(re); PCRE_EARLY_ERROR_RETURN: - *erroroffset = (int)(ptr - (const uschar *)pattern); + *erroroffset = (int)(ptr - (const pcre_uchar *)pattern); PCRE_EARLY_ERROR_RETURN2: *errorptr = find_error_text(errorcode); if (errorcodeptr != NULL) *errorcodeptr = errorcode; @@ -7725,13 +8030,38 @@ if ((re->options & PCRE_ANCHORED) == 0) re->options |= PCRE_ANCHORED; else { - if (firstbyte < 0) - firstbyte = find_firstassertedchar(codestart, FALSE); - if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */ + if (firstchar < 0) + firstchar = find_firstassertedchar(codestart, FALSE); + if (firstchar >= 0) /* Remove caseless flag for non-caseable chars */ { - int ch = firstbyte & 255; - re->first_byte = ((firstbyte & REQ_CASELESS) != 0 && - cd->fcc[ch] == ch)? ch : firstbyte; +#ifdef COMPILE_PCRE8 + re->first_char = firstchar & 0xff; +#else +#ifdef COMPILE_PCRE16 + re->first_char = firstchar & 0xffff; +#endif +#endif + if ((firstchar & REQ_CASELESS) != 0) + { +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + /* We ignore non-ASCII first chars in 8 bit mode. 
*/ + if (utf) + { + if (re->first_char < 128) + { + if (cd->fcc[re->first_char] != re->first_char) + re->flags |= PCRE_FCH_CASELESS; + } + else if (UCD_OTHERCASE(re->first_char) != re->first_char) + re->flags |= PCRE_FCH_CASELESS; + } + else +#endif + if (MAX_255(re->first_char) + && cd->fcc[re->first_char] != re->first_char) + re->flags |= PCRE_FCH_CASELESS; + } + re->flags |= PCRE_FIRSTSET; } else if (is_startline(codestart, 0, cd->backref_map)) @@ -7743,12 +8073,36 @@ if ((re->options & PCRE_ANCHORED) == 0) variable length item in the regex. Remove the caseless flag for non-caseable bytes. */ -if (reqbyte >= 0 && - ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0)) +if (reqchar >= 0 && + ((re->options & PCRE_ANCHORED) == 0 || (reqchar & REQ_VARY) != 0)) { - int ch = reqbyte & 255; - re->req_byte = ((reqbyte & REQ_CASELESS) != 0 && - cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte; +#ifdef COMPILE_PCRE8 + re->req_char = reqchar & 0xff; +#else +#ifdef COMPILE_PCRE16 + re->req_char = reqchar & 0xffff; +#endif +#endif + if ((reqchar & REQ_CASELESS) != 0) + { +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + /* We ignore non-ASCII first chars in 8 bit mode. */ + if (utf) + { + if (re->req_char < 128) + { + if (cd->fcc[re->req_char] != re->req_char) + re->flags |= PCRE_RCH_CASELESS; + } + else if (UCD_OTHERCASE(re->req_char) != re->req_char) + re->flags |= PCRE_RCH_CASELESS; + } + else +#endif + if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char) + re->flags |= PCRE_RCH_CASELESS; + } + re->flags |= PCRE_REQCHSET; } @@ -7763,38 +8117,46 @@ printf("Options=%08x\n", re->options); if ((re->flags & PCRE_FIRSTSET) != 0) { - int ch = re->first_byte & 255; - const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? - "" : " (caseless)"; - if (isprint(ch)) printf("First char = %c%s\n", ch, caseless); + pcre_uchar ch = re->first_char; + const char *caseless = + ((re->flags & PCRE_FCH_CASELESS) == 0)? 
"" : " (caseless)"; + if (PRINTABLE(ch)) printf("First char = %c%s\n", ch, caseless); else printf("First char = \\x%02x%s\n", ch, caseless); } if ((re->flags & PCRE_REQCHSET) != 0) { - int ch = re->req_byte & 255; - const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? - "" : " (caseless)"; - if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless); + pcre_uchar ch = re->req_char; + const char *caseless = + ((re->flags & PCRE_RCH_CASELESS) == 0)? "" : " (caseless)"; + if (PRINTABLE(ch)) printf("Req char = %c%s\n", ch, caseless); else printf("Req char = \\x%02x%s\n", ch, caseless); } -pcre_printint(re, stdout, TRUE); +#ifdef COMPILE_PCRE8 +pcre_printint((pcre *)re, stdout, TRUE); +#else +pcre16_printint((pcre *)re, stdout, TRUE); +#endif /* This check is done here in the debugging case so that the code that was compiled can be seen. */ if (code - codestart > length) { - (pcre_free)(re); + (PUBL(free))(re); *errorptr = find_error_text(ERR23); - *erroroffset = ptr - (uschar *)pattern; + *erroroffset = ptr - (pcre_uchar *)pattern; if (errorcodeptr != NULL) *errorcodeptr = ERR23; return NULL; } #endif /* PCRE_DEBUG */ +#ifdef COMPILE_PCRE8 return (pcre *)re; +#else +return (pcre16 *)re; +#endif } /* End of pcre_compile.c */ diff --git a/harbour/src/3rd/pcre/pcreconf.c b/harbour/src/3rd/pcre/pcreconf.c index 2fdc99cf35..8d4a7f078e 100644 --- a/harbour/src/3rd/pcre/pcreconf.c +++ b/harbour/src/3rd/pcre/pcreconf.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -45,6 +45,9 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif +/* Keep the original link size. 
*/ +static int real_link_size = LINK_SIZE; + #include "pcreinal.h" @@ -62,18 +65,41 @@ Arguments: Returns: 0 if data returned, negative on error */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_config(int what, void *where) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_config(int what, void *where) +#endif { switch (what) { case PCRE_CONFIG_UTF8: -#ifdef SUPPORT_UTF8 +#if defined COMPILE_PCRE16 + *((int *)where) = 0; + return PCRE_ERROR_BADOPTION; +#else +#if defined SUPPORT_UTF *((int *)where) = 1; #else *((int *)where) = 0; #endif break; +#endif + + case PCRE_CONFIG_UTF16: +#if defined COMPILE_PCRE8 + *((int *)where) = 0; + return PCRE_ERROR_BADOPTION; +#else +#if defined SUPPORT_UTF + *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; +#endif case PCRE_CONFIG_UNICODE_PROPERTIES: #ifdef SUPPORT_UCP @@ -91,6 +117,14 @@ switch (what) #endif break; + case PCRE_CONFIG_JITTARGET: +#ifdef SUPPORT_JIT + *((const char **)where) = PRIV(jit_get_target)(); +#else + *((const char **)where) = NULL; +#endif + break; + case PCRE_CONFIG_NEWLINE: *((int *)where) = NEWLINE; break; @@ -104,7 +138,7 @@ switch (what) break; case PCRE_CONFIG_LINK_SIZE: - *((int *)where) = LINK_SIZE; + *((int *)where) = real_link_size; break; case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD: diff --git a/harbour/src/3rd/pcre/pcredfa.c b/harbour/src/3rd/pcre/pcredfa.c index 5610201752..7b6692a62a 100644 --- a/harbour/src/3rd/pcre/pcredfa.c +++ b/harbour/src/3rd/pcre/pcredfa.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language (but see below for why this module is different). Written by Philip Hazel - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -113,7 +113,7 @@ small value. 
Non-zero values in the table are the offsets from the opcode where the character is to be found. ***NOTE*** If the start of this table is modified, the three tables that follow must also be modified. */ -static const uschar coptable[] = { +static const pcre_uint8 coptable[] = { 0, /* End */ 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ @@ -128,22 +128,27 @@ static const uschar coptable[] = { 1, /* noti */ /* Positive single-char repeats */ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ - 3, 3, 3, /* upto, minupto, exact */ - 1, 1, 1, 3, /* *+, ++, ?+, upto+ */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto, minupto */ + 1+IMM2_SIZE, /* exact */ + 1, 1, 1, 1+IMM2_SIZE, /* *+, ++, ?+, upto+ */ 1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ - 3, 3, 3, /* upto I, minupto I, exact I */ - 1, 1, 1, 3, /* *+I, ++I, ?+I, upto+I */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* upto I, minupto I */ + 1+IMM2_SIZE, /* exact I */ + 1, 1, 1, 1+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ /* Negative single-char repeats - only for chars < 256 */ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ - 3, 3, 3, /* NOT upto, minupto, exact */ - 1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto, minupto */ + 1+IMM2_SIZE, /* NOT exact */ + 1, 1, 1, 1+IMM2_SIZE, /* NOT *+, ++, ?+, upto+ */ 1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ - 3, 3, 3, /* NOT upto I, minupto I, exact I */ - 1, 1, 1, 3, /* NOT *+I, ++I, ?+I, upto+I */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* NOT upto I, minupto I */ + 1+IMM2_SIZE, /* NOT exact I */ + 1, 1, 1, 1+IMM2_SIZE, /* NOT *+I, ++I, ?+I, upto+I */ /* Positive type repeats */ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ - 3, 3, 3, /* Type upto, minupto, exact */ - 1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* Type upto, minupto */ + 1+IMM2_SIZE, /* Type exact */ + 1, 1, 1, 1+IMM2_SIZE, /* Type *+, ++, ?+, upto+ */ /* Character class & ref repeats */ 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? 
*/ 0, 0, /* CRRANGE, CRMINRANGE */ @@ -182,7 +187,7 @@ remember the fact that a character could have been inspected when the end of the subject is reached. ***NOTE*** If the start of this table is modified, the two tables that follow must also be modified. */ -static const uschar poptable[] = { +static const pcre_uint8 poptable[] = { 0, /* End */ 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ @@ -249,7 +254,7 @@ static const uschar poptable[] = { /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, and \w */ -static const uschar toptable1[] = { +static const pcre_uint8 toptable1[] = { 0, 0, 0, 0, 0, 0, ctype_digit, ctype_digit, ctype_space, ctype_space, @@ -257,7 +262,7 @@ static const uschar toptable1[] = { 0, 0 /* OP_ANY, OP_ALLANY */ }; -static const uschar toptable2[] = { +static const pcre_uint8 toptable2[] = { 0, 0, 0, 0, 0, 0, ctype_digit, 0, ctype_space, 0, @@ -296,7 +301,7 @@ Returns: nothing */ static void -pchars(unsigned char *p, int length, FILE *f) +pchars(const pcre_uchar *p, int length, FILE *f) { int c; while (length-- > 0) @@ -386,8 +391,8 @@ for the current character, one for the following character). 
*/ static int internal_dfa_exec( dfa_match_data *md, - const uschar *this_start_code, - const uschar *current_subject, + const pcre_uchar *this_start_code, + const pcre_uchar *current_subject, int start_offset, int *offsets, int offsetcount, @@ -398,9 +403,9 @@ internal_dfa_exec( stateblock *active_states, *new_states, *temp_states; stateblock *next_active_state, *next_new_state; -const uschar *ctypes, *lcc, *fcc; -const uschar *ptr; -const uschar *end_code, *first_op; +const pcre_uint8 *ctypes, *lcc, *fcc; +const pcre_uchar *ptr; +const pcre_uchar *end_code, *first_op; dfa_recursion_info new_recursive; @@ -409,14 +414,14 @@ int active_count, new_count, match_count; /* Some fields in the md block are frequently referenced, so we load them into independent variables in the hope that this will perform better. */ -const uschar *start_subject = md->start_subject; -const uschar *end_subject = md->end_subject; -const uschar *start_code = md->start_code; +const pcre_uchar *start_subject = md->start_subject; +const pcre_uchar *end_subject = md->end_subject; +const pcre_uchar *start_code = md->start_code; -#ifdef SUPPORT_UTF8 -BOOL utf8 = (md->poptions & PCRE_UTF8) != 0; +#ifdef SUPPORT_UTF +BOOL utf = (md->poptions & PCRE_UTF8) != 0; #else -BOOL utf8 = FALSE; +BOOL utf = FALSE; #endif rlevel++; @@ -442,7 +447,8 @@ new_count = 0; first_op = this_start_code + 1 + LINK_SIZE + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0); + *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) + ? IMM2_SIZE:0); /* The first thing in any (sub) pattern is a bracket of some sort. Push all the alternative states onto the list, and find out where the end is. This @@ -470,18 +476,16 @@ if (*first_op == OP_REVERSE) /* If we can't go back the amount required for the longest lookbehind pattern, go back as far as we can; some alternatives may still be viable. 
*/ -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF /* In character mode we have to step back character by character */ - if (utf8) + if (utf) { for (gone_back = 0; gone_back < max_back; gone_back++) { if (current_subject <= start_subject) break; current_subject--; - while (current_subject > start_subject && - (*current_subject & 0xc0) == 0x80) - current_subject--; + ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--); } } else @@ -542,8 +546,8 @@ else { int length = 1 + LINK_SIZE + ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || - *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? - 2:0); + *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS) + ? IMM2_SIZE:0); do { ADD_NEW((int)(end_code - start_code + length), 0); @@ -556,7 +560,7 @@ else workspace[0] = 0; /* Bit indicating which vector is current */ -DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code)); +DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code))); /* Loop for scanning the subject */ @@ -583,7 +587,7 @@ for (;;) #ifdef PCRE_DEBUG printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); - pchars((uschar *)ptr, strlen((char *)ptr), stdout); + pchars(ptr, STRLEN_UC(ptr), stdout); printf("\"\n"); printf("%.*sActive states: ", rlevel*2-2, SP); @@ -604,9 +608,9 @@ for (;;) if (ptr < end_subject) { clen = 1; /* Number of bytes in the character */ -#ifdef SUPPORT_UTF8 - if (utf8) { GETCHARLEN(c, ptr, clen); } else -#endif /* SUPPORT_UTF8 */ +#ifdef SUPPORT_UTF + if (utf) { GETCHARLEN(c, ptr, clen); } else +#endif /* SUPPORT_UTF */ c = *ptr; } else @@ -624,7 +628,7 @@ for (;;) { stateblock *current_state = active_states + i; BOOL caseless = FALSE; - const uschar *code; + const pcre_uchar *code; int state_offset = current_state->offset; int count, codevalue, rrc; @@ -693,9 +697,9 @@ for (;;) if (coptable[codevalue] > 0) { dlen = 1; -#ifdef SUPPORT_UTF8 - if (utf8) { GETCHARLEN(d, (code + 
coptable[codevalue]), dlen); } else -#endif /* SUPPORT_UTF8 */ +#ifdef SUPPORT_UTF + if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else +#endif /* SUPPORT_UTF */ d = code[coptable[codevalue]]; if (codevalue >= OP_TYPESTAR) { @@ -816,7 +820,7 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_CBRA: case OP_SCBRA: - ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE), 0); + ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE), 0); code += GET(code, 1); while (*code == OP_ALT) { @@ -956,10 +960,10 @@ for (;;) if (ptr > start_subject) { - const uschar *temp = ptr - 1; + const pcre_uchar *temp = ptr - 1; if (temp < md->start_used_ptr) md->start_used_ptr = temp; -#ifdef SUPPORT_UTF8 - if (utf8) BACKCHAR(temp); +#ifdef SUPPORT_UTF + if (utf) { BACKCHAR(temp); } #endif GETCHARTEST(d, temp); #ifdef SUPPORT_UCP @@ -1024,7 +1028,7 @@ for (;;) break; case PT_GC: - OK = _pcre_ucp_gentype[prop->chartype] == code[2]; + OK = PRIV(ucp_gentype)[prop->chartype] == code[2]; break; case PT_PC: @@ -1038,24 +1042,24 @@ for (;;) /* These are specials for combination cases. 
*/ case PT_ALNUM: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1157,7 +1161,7 @@ for (;;) ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) { if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 4, 0); } + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); } else { ADD_NEW(state_offset, count); } } @@ -1168,7 +1172,7 @@ for (;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - ADD_ACTIVE(state_offset + 4, 0); + ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1183,7 +1187,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 4, 0); } + { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -1218,7 +1222,7 @@ for (;;) break; case PT_GC: - OK = _pcre_ucp_gentype[prop->chartype] == code[3]; + OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; break; case PT_PC: @@ -1232,24 +1236,24 @@ for (;;) /* These are specials for combination cases. 
*/ case PT_ALNUM: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1281,7 +1285,7 @@ for (;;) if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } if (clen > 0 && UCD_CATEGORY(c) != ucp_M) { - const uschar *nptr = ptr + clen; + const pcre_uchar *nptr = ptr + clen; int ncount = 0; if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS) { @@ -1465,7 +1469,7 @@ for (;;) break; case PT_GC: - OK = _pcre_ucp_gentype[prop->chartype] == code[3]; + OK = PRIV(ucp_gentype)[prop->chartype] == code[3]; break; case PT_PC: @@ -1479,24 +1483,24 @@ for (;;) /* These are specials for combination cases. 
*/ case PT_ALNUM: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1537,7 +1541,7 @@ for (;;) ADD_ACTIVE(state_offset + 2, 0); if (clen > 0 && UCD_CATEGORY(c) != ucp_M) { - const uschar *nptr = ptr + clen; + const pcre_uchar *nptr = ptr + clen; int ncount = 0; if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR || codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY) @@ -1719,13 +1723,13 @@ for (;;) case OP_PROP_EXTRA + OP_TYPEMINUPTO: case OP_PROP_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 6, 0); } + { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { BOOL OK; const ucd_record * prop = GET_UCD(c); - switch(code[4]) + switch(code[1 + IMM2_SIZE + 1]) { case PT_ANY: OK = TRUE; @@ -1737,38 +1741,38 @@ for (;;) break; case PT_GC: - OK = _pcre_ucp_gentype[prop->chartype] == code[5]; + OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2]; break; case PT_PC: - OK = prop->chartype == code[5]; + OK = prop->chartype == code[1 + IMM2_SIZE + 2]; break; case PT_SC: - OK = prop->script == code[5]; + OK = prop->script == 
code[1 + IMM2_SIZE + 2]; break; /* These are specials for combination cases. */ case PT_ALNUM: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N; + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N; break; case PT_SPACE: /* Perl space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR; break; case PT_PXSPACE: /* POSIX space */ - OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR; break; case PT_WORD: - OK = _pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || + OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE; break; @@ -1787,7 +1791,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + 6, 0); } + { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); } else { ADD_NEW(state_offset, count); } } @@ -1800,11 +1804,11 @@ for (;;) case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0 && UCD_CATEGORY(c) != ucp_M) { - const uschar *nptr = ptr + clen; + const pcre_uchar *nptr = ptr + clen; int ncount = 0; if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO) { @@ -1821,7 +1825,7 @@ for (;;) nptr += ndlen; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } else { ADD_NEW_DATA(-state_offset, count, ncount); } } @@ -1834,7 +1838,7 @@ for (;;) case OP_ANYNL_EXTRA + 
OP_TYPEMINUPTO: case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1861,7 +1865,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); } else { ADD_NEW_DATA(-state_offset, count, ncount); } break; @@ -1878,7 +1882,7 @@ for (;;) case OP_VSPACE_EXTRA + OP_TYPEMINUPTO: case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1907,7 +1911,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } else { ADD_NEW_DATA(-state_offset, count, 0); } } @@ -1920,7 +1924,7 @@ for (;;) case OP_HSPACE_EXTRA + OP_TYPEMINUPTO: case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO: if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT) - { ADD_ACTIVE(state_offset + 4, 0); } + { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); } count = current_state->count; /* Number already matched */ if (clen > 0) { @@ -1962,7 +1966,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW_DATA(-(state_offset + 4), 0, 0); } + { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); } else { ADD_NEW_DATA(-state_offset, count, 0); } } @@ -1984,32 +1988,32 @@ for (;;) case OP_CHARI: if (clen == 0) break; -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else { unsigned int othercase; - if (c < 128) othercase = fcc[c]; else - - /* If we have Unicode property support, we can use it to test the - other case of 
the character. */ - + if (c < 128) + othercase = fcc[c]; + else + /* If we have Unicode property support, we can use it to test the + other case of the character. */ #ifdef SUPPORT_UCP - othercase = UCD_OTHERCASE(c); + othercase = UCD_OTHERCASE(c); #else - othercase = NOTACHAR; + othercase = NOTACHAR; #endif if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } } } else -#endif /* SUPPORT_UTF8 */ - - /* Non-UTF-8 mode */ +#endif /* SUPPORT_UTF */ + /* Not UTF mode */ { - if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); } + if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d)) + { ADD_NEW(state_offset + 2, 0); } } break; @@ -2023,7 +2027,7 @@ for (;;) case OP_EXTUNI: if (clen > 0 && UCD_CATEGORY(c) != ucp_M) { - const uschar *nptr = ptr + clen; + const pcre_uchar *nptr = ptr + clen; int ncount = 0; while (nptr < end_subject) { @@ -2209,16 +2213,16 @@ for (;;) unsigned int otherd = NOTACHAR; if (caseless) { -#ifdef SUPPORT_UTF8 - if (utf8 && d >= 128) +#ifdef SUPPORT_UTF + if (utf && d >= 128) { #ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); #endif /* SUPPORT_UCP */ } else -#endif /* SUPPORT_UTF8 */ - otherd = fcc[d]; +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { @@ -2256,16 +2260,16 @@ for (;;) unsigned int otherd = NOTACHAR; if (caseless) { -#ifdef SUPPORT_UTF8 - if (utf8 && d >= 128) +#ifdef SUPPORT_UTF + if (utf && d >= 128) { #ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); #endif /* SUPPORT_UCP */ } else -#endif /* SUPPORT_UTF8 */ - otherd = fcc[d]; +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { @@ -2301,16 +2305,16 @@ for (;;) unsigned int otherd = NOTACHAR; if (caseless) { -#ifdef SUPPORT_UTF8 - if (utf8 && d >= 128) +#ifdef SUPPORT_UTF + if (utf && d >= 128) { #ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); #endif /* SUPPORT_UCP */ } else -#endif /* SUPPORT_UTF8 */ - otherd = fcc[d]; +#endif /* SUPPORT_UTF */ + 
otherd = TABLE_GET(d, fcc, d); } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { @@ -2338,21 +2342,21 @@ for (;;) unsigned int otherd = NOTACHAR; if (caseless) { -#ifdef SUPPORT_UTF8 - if (utf8 && d >= 128) +#ifdef SUPPORT_UTF + if (utf && d >= 128) { #ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); #endif /* SUPPORT_UCP */ } else -#endif /* SUPPORT_UTF8 */ - otherd = fcc[d]; +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + dlen + 3, 0); } + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -2375,23 +2379,23 @@ for (;;) case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTPOSUPTO: - ADD_ACTIVE(state_offset + dlen + 3, 0); + ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0); count = current_state->count; /* Number already matched */ if (clen > 0) { unsigned int otherd = NOTACHAR; if (caseless) { -#ifdef SUPPORT_UTF8 - if (utf8 && d >= 128) +#ifdef SUPPORT_UTF + if (utf && d >= 128) { #ifdef SUPPORT_UCP otherd = UCD_OTHERCASE(d); #endif /* SUPPORT_UCP */ } else -#endif /* SUPPORT_UTF8 */ - otherd = fcc[d]; +#endif /* SUPPORT_UTF */ + otherd = TABLE_GET(d, fcc, d); } if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) { @@ -2401,7 +2405,7 @@ for (;;) next_active_state--; } if (++count >= GET2(code, 1)) - { ADD_NEW(state_offset + dlen + 3, 0); } + { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -2418,18 +2422,18 @@ for (;;) { BOOL isinclass = FALSE; int next_state_offset; - const uschar *ecode; + const pcre_uchar *ecode; /* For a simple class, there is always just a 32-byte table, and we can set isinclass from it. */ if (codevalue != OP_XCLASS) { - ecode = code + 33; + ecode = code + 1 + (32 / sizeof(pcre_uchar)); if (clen > 0) { isinclass = (c > 255)? 
(codevalue == OP_NCLASS) : - ((code[1 + c/8] & (1 << (c&7))) != 0); + ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0); } } @@ -2440,7 +2444,7 @@ for (;;) else { ecode = code + GET(code, 1); - if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE); + if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf); } /* At this point, isinclass is set for all kinds of class, and ecode @@ -2474,12 +2478,12 @@ for (;;) case OP_CRMINRANGE: count = current_state->count; /* Already matched */ if (count >= GET2(ecode, 1)) - { ADD_ACTIVE(next_state_offset + 5, 0); } + { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } if (isinclass) { - int max = GET2(ecode, 3); + int max = GET2(ecode, 1 + IMM2_SIZE); if (++count >= max && max != 0) /* Max 0 => no limit */ - { ADD_NEW(next_state_offset + 5, 0); } + { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); } else { ADD_NEW(state_offset, count); } } @@ -2510,7 +2514,7 @@ for (;;) int rc; int local_offsets[2]; int local_workspace[1000]; - const uschar *endasscode = code + GET(code, 1); + const pcre_uchar *endasscode = code + GET(code, 1); while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); @@ -2547,13 +2551,17 @@ for (;;) if (code[LINK_SIZE+1] == OP_CALLOUT) { rrc = 0; - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { - pcre_callout_block cb; + PUBL(callout_block) cb; cb.version = 1; /* Version 1 of the callout block */ cb.callout_number = code[LINK_SIZE+2]; cb.offset_vector = offsets; +#ifdef COMPILE_PCRE8 cb.subject = (PCRE_SPTR)start_subject; +#else + cb.subject = (PCRE_SPTR16)start_subject; +#endif cb.subject_length = (int)(end_subject - start_subject); cb.start_match = (int)(current_subject - start_subject); cb.current_position = (int)(ptr - start_subject); @@ -2563,10 +2571,10 @@ for (;;) cb.capture_last = -1; cb.callout_data = md->callout_data; cb.mark = NULL; /* No (*MARK) support */ - if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ + if ((rrc = 
(*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ } if (rrc > 0) break; /* Fail this thread */ - code += _pcre_OP_lengths[OP_CALLOUT]; /* Skip callout data */ + code += PRIV(OP_lengths)[OP_CALLOUT]; /* Skip callout data */ } condcode = code[LINK_SIZE+1]; @@ -2587,10 +2595,10 @@ for (;;) else if (condcode == OP_RREF || condcode == OP_NRREF) { - int value = GET2(code, LINK_SIZE+2); + int value = GET2(code, LINK_SIZE + 2); if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; if (md->recursive != NULL) - { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); } + { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } } @@ -2599,8 +2607,8 @@ for (;;) else { int rc; - const uschar *asscode = code + LINK_SIZE + 1; - const uschar *endasscode = asscode + GET(asscode, 1); + const pcre_uchar *asscode = code + LINK_SIZE + 1; + const pcre_uchar *endasscode = asscode + GET(asscode, 1); while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); @@ -2631,7 +2639,7 @@ for (;;) dfa_recursion_info *ri; int local_offsets[1000]; int local_workspace[1000]; - const uschar *callpat = start_code + GET(code, 1); + const pcre_uchar *callpat = start_code + GET(code, 1); int recno = (callpat == md->start_code)? 
0 : GET2(callpat, 1 + LINK_SIZE); int rc; @@ -2682,10 +2690,12 @@ for (;;) { for (rc = rc*2 - 2; rc >= 0; rc -= 2) { - const uschar *p = start_subject + local_offsets[rc]; - const uschar *pp = start_subject + local_offsets[rc+1]; int charcount = local_offsets[rc+1] - local_offsets[rc]; - while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; +#ifdef SUPPORT_UTF + const pcre_uchar *p = start_subject + local_offsets[rc]; + const pcre_uchar *pp = start_subject + local_offsets[rc+1]; + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; +#endif if (charcount > 0) { ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); @@ -2708,7 +2718,7 @@ for (;;) case OP_BRAPOSZERO: { int charcount, matched_count; - const uschar *local_ptr = ptr; + const pcre_uchar *local_ptr = ptr; BOOL allow_zero; if (codevalue == OP_BRAPOSZERO) @@ -2758,7 +2768,7 @@ for (;;) if (matched_count > 0 || allow_zero) { - const uschar *end_subpattern = code; + const pcre_uchar *end_subpattern = code; int next_state_offset; do { end_subpattern += GET(end_subpattern, 1); } @@ -2779,10 +2789,12 @@ for (;;) } else { - const uschar *p = ptr; - const uschar *pp = local_ptr; + const pcre_uchar *p = ptr; + const pcre_uchar *pp = local_ptr; charcount = (int)(pp - p); - while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; +#ifdef SUPPORT_UTF + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; +#endif ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); } } @@ -2809,7 +2821,7 @@ for (;;) if (rc >= 0) { - const uschar *end_subpattern = code; + const pcre_uchar *end_subpattern = code; int charcount = local_offsets[1] - local_offsets[0]; int next_state_offset, repeat_state_offset; @@ -2862,9 +2874,11 @@ for (;;) } else { - const uschar *p = start_subject + local_offsets[0]; - const uschar *pp = start_subject + local_offsets[1]; - while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; +#ifdef SUPPORT_UTF + const pcre_uchar *p = start_subject + local_offsets[0]; + const pcre_uchar *pp = start_subject 
+ local_offsets[1]; + while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--; +#endif ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); if (repeat_state_offset >= 0) { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } @@ -2880,13 +2894,17 @@ for (;;) case OP_CALLOUT: rrc = 0; - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { - pcre_callout_block cb; + PUBL(callout_block) cb; cb.version = 1; /* Version 1 of the callout block */ cb.callout_number = code[1]; cb.offset_vector = offsets; +#ifdef COMPILE_PCRE8 cb.subject = (PCRE_SPTR)start_subject; +#else + cb.subject = (PCRE_SPTR16)start_subject; +#endif cb.subject_length = (int)(end_subject - start_subject); cb.start_match = (int)(current_subject - start_subject); cb.current_position = (int)(ptr - start_subject); @@ -2896,10 +2914,10 @@ for (;;) cb.capture_last = -1; cb.callout_data = md->callout_data; cb.mark = NULL; /* No (*MARK) support */ - if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ + if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc; /* Abandon */ } if (rrc == 0) - { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); } + { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); } break; @@ -2996,28 +3014,33 @@ Returns: > 0 => number of match offset pairs placed in offsets < -1 => some kind of unexpected problem */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, const char *subject, int length, int start_offset, int options, int *offsets, int offsetcount, int *workspace, int wscount) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, + PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, + int offsetcount, int *workspace, int wscount) +#endif { -real_pcre *re = (real_pcre *)argument_re; +REAL_PCRE *re = (REAL_PCRE *)argument_re; dfa_match_data match_block; dfa_match_data *md = 
&match_block; -BOOL utf8, anchored, startline, firstline; -const uschar *current_subject, *end_subject, *lcc; - -pcre_study_data internal_study; +BOOL utf, anchored, startline, firstline; +const pcre_uchar *current_subject, *end_subject; const pcre_study_data *study = NULL; -real_pcre internal_re; -const uschar *req_byte_ptr; -const uschar *start_bits = NULL; -BOOL first_byte_caseless = FALSE; -BOOL req_byte_caseless = FALSE; -int first_byte = -1; -int req_byte = -1; -int req_byte2 = -1; +const pcre_uchar *req_char_ptr; +const pcre_uint8 *start_bits = NULL; +BOOL has_first_char = FALSE; +BOOL has_req_char = FALSE; +pcre_uchar first_char = 0; +pcre_uchar first_char2 = 0; +pcre_uchar req_char = 0; +pcre_uchar req_char2 = 0; int newline; /* Plausibility checks */ @@ -3052,27 +3075,26 @@ if (extra_data != NULL) } /* Check that the first field in the block is the magic number. If it is not, -test for a regex that was compiled on a host of opposite endianness. If this is -the case, flipped values are put in internal_re and internal_study if there was -study data too. */ +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ if (re->magic_number != MAGIC_NUMBER) - { - re = _pcre_try_flipped(re, &internal_re, study, &internal_study); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - if (study != NULL) study = &internal_study; - } + return re->magic_number == REVERSED_MAGIC_NUMBER? 
+ PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; +if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; /* Set some local values */ -current_subject = (const unsigned char *)subject + start_offset; -end_subject = (const unsigned char *)subject + length; -req_byte_ptr = current_subject - 1; +current_subject = (const pcre_uchar *)subject + start_offset; +end_subject = (const pcre_uchar *)subject + length; +req_char_ptr = current_subject - 1; -#ifdef SUPPORT_UTF8 -utf8 = (re->options & PCRE_UTF8) != 0; +#ifdef SUPPORT_UTF +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +utf = (re->options & PCRE_UTF8) != 0; #else -utf8 = FALSE; +utf = FALSE; #endif anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || @@ -3080,9 +3102,9 @@ anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 || /* The remaining fixed data for passing around. */ -md->start_code = (const uschar *)argument_re + +md->start_code = (const pcre_uchar *)argument_re + re->name_table_offset + re->name_count * re->name_entry_size; -md->start_subject = (const unsigned char *)subject; +md->start_subject = (const pcre_uchar *)subject; md->end_subject = end_subject; md->start_offset = start_offset; md->moptions = options; @@ -3143,11 +3165,11 @@ else /* Check a UTF-8 string if required. Unfortunately there's no way of passing back the character offset. 
*/ -#ifdef SUPPORT_UTF8 -if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) +#ifdef SUPPORT_UTF +if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) { int erroroffset; - int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset); + int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset); if (errorcode != 0) { if (offsetcount >= 2) @@ -3159,7 +3181,7 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; } if (start_offset > 0 && start_offset < length && - (((USPTR)subject)[start_offset] & 0xc0) == 0x80) + NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) return PCRE_ERROR_BADUTF8_OFFSET; } #endif @@ -3168,12 +3190,11 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) is a feature that makes it possible to save compiled regex and re-use them in other programs later. */ -if (md->tables == NULL) md->tables = _pcre_default_tables; +if (md->tables == NULL) md->tables = PRIV(default_tables); -/* The lower casing table and the "must be at the start of a line" flag are -used in a loop when finding where to start. */ +/* The "must be at the start of a line" flags are used in a loop when finding +where to start. */ -lcc = md->tables + lcc_offset; startline = (re->flags & PCRE_STARTLINE) != 0; firstline = (re->options & PCRE_FIRSTLINE) != 0; @@ -3187,9 +3208,16 @@ if (!anchored) { if ((re->flags & PCRE_FIRSTSET) != 0) { - first_byte = re->first_byte & 255; - if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) - first_byte = lcc[first_byte]; + has_first_char = TRUE; + first_char = first_char2 = (pcre_uchar)(re->first_char); + if ((re->flags & PCRE_FCH_CASELESS) != 0) + { + first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char); +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + if (utf && first_char > 127) + first_char2 = UCD_OTHERCASE(first_char); +#endif + } } else { @@ -3204,9 +3232,16 @@ character" set. 
*/ if ((re->flags & PCRE_REQCHSET) != 0) { - req_byte = re->req_byte & 255; - req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; - req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */ + has_req_char = TRUE; + req_char = req_char2 = (pcre_uchar)(re->req_char); + if ((re->flags & PCRE_RCH_CASELESS) != 0) + { + req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char); +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + if (utf && req_char > 127) + req_char2 = UCD_OTHERCASE(req_char); +#endif + } } /* Call the main matching function, looping for a non-anchored regex after a @@ -3219,7 +3254,7 @@ for (;;) if ((options & PCRE_DFA_RESTART) == 0) { - const uschar *save_end_subject = end_subject; + const pcre_uchar *save_end_subject = end_subject; /* If firstline is TRUE, the start of the match is constrained to the first line of a multiline string. Implement this by temporarily adjusting @@ -3228,14 +3263,14 @@ for (;;) if (firstline) { - USPTR t = current_subject; -#ifdef SUPPORT_UTF8 - if (utf8) + PCRE_PUCHAR t = current_subject; +#ifdef SUPPORT_UTF + if (utf) { while (t < md->end_subject && !IS_NEWLINE(t)) { t++; - while (t < end_subject && (*t & 0xc0) == 0x80) t++; + ACROSSCHAR(t < end_subject, *t, t++); } } else @@ -3252,17 +3287,17 @@ for (;;) if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) { - /* Advance to a known first byte. */ + /* Advance to a known first char. 
*/ - if (first_byte >= 0) + if (has_first_char) { - if (first_byte_caseless) + if (first_char != first_char2) while (current_subject < end_subject && - lcc[*current_subject] != first_byte) + *current_subject != first_char && *current_subject != first_char2) current_subject++; else while (current_subject < end_subject && - *current_subject != first_byte) + *current_subject != first_char) current_subject++; } @@ -3272,16 +3307,15 @@ for (;;) { if (current_subject > md->start_subject + start_offset) { -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) { current_subject++; - while(current_subject < end_subject && - (*current_subject & 0xc0) == 0x80) - current_subject++; + ACROSSCHAR(current_subject < end_subject, *current_subject, + current_subject++); } } else @@ -3308,13 +3342,18 @@ for (;;) while (current_subject < end_subject) { register unsigned int c = *current_subject; +#ifndef COMPILE_PCRE8 + if (c > 255) c = 255; +#endif if ((start_bits[c/8] & (1 << (c&7))) == 0) { current_subject++; -#ifdef SUPPORT_UTF8 - if (utf8) - while(current_subject < end_subject && - (*current_subject & 0xc0) == 0x80) current_subject++; +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + /* In non 8-bit mode, the iteration will stop for + characters > 255 at the beginning or not stop at all. */ + if (utf) + ACROSSCHAR(current_subject < end_subject, *current_subject, + current_subject++); #endif } else break; @@ -3342,8 +3381,8 @@ for (;;) (pcre_uint32)(end_subject - current_subject) < study->minlength) return PCRE_ERROR_NOMATCH; - /* If req_byte is set, we know that that character must appear in the - subject for the match to succeed. If the first character is set, req_byte + /* If req_char is set, we know that that character must appear in the + subject for the match to succeed. If the first character is set, req_char must be later in the subject; otherwise the test starts at the match point. 
This optimization can save a huge amount of work in patterns with nested unlimited repeats that aren't going to match. Writing separate @@ -3355,28 +3394,28 @@ for (;;) patterns. This showed up when somebody was matching /^C/ on a 32-megabyte string... so we don't do this when the string is sufficiently long. */ - if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX) + if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX) { - register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0); + register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0); /* We don't need to repeat the search if we haven't yet reached the place we found it at last time. */ - if (p > req_byte_ptr) + if (p > req_char_ptr) { - if (req_byte_caseless) + if (req_char != req_char2) { while (p < end_subject) { register int pp = *p++; - if (pp == req_byte || pp == req_byte2) { p--; break; } + if (pp == req_char || pp == req_char2) { p--; break; } } } else { while (p < end_subject) { - if (*p++ == req_byte) { p--; break; } + if (*p++ == req_char) { p--; break; } } } @@ -3389,7 +3428,7 @@ for (;;) found it, so that we don't search again next time round the loop if the start hasn't passed this character yet. 
*/ - req_byte_ptr = p; + req_char_ptr = p; } } } @@ -3421,11 +3460,13 @@ for (;;) if (firstline && IS_NEWLINE(current_subject)) break; current_subject++; - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { - while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80) - current_subject++; + ACROSSCHAR(current_subject < end_subject, *current_subject, + current_subject++); } +#endif if (current_subject > end_subject) break; /* If we have just passed a CR and we are now at a LF, and the pattern does diff --git a/harbour/src/3rd/pcre/pcreexec.c b/harbour/src/3rd/pcre/pcreexec.c index 43997d8422..1d76f9c244 100644 --- a/harbour/src/3rd/pcre/pcreexec.c +++ b/harbour/src/3rd/pcre/pcreexec.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -113,7 +113,7 @@ Returns: nothing */ static void -pchars(const uschar *p, int length, BOOL is_subject, match_data *md) +pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md) { unsigned int c; if (is_subject && length > md->end_subject - p) length = md->end_subject - p; @@ -144,11 +144,11 @@ Returns: < 0 if not matched, otherwise the number of subject bytes matched */ static int -match_ref(int offset, register USPTR eptr, int length, match_data *md, +match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md, BOOL caseless) { -USPTR eptr_start = eptr; -register USPTR p = md->start_subject + md->offset_vector[offset]; +PCRE_PUCHAR eptr_start = eptr; +register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset]; #ifdef PCRE_DEBUG if (eptr >= md->end_subject) @@ -173,9 +173,9 @@ ASCII characters. 
*/ if (caseless) { -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF #ifdef SUPPORT_UCP - if (md->utf8) + if (md->utf) { /* Match characters up to the end of the reference. NOTE: the number of bytes matched may differ, because there are some characters whose upper and @@ -185,7 +185,7 @@ if (caseless) the latter. It is important, therefore, to check the length along the reference, not along the subject (earlier code did this wrong). */ - USPTR endptr = p + length; + PCRE_PUCHAR endptr = p + length; while (p < endptr) { int c, d; @@ -204,7 +204,11 @@ if (caseless) { if (eptr + length > md->end_subject) return -1; while (length-- > 0) - { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; } + { + if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1; + p++; + eptr++; + } } } @@ -307,7 +311,7 @@ argument of match(), which never changes. */ #define RMATCH(ra,rb,rc,rd,re,rw)\ {\ - heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\ + heapframe *newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\ if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ frame->Xwhere = rw; \ newframe->Xeptr = ra;\ @@ -328,7 +332,7 @@ argument of match(), which never changes. 
*/ {\ heapframe *oldframe = frame;\ frame = oldframe->Xprevframe;\ - (pcre_stack_free)(oldframe);\ + if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\ if (frame != NULL)\ {\ rrc = ra;\ @@ -345,24 +349,24 @@ typedef struct heapframe { /* Function arguments that may change */ - USPTR Xeptr; - const uschar *Xecode; - USPTR Xmstart; + PCRE_PUCHAR Xeptr; + const pcre_uchar *Xecode; + PCRE_PUCHAR Xmstart; int Xoffset_top; eptrblock *Xeptrb; unsigned int Xrdepth; /* Function local variables */ - USPTR Xcallpat; -#ifdef SUPPORT_UTF8 - USPTR Xcharptr; + PCRE_PUCHAR Xcallpat; +#ifdef SUPPORT_UTF + PCRE_PUCHAR Xcharptr; #endif - USPTR Xdata; - USPTR Xnext; - USPTR Xpp; - USPTR Xprev; - USPTR Xsaved_eptr; + PCRE_PUCHAR Xdata; + PCRE_PUCHAR Xnext; + PCRE_PUCHAR Xpp; + PCRE_PUCHAR Xprev; + PCRE_PUCHAR Xsaved_eptr; recursion_info Xnew_recursive; @@ -375,7 +379,7 @@ typedef struct heapframe { int Xprop_value; int Xprop_fail_result; int Xoclength; - uschar Xocchars[8]; + pcre_uchar Xocchars[6]; #endif int Xcodelink; @@ -440,7 +444,7 @@ the subject. */ /* Performance note: It might be tempting to extract commonly used fields from -the md structure (e.g. utf8, end_subject) into individual variables to improve +the md structure (e.g. utf, end_subject) into individual variables to improve performance. Tests using gcc on a SPARC disproved this; in the first case, it made performance worse. @@ -463,8 +467,9 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 */ static int -match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, - int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth) +match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode, + PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb, + unsigned int rdepth) { /* These variables do not need to be preserved over recursion in this function, so they can be ordinary variables in all cases. 
Mark some of them with @@ -473,20 +478,22 @@ so they can be ordinary variables in all cases. Mark some of them with register int rrc; /* Returns from recursive calls */ register int i; /* Used for loops not involving calls to RMATCH() */ register unsigned int c; /* Character values not kept over RMATCH() calls */ -register BOOL utf8; /* Local copy of UTF-8 flag for speed */ +register BOOL utf; /* Local copy of UTF flag for speed */ BOOL minimize, possessive; /* Quantifier options */ BOOL caseless; int condcode; /* When recursion is not being used, all "local" variables that have to be -preserved over calls to RMATCH() are part of a "frame" which is obtained from -heap storage. Set up the top-level frame here; others are obtained from the -heap whenever RMATCH() does a "recursion". See the macro definitions above. */ +preserved over calls to RMATCH() are part of a "frame". We set up the top-level +frame on the stack here; subsequent instantiations are obtained from the heap +whenever RMATCH() does a "recursion". See the macro definitions above. Putting +the top-level on the stack rather than malloc-ing them all gives a performance +boost in many cases where there is not much "recursion". */ #ifdef NO_RECURSE -heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); -if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); +heapframe frame_zero; +heapframe *frame = &frame_zero; frame->Xprevframe = NULL; /* Marks the top level */ /* Copy in the original argument variables */ @@ -513,7 +520,7 @@ HEAP_RECURSE: /* Ditto for the local variables */ -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF #define charptr frame->Xcharptr #endif #define callpat frame->Xcallpat @@ -571,15 +578,15 @@ declarations can be cut out in a block. The only declarations within blocks below are for variables that do not have to be preserved over a recursive call to RMATCH(). 
*/ -#ifdef SUPPORT_UTF8 -const uschar *charptr; +#ifdef SUPPORT_UTF +const pcre_uchar *charptr; #endif -const uschar *callpat; -const uschar *data; -const uschar *next; -USPTR pp; -const uschar *prev; -USPTR saved_eptr; +const pcre_uchar *callpat; +const pcre_uchar *data; +const pcre_uchar *next; +PCRE_PUCHAR pp; +const pcre_uchar *prev; +PCRE_PUCHAR saved_eptr; recursion_info new_recursive; @@ -592,7 +599,7 @@ int prop_type; int prop_value; int prop_fail_result; int oclength; -uschar occhars[8]; +pcre_uchar occhars[6]; #endif int codelink; @@ -608,6 +615,23 @@ int save_offset1, save_offset2, save_offset3; int stacksave[REC_STACK_SAVE_MAX]; eptrblock newptrb; + +/* There is a special fudge for calling match() in a way that causes it to +measure the size of its basic stack frame when the stack is being used for +recursion. The second argument (ecode) being NULL triggers this behaviour. It +cannot normally ever be NULL. The return is the negated value of the frame +size. */ + +if (ecode == NULL) + { + if (rdepth == 0) + return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1); + else + { + int len = (char *)&rdepth - (char *)eptr; + return (len > 0)? -len : len; + } + } #endif /* NO_RECURSE */ /* To save space on the stack and in the heap frame, I have doubled up on some @@ -620,6 +644,8 @@ the alternative names that are used. */ #define code_offset codelink #define condassert condition #define matched_once prev_is_word +#define foc number +#define save_mark data /* These statements are here to stop the compiler complaining about unitialized variables. */ @@ -645,10 +671,10 @@ defined). However, RMATCH isn't like a function call because it's quite a complicated macro. It has to be used in one particular way. This shouldn't, however, impact performance when true recursion is being used. 
*/ -#ifdef SUPPORT_UTF8 -utf8 = md->utf8; /* Local copy of the flag */ +#ifdef SUPPORT_UTF +utf = md->utf; /* Local copy of the flag */ #else -utf8 = FALSE; +utf = FALSE; #endif /* First check that we haven't called match() too many times, or that we @@ -689,7 +715,7 @@ for (;;) case OP_MARK: md->nomatch_mark = ecode + 2; md->mark = NULL; /* In case previously set by assertion */ - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, eptrb, RM55); if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && md->mark == NULL) md->mark = ecode + 2; @@ -702,7 +728,7 @@ for (;;) unaltered. */ else if (rrc == MATCH_SKIP_ARG && - strcmp((char *)(ecode + 2), (char *)(md->start_match_ptr)) == 0) + STRCMP_UC_UC(ecode + 2, md->start_match_ptr) == 0) { md->start_match_ptr = eptr; RRETURN(MATCH_SKIP); @@ -715,7 +741,7 @@ for (;;) /* COMMIT overrides PRUNE, SKIP, and THEN */ case OP_COMMIT: - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM52); if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && @@ -726,7 +752,7 @@ for (;;) /* PRUNE overrides THEN */ case OP_PRUNE: - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM51); if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); RRETURN(MATCH_PRUNE); @@ -734,7 +760,7 @@ for (;;) case OP_PRUNE_ARG: md->nomatch_mark = ecode + 2; md->mark = NULL; /* In case previously set by assertion */ - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, eptrb, RM56); if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && md->mark == NULL) md->mark = ecode + 2; @@ -744,7 +770,7 @@ for (;;) /* SKIP overrides PRUNE and THEN */ case OP_SKIP: - 
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM53); if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) RRETURN(rrc); @@ -758,10 +784,10 @@ for (;;) case OP_SKIP_ARG: if (md->ignore_skip_arg) { - ecode += _pcre_OP_lengths[*ecode] + ecode[1]; + ecode += PRIV(OP_lengths)[*ecode] + ecode[1]; break; } - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, eptrb, RM57); if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) RRETURN(rrc); @@ -779,7 +805,7 @@ for (;;) match pointer to do this. */ case OP_THEN: - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM54); if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->start_match_ptr = ecode; @@ -788,7 +814,7 @@ for (;;) case OP_THEN_ARG: md->nomatch_mark = ecode + 2; md->mark = NULL; /* In case previously set by assertion */ - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md, eptrb, RM58); if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) && md->mark == NULL) md->mark = ecode + 2; @@ -812,6 +838,7 @@ for (;;) case OP_ONCE_NC: prev = ecode; saved_eptr = eptr; + save_mark = md->mark; do { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); @@ -830,6 +857,7 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); ecode += GET(ecode,1); + md->mark = save_mark; } while (*ecode == OP_ALT); @@ -909,6 +937,7 @@ for (;;) save_offset2 = md->offset_vector[offset+1]; save_offset3 = md->offset_vector[md->offset_end - number]; save_capture_last = md->capture_last; + save_mark = md->mark; DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); md->offset_vector[md->offset_end - number] = @@ -917,7 +946,7 @@ for (;;) for (;;) { 
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM1); if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ @@ -945,6 +974,7 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); md->capture_last = save_capture_last; ecode += GET(ecode, 1); + md->mark = save_mark; if (*ecode != OP_ALT) break; } @@ -1004,13 +1034,14 @@ for (;;) else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) { - ecode += _pcre_OP_lengths[*ecode]; + ecode += PRIV(OP_lengths)[*ecode]; goto TAIL_RECURSE; } /* In all other cases, we have to make another call to match(). */ - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, + save_mark = md->mark; + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM2); /* See comment in the code for capturing groups above about handling @@ -1028,7 +1059,7 @@ for (;;) { if (rrc == MATCH_ONCE) { - const uschar *scode = ecode; + const pcre_uchar *scode = ecode; if (*scode != OP_ONCE) /* If not at start, find it */ { while (*scode == OP_ALT) scode += GET(scode, 1); @@ -1039,6 +1070,7 @@ for (;;) RRETURN(rrc); } ecode += GET(ecode, 1); + md->mark = save_mark; if (*ecode != OP_ALT) break; } @@ -1093,7 +1125,7 @@ for (;;) md->offset_vector[md->offset_end - number] = (int)(eptr - md->start_subject); if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM63); if (rrc == MATCH_KETRPOS) { @@ -1165,7 +1197,7 @@ for (;;) for (;;) { if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, + RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb, RM48); if (rrc == MATCH_KETRPOS) { @@ -1215,13 +1247,17 @@ for (;;) if (ecode[LINK_SIZE+1] == 
OP_CALLOUT) { - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { - pcre_callout_block cb; + PUBL(callout_block) cb; cb.version = 2; /* Version 1 of the callout block */ cb.callout_number = ecode[LINK_SIZE+2]; cb.offset_vector = md->offset_vector; +#ifdef COMPILE_PCRE8 cb.subject = (PCRE_SPTR)md->start_subject; +#else + cb.subject = (PCRE_SPTR16)md->start_subject; +#endif cb.subject_length = (int)(md->end_subject - md->start_subject); cb.start_match = (int)(mstart - md->start_subject); cb.current_position = (int)(eptr - md->start_subject); @@ -1231,10 +1267,10 @@ for (;;) cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; - if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); + if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } - ecode += _pcre_OP_lengths[OP_CALLOUT]; + ecode += PRIV(OP_lengths)[OP_CALLOUT]; } condcode = ecode[LINK_SIZE+1]; @@ -1260,7 +1296,7 @@ for (;;) if (!condition && condcode == OP_NRREF) { - uschar *slotA = md->name_table; + pcre_uchar *slotA = md->name_table; for (i = 0; i < md->name_count; i++) { if (GET2(slotA, 0) == recno) break; @@ -1273,11 +1309,11 @@ for (;;) if (i < md->name_count) { - uschar *slotB = slotA; + pcre_uchar *slotB = slotA; while (slotB > md->name_table) { slotB -= md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { condition = GET2(slotB, 0) == md->recursive->group_num; if (condition) break; @@ -1293,7 +1329,7 @@ for (;;) for (i++; i < md->name_count; i++) { slotB += md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { condition = GET2(slotB, 0) == md->recursive->group_num; if (condition) break; @@ -1306,7 +1342,7 @@ for (;;) /* Chose branch according to the condition */ - ecode += condition? 3 : GET(ecode, 1); + ecode += condition? 
1 + IMM2_SIZE : GET(ecode, 1); } } @@ -1323,7 +1359,7 @@ for (;;) if (!condition && condcode == OP_NCREF) { int refno = offset >> 1; - uschar *slotA = md->name_table; + pcre_uchar *slotA = md->name_table; for (i = 0; i < md->name_count; i++) { @@ -1337,11 +1373,11 @@ for (;;) if (i < md->name_count) { - uschar *slotB = slotA; + pcre_uchar *slotB = slotA; while (slotB > md->name_table) { slotB -= md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { offset = GET2(slotB, 0) << 1; condition = offset < offset_top && @@ -1359,7 +1395,7 @@ for (;;) for (i++; i < md->name_count; i++) { slotB += md->name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { offset = GET2(slotB, 0) << 1; condition = offset < offset_top && @@ -1374,7 +1410,7 @@ for (;;) /* Chose branch according to the condition */ - ecode += condition? 3 : GET(ecode, 1); + ecode += condition? 
1 + IMM2_SIZE : GET(ecode, 1); } else if (condcode == OP_DEF) /* DEFINE - always false */ @@ -1466,7 +1502,7 @@ for (;;) md->offset_vector[offset+1] = (int)(eptr - md->start_subject); if (offset_top <= offset) offset_top = offset + 2; } - ecode += 3; + ecode += 1 + IMM2_SIZE; break; @@ -1513,6 +1549,7 @@ for (;;) case OP_ASSERT: case OP_ASSERTBACK: + save_mark = md->mark; if (md->match_function_type == MATCH_CONDASSERT) { condassert = TRUE; @@ -1534,6 +1571,7 @@ for (;;) if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); ecode += GET(ecode, 1); + md->mark = save_mark; } while (*ecode == OP_ALT); @@ -1557,6 +1595,7 @@ for (;;) case OP_ASSERT_NOT: case OP_ASSERTBACK_NOT: + save_mark = md->mark; if (md->match_function_type == MATCH_CONDASSERT) { condassert = TRUE; @@ -1567,6 +1606,7 @@ for (;;) do { RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5); + md->mark = save_mark; if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH); if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT) { @@ -1593,8 +1633,8 @@ for (;;) back a number of characters, not bytes. */ case OP_REVERSE: -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { i = GET(ecode, 1); while (i-- > 0) @@ -1625,13 +1665,17 @@ for (;;) function is able to force a failure. 
*/ case OP_CALLOUT: - if (pcre_callout != NULL) + if (PUBL(callout) != NULL) { - pcre_callout_block cb; + PUBL(callout_block) cb; cb.version = 2; /* Version 1 of the callout block */ cb.callout_number = ecode[1]; cb.offset_vector = md->offset_vector; +#ifdef COMPILE_PCRE8 cb.subject = (PCRE_SPTR)md->start_subject; +#else + cb.subject = (PCRE_SPTR16)md->start_subject; +#endif cb.subject_length = (int)(md->end_subject - md->start_subject); cb.start_match = (int)(mstart - md->start_subject); cb.current_position = (int)(eptr - md->start_subject); @@ -1641,7 +1685,7 @@ for (;;) cb.capture_last = md->capture_last; cb.callout_data = md->callout_data; cb.mark = md->nomatch_mark; - if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); + if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH); if (rrc < 0) RRETURN(rrc); } ecode += 2 + 2*LINK_SIZE; @@ -1700,7 +1744,7 @@ for (;;) else { new_recursive.offset_save = - (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); + (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int)); if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); } memcpy(new_recursive.offset_save, md->offset_vector, @@ -1715,7 +1759,7 @@ for (;;) do { if (cbegroup) md->match_function_type = MATCH_CBEGROUP; - RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, + RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top, md, eptrb, RM6); memcpy(md->offset_vector, new_recursive.offset_save, new_recursive.saved_max * sizeof(int)); @@ -1724,7 +1768,7 @@ for (;;) { DPRINTF(("Recursion matched\n")); if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); + (PUBL(free))(new_recursive.offset_save); /* Set where we got to in the subject, and reset the start in case it was changed by \K. 
This *is* propagated back out of a recursion, @@ -1742,7 +1786,7 @@ for (;;) { DPRINTF(("Recursion gave error %d\n", rrc)); if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); + (PUBL(free))(new_recursive.offset_save); RRETURN(rrc); } @@ -1754,7 +1798,7 @@ for (;;) DPRINTF(("Recursion didn't match\n")); md->recursive = new_recursive.prevrec; if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); + (PUBL(free))(new_recursive.offset_save); RRETURN(MATCH_NOMATCH); } @@ -2066,15 +2110,15 @@ for (;;) be "non-word" characters. Remember the earliest consulted character for partial matching. */ -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { /* Get status of previous character */ if (eptr == md->start_subject) prev_is_word = FALSE; else { - USPTR lastptr = eptr - 1; - while((*lastptr & 0xc0) == 0x80) lastptr--; + PCRE_PUCHAR lastptr = eptr - 1; + BACKCHAR(lastptr); if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; GETCHAR(c, lastptr); #ifdef SUPPORT_UCP @@ -2139,7 +2183,8 @@ for (;;) } else #endif - prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); + prev_is_word = MAX_255(eptr[-1]) + && ((md->ctypes[eptr[-1]] & ctype_word) != 0); } /* Get status of next character */ @@ -2162,7 +2207,8 @@ for (;;) } else #endif - cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); + cur_is_word = MAX_255(*eptr) + && ((md->ctypes[*eptr] & ctype_word) != 0); } /* Now see if the situation is what we want */ @@ -2186,7 +2232,9 @@ for (;;) RRETURN(MATCH_NOMATCH); } eptr++; - if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; +#ifdef SUPPORT_UTF + if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); +#endif ecode++; break; @@ -2211,7 +2259,7 @@ for (;;) } GETCHARINCTEST(c, eptr); if ( -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) c < 256 && #endif (md->ctypes[c] & ctype_digit) != 0 @@ -2228,8 +2276,8 @@ for (;;) } 
GETCHARINCTEST(c, eptr); if ( -#ifdef SUPPORT_UTF8 - c >= 256 || +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) + c > 255 || #endif (md->ctypes[c] & ctype_digit) == 0 ) @@ -2245,7 +2293,7 @@ for (;;) } GETCHARINCTEST(c, eptr); if ( -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) c < 256 && #endif (md->ctypes[c] & ctype_space) != 0 @@ -2262,8 +2310,8 @@ for (;;) } GETCHARINCTEST(c, eptr); if ( -#ifdef SUPPORT_UTF8 - c >= 256 || +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) + c > 255 || #endif (md->ctypes[c] & ctype_space) == 0 ) @@ -2279,7 +2327,7 @@ for (;;) } GETCHARINCTEST(c, eptr); if ( -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) c < 256 && #endif (md->ctypes[c] & ctype_word) != 0 @@ -2296,8 +2344,8 @@ for (;;) } GETCHARINCTEST(c, eptr); if ( -#ifdef SUPPORT_UTF8 - c >= 256 || +#if defined SUPPORT_UTF || !(defined COMPILE_PCRE8) + c > 255 || #endif (md->ctypes[c] & ctype_word) == 0 ) @@ -2475,7 +2523,7 @@ for (;;) break; case PT_GC: - if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP)) + if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP)) RRETURN(MATCH_NOMATCH); break; @@ -2492,20 +2540,20 @@ for (;;) /* These are specials */ case PT_ALNUM: - if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; case PT_SPACE: /* Perl space */ - if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; case PT_PXSPACE: /* POSIX space */ - if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == 
CHAR_FF || c == CHAR_CR) == (op == OP_NOTPROP)) @@ -2513,8 +2561,8 @@ for (;;) break; case PT_WORD: - if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) == (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); break; @@ -2543,7 +2591,7 @@ for (;;) while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } + if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } @@ -2564,7 +2612,7 @@ for (;;) case OP_REFI: caseless = op == OP_REFI; offset = GET2(ecode, 1) << 1; /* Doubled ref number */ - ecode += 3; + ecode += 1 + IMM2_SIZE; /* If the reference is unset, there are two possibilities: @@ -2604,9 +2652,9 @@ for (;;) case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); - max = GET2(ecode, 3); + max = GET2(ecode, 1 + IMM2_SIZE); if (max == 0) max = INT_MAX; - ecode += 5; + ecode += 1 + 2 * IMM2_SIZE; break; default: /* No repeat follows */ @@ -2620,9 +2668,13 @@ for (;;) } /* Handle repeated back references. If the length of the reference is - zero, just continue with the main loop. */ + zero, just continue with the main loop. If the length is negative, it + means the reference is unset in non-Java-compatible mode. If the minimum is + zero, we can continue at the same level without recursion. For any other + minimum, carrying on will result in NOMATCH. */ if (length == 0) continue; + if (length < 0 && min == 0) continue; /* First, ensure the minimum number of matches are present. We get back the length of the reference string explicitly rather than passing the @@ -2703,8 +2755,11 @@ for (;;) case OP_NCLASS: case OP_CLASS: { + /* The data variable is saved across frames, so the byte map needs to + be stored there. 
*/ +#define BYTE_MAP ((pcre_uint8 *)data) data = ecode + 1; /* Save for matching */ - ecode += 33; /* Advance past the item */ + ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */ switch (*ecode) { @@ -2725,9 +2780,9 @@ for (;;) case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); - max = GET2(ecode, 3); + max = GET2(ecode, 1 + IMM2_SIZE); if (max == 0) max = INT_MAX; - ecode += 5; + ecode += 1 + 2 * IMM2_SIZE; break; default: /* No repeat follows */ @@ -2737,9 +2792,8 @@ for (;;) /* First, ensure the minimum number of matches are present. */ -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { for (i = 1; i <= min; i++) { @@ -2754,14 +2808,12 @@ for (;;) if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); } else - { - if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } + if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (i = 1; i <= min; i++) { @@ -2771,7 +2823,14 @@ for (;;) RRETURN(MATCH_NOMATCH); } c = *eptr++; - if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); +#ifndef COMPILE_PCRE8 + if (c > 255) + { + if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else +#endif + if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } @@ -2785,9 +2844,8 @@ for (;;) if (minimize) { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { for (fi = min;; fi++) { @@ -2805,14 +2863,12 @@ for (;;) if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); } else - { - if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); - } + if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (fi = min;; fi++) { @@ -2825,7 +2881,14 @@ for (;;) RRETURN(MATCH_NOMATCH); } c = *eptr++; - if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); +#ifndef COMPILE_PCRE8 + if (c > 255) + { + if (op == OP_CLASS) 
RRETURN(MATCH_NOMATCH); + } + else +#endif + if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); } } /* Control never gets here */ @@ -2837,9 +2900,8 @@ for (;;) { pp = eptr; -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { for (i = min; i < max; i++) { @@ -2855,9 +2917,7 @@ for (;;) if (op == OP_CLASS) break; } else - { - if ((data[c/8] & (1 << (c&7))) == 0) break; - } + if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; eptr += len; } for (;;) @@ -2870,7 +2930,7 @@ for (;;) } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (i = min; i < max; i++) { @@ -2880,7 +2940,14 @@ for (;;) break; } c = *eptr; - if ((data[c/8] & (1 << (c&7))) == 0) break; +#ifndef COMPILE_PCRE8 + if (c > 255) + { + if (op == OP_CLASS) break; + } + else +#endif + if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break; eptr++; } while (eptr >= pp) @@ -2893,6 +2960,7 @@ for (;;) RRETURN(MATCH_NOMATCH); } +#undef BYTE_MAP } /* Control never gets here */ @@ -2901,7 +2969,7 @@ for (;;) when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8 mode, because Unicode properties are supported in non-UTF-8 mode. 
*/ -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: { data = ecode + 1 + LINK_SIZE; /* Save for matching */ @@ -2926,9 +2994,9 @@ for (;;) case OP_CRMINRANGE: minimize = (*ecode == OP_CRMINRANGE); min = GET2(ecode, 1); - max = GET2(ecode, 3); + max = GET2(ecode, 1 + IMM2_SIZE); if (max == 0) max = INT_MAX; - ecode += 5; + ecode += 1 + 2 * IMM2_SIZE; break; default: /* No repeat follows */ @@ -2946,7 +3014,7 @@ for (;;) RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); - if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); + if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); } /* If max == min we can continue with the main loop without the @@ -2970,7 +3038,7 @@ for (;;) RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(c, eptr); - if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); + if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -2988,8 +3056,12 @@ for (;;) SCHECK_PARTIAL(); break; } +#ifdef SUPPORT_UTF GETCHARLENTEST(c, eptr, len); - if (!_pcre_xclass(c, data)) break; +#else + c = *eptr; +#endif + if (!PRIV(xclass)(c, data, utf)) break; eptr += len; } for(;;) @@ -2997,7 +3069,9 @@ for (;;) RMATCH(eptr, ecode, offset_top, md, eptrb, RM21); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- == pp) break; /* Stop if tried at original pos */ - if (utf8) BACKCHAR(eptr); +#ifdef SUPPORT_UTF + if (utf) BACKCHAR(eptr); +#endif } RRETURN(MATCH_NOMATCH); } @@ -3009,8 +3083,8 @@ for (;;) /* Match a single character, casefully */ case OP_CHAR: -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { length = 1; ecode++; @@ -3024,8 +3098,7 @@ for (;;) } else #endif - - /* Non-UTF-8 mode */ + /* Not UTF mode */ { if (md->end_subject - eptr < 1) { @@ -3047,8 +3120,8 @@ for (;;) RRETURN(MATCH_NOMATCH); } -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { length = 1; ecode++; @@ -3061,7 +3134,10 @@ for (;;) if (fc < 128) { - if (md->lcc[*ecode++] != md->lcc[*eptr++]) 
RRETURN(MATCH_NOMATCH); + if (md->lcc[fc] + != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); + ecode++; + eptr++; } /* Otherwise we must pick up the subject character. Note that we cannot @@ -3087,11 +3163,13 @@ for (;;) } } else -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ - /* Non-UTF-8 mode */ + /* Not UTF mode */ { - if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + if (TABLE_GET(ecode[1], md->lcc, ecode[1]) + != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH); + eptr++; ecode += 2; } break; @@ -3101,7 +3179,7 @@ for (;;) case OP_EXACT: case OP_EXACTI: min = max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATCHAR; case OP_POSUPTO: @@ -3116,7 +3194,7 @@ for (;;) min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATCHAR; case OP_POSSTAR: @@ -3164,8 +3242,8 @@ for (;;) /* Common code for all repeated single-character matches. */ REPEATCHAR: -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { length = 1; charptr = ecode; @@ -3181,18 +3259,18 @@ for (;;) unsigned int othercase; if (op >= OP_STARI && /* Caseless */ (othercase = UCD_OTHERCASE(fc)) != fc) - oclength = _pcre_ord2utf8(othercase, occhars); + oclength = PRIV(ord2utf)(othercase, occhars); else oclength = 0; #endif /* SUPPORT_UCP */ for (i = 1; i <= min; i++) { if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, length) == 0) eptr += length; + memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; #ifdef SUPPORT_UCP else if (oclength > 0 && eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, oclength) == 0) eptr += oclength; + memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ else { @@ -3211,11 +3289,11 @@ for (;;) if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max) RRETURN(MATCH_NOMATCH); if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, length) == 0) 
eptr += length; + memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; #ifdef SUPPORT_UCP else if (oclength > 0 && eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, oclength) == 0) eptr += oclength; + memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ else { @@ -3232,11 +3310,11 @@ for (;;) for (i = min; i < max; i++) { if (eptr <= md->end_subject - length && - memcmp(eptr, charptr, length) == 0) eptr += length; + memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length; #ifdef SUPPORT_UCP else if (oclength > 0 && eptr <= md->end_subject - oclength && - memcmp(eptr, occhars, oclength) == 0) eptr += oclength; + memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ else { @@ -3268,14 +3346,12 @@ for (;;) value of fc will always be < 128. */ } else -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ + /* When not in UTF-8 mode, load a single-byte character. */ + fc = *ecode++; - /* When not in UTF-8 mode, load a single-byte character. */ - - fc = *ecode++; - - /* The value of fc at this point is always less than 256, though we may or - may not be in UTF-8 mode. The code is duplicated for the caseless and + /* The value of fc at this point is always one character, though we may + or may not be in UTF mode. The code is duplicated for the caseless and caseful cases, for speed, since matching characters is likely to be quite common. First, ensure the minimum number of matches are present. If min = max, continue at the same level without recursing. Otherwise, if @@ -3288,7 +3364,23 @@ for (;;) if (op >= OP_STARI) /* Caseless */ { - fc = md->lcc[fc]; +#ifdef COMPILE_PCRE8 + /* fc must be < 128 if UTF is enabled. 
*/ + foc = md->fcc[fc]; +#else +#ifdef SUPPORT_UTF +#ifdef SUPPORT_UCP + if (utf && fc > 127) + foc = UCD_OTHERCASE(fc); +#else + if (utf && fc > 127) + foc = fc; +#endif /* SUPPORT_UCP */ + else +#endif /* SUPPORT_UTF */ + foc = TABLE_GET(fc, md->fcc, fc); +#endif /* COMPILE_PCRE8 */ + for (i = 1; i <= min; i++) { if (eptr >= md->end_subject) @@ -3296,7 +3388,8 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); + eptr++; } if (min == max) continue; if (minimize) @@ -3311,7 +3404,8 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + if (fc != *eptr && foc != *eptr) RRETURN(MATCH_NOMATCH); + eptr++; } /* Control never gets here */ } @@ -3325,7 +3419,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if (fc != md->lcc[*eptr]) break; + if (fc != *eptr && foc != *eptr) break; eptr++; } @@ -3414,11 +3508,25 @@ for (;;) GETCHARINCTEST(c, eptr); if (op == OP_NOTI) /* The caseless case */ { -#ifdef SUPPORT_UTF8 - if (c < 256) -#endif - c = md->lcc[c]; - if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH); + register unsigned int ch, och; + ch = *ecode++; +#ifdef COMPILE_PCRE8 + /* ch must be < 128 if UTF is enabled. 
*/ + och = md->fcc[ch]; +#else +#ifdef SUPPORT_UTF +#ifdef SUPPORT_UCP + if (utf && ch > 127) + och = UCD_OTHERCASE(ch); +#else + if (utf && ch > 127) + och = ch; +#endif /* SUPPORT_UCP */ + else +#endif /* SUPPORT_UTF */ + och = TABLE_GET(ch, md->fcc, ch); +#endif /* COMPILE_PCRE8 */ + if (ch == c || och == c) RRETURN(MATCH_NOMATCH); } else /* Caseful */ { @@ -3436,7 +3544,7 @@ for (;;) case OP_NOTEXACT: case OP_NOTEXACTI: min = max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATNOTCHAR; case OP_NOTUPTO: @@ -3446,7 +3554,7 @@ for (;;) min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATNOTCHAR; case OP_NOTPOSSTAR: @@ -3478,7 +3586,7 @@ for (;;) possessive = TRUE; min = 0; max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATNOTCHAR; case OP_NOTSTAR: @@ -3517,11 +3625,25 @@ for (;;) if (op >= OP_NOTSTARI) /* Caseless */ { - fc = md->lcc[fc]; +#ifdef COMPILE_PCRE8 + /* fc must be < 128 if UTF is enabled. 
*/ + foc = md->fcc[fc]; +#else +#ifdef SUPPORT_UTF +#ifdef SUPPORT_UCP + if (utf && fc > 127) + foc = UCD_OTHERCASE(fc); +#else + if (utf && fc > 127) + foc = fc; +#endif /* SUPPORT_UCP */ + else +#endif /* SUPPORT_UTF */ + foc = TABLE_GET(fc, md->fcc, fc); +#endif /* COMPILE_PCRE8 */ -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { register unsigned int d; for (i = 1; i <= min; i++) @@ -3532,14 +3654,12 @@ for (;;) RRETURN(MATCH_NOMATCH); } GETCHARINC(d, eptr); - if (d < 256) d = md->lcc[d]; - if (fc == d) RRETURN(MATCH_NOMATCH); + if (fc == d || (unsigned int) foc == d) RRETURN(MATCH_NOMATCH); } } else #endif - - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (i = 1; i <= min; i++) { @@ -3548,7 +3668,8 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); + eptr++; } } @@ -3556,9 +3677,8 @@ for (;;) if (minimize) { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { register unsigned int d; for (fi = min;; fi++) @@ -3572,13 +3692,12 @@ for (;;) RRETURN(MATCH_NOMATCH); } GETCHARINC(d, eptr); - if (d < 256) d = md->lcc[d]; - if (fc == d) RRETURN(MATCH_NOMATCH); + if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH); } } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (fi = min;; fi++) { @@ -3590,7 +3709,8 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH); + eptr++; } } /* Control never gets here */ @@ -3602,9 +3722,8 @@ for (;;) { pp = eptr; -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { register unsigned int d; for (i = min; i < max; i++) @@ -3616,12 +3735,11 @@ for (;;) break; } GETCHARLEN(d, eptr, len); - if (d < 256) d = md->lcc[d]; - if (fc == d) break; + if (fc == d || (unsigned int)foc == d) break; 
eptr += len; } - if (possessive) continue; - for(;;) + if (possessive) continue; + for(;;) { RMATCH(eptr, ecode, offset_top, md, eptrb, RM30); if (rrc != MATCH_NOMATCH) RRETURN(rrc); @@ -3631,7 +3749,7 @@ for (;;) } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (i = min; i < max; i++) { @@ -3640,7 +3758,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if (fc == md->lcc[*eptr]) break; + if (fc == *eptr || foc == *eptr) break; eptr++; } if (possessive) continue; @@ -3661,9 +3779,8 @@ for (;;) else { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { register unsigned int d; for (i = 1; i <= min; i++) @@ -3679,7 +3796,7 @@ for (;;) } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (i = 1; i <= min; i++) { @@ -3696,9 +3813,8 @@ for (;;) if (minimize) { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { register unsigned int d; for (fi = min;; fi++) @@ -3717,7 +3833,7 @@ for (;;) } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (fi = min;; fi++) { @@ -3741,9 +3857,8 @@ for (;;) { pp = eptr; -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { register unsigned int d; for (i = min; i < max; i++) @@ -3769,7 +3884,7 @@ for (;;) } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (i = min; i < max; i++) { @@ -3802,7 +3917,7 @@ for (;;) case OP_TYPEEXACT: min = max = GET2(ecode, 1); minimize = TRUE; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATTYPE; case OP_TYPEUPTO: @@ -3810,7 +3925,7 @@ for (;;) min = 0; max = GET2(ecode, 1); minimize = *ecode == OP_TYPEMINUPTO; - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATTYPE; case OP_TYPEPOSSTAR: @@ -3838,7 +3953,7 @@ for (;;) possessive = TRUE; min = 0; max = GET2(ecode, 1); - ecode += 3; + ecode += 1 + IMM2_SIZE; goto REPEATTYPE; case OP_TYPESTAR: @@ -4045,7 +4160,7 @@ for (;;) while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, 
len); } + if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } @@ -4057,8 +4172,8 @@ for (;;) /* Handle all other cases when the coding is UTF-8 */ -#ifdef SUPPORT_UTF8 - if (utf8) switch(ctype) +#ifdef SUPPORT_UTF + if (utf) switch(ctype) { case OP_ANY: for (i = 1; i <= min; i++) @@ -4070,7 +4185,7 @@ for (;;) } if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } break; @@ -4083,7 +4198,7 @@ for (;;) RRETURN(MATCH_NOMATCH); } eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } break; @@ -4265,8 +4380,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) + if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + eptr++; /* No need to skip more bytes - we know it's a 1-byte character */ } break; @@ -4281,7 +4397,8 @@ for (;;) } if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); - while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); + eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } break; @@ -4293,8 +4410,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) + if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + eptr++; /* No need to skip more bytes - we know it's a 1-byte character */ } break; @@ -4309,7 +4427,8 @@ for (;;) } if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); - while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); + eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } break; @@ -4321,8 +4440,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) 
== 0) + if (*eptr >= 128 || (md->ctypes[*eptr] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + eptr++; /* No need to skip more bytes - we know it's a 1-byte character */ } break; @@ -4332,7 +4452,7 @@ for (;;) } /* End switch(ctype) */ else -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ /* Code for the non-UTF-8 case for minimum matching of operators other than OP_PROP and OP_NOTPROP. */ @@ -4392,6 +4512,10 @@ for (;;) case 0x000b: case 0x000c: case 0x0085: +#ifdef COMPILE_PCRE16 + case 0x2028: + case 0x2029: +#endif if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } @@ -4412,6 +4536,24 @@ for (;;) case 0x09: /* HT */ case 0x20: /* SPACE */ case 0xa0: /* NBSP */ +#ifdef COMPILE_PCRE16 + case 0x1680: /* OGHAM SPACE MARK */ + case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ + case 0x2000: /* EN QUAD */ + case 0x2001: /* EM QUAD */ + case 0x2002: /* EN SPACE */ + case 0x2003: /* EM SPACE */ + case 0x2004: /* THREE-PER-EM SPACE */ + case 0x2005: /* FOUR-PER-EM SPACE */ + case 0x2006: /* SIX-PER-EM SPACE */ + case 0x2007: /* FIGURE SPACE */ + case 0x2008: /* PUNCTUATION SPACE */ + case 0x2009: /* THIN SPACE */ + case 0x200A: /* HAIR SPACE */ + case 0x202f: /* NARROW NO-BREAK SPACE */ + case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ + case 0x3000: /* IDEOGRAPHIC SPACE */ +#endif RRETURN(MATCH_NOMATCH); } } @@ -4431,6 +4573,24 @@ for (;;) case 0x09: /* HT */ case 0x20: /* SPACE */ case 0xa0: /* NBSP */ +#ifdef COMPILE_PCRE16 + case 0x1680: /* OGHAM SPACE MARK */ + case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ + case 0x2000: /* EN QUAD */ + case 0x2001: /* EM QUAD */ + case 0x2002: /* EN SPACE */ + case 0x2003: /* EM SPACE */ + case 0x2004: /* THREE-PER-EM SPACE */ + case 0x2005: /* FOUR-PER-EM SPACE */ + case 0x2006: /* SIX-PER-EM SPACE */ + case 0x2007: /* FIGURE SPACE */ + case 0x2008: /* PUNCTUATION SPACE */ + case 0x2009: /* THIN SPACE */ + case 0x200A: /* HAIR SPACE */ + case 0x202f: /* NARROW NO-BREAK SPACE */ + case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ + case 
0x3000: /* IDEOGRAPHIC SPACE */ +#endif break; } } @@ -4452,6 +4612,10 @@ for (;;) case 0x0c: /* FF */ case 0x0d: /* CR */ case 0x85: /* NEL */ +#ifdef COMPILE_PCRE16 + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ +#endif RRETURN(MATCH_NOMATCH); } } @@ -4473,6 +4637,10 @@ for (;;) case 0x0c: /* FF */ case 0x0d: /* CR */ case 0x85: /* NEL */ +#ifdef COMPILE_PCRE16 + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ +#endif break; } } @@ -4486,7 +4654,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4498,7 +4668,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4510,7 +4682,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4522,7 +4696,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4534,8 +4710,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_word) != 0) + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4547,8 +4724,9 @@ for (;;) SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } - if ((md->ctypes[*eptr++] & ctype_word) == 0) + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + eptr++; } break; @@ -4766,7 +4944,7 @@ 
for (;;) while (eptr < md->end_subject) { int len = 1; - if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } + if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } @@ -4775,9 +4953,8 @@ for (;;) else #endif /* SUPPORT_UCP */ -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { for (fi = min;; fi++) { @@ -4919,7 +5096,7 @@ for (;;) break; case OP_WHITESPACE: - if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) + if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); break; @@ -4940,7 +5117,7 @@ for (;;) } else #endif - /* Not UTF-8 mode */ + /* Not UTF mode */ { for (fi = min;; fi++) { @@ -4976,6 +5153,10 @@ for (;;) case 0x000b: case 0x000c: case 0x0085: +#ifdef COMPILE_PCRE16 + case 0x2028: + case 0x2029: +#endif if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } @@ -4988,6 +5169,24 @@ for (;;) case 0x09: /* HT */ case 0x20: /* SPACE */ case 0xa0: /* NBSP */ +#ifdef COMPILE_PCRE16 + case 0x1680: /* OGHAM SPACE MARK */ + case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ + case 0x2000: /* EN QUAD */ + case 0x2001: /* EM QUAD */ + case 0x2002: /* EN SPACE */ + case 0x2003: /* EM SPACE */ + case 0x2004: /* THREE-PER-EM SPACE */ + case 0x2005: /* FOUR-PER-EM SPACE */ + case 0x2006: /* SIX-PER-EM SPACE */ + case 0x2007: /* FIGURE SPACE */ + case 0x2008: /* PUNCTUATION SPACE */ + case 0x2009: /* THIN SPACE */ + case 0x200A: /* HAIR SPACE */ + case 0x202f: /* NARROW NO-BREAK SPACE */ + case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ + case 0x3000: /* IDEOGRAPHIC SPACE */ +#endif RRETURN(MATCH_NOMATCH); } break; @@ -4999,6 +5198,24 @@ for (;;) case 0x09: /* HT */ case 0x20: /* SPACE */ case 0xa0: /* NBSP */ +#ifdef COMPILE_PCRE16 + case 0x1680: /* OGHAM SPACE MARK */ + case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */ + case 0x2000: /* EN QUAD */ + case 0x2001: /* EM QUAD */ + case 0x2002: /* EN SPACE */ + case 0x2003: /* EM SPACE */ + case 0x2004: /* THREE-PER-EM SPACE 
*/ + case 0x2005: /* FOUR-PER-EM SPACE */ + case 0x2006: /* SIX-PER-EM SPACE */ + case 0x2007: /* FIGURE SPACE */ + case 0x2008: /* PUNCTUATION SPACE */ + case 0x2009: /* THIN SPACE */ + case 0x200A: /* HAIR SPACE */ + case 0x202f: /* NARROW NO-BREAK SPACE */ + case 0x205f: /* MEDIUM MATHEMATICAL SPACE */ + case 0x3000: /* IDEOGRAPHIC SPACE */ +#endif break; } break; @@ -5012,6 +5229,10 @@ for (;;) case 0x0c: /* FF */ case 0x0d: /* CR */ case 0x85: /* NEL */ +#ifdef COMPILE_PCRE16 + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ +#endif RRETURN(MATCH_NOMATCH); } break; @@ -5025,32 +5246,36 @@ for (;;) case 0x0c: /* FF */ case 0x0d: /* CR */ case 0x85: /* NEL */ +#ifdef COMPILE_PCRE16 + case 0x2028: /* LINE SEPARATOR */ + case 0x2029: /* PARAGRAPH SEPARATOR */ +#endif break; } break; case OP_NOT_DIGIT: - if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); break; case OP_DIGIT: - if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); break; case OP_NOT_WHITESPACE: - if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); break; case OP_WHITESPACE: - if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); break; case OP_NOT_WORDCHAR: - if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); + if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); break; case OP_WORDCHAR: - if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); break; default: @@ -5239,7 +5464,7 @@ for (;;) RMATCH(eptr, ecode, offset_top, md, eptrb, RM44); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (eptr-- 
== pp) break; /* Stop if tried at original pos */ - if (utf8) BACKCHAR(eptr); + if (utf) BACKCHAR(eptr); } } @@ -5256,13 +5481,13 @@ for (;;) SCHECK_PARTIAL(); break; } - if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } + if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } if (UCD_CATEGORY(c) == ucp_M) break; eptr += len; while (eptr < md->end_subject) { len = 1; - if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } + if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); } if (UCD_CATEGORY(c) != ucp_M) break; eptr += len; } @@ -5279,7 +5504,7 @@ for (;;) if (eptr-- == pp) break; /* Stop if tried at original pos */ for (;;) /* Move back over one extended */ { - if (!utf8) c = *eptr; else + if (!utf) c = *eptr; else { BACKCHAR(eptr); GETCHAR(c, eptr); @@ -5293,10 +5518,8 @@ for (;;) else #endif /* SUPPORT_UCP */ -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { switch(ctype) { @@ -5312,7 +5535,7 @@ for (;;) } if (IS_NEWLINE(eptr)) break; eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } } @@ -5329,7 +5552,7 @@ for (;;) } if (IS_NEWLINE(eptr)) break; eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } } break; @@ -5345,7 +5568,7 @@ for (;;) break; } eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++); } } else @@ -5578,9 +5801,8 @@ for (;;) } } else -#endif /* SUPPORT_UTF8 */ - - /* Not UTF-8 mode */ +#endif /* SUPPORT_UTF */ + /* Not UTF mode */ { switch(ctype) { @@ -5624,10 +5846,12 @@ for (;;) } else { - if (c != 0x000a && - (md->bsr_anycrlf || - (c != 0x000b && c != 0x000c && c != 0x0085))) - break; + if (c != 0x000a && (md->bsr_anycrlf || + (c != 0x000b && c != 0x000c && c != 0x0085 +#ifdef COMPILE_PCRE16 + && c != 0x2028 && c != 0x2029 +#endif + ))) break; eptr++; } } @@ -5642,7 
+5866,12 @@ for (;;) break; } c = *eptr; - if (c == 0x09 || c == 0x20 || c == 0xa0) break; + if (c == 0x09 || c == 0x20 || c == 0xa0 +#ifdef COMPILE_PCRE16 + || c == 0x1680 || c == 0x180e || (c >= 0x2000 && c <= 0x200A) + || c == 0x202f || c == 0x205f || c == 0x3000 +#endif + ) break; eptr++; } break; @@ -5656,7 +5885,12 @@ for (;;) break; } c = *eptr; - if (c != 0x09 && c != 0x20 && c != 0xa0) break; + if (c != 0x09 && c != 0x20 && c != 0xa0 +#ifdef COMPILE_PCRE16 + && c != 0x1680 && c != 0x180e && (c < 0x2000 || c > 0x200A) + && c != 0x202f && c != 0x205f && c != 0x3000 +#endif + ) break; eptr++; } break; @@ -5670,8 +5904,11 @@ for (;;) break; } c = *eptr; - if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85) - break; + if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85 +#ifdef COMPILE_PCRE16 + || c == 0x2028 || c == 0x2029 +#endif + ) break; eptr++; } break; @@ -5685,8 +5922,11 @@ for (;;) break; } c = *eptr; - if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85) - break; + if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85 +#ifdef COMPILE_PCRE16 + && c != 0x2028 && c != 0x2029 +#endif + ) break; eptr++; } break; @@ -5699,7 +5939,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_digit) != 0) break; + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break; eptr++; } break; @@ -5712,7 +5952,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_digit) == 0) break; + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break; eptr++; } break; @@ -5725,7 +5965,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_space) != 0) break; + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break; eptr++; } break; @@ -5738,7 +5978,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_space) == 0) break; + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break; eptr++; } break; @@ 
-5751,7 +5991,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_word) != 0) break; + if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break; eptr++; } break; @@ -5764,7 +6004,7 @@ for (;;) SCHECK_PARTIAL(); break; } - if ((md->ctypes[*eptr] & ctype_word) == 0) break; + if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break; eptr++; } break; @@ -5827,16 +6067,23 @@ switch (frame->Xwhere) LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64) LBL(65) LBL(66) -#ifdef SUPPORT_UTF8 - LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 + LBL(21) +#endif +#ifdef SUPPORT_UTF + LBL(16) LBL(18) LBL(20) + LBL(22) LBL(23) LBL(28) LBL(30) LBL(32) LBL(34) LBL(42) LBL(46) #ifdef SUPPORT_UCP LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) LBL(59) LBL(60) LBL(61) LBL(62) #endif /* SUPPORT_UCP */ -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ default: DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); + +printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere); + return PCRE_ERROR_INTERNAL; } #undef LBL @@ -5923,64 +6170,90 @@ Returns: > 0 => success; value is the number of elements filled in < -1 => some kind of unexpected problem */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, int offsetcount) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, + PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets, + int offsetcount) +#endif { int rc, ocount, arg_offset_max; -int first_byte = -1; -int req_byte = -1; -int req_byte2 = -1; int newline; BOOL using_temporary_offsets = FALSE; BOOL anchored; 
BOOL startline; BOOL firstline; -BOOL first_byte_caseless = FALSE; -BOOL req_byte_caseless = FALSE; -BOOL utf8; +BOOL utf; +BOOL has_first_char = FALSE; +BOOL has_req_char = FALSE; +pcre_uchar first_char = 0; +pcre_uchar first_char2 = 0; +pcre_uchar req_char = 0; +pcre_uchar req_char2 = 0; match_data match_block; match_data *md = &match_block; -const uschar *tables; -const uschar *start_bits = NULL; -USPTR start_match = (USPTR)subject + start_offset; -USPTR end_subject; -USPTR start_partial = NULL; -USPTR req_byte_ptr = start_match - 1; +const pcre_uint8 *tables; +const pcre_uint8 *start_bits = NULL; +PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset; +PCRE_PUCHAR end_subject; +PCRE_PUCHAR start_partial = NULL; +PCRE_PUCHAR req_char_ptr = start_match - 1; -pcre_study_data internal_study; const pcre_study_data *study; +const REAL_PCRE *re = (const REAL_PCRE *)argument_re; -real_pcre internal_re; -const real_pcre *external_re = (const real_pcre *)argument_re; -const real_pcre *re = external_re; +/* Check for the special magic call that measures the size of the stack used +per recursive call of match(). */ + +if (re == NULL && extra_data == NULL && subject == NULL && length == -999 && + start_offset == -999) +#ifdef NO_RECURSE + return -sizeof(heapframe); +#else + return match(NULL, NULL, NULL, 0, NULL, NULL, 0); +#endif /* Plausibility checks */ if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; -if (re == NULL || subject == NULL || - (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; +if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0)) + return PCRE_ERROR_NULL; if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE_ERROR_BADMAGIC. 
However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ + +if (re->magic_number != MAGIC_NUMBER) + return re->magic_number == REVERSED_MAGIC_NUMBER? + PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; +if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; + /* These two settings are used in the code for checking a UTF-8 string that follows immediately afterwards. Other values in the md block are used only during "normal" pcre_exec() processing, not when the JIT support is in use, so they are set up later. */ -utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +utf = md->utf = (re->options & PCRE_UTF8) != 0; md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 : ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0; /* Check a UTF-8 string if required. Pass back the character offset and error code for an invalid string if a results vector is available. */ -#ifdef SUPPORT_UTF8 -if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) +#ifdef SUPPORT_UTF +if (utf && (options & PCRE_NO_UTF8_CHECK) == 0) { int erroroffset; - int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset); + int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset); if (errorcode != 0) { if (offsetcount >= 2) @@ -5988,13 +6261,18 @@ if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) offsets[0] = erroroffset; offsets[1] = errorcode; } +#ifdef COMPILE_PCRE16 + return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)? + PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16; +#else return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)? PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; +#endif } - /* Check that a start_offset points to the start of a UTF-8 character. */ + /* Check that a start_offset points to the start of a UTF character. 
*/ if (start_offset > 0 && start_offset < length && - (((USPTR)subject)[start_offset] & 0xc0) == 0x80) + NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset])) return PCRE_ERROR_BADUTF8_OFFSET; } #endif @@ -6012,15 +6290,16 @@ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_TABLES) == 0 && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0) - return _pcre_jit_exec(re, extra_data->executable_jit, subject, length, - start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) + return PRIV(jit_exec)(re, extra_data->executable_jit, + (const pcre_uchar *)subject, length, start_offset, options, + ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount); #endif /* Carry on with non-JIT matching. This information is for finding all the numbers associated with a given name, for condition testing. */ -md->name_table = (uschar *)re + re->name_table_offset; +md->name_table = (pcre_uchar *)re + re->name_table_offset; md->name_count = re->name_count; md->name_entry_size = re->name_entry_size; @@ -6034,7 +6313,7 @@ md->callout_data = NULL; /* The table pointer is always in native byte order. */ -tables = external_re->tables; +tables = re->tables; if (extra_data != NULL) { @@ -6054,19 +6333,7 @@ if (extra_data != NULL) is a feature that makes it possible to save compiled regex and re-use them in other programs later. */ -if (tables == NULL) tables = _pcre_default_tables; - -/* Check that the first field in the block is the magic number. If it is not, -test for a regex that was compiled on a host of opposite endianness. If this is -the case, flipped values are put in internal_re and internal_study if there was -study data too. 
*/ - -if (re->magic_number != MAGIC_NUMBER) - { - re = _pcre_try_flipped(re, &internal_re, study, &internal_study); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - if (study != NULL) study = &internal_study; - } +if (tables == NULL) tables = PRIV(default_tables); /* Set up other data */ @@ -6076,10 +6343,10 @@ firstline = (re->options & PCRE_FIRSTLINE) != 0; /* The code starts after the real_pcre block and the capture name table. */ -md->start_code = (const uschar *)external_re + re->name_table_offset + +md->start_code = (const pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; -md->start_subject = (USPTR)subject; +md->start_subject = (PCRE_PUCHAR)subject; md->start_offset = start_offset; md->end_subject = md->start_subject + length; end_subject = md->end_subject; @@ -6104,6 +6371,7 @@ md->recursive = NULL; /* No recursion at top level */ md->hasthen = (re->flags & PCRE_HASTHEN) != 0; md->lcc = tables + lcc_offset; +md->fcc = tables + fcc_offset; md->ctypes = tables + ctypes_offset; /* Handle different \R options. */ @@ -6190,7 +6458,7 @@ arg_offset_max = (2*ocount)/3; if (re->top_backref > 0 && re->top_backref >= ocount/3) { ocount = re->top_backref * 3 + 3; - md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); + md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int)); if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY; using_temporary_offsets = TRUE; DPRINTF(("Got memory to hold back references\n")); @@ -6217,7 +6485,7 @@ if (md->offset_vector != NULL) md->offset_vector[0] = md->offset_vector[1] = -1; } -/* Set up the first character to match, if available. The first_byte value is +/* Set up the first character to match, if available. The first_char value is never set for an anchored regular expression, but the anchoring may be forced at run time, so we have to test for anchoring. The first char may be unset for an unanchored pattern, of course. 
If there's no first char and the pattern was @@ -6227,9 +6495,16 @@ if (!anchored) { if ((re->flags & PCRE_FIRSTSET) != 0) { - first_byte = re->first_byte & 255; - if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) - first_byte = md->lcc[first_byte]; + has_first_char = TRUE; + first_char = first_char2 = (pcre_uchar)(re->first_char); + if ((re->flags & PCRE_FCH_CASELESS) != 0) + { + first_char2 = TABLE_GET(first_char, md->fcc, first_char); +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + if (utf && first_char > 127) + first_char2 = UCD_OTHERCASE(first_char); +#endif + } } else if (!startline && study != NULL && @@ -6242,14 +6517,19 @@ character" set. */ if ((re->flags & PCRE_REQCHSET) != 0) { - req_byte = re->req_byte & 255; - req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; - req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */ + has_req_char = TRUE; + req_char = req_char2 = (pcre_uchar)(re->req_char); + if ((re->flags & PCRE_RCH_CASELESS) != 0) + { + req_char2 = TABLE_GET(req_char, md->fcc, req_char); +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + if (utf && req_char > 127) + req_char2 = UCD_OTHERCASE(req_char); +#endif + } } - - /* ==========================================================================*/ /* Loop for handling unanchored repeated matching attempts; for anchored regexs @@ -6257,8 +6537,8 @@ the loop runs just once. */ for(;;) { - USPTR save_end_subject = end_subject; - USPTR new_start_match; + PCRE_PUCHAR save_end_subject = end_subject; + PCRE_PUCHAR new_start_match; /* If firstline is TRUE, the start of the match is constrained to the first line of a multiline string. 
That is, the match must be before or at the first @@ -6268,14 +6548,14 @@ for(;;) if (firstline) { - USPTR t = start_match; -#ifdef SUPPORT_UTF8 - if (utf8) + PCRE_PUCHAR t = start_match; +#ifdef SUPPORT_UTF + if (utf) { while (t < md->end_subject && !IS_NEWLINE(t)) { t++; - while (t < end_subject && (*t & 0xc0) == 0x80) t++; + ACROSSCHAR(t < end_subject, *t, t++); } } else @@ -6292,15 +6572,16 @@ for(;;) if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) { - /* Advance to a unique first byte if there is one. */ + /* Advance to a unique first char if there is one. */ - if (first_byte >= 0) + if (has_first_char) { - if (first_byte_caseless) - while (start_match < end_subject && md->lcc[*start_match] != first_byte) + if (first_char != first_char2) + while (start_match < end_subject && + *start_match != first_char && *start_match != first_char2) start_match++; else - while (start_match < end_subject && *start_match != first_byte) + while (start_match < end_subject && *start_match != first_char) start_match++; } @@ -6310,14 +6591,14 @@ for(;;) { if (start_match > md->start_subject + start_offset) { -#ifdef SUPPORT_UTF8 - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { while (start_match < end_subject && !WAS_NEWLINE(start_match)) { start_match++; - while(start_match < end_subject && (*start_match & 0xc0) == 0x80) - start_match++; + ACROSSCHAR(start_match < end_subject, *start_match, + start_match++); } } else @@ -6344,13 +6625,18 @@ for(;;) while (start_match < end_subject) { register unsigned int c = *start_match; +#ifndef COMPILE_PCRE8 + if (c > 255) c = 255; +#endif if ((start_bits[c/8] & (1 << (c&7))) == 0) { start_match++; -#ifdef SUPPORT_UTF8 - if (utf8) - while(start_match < end_subject && (*start_match & 0xc0) == 0x80) - start_match++; +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + /* In non 8-bit mode, the iteration will stop for + characters > 255 at the beginning or not stop at all. 
*/ + if (utf) + ACROSSCHAR(start_match < end_subject, *start_match, + start_match++); #endif } else break; @@ -6379,8 +6665,8 @@ for(;;) break; } - /* If req_byte is set, we know that that character must appear in the - subject for the match to succeed. If the first character is set, req_byte + /* If req_char is set, we know that that character must appear in the + subject for the match to succeed. If the first character is set, req_char must be later in the subject; otherwise the test starts at the match point. This optimization can save a huge amount of backtracking in patterns with nested unlimited repeats that aren't going to match. Writing separate code @@ -6393,28 +6679,28 @@ for(;;) 32-megabyte string... so we don't do this when the string is sufficiently long. */ - if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX) + if (has_req_char && end_subject - start_match < REQ_BYTE_MAX) { - register USPTR p = start_match + ((first_byte >= 0)? 1 : 0); + register PCRE_PUCHAR p = start_match + (has_first_char? 1:0); /* We don't need to repeat the search if we haven't yet reached the place we found it at last time. */ - if (p > req_byte_ptr) + if (p > req_char_ptr) { - if (req_byte_caseless) + if (req_char != req_char2) { while (p < end_subject) { register int pp = *p++; - if (pp == req_byte || pp == req_byte2) { p--; break; } + if (pp == req_char || pp == req_char2) { p--; break; } } } else { while (p < end_subject) { - if (*p++ == req_byte) { p--; break; } + if (*p++ == req_char) { p--; break; } } } @@ -6431,7 +6717,7 @@ for(;;) found it, so that we don't search again next time round the loop if the start hasn't passed this character yet. 
*/ - req_byte_ptr = p; + req_char_ptr = p; } } } @@ -6486,10 +6772,10 @@ for(;;) case MATCH_THEN: md->ignore_skip_arg = FALSE; new_start_match = start_match + 1; -#ifdef SUPPORT_UTF8 - if (utf8) - while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80) - new_start_match++; +#ifdef SUPPORT_UTF + if (utf) + ACROSSCHAR(new_start_match < end_subject, *new_start_match, + new_start_match++); #endif break; @@ -6527,9 +6813,13 @@ for(;;) /* If we have just passed a CR and we are now at a LF, and the pattern does not contain any explicit matches for \r or \n, and the newline option is CRLF - or ANY or ANYCRLF, advance the match position by one more character. */ + or ANY or ANYCRLF, advance the match position by one more character. In + normal matching start_match will aways be greater than the first position at + this stage, but a failed *SKIP can cause a return at the same point, which is + why the first test exists. */ - if (start_match[-1] == CHAR_CR && + if (start_match > (PCRE_PUCHAR)subject + start_offset && + start_match[-1] == CHAR_CR && start_match < end_subject && *start_match == CHAR_NL && (re->flags & PCRE_HASCRORLF) == 0 && @@ -6575,7 +6865,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) } if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE; DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(md->offset_vector); + (PUBL(free))(md->offset_vector); } /* Set the return code to the number of captured strings, or 0 if there were @@ -6616,7 +6906,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT) /* Return MARK data if requested */ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) - *(extra_data->mark) = (unsigned char *)(md->mark); + *(extra_data->mark) = (pcre_uchar *)md->mark; DPRINTF((">>>> returning %d\n", rc)); return rc; } @@ -6627,7 +6917,7 @@ attempt has failed at all permitted starting positions. 
*/ if (using_temporary_offsets) { DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(md->offset_vector); + (PUBL(free))(md->offset_vector); } /* For anything other than nomatch or partial match, just return the code. */ @@ -6646,8 +6936,8 @@ if (start_partial != NULL) md->mark = NULL; if (offsetcount > 1) { - offsets[0] = (int)(start_partial - (USPTR)subject); - offsets[1] = (int)(end_subject - (USPTR)subject); + offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject); + offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject); } rc = PCRE_ERROR_PARTIAL; } @@ -6663,7 +6953,7 @@ else /* Return the MARK data if it has been requested. */ if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0) - *(extra_data->mark) = (unsigned char *)(md->nomatch_mark); + *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark; return rc; } diff --git a/harbour/src/3rd/pcre/pcrefinf.c b/harbour/src/3rd/pcre/pcrefinf.c index 5e994635b6..72c553fe3e 100644 --- a/harbour/src/3rd/pcre/pcrefinf.c +++ b/harbour/src/3rd/pcre/pcrefinf.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -65,13 +65,17 @@ Arguments: Returns: 0 if data returned, negative on error */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what, - void *where) +pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, + int what, void *where) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_fullinfo(const pcre16 *argument_re, const pcre16_extra *extra_data, + int what, void *where) +#endif { -real_pcre internal_re; -pcre_study_data internal_study; -const real_pcre *re = (const real_pcre *)argument_re; +const REAL_PCRE *re = (const REAL_PCRE *)argument_re; const pcre_study_data *study = NULL; if (re == NULL || where == NULL) return PCRE_ERROR_NULL; @@ -79,12 +83,18 @@ if (re == NULL || where == NULL) return PCRE_ERROR_NULL; if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) study = (const pcre_study_data *)extra_data->study_data; +/* Check that the first field in the block is the magic number. If it is not, +return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to +REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which +means that the pattern is likely compiled with different endianness. */ + if (re->magic_number != MAGIC_NUMBER) - { - re = _pcre_try_flipped(re, &internal_re, study, &internal_study); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - if (study != NULL) study = &internal_study; - } + return re->magic_number == REVERSED_MAGIC_NUMBER? 
+ PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC; + +/* Check that this pattern was compiled in the correct bit mode */ + +if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; switch (what) { @@ -106,11 +116,10 @@ switch (what) (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra_data->executable_jit != NULL)? - _pcre_jit_get_size(extra_data->executable_jit) : 0; + PRIV(jit_get_size)(extra_data->executable_jit) : 0; #else *((size_t *)where) = 0; #endif - break; case PCRE_INFO_CAPTURECOUNT: @@ -123,7 +132,7 @@ switch (what) case PCRE_INFO_FIRSTBYTE: *((int *)where) = - ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte : + ((re->flags & PCRE_FIRSTSET) != 0)? re->first_char : ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2; break; @@ -131,7 +140,7 @@ switch (what) block, not the internal copy (with flipped integer fields). */ case PCRE_INFO_FIRSTTABLE: - *((const uschar **)where) = + *((const pcre_uint8 **)where) = (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)? ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL; break; @@ -139,7 +148,7 @@ switch (what) case PCRE_INFO_MINLENGTH: *((int *)where) = (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)? - (int)study->minlength : -1; + (int)(study->minlength) : -1; break; case PCRE_INFO_JIT: @@ -150,7 +159,7 @@ switch (what) case PCRE_INFO_LASTLITERAL: *((int *)where) = - ((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1; + ((re->flags & PCRE_REQCHSET) != 0)? 
re->req_char : -1; break; case PCRE_INFO_NAMEENTRYSIZE: @@ -162,11 +171,11 @@ switch (what) break; case PCRE_INFO_NAMETABLE: - *((const uschar **)where) = (const uschar *)re + re->name_table_offset; + *((const pcre_uchar **)where) = (const pcre_uchar *)re + re->name_table_offset; break; case PCRE_INFO_DEFAULT_TABLES: - *((const uschar **)where) = (const uschar *)(_pcre_default_tables); + *((const pcre_uint8 **)where) = (const pcre_uint8 *)(PRIV(default_tables)); break; /* From release 8.00 this will always return TRUE because NOPARTIAL is diff --git a/harbour/src/3rd/pcre/pcreget.c b/harbour/src/3rd/pcre/pcreget.c index 7dbd8d86e8..07617fe53f 100644 --- a/harbour/src/3rd/pcre/pcreget.c +++ b/harbour/src/3rd/pcre/pcreget.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -65,14 +65,20 @@ Returns: the number of the named parentheses, or a negative number (PCRE_ERROR_NOSUBSTRING) if not found */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_stringnumber(const pcre *code, const char *stringname) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname) +#endif { int rc; int entrysize; int top, bot; -uschar *nametable; +pcre_uchar *nametable; +#ifdef COMPILE_PCRE8 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; @@ -81,14 +87,26 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; +#endif +#ifdef COMPILE_PCRE16 +if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) 
+ return rc; +if (top <= 0) return PCRE_ERROR_NOSUBSTRING; + +if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) + return rc; +if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) + return rc; +#endif bot = 0; while (top > bot) { int mid = (top + bot) / 2; - uschar *entry = nametable + entrysize*mid; - int c = strcmp(stringname, (char *)(entry + 2)); - if (c == 0) return (entry[0] << 8) + entry[1]; + pcre_uchar *entry = nametable + entrysize*mid; + int c = STRCMP_UC_UC((pcre_uchar *)stringname, + (pcre_uchar *)(entry + IMM2_SIZE)); + if (c == 0) return GET2(entry, 0); if (c > 0) bot = mid + 1; else top = mid; } @@ -114,15 +132,22 @@ Returns: the length of each entry, or a negative number (PCRE_ERROR_NOSUBSTRING) if not found */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_stringtable_entries(const pcre *code, const char *stringname, char **firstptr, char **lastptr) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname, + PCRE_UCHAR16 **firstptr, PCRE_UCHAR16 **lastptr) +#endif { int rc; int entrysize; int top, bot; -uschar *nametable, *lastentry; +pcre_uchar *nametable, *lastentry; +#ifdef COMPILE_PCRE8 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) return rc; if (top <= 0) return PCRE_ERROR_NOSUBSTRING; @@ -131,30 +156,49 @@ if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) return rc; if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) return rc; +#endif +#ifdef COMPILE_PCRE16 +if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0) + return rc; +if (top <= 0) return PCRE_ERROR_NOSUBSTRING; + +if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0) + return rc; +if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0) + return rc; +#endif lastentry = nametable + entrysize * (top - 
1); bot = 0; while (top > bot) { int mid = (top + bot) / 2; - uschar *entry = nametable + entrysize*mid; - int c = strcmp(stringname, (char *)(entry + 2)); + pcre_uchar *entry = nametable + entrysize*mid; + int c = STRCMP_UC_UC((pcre_uchar *)stringname, + (pcre_uchar *)(entry + IMM2_SIZE)); if (c == 0) { - uschar *first = entry; - uschar *last = entry; + pcre_uchar *first = entry; + pcre_uchar *last = entry; while (first > nametable) { - if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break; + if (STRCMP_UC_UC((pcre_uchar *)stringname, + (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break; first -= entrysize; } while (last < lastentry) { - if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break; + if (STRCMP_UC_UC((pcre_uchar *)stringname, + (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break; last += entrysize; } +#ifdef COMPILE_PCRE8 *firstptr = (char *)first; *lastptr = (char *)last; +#else + *firstptr = (PCRE_UCHAR16 *)first; + *lastptr = (PCRE_UCHAR16 *)last; +#endif return entrysize; } if (c > 0) bot = mid + 1; else top = mid; @@ -182,23 +226,39 @@ Returns: the number of the first that is set, or a negative number on error */ +#ifdef COMPILE_PCRE8 static int get_first_set(const pcre *code, const char *stringname, int *ovector) +#else +static int +get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector) +#endif { -const real_pcre *re = (const real_pcre *)code; +const REAL_PCRE *re = (const REAL_PCRE *)code; int entrysize; +pcre_uchar *entry; +#ifdef COMPILE_PCRE8 char *first, *last; -uschar *entry; +#else +PCRE_UCHAR16 *first, *last; +#endif + +#ifdef COMPILE_PCRE8 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) return pcre_get_stringnumber(code, stringname); entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last); +#else +if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0) + return pcre16_get_stringnumber(code, stringname); +entrysize = 
pcre16_get_stringtable_entries(code, stringname, &first, &last); +#endif if (entrysize <= 0) return entrysize; -for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize) +for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize) { - int n = (entry[0] << 8) + entry[1]; + int n = GET2(entry, 0); if (ovector[n*2] >= 0) return n; } -return (first[0] << 8) + first[1]; +return GET2(entry, 0); } @@ -231,9 +291,15 @@ Returns: if successful: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_copy_substring(const char *subject, int *ovector, int stringcount, int stringnumber, char *buffer, int size) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, + int stringnumber, PCRE_UCHAR16 *buffer, int size) +#endif { int yield; if (stringnumber < 0 || stringnumber >= stringcount) @@ -241,7 +307,7 @@ if (stringnumber < 0 || stringnumber >= stringcount) stringnumber *= 2; yield = ovector[stringnumber+1] - ovector[stringnumber]; if (size < yield + 1) return PCRE_ERROR_NOMEMORY; -memcpy(buffer, subject + ovector[stringnumber], yield); +memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield)); buffer[yield] = 0; return yield; } @@ -276,13 +342,25 @@ Returns: if successful: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, - int stringcount, const char *stringname, char *buffer, int size) +pcre_copy_named_substring(const pcre *code, const char *subject, + int *ovector, int stringcount, const char *stringname, + char *buffer, int size) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject, + int *ovector, int stringcount, PCRE_SPTR16 stringname, + PCRE_UCHAR16 *buffer, int size) +#endif { 
int n = get_first_set(code, stringname, ovector); if (n <= 0) return n; +#ifdef COMPILE_PCRE8 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size); +#else +return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size); +#endif } @@ -308,29 +386,39 @@ Returns: if successful: 0 PCRE_ERROR_NOMEMORY (-6) failed to get store */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_substring_list(const char *subject, int *ovector, int stringcount, const char ***listptr) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount, + PCRE_SPTR16 **listptr) +#endif { int i; -int size = sizeof(char *); +int size = sizeof(pcre_uchar *); int double_count = stringcount * 2; -char **stringlist; -char *p; +pcre_uchar **stringlist; +pcre_uchar *p; for (i = 0; i < double_count; i += 2) - size += sizeof(char *) + ovector[i+1] - ovector[i] + 1; + size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1); -stringlist = (char **)(pcre_malloc)(size); +stringlist = (pcre_uchar **)(PUBL(malloc))(size); if (stringlist == NULL) return PCRE_ERROR_NOMEMORY; +#ifdef COMPILE_PCRE8 *listptr = (const char **)stringlist; -p = (char *)(stringlist + stringcount + 1); +#else +*listptr = (PCRE_SPTR16 *)stringlist; +#endif +p = (pcre_uchar *)(stringlist + stringcount + 1); for (i = 0; i < double_count; i += 2) { int len = ovector[i+1] - ovector[i]; - memcpy(p, subject + ovector[i], len); + memcpy(p, subject + ovector[i], IN_UCHARS(len)); *stringlist++ = p; p += len; *p++ = 0; @@ -347,16 +435,22 @@ return 0; *************************************************/ /* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. +programs that can call its functions, but not free() or (PUBL(free))() +directly. 
Argument: the result of a previous pcre_get_substring_list() Returns: nothing */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre_free_substring_list(const char **pointer) +#else +PCRE_EXP_DEFN void PCRE_CALL_CONVENTION +pcre16_free_substring_list(PCRE_SPTR16 *pointer) +#endif { -(pcre_free)((void *)pointer); +(PUBL(free))((void *)pointer); } @@ -386,21 +480,31 @@ Returns: if successful: PCRE_ERROR_NOSUBSTRING (-7) substring not present */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_get_substring(const char *subject, int *ovector, int stringcount, int stringnumber, const char **stringptr) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount, + int stringnumber, PCRE_SPTR16 *stringptr) +#endif { int yield; -char *substring; +pcre_uchar *substring; if (stringnumber < 0 || stringnumber >= stringcount) return PCRE_ERROR_NOSUBSTRING; stringnumber *= 2; yield = ovector[stringnumber+1] - ovector[stringnumber]; -substring = (char *)(pcre_malloc)(yield + 1); +substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1)); if (substring == NULL) return PCRE_ERROR_NOMEMORY; -memcpy(substring, subject + ovector[stringnumber], yield); +memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield)); substring[yield] = 0; -*stringptr = substring; +#ifdef COMPILE_PCRE8 +*stringptr = (const char *)substring; +#else +*stringptr = (PCRE_SPTR16)substring; +#endif return yield; } @@ -433,13 +537,25 @@ Returns: if successful: PCRE_ERROR_NOSUBSTRING (-7) no such captured substring */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, - int stringcount, const char *stringname, const char **stringptr) +pcre_get_named_substring(const pcre *code, const char *subject, + int *ovector, int stringcount, const char *stringname, + const char **stringptr) +#else +PCRE_EXP_DEFN int 
PCRE_CALL_CONVENTION +pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject, + int *ovector, int stringcount, PCRE_SPTR16 stringname, + PCRE_SPTR16 *stringptr) +#endif { int n = get_first_set(code, stringname, ovector); if (n <= 0) return n; +#ifdef COMPILE_PCRE8 return pcre_get_substring(subject, ovector, stringcount, n, stringptr); +#else +return pcre16_get_substring(subject, ovector, stringcount, n, stringptr); +#endif } @@ -450,16 +566,22 @@ return pcre_get_substring(subject, ovector, stringcount, n, stringptr); *************************************************/ /* This function exists for the benefit of people calling PCRE from non-C -programs that can call its functions, but not free() or (pcre_free)() directly. +programs that can call its functions, but not free() or (PUBL(free))() +directly. Argument: the result of a previous pcre_get_substring() Returns: nothing */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION pcre_free_substring(const char *pointer) +#else +PCRE_EXP_DEFN void PCRE_CALL_CONVENTION +pcre16_free_substring(PCRE_SPTR16 pointer) +#endif { -(pcre_free)((void *)pointer); +(PUBL(free))((void *)pointer); } /* End of pcre_get.c */ diff --git a/harbour/src/3rd/pcre/pcreglob.c b/harbour/src/3rd/pcre/pcreglob.c index 6824880c59..835eae9b9e 100644 --- a/harbour/src/3rd/pcre/pcreglob.c +++ b/harbour/src/3rd/pcre/pcreglob.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -67,24 +67,18 @@ static void LocalPcreFree(void* aPtr) { free(aPtr); } -PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = LocalPcreMalloc; -PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = LocalPcreFree; -PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = LocalPcreMalloc; -PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = LocalPcreFree; -PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; +PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = LocalPcreMalloc; +PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = LocalPcreFree; +PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = LocalPcreMalloc; +PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = LocalPcreFree; +PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL; #elif !defined VPCOMPAT -#if defined( __cplusplus ) && !defined( __IBMCPP__ ) -extern "C" { -#endif -PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc; -PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free; -PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc; -PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free; -PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL; -#if defined( __cplusplus ) && !defined( __IBMCPP__ ) -} -#endif +PCRE_EXP_DATA_DEFN void *(*PUBL(malloc))(size_t) = malloc; +PCRE_EXP_DATA_DEFN void (*PUBL(free))(void *) = free; +PCRE_EXP_DATA_DEFN void *(*PUBL(stack_malloc))(size_t) = malloc; +PCRE_EXP_DATA_DEFN void (*PUBL(stack_free))(void *) = free; +PCRE_EXP_DATA_DEFN int (*PUBL(callout))(PUBL(callout_block) *) = NULL; #endif /* End of pcre_globals.c */ diff --git a/harbour/src/3rd/pcre/pcreinal.h b/harbour/src/3rd/pcre/pcreinal.h index 6ea397a39e..e5a4b6a526 100644 --- 
a/harbour/src/3rd/pcre/pcreinal.h +++ b/harbour/src/3rd/pcre/pcreinal.h @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2011 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -40,7 +40,8 @@ POSSIBILITY OF SUCH DAMAGE. /* This header contains definitions that are shared between the different modules, but which are not relevant to the exported API. This includes some -functions whose names all begin with "_pcre_". */ +functions whose names all begin with "_pcre_" or "_pcre16_" depending on +the PRIV macro. */ #ifndef PCRE_INTERNAL_H #define PCRE_INTERNAL_H @@ -51,20 +52,39 @@ functions whose names all begin with "_pcre_". */ #define PCRE_DEBUG #endif -/* We do not support both EBCDIC and UTF-8 at the same time. The "configure" -script prevents both being selected, but not everybody uses "configure". */ - -#if defined EBCDIC && defined SUPPORT_UTF8 -#error The use of both EBCDIC and SUPPORT_UTF8 is not supported. +/* PCRE is compiled as an 8 bit library if it is not requested otherwise. */ +#ifndef COMPILE_PCRE16 +#define COMPILE_PCRE8 #endif -/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The +/* If SUPPORT_UCP is defined, SUPPORT_UTF must also be defined. The "configure" script ensures this, but not everybody uses "configure". */ -#if defined SUPPORT_UCP && !defined SUPPORT_UTF8 +#if defined SUPPORT_UCP && !(defined SUPPORT_UTF) +#define SUPPORT_UTF 1 +#endif + +/* We define SUPPORT_UTF if SUPPORT_UTF8 is enabled for compatibility +reasons with existing code. */ + +#if defined SUPPORT_UTF8 && !(defined SUPPORT_UTF) +#define SUPPORT_UTF 1 +#endif + +/* Fixme: SUPPORT_UTF8 should be eventually disappear from the code. +Until then we define it if SUPPORT_UTF is defined. 
*/ + +#if defined SUPPORT_UTF && !(defined SUPPORT_UTF8) #define SUPPORT_UTF8 1 #endif +/* We do not support both EBCDIC and UTF-8/16 at the same time. The "configure" +script prevents both being selected, but not everybody uses "configure". */ + +#if defined EBCDIC && defined SUPPORT_UTF +#error The use of both EBCDIC and SUPPORT_UTF8/16 is not supported. +#endif + /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef inline, and there are *still* stupid compilers about that don't like indented pre-processor statements, or at least there were when I first wrote this. After @@ -158,12 +178,14 @@ set, we ensure here that it has no effect. */ #define PCRE_CALL_CONVENTION #endif -/* We need to have types that specify unsigned 16-bit and 32-bit integers. We +/* We need to have types that specify unsigned 8, 16 and 32-bit integers. We cannot determine these outside the compilation (e.g. by running a program as part of "configure") because PCRE is often cross-compiled for use on other systems. Instead we make use of the maximum sizes that are available at preprocessor time in standard C environments. */ +typedef unsigned char pcre_uint8; + #if USHRT_MAX == 65535 typedef unsigned short pcre_uint16; typedef short pcre_int16; @@ -206,12 +228,47 @@ by "configure". */ /* All character handling must be done as unsigned characters. Otherwise there are problems with top-bit-set characters and functions such as isspace(). -However, we leave the interface to the outside world as char *, because that -should make things easier for callers. We define a short type for unsigned char -to save lots of typing. I tried "uchar", but it causes problems on Digital -Unix, where it is defined in sys/types, so use "uschar" instead. */ +However, we leave the interface to the outside world as char * or short *, +because that should make things easier for callers. This character type is +called pcre_uchar. 
-typedef unsigned char uschar; +The IN_UCHARS macro multiply its argument with the byte size of the current +pcre_uchar type. Useful for memcpy and such operations, whose require the +byte size of their input/output buffers. + +The MAX_255 macro checks whether its pcre_uchar input is less than 256. + +The TABLE_GET macro is designed for accessing elements of tables whose contain +exactly 256 items. When the character is able to contain more than 256 +items, some check is needed before accessing these tables. +*/ + +#ifdef COMPILE_PCRE8 + +typedef unsigned char pcre_uchar; +#define IN_UCHARS(x) (x) +#define MAX_255(c) 1 +#define TABLE_GET(c, table, default) ((table)[c]) + +#else + +#ifdef COMPILE_PCRE16 +#if USHRT_MAX != 65535 +/* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in +pcre.h(.in) and disable (comment out) this message. */ +#error Warning: PCRE_UCHAR16 is not a 16 bit data type. +#endif + +typedef pcre_uint16 pcre_uchar; +#define IN_UCHARS(x) ((x) << 1) +#define MAX_255(c) ((c) <= 255u) +#define TABLE_GET(c, table, default) (MAX_255(c)? ((table)[c]):(default)) + +#else +#error Unsupported compiling mode +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ /* This is an unsigned int value that no character can ever have. UTF-8 characters only go up to 0x7fffffff (though Unicode doesn't go beyond @@ -234,8 +291,8 @@ start/end of string field names are. */ #define IS_NEWLINE(p) \ ((NLBLOCK->nltype != NLTYPE_FIXED)? \ ((p) < NLBLOCK->PSEND && \ - _pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ - utf8)) \ + PRIV(is_newline)((p), NLBLOCK->nltype, NLBLOCK->PSEND, \ + &(NLBLOCK->nllen), utf)) \ : \ ((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ (p)[0] == NLBLOCK->nl[0] && \ @@ -248,8 +305,8 @@ start/end of string field names are. */ #define WAS_NEWLINE(p) \ ((NLBLOCK->nltype != NLTYPE_FIXED)? 
\ ((p) > NLBLOCK->PSSTART && \ - _pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ - &(NLBLOCK->nllen), utf8)) \ + PRIV(was_newline)((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ + &(NLBLOCK->nllen), utf)) \ : \ ((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ (p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ @@ -267,15 +324,11 @@ used for the external interface and appears in pcre.h, which is why its name must begin with PCRE_. */ #ifdef CUSTOM_SUBJECT_PTR -#define PCRE_SPTR CUSTOM_SUBJECT_PTR -#define USPTR CUSTOM_SUBJECT_PTR +#define PCRE_PUCHAR CUSTOM_SUBJECT_PTR #else -#define PCRE_SPTR const char * -#define USPTR const unsigned char * +#define PCRE_PUCHAR const pcre_uchar * #endif - - /* Include the public PCRE header and the definitions of UCP character property values. */ @@ -343,6 +396,8 @@ The macros are controlled by the value of LINK_SIZE. This defaults to 2 in the config.h file, but can be overridden by using -D on the command line. This is automated on Unix systems via the "configure" command. */ +#ifdef COMPILE_PCRE8 + #if LINK_SIZE == 2 #define PUT(a,n,d) \ @@ -379,13 +434,54 @@ is automated on Unix systems via the "configure" command. 
*/ #define GET(a,n) \ (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3]) -#define MAX_PATTERN_SIZE (1 << 30) /* Keep it positive */ - +/* Keep it positive */ +#define MAX_PATTERN_SIZE (1 << 30) #else #error LINK_SIZE must be either 2, 3, or 4 #endif +#else /* COMPILE_PCRE8 */ + +#ifdef COMPILE_PCRE16 + +#if LINK_SIZE == 2 + +#undef LINK_SIZE +#define LINK_SIZE 1 + +#define PUT(a,n,d) \ + (a[n] = (d)) + +#define GET(a,n) \ + (a[n]) + +#define MAX_PATTERN_SIZE (1 << 16) + +#elif LINK_SIZE == 3 || LINK_SIZE == 4 + +#undef LINK_SIZE +#define LINK_SIZE 2 + +#define PUT(a,n,d) \ + (a[n] = (d) >> 16), \ + (a[(n)+1] = (d) & 65535) + +#define GET(a,n) \ + (((a)[n] << 16) | (a)[(n)+1]) + +/* Keep it positive */ +#define MAX_PATTERN_SIZE (1 << 30) + +#else +#error LINK_SIZE must be either 2, 3, or 4 +#endif + +#else +#error Unsupported compiling mode +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ /* Convenience macro defined in terms of the others */ @@ -396,6 +492,10 @@ is automated on Unix systems via the "configure" command. */ offsets changes. There are used for repeat counts and for other things such as capturing parenthesis numbers in back references. */ +#ifdef COMPILE_PCRE8 + +#define IMM2_SIZE 2 + #define PUT2(a,n,d) \ a[n] = (d) >> 8; \ a[(n)+1] = (d) & 255 @@ -403,17 +503,39 @@ capturing parenthesis numbers in back references. */ #define GET2(a,n) \ (((a)[n] << 8) | (a)[(n)+1]) -#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 +#else /* COMPILE_PCRE8 */ +#ifdef COMPILE_PCRE16 -/* When UTF-8 encoding is being used, a character is no longer just a single -byte. The macros for character handling generate simple sequences when used in -byte-mode, and more complicated ones for UTF-8 characters. GETCHARLENTEST is -not used when UTF-8 is not supported, so it is not defined, and BACKCHAR should -never be called in byte mode. To make sure they can never even appear when -UTF-8 support is omitted, we don't even define them. 
*/ +#define IMM2_SIZE 1 -#ifndef SUPPORT_UTF8 +#define PUT2(a,n,d) \ + a[n] = d + +#define GET2(a,n) \ + a[n] + +#else +#error Unsupported compiling mode +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ + +#define PUT2INC(a,n,d) PUT2(a,n,d), a += IMM2_SIZE + +/* When UTF encoding is being used, a character is no longer just a single +character. The macros for character handling generate simple sequences when +used in character-mode, and more complicated ones for UTF characters. +GETCHARLENTEST and other macros are not used when UTF is not supported, +so they are not defined. To make sure they can never even appear when +UTF support is omitted, we don't even define them. */ + +#ifndef SUPPORT_UTF + +/* #define MAX_VALUE_FOR_SINGLE_CHAR */ +/* #define HAS_EXTRALEN(c) */ +/* #define GET_EXTRALEN(c) */ +/* #define NOT_FIRSTCHAR(c) */ #define GETCHAR(c, eptr) c = *eptr; #define GETCHARTEST(c, eptr) c = *eptr; #define GETCHARINC(c, eptr) c = *eptr++; @@ -421,14 +543,36 @@ UTF-8 support is omitted, we don't even define them. */ #define GETCHARLEN(c, eptr, len) c = *eptr; /* #define GETCHARLENTEST(c, eptr, len) */ /* #define BACKCHAR(eptr) */ +/* #define FORWARDCHAR(eptr) */ +/* #define ACROSSCHAR(condition, eptr, action) */ -#else /* SUPPORT_UTF8 */ +#else /* SUPPORT_UTF */ + +#ifdef COMPILE_PCRE8 /* These macros were originally written in the form of loops that used data -from the tables whose names start with _pcre_utf8_table. They were rewritten by +from the tables whose names start with PRIV(utf8_table). They were rewritten by a user so as not to use loops, because in some environments this gives a significant performance advantage, and it seems never to do any harm. */ +/* Tells the biggest code point which can be encoded as a single character. */ + +#define MAX_VALUE_FOR_SINGLE_CHAR 127 + +/* Tests whether the code point needs extra characters to decode. 
*/ + +#define HAS_EXTRALEN(c) ((c) >= 0xc0) + +/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. +Otherwise it has an undefined behaviour. */ + +#define GET_EXTRALEN(c) (PRIV(utf8_table4)[(c) & 0x3f]) + +/* Returns TRUE, if the given character is not the first character +of a UTF sequence. */ + +#define NOT_FIRSTCHAR(c) (((c) & 0xc0) == 0x80) + /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer. */ @@ -463,7 +607,7 @@ pointer. */ #define GETCHARTEST(c, eptr) \ c = *eptr; \ - if (utf8 && c >= 0xc0) GETUTF8(c, eptr); + if (utf && c >= 0xc0) GETUTF8(c, eptr); /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing the pointer. */ @@ -511,7 +655,7 @@ This is called when we don't know if we are in UTF-8 mode. */ #define GETCHARINCTEST(c, eptr) \ c = *eptr++; \ - if (utf8 && c >= 0xc0) GETUTF8INC(c, eptr); + if (utf && c >= 0xc0) GETUTF8INC(c, eptr); /* Base macro to pick up the remaining bytes of a UTF-8 character, not advancing the pointer, incrementing the length. */ @@ -563,7 +707,7 @@ do not know if we are in UTF-8 mode. */ #define GETCHARLENTEST(c, eptr, len) \ c = *eptr; \ - if (utf8 && c >= 0xc0) GETUTF8LEN(c, eptr, len); + if (utf && c >= 0xc0) GETUTF8LEN(c, eptr, len); /* If the pointer is not at the start of a character, move it back until it is. This is called only in UTF-8 mode - we don't put a test within the macro @@ -571,7 +715,116 @@ because almost all calls are already within a block of UTF-8 only code. */ #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr-- -#endif /* SUPPORT_UTF8 */ +/* Same as above, just in the other direction. */ +#define FORWARDCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr++ + +/* Same as above, but it allows a fully customizable form. 
*/ +#define ACROSSCHAR(condition, eptr, action) \ + while((condition) && ((eptr) & 0xc0) == 0x80) action + +#else /* COMPILE_PCRE8 */ + +#ifdef COMPILE_PCRE16 + +/* Tells the biggest code point which can be encoded as a single character. */ + +#define MAX_VALUE_FOR_SINGLE_CHAR 65535 + +/* Tests whether the code point needs extra characters to decode. */ + +#define HAS_EXTRALEN(c) (((c) & 0xfc00) == 0xd800) + +/* Returns with the additional number of characters if IS_MULTICHAR(c) is TRUE. +Otherwise it has an undefined behaviour. */ + +#define GET_EXTRALEN(c) 1 + +/* Returns TRUE, if the given character is not the first character +of a UTF sequence. */ + +#define NOT_FIRSTCHAR(c) (((c) & 0xfc00) == 0xdc00) + +/* Base macro to pick up the low surrogate of a UTF-16 character, not +advancing the pointer. */ + +#define GETUTF16(c, eptr) \ + { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; } + +/* Get the next UTF-16 character, not advancing the pointer. This is called when +we know we are in UTF-16 mode. */ + +#define GETCHAR(c, eptr) \ + c = *eptr; \ + if ((c & 0xfc00) == 0xd800) GETUTF16(c, eptr); + +/* Get the next UTF-16 character, testing for UTF-16 mode, and not advancing the +pointer. */ + +#define GETCHARTEST(c, eptr) \ + c = *eptr; \ + if (utf && (c & 0xfc00) == 0xd800) GETUTF16(c, eptr); + +/* Base macro to pick up the low surrogate of a UTF-16 character, advancing +the pointer. */ + +#define GETUTF16INC(c, eptr) \ + { c = (((c & 0x3ff) << 10) | (*eptr++ & 0x3ff)) + 0x10000; } + +/* Get the next UTF-16 character, advancing the pointer. This is called when we +know we are in UTF-16 mode. */ + +#define GETCHARINC(c, eptr) \ + c = *eptr++; \ + if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); + +/* Get the next character, testing for UTF-16 mode, and advancing the pointer. +This is called when we don't know if we are in UTF-16 mode. 
*/ + +#define GETCHARINCTEST(c, eptr) \ + c = *eptr++; \ + if (utf && (c & 0xfc00) == 0xd800) GETUTF16INC(c, eptr); + +/* Base macro to pick up the low surrogate of a UTF-16 character, not +advancing the pointer, incrementing the length. */ + +#define GETUTF16LEN(c, eptr, len) \ + { c = (((c & 0x3ff) << 10) | (eptr[1] & 0x3ff)) + 0x10000; len++; } + +/* Get the next UTF-16 character, not advancing the pointer, incrementing +length if there is a low surrogate. This is called when we know we are in +UTF-16 mode. */ + +#define GETCHARLEN(c, eptr, len) \ + c = *eptr; \ + if ((c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); + +/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the +pointer, incrementing length if there is a low surrogate. This is called when +we do not know if we are in UTF-16 mode. */ + +#define GETCHARLENTEST(c, eptr, len) \ + c = *eptr; \ + if (utf && (c & 0xfc00) == 0xd800) GETUTF16LEN(c, eptr, len); + +/* If the pointer is not at the start of a character, move it back until +it is. This is called only in UTF-16 mode - we don't put a test within the +macro because almost all calls are already within a block of UTF-16 only +code. */ + +#define BACKCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr-- + +/* Same as above, just in the other direction. */ +#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00) == 0xdc00) eptr++ + +/* Same as above, but it allows a fully customizable form. */ +#define ACROSSCHAR(condition, eptr, action) \ + if ((condition) && ((eptr) & 0xfc00) == 0xdc00) action + +#endif + +#endif /* COMPILE_PCRE8 */ + +#endif /* SUPPORT_UTF */ /* In case there is no definition of offsetof() provided - though any proper @@ -588,13 +841,21 @@ are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as the restrictions on partial matching have been lifted. It remains for backwards compatibility. 
*/ -#define PCRE_NOPARTIAL 0x0001 /* can't use partial with this regex */ -#define PCRE_FIRSTSET 0x0002 /* first_byte is set */ -#define PCRE_REQCHSET 0x0004 /* req_byte is set */ -#define PCRE_STARTLINE 0x0008 /* start after \n for multiline */ -#define PCRE_JCHANGED 0x0010 /* j option used in regex */ -#define PCRE_HASCRORLF 0x0020 /* explicit \r or \n in pattern */ -#define PCRE_HASTHEN 0x0040 /* pattern contains (*THEN) */ +#ifdef COMPILE_PCRE8 +#define PCRE_MODE 0x0001 /* compiled in 8 bit mode */ +#endif +#ifdef COMPILE_PCRE16 +#define PCRE_MODE 0x0002 /* compiled in 16 bit mode */ +#endif +#define PCRE_FIRSTSET 0x0010 /* first_char is set */ +#define PCRE_FCH_CASELESS 0x0020 /* caseless first char */ +#define PCRE_REQCHSET 0x0040 /* req_byte is set */ +#define PCRE_RCH_CASELESS 0x0080 /* caseless requested char */ +#define PCRE_STARTLINE 0x0100 /* start after \n for multiline */ +#define PCRE_NOPARTIAL 0x0200 /* can't use partial with this regex */ +#define PCRE_JCHANGED 0x0400 /* j option used in regex */ +#define PCRE_HASCRORLF 0x0800 /* explicit \r or \n in pattern */ +#define PCRE_HASTHEN 0x1000 /* pattern contains (*THEN) */ /* Flags for the "extra" block produced by pcre_study(). */ @@ -628,11 +889,15 @@ time, run time, or study time, respectively. */ #define PUBLIC_STUDY_OPTIONS \ PCRE_STUDY_JIT_COMPILE -/* Magic number to provide a small check against being handed junk. Also used -to detect whether a pattern was compiled on a host of different endianness. */ +/* Magic number to provide a small check against being handed junk. */ #define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ +/* This variable is used to detect a loaded regular expression +in different endianness. */ + +#define REVERSED_MAGIC_NUMBER 0x45524350UL /* 'ERCP' */ + /* Negative values for the firstchar and reqchar variables */ #define REQ_UNSET (-2) @@ -643,12 +908,6 @@ req_byte match. 
*/ #define REQ_BYTE_MAX 1000 -/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a -variable-length repeat, or a anything other than literal characters. */ - -#define REQ_CASELESS 0x0100 /* indicates caselessness */ -#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */ - /* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in environments where these macros are defined elsewhere. Unfortunately, there is no way to do the same for the typedef. */ @@ -677,7 +936,7 @@ for) in a minority area (EBCDIC platforms), this is not sensible. Any application that did need both could compile two versions of the library, using macros to give the functions distinct names. */ -#ifndef SUPPORT_UTF8 +#ifndef SUPPORT_UTF /* UTF-8 support is not enabled; use the platform-dependent character literals so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */ @@ -937,11 +1196,16 @@ so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */ #define STRING_ANYCRLF_RIGHTPAR "ANYCRLF)" #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" -#define STRING_UTF8_RIGHTPAR "UTF8)" +#ifdef COMPILE_PCRE8 +#define STRING_UTF_RIGHTPAR "UTF8)" +#endif +#ifdef COMPILE_PCRE16 +#define STRING_UTF_RIGHTPAR "UTF16)" +#endif #define STRING_UCP_RIGHTPAR "UCP)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" -#else /* SUPPORT_UTF8 */ +#else /* SUPPORT_UTF */ /* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This works in both modes non-EBCDIC platforms, and on EBCDIC platforms in UTF-8 mode @@ -1192,11 +1456,16 @@ only. 
*/ #define STRING_ANYCRLF_RIGHTPAR STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS -#define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#ifdef COMPILE_PCRE8 +#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#endif +#ifdef COMPILE_PCRE16 +#define STRING_UTF_RIGHTPAR STR_U STR_T STR_F STR_1 STR_6 STR_RIGHT_PARENTHESIS +#endif #define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ /* Escape items that are just an encoding of a particular data value. */ @@ -1236,7 +1505,7 @@ only. */ #define PT_WORD 8 /* Word - L plus N plus underscore */ /* Flag bits and data types for the extended class (OP_XCLASS) for classes that -contain UTF-8 characters with values greater than 255. */ +contain characters with values greater than 255. */ #define XCL_NOT 0x01 /* Flag: this is a negative class */ #define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ @@ -1252,7 +1521,7 @@ value such as \n. They must have non-zero values, as check_escape() returns their negation. Also, they must appear in the same order as in the opcode definitions below, up to ESC_z. There's a dummy for OP_ALLANY because it corresponds to "." in DOTALL mode rather than an escape sequence. It is also -used for [^] in JavaScript compatibility mode, and for \C in non-utf8 mode. In +used for [^] in JavaScript compatibility mode, and for \C in non-utf mode. In non-DOTALL mode, "." behaves like \N. The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. 
@@ -1433,8 +1702,8 @@ enum { OP_CLASS, /* 106 Match a character class, chars < 256 only */ OP_NCLASS, /* 107 Same, but the bitmap was created from a negative class - the difference is relevant only when a - UTF-8 character > 255 is encountered. */ - OP_XCLASS, /* 108 Extended class for handling UTF-8 chars within the + character > 255 is encountered. */ + OP_XCLASS, /* 108 Extended class for handling > 255 chars within the class. This does both positive and negative. */ OP_REF, /* 109 Match a back reference, casefully */ OP_REFI, /* 110 Match a back reference, caselessly */ @@ -1591,30 +1860,35 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 2, /* noti */ \ /* Positive single-char repeats ** These are */ \ 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ - 4, 4, 4, /* upto, minupto, exact ** mode */ \ - 2, 2, 2, 4, /* *+, ++, ?+, upto+ */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto, minupto ** mode */ \ + 2+IMM2_SIZE, /* exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* *+, ++, ?+, upto+ */ \ 2, 2, 2, 2, 2, 2, /* *I, *?I, +I, +?I, ?I, ??I ** UTF-8 */ \ - 4, 4, 4, /* upto I, minupto I, exact I */ \ - 2, 2, 2, 4, /* *+I, ++I, ?+I, upto+I */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* upto I, minupto I */ \ + 2+IMM2_SIZE, /* exact I */ \ + 2, 2, 2, 2+IMM2_SIZE, /* *+I, ++I, ?+I, upto+I */ \ /* Negative single-char repeats - only for chars < 256 */ \ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? 
*/ \ - 4, 4, 4, /* NOT upto, minupto, exact */ \ - 2, 2, 2, 4, /* Possessive NOT *, +, ?, upto */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto, minupto */ \ + 2+IMM2_SIZE, /* NOT exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *, +, ?, upto */ \ 2, 2, 2, 2, 2, 2, /* NOT *I, *?I, +I, +?I, ?I, ??I */ \ - 4, 4, 4, /* NOT upto I, minupto I, exact I */ \ - 2, 2, 2, 4, /* Possessive NOT *I, +I, ?I, upto I */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* NOT upto I, minupto I */ \ + 2+IMM2_SIZE, /* NOT exact I */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive NOT *I, +I, ?I, upto I */ \ /* Positive type repeats */ \ 2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \ - 4, 4, 4, /* Type upto, minupto, exact */ \ - 2, 2, 2, 4, /* Possessive *+, ++, ?+, upto+ */ \ + 2+IMM2_SIZE, 2+IMM2_SIZE, /* Type upto, minupto */ \ + 2+IMM2_SIZE, /* Type exact */ \ + 2, 2, 2, 2+IMM2_SIZE, /* Possessive *+, ++, ?+, upto+ */ \ /* Character class & ref repeats */ \ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \ - 5, 5, /* CRRANGE, CRMINRANGE */ \ - 33, /* CLASS */ \ - 33, /* NCLASS */ \ + 1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \ + 1+(32/sizeof(pcre_uchar)), /* CLASS */ \ + 1+(32/sizeof(pcre_uchar)), /* NCLASS */ \ 0, /* XCLASS - variable length */ \ - 3, /* REF */ \ - 3, /* REFI */ \ + 1+IMM2_SIZE, /* REF */ \ + 1+IMM2_SIZE, /* REFI */ \ 1+LINK_SIZE, /* RECURSE */ \ 2+2*LINK_SIZE, /* CALLOUT */ \ 1+LINK_SIZE, /* Alt */ \ @@ -1631,23 +1905,23 @@ in UTF-8 mode. The code that uses this table must know about such things. 
*/ 1+LINK_SIZE, /* ONCE_NC */ \ 1+LINK_SIZE, /* BRA */ \ 1+LINK_SIZE, /* BRAPOS */ \ - 3+LINK_SIZE, /* CBRA */ \ - 3+LINK_SIZE, /* CBRAPOS */ \ + 1+LINK_SIZE+IMM2_SIZE, /* CBRA */ \ + 1+LINK_SIZE+IMM2_SIZE, /* CBRAPOS */ \ 1+LINK_SIZE, /* COND */ \ 1+LINK_SIZE, /* SBRA */ \ 1+LINK_SIZE, /* SBRAPOS */ \ - 3+LINK_SIZE, /* SCBRA */ \ - 3+LINK_SIZE, /* SCBRAPOS */ \ + 1+LINK_SIZE+IMM2_SIZE, /* SCBRA */ \ + 1+LINK_SIZE+IMM2_SIZE, /* SCBRAPOS */ \ 1+LINK_SIZE, /* SCOND */ \ - 3, 3, /* CREF, NCREF */ \ - 3, 3, /* RREF, NRREF */ \ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* CREF, NCREF */ \ + 1+IMM2_SIZE, 1+IMM2_SIZE, /* RREF, NRREF */ \ 1, /* DEF */ \ 1, 1, 1, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ \ 3, 1, 3, /* MARK, PRUNE, PRUNE_ARG */ \ 1, 3, /* SKIP, SKIP_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ - 3, 1 /* CLOSE, SKIPZERO */ + 1+IMM2_SIZE, 1 /* CLOSE, SKIPZERO */ /* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion" condition. */ @@ -1665,7 +1939,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, - ERR70, ERR71, ERR72, ERRCOUNT }; + ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. We store an explicit @@ -1684,7 +1958,13 @@ fields are present. Currently PCRE always sets the dummy fields to zero. 
NOTE NOTE NOTE */ -typedef struct real_pcre { +#ifdef COMPILE_PCRE8 +#define REAL_PCRE real_pcre +#else +#define REAL_PCRE real_pcre16 +#endif + +typedef struct REAL_PCRE { pcre_uint32 magic_number; pcre_uint32 size; /* Total that was malloced */ pcre_uint32 options; /* Public options */ @@ -1692,16 +1972,16 @@ typedef struct real_pcre { pcre_uint16 dummy1; /* For future use */ pcre_uint16 top_bracket; pcre_uint16 top_backref; - pcre_uint16 first_byte; - pcre_uint16 req_byte; + pcre_uint16 first_char; /* Starting character */ + pcre_uint16 req_char; /* This character must be seen */ pcre_uint16 name_table_offset; /* Offset to name table that follows */ pcre_uint16 name_entry_size; /* Size of any name items */ pcre_uint16 name_count; /* Number of name items */ pcre_uint16 ref_count; /* Reference count */ - const unsigned char *tables; /* Pointer to tables or NULL for std */ - const unsigned char *nullpad; /* NULL padding */ -} real_pcre; + const pcre_uint8 *tables; /* Pointer to tables or NULL for std */ + const pcre_uint8 *nullpad; /* NULL padding */ +} REAL_PCRE; /* The format of the block used to store data from pcre_study(). The same remark (see NOTE above) about extending this structure applies. */ @@ -1709,7 +1989,7 @@ remark (see NOTE above) about extending this structure applies. */ typedef struct pcre_study_data { pcre_uint32 size; /* Total that was malloced */ pcre_uint32 flags; /* Private flags */ - uschar start_bits[32]; /* Starting char bits */ + pcre_uint8 start_bits[32]; /* Starting char bits */ pcre_uint32 minlength; /* Minimum subject length */ } pcre_study_data; @@ -1728,33 +2008,33 @@ typedef struct open_capitem { doing the compiling, so that they are thread-safe. 
*/ typedef struct compile_data { - const uschar *lcc; /* Points to lower casing table */ - const uschar *fcc; /* Points to case-flipping table */ - const uschar *cbits; /* Points to character type table */ - const uschar *ctypes; /* Points to table of type maps */ - const uschar *start_workspace;/* The start of working space */ - const uschar *start_code; /* The start of the compiled code */ - const uschar *start_pattern; /* The start of the pattern */ - const uschar *end_pattern; /* The end of the pattern */ - open_capitem *open_caps; /* Chain of open capture items */ - uschar *hwm; /* High watermark of workspace */ - uschar *name_table; /* The name/number table */ - int names_found; /* Number of entries so far */ - int name_entry_size; /* Size of each entry */ - int workspace_size; /* Size of workspace */ - int bracount; /* Count of capturing parens as we compile */ - int final_bracount; /* Saved value after first pass */ - int top_backref; /* Maximum back reference */ - unsigned int backref_map; /* Bitmap of low back refs */ - int assert_depth; /* Depth of nested assertions */ - int external_options; /* External (initial) options */ - int external_flags; /* External flag bits to be set */ - int req_varyopt; /* "After variable item" flag for reqbyte */ - BOOL had_accept; /* (*ACCEPT) encountered */ - BOOL check_lookbehind; /* Lookbehinds need later checking */ - int nltype; /* Newline type */ - int nllen; /* Newline string length */ - uschar nl[4]; /* Newline string when fixed length */ + const pcre_uint8 *lcc; /* Points to lower casing table */ + const pcre_uint8 *fcc; /* Points to case-flipping table */ + const pcre_uint8 *cbits; /* Points to character type table */ + const pcre_uint8 *ctypes; /* Points to table of type maps */ + const pcre_uchar *start_workspace;/* The start of working space */ + const pcre_uchar *start_code; /* The start of the compiled code */ + const pcre_uchar *start_pattern; /* The start of the pattern */ + const pcre_uchar *end_pattern; 
/* The end of the pattern */ + open_capitem *open_caps; /* Chain of open capture items */ + pcre_uchar *hwm; /* High watermark of workspace */ + pcre_uchar *name_table; /* The name/number table */ + int names_found; /* Number of entries so far */ + int name_entry_size; /* Size of each entry */ + int workspace_size; /* Size of workspace */ + int bracount; /* Count of capturing parens as we compile */ + int final_bracount; /* Saved value after first pass */ + int top_backref; /* Maximum back reference */ + unsigned int backref_map; /* Bitmap of low back refs */ + int assert_depth; /* Depth of nested assertions */ + int external_options; /* External (initial) options */ + int external_flags; /* External flag bits to be set */ + int req_varyopt; /* "After variable item" flag for reqbyte */ + BOOL had_accept; /* (*ACCEPT) encountered */ + BOOL check_lookbehind; /* Lookbehinds need later checking */ + int nltype; /* Newline type */ + int nllen; /* Newline string length */ + pcre_uchar nl[4]; /* Newline string when fixed length */ } compile_data; /* Structure for maintaining a chain of pointers to the currently incomplete @@ -1762,7 +2042,7 @@ branches, for testing for left recursion while compiling. */ typedef struct branch_chain { struct branch_chain *outer; - uschar *current_branch; + pcre_uchar *current_branch; } branch_chain; /* Structure for items in a linked list that represents an explicit recursive @@ -1773,7 +2053,7 @@ typedef struct recursion_info { int group_num; /* Number of group that was called */ int *offset_save; /* Pointer to start of saved offsets */ int saved_max; /* Number of saved offsets */ - USPTR subject_position; /* Position at start of recursion */ + PCRE_PUCHAR subject_position; /* Position at start of recursion */ } recursion_info; /* A similar structure for pcre_dfa_exec(). 
*/ @@ -1781,7 +2061,7 @@ typedef struct recursion_info { typedef struct dfa_recursion_info { struct dfa_recursion_info *prevrec; int group_num; - USPTR subject_position; + PCRE_PUCHAR subject_position; } dfa_recursion_info; /* Structure for building a chain of data for holding the values of the subject @@ -1791,7 +2071,7 @@ pcre_exec(). */ typedef struct eptrblock { struct eptrblock *epb_prev; - USPTR epb_saved_eptr; + PCRE_PUCHAR epb_saved_eptr; } eptrblock; @@ -1802,67 +2082,68 @@ typedef struct match_data { unsigned long int match_call_count; /* As it says */ unsigned long int match_limit; /* As it says */ unsigned long int match_limit_recursion; /* As it says */ - int *offset_vector; /* Offset vector */ - int offset_end; /* One past the end */ - int offset_max; /* The maximum usable for return data */ - int nltype; /* Newline type */ - int nllen; /* Newline string length */ - int name_count; /* Number of names in name table */ - int name_entry_size; /* Size of entry in names table */ - uschar *name_table; /* Table of names */ - uschar nl[4]; /* Newline string when fixed */ - const uschar *lcc; /* Points to lower casing table */ - const uschar *ctypes; /* Points to table of type maps */ - BOOL offset_overflow; /* Set if too many extractions */ - BOOL notbol; /* NOTBOL flag */ - BOOL noteol; /* NOTEOL flag */ - BOOL utf8; /* UTF8 flag */ - BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ - BOOL use_ucp; /* PCRE_UCP flag */ - BOOL endonly; /* Dollar not before final \n */ - BOOL notempty; /* Empty string match not wanted */ - BOOL notempty_atstart; /* Empty string match at start not wanted */ - BOOL hitend; /* Hit the end of the subject at some point */ - BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ - BOOL hasthen; /* Pattern contains (*THEN) */ - BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ - const uschar *start_code; /* For use when recursing */ - USPTR start_subject; /* Start of the subject string */ - USPTR end_subject; /* 
End of the subject string */ - USPTR start_match_ptr; /* Start of matched string */ - USPTR end_match_ptr; /* Subject position at end match */ - USPTR start_used_ptr; /* Earliest consulted character */ - int partial; /* PARTIAL options */ - int end_offset_top; /* Highwater mark at end of match */ - int capture_last; /* Most recent capture number */ - int start_offset; /* The start offset value */ - int match_function_type; /* Set for certain special calls of MATCH() */ - eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ - int eptrn; /* Next free eptrblock */ - recursion_info *recursive; /* Linked list of recursion data */ - void *callout_data; /* To pass back to callouts */ - const uschar *mark; /* Mark pointer to pass back on success */ - const uschar *nomatch_mark; /* Mark pointer to pass back on failure */ - const uschar *once_target; /* Where to back up to for atomic groups */ + int *offset_vector; /* Offset vector */ + int offset_end; /* One past the end */ + int offset_max; /* The maximum usable for return data */ + int nltype; /* Newline type */ + int nllen; /* Newline string length */ + int name_count; /* Number of names in name table */ + int name_entry_size; /* Size of entry in names table */ + pcre_uchar *name_table; /* Table of names */ + pcre_uchar nl[4]; /* Newline string when fixed */ + const pcre_uint8 *lcc; /* Points to lower casing table */ + const pcre_uint8 *fcc; /* Points to case-flipping table */ + const pcre_uint8 *ctypes; /* Points to table of type maps */ + BOOL offset_overflow; /* Set if too many extractions */ + BOOL notbol; /* NOTBOL flag */ + BOOL noteol; /* NOTEOL flag */ + BOOL utf; /* UTF-8 / UTF-16 flag */ + BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ + BOOL use_ucp; /* PCRE_UCP flag */ + BOOL endonly; /* Dollar not before final \n */ + BOOL notempty; /* Empty string match not wanted */ + BOOL notempty_atstart; /* Empty string match at start not wanted */ + BOOL hitend; /* Hit the end of the subject at some 
point */ + BOOL bsr_anycrlf; /* \R is just any CRLF, not full Unicode */ + BOOL hasthen; /* Pattern contains (*THEN) */ + BOOL ignore_skip_arg; /* For re-run when SKIP name not found */ + const pcre_uchar *start_code; /* For use when recursing */ + PCRE_PUCHAR start_subject; /* Start of the subject string */ + PCRE_PUCHAR end_subject; /* End of the subject string */ + PCRE_PUCHAR start_match_ptr; /* Start of matched string */ + PCRE_PUCHAR end_match_ptr; /* Subject position at end match */ + PCRE_PUCHAR start_used_ptr; /* Earliest consulted character */ + int partial; /* PARTIAL options */ + int end_offset_top; /* Highwater mark at end of match */ + int capture_last; /* Most recent capture number */ + int start_offset; /* The start offset value */ + int match_function_type; /* Set for certain special calls of MATCH() */ + eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ + int eptrn; /* Next free eptrblock */ + recursion_info *recursive; /* Linked list of recursion data */ + void *callout_data; /* To pass back to callouts */ + const pcre_uchar *mark; /* Mark pointer to pass back on success */ + const pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */ + const pcre_uchar *once_target; /* Where to back up to for atomic groups */ } match_data; /* A similar structure is used for the same purpose by the DFA matching functions. 
*/ typedef struct dfa_match_data { - const uschar *start_code; /* Start of the compiled pattern */ - const uschar *start_subject; /* Start of the subject string */ - const uschar *end_subject; /* End of subject string */ - const uschar *start_used_ptr; /* Earliest consulted character */ - const uschar *tables; /* Character tables */ - int start_offset; /* The start offset value */ - int moptions; /* Match options */ - int poptions; /* Pattern options */ - int nltype; /* Newline type */ - int nllen; /* Newline string length */ - uschar nl[4]; /* Newline string when fixed */ - void *callout_data; /* To pass back to callouts */ - dfa_recursion_info *recursive; /* Linked list of recursion data */ + const pcre_uchar *start_code; /* Start of the compiled pattern */ + const pcre_uchar *start_subject ; /* Start of the subject string */ + const pcre_uchar *end_subject; /* End of subject string */ + const pcre_uchar *start_used_ptr; /* Earliest consulted character */ + const pcre_uint8 *tables; /* Character tables */ + int start_offset; /* The start offset value */ + int moptions; /* Match options */ + int poptions; /* Pattern options */ + int nltype; /* Newline type */ + int nllen; /* Newline string length */ + pcre_uchar nl[4]; /* Newline string when fixed */ + void *callout_data; /* To pass back to callouts */ + dfa_recursion_info *recursive; /* Linked list of recursion data */ } dfa_match_data; /* Bit definitions for entries in the pcre_ctypes table. */ @@ -1898,6 +2179,28 @@ total length. 
*/ #define ctypes_offset (cbits_offset + cbit_length) #define tables_length (ctypes_offset + 256) +/* Internal function prefix */ + +#ifdef COMPILE_PCRE8 +#ifndef PUBL +#define PUBL(name) pcre_##name +#endif +#ifndef PRIV +#define PRIV(name) _pcre_##name +#endif +#else /* COMPILE_PCRE8 */ +#ifdef COMPILE_PCRE16 +#ifndef PUBL +#define PUBL(name) pcre16_##name +#endif +#ifndef PRIV +#define PRIV(name) _pcre16_##name +#endif +#else +#error Unsupported compiling mode +#endif /* COMPILE_PCRE16 */ +#endif /* COMPILE_PCRE8 */ + /* Layout of the UCP type table that translates property names into types and codes. Each entry used to point directly to a name, but to reduce the number of relocations in shared libraries, it now has an offset into a single string @@ -1915,75 +2218,115 @@ of the exported public functions. They have to be "external" in the C sense, but are not part of the PCRE public API. The data for these tables is in the pcre_tables.c module. */ -extern const int _pcre_utf8_table1[]; -extern const int _pcre_utf8_table2[]; -extern const int _pcre_utf8_table3[]; -extern const uschar _pcre_utf8_table4[]; +#ifdef COMPILE_PCRE8 -#ifdef SUPPORT_JIT -extern const uschar _pcre_utf8_char_sizes[]; -#endif +extern const int PRIV(utf8_table1)[]; +extern const int PRIV(utf8_table1_size); +extern const int PRIV(utf8_table2)[]; +extern const int PRIV(utf8_table3)[]; +extern const pcre_uint8 PRIV(utf8_table4)[]; -extern const int _pcre_utf8_table1_size; +#endif /* COMPILE_PCRE8 */ -extern const char _pcre_utt_names[]; -extern const ucp_type_table _pcre_utt[]; -extern const int _pcre_utt_size; +extern const char PRIV(utt_names)[]; +extern const ucp_type_table PRIV(utt)[]; +extern const int PRIV(utt_size); -extern const uschar _pcre_default_tables[]; +extern const pcre_uint8 PRIV(default_tables)[]; -extern const uschar _pcre_OP_lengths[]; +extern const pcre_uint8 PRIV(OP_lengths)[]; /* Internal shared functions. 
These are functions that are used by more than one of the exported public functions. They have to be "external" in the C sense, but are not part of the PCRE public API. */ -extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int); -extern BOOL _pcre_is_newline(USPTR, int, USPTR, int *, BOOL); -extern int _pcre_ord2utf8(int, uschar *); -extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, - const pcre_study_data *, pcre_study_data *); -extern int _pcre_valid_utf8(USPTR, int, int *); -extern BOOL _pcre_was_newline(USPTR, int, USPTR, int *, BOOL); -extern BOOL _pcre_xclass(int, const uschar *); +/* String comparison functions. */ +#ifdef COMPILE_PCRE8 + +#define STRCMP_UC_UC(str1, str2) \ + strcmp((char *)(str1), (char *)(str2)) +#define STRCMP_UC_C8(str1, str2) \ + strcmp((char *)(str1), (str2)) +#define STRNCMP_UC_UC(str1, str2, num) \ + strncmp((char *)(str1), (char *)(str2), (num)) +#define STRNCMP_UC_C8(str1, str2, num) \ + strncmp((char *)(str1), (str2), (num)) +#define STRLEN_UC(str) strlen((const char *)str) + +#else + +extern int PRIV(strcmp_uc_uc)(const pcre_uchar *, + const pcre_uchar *); +extern int PRIV(strcmp_uc_c8)(const pcre_uchar *, + const char *); +extern int PRIV(strncmp_uc_uc)(const pcre_uchar *, + const pcre_uchar *, unsigned int num); +extern int PRIV(strncmp_uc_c8)(const pcre_uchar *, + const char *, unsigned int num); +extern unsigned int PRIV(strlen_uc)(const pcre_uchar *str); + +#define STRCMP_UC_UC(str1, str2) \ + PRIV(strcmp_uc_uc)((str1), (str2)) +#define STRCMP_UC_C8(str1, str2) \ + PRIV(strcmp_uc_c8)((str1), (str2)) +#define STRNCMP_UC_UC(str1, str2, num) \ + PRIV(strncmp_uc_uc)((str1), (str2), (num)) +#define STRNCMP_UC_C8(str1, str2, num) \ + PRIV(strncmp_uc_c8)((str1), (str2), (num)) +#define STRLEN_UC(str) PRIV(strlen_uc)(str) + +#endif /* COMPILE_PCRE8 */ + +extern const pcre_uchar *PRIV(find_bracket)(const pcre_uchar *, BOOL, int); +extern BOOL PRIV(is_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, + int *, 
BOOL); +extern int PRIV(ord2utf)(pcre_uint32, pcre_uchar *); +extern int PRIV(valid_utf)(PCRE_PUCHAR, int, int *); +extern BOOL PRIV(was_newline)(PCRE_PUCHAR, int, PCRE_PUCHAR, + int *, BOOL); +extern BOOL PRIV(xclass)(int, const pcre_uchar *, BOOL); #ifdef SUPPORT_JIT -extern void _pcre_jit_compile(const real_pcre *, pcre_extra *); -extern int _pcre_jit_exec(const real_pcre *, void *, PCRE_SPTR, - int, int, int, int, int *, int); -extern void _pcre_jit_free(void *); -extern int _pcre_jit_get_size(void *); +extern void PRIV(jit_compile)(const REAL_PCRE *, PUBL(extra) *); +extern int PRIV(jit_exec)(const REAL_PCRE *, void *, + const pcre_uchar *, int, int, int, int, int *, int); +extern void PRIV(jit_free)(void *); +extern int PRIV(jit_get_size)(void *); +extern const char* PRIV(jit_get_target)(void); #endif /* Unicode character database (UCD) */ typedef struct { - uschar script; - uschar chartype; + pcre_uint8 script; + pcre_uint8 chartype; pcre_int32 other_case; } ucd_record; -extern const ucd_record _pcre_ucd_records[]; -extern const uschar _pcre_ucd_stage1[]; -extern const pcre_uint16 _pcre_ucd_stage2[]; -extern const int _pcre_ucp_gentype[]; +extern const ucd_record PRIV(ucd_records)[]; +extern const pcre_uint8 PRIV(ucd_stage1)[]; +extern const pcre_uint16 PRIV(ucd_stage2)[]; +extern const int PRIV(ucp_gentype)[]; #ifdef SUPPORT_JIT -extern const int _pcre_ucp_typerange[]; +extern const int PRIV(ucp_typerange)[]; #endif +#ifdef SUPPORT_UCP /* UCD access macros */ #define UCD_BLOCK_SIZE 128 -#define GET_UCD(ch) (_pcre_ucd_records + \ - _pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \ +#define GET_UCD(ch) (PRIV(ucd_records) + \ + PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \ UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE]) #define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype #define UCD_SCRIPT(ch) GET_UCD(ch)->script -#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)] +#define UCD_CATEGORY(ch) PRIV(ucp_gentype)[UCD_CHARTYPE(ch)] #define 
UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case) +#endif /* SUPPORT_UCP */ + #endif /* End of pcre_internal.h */ diff --git a/harbour/src/3rd/pcre/pcreinfo.c b/harbour/src/3rd/pcre/pcreinfo.c deleted file mode 100644 index 1ee4fb1e22..0000000000 --- a/harbour/src/3rd/pcre/pcreinfo.c +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains the external function pcre_info(), which gives some -information about a compiled pattern. However, use of this function is now -deprecated, as it has been superseded by pcre_fullinfo(). */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcreinal.h" - - -/************************************************* -* (Obsolete) Return info about compiled pattern * -*************************************************/ - -/* This is the original "info" function. It picks potentially useful data out -of the private structure, but its interface was too rigid. It remains for -backwards compatibility. The public options are passed back in an int - though -the re->options field has been expanded to a long int, all the public options -at the low end of it, and so even on 16-bit systems this will still be OK. -Therefore, I haven't changed the API for pcre_info(). 
- -Arguments: - argument_re points to compiled code - optptr where to pass back the options - first_byte where to pass back the first character, - or -1 if multiline and all branches start ^, - or -2 otherwise - -Returns: number of capturing subpatterns - or negative values on error -*/ - -PCRE_EXP_DEFN int PCRE_CALL_CONVENTION -pcre_info(const pcre *argument_re, int *optptr, int *first_byte) -{ -real_pcre internal_re; -const real_pcre *re = (const real_pcre *)argument_re; -if (re == NULL) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) - { - re = _pcre_try_flipped(re, &internal_re, NULL, NULL); - if (re == NULL) return PCRE_ERROR_BADMAGIC; - } -if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS); -if (first_byte != NULL) - *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte : - ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2; -return re->top_bracket; -} - -/* End of pcre_info.c */ diff --git a/harbour/src/3rd/pcre/pcrejitc.c b/harbour/src/3rd/pcre/pcrejitc.c index 0ccfdc11e9..6d717281fb 100644 --- a/harbour/src/3rd/pcre/pcrejitc.c +++ b/harbour/src/3rd/pcre/pcrejitc.c @@ -6,10 +6,10 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge The machine code generator part (this module) was written by Zoltan Herczeg - Copyright (c) 2010-2011 + Copyright (c) 2010-2012 ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -52,8 +52,8 @@ POSSIBILITY OF SUCH DAMAGE. we just include it. This way we don't need to touch the build system files. 
*/ -#define SLJIT_MALLOC(size) (pcre_malloc)(size) -#define SLJIT_FREE(ptr) (pcre_free)(ptr) +#define SLJIT_MALLOC(size) (PUBL(malloc))(size) +#define SLJIT_FREE(ptr) (PUBL(free))(ptr) #define SLJIT_CONFIG_AUTO 1 #define SLJIT_CONFIG_STATIC 1 #define SLJIT_VERBOSE 0 @@ -62,7 +62,7 @@ system files. */ #include "sjlir.c" #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED -#error "Unsupported architecture" +#error Unsupported architecture #endif /* Allocate memory on the stack. Fast, but limited size. */ @@ -148,23 +148,23 @@ Thus we can restore the locals to a particular point in the stack. typedef struct jit_arguments { /* Pointers first. */ struct sljit_stack *stack; - PCRE_SPTR str; - PCRE_SPTR begin; - PCRE_SPTR end; + const pcre_uchar *str; + const pcre_uchar *begin; + const pcre_uchar *end; int *offsets; - uschar *ptr; + pcre_uchar *ptr; /* Everything else after. */ int offsetcount; int calllimit; - uschar notbol; - uschar noteol; - uschar notempty; - uschar notempty_atstart; + pcre_uint8 notbol; + pcre_uint8 noteol; + pcre_uint8 notempty; + pcre_uint8 notempty_atstart; } jit_arguments; typedef struct executable_function { void *executable_func; - pcre_jit_callback callback; + PUBL(jit_callback) callback; void *userdata; sljit_uw executable_size; } executable_function; @@ -198,7 +198,7 @@ typedef struct fallback_common { struct fallback_common *top; jump_list *topfallbacks; /* Opcode pointer. 
*/ - uschar *cc; + pcre_uchar *cc; } fallback_common; typedef struct assert_fallback { @@ -269,10 +269,10 @@ typedef struct recurse_fallback { typedef struct compiler_common { struct sljit_compiler *compiler; - uschar *start; + pcre_uchar *start; int localsize; int *localptrs; - const uschar *fcc; + const pcre_uint8 *fcc; sljit_w lcc; int cbraptr; int nltype; @@ -298,14 +298,16 @@ typedef struct compiler_common { jump_list *casefulcmp; jump_list *caselesscmp; BOOL jscript_compat; -#ifdef SUPPORT_UTF8 - BOOL utf8; +#ifdef SUPPORT_UTF + BOOL utf; #ifdef SUPPORT_UCP - BOOL useucp; + BOOL use_ucp; #endif - jump_list *utf8readchar; - jump_list *utf8readtype8; + jump_list *utfreadchar; +#ifdef COMPILE_PCRE8 + jump_list *utfreadtype8; #endif +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP jump_list *getucd; #endif @@ -317,18 +319,30 @@ typedef struct compare_context { int length; int sourcereg; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - int byteptr; + int ucharptr; union { - int asint; - short asshort; + sljit_i asint; + sljit_uh asushort; +#ifdef COMPILE_PCRE8 sljit_ub asbyte; - sljit_ub asbytes[4]; + sljit_ub asuchars[4]; +#else +#ifdef COMPILE_PCRE16 + sljit_uh asuchars[2]; +#endif +#endif } c; union { - int asint; - short asshort; + sljit_i asint; + sljit_uh asushort; +#ifdef COMPILE_PCRE8 sljit_ub asbyte; - sljit_ub asbytes[4]; + sljit_ub asuchars[4]; +#else +#ifdef COMPILE_PCRE16 + sljit_uh asuchars[2]; +#endif +#endif } oc; #endif } compare_context; @@ -338,18 +352,21 @@ enum { frame_setstrbegin = -1 }; +/* Undefine sljit macros. */ +#undef CMP + /* Used for accessing the elements of the stack. 
*/ #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w)) #define TMP1 SLJIT_TEMPORARY_REG1 #define TMP2 SLJIT_TEMPORARY_REG3 #define TMP3 SLJIT_TEMPORARY_EREG2 -#define STR_PTR SLJIT_GENERAL_REG1 -#define STR_END SLJIT_GENERAL_REG2 +#define STR_PTR SLJIT_SAVED_REG1 +#define STR_END SLJIT_SAVED_REG2 #define STACK_TOP SLJIT_TEMPORARY_REG2 -#define STACK_LIMIT SLJIT_GENERAL_REG3 -#define ARGUMENTS SLJIT_GENERAL_EREG1 -#define CALL_COUNT SLJIT_GENERAL_EREG2 +#define STACK_LIMIT SLJIT_SAVED_REG3 +#define ARGUMENTS SLJIT_SAVED_EREG1 +#define CALL_COUNT SLJIT_SAVED_EREG2 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1 /* Locals layout. */ @@ -364,7 +381,7 @@ enum { /* Max limit of recursions. */ #define CALL_LIMIT (5 * sizeof(sljit_w)) /* Last known position of the requested byte. */ -#define REQ_BYTE_PTR (6 * sizeof(sljit_w)) +#define REQ_CHAR_PTR (6 * sizeof(sljit_w)) /* End pointer of the first line. */ #define FIRSTLINE_END (7 * sizeof(sljit_w)) /* The output vector is stored on the stack, and contains pointers @@ -374,7 +391,19 @@ the start pointers when the end of the capturing group has not yet reached. */ #define OVECTOR_START (8 * sizeof(sljit_w)) #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w)) #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w)) -#define PRIV(cc) (common->localptrs[(cc) - common->start]) +#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start]) + +#ifdef COMPILE_PCRE8 +#define MOV_UCHAR SLJIT_MOV_UB +#define MOVU_UCHAR SLJIT_MOVU_UB +#else +#ifdef COMPILE_PCRE16 +#define MOV_UCHAR SLJIT_MOV_UH +#define MOVU_UCHAR SLJIT_MOVU_UH +#else +#error Unsupported compiling mode +#endif +#endif /* Shortcuts. */ #define DEFINE_COMPILER \ @@ -398,7 +427,7 @@ the start pointers when the end of the capturing group has not yet reached. 
*/ #define COND_VALUE(op, dst, dstw, type) \ sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type)) -static uschar* bracketend(uschar* cc) +static pcre_uchar* bracketend(pcre_uchar* cc) { SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do cc += GET(cc, 1); while (*cc == OP_ALT); @@ -419,7 +448,7 @@ return cc; compile_fallbackpath */ -static uschar *next_opcode(compiler_common *common, uschar *cc) +static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc) { SLJIT_UNUSED_ARG(common); switch(*cc) @@ -475,8 +504,8 @@ switch(*cc) return cc + 1; case OP_ANYBYTE: -#ifdef SUPPORT_UTF8 - if (common->utf8) return NULL; +#ifdef SUPPORT_UTF + if (common->utf) return NULL; #endif return cc + 1; @@ -484,7 +513,6 @@ switch(*cc) case OP_CHARI: case OP_NOT: case OP_NOTI: - case OP_STAR: case OP_MINSTAR: case OP_PLUS: @@ -522,8 +550,8 @@ switch(*cc) case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: cc += 2; -#ifdef SUPPORT_UTF8 - if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif return cc; @@ -543,14 +571,16 @@ switch(*cc) case OP_NOTMINUPTOI: case OP_NOTEXACTI: case OP_NOTPOSUPTOI: - cc += 4; -#ifdef SUPPORT_UTF8 - if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif return cc; case OP_NOTPROP: case OP_PROP: + return cc + 1 + 2; + case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEEXACT: @@ -562,18 +592,18 @@ switch(*cc) case OP_RREF: case OP_NRREF: case OP_CLOSE: - cc += 3; + cc += 1 + IMM2_SIZE; return cc; case OP_CRRANGE: case OP_CRMINRANGE: - return cc + 5; + return cc + 1 + 2 * IMM2_SIZE; case OP_CLASS: case OP_NCLASS: - return cc + 33; + return cc + 1 + 32 / sizeof(pcre_uchar); -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 
case OP_XCLASS: return cc + GET(cc, 1); #endif @@ -603,17 +633,17 @@ switch(*cc) case OP_CBRAPOS: case OP_SCBRA: case OP_SCBRAPOS: - return cc + 1 + LINK_SIZE + 2; + return cc + 1 + LINK_SIZE + IMM2_SIZE; default: return NULL; } } -static int get_localspace(compiler_common *common, uschar *cc, uschar *ccend) +static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) { int localspace = 0; -uschar *alternative; +pcre_uchar *alternative; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) { @@ -636,7 +666,7 @@ while (cc < ccend) case OP_CBRAPOS: case OP_SCBRAPOS: localspace += sizeof(sljit_w); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -657,10 +687,10 @@ while (cc < ccend) return localspace; } -static void set_localptrs(compiler_common *common, int localptr, uschar *ccend) +static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend) { -uschar *cc = common->start; -uschar *alternative; +pcre_uchar *cc = common->start; +pcre_uchar *alternative; while (cc < ccend) { switch(*cc) @@ -684,7 +714,7 @@ while (cc < ccend) case OP_SCBRAPOS: common->localptrs[cc - common->start] = localptr; localptr += sizeof(sljit_w); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -707,9 +737,9 @@ while (cc < ccend) } /* Returns with -1 if no need for frame. 
*/ -static int get_framesize(compiler_common *common, uschar *cc, BOOL recursive) +static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive) { -uschar *ccend = bracketend(cc); +pcre_uchar *ccend = bracketend(cc); int length = 0; BOOL possessive = FALSE; BOOL setsom_found = FALSE; @@ -740,7 +770,7 @@ while (cc < ccend) case OP_SCBRA: case OP_SCBRAPOS: length += 3; - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; default: @@ -758,14 +788,15 @@ if (length > 0) return -1; } -static void init_frame(compiler_common *common, uschar *cc, int stackpos, int stacktop, BOOL recursive) +static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive) { DEFINE_COMPILER; -uschar *ccend = bracketend(cc); +pcre_uchar *ccend = bracketend(cc); BOOL setsom_found = FALSE; int offset; /* >= 1 + shortest item size (2) */ +SLJIT_UNUSED_ARG(stacktop); SLJIT_ASSERT(stackpos >= stacktop + 2); stackpos = STACK(stackpos); @@ -803,7 +834,7 @@ while (cc < ccend) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); stackpos += (int)sizeof(sljit_w); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; default: @@ -816,10 +847,10 @@ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end); SLJIT_ASSERT(stackpos == STACK(stacktop)); } -static SLJIT_INLINE int get_localsize(compiler_common *common, uschar *cc, uschar *ccend) +static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend) { int localsize = 2; -uschar *alternative; +pcre_uchar *alternative; /* Calculate the sum of the local variables. 
*/ while (cc < ccend) { @@ -842,13 +873,13 @@ while (cc < ccend) case OP_CBRA: case OP_SCBRA: localsize++; - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: localsize += 2; - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -869,7 +900,7 @@ SLJIT_ASSERT(cc == ccend); return localsize; } -static void copy_locals(compiler_common *common, uschar *cc, uschar *ccend, +static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL save, int stackptr, int stacktop) { DEFINE_COMPILER; @@ -878,7 +909,7 @@ int count; BOOL tmp1next = TRUE; BOOL tmp1empty = TRUE; BOOL tmp2empty = TRUE; -uschar *alternative; +pcre_uchar *alternative; enum { start, loop, @@ -939,7 +970,7 @@ while (status != end) case OP_SBRAPOS: case OP_SCOND: count = 1; - srcw[0] = PRIV(cc); + srcw[0] = PRIV_DATA(cc); SLJIT_ASSERT(srcw[0] != 0); cc += 1 + LINK_SIZE; break; @@ -948,16 +979,16 @@ while (status != end) case OP_SCBRA: count = 1; srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: count = 2; srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); - srcw[0] = PRIV(cc); + srcw[0] = PRIV_DATA(cc); SLJIT_ASSERT(srcw[0] != 0); - cc += 1 + LINK_SIZE + 2; + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: @@ -966,7 +997,7 @@ while (status != end) if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { count = 1; - srcw[0] = PRIV(cc); + srcw[0] = PRIV_DATA(cc); SLJIT_ASSERT(srcw[0] != 0); } cc += 1 + LINK_SIZE; @@ -1174,7 +1205,7 @@ struct sljit_label *loop; int i; /* At this point we can freely use all temporary registers. */ /* TMP1 returns with begin - 1. 
*/ -OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1); +OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1)); if (length < 8) { for (i = 0; i < length; i++) @@ -1198,21 +1229,24 @@ struct sljit_label *loop; struct sljit_jump *earlyexit; /* At this point we can freely use all registers. */ -OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); +OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount)); OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int)); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START); +OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START); /* Unlikely, but possible */ earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0); loop = LABEL(); -OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0); -OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w)); +OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0); +OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w)); /* Copy the integer value to the output buffer */ -OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0); +#ifdef COMPILE_PCRE16 +OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, 
SLJIT_IMM, 1); +#endif +OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, loop); JUMPHERE(earlyexit); @@ -1223,24 +1257,24 @@ if (topbracket > 1) OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w)); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1); - /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */ + /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */ loop = LABEL(); OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w))); OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1); - CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop); + CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0); } else OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1); } -static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, uschar* cc) +static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc) { /* Detects if the character has an othercase. */ unsigned int c; -#ifdef SUPPORT_UTF8 -if (common->utf8) +#ifdef SUPPORT_UTF +if (common->utf) { GETCHAR(c, cc); if (c > 127) @@ -1251,18 +1285,21 @@ if (common->utf8) return FALSE; #endif } +#ifndef COMPILE_PCRE8 + return common->fcc[c] != c; +#endif } else #endif c = *cc; -return common->fcc[c] != c; +return MAX_255(c) ? common->fcc[c] != c : FALSE; } static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c) { /* Returns with the othercase. 
*/ -#ifdef SUPPORT_UTF8 -if (common->utf8 && c > 127) +#ifdef SUPPORT_UTF +if (common->utf && c > 127) { #ifdef SUPPORT_UCP return UCD_OTHERCASE(c); @@ -1271,19 +1308,19 @@ if (common->utf8 && c > 127) #endif } #endif -return common->fcc[c]; +return TABLE_GET(c, common->fcc, c); } -static unsigned int char_get_othercase_bit(compiler_common *common, uschar* cc) +static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc) { /* Detects if the character and its othercase has only 1 bit difference. */ unsigned int c, oc, bit; -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 int n; #endif -#ifdef SUPPORT_UTF8 -if (common->utf8) +#ifdef SUPPORT_UTF +if (common->utf) { GETCHAR(c, cc); if (c <= 127) @@ -1300,11 +1337,11 @@ if (common->utf8) else { c = *cc; - oc = common->fcc[c]; + oc = TABLE_GET(c, common->fcc, c); } #else c = *cc; -oc = common->fcc[c]; +oc = TABLE_GET(c, common->fcc, c); #endif SLJIT_ASSERT(c != oc); @@ -1318,10 +1355,12 @@ if (c <= 127 && bit == 0x20) if (!ispowerof2(bit)) return 0; -#ifdef SUPPORT_UTF8 -if (common->utf8 && c > 127) +#ifdef COMPILE_PCRE8 + +#ifdef SUPPORT_UTF +if (common->utf && c > 127) { - n = _pcre_utf8_table4[*cc & 0x3f]; + n = GET_EXTRALEN(*cc); while ((bit & 0x3f) == 0) { n--; @@ -1329,8 +1368,25 @@ if (common->utf8 && c > 127) } return (n << 8) | bit; } -#endif +#endif /* SUPPORT_UTF */ return (0 << 8) | bit; + +#else /* COMPILE_PCRE8 */ + +#ifdef COMPILE_PCRE16 +#ifdef SUPPORT_UTF +if (common->utf && c > 65535) + { + if (bit >= (1 << 10)) + bit >>= 10; + else + return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8)); + } +#endif /* SUPPORT_UTF */ +return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8)); +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ } static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks) @@ -1344,20 +1400,26 @@ static void read_char(compiler_common *common) /* Reads the character into TMP1, updates STR_PTR. 
Does not check STR_END. TMP2 Destroyed. */ DEFINE_COMPILER; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF struct sljit_jump *jump; #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#ifdef SUPPORT_UTF8 -if (common->utf8) +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +#ifdef SUPPORT_UTF +if (common->utf) { +#ifdef COMPILE_PCRE8 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL)); +#else +#ifdef COMPILE_PCRE16 + jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); +#endif +#endif /* COMPILE_PCRE8 */ + add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); JUMPHERE(jump); } #endif -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } static void peek_char(compiler_common *common) @@ -1365,16 +1427,22 @@ static void peek_char(compiler_common *common) /* Reads the character into TMP1, keeps STR_PTR. Does not check STR_END. TMP2 Destroyed. */ DEFINE_COMPILER; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF struct sljit_jump *jump; #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#ifdef SUPPORT_UTF8 -if (common->utf8) +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +#ifdef SUPPORT_UTF +if (common->utf) { +#ifdef COMPILE_PCRE8 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL)); +#else +#ifdef COMPILE_PCRE16 + jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); +#endif +#endif /* COMPILE_PCRE8 */ + add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); JUMPHERE(jump); } @@ -1385,47 +1453,84 @@ static void read_char8_type(compiler_common *common) { /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. 
*/ DEFINE_COMPILER; -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 struct sljit_jump *jump; #endif -#ifdef SUPPORT_UTF8 -if (common->utf8) +#ifdef SUPPORT_UTF +if (common->utf) { - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#ifdef COMPILE_PCRE8 /* This can be an extra read in some situations, but hopefully - it is a clever early read in most cases. */ + it is needed in most cases. */ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); - add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL)); + add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); JUMPHERE(jump); +#else +#ifdef COMPILE_PCRE16 + OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); + JUMPHERE(jump); + /* Skip low surrogate if necessary. */ + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); + COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); +#endif +#endif /* COMPILE_PCRE8 */ return; } #endif -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#ifdef COMPILE_PCRE16 +/* The ctypes array contains only 256 values. 
*/ +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); +jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); +#endif +OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); +#ifdef COMPILE_PCRE16 +JUMPHERE(jump); +#endif } static void skip_char_back(compiler_common *common) { -/* Goes one character back. Only affects STR_PTR. Does not check begin. */ +/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */ DEFINE_COMPILER; -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 struct sljit_label *label; -if (common->utf8) +if (common->utf) { label = LABEL(); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); return; } #endif -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + /* Skip low surrogate if necessary. */ + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + return; + } +#endif +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue) @@ -1448,15 +1553,17 @@ else if (nltype == NLTYPE_ANYCRLF) } else { - SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255); + SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); add_jump(compiler, fallbacks, CMP(jumpiftrue ? 
SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } } -#ifdef SUPPORT_UTF8 -static void do_utf8readchar(compiler_common *common) +#ifdef SUPPORT_UTF + +#ifdef COMPILE_PCRE8 +static void do_utfreadchar(compiler_common *common) { -/* Fast decoding an utf8 character. TMP1 contains the first byte +/* Fast decoding a UTF-8 character. TMP1 contains the first byte of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */ DEFINE_COMPILER; struct sljit_jump *jump; @@ -1465,82 +1572,57 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); /* Searching for the first zero. */ OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_C_NOT_ZERO); -/* 2 byte sequence */ -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +/* Two byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(jump); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10); jump = JUMP(SLJIT_C_NOT_ZERO); -/* 3 byte sequence */ -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1); +/* Three byte sequence. 
*/ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(jump); -OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08); -jump = JUMP(SLJIT_C_NOT_ZERO); -/* 4 byte sequence */ -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1); +/* Four byte sequence. */ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); -JUMPHERE(jump); - -/* 
5 byte sequence */ -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03); -OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); -OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } -static void do_utf8readtype8(compiler_common *common) +static void do_utfreadtype8(compiler_common *common) { -/* Fast decoding an utf8 character type. TMP2 contains the first byte -of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */ +/* Fast decoding a UTF-8 character type. TMP2 contains the first byte +of the character (>= 0xc0). Return value in TMP1. */ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *compare; @@ -1549,9 +1631,9 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_C_NOT_ZERO); -/* 2 byte sequence */ -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +/* Two byte sequence. 
*/ +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); @@ -1566,13 +1648,44 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); JUMPHERE(jump); /* We only have types for characters less than 256. */ -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_utf8_char_sizes - 0xc0); +OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } -#endif +#else /* COMPILE_PCRE8 */ + +#ifdef COMPILE_PCRE16 +static void do_utfreadchar(compiler_common *common) +{ +/* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char +of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */ +DEFINE_COMPILER; +struct sljit_jump *jump; + +sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); +jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); +/* Do nothing, only return. */ +sljit_emit_fast_return(compiler, RETURN_ADDR, 0); + +JUMPHERE(jump); +/* Combine two 16 bit characters. 
*/ +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); +OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); +OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); +sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +} +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ + +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP @@ -1590,13 +1703,13 @@ SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_ucd_stage1); +OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1)); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_stage2); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2)); OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, chartype)); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -1610,12 +1723,12 @@ struct sljit_label *newlinelabel = NULL; struct sljit_jump *start; struct sljit_jump *end = NULL; struct sljit_jump *nl = NULL; -#ifdef SUPPORT_UTF8 -struct sljit_jump *singlebyte; +#ifdef SUPPORT_UTF +struct sljit_jump *singlechar; #endif jump_list *newline = NULL; BOOL newlinecheck = FALSE; -BOOL readbyte = FALSE; +BOOL readuchar = 
FALSE; if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255)) @@ -1630,13 +1743,13 @@ if (firstline) if (common->nltype == NLTYPE_FIXED && common->newline > 255) { mainloop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); - OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } else { @@ -1660,11 +1773,14 @@ start = JUMP(SLJIT_JUMP); if (newlinecheck) { newlinelabel = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); +#ifdef COMPILE_PCRE16 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +#endif OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); nl = JUMP(SLJIT_JUMP); } @@ -1672,25 +1788,37 @@ if (newlinecheck) mainloop = LABEL(); /* Increasing the STR_PTR here requires one less jump in the most common case. 
*/ -#ifdef SUPPORT_UTF8 -if (common->utf8) readbyte = TRUE; +#ifdef SUPPORT_UTF +if (common->utf) readuchar = TRUE; #endif -if (newlinecheck) readbyte = TRUE; +if (newlinecheck) readuchar = TRUE; -if (readbyte) - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +if (readuchar) + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (newlinecheck) CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); -#ifdef SUPPORT_UTF8 -if (common->utf8) +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +if (common->utf) { - singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0); + singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - JUMPHERE(singlebyte); + JUMPHERE(singlechar); + } +#endif +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf) + { + singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); + COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + JUMPHERE(singlechar); } #endif JUMPHERE(start); @@ -1704,13 +1832,13 @@ if (newlinecheck) return mainloop; } -static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline) +static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline) { DEFINE_COMPILER; struct sljit_label *start; struct sljit_jump *leave; struct sljit_jump *found; -pcre_uint16 oc, bit; +pcre_uchar oc, bit; if (firstline) { @@ 
-1720,23 +1848,30 @@ if (firstline) start = LABEL(); leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -if ((firstbyte & REQ_CASELESS) == 0) - found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff); +oc = first_char; +if (caseless) + { + oc = TABLE_GET(first_char, common->fcc, first_char); +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + if (first_char > 127 && common->utf) + oc = UCD_OTHERCASE(first_char); +#endif + } +if (first_char == oc) + found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char); else { - firstbyte &= 0xff; - oc = common->fcc[firstbyte]; - bit = firstbyte ^ oc; + bit = first_char ^ oc; if (ispowerof2(bit)) { OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit); + found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit); } else { - OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); @@ -1744,12 +1879,23 @@ else } } -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); -#ifdef SUPPORT_UTF8 -if (common->utf8) +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +if (common->utf) { CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + } +#endif +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf) + { + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); + OP2(SLJIT_AND, 
TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); + COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } #endif @@ -1786,16 +1932,19 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL); +#ifdef COMPILE_PCRE16 + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +#endif OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); loop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); @@ -1826,9 +1975,12 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) leave = JUMP(SLJIT_JUMP); JUMPHERE(foundcr); notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); +#ifdef COMPILE_PCRE16 + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +#endif OP2(SLJIT_ADD, STR_PTR, 0, 
STR_PTR, 0, TMP1, 0); JUMPHERE(notfoundnl); JUMPHERE(leave); @@ -1846,6 +1998,9 @@ DEFINE_COMPILER; struct sljit_label *start; struct sljit_jump *leave; struct sljit_jump *found; +#ifndef COMPILE_PCRE8 +struct sljit_jump *jump; +#endif if (firstline) { @@ -1855,11 +2010,16 @@ if (firstline) start = LABEL(); leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); -#ifdef SUPPORT_UTF8 -if (common->utf8) +OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); +#ifdef SUPPORT_UTF +if (common->utf) OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); #endif +#ifndef COMPILE_PCRE8 +jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); +JUMPHERE(jump); +#endif OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits); @@ -1867,16 +2027,27 @@ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); found = JUMP(SLJIT_C_NOT_ZERO); -#ifdef SUPPORT_UTF8 -if (common->utf8) +#ifdef SUPPORT_UTF +if (common->utf) OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); #endif -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); -#ifdef SUPPORT_UTF8 -if (common->utf8) +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +if (common->utf) { CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + } +#endif +#if defined SUPPORT_UTF && defined COMPILE_PCRE16 +if (common->utf) + { + CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); + COND_VALUE(SLJIT_MOV, TMP1, 0, 
SLJIT_C_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); } #endif @@ -1888,7 +2059,7 @@ if (firstline) OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0); } -static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte) +static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar) { DEFINE_COMPILER; struct sljit_label *loop; @@ -1897,47 +2068,54 @@ struct sljit_jump *alreadyfound; struct sljit_jump *found; struct sljit_jump *foundoc = NULL; struct sljit_jump *notfound; -pcre_uint16 oc, bit; +pcre_uchar oc, bit; -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR); OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX); toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); -if (has_firstbyte) - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1); +if (has_firstchar) + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); else OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); loop = LABEL(); notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0); -OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0); -if ((reqbyte & REQ_CASELESS) == 0) - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff); +OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); +oc = req_char; +if (caseless) + { + oc = TABLE_GET(req_char, common->fcc, req_char); +#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8) + if (req_char > 127 && common->utf) + oc = UCD_OTHERCASE(req_char); +#endif + } +if (req_char == oc) + found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); else { - reqbyte &= 0xff; - oc = common->fcc[reqbyte]; - bit = reqbyte ^ oc; + bit = req_char ^ oc; if (ispowerof2(bit)) { OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); - 
found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit); + found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); } else { - found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte); + found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char); foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc); } } -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_JUMP, loop); JUMPHERE(found); if (foundoc) JUMPHERE(foundoc); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0); JUMPHERE(alreadyfound); JUMPHERE(toolong); return notfound; @@ -1985,7 +2163,7 @@ static void check_wordboundary(compiler_common *common) { DEFINE_COMPILER; struct sljit_jump *beginend; -#ifdef SUPPORT_UTF8 +#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF struct sljit_jump *jump; #endif @@ -2002,7 +2180,7 @@ read_char(common); /* Testing char type. */ #ifdef SUPPORT_UCP -if (common->useucp) +if (common->use_ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); @@ -2019,20 +2197,24 @@ if (common->useucp) else #endif { -#ifdef SUPPORT_UTF8 +#ifndef COMPILE_PCRE8 + jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UTF /* Here LOCALS1 has already been zeroed. 
*/ jump = NULL; - if (common->utf8) + if (common->utf) jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); -#endif +#endif /* COMPILE_PCRE8 */ OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0); -#ifdef SUPPORT_UTF8 +#ifndef COMPILE_PCRE8 + JUMPHERE(jump); +#elif defined SUPPORT_UTF if (jump != NULL) JUMPHERE(jump); -#endif +#endif /* COMPILE_PCRE8 */ } JUMPHERE(beginend); @@ -2042,7 +2224,7 @@ peek_char(common); /* Testing char type. This is a code duplication. */ #ifdef SUPPORT_UCP -if (common->useucp) +if (common->use_ucp) { OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); @@ -2058,19 +2240,25 @@ if (common->useucp) else #endif { -#ifdef SUPPORT_UTF8 +#ifndef COMPILE_PCRE8 + /* TMP2 may be destroyed by peek_char. */ + OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); + jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UTF OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); jump = NULL; - if (common->utf8) + if (common->utf) jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); #endif OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes); OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); -#ifdef SUPPORT_UTF8 +#ifndef COMPILE_PCRE8 + JUMPHERE(jump); +#elif defined SUPPORT_UTF if (jump != NULL) JUMPHERE(jump); -#endif +#endif /* COMPILE_PCRE8 */ } JUMPHERE(beginend); @@ -2089,14 +2277,18 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#ifdef SUPPORT_UTF8 -if (common->utf8) +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#ifdef 
COMPILE_PCRE8 +if (common->utf) { +#endif COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); +#ifdef COMPILE_PCRE8 } #endif +#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); } @@ -2113,9 +2305,11 @@ COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); -#ifdef SUPPORT_UTF8 -if (common->utf8) +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#ifdef COMPILE_PCRE8 +if (common->utf) { +#endif COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); @@ -2129,8 +2323,10 @@ if (common->utf8) OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); +#ifdef COMPILE_PCRE8 } #endif +#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -2147,14 +2343,18 @@ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); -#ifdef SUPPORT_UTF8 -if (common->utf8) +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 +#ifdef COMPILE_PCRE8 +if (common->utf) { +#endif COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, 
TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); +#ifdef COMPILE_PCRE8 } #endif +#endif /* SUPPORT_UTF || COMPILE_PCRE16 */ COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -2173,18 +2373,18 @@ sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0); -OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); -OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1); -OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1); +OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); +OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_C_NOT_ZERO, label); JUMPHERE(jump); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0); OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); sljit_emit_fast_return(compiler, RETURN_ADDR, 0); @@ -2205,20 +2405,30 @@ OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0); OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc); -OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); -OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 
1); -OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1); +OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1)); +OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); +#ifndef COMPILE_PCRE8 +jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255); +#endif OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0); +#ifndef COMPILE_PCRE8 +JUMPHERE(jump); +jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255); +#endif OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0); +#ifndef COMPILE_PCRE8 +JUMPHERE(jump); +#endif jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0); -OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); +OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_C_NOT_ZERO, label); JUMPHERE(jump); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0); OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1); @@ -2229,15 +2439,14 @@ sljit_emit_fast_return(compiler, RETURN_ADDR, 0); #undef CHAR1 #undef CHAR2 -#ifdef SUPPORT_UTF8 -#ifdef SUPPORT_UCP +#if defined SUPPORT_UTF && defined SUPPORT_UCP -static uschar * SLJIT_CALL do_utf8caselesscmp(uschar *src1, jit_arguments *args, uschar *end1) +static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1) { /* This function would be ineffective to do in JIT level. 
*/ int c1, c2; -uschar *src2 = args->ptr; -uschar *end2 = (uschar*)args->end; +const pcre_uchar *src2 = args->ptr; +const pcre_uchar *end2 = args->end; while (src1 < end1) { @@ -2250,17 +2459,16 @@ while (src1 < end1) return src2; } -#endif -#endif +#endif /* SUPPORT_UTF && SUPPORT_UCP */ -static uschar *byte_sequence_compare(compiler_common *common, BOOL caseless, uschar *cc, +static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, compare_context* context, jump_list **fallbacks) { DEFINE_COMPILER; unsigned int othercasebit = 0; -uschar *othercasebyte = NULL; -#ifdef SUPPORT_UTF8 -int utf8length; +pcre_uchar *othercasechar = NULL; +#ifdef SUPPORT_UTF +int utflength; #endif if (caseless && char_has_othercase(common, cc)) @@ -2268,93 +2476,130 @@ if (caseless && char_has_othercase(common, cc)) othercasebit = char_get_othercase_bit(common, cc); SLJIT_ASSERT(othercasebit); /* Extracting bit difference info. */ - othercasebyte = cc + (othercasebit >> 8); +#ifdef COMPILE_PCRE8 + othercasechar = cc + (othercasebit >> 8); othercasebit &= 0xff; +#else +#ifdef COMPILE_PCRE16 + othercasechar = cc + (othercasebit >> 9); + if ((othercasebit & 0x100) != 0) + othercasebit = (othercasebit & 0xff) << 8; + else + othercasebit &= 0xff; +#endif +#endif } if (context->sourcereg == -1) { +#ifdef COMPILE_PCRE8 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED if (context->length >= 4) OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 2) - OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); else #endif OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); +#else +#ifdef COMPILE_PCRE16 +#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED + if (context->length >= 4) + OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); + else +#endif + OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); 
+#endif +#endif /* COMPILE_PCRE8 */ context->sourcereg = TMP2; } -#ifdef SUPPORT_UTF8 -utf8length = 1; -if (common->utf8 && *cc >= 0xc0) - utf8length += _pcre_utf8_table4[*cc & 0x3f]; +#ifdef SUPPORT_UTF +utflength = 1; +if (common->utf && HAS_EXTRALEN(*cc)) + utflength += GET_EXTRALEN(*cc); do { #endif - context->length--; + context->length -= IN_UCHARS(1); #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED /* Unaligned read is supported. */ - if (othercasebit != 0 && othercasebyte == cc) + if (othercasebit != 0 && othercasechar == cc) { - context->c.asbytes[context->byteptr] = *cc | othercasebit; - context->oc.asbytes[context->byteptr] = othercasebit; + context->c.asuchars[context->ucharptr] = *cc | othercasebit; + context->oc.asuchars[context->ucharptr] = othercasebit; } else { - context->c.asbytes[context->byteptr] = *cc; - context->oc.asbytes[context->byteptr] = 0; + context->c.asuchars[context->ucharptr] = *cc; + context->oc.asuchars[context->ucharptr] = 0; } - context->byteptr++; + context->ucharptr++; - if (context->byteptr >= 4 || context->length == 0 || (context->byteptr == 2 && context->length == 1)) +#ifdef COMPILE_PCRE8 + if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) +#else + if (context->ucharptr >= 2 || context->length == 0) +#endif { if (context->length >= 4) OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#ifdef COMPILE_PCRE8 else if (context->length >= 2) - OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); else if (context->length >= 1) OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#else + else if (context->length >= 2) + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; - switch(context->byteptr) + switch(context->ucharptr) { - case 4: + case 4 / sizeof(pcre_uchar): if (context->oc.asint != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); break; - case 2: - if (context->oc.asshort != 0) - OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort); - add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort)); + case 2 / sizeof(pcre_uchar): + if (context->oc.asushort != 0) + OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); + add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); break; +#ifdef COMPILE_PCRE8 case 1: if (context->oc.asbyte != 0) OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); break; +#endif default: SLJIT_ASSERT_STOP(); break; } - context->byteptr = 0; + context->ucharptr = 0; } #else /* Unaligned read is unsupported. */ +#ifdef COMPILE_PCRE8 if (context->length > 0) OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#else + if (context->length > 0) + OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); +#endif context->sourcereg = context->sourcereg == TMP1 ? 
TMP2 : TMP1; - if (othercasebit != 0 && othercasebyte == cc) + if (othercasebit != 0 && othercasechar == cc) { OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); @@ -2365,16 +2610,16 @@ do #endif cc++; -#ifdef SUPPORT_UTF8 - utf8length--; +#ifdef SUPPORT_UTF + utflength--; } -while (utf8length > 0); +while (utflength > 0); #endif return cc; } -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 #define SET_TYPE_OFFSET(value) \ if ((value) != typeoffset) \ @@ -2396,7 +2641,7 @@ return cc; } \ charoffset = (value); -static void compile_xclass_hotpath(compiler_common *common, uschar *cc, jump_list **fallbacks) +static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks) { DEFINE_COMPILER; jump_list *found = NULL; @@ -2404,7 +2649,7 @@ jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks; unsigned int c; int compares; struct sljit_jump *jump = NULL; -uschar *ccbegin; +pcre_uchar *ccbegin; #ifdef SUPPORT_UCP BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; BOOL charsaved = FALSE; @@ -2414,15 +2659,19 @@ unsigned int typeoffset; int invertcmp, numberofcmps; unsigned int charoffset; -/* Although SUPPORT_UTF8 must be defined, we are not necessary in utf8 mode. */ +/* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. 
*/ check_input_end(common, fallbacks); read_char(common); if ((*cc++ & XCL_MAP) != 0) { OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); - if (common->utf8) +#ifndef COMPILE_PCRE8 + jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); +#elif defined SUPPORT_UTF + if (common->utf) jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); +#endif OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); @@ -2431,13 +2680,17 @@ if ((*cc++ & XCL_MAP) != 0) OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO)); - if (common->utf8) +#ifndef COMPILE_PCRE8 + JUMPHERE(jump); +#elif defined SUPPORT_UTF + if (common->utf) JUMPHERE(jump); +#endif OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); #ifdef SUPPORT_UCP charsaved = TRUE; #endif - cc += 32; + cc += 32 / sizeof(pcre_uchar); } /* Scanning the necessary info. */ @@ -2449,8 +2702,8 @@ while (*cc != XCL_END) if (*cc == XCL_SINGLE) { cc += 2; -#ifdef SUPPORT_UTF8 - if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif #ifdef SUPPORT_UCP needschar = TRUE; @@ -2459,12 +2712,12 @@ while (*cc != XCL_END) else if (*cc == XCL_RANGE) { cc += 2; -#ifdef SUPPORT_UTF8 - if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif cc++; -#ifdef SUPPORT_UTF8 - if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif #ifdef SUPPORT_UCP needschar = TRUE; @@ -2534,13 +2787,13 @@ if (needstype || needsscript) { if (scriptreg == TMP1) { - OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script)); + OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + 
SLJIT_OFFSETOF(ucd_record, script)); OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3); } else { OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0); } } @@ -2564,8 +2817,8 @@ while (*cc != XCL_END) if (*cc == XCL_SINGLE) { cc ++; -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { GETCHARINC(c, cc); } @@ -2595,8 +2848,8 @@ while (*cc != XCL_END) else if (*cc == XCL_RANGE) { cc ++; -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { GETCHARINC(c, cc); } @@ -2604,8 +2857,8 @@ while (*cc != XCL_END) #endif c = *cc++; SET_CHAR_OFFSET(c); -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { GETCHARINC(c, cc); } @@ -2661,9 +2914,9 @@ while (*cc != XCL_END) break; case PT_GC: - c = _pcre_ucp_typerange[(int)cc[1] * 2]; + c = PRIV(ucp_typerange)[(int)cc[1] * 2]; SET_TYPE_OFFSET(c); - jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, _pcre_ucp_typerange[(int)cc[1] * 2 + 1] - c); + jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); break; case PT_PC: @@ -2725,17 +2978,17 @@ if (found != NULL) #endif -static uschar *compile_char1_hotpath(compiler_common *common, uschar type, uschar *cc, jump_list **fallbacks) +static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks) { DEFINE_COMPILER; int length; unsigned int c, oc, bit; compare_context context; struct sljit_jump *jump[4]; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF struct sljit_label *label; #ifdef SUPPORT_UCP -uschar propdata[5]; +pcre_uchar propdata[5]; #endif #endif @@ -2790,7 +3043,7 @@ switch(type) { jump[0] = 
CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff)); JUMPHERE(jump[1]); JUMPHERE(jump[0]); @@ -2801,27 +3054,38 @@ switch(type) case OP_ALLANY: check_input_end(common, fallbacks); -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#ifdef COMPILE_PCRE8 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0); + OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#else /* COMPILE_PCRE8 */ +#ifdef COMPILE_PCRE16 + jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); + COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#endif /* COMPILE_PCRE16 */ +#endif /* COMPILE_PCRE8 */ JUMPHERE(jump[0]); return cc; } #endif - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; case OP_ANYBYTE: check_input_end(common, fallbacks); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); return cc; -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF #ifdef SUPPORT_UCP case OP_NOTPROP: case OP_PROP: @@ -2840,9 +3104,9 @@ 
switch(type) read_char(common); jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump[3] = JUMP(SLJIT_JUMP); JUMPHERE(jump[0]); check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE); @@ -2892,36 +3156,37 @@ switch(type) jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } else if (common->nltype == NLTYPE_FIXED) { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline)); } else { - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); - 
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); jump[2] = JUMP(SLJIT_C_GREATER); add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 1); + /* Equal. */ + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL); add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP)); JUMPHERE(jump[1]); if (common->nltype == NLTYPE_ANYCRLF) { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL)); } @@ -2961,15 +3226,13 @@ switch(type) jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, end)); - add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, STR_PTR, 0)); - + add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, STR_PTR, 0, STR_END, 0)); if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2); + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } @@ -3003,10 +3266,10 @@ switch(type) if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 
2); + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff)); add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff)); } @@ -3021,25 +3284,25 @@ switch(type) case OP_CHAR: case OP_CHARI: length = 1; -#ifdef SUPPORT_UTF8 - if (common->utf8 && *cc >= 0xc0) length += _pcre_utf8_table4[*cc & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc); #endif if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0) { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); - context.length = length; + context.length = IN_UCHARS(length); context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.byteptr = 0; + context.ucharptr = 0; #endif return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks); } - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + check_input_end(common, fallbacks); read_char(common); -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { GETCHAR(c, cc); } @@ -3055,16 +3318,14 @@ switch(type) case OP_NOT: case OP_NOTI: + check_input_end(common, fallbacks); length = 1; -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { - if (*cc >= 0xc0) length += _pcre_utf8_table4[*cc & 0x3f]; - - check_input_end(common, fallbacks); - GETCHAR(c, cc); - - if 
(c <= 127) +#ifdef COMPILE_PCRE8 + c = *cc; + if (c < 128) { OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); if (type == OP_NOT || !char_has_othercase(common, cc)) @@ -3076,22 +3337,24 @@ switch(type) add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20)); } /* Skip the variable-length character. */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_char_sizes - 0xc0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); JUMPHERE(jump[0]); - return cc + length; + return cc + 1; } else +#endif /* COMPILE_PCRE8 */ + { + GETCHARLEN(c, cc, length); read_char(common); + } } else -#endif +#endif /* SUPPORT_UTF */ { - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1); - add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0)); - OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1); + read_char(common); c = *cc; } @@ -3112,15 +3375,19 @@ switch(type) add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc)); } } - return cc + length; + return cc + 1; case OP_CLASS: case OP_NCLASS: check_input_end(common, fallbacks); read_char(common); -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 jump[0] = NULL; - if (common->utf8) +#ifdef COMPILE_PCRE8 + /* This check only affects 8 bit mode. In other modes, we + always need to compare the value with 255. 
*/ + if (common->utf) +#endif /* COMPILE_PCRE8 */ { jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255); if (type == OP_CLASS) @@ -3129,20 +3396,20 @@ switch(type) jump[0] = NULL; } } -#endif +#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO)); -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 if (jump[0] != NULL) JUMPHERE(jump[0]); -#endif - return cc + 32; +#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */ + return cc + 32 / sizeof(pcre_uchar); -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 case OP_XCLASS: compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks); return cc + GET(cc, 0) - 1; @@ -3152,20 +3419,21 @@ switch(type) length = GET(cc, 0); SLJIT_ASSERT(length > 0); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length); label = LABEL(); - add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0)); + add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0)); skip_char_back(common); OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_C_NOT_ZERO, label); return cc + LINK_SIZE; } #endif - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0)); return cc + 
LINK_SIZE; } @@ -3173,12 +3441,12 @@ SLJIT_ASSERT_STOP(); return cc; } -static SLJIT_INLINE uschar *compile_charn_hotpath(compiler_common *common, uschar *cc, uschar *ccend, jump_list **fallbacks) +static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks) { /* This function consumes at least one input character. */ /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */ DEFINE_COMPILER; -uschar *ccbegin = cc; +pcre_uchar *ccbegin = cc; compare_context context; int size; @@ -3191,21 +3459,21 @@ do if (*cc == OP_CHAR) { size = 1; -#ifdef SUPPORT_UTF8 - if (common->utf8 && cc[1] >= 0xc0) - size += _pcre_utf8_table4[cc[1] & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); #endif } else if (*cc == OP_CHARI) { size = 1; -#ifdef SUPPORT_UTF8 - if (common->utf8) +#ifdef SUPPORT_UTF + if (common->utf) { if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0) size = 0; - else if (cc[1] >= 0xc0) - size += _pcre_utf8_table4[cc[1] & 0x3f]; + else if (HAS_EXTRALEN(cc[1])) + size += GET_EXTRALEN(cc[1]); } else #endif @@ -3216,7 +3484,7 @@ do size = 0; cc += 1 + size; - context.length += size; + context.length += IN_UCHARS(size); } while (size > 0 && context.length <= 128); @@ -3229,7 +3497,7 @@ if (context.length > 0) context.sourcereg = -1; #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED - context.byteptr = 0; + context.ucharptr = 0; #endif do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0); return cc; @@ -3239,7 +3507,7 @@ if (context.length > 0) return compile_char1_hotpath(common, *cc, cc + 1, fallbacks); } -static struct sljit_jump *compile_ref_checks(compiler_common *common, uschar *cc, jump_list **fallbacks) +static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list 
**fallbacks) { DEFINE_COMPILER; int offset = GET2(cc, 1) << 1; @@ -3261,7 +3529,7 @@ return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset } /* Forward definitions. */ -static void compile_hotpath(compiler_common *, uschar *, uschar *, fallback_common *); +static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *); static void compile_fallbackpath(compiler_common *, struct fallback_common *); #define PUSH_FALLBACK(size, ccstart, error) \ @@ -3292,7 +3560,7 @@ static void compile_fallbackpath(compiler_common *, struct fallback_common *); #define FALLBACK_AS(type) ((type*)fallback) -static uschar *compile_ref_hotpath(compiler_common *common, uschar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail) +static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail) { DEFINE_COMPILER; int offset = GET2(cc, 1) << 1; @@ -3302,9 +3570,8 @@ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset)); if (withchecks && !common->jscript_compat) add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); -#ifdef SUPPORT_UTF8 -#ifdef SUPPORT_UCP -if (common->utf8 && *cc == OP_REFI) +#if defined SUPPORT_UTF && defined SUPPORT_UCP +if (common->utf && *cc == OP_REFI) { SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1)); @@ -3315,14 +3582,13 @@ if (common->utf8 && *cc == OP_REFI) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0); OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0); - sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf8caselesscmp)); + sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, 
SLJIT_FUNC_OFFSET(do_utf_caselesscmp)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0); add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); } else -#endif -#endif +#endif /* SUPPORT_UTF && SUPPORT_UCP */ { OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0); if (withchecks) @@ -3341,24 +3607,24 @@ if (jump != NULL) else JUMPHERE(jump); } -return cc + 3; +return cc + 1 + IMM2_SIZE; } -static SLJIT_INLINE uschar *compile_ref_iterator_hotpath(compiler_common *common, uschar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; -uschar type; +pcre_uchar type; struct sljit_label *label; struct sljit_jump *zerolength; struct sljit_jump *jump = NULL; -uschar *ccbegin = cc; +pcre_uchar *ccbegin = cc; int min = 0, max = 0; BOOL minimize; PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL); -type = cc[3]; +type = cc[1 + IMM2_SIZE]; minimize = (type & 0x1) != 0; switch(type) { @@ -3366,25 +3632,25 @@ switch(type) case OP_CRMINSTAR: min = 0; max = 0; - cc += 4; + cc += 1 + IMM2_SIZE + 1; break; case OP_CRPLUS: case OP_CRMINPLUS: min = 1; max = 0; - cc += 4; + cc += 1 + IMM2_SIZE + 1; break; case OP_CRQUERY: case OP_CRMINQUERY: min = 0; max = 1; - cc += 4; + cc += 1 + IMM2_SIZE + 1; break; case OP_CRRANGE: case OP_CRMINRANGE: - min = GET2(cc, 3 + 1); - max = GET2(cc, 3 + 3); - cc += 8; + min = GET2(cc, 1 + IMM2_SIZE + 1); + max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE); + cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE; break; default: SLJIT_ASSERT_STOP(); @@ -3488,7 +3754,7 @@ decrease_call_count(common); return cc; } -static SLJIT_INLINE uschar *compile_recurse_hotpath(compiler_common *common, uschar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar 
*compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; @@ -3534,15 +3800,15 @@ add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IM return cc + 1 + LINK_SIZE; } -static uschar *compile_assert_hotpath(compiler_common *common, uschar *cc, assert_fallback *fallback, BOOL conditional) +static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional) { DEFINE_COMPILER; int framesize; int localptr; fallback_common altfallback; -uschar *ccbegin; -uschar opcode; -uschar bra = OP_BRA; +pcre_uchar *ccbegin; +pcre_uchar opcode; +pcre_uchar bra = OP_BRA; jump_list *tmp = NULL; jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks; jump_list **found; @@ -3558,7 +3824,7 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) bra = *cc; cc++; } -localptr = PRIV(cc); +localptr = PRIV_DATA(cc); SLJIT_ASSERT(localptr != 0); framesize = get_framesize(common, cc, FALSE); fallback->framesize = framesize; @@ -3804,11 +4070,11 @@ common->accept = save_accept; return cc + 1 + LINK_SIZE; } -static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, uschar *name_table) +static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table) { int condition = FALSE; -uschar *slotA = name_table; -uschar *slotB; +pcre_uchar *slotA = name_table; +pcre_uchar *slotB; sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)]; sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)]; sljit_w no_capture; @@ -3833,7 +4099,7 @@ if (i < name_count) while (slotB > name_table) { slotB -= name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { condition = locals[GET2(slotB, 0) << 1] != no_capture; if (condition) break; @@ -3848,7 +4114,7 @@ if (i < name_count) for (i++; i < 
name_count; i++) { slotB += name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { condition = locals[GET2(slotB, 0) << 1] != no_capture; if (condition) break; @@ -3860,11 +4126,11 @@ if (i < name_count) return condition; } -static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, uschar *name_table) +static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table) { int condition = FALSE; -uschar *slotA = name_table; -uschar *slotB; +pcre_uchar *slotA = name_table; +pcre_uchar *slotB; sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)]; sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)]; sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)]; @@ -3886,7 +4152,7 @@ if (i < name_count) while (slotB > name_table) { slotB -= name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { condition = GET2(slotB, 0) == group_num; if (condition) break; @@ -3901,7 +4167,7 @@ if (i < name_count) for (i++; i < name_count; i++) { slotB += name_entry_size; - if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0) + if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0) { condition = GET2(slotB, 0) == group_num; if (condition) break; @@ -3967,18 +4233,18 @@ return condition; Or nothing, if trace is unnecessary */ -static uschar *compile_bracket_hotpath(compiler_common *common, uschar *cc, fallback_common *parent) +static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; -uschar opcode; +pcre_uchar opcode; int localptr = 0; int offset = 0; int stacksize; -uschar *ccbegin; -uschar *hotpath; -uschar bra = OP_BRA; -uschar ket; +pcre_uchar *ccbegin; +pcre_uchar *hotpath; +pcre_uchar bra = OP_BRA; +pcre_uchar ket; assert_fallback *assert; BOOL has_alternatives; 
struct sljit_jump *jump; @@ -4039,12 +4305,12 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) localptr = OVECTOR_PRIV(offset); offset <<= 1; FALLBACK_AS(bracket_fallback)->localptr = localptr; - hotpath += 2; + hotpath += IMM2_SIZE; } else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND) { /* Other brackets simply allocate the next entry. */ - localptr = PRIV(ccbegin); + localptr = PRIV_DATA(ccbegin); SLJIT_ASSERT(localptr != 0); FALLBACK_AS(bracket_fallback)->localptr = localptr; if (opcode == OP_ONCE) @@ -4203,7 +4469,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) SLJIT_ASSERT(has_alternatives); add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1))); - hotpath += 3; + hotpath += 1 + IMM2_SIZE; } else if (*hotpath == OP_NCREF) { @@ -4222,7 +4488,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); JUMPHERE(jump); - hotpath += 3; + hotpath += 1 + IMM2_SIZE; } else if (*hotpath == OP_RREF || *hotpath == OP_NRREF) { @@ -4243,7 +4509,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) { SLJIT_ASSERT(!has_alternatives); if (stacksize != 0) - hotpath += 3; + hotpath += 1 + IMM2_SIZE; else { if (*cc == OP_ALT) @@ -4270,7 +4536,7 @@ if (opcode == OP_COND || opcode == OP_SCOND) sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1); add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0)); - hotpath += 3; + hotpath += 1 + IMM2_SIZE; } } else @@ -4406,18 +4672,18 @@ cc += 1 + LINK_SIZE; return cc; } -static uschar *compile_bracketpos_hotpath(compiler_common *common, uschar *cc, fallback_common *parent) +static pcre_uchar 
*compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; -uschar opcode; +pcre_uchar opcode; int localptr; int cbraprivptr = 0; int framesize; int stacksize; int offset = 0; BOOL zero = FALSE; -uschar *ccbegin = NULL; +pcre_uchar *ccbegin = NULL; int stack; struct sljit_label *loop = NULL; struct jump_list *emptymatch = NULL; @@ -4430,7 +4696,7 @@ if (*cc == OP_BRAPOSZERO) } opcode = *cc; -localptr = PRIV(cc); +localptr = PRIV_DATA(cc); SLJIT_ASSERT(localptr != 0); FALLBACK_AS(bracketpos_fallback)->localptr = localptr; switch(opcode) @@ -4445,7 +4711,7 @@ switch(opcode) offset = GET2(cc, 1 + LINK_SIZE); cbraprivptr = OVECTOR_PRIV(offset); offset <<= 1; - ccbegin = cc + 1 + LINK_SIZE + 2; + ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE; break; default: @@ -4624,7 +4890,7 @@ decrease_call_count(common); return cc + 1 + LINK_SIZE; } -static SLJIT_INLINE uschar *get_iterator_parameters(compiler_common *common, uschar *cc, uschar *opcode, uschar *type, int *arg1, int *arg2, uschar **end) +static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end) { int class_len; @@ -4663,7 +4929,7 @@ else SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS); *type = *opcode; cc++; - class_len = (*type < OP_XCLASS) ? 33 : GET(cc, 0); + class_len = (*type < OP_XCLASS) ? 
(int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0); *opcode = cc[class_len - 1]; if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY) { @@ -4674,7 +4940,7 @@ else else { SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE); - *arg1 = GET2(cc, (class_len + 2)); + *arg1 = GET2(cc, (class_len + IMM2_SIZE)); *arg2 = GET2(cc, class_len); if (*arg2 == 0) @@ -4686,7 +4952,7 @@ else *opcode = OP_EXACT; if (end != NULL) - *end = cc + class_len + 4; + *end = cc + class_len + 2 * IMM2_SIZE; } return cc; } @@ -4694,7 +4960,7 @@ else if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO) { *arg1 = GET2(cc, 0); - cc += 2; + cc += IMM2_SIZE; } if (*type == 0) @@ -4709,21 +4975,21 @@ if (*type == 0) if (end != NULL) { *end = cc + 1; -#ifdef SUPPORT_UTF8 - if (common->utf8 && *cc >= 0xc0) *end += _pcre_utf8_table4[*cc & 0x3f]; +#ifdef SUPPORT_UTF + if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc); #endif } return cc; } -static uschar *compile_iterator_hotpath(compiler_common *common, uschar *cc, fallback_common *parent) +static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; -uschar opcode; -uschar type; +pcre_uchar opcode; +pcre_uchar type; int arg1 = -1, arg2 = -1; -uschar* end; +pcre_uchar* end; jump_list *nomatch = NULL; struct sljit_jump *jump = NULL; struct sljit_label *label; @@ -4885,7 +5151,7 @@ decrease_call_count(common); return end; } -static SLJIT_INLINE uschar *compile_fail_accept_hotpath(compiler_common *common, uschar *cc, fallback_common *parent) +static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; @@ -4929,23 +5195,23 @@ add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP)); return cc + 1; } -static SLJIT_INLINE uschar *compile_close_hotpath(compiler_common *common, uschar 
*cc) +static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc) { DEFINE_COMPILER; int offset = GET2(cc, 1); /* Data will be discarded anyway... */ if (common->currententry != NULL) - return cc + 3; + return cc + 1 + IMM2_SIZE; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset)); offset <<= 1; OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0); -return cc + 3; +return cc + 1 + IMM2_SIZE; } -static void compile_hotpath(compiler_common *common, uschar *cc, uschar *ccend, fallback_common *parent) +static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent) { DEFINE_COMPILER; fallback_common *fallback; @@ -5071,13 +5337,13 @@ while (cc < ccend) case OP_CLASS: case OP_NCLASS: - if (cc[33] >= OP_CRSTAR && cc[33] <= OP_CRMINRANGE) + if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE) cc = compile_iterator_hotpath(common, cc, parent); else cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks); break; -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || defined COMPILE_PCRE16 case OP_XCLASS: if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE) cc = compile_iterator_hotpath(common, cc, parent); @@ -5088,7 +5354,7 @@ while (cc < ccend) case OP_REF: case OP_REFI: - if (cc[3] >= OP_CRSTAR && cc[3] <= OP_CRMINRANGE) + if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE) cc = compile_ref_iterator_hotpath(common, cc, parent); else cc = compile_ref_hotpath(common, cc, parent->top != NULL ? 
&parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE); @@ -5196,9 +5462,9 @@ SLJIT_ASSERT(cc == ccend); static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current) { DEFINE_COMPILER; -uschar *cc = current->cc; -uschar opcode; -uschar type; +pcre_uchar *cc = current->cc; +pcre_uchar opcode; +pcre_uchar type; int arg1 = -1, arg2 = -1; struct sljit_label *label = NULL; struct sljit_jump *jump = NULL; @@ -5323,10 +5589,10 @@ switch(opcode) static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current) { DEFINE_COMPILER; -uschar *cc = current->cc; -uschar type; +pcre_uchar *cc = current->cc; +pcre_uchar type; -type = cc[3]; +type = cc[1 + IMM2_SIZE]; if ((type & 0x1) == 0) { set_jumps(current->topfallbacks, LABEL()); @@ -5355,8 +5621,8 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0); static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current) { DEFINE_COMPILER; -uschar *cc = current->cc; -uschar bra = OP_BRA; +pcre_uchar *cc = current->cc; +pcre_uchar bra = OP_BRA; struct sljit_jump *brajump = NULL; SLJIT_ASSERT(*cc != OP_BRAMINZERO); @@ -5427,13 +5693,13 @@ int offset = 0; int localptr = CURRENT_AS(bracket_fallback)->localptr; int stacksize; int count; -uschar *cc = current->cc; -uschar *ccbegin; -uschar *ccprev; +pcre_uchar *cc = current->cc; +pcre_uchar *ccbegin; +pcre_uchar *ccprev; jump_list *jumplist = NULL; jump_list *jumplistitem = NULL; -uschar bra = OP_BRA; -uschar ket; +pcre_uchar bra = OP_BRA; +pcre_uchar ket; assert_fallback *assert; BOOL has_alternatives; struct sljit_jump *brazero = NULL; @@ -5933,7 +6199,9 @@ while (current) case OP_TYPEPOSUPTO: case OP_CLASS: case OP_NCLASS: +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: +#endif compile_iterator_fallbackpath(common, current); break; @@ -6000,9 +6268,9 @@ while (current) static SLJIT_INLINE void compile_recurse(compiler_common *common) 
{ DEFINE_COMPILER; -uschar *cc = common->start + common->currententry->start; -uschar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : 2); -uschar *ccend = bracketend(cc); +pcre_uchar *cc = common->start + common->currententry->start; +pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); +pcre_uchar *ccend = bracketend(cc); int localsize = get_localsize(common, ccbegin, ccend); int framesize = get_framesize(common, cc, TRUE); int alternativesize; @@ -6090,15 +6358,15 @@ sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0); #undef CURRENT_AS void -_pcre_jit_compile(const real_pcre *re, pcre_extra *extra) +PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra) { struct sljit_compiler *compiler; fallback_common rootfallback; compiler_common common_data; compiler_common *common = &common_data; -const uschar *tables = re->tables; +const pcre_uint8 *tables = re->tables; pcre_study_data *study; -uschar *ccend; +pcre_uchar *ccend; executable_function *function; void *executable_func; sljit_uw executable_size; @@ -6114,10 +6382,10 @@ SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0); study = extra->study_data; if (!tables) - tables = _pcre_default_tables; + tables = PRIV(default_tables); memset(&rootfallback, 0, sizeof(fallback_common)); -rootfallback.cc = (uschar *)re + re->name_table_offset + re->name_count * re->name_entry_size; +rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; common->compiler = NULL; common->start = rootfallback.cc; @@ -6158,7 +6426,7 @@ else } common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; common->ctypes = (sljit_w)(tables + ctypes_offset); -common->name_table = (sljit_w)re + re->name_table_offset; +common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset); common->name_count = re->name_count; common->name_entry_size = re->name_entry_size; common->acceptlabel = NULL; @@ -6176,14 +6444,17 @@ common->vspace = NULL; 
common->casefulcmp = NULL; common->caselesscmp = NULL; common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; -#ifdef SUPPORT_UTF8 -common->utf8 = (re->options & PCRE_UTF8) != 0; +#ifdef SUPPORT_UTF +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +common->utf = (re->options & PCRE_UTF8) != 0; #ifdef SUPPORT_UCP -common->useucp = (re->options & PCRE_UCP) != 0; +common->use_ucp = (re->options & PCRE_UCP) != 0; #endif -common->utf8readchar = NULL; -common->utf8readtype8 = NULL; +common->utfreadchar = NULL; +#ifdef COMPILE_PCRE8 +common->utfreadtype8 = NULL; #endif +#endif /* SUPPORT_UTF */ #ifdef SUPPORT_UCP common->getucd = NULL; #endif @@ -6215,10 +6486,10 @@ sljit_emit_enter(compiler, 1, 5, 5, common->localsize); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); if ((re->flags & PCRE_REQCHSET) != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, SLJIT_TEMPORARY_REG1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0); -OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0); -OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0); +OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); @@ -6233,14 +6504,14 @@ if ((re->options & PCRE_ANCHORED) == 0) mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0); /* Forward search if possible. 
*/ if ((re->flags & PCRE_FIRSTSET) != 0) - fast_forward_first_byte(common, re->first_byte, (re->options & PCRE_FIRSTLINE) != 0); + fast_forward_first_char(common, re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0); else if ((re->flags & PCRE_STARTLINE) != 0) fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0); else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0); } if ((re->flags & PCRE_REQCHSET) != 0) - reqbyte_notfound = search_requested_char(common, re->req_byte, (re->flags & PCRE_FIRSTSET) != 0); + reqbyte_notfound = search_requested_char(common, re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); /* Store the current STR_PTR in OVECTOR(0). */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0); @@ -6265,7 +6536,7 @@ if (common->accept != NULL) /* This means we have a match. Update the ovector. 
*/ copy_ovector(common, re->top_bracket + 1); leave = LABEL(); -sljit_emit_return(compiler, SLJIT_UNUSED, 0); +sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); empty_match_fallback = LABEL(); compile_fallbackpath(common, rootfallback.top); @@ -6287,7 +6558,7 @@ if ((re->options & PCRE_ANCHORED) == 0) { if (study != NULL && study->minlength > 1) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength); + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop); } else @@ -6297,7 +6568,7 @@ if ((re->options & PCRE_ANCHORED) == 0) { if (study != NULL && study->minlength > 1) { - OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength); + OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER); OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END); @@ -6409,18 +6680,20 @@ if (common->caselesscmp != NULL) set_jumps(common->caselesscmp, LABEL()); do_caselesscmp(common); } -#ifdef SUPPORT_UTF8 -if (common->utf8readchar != NULL) +#ifdef SUPPORT_UTF +if (common->utfreadchar != NULL) { - set_jumps(common->utf8readchar, LABEL()); - do_utf8readchar(common); + set_jumps(common->utfreadchar, LABEL()); + do_utfreadchar(common); } -if (common->utf8readtype8 != NULL) +#ifdef COMPILE_PCRE8 +if (common->utfreadtype8 != NULL) { - set_jumps(common->utf8readtype8, LABEL()); - do_utf8readtype8(common); + set_jumps(common->utfreadtype8, LABEL()); + do_utfreadtype8(common); } #endif +#endif /* COMPILE_PCRE8 */ #ifdef SUPPORT_UCP if (common->getucd != NULL) { @@ -6459,7 +6732,7 @@ union { void* executable_func; jit_function call_executable_func; } convert_executable_func; -uschar local_area[LOCAL_SPACE_SIZE]; +pcre_uint8 local_area[LOCAL_SPACE_SIZE]; struct sljit_stack local_stack; local_stack.top = 
(sljit_w)&local_area; @@ -6472,8 +6745,8 @@ return convert_executable_func.call_executable_func(arguments); } int -_pcre_jit_exec(const real_pcre *re, void *executable_func, - PCRE_SPTR subject, int length, int start_offset, int options, +PRIV(jit_exec)(const REAL_PCRE *re, void *executable_func, + const pcre_uchar *subject, int length, int start_offset, int options, int match_limit, int *offsets, int offsetcount) { executable_function *function = (executable_function*)executable_func; @@ -6503,7 +6776,8 @@ workspace. We don't need the workspace here. For compatibility, we limit the number of captured strings in the same way as pcre_exec(), so that the user gets the same result with and without JIT. */ -offsetcount = ((offsetcount - (offsetcount % 3)) * 2)/3; +if (offsetcount != 2) + offsetcount = ((offsetcount - (offsetcount % 3)) * 2) / 3; maxoffsetcount = (re->top_bracket + 1) * 2; if (offsetcount > maxoffsetcount) offsetcount = maxoffsetcount; @@ -6528,7 +6802,7 @@ return retval; } void -_pcre_jit_free(void *executable_func) +PRIV(jit_free)(void *executable_func) { executable_function *function = (executable_function*)executable_func; sljit_free_code(function->executable_func); @@ -6536,13 +6810,24 @@ SLJIT_FREE(function); } int -_pcre_jit_get_size(void *executable_func) +PRIV(jit_get_size)(void *executable_func) { return ((executable_function*)executable_func)->executable_size; } +const char* +PRIV(jit_get_target)(void) +{ +return sljit_get_platform_name(); +} + +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize) +#else +PCRE_EXP_DECL pcre16_jit_stack * +pcre16_jit_stack_alloc(int startsize, int maxsize) +#endif { if (startsize < 1 || maxsize < 1) return NULL; @@ -6550,17 +6835,27 @@ if (startsize > maxsize) startsize = maxsize; startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); -return 
(pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize); +return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize); } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *stack) +#else +PCRE_EXP_DECL void +pcre16_jit_stack_free(pcre16_jit_stack *stack) +#endif { sljit_free_stack((struct sljit_stack*)stack); } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) +#else +PCRE_EXP_DECL void +pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) +#endif { executable_function *function; if (extra != NULL && @@ -6578,22 +6873,37 @@ if (extra != NULL && /* These are dummy functions to avoid linking errors when JIT support is not being compiled. */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL pcre_jit_stack * pcre_jit_stack_alloc(int startsize, int maxsize) +#else +PCRE_EXP_DECL pcre16_jit_stack * +pcre16_jit_stack_alloc(int startsize, int maxsize) +#endif { (void)startsize; (void)maxsize; return NULL; } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_jit_stack_free(pcre_jit_stack *stack) +#else +PCRE_EXP_DECL void +pcre16_jit_stack_free(pcre16_jit_stack *stack) +#endif { (void)stack; } +#ifdef COMPILE_PCRE8 PCRE_EXP_DECL void pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) +#else +PCRE_EXP_DECL void +pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) +#endif { (void)extra; (void)callback; diff --git a/harbour/src/3rd/pcre/pcremktb.c b/harbour/src/3rd/pcre/pcremktb.c index c739b9ab98..8bb753907e 100644 --- a/harbour/src/3rd/pcre/pcremktb.c +++ b/harbour/src/3rd/pcre/pcremktb.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -59,21 +59,26 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */ /* This function builds a set of character tables for use by PCRE and returns a pointer to them. They are build using the ctype functions, and consequently their contents will depend upon the current locale setting. When compiled as -part of the library, the store is obtained via pcre_malloc(), but when compiled -inside dftables, use malloc(). +part of the library, the store is obtained via PUBL(malloc)(), but when +compiled inside dftables, use malloc(). Arguments: none Returns: pointer to the contiguous block of data */ +#ifdef COMPILE_PCRE8 const unsigned char * pcre_maketables(void) +#else +const unsigned char * +pcre16_maketables(void) +#endif { unsigned char *yield, *p; int i; #ifndef DFTABLES -yield = (unsigned char*)(pcre_malloc)(tables_length); +yield = (unsigned char*)(PUBL(malloc))(tables_length); #else yield = (unsigned char*)malloc(tables_length); #endif diff --git a/harbour/src/3rd/pcre/pcrenewl.c b/harbour/src/3rd/pcre/pcrenewl.c index b2bb8cb72a..2c8b1ac8ef 100644 --- a/harbour/src/3rd/pcre/pcrenewl.c +++ b/harbour/src/3rd/pcre/pcrenewl.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -67,16 +67,25 @@ Arguments: type the newline type endptr pointer to the end of the string lenptr where to return the length - utf8 TRUE if in utf8 mode + utf TRUE if in utf mode Returns: TRUE or FALSE */ BOOL -_pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8) +PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr, + BOOL utf) { int c; -if (utf8) { GETCHAR(c, ptr); } else c = *ptr; +(void)utf; +#ifdef SUPPORT_UTF +if (utf) + { + GETCHAR(c, ptr); + } +else +#endif /* SUPPORT_UTF */ + c = *ptr; if (type == NLTYPE_ANYCRLF) switch(c) { @@ -95,9 +104,15 @@ else switch(c) case 0x000c: *lenptr = 1; return TRUE; /* FF */ case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; return TRUE; /* CR */ - case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ +#ifdef COMPILE_PCRE8 + case 0x0085: *lenptr = utf? 
2 : 1; return TRUE; /* NEL */ case 0x2028: /* LS */ case 0x2029: *lenptr = 3; return TRUE; /* PS */ +#else + case 0x0085: /* NEL */ + case 0x2028: /* LS */ + case 0x2029: *lenptr = 1; return TRUE; /* PS */ +#endif /* COMPILE_PCRE8 */ default: return FALSE; } } @@ -116,26 +131,27 @@ Arguments: type the newline type startptr pointer to the start of the string lenptr where to return the length - utf8 TRUE if in utf8 mode + utf TRUE if in utf mode Returns: TRUE or FALSE */ BOOL -_pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8) +PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr, + BOOL utf) { int c; +(void)utf; ptr--; -#ifdef SUPPORT_UTF8 -if (utf8) +#ifdef SUPPORT_UTF +if (utf) { BACKCHAR(ptr); GETCHAR(c, ptr); } -else c = *ptr; -#else /* no UTF-8 support */ -c = *ptr; -#endif /* SUPPORT_UTF8 */ +else +#endif /* SUPPORT_UTF */ + c = *ptr; if (type == NLTYPE_ANYCRLF) switch(c) { @@ -152,9 +168,15 @@ else switch(c) case 0x000b: /* VT */ case 0x000c: /* FF */ case 0x000d: *lenptr = 1; return TRUE; /* CR */ - case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */ +#ifdef COMPILE_PCRE8 + case 0x0085: *lenptr = utf? 2 : 1; return TRUE; /* NEL */ case 0x2028: /* LS */ case 0x2029: *lenptr = 3; return TRUE; /* PS */ +#else + case 0x0085: /* NEL */ + case 0x2028: /* LS */ + case 0x2029: *lenptr = 1; return TRUE; /* PS */ +#endif /* COMPILE_PCRE8 */ default: return FALSE; } } diff --git a/harbour/src/3rd/pcre/pcreoutf.c b/harbour/src/3rd/pcre/pcreoutf.c index 95511751f4..a552557b93 100644 --- a/harbour/src/3rd/pcre/pcreoutf.c +++ b/harbour/src/3rd/pcre/pcreoutf.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -52,35 +52,45 @@ character value into a UTF8 string. */ * Convert character value to UTF-8 * *************************************************/ -/* This function takes an integer value in the range 0 - 0x7fffffff -and encodes it as a UTF-8 character in 0 to 6 bytes. +/* This function takes an integer value in the range 0 - 0x10ffff +and encodes it as a UTF-8 character in 1 to 6 pcre_uchars. Arguments: cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long + buffer pointer to buffer for result - at least 6 pcre_uchars long Returns: number of characters placed in the buffer */ int -_pcre_ord2utf8(int cvalue, uschar *buffer) +PRIV(ord2utf)(pcre_uint32 cvalue, pcre_uchar *buffer) { -#ifdef SUPPORT_UTF8 +#ifdef SUPPORT_UTF + register int i, j; -for (i = 0; i < _pcre_utf8_table1_size; i++) - if (cvalue <= _pcre_utf8_table1[i]) break; + +/* Checking invalid cvalue character, encoded as invalid UTF-16 character. +Should never happen in practice. */ +if ((cvalue & 0xf800) == 0xd800 || cvalue >= 0x110000) + cvalue = 0xfffe; + +for (i = 0; i < PRIV(utf8_table1_size); i++) + if ((int)cvalue <= PRIV(utf8_table1)[i]) break; buffer += i; for (j = i; j > 0; j--) { *buffer-- = 0x80 | (cvalue & 0x3f); cvalue >>= 6; } -*buffer = _pcre_utf8_table2[i] | cvalue; +*buffer = PRIV(utf8_table2)[i] | cvalue; return i + 1; + #else + (void)(cvalue); /* Keep compiler happy; this function won't ever be */ -(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */ +(void)(buffer); /* called when SUPPORT_UTF is not defined. 
*/ return 0; + #endif } diff --git a/harbour/src/3rd/pcre/pcreprni.h b/harbour/src/3rd/pcre/pcreprni.c similarity index 69% rename from harbour/src/3rd/pcre/pcreprni.h rename to harbour/src/3rd/pcre/pcreprni.c index 5074cd5f92..6038c7d0c2 100644 --- a/harbour/src/3rd/pcre/pcreprni.h +++ b/harbour/src/3rd/pcre/pcreprni.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2010 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -44,16 +44,50 @@ local functions. This source file is used in two places: (1) It is #included by pcre_compile.c when it is compiled in debugging mode (PCRE_DEBUG defined in pcre_internal.h). It is not included in production -compiles. +compiles. In this case PCRE_INCLUDED is defined. -(2) It is always #included by pcretest.c, which can be asked to print out a -compiled regex for debugging purposes. */ +(2) It is also compiled separately and linked with pcretest.c, which can be +asked to print out a compiled regex for debugging purposes. */ +#ifndef PCRE_INCLUDED + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* For pcretest program. */ +#define PRIV(name) name + +/* We have to include pcre_internal.h because we need the internal info for +displaying the results of pcre_study() and we also need to know about the +internal macros, structures, and other internal data values; pcretest has +"inside information" compared to a program that strictly follows the PCRE API. + +Although pcre_internal.h does itself include pcre.h, we explicitly include it +here before pcre_internal.h so that the PCRE_EXP_xxx macros get set +appropriately for an application, not for building PCRE. */ + +#include "pcre.h" +#include "pcreinal.h" + +/* These are the funtions that are contained within. 
It doesn't seem worth +having a separate .h file just for this. */ + +#endif /* PCRE_INCLUDED */ + +#ifdef PCRE_INCLUDED +static /* Keep the following function as private. */ +#endif +#ifdef COMPILE_PCRE8 +void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); +#else +void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); +#endif /* Macro that decides whether a character should be output as a literal or in hexadecimal. We don't use isprint() because that can vary from system to system (even without the use of locales) and we want the output always to be the same, -for testing purposes. This macro is used in pcretest as well as in this file. */ +for testing purposes. */ #ifdef EBCDIC #define PRINTABLE(c) ((c) >= 64 && (c) < 255) @@ -63,7 +97,13 @@ for testing purposes. This macro is used in pcretest as well as in this file. */ /* The table of operator names. */ -static const char *OP_names[] = { OP_NAME_LIST }; +static const char *priv_OP_names[] = { OP_NAME_LIST }; + +/* This table of operator lengths is not actually used by the working code, +but its size is needed for a check that ensures it is the correct size for the +number of opcodes (thus catching update omissions). 
*/ + +static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS }; @@ -72,17 +112,23 @@ static const char *OP_names[] = { OP_NAME_LIST }; *************************************************/ static int -print_char(FILE *f, uschar *ptr, BOOL utf8) +print_char(FILE *f, pcre_uchar *ptr, BOOL utf) { int c = *ptr; -#ifndef SUPPORT_UTF8 -utf8 = utf8; /* Avoid compiler warning */ -if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); +#ifndef SUPPORT_UTF + +(void)utf; /* Avoid compiler warning */ +if (PRINTABLE(c)) fprintf(f, "%c", c); +else if (c <= 0xff) fprintf(f, "\\x%02x", c); +else fprintf(f, "\\x{%x}", c); return 0; #else -if (!utf8 || (c & 0xc0) != 0xc0) + +#ifdef COMPILE_PCRE8 + +if (!utf || (c & 0xc0) != 0xc0) { if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); return 0; @@ -90,9 +136,9 @@ if (!utf8 || (c & 0xc0) != 0xc0) else { int i; - int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ + int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ int s = 6*a; - c = (c & _pcre_utf8_table3[a]) << s; + c = (c & PRIV(utf8_table3)[a]) << s; for (i = 1; i <= a; i++) { /* This is a check for malformed UTF-8; it should only occur if the sanity @@ -110,13 +156,58 @@ else s -= 6; c |= (ptr[i] & 0x3f) << s; } - if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); + fprintf(f, "\\x{%x}", c); return a; } -#endif + +#else + +#ifdef COMPILE_PCRE16 + +if (!utf || (c & 0xfc00) != 0xd800) + { + if (PRINTABLE(c)) fprintf(f, "%c", c); + else if (c <= 0xff) fprintf(f, "\\x%02x", c); + else fprintf(f, "\\x{%x}", c); + return 0; + } +else + { + /* This is a check for malformed UTF-16; it should only occur if the sanity + check has been turned off. Rather than swallow a low surrogate, just stop if + we hit a bad one. Print it with \X instead of \x as an indication. 
*/ + + if ((ptr[1] & 0xfc00) != 0xdc00) + { + fprintf(f, "\\X{%x}", c); + return 0; + } + + c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; + fprintf(f, "\\x{%x}", c); + return 1; + } + +#endif /* COMPILE_PCRE16 */ + +#endif /* COMPILE_PCRE8 */ + +#endif /* SUPPORT_UTF */ } +/************************************************* +* Print uchar string (regardless of utf) * +*************************************************/ +static void +print_puchar(FILE *f, PCRE_PUCHAR ptr) +{ +while (*ptr != '\0') + { + register int c = *ptr++; + if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); + } +} /************************************************* * Find Unicode property name * @@ -127,11 +218,11 @@ get_ucpname(int ptype, int pvalue) { #ifdef SUPPORT_UCP int i; -for (i = _pcre_utt_size - 1; i >= 0; i--) +for (i = PRIV(utt_size) - 1; i >= 0; i--) { - if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break; + if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value) break; } -return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??"; +return (i >= 0)? PRIV(utt_names) + PRIV(utt)[i].name_offset : "??"; #else /* It gets harder and harder to shut off unwanted compiler warnings. */ ptype = ptype * pvalue; @@ -151,12 +242,20 @@ print_lengths flag controls whether offsets and lengths of items are printed. They can be turned off from pcretest so that automatic tests on bytecode can be written that do not depend on the value of LINK_SIZE. */ -static void +#ifdef PCRE_INCLUDED +static /* Keep the following function as private. 
*/ +#endif +#ifdef COMPILE_PCRE8 +void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) +#else +void +pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) +#endif { -real_pcre *re = (real_pcre *)external_re; -uschar *codestart, *code; -BOOL utf8; +REAL_PCRE *re = (REAL_PCRE *)external_re; +pcre_uchar *codestart, *code; +BOOL utf; unsigned int options = re->options; int offset = re->name_table_offset; @@ -174,12 +273,13 @@ if (re->magic_number != MAGIC_NUMBER) ((options >> 24) & 0x000000ff); } -code = codestart = (uschar *)re + offset + count * size; -utf8 = (options & PCRE_UTF8) != 0; +code = codestart = (pcre_uchar *)re + offset + count * size; +/* PCRE_UTF16 has the same value as PCRE_UTF8. */ +utf = (options & PCRE_UTF8) != 0; for(;;) { - uschar *ccode; + pcre_uchar *ccode; const char *flag = " "; int c; int extra = 0; @@ -193,25 +293,20 @@ for(;;) { /* ========================================================================== */ /* These cases are never obeyed. This is a fudge that causes a compile- - time error if the vectors OP_names or _pcre_OP_lengths, which are indexed + time error if the vectors OP_names or OP_lengths, which are indexed by opcode, are not the correct length. It seems to be the only way to do such a check at compile time, as the sizeof() operator does not work in - the C preprocessor. We do this while compiling pcretest, because that - #includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this - when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then - know the size of _pcre_OP_lengths. */ + the C preprocessor. 
*/ -#ifdef COMPILING_PCRETEST case OP_TABLE_LENGTH: case OP_TABLE_LENGTH + - ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && - (sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)): + ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && + (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)): break; -#endif /* ========================================================================== */ case OP_END: - fprintf(f, " %s\n", OP_names[*code]); + fprintf(f, " %s\n", priv_OP_names[*code]); fprintf(f, "------------------------------------------------------------------\n"); return; @@ -220,7 +315,7 @@ for(;;) do { code++; - code += 1 + print_char(f, code, utf8); + code += 1 + print_char(f, code, utf); } while (*code == OP_CHAR); fprintf(f, "\n"); @@ -231,7 +326,7 @@ for(;;) do { code++; - code += 1 + print_char(f, code, utf8); + code += 1 + print_char(f, code, utf); } while (*code == OP_CHARI); fprintf(f, "\n"); @@ -243,7 +338,7 @@ for(;;) case OP_SCBRAPOS: if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); else fprintf(f, " "); - fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); + fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE)); break; case OP_BRA: @@ -266,16 +361,16 @@ for(;;) case OP_REVERSE: if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); else fprintf(f, " "); - fprintf(f, "%s", OP_names[*code]); + fprintf(f, "%s", priv_OP_names[*code]); break; case OP_CLOSE: - fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); + fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1)); break; case OP_CREF: case OP_NCREF: - fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); + fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]); break; case OP_RREF: @@ -330,15 +425,15 @@ for(;;) fprintf(f, " %s ", flag); if (*code >= OP_TYPESTAR) { - fprintf(f, "%s", OP_names[code[1]]); + fprintf(f, "%s", priv_OP_names[code[1]]); if (code[1] == OP_PROP || code[1] == OP_NOTPROP) { fprintf(f, " %s ", get_ucpname(code[2], code[3])); extra = 2; 
} } - else extra = print_char(f, code+1, utf8); - fprintf(f, "%s", OP_names[*code]); + else extra = print_char(f, code+1, utf); + fprintf(f, "%s", priv_OP_names[*code]); break; case OP_EXACTI: @@ -352,7 +447,7 @@ for(;;) case OP_MINUPTO: case OP_POSUPTO: fprintf(f, " %s ", flag); - extra = print_char(f, code+3, utf8); + extra = print_char(f, code + 1 + IMM2_SIZE, utf); fprintf(f, "{"); if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); fprintf(f, "%d}", GET2(code,1)); @@ -364,10 +459,11 @@ for(;;) case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - fprintf(f, " %s", OP_names[code[3]]); - if (code[3] == OP_PROP || code[3] == OP_NOTPROP) + fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]); + if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) { - fprintf(f, " %s ", get_ucpname(code[4], code[5])); + fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1], + code[1 + IMM2_SIZE + 2])); extra = 2; } fprintf(f, "{"); @@ -383,7 +479,10 @@ for(;;) case OP_NOT: c = code[1]; if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); - else fprintf(f, " %s [^\\x%02x]", flag, c); + else if (utf || c > 0xff) + fprintf(f, " %s [^\\x{%02x}]", flag, c); + else + fprintf(f, " %s [^\\x%02x]", flag, c); break; case OP_NOTSTARI: @@ -410,7 +509,7 @@ for(;;) c = code[1]; if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); else fprintf(f, " %s [^\\x%02x]", flag, c); - fprintf(f, "%s", OP_names[*code]); + fprintf(f, "%s", priv_OP_names[*code]); break; case OP_NOTEXACTI: @@ -424,7 +523,7 @@ for(;;) case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTPOSUPTO: - c = code[3]; + c = code[1 + IMM2_SIZE]; if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c); else fprintf(f, " %s [^\\x%02x]{", flag, c); if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); @@ -437,7 +536,7 @@ for(;;) case OP_RECURSE: if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); else fprintf(f, " "); - fprintf(f, "%s", OP_names[*code]); + fprintf(f, "%s", 
priv_OP_names[*code]); break; case OP_REFI: @@ -445,22 +544,22 @@ for(;;) /* Fall through */ case OP_REF: fprintf(f, " %s \\%d", flag, GET2(code,1)); - ccode = code + _pcre_OP_lengths[*code]; + ccode = code + priv_OP_lengths[*code]; goto CLASS_REF_REPEAT; case OP_CALLOUT: - fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), + fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2), GET(code, 2 + LINK_SIZE)); break; case OP_PROP: case OP_NOTPROP: - fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2])); + fprintf(f, " %s %s", priv_OP_names[*code], get_ucpname(code[1], code[2])); break; - /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in - having this code always here, and it makes it less messy without all those - #ifdefs. */ + /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no + harm in having this code always here, and it makes it less messy without + all those #ifdefs. */ case OP_CLASS: case OP_NCLASS: @@ -468,6 +567,7 @@ for(;;) { int i, min, max; BOOL printmap; + pcre_uint8 *map; fprintf(f, " ["); @@ -488,13 +588,14 @@ for(;;) if (printmap) { + map = (pcre_uint8 *)ccode; for (i = 0; i < 256; i++) { - if ((ccode[i/8] & (1 << (i&7))) != 0) + if ((map[i/8] & (1 << (i&7))) != 0) { int j; for (j = i+1; j < 256; j++) - if ((ccode[j/8] & (1 << (j&7))) == 0) break; + if ((map[j/8] & (1 << (j&7))) == 0) break; if (i == '-' || i == ']') fprintf(f, "\\"); if (PRINTABLE(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i); @@ -508,7 +609,7 @@ for(;;) i = j; } } - ccode += 32; + ccode += 32 / sizeof(pcre_uchar); } /* For an XCLASS there is always some additional data */ @@ -532,17 +633,17 @@ for(;;) } else { - ccode += 1 + print_char(f, ccode, TRUE); + ccode += 1 + print_char(f, ccode, utf); if (ch == XCL_RANGE) { fprintf(f, "-"); - ccode += 1 + print_char(f, ccode, TRUE); + ccode += 1 + print_char(f, ccode, utf); } } } } - /* Indicate a non-UTF8 class which was created by negation */ + 
/* Indicate a non-UTF class which was created by negation */ fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); @@ -557,18 +658,18 @@ for(;;) case OP_CRMINPLUS: case OP_CRQUERY: case OP_CRMINQUERY: - fprintf(f, "%s", OP_names[*ccode]); - extra += _pcre_OP_lengths[*ccode]; + fprintf(f, "%s", priv_OP_names[*ccode]); + extra += priv_OP_lengths[*ccode]; break; case OP_CRRANGE: case OP_CRMINRANGE: min = GET2(ccode,1); - max = GET2(ccode,3); + max = GET2(ccode,1 + IMM2_SIZE); if (max == 0) fprintf(f, "{%d,}", min); else fprintf(f, "{%d,%d}", min, max); if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); - extra += _pcre_OP_lengths[*ccode]; + extra += priv_OP_lengths[*ccode]; break; /* Do nothing if it's not a repeat; this code stops picky compilers @@ -583,17 +684,14 @@ for(;;) case OP_MARK: case OP_PRUNE_ARG: case OP_SKIP_ARG: - fprintf(f, " %s %s", OP_names[*code], code + 2); + case OP_THEN_ARG: + fprintf(f, " %s ", priv_OP_names[*code]); + print_puchar(f, code + 2); extra += code[1]; break; case OP_THEN: - fprintf(f, " %s", OP_names[*code]); - break; - - case OP_THEN_ARG: - fprintf(f, " %s %s", OP_names[*code], code + 2); - extra += code[1]; + fprintf(f, " %s", priv_OP_names[*code]); break; case OP_CIRCM: @@ -604,11 +702,11 @@ for(;;) /* Anything else is just an item with no data, but possibly a flag. */ default: - fprintf(f, " %s %s", flag, OP_names[*code]); + fprintf(f, " %s %s", flag, priv_OP_names[*code]); break; } - code += _pcre_OP_lengths[*code] + extra; + code += priv_OP_lengths[*code] + extra; fprintf(f, "\n"); } } diff --git a/harbour/src/3rd/pcre/pcrerefc.c b/harbour/src/3rd/pcre/pcrerefc.c index cba1dc9169..8a0f0c8e19 100644 --- a/harbour/src/3rd/pcre/pcrerefc.c +++ b/harbour/src/3rd/pcre/pcrerefc.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -68,11 +68,18 @@ Returns: the (possibly updated) count value (a non-negative number), or a negative error number */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION pcre_refcount(pcre *argument_re, int adjust) +#else +PCRE_EXP_DEFN int PCRE_CALL_CONVENTION +pcre16_refcount(pcre16 *argument_re, int adjust) +#endif { -real_pcre *re = (real_pcre *)argument_re; +REAL_PCRE *re = (REAL_PCRE *)argument_re; if (re == NULL) return PCRE_ERROR_NULL; +if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; +if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; re->ref_count = (-adjust > re->ref_count)? 0 : (adjust + re->ref_count > 65535)? 65535 : re->ref_count + adjust; diff --git a/harbour/src/3rd/pcre/pcrestud.c b/harbour/src/3rd/pcre/pcrestud.c index 9ac6ae7897..4a05275c98 100644 --- a/harbour/src/3rd/pcre/pcrestud.c +++ b/harbour/src/3rd/pcre/pcrestud.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2010 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -78,17 +78,18 @@ Returns: the minimum length */ static int -find_minlength(const uschar *code, const uschar *startcode, int options, +find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options, int recurse_depth) { int length = -1; -BOOL utf8 = (options & PCRE_UTF8) != 0; +/* PCRE_UTF16 has the same value as PCRE_UTF8. 
*/ +BOOL utf = (options & PCRE_UTF8) != 0; BOOL had_recurse = FALSE; register int branchlength = 0; -register uschar *cc = (uschar *)code + 1 + LINK_SIZE; +register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2; + *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; /* Scan along the opcodes for this branch. If we get to the end of the branch, check the length against that of the other branches. */ @@ -96,7 +97,7 @@ branch, check the length against that of the other branches. */ for (;;) { int d, min; - uschar *cs, *ce; + pcre_uchar *cs, *ce; register int op = *cc; switch (op) @@ -189,7 +190,7 @@ for (;;) case OP_DOLLM: case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: - cc += _pcre_OP_lengths[*cc]; + cc += PRIV(OP_lengths)[*cc]; break; /* Skip over a subpattern that has a {0} or {0,x} quantifier */ @@ -198,7 +199,7 @@ for (;;) case OP_BRAMINZERO: case OP_BRAPOSZERO: case OP_SKIPZERO: - cc += _pcre_OP_lengths[*cc]; + cc += PRIV(OP_lengths)[*cc]; do cc += GET(cc, 1); while (*cc == OP_ALT); cc += 1 + LINK_SIZE; break; @@ -223,8 +224,8 @@ for (;;) case OP_NOTPOSPLUSI: branchlength++; cc += 2; -#ifdef SUPPORT_UTF8 - if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif break; @@ -243,15 +244,16 @@ for (;;) case OP_NOTEXACT: case OP_NOTEXACTI: branchlength += GET2(cc,1); - cc += 4; -#ifdef SUPPORT_UTF8 - if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif break; case OP_TYPEEXACT: branchlength += GET2(cc,1); - cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4; + cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP + || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 
2 : 0); break; /* Handle single-char non-literal matchers */ @@ -291,8 +293,8 @@ for (;;) appear, but leave the code, just in case.) */ case OP_ANYBYTE: -#ifdef SUPPORT_UTF8 - if (utf8) return -1; +#ifdef SUPPORT_UTF + if (utf) return -1; #endif branchlength++; cc++; @@ -308,27 +310,28 @@ for (;;) case OP_TYPEPOSSTAR: case OP_TYPEPOSQUERY: if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2; - cc += _pcre_OP_lengths[op]; + cc += PRIV(OP_lengths)[op]; break; case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2; - cc += _pcre_OP_lengths[op]; + if (cc[1 + IMM2_SIZE] == OP_PROP + || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2; + cc += PRIV(OP_lengths)[op]; break; /* Check a class for variable quantification */ -#ifdef SUPPORT_UTF8 +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: - cc += GET(cc, 1) - 33; + cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; /* Fall through */ #endif case OP_CLASS: case OP_NCLASS: - cc += 33; + cc += PRIV(OP_lengths)[OP_CLASS]; switch (*cc) { @@ -347,7 +350,7 @@ for (;;) case OP_CRRANGE: case OP_CRMINRANGE: branchlength += GET2(cc,1); - cc += 5; + cc += 1 + 2 * IMM2_SIZE; break; default: @@ -372,7 +375,7 @@ for (;;) case OP_REFI: if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) { - ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1)); + ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); if (cc > cs && cc < ce) @@ -386,7 +389,7 @@ for (;;) } } else d = 0; - cc += 3; + cc += 1 + IMM2_SIZE; /* Handle repeated back references */ @@ -409,7 +412,7 @@ for (;;) case OP_CRRANGE: case OP_CRMINRANGE: min = GET2(cc, 1); - cc += 5; + cc += 1 + 2 * IMM2_SIZE; break; default: @@ -424,7 +427,7 @@ for (;;) caught by a recursion depth count. 
*/ case OP_RECURSE: - cs = ce = (uschar *)startcode + GET(cc, 1); + cs = ce = (pcre_uchar *)startcode + GET(cc, 1); do ce += GET(ce, 1); while (*ce == OP_ALT); if ((cc > cs && cc < ce) || recurse_depth > 10) had_recurse = TRUE; @@ -482,9 +485,9 @@ for (;;) case OP_NOTPOSQUERY: case OP_NOTPOSQUERYI: - cc += _pcre_OP_lengths[op]; -#ifdef SUPPORT_UTF8 - if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; + cc += PRIV(OP_lengths)[op]; +#ifdef SUPPORT_UTF + if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif break; @@ -494,7 +497,7 @@ for (;;) case OP_PRUNE_ARG: case OP_SKIP_ARG: case OP_THEN_ARG: - cc += _pcre_OP_lengths[op] + cc[1]; + cc += PRIV(OP_lengths)[op] + cc[1]; break; /* The remaining opcodes are just skipped over. */ @@ -506,7 +509,7 @@ for (;;) case OP_SET_SOM: case OP_SKIP: case OP_THEN: - cc += _pcre_OP_lengths[op]; + cc += PRIV(OP_lengths)[op]; break; /* This should not occur: we list all opcodes explicitly so that when @@ -535,29 +538,30 @@ Arguments: p points to the character caseless the caseless flag cd the block with char table pointers - utf8 TRUE for UTF-8 mode + utf TRUE for UTF-8 / UTF-16 mode Returns: pointer after the character */ -static const uschar * -set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless, - compile_data *cd, BOOL utf8) +static const pcre_uchar * +set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, + compile_data *cd, BOOL utf) { unsigned int c = *p; +#ifdef COMPILE_PCRE8 SET_BIT(c); -#ifdef SUPPORT_UTF8 -if (utf8 && c > 127) +#ifdef SUPPORT_UTF +if (utf && c > 127) { GETCHARINC(c, p); #ifdef SUPPORT_UCP if (caseless) { - uschar buff[8]; + pcre_uchar buff[6]; c = UCD_OTHERCASE(c); - (void)_pcre_ord2utf8(c, buff); + (void)PRIV(ord2utf)(c, buff); SET_BIT(buff[0]); } #endif @@ -569,6 +573,36 @@ if (utf8 && c > 127) if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); return p + 1; +#endif + +#ifdef COMPILE_PCRE16 +if (c > 0xff) + { + c = 0xff; 
+ caseless = FALSE; + } +SET_BIT(c); + +#ifdef SUPPORT_UTF +if (utf && c > 127) + { + GETCHARINC(c, p); +#ifdef SUPPORT_UCP + if (caseless) + { + c = UCD_OTHERCASE(c); + if (c > 0xff) + c = 0xff; + SET_BIT(c); + } +#endif + return p; + } +#endif + +if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); +return p + 1; +#endif } @@ -594,21 +628,23 @@ Returns: nothing */ static void -set_type_bits(uschar *start_bits, int cbit_type, int table_limit, +set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, compile_data *cd) { register int c; for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (table_limit == 32) return; for (c = 128; c < 256; c++) { if ((cd->cbits[c/8] & (1 << (c&7))) != 0) { - uschar buff[8]; - (void)_pcre_ord2utf8(c, buff); + pcre_uchar buff[6]; + (void)PRIV(ord2utf)(c, buff); SET_BIT(buff[0]); } } +#endif } @@ -634,12 +670,14 @@ Returns: nothing */ static void -set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit, +set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit, compile_data *cd) { register int c; for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; +#endif } @@ -659,7 +697,7 @@ function fails unless the result is SSB_DONE. 
Arguments: code points to an expression start_bits points to a 32-byte table, initialized to 0 - utf8 TRUE if in UTF-8 mode + utf TRUE if in UTF-8 / UTF-16 mode cd the block with char table pointers Returns: SSB_FAIL => Failed to find any starting bytes @@ -669,12 +707,16 @@ Returns: SSB_FAIL => Failed to find any starting bytes */ static int -set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8, +set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, compile_data *cd) { register int c; int yield = SSB_DONE; -int table_limit = utf8? 16:32; +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 +int table_limit = utf? 16:32; +#else +int table_limit = 32; +#endif #if 0 /* ========================================================================= */ @@ -696,10 +738,10 @@ volatile int dummy; do { BOOL try_next = TRUE; - const uschar *tcode = code + 1 + LINK_SIZE; + const pcre_uchar *tcode = code + 1 + LINK_SIZE; if (*code == OP_CBRA || *code == OP_SCBRA || - *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2; + *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE; while (try_next) /* Loop for items in this branch */ { @@ -785,7 +827,9 @@ do case OP_SOM: case OP_THEN: case OP_THEN_ARG: +#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 case OP_XCLASS: +#endif return SSB_FAIL; /* We can ignore word boundary tests. 
*/ @@ -811,7 +855,7 @@ do case OP_ONCE: case OP_ONCE_NC: case OP_ASSERT: - rc = set_start_bits(tcode, start_bits, utf8, cd); + rc = set_start_bits(tcode, start_bits, utf, cd); if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; if (rc == SSB_DONE) try_next = FALSE; else { @@ -858,7 +902,7 @@ do case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO: - rc = set_start_bits(++tcode, start_bits, utf8, cd); + rc = set_start_bits(++tcode, start_bits, utf, cd); if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; /* ========================================================================= See the comment at the head of this function concerning the next line, @@ -885,7 +929,7 @@ do case OP_QUERY: case OP_MINQUERY: case OP_POSQUERY: - tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); + tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); break; case OP_STARI: @@ -894,7 +938,7 @@ do case OP_QUERYI: case OP_MINQUERYI: case OP_POSQUERYI: - tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8); + tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); break; /* Single-char upto sets the bit and tries the next */ @@ -902,36 +946,36 @@ do case OP_UPTO: case OP_MINUPTO: case OP_POSUPTO: - tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8); + tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf); break; case OP_UPTOI: case OP_MINUPTOI: case OP_POSUPTOI: - tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8); + tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf); break; /* At least one single char sets the bit and stops */ case OP_EXACT: - tcode += 2; + tcode += IMM2_SIZE; /* Fall through */ case OP_CHAR: case OP_PLUS: case OP_MINPLUS: case OP_POSPLUS: - (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); + (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); try_next = FALSE; break; case OP_EXACTI: - tcode += 2; + tcode += IMM2_SIZE; /* Fall through */ case OP_CHARI: case 
OP_PLUSI: case OP_MINPLUSI: case OP_POSPLUSI: - (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8); + (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); try_next = FALSE; break; @@ -944,14 +988,28 @@ do case OP_HSPACE: SET_BIT(0x09); SET_BIT(0x20); - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { +#ifdef COMPILE_PCRE8 SET_BIT(0xC2); /* For U+00A0 */ SET_BIT(0xE1); /* For U+1680, U+180E */ SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ SET_BIT(0xE3); /* For U+3000 */ +#endif +#ifdef COMPILE_PCRE16 + SET_BIT(0xA0); + SET_BIT(0xFF); /* For characters > 255 */ +#endif + } + else +#endif /* SUPPORT_UTF */ + { + SET_BIT(0xA0); +#ifdef COMPILE_PCRE16 + SET_BIT(0xFF); /* For characters > 255 */ +#endif } - else SET_BIT(0xA0); try_next = FALSE; break; @@ -961,12 +1019,26 @@ do SET_BIT(0x0B); SET_BIT(0x0C); SET_BIT(0x0D); - if (utf8) +#ifdef SUPPORT_UTF + if (utf) { +#ifdef COMPILE_PCRE8 SET_BIT(0xC2); /* For U+0085 */ SET_BIT(0xE2); /* For U+2028, U+2029 */ +#endif +#ifdef COMPILE_PCRE16 + SET_BIT(0x85); + SET_BIT(0xFF); /* For characters > 255 */ +#endif + } + else +#endif /* SUPPORT_UTF */ + { + SET_BIT(0x85); +#ifdef COMPILE_PCRE16 + SET_BIT(0xFF); /* For characters > 255 */ +#endif } - else SET_BIT(0x85); try_next = FALSE; break; @@ -1024,7 +1096,7 @@ do break; case OP_TYPEEXACT: - tcode += 3; + tcode += 1 + IMM2_SIZE; break; /* Zero or more repeats of character types set the bits and then @@ -1033,7 +1105,7 @@ do case OP_TYPEUPTO: case OP_TYPEMINUPTO: case OP_TYPEPOSUPTO: - tcode += 2; /* Fall through */ + tcode += IMM2_SIZE; /* Fall through */ case OP_TYPESTAR: case OP_TYPEMINSTAR: @@ -1051,14 +1123,23 @@ do case OP_HSPACE: SET_BIT(0x09); SET_BIT(0x20); - if (utf8) +#ifdef COMPILE_PCRE8 + if (utf) { +#ifdef COMPILE_PCRE8 SET_BIT(0xC2); /* For U+00A0 */ SET_BIT(0xE1); /* For U+1680, U+180E */ SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ SET_BIT(0xE3); /* For U+3000 */ +#endif +#ifdef COMPILE_PCRE16 + SET_BIT(0xA0); + SET_BIT(0xFF); /* 
For characters > 255 */ +#endif } - else SET_BIT(0xA0); + else +#endif /* SUPPORT_UTF */ + SET_BIT(0xA0); break; case OP_ANYNL: @@ -1067,12 +1148,21 @@ do SET_BIT(0x0B); SET_BIT(0x0C); SET_BIT(0x0D); - if (utf8) +#ifdef COMPILE_PCRE8 + if (utf) { +#ifdef COMPILE_PCRE8 SET_BIT(0xC2); /* For U+0085 */ SET_BIT(0xE2); /* For U+2028, U+2029 */ +#endif +#ifdef COMPILE_PCRE16 + SET_BIT(0x85); + SET_BIT(0xFF); /* For characters > 255 */ +#endif } - else SET_BIT(0x85); + else +#endif /* SUPPORT_UTF */ + SET_BIT(0x85); break; case OP_NOT_DIGIT: @@ -1119,18 +1209,23 @@ do character with a value > 255. */ case OP_NCLASS: -#ifdef SUPPORT_UTF8 - if (utf8) +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (utf) { start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ } +#endif +#ifdef COMPILE_PCRE16 + SET_BIT(0xFF); /* For characters > 255 */ #endif /* Fall through */ case OP_CLASS: { + pcre_uint8 *map; tcode++; + map = (pcre_uint8 *)tcode; /* In UTF-8 mode, the bits in a bit map correspond to character values, not to byte values. However, the bit map we are constructing is @@ -1138,13 +1233,13 @@ do value is > 127. In fact, there are only two possible starting bytes for characters in the range 128 - 255. */ -#ifdef SUPPORT_UTF8 - if (utf8) +#if defined SUPPORT_UTF && defined COMPILE_PCRE8 + if (utf) { - for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; + for (c = 0; c < 16; c++) start_bits[c] |= map[c]; for (c = 128; c < 256; c++) { - if ((tcode[c/8] && (1 << (c&7))) != 0) + if ((map[c/8] && (1 << (c&7))) != 0) { int d = (c >> 6) | 0xc0; /* Set bit for this starter */ start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ @@ -1152,19 +1247,17 @@ do } } } - - /* In non-UTF-8 mode, the two bit maps are completely compatible. */ - else #endif { - for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; + /* In non-UTF-8 mode, the two bit maps are completely compatible. 
*/ + for (c = 0; c < 32; c++) start_bits[c] |= map[c]; } /* Advance past the bit map, and act on what follows. For a zero minimum repeat, continue; otherwise stop processing. */ - tcode += 32; + tcode += 32 / sizeof(pcre_uchar); switch (*tcode) { case OP_CRSTAR: @@ -1176,7 +1269,7 @@ do case OP_CRRANGE: case OP_CRMINRANGE: - if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; + if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE; else try_next = FALSE; break; @@ -1205,7 +1298,7 @@ return yield; *************************************************/ /* This function is handed a compiled expression that it must study to produce -information that will speed up the matching. It returns a pcre_extra block +information that will speed up the matching. It returns a pcre[16]_extra block which then gets handed back to pcre_exec(). Arguments: @@ -1214,23 +1307,28 @@ Arguments: errorptr points to where to place error messages; set NULL unless error -Returns: pointer to a pcre_extra block, with study_data filled in and the - appropriate flags set; +Returns: pointer to a pcre[16]_extra block, with study_data filled in and + the appropriate flags set; NULL on error or if no optimization possible */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION pcre_study(const pcre *external_re, int options, const char **errorptr) +#else +PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION +pcre16_study(const pcre16 *external_re, int options, const char **errorptr) +#endif { int min; BOOL bits_set = FALSE; -uschar start_bits[32]; -pcre_extra *extra = NULL; +pcre_uint8 start_bits[32]; +PUBL(extra) *extra = NULL; pcre_study_data *study; -const uschar *tables; -uschar *code; +const pcre_uint8 *tables; +pcre_uchar *code; compile_data compile_block; -const real_pcre *re = (const real_pcre *)external_re; +const REAL_PCRE *re = (const REAL_PCRE *)external_re; *errorptr = NULL; @@ -1240,13 +1338,23 @@ if (re == NULL || re->magic_number != MAGIC_NUMBER) return NULL; } +if ((re->flags & 
PCRE_MODE) == 0) + { +#ifdef COMPILE_PCRE8 + *errorptr = "argument is compiled in 16 bit mode"; +#else + *errorptr = "argument is compiled in 8 bit mode"; +#endif + return NULL; + } + if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) { *errorptr = "unknown or incorrect option bit(s) set"; return NULL; } -code = (uschar *)re + re->name_table_offset + +code = (pcre_uchar *)re + re->name_table_offset + (re->name_count * re->name_entry_size); /* For an anchored pattern, or an unanchored pattern that has a first char, or @@ -1261,9 +1369,16 @@ if ((re->options & PCRE_ANCHORED) == 0 && /* Set the character tables in the block that is passed around */ tables = re->tables; + +#ifdef COMPILE_PCRE8 if (tables == NULL) (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, (void *)(&tables)); +#else + if (tables == NULL) + (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, + (void *)(&tables)); +#endif compile_block.lcc = tables + lcc_offset; compile_block.fcc = tables + fcc_offset; @@ -1272,7 +1387,7 @@ if ((re->options & PCRE_ANCHORED) == 0 && /* See if we can find a fixed set of initial characters for the pattern. */ - memset(start_bits, 0, 32 * sizeof(uschar)); + memset(start_bits, 0, 32 * sizeof(pcre_uint8)); rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, &compile_block); bits_set = rc == SSB_DONE; @@ -1293,13 +1408,13 @@ switch(min = find_minlength(code, code, re->options, 0)) } /* If a set of starting bytes has been identified, or if the minimum length is -greater than zero, or if JIT optimization has been requested, get a pcre_extra -block and a pcre_study_data block. The study data is put in the latter, which -is pointed to by the former, which may also get additional data set later by -the calling program. At the moment, the size of pcre_study_data is fixed. We -nevertheless save it in a field for returning via the pcre_fullinfo() function -so that if it becomes variable in the future, we don't have to change that -code. 
*/ +greater than zero, or if JIT optimization has been requested, get a +pcre[16]_extra block and a pcre_study_data block. The study data is put in the +latter, which is pointed to by the former, which may also get additional data +set later by the calling program. At the moment, the size of pcre_study_data +is fixed. We nevertheless save it in a field for returning via the +pcre_fullinfo() function so that if it becomes variable in the future, +we don't have to change that code. */ if (bits_set || min > 0 #ifdef SUPPORT_JIT @@ -1307,15 +1422,15 @@ if (bits_set || min > 0 #endif ) { - extra = (pcre_extra *)(pcre_malloc) - (sizeof(pcre_extra) + sizeof(pcre_study_data)); + extra = (PUBL(extra) *)(PUBL(malloc)) + (sizeof(PUBL(extra)) + sizeof(pcre_study_data)); if (extra == NULL) { *errorptr = "failed to get memory"; return NULL; } - study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); + study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra))); extra->flags = PCRE_EXTRA_STUDY_DATA; extra->study_data = study; @@ -1331,7 +1446,19 @@ if (bits_set || min > 0 study->flags |= PCRE_STUDY_MAPPED; memcpy(study->start_bits, start_bits, sizeof(start_bits)); } - else memset(study->start_bits, 0, 32 * sizeof(uschar)); + else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8)); + +#ifdef PCRE_DEBUG + if (bits_set) + { + pcre_uint8 *ptr = start_bits; + int i; + + printf("Start bits:\n"); + for (i = 0; i < 32; i++) + printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n"); + } +#endif /* Always set the minlength value in the block, because the JIT compiler makes use of it. 
However, don't set the bit unless the length is greater than @@ -1351,10 +1478,15 @@ if (bits_set || min > 0 #ifdef SUPPORT_JIT extra->executable_jit = NULL; - if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra); + if ((options & PCRE_STUDY_JIT_COMPILE) != 0) PRIV(jit_compile)(re, extra); if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) { +#ifdef COMPILE_PCRE8 pcre_free_study(extra); +#endif +#ifdef COMPILE_PCRE16 + pcre16_free_study(extra); +#endif extra = NULL; } #endif @@ -1370,19 +1502,26 @@ return extra; /* This function frees the memory that was obtained by pcre_study(). -Argument: a pointer to the pcre_extra block +Argument: a pointer to the pcre[16]_extra block Returns: nothing */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN void pcre_free_study(pcre_extra *extra) +#else +PCRE_EXP_DEFN void +pcre16_free_study(pcre16_extra *extra) +#endif { +if (extra == NULL) + return; #ifdef SUPPORT_JIT if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) - _pcre_jit_free(extra->executable_jit); + PRIV(jit_free)(extra->executable_jit); #endif -pcre_free(extra); +PUBL(free)(extra); } /* End of pcre_study.c */ diff --git a/harbour/src/3rd/pcre/pcretabs.c b/harbour/src/3rd/pcre/pcretabs.c index 6609b21fd1..d930998b3d 100644 --- a/harbour/src/3rd/pcre/pcretabs.c +++ b/harbour/src/3rd/pcre/pcretabs.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ +#ifndef PCRE_INCLUDED /* This module contains some fixed tables that are used by more than one of the PCRE code modules. 
The tables are also #included by the pcretest program, which @@ -50,11 +51,12 @@ clashes with the library. */ #include "pcreinal.h" +#endif /* PCRE_INCLUDED */ /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that the definition is next to the definition of the opcodes in pcre_internal.h. */ -const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; +const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS }; @@ -65,44 +67,38 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; /* These are the breakpoints for different numbers of bytes in a UTF-8 character. */ -#ifdef SUPPORT_UTF8 +#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ + || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16) -const int _pcre_utf8_table1[] = +/* These tables are also required by pcretest in 16 bit mode. */ + +const int PRIV(utf8_table1)[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; -const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int); +const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); /* These are the indicator bits and the mask for the data bits to set in the first byte of a character, indexed by the number of additional bytes. */ -const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; -const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; +const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; +const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; /* Table of the number of extra bytes, indexed by the first byte masked with 0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ -const uschar _pcre_utf8_table4[] = { +const pcre_uint8 PRIV(utf8_table4)[] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; -#ifdef SUPPORT_JIT -/* Full table of the number of extra bytes when the -character code is greater or equal than 0xc0. -See _pcre_utf8_table4 above. 
*/ +#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/ -const uschar _pcre_utf8_char_sizes[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4, -}; -#endif +#ifdef SUPPORT_UTF /* Table to translate from particular type value to the general value. */ -const int _pcre_ucp_gentype[] = { +const int PRIV(ucp_gentype)[] = { ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ @@ -114,10 +110,10 @@ const int _pcre_ucp_gentype[] = { }; #ifdef SUPPORT_JIT -/* This table reverses _pcre_ucp_gentype. We can save the cost +/* This table reverses PRIV(ucp_gentype). We can save the cost of a memory load. */ -const int _pcre_ucp_typerange[] = { +const int PRIV(ucp_typerange)[] = { ucp_Cc, ucp_Cs, ucp_Ll, ucp_Lu, ucp_Mc, ucp_Mn, @@ -126,7 +122,7 @@ const int _pcre_ucp_typerange[] = { ucp_Sc, ucp_So, ucp_Zl, ucp_Zs, }; -#endif +#endif /* SUPPORT_JIT */ /* The pcre_utt[] table below translates Unicode property names into type and code values. It is searched by binary chop, so must be in collating sequence of @@ -284,7 +280,7 @@ strings to make sure that UTF-8 support works on EBCDIC platforms. 
*/ #define STRING_Zp0 STR_Z STR_p "\0" #define STRING_Zs0 STR_Z STR_s "\0" -const char _pcre_utt_names[] = +const char PRIV(utt_names)[] = STRING_Any0 STRING_Arabic0 STRING_Armenian0 @@ -424,7 +420,7 @@ const char _pcre_utt_names[] = STRING_Zp0 STRING_Zs0; -const ucp_type_table _pcre_utt[] = { +const ucp_type_table PRIV(utt)[] = { { 0, PT_ANY, 0 }, { 4, PT_SC, ucp_Arabic }, { 11, PT_SC, ucp_Armenian }, @@ -565,8 +561,8 @@ const ucp_type_table _pcre_utt[] = { { 961, PT_PC, ucp_Zs } }; -const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); +const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); -#endif /* SUPPORT_UTF8 */ +#endif /* SUPPORT_UTF */ /* End of pcre_tables.c */ diff --git a/harbour/src/3rd/pcre/pcretryf.c b/harbour/src/3rd/pcre/pcretryf.c deleted file mode 100644 index 66bf5c78a7..0000000000 --- a/harbour/src/3rd/pcre/pcretryf.c +++ /dev/null @@ -1,139 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - - Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge - ------------------------------------------------------------------------------ -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - * Neither the name of the University of Cambridge nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. ------------------------------------------------------------------------------ -*/ - - -/* This module contains an internal function that tests a compiled pattern to -see if it was compiled with the opposite endianness. If so, it uses an -auxiliary local function to flip the appropriate bytes. */ - - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "pcreinal.h" - - -/************************************************* -* Flip bytes in an integer * -*************************************************/ - -/* This function is called when the magic number in a regex doesn't match, in -order to flip its bytes to see if we are dealing with a pattern that was -compiled on a host of different endianness. If so, this function is used to -flip other byte values. 
- -Arguments: - value the number to flip - n the number of bytes to flip (assumed to be 2 or 4) - -Returns: the flipped value -*/ - -static unsigned long int -byteflip(unsigned long int value, int n) -{ -if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8); -return ((value & 0x000000ff) << 24) | - ((value & 0x0000ff00) << 8) | - ((value & 0x00ff0000) >> 8) | - ((value & 0xff000000) >> 24); -} - - - -/************************************************* -* Test for a byte-flipped compiled regex * -*************************************************/ - -/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from -pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that -is, it was compiled on a system of opposite endianness. The function is called -only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped, -we flip all the relevant values into a different data block, and return it. - -Arguments: - re points to the regex - study points to study data, or NULL - internal_re points to a new regex block - internal_study points to a new study block - -Returns: the new block if is is indeed a byte-flipped regex - NULL if it is not -*/ - -real_pcre * -_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re, - const pcre_study_data *study, pcre_study_data *internal_study) -{ -if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER) - return NULL; - -*internal_re = *re; /* To copy other fields */ -internal_re->size = byteflip(re->size, sizeof(re->size)); -internal_re->options = byteflip(re->options, sizeof(re->options)); -internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags)); -internal_re->top_bracket = - (pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket)); -internal_re->top_backref = - (pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref)); -internal_re->first_byte = - (pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte)); 
-internal_re->req_byte = - (pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte)); -internal_re->name_table_offset = - (pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset)); -internal_re->name_entry_size = - (pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size)); -internal_re->name_count = - (pcre_uint16)byteflip(re->name_count, sizeof(re->name_count)); - -if (study != NULL) - { - *internal_study = *study; /* To copy other fields */ - internal_study->size = byteflip(study->size, sizeof(study->size)); - internal_study->flags = byteflip(study->flags, sizeof(study->flags)); - internal_study->minlength = byteflip(study->minlength, - sizeof(study->minlength)); - } - -return internal_re; -} - -/* End of pcre_tryflipped.c */ diff --git a/harbour/src/3rd/pcre/pcreucd.c b/harbour/src/3rd/pcre/pcreucd.c index 2ab63d2786..04a24bfe06 100644 --- a/harbour/src/3rd/pcre/pcreucd.c +++ b/harbour/src/3rd/pcre/pcreucd.c @@ -18,21 +18,21 @@ /* Instead, just supply small dummy tables. 
*/ #ifndef SUPPORT_UCP -const ucd_record _pcre_ucd_records[] = {{0,0,0 }}; -const uschar _pcre_ucd_stage1[] = {0}; -const pcre_uint16 _pcre_ucd_stage2[] = {0}; +const ucd_record PRIV(ucd_records)[] = {{0,0,0 }}; +const pcre_uint8 PRIV(ucd_stage1)[] = {0}; +const pcre_uint16 PRIV(ucd_stage2)[] = {0}; #else /* When recompiling tables with a new Unicode version, please check types in the structure definition from pcre_internal.h: typedef struct { -uschar property_0; -uschar property_1; +pcre_uint8 property_0; +pcre_uint8 property_1; pcre_int32 property_2; } ucd_record; */ -const ucd_record _pcre_ucd_records[] = { /* 4320 bytes, record size 8 */ +const ucd_record PRIV(ucd_records)[] = { /* 4320 bytes, record size 8 */ { 9, 0, 0, }, /* 0 */ { 9, 29, 0, }, /* 1 */ { 9, 21, 0, }, /* 2 */ @@ -575,7 +575,7 @@ const ucd_record _pcre_ucd_records[] = { /* 4320 bytes, record size 8 */ { 26, 26, 0, }, /* 539 */ }; -const uschar _pcre_ucd_stage1[] = { /* 8704 bytes */ +const pcre_uint8 PRIV(ucd_stage1)[] = { /* 8704 bytes */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* U+0000 */ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* U+0800 */ 32, 33, 34, 34, 35, 36, 37, 38, 39, 40, 40, 40, 41, 42, 43, 44, /* U+1000 */ @@ -1122,7 +1122,7 @@ const uschar _pcre_ucd_stage1[] = { /* 8704 bytes */ 114,114,114,114,114,114,114,114,114,114,114,114,114,114,114,184, /* U+10F800 */ }; -const pcre_uint16 _pcre_ucd_stage2[] = { /* 47360 bytes, block = 128 */ +const pcre_uint16 PRIV(ucd_stage2)[] = { /* 47360 bytes, block = 128 */ /* block 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, diff --git a/harbour/src/3rd/pcre/pcrever.c b/harbour/src/3rd/pcre/pcrever.c index 6a60ba4faf..1c4bbe1647 100644 --- a/harbour/src/3rd/pcre/pcrever.c +++ b/harbour/src/3rd/pcre/pcrever.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2008 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -79,8 +79,13 @@ I could find no way of detecting that a macro is defined as an empty string at pre-processor time. This hack uses a standard trick for avoiding calling the STRING macro with an empty argument when doing the test. */ +#ifdef COMPILE_PCRE8 PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION pcre_version(void) +#else +PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION +pcre16_version(void) +#endif { return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)? XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) : diff --git a/harbour/src/3rd/pcre/pcrevutf.c b/harbour/src/3rd/pcre/pcrevutf.c index c02b9922a9..7a683812c4 100644 --- a/harbour/src/3rd/pcre/pcrevutf.c +++ b/harbour/src/3rd/pcre/pcrevutf.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. 
Written by Philip Hazel - Copyright (c) 1997-2009 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -103,10 +103,10 @@ Returns: = 0 if the string is a valid UTF-8 string */ int -_pcre_valid_utf8(USPTR string, int length, int *erroroffset) +PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset) { -#ifdef SUPPORT_UTF8 -register USPTR p; +#ifdef SUPPORT_UTF +register PCRE_PUCHAR p; if (length < 0) { @@ -133,7 +133,7 @@ for (p = string; length-- > 0; p++) return PCRE_UTF8_ERR21; } - ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ + ab = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ if (length < ab) { *erroroffset = (int)(p - string); /* Missing bytes */ @@ -288,7 +288,7 @@ for (p = string; length-- > 0; p++) } } -#else /* SUPPORT_UTF8 */ +#else /* SUPPORT_UTF */ (void)(string); /* Keep picky compilers happy */ (void)(length); #endif diff --git a/harbour/src/3rd/pcre/pcrexcls.c b/harbour/src/3rd/pcre/pcrexcls.c index 96fd925e04..dcd06ae3c6 100644 --- a/harbour/src/3rd/pcre/pcrexcls.c +++ b/harbour/src/3rd/pcre/pcrexcls.c @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2010 University of Cambridge + Copyright (c) 1997-2012 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -64,39 +64,63 @@ Returns: TRUE if character matches, else FALSE */ BOOL -_pcre_xclass(int c, const uschar *data) +PRIV(xclass)(int c, const pcre_uchar *data, BOOL utf) { int t; BOOL negated = (*data & XCL_NOT) != 0; +(void)utf; +#ifdef COMPILE_PCRE8 +/* In 8 bit mode, this must always be TRUE. Help the compiler to know that. 
*/ +utf = TRUE; +#endif + /* Character values < 256 are matched against a bitmap, if one is present. If not, we still carry on, because there may be ranges that start below 256 in the additional data. */ if (c < 256) { - if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) - return !negated; /* char found */ + if ((*data & XCL_MAP) != 0 && + (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0) + return !negated; /* char found */ } /* First skip the bit map if present. Then match against the list of Unicode properties or large chars or ranges that end with a large char. We won't ever encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ -if ((*data++ & XCL_MAP) != 0) data += 32; +if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar); while ((t = *data++) != XCL_END) { int x, y; if (t == XCL_SINGLE) { - GETCHARINC(x, data); +#ifdef SUPPORT_UTF + if (utf) + { + GETCHARINC(x, data); /* macro generates multiple statements */ + } + else +#endif + x = *data++; if (c == x) return !negated; } else if (t == XCL_RANGE) { - GETCHARINC(x, data); - GETCHARINC(y, data); +#ifdef SUPPORT_UTF + if (utf) + { + GETCHARINC(x, data); /* macro generates multiple statements */ + GETCHARINC(y, data); /* macro generates multiple statements */ + } + else +#endif + { + x = *data++; + y = *data++; + } if (c >= x && c <= y) return !negated; } @@ -117,7 +141,7 @@ while ((t = *data++) != XCL_END) break; case PT_GC: - if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP)) + if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == (t == XCL_PROP)) return !negated; break; @@ -130,28 +154,28 @@ while ((t = *data++) != XCL_END) break; case PT_ALNUM: - if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N) == (t == XCL_PROP)) + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (t == XCL_PROP)) return !negated; break; case PT_SPACE: /* Perl space */ - if 
((_pcre_ucp_gentype[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) return !negated; break; case PT_PXSPACE: /* POSIX space */ - if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z || + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z || c == CHAR_HT || c == CHAR_NL || c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) == (t == XCL_PROP)) return !negated; break; case PT_WORD: - if ((_pcre_ucp_gentype[prop->chartype] == ucp_L || - _pcre_ucp_gentype[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || + PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE) == (t == XCL_PROP)) return !negated; break; diff --git a/harbour/src/3rd/pcre/sjarmth2.c b/harbour/src/3rd/pcre/sjarmth2.c index 3764aebd04..a51536b4a7 100644 --- a/harbour/src/3rd/pcre/sjarmth2.c +++ b/harbour/src/3rd/pcre/sjarmth2.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -26,7 +26,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() { - return "arm-thumb2"; + return "ARM-Thumb2" SLJIT_CPUINFO; } /* Last register + 1. */ @@ -38,7 +38,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() #define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1) #define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2) -/* See sljit_emit_enter if you want to change them. */ +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { 0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15 }; @@ -122,9 +122,11 @@ typedef sljit_ui sljit_ins; #define LSR_W 0xfa20f000 #define LSR_WI 0xea4f0010 #define MOV 0x4600 +#define MOVS 0x0000 #define MOVSI 0x2000 #define MOVT 0xf2c00000 #define MOVW 0xf2400000 +#define MOV_W 0xea4f0000 #define MOV_WI 0xf04f0000 #define MUL 0xfb00f000 #define MVNS 0x43c0 @@ -158,6 +160,7 @@ typedef sljit_ui sljit_ins; #define SXTH 0xb200 #define SXTH_W 0xfa0ff080 #define TST 0x4200 +#define UMULL 0xfba00000 #define UXTB 0xb2c0 #define UXTB_W 0xfa5ff080 #define UXTH 0xb280 @@ -616,6 +619,13 @@ static int emit_op_imm(struct sljit_compiler *compiler, int flags, int dst, slji case SLJIT_SHL: if (flags & ARG2_IMM) { imm &= 0x1f; + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); @@ -624,6 +634,13 @@ static int emit_op_imm(struct sljit_compiler *compiler, int flags, int dst, slji case SLJIT_LSHR: if (flags & ARG2_IMM) { imm &= 0x1f; + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); @@ -632,6 +649,13 @@ static int emit_op_imm(struct sljit_compiler 
*compiler, int flags, int dst, slji case SLJIT_ASHR: if (flags & ARG2_IMM) { imm &= 0x1f; + if (imm == 0) { + if (!(flags & SET_FLAGS)) + return push_inst16(compiler, MOV | SET_REGS44(dst, reg)); + if (IS_2_LO_REGS(dst, reg)) + return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg)); + } if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); @@ -1077,36 +1101,36 @@ static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, return getput_arg(compiler, flags, reg, arg, argw, 0, 0); } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size; sljit_ins push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; push = (1 << 4); - if (generals >= 5) + if (saveds >= 5) push |= 1 << 11; - if (generals >= 4) + if (saveds >= 4) push |= 1 << 10; - if (generals >= 3) + if (saveds >= 3) push |= 1 << 8; - if (generals >= 2) + if (saveds >= 2) push |= 1 << 7; - if (generals >= 1) + if (saveds >= 1) push |= 1 << 6; if (temporaries >= 5) push |= 1 << 5; - FAIL_IF(generals >= 3 + FAIL_IF(saveds >= 3 ? 
push_inst32(compiler, PUSH_W | (1 << 14) | push) : push_inst16(compiler, PUSH | push)); /* Stack must be aligned to 8 bytes: */ - size = (3 + generals) * sizeof(sljit_uw); + size = (3 + saveds) * sizeof(sljit_uw); local_size += size; local_size = (local_size + 7) & ~7; local_size -= size; @@ -1119,45 +1143,40 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i } if (args >= 1) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG1, SLJIT_TEMPORARY_REG1))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG1, SLJIT_TEMPORARY_REG1))); if (args >= 2) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG2, SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG2, SLJIT_TEMPORARY_REG2))); if (args >= 3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG3, SLJIT_TEMPORARY_REG3))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SAVED_REG3, SLJIT_TEMPORARY_REG3))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size; CHECK_ERROR_VOID(); - check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; - size = (3 + generals) * sizeof(sljit_uw); + size = (3 + saveds) * sizeof(sljit_uw); local_size += size; local_size = (local_size + 7) & ~7; local_size -= size; compiler->local_size = local_size; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { 
sljit_ins pop; CHECK_ERROR(); - check_sljit_emit_return(compiler, src, srcw); + check_sljit_emit_return(compiler, op, src, srcw); - if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) { - if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REG3) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_RETURN_REG, src))); - else - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, SLJIT_RETURN_REG, src, srcw)); - } + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) { if (compiler->local_size <= (127 << 2)) @@ -1167,19 +1186,19 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, } pop = (1 << 4); - if (compiler->generals >= 5) + if (compiler->saveds >= 5) pop |= 1 << 11; - if (compiler->generals >= 4) + if (compiler->saveds >= 4) pop |= 1 << 10; - if (compiler->generals >= 3) + if (compiler->saveds >= 3) pop |= 1 << 8; - if (compiler->generals >= 2) + if (compiler->saveds >= 2) pop |= 1 << 7; - if (compiler->generals >= 1) + if (compiler->saveds >= 1) pop |= 1 << 6; if (compiler->temporaries >= 5) pop |= 1 << 5; - return compiler->generals >= 3 + return compiler->saveds >= 3 ? 
push_inst32(compiler, POP_W | (1 << 15) | pop) : push_inst16(compiler, POP | pop); } @@ -1188,6 +1207,21 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, /* Operators */ /* --------------------------------------------------------------------- */ +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator); +extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) { CHECK_ERROR(); @@ -1201,6 +1235,32 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int case SLJIT_NOP: push_inst16(compiler, NOP); break; + case SLJIT_UMUL: + case SLJIT_SMUL: + return push_inst32(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) + | (reg_map[SLJIT_TEMPORARY_REG2] << 8) + | (reg_map[SLJIT_TEMPORARY_REG1] << 12) + | (reg_map[SLJIT_TEMPORARY_REG1] << 16) + | reg_map[SLJIT_TEMPORARY_REG2]); + case SLJIT_UDIV: + case SLJIT_SDIV: + if (compiler->temporaries >= 4) { + FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */)); + FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */)); + } else if (compiler->temporaries >= 3) + FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */)); +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + (op == SLJIT_UDIV ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + if (compiler->temporaries >= 4) { + FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */)); + return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */); + } else if (compiler->temporaries >= 3) + return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */); + return SLJIT_SUCCESS; } return SLJIT_SUCCESS; @@ -1412,6 +1472,24 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + check_sljit_get_register_index(reg); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + CHECK_ERROR(); + check_sljit_emit_op_custom(compiler, instruction, size); + SLJIT_ASSERT(size == 2 || size == 4); + + if (size == 2) + return push_inst16(compiler, *(sljit_uh*)instruction); + return push_inst32(compiler, *(sljit_ins*)instruction); +} + /* --------------------------------------------------------------------- */ /* Floating point operators */ /* --------------------------------------------------------------------- */ @@ -1567,17 +1645,17 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { int size; CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, 
temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; - size = (3 + generals) * sizeof(sljit_uw); + size = (3 + saveds) * sizeof(sljit_uw); local_size += size; local_size = (local_size + 7) & ~7; local_size -= size; diff --git a/harbour/src/3rd/pcre/sjarmv5.c b/harbour/src/3rd/pcre/sjarmv5.c index 99584cfc88..e3a5873247 100644 --- a/harbour/src/3rd/pcre/sjarmv5.c +++ b/harbour/src/3rd/pcre/sjarmv5.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -27,9 +27,9 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() { #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) - return "arm-v7"; + return "ARMv7" SLJIT_CPUINFO; #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - return "arm-v5"; + return "ARMv5" SLJIT_CPUINFO; #else #error "Internal error: Unknown ARM architecture" #endif @@ -54,7 +54,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() #define MAX_DIFFERENCE(max_diff) \ (((max_diff) / (int)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) -/* See sljit_emit_enter if you want to change them. */ +/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. 
*/ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { 0, 0, 1, 2, 10, 11, 4, 5, 6, 7, 8, 13, 3, 12, 14, 15 }; @@ -84,7 +84,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define BX 0xe12fff10 #define CLZ 0xe16f0f10 #define CMP_DP 0xa -#define DEBUGGER 0xe1200070 +#define BKPT 0xe1200070 #define EOR_DP 0x1 #define MOV_DP 0xd #define MUL 0xe0000090 @@ -98,6 +98,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = { #define SBC_DP 0x6 #define SMULL 0xe0c00090 #define SUB_DP 0x2 +#define UMULL 0xe0800090 #define VABS_F64 0xeeb00bc0 #define VADD_F64 0xee300b00 #define VCMP_F64 0xeeb40b40 @@ -819,38 +820,38 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, int src1, sljit_w src1w, int src2, sljit_w src2w); -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size; sljit_uw push; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; - /* Push general registers, temporary registers + /* Push saved registers, temporary registers stmdb sp!, {..., lr} */ push = PUSH | (1 << 14); if (temporaries >= 5) push |= 1 << 11; if (temporaries >= 4) push |= 1 << 10; - if (generals >= 5) + if (saveds >= 5) push |= 1 << 8; - if (generals >= 4) + if (saveds >= 4) push |= 1 << 7; - if (generals >= 3) + if (saveds >= 3) push |= 1 << 6; - if (generals >= 2) + if (saveds >= 2) push |= 1 << 5; - if (generals >= 1) + if (saveds >= 1) push |= 1 << 4; EMIT_INSTRUCTION(push); /* Stack must be aligned to 8 bytes: */ - size = (1 + generals) * sizeof(sljit_uw); + size = (1 + saveds) * 
sizeof(sljit_uw); if (temporaries >= 4) size += (temporaries - 3) * sizeof(sljit_uw); local_size += size; @@ -861,26 +862,26 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size)); if (args >= 1) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_GENERAL_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG1))); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG1))); if (args >= 2) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_GENERAL_REG2, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2))); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG2, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2))); if (args >= 3) - EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_GENERAL_REG3, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG3))); + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_SAVED_REG3, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG3))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size; CHECK_ERROR_VOID(); - check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; - size = (1 + generals) * sizeof(sljit_uw); + size = (1 + saveds) * sizeof(sljit_uw); if (temporaries >= 4) size += (temporaries - 3) * sizeof(sljit_uw); local_size += size; @@ -889,35 +890,34 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, compiler->local_size = local_size; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct 
sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { sljit_uw pop; CHECK_ERROR(); - check_sljit_emit_return(compiler, src, srcw); + check_sljit_emit_return(compiler, op, src, srcw); - if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) - FAIL_IF(emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, SLJIT_RETURN_REG, 0, TMP_REG1, 0, src, srcw)); + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); pop = POP | (1 << 15); - /* Push general registers, temporary registers + /* Push saved registers, temporary registers ldmia sp!, {..., pc} */ if (compiler->temporaries >= 5) pop |= 1 << 11; if (compiler->temporaries >= 4) pop |= 1 << 10; - if (compiler->generals >= 5) + if (compiler->saveds >= 5) pop |= 1 << 8; - if (compiler->generals >= 4) + if (compiler->saveds >= 4) pop |= 1 << 7; - if (compiler->generals >= 3) + if (compiler->saveds >= 3) pop |= 1 << 6; - if (compiler->generals >= 2) + if (compiler->saveds >= 2) pop |= 1 << 5; - if (compiler->generals >= 1) + if (compiler->saveds >= 1) pop |= 1 << 4; return push_inst(compiler, pop); @@ -992,7 +992,9 @@ static sljit_w data_transfer_insts[16] = { if (compiler->shift_imm != 0x20) { \ SLJIT_ASSERT(src1 == TMP_REG1); \ SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \ + if (compiler->shift_imm != 0) \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \ } \ return push_inst(compiler, 
EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1]))); @@ -1755,6 +1757,21 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, return SLJIT_SUCCESS; } +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__GNUC__) +extern unsigned int __aeabi_uidivmod(unsigned numerator, unsigned denominator); +extern unsigned int __aeabi_idivmod(unsigned numerator, unsigned denominator); +#else +#error "Software divmod functions are needed" +#endif + +#ifdef __cplusplus +} +#endif + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) { CHECK_ERROR(); @@ -1763,11 +1780,40 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op = GET_OPCODE(op); switch (op) { case SLJIT_BREAKPOINT: - EMIT_INSTRUCTION(DEBUGGER); + EMIT_INSTRUCTION(BKPT); break; case SLJIT_NOP: EMIT_INSTRUCTION(NOP); break; + case SLJIT_UMUL: + case SLJIT_SMUL: +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) + | (reg_map[SLJIT_TEMPORARY_REG2] << 16) + | (reg_map[SLJIT_TEMPORARY_REG1] << 12) + | (reg_map[SLJIT_TEMPORARY_REG1] << 8) + | reg_map[SLJIT_TEMPORARY_REG2]); +#else + EMIT_INSTRUCTION(EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, (op == SLJIT_UMUL ? UMULL : SMULL) + | (reg_map[SLJIT_TEMPORARY_REG2] << 16) + | (reg_map[SLJIT_TEMPORARY_REG1] << 12) + | (reg_map[SLJIT_TEMPORARY_REG1] << 8) + | reg_map[TMP_REG1]); +#endif + case SLJIT_UDIV: + case SLJIT_SDIV: + if (compiler->temporaries >= 3) + EMIT_INSTRUCTION(0xe52d2008 /* str r2, [sp, #-8]! */); +#if defined(__GNUC__) + FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, + (op == SLJIT_UDIV ? 
SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); +#else +#error "Software divmod functions are needed" +#endif + if (compiler->temporaries >= 3) + return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */); + return SLJIT_SUCCESS; } return SLJIT_SUCCESS; @@ -1870,6 +1916,22 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + check_sljit_get_register_index(reg); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + CHECK_ERROR(); + check_sljit_emit_op_custom(compiler, instruction, size); + SLJIT_ASSERT(size == 4); + + return push_inst(compiler, *(sljit_uw*)instruction); +} + /* --------------------------------------------------------------------- */ /* Floating point operators */ /* --------------------------------------------------------------------- */ @@ -2079,17 +2141,17 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { int size; CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; - size = (1 + generals) * sizeof(sljit_uw); + size = (1 + saveds) * sizeof(sljit_uw); if (temporaries >= 4) size += (temporaries - 3) * 
sizeof(sljit_uw); local_size += size; diff --git a/harbour/src/3rd/pcre/sjconf.h b/harbour/src/3rd/pcre/sjconf.h index b71c03e949..c832dfe60d 100644 --- a/harbour/src/3rd/pcre/sjconf.h +++ b/harbour/src/3rd/pcre/sjconf.h @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: diff --git a/harbour/src/3rd/pcre/sjconfi.h b/harbour/src/3rd/pcre/sjconfi.h index ad0be19e2a..de6e9f07e2 100644 --- a/harbour/src/3rd/pcre/sjconfi.h +++ b/harbour/src/3rd/pcre/sjconfi.h @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -28,21 +28,25 @@ #define _SLJIT_CONFIG_INTERNAL_H_ /* - SLJIT defines the following variables itself depending on the configuration: - sljit_b, sljit_ub : signed and unsigned 8 bit byte - sljit_h, sljit_uh : signed and unsigned 16 bit half-word (short) type - sljit_i, sljit_ui : signed and unsigned 32 bit integer type - sljit_w, sljit_uw : signed and unsigned machine word, enough to store a pointer (same as intptr_t) - SLJIT_CALL : C calling convention for both calling JIT and C callbacks from JIT + SLJIT defines the following macros depending on the target architecture: + + Feature detection (boolean) macros: SLJIT_32BIT_ARCHITECTURE : 32 bit architecture SLJIT_64BIT_ARCHITECTURE : 64 bit architecture SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_w/sljit_uw array by index SLJIT_FLOAT_SHIFT : the shift required to apply when accessing a double array by index - SLJIT_BIG_ENDIAN : big endian architecture SLJIT_LITTLE_ENDIAN : little endian architecture - SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() - SLJIT_W : for defining 64 bit constants on 64 bit architectures (compiler workaround) - SLJIT_UNALIGNED : allows unaligned memory accesses for integer arithmetic (only!) + SLJIT_BIG_ENDIAN : big endian architecture + SLJIT_UNALIGNED : allows unaligned memory accesses for non-fpu operations (only!) 
+ SLJIT_INDIRECT_CALL : see SLJIT_FUNC_OFFSET() for more information + + Types and useful macros: + sljit_b, sljit_ub : signed and unsigned 8 bit byte + sljit_h, sljit_uh : signed and unsigned 16 bit half-word (short) type + sljit_i, sljit_ui : signed and unsigned 32 bit integer type + sljit_w, sljit_uw : signed and unsigned machine word, enough to store a pointer (same as intptr_t) + SLJIT_CALL : C calling convention define for both calling JIT form C and C callbacks for JIT + SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper) */ #if !((defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) \ @@ -226,7 +230,12 @@ typedef signed int sljit_i; /* Machine word type. Can encapsulate a pointer. 32 bit for 32 bit machines. 64 bit for 64 bit machines. */ -#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +/* Just to have something. */ +#define SLJIT_WORD_SHIFT 0 +typedef unsigned long int sljit_uw; +typedef long int sljit_w; +#elif !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !(defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define SLJIT_32BIT_ARCHITECTURE 1 #define SLJIT_WORD_SHIFT 2 typedef unsigned int sljit_uw; diff --git a/harbour/src/3rd/pcre/sjexeca.c b/harbour/src/3rd/pcre/sjexeca.c index cdea3460d6..f66744df82 100644 --- a/harbour/src/3rd/pcre/sjexeca.c +++ b/harbour/src/3rd/pcre/sjexeca.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -263,8 +263,11 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) header->prev_size = free_block->size; } + /* The whole chunk is free. */ if (SLJIT_UNLIKELY(!free_block->header.prev_size && header->size == 1)) { + /* If this block is freed, we still have (allocated_size / 2) free space. */ if (total_size - free_block->size > (allocated_size * 3 / 2)) { + total_size -= free_block->size; sljit_remove_free_block(free_block); free_chunk(free_block, free_block->size + sizeof(struct block_header)); } diff --git a/harbour/src/3rd/pcre/sjlir.c b/harbour/src/3rd/pcre/sjlir.c index 0a06395613..2b0703e8ae 100644 --- a/harbour/src/3rd/pcre/sjlir.c +++ b/harbour/src/3rd/pcre/sjlir.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -228,7 +228,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) compiler->abuf->used_size = 0; compiler->temporaries = -1; - compiler->generals = -1; + compiler->saveds = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) compiler->args = -1; @@ -475,12 +475,12 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp #define FUNCTION_CHECK_IS_REG(r) \ ((r) == SLJIT_UNUSED || (r) == SLJIT_LOCALS_REG || \ ((r) >= SLJIT_TEMPORARY_REG1 && (r) <= SLJIT_TEMPORARY_REG3 && (r) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \ - ((r) >= SLJIT_GENERAL_REG1 && (r) <= SLJIT_GENERAL_REG3 && (r) <= SLJIT_GENERAL_REG1 - 1 + compiler->generals)) \ + ((r) >= SLJIT_SAVED_REG1 && (r) <= SLJIT_SAVED_REG3 && (r) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds)) \ #define FUNCTION_CHECK_SRC(p, i) \ - SLJIT_ASSERT(compiler->temporaries != -1 && compiler->generals != -1); \ + SLJIT_ASSERT(compiler->temporaries != -1 && compiler->saveds != -1); \ if (((p) >= SLJIT_TEMPORARY_REG1 && (p) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \ - ((p) >= SLJIT_GENERAL_REG1 && (p) <= SLJIT_GENERAL_REG1 - 1 + compiler->generals) || \ + ((p) >= SLJIT_SAVED_REG1 && (p) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds) || \ (p) == SLJIT_LOCALS_REG) \ SLJIT_ASSERT(i == 0); \ else if ((p) == SLJIT_IMM) \ @@ -498,9 +498,9 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp SLJIT_ASSERT_STOP(); #define FUNCTION_CHECK_DST(p, i) \ - SLJIT_ASSERT(compiler->temporaries != -1 && compiler->generals != -1); \ + SLJIT_ASSERT(compiler->temporaries != -1 && compiler->saveds != -1); \ if (((p) >= SLJIT_TEMPORARY_REG1 && (p) <= SLJIT_TEMPORARY_REG1 - 1 + compiler->temporaries) || \ - ((p) >= SLJIT_GENERAL_REG1 && (p) <= SLJIT_GENERAL_REG1 - 1 + compiler->generals) || \ 
+ ((p) >= SLJIT_SAVED_REG1 && (p) <= SLJIT_SAVED_REG1 - 1 + compiler->saveds) || \ (p) == SLJIT_UNUSED) \ SLJIT_ASSERT(i == 0); \ else if ((p) & SLJIT_MEM) { \ @@ -551,9 +551,9 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp } static char* reg_names[] = { - (char*)"", (char*)"tmp_r1", (char*)"tmp_r2", (char*)"tmp_r3", - (char*)"tmp_er1", (char*)"tmp_er2", (char*)"gen_r1", (char*)"gen_r2", - (char*)"gen_r3", (char*)"gen_er1", (char*)"gen_er2", (char*)"stack_r" + (char*)"", (char*)"t1", (char*)"t2", (char*)"t3", + (char*)"te1", (char*)"te2", (char*)"s1", (char*)"s2", + (char*)"s3", (char*)"se1", (char*)"se2", (char*)"lcr" }; static char* freg_names[] = { @@ -616,6 +616,7 @@ static char* freg_names[] = { static SLJIT_CONST char* op_names[] = { /* op0 */ (char*)"breakpoint", (char*)"nop", + (char*)"umul", (char*)"smul", (char*)"udiv", (char*)"sdiv", /* op1 */ (char*)"mov", (char*)"mov.ub", (char*)"mov.sb", (char*)"mov.uh", (char*)"mov.sh", (char*)"mov.ui", (char*)"mov.si", (char*)"movu", @@ -673,70 +674,76 @@ static SLJIT_INLINE void check_sljit_generate_code(struct sljit_compiler *compil #endif } -static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +static SLJIT_INLINE void check_sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { /* If debug and verbose are disabled, all arguments are unused. 
*/ SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(temporaries); - SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT(args >= 0 && args <= 3); SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(generals >= 0 && generals <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= generals); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(args <= saveds); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " enter args=%d temporaries=%d generals=%d local_size=%d\n", args, temporaries, generals, local_size); + fprintf(compiler->verbose, " enter args=%d temporaries=%d saveds=%d local_size=%d\n", args, temporaries, saveds, local_size); #endif } -static SLJIT_INLINE void check_sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +static SLJIT_INLINE void check_sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { /* If debug and verbose are disabled, all arguments are unused. 
*/ SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(temporaries); - SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT(args >= 0 && args <= 3); SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(generals >= 0 && generals <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= generals); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(args <= saveds); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " fake_enter args=%d temporaries=%d generals=%d local_size=%d\n", args, temporaries, generals, local_size); + fprintf(compiler->verbose, " fake_enter args=%d temporaries=%d saveds=%d local_size=%d\n", args, temporaries, saveds, local_size); #endif } -static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +static SLJIT_INLINE void check_sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { /* If debug and verbose are disabled, all arguments are unused. 
*/ SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - if (src != SLJIT_UNUSED) { + if (op != SLJIT_UNUSED) { + SLJIT_ASSERT(op >= SLJIT_MOV && op <= SLJIT_MOV_SI); FUNCTION_CHECK_SRC(src, srcw); } else - SLJIT_ASSERT(srcw == 0); + SLJIT_ASSERT(src == 0 && srcw == 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " return "); - sljit_verbose_param(src, srcw); - fprintf(compiler->verbose, "\n"); + if (op == SLJIT_UNUSED) + fprintf(compiler->verbose, " return\n"); + else { + fprintf(compiler->verbose, " return %s ", op_names[op]); + sljit_verbose_param(src, srcw); + fprintf(compiler->verbose, "\n"); + } } #endif } -static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { /* If debug and verbose are disabled, all arguments are unused. 
*/ SLJIT_UNUSED_ARG(compiler); @@ -744,26 +751,26 @@ static SLJIT_INLINE void check_sljit_emit_fast_enter(struct sljit_compiler *comp SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(temporaries); - SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT(args >= 0 && args <= 3); SLJIT_ASSERT(temporaries >= 0 && temporaries <= SLJIT_NO_TMP_REGISTERS); - SLJIT_ASSERT(generals >= 0 && generals <= SLJIT_NO_GEN_REGISTERS); - SLJIT_ASSERT(args <= generals); + SLJIT_ASSERT(saveds >= 0 && saveds <= SLJIT_NO_GEN_REGISTERS); + SLJIT_ASSERT(args <= saveds); SLJIT_ASSERT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; FUNCTION_CHECK_DST(dst, dstw); compiler->temporaries = -1; - compiler->generals = -1; + compiler->saveds = -1; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " fast_enter "); sljit_verbose_param(dst, dstw); - fprintf(compiler->verbose, " args=%d temporaries=%d generals=%d local_size=%d\n", args, temporaries, generals, local_size); + fprintf(compiler->verbose, " args=%d temporaries=%d saveds=%d local_size=%d\n", args, temporaries, saveds, local_size); } #endif } @@ -793,10 +800,11 @@ static SLJIT_INLINE void check_sljit_emit_op0(struct sljit_compiler *compiler, i SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); - SLJIT_ASSERT(op >= SLJIT_BREAKPOINT && op <= SLJIT_NOP); + SLJIT_ASSERT((op >= SLJIT_BREAKPOINT && op <= SLJIT_SMUL) + || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIV && (op & ~SLJIT_INT_OP) <= SLJIT_SDIV)); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) - fprintf(compiler->verbose, " %s\n", op_names[op]); + fprintf(compiler->verbose, " %s%s\n", !(op & SLJIT_INT_OP) ? 
"" : "i", op_names[GET_OPCODE(op)]); #endif } @@ -812,6 +820,13 @@ static SLJIT_INLINE void check_sljit_emit_op1(struct sljit_compiler *compiler, i SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) FUNCTION_CHECK_OP(); @@ -874,6 +889,21 @@ static SLJIT_INLINE void check_sljit_emit_op2(struct sljit_compiler *compiler, i #endif } +static SLJIT_INLINE void check_sljit_get_register_index(int reg) +{ + SLJIT_UNUSED_ARG(reg); + SLJIT_ASSERT(reg > 0 && reg <= SLJIT_NO_REGISTERS); +} + +static SLJIT_INLINE void check_sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(instruction); + SLJIT_UNUSED_ARG(size); + SLJIT_ASSERT(instruction); +} + static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw) @@ -886,6 +916,13 @@ static SLJIT_INLINE void check_sljit_emit_fop1(struct sljit_compiler *compiler, SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + return; + } +#endif + SLJIT_ASSERT(sljit_is_fpu_available()); SLJIT_ASSERT(GET_OPCODE(op) >= SLJIT_FCMP && GET_OPCODE(op) <= SLJIT_FABS); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) @@ -1001,6 +1038,35 @@ static SLJIT_INLINE void check_sljit_emit_cmp(struct sljit_compiler *compiler, i #endif } +static SLJIT_INLINE void check_sljit_emit_fcmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + 
SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + + SLJIT_ASSERT(sljit_is_fpu_available()); + SLJIT_ASSERT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP))); + SLJIT_ASSERT((type & 0xff) >= SLJIT_C_FLOAT_EQUAL && (type & 0xff) <= SLJIT_C_FLOAT_NOT_NAN); +#if (defined SLJIT_DEBUG && SLJIT_DEBUG) + FUNCTION_FCHECK(src1, src1w); + FUNCTION_FCHECK(src2, src2w); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " fcmp%s <%s> ", !(type & SLJIT_REWRITABLE_JUMP) ? "" : "R", jump_names[type & 0xff]); + sljit_verbose_fparam(src1, src1w); + fprintf(compiler->verbose, ", "); + sljit_verbose_fparam(src2, src2w); + fprintf(compiler->verbose, "\n"); + } +#endif +} + static SLJIT_INLINE void check_sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw) { /* If debug and verbose are disabled, all arguments are unused. */ @@ -1067,6 +1133,52 @@ static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, #endif } +static SLJIT_INLINE int emit_mov_before_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) +{ + /* Return if don't need to do anything. 
*/ + if (op == SLJIT_UNUSED) + return SLJIT_SUCCESS; + +#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) + if (src == SLJIT_RETURN_REG && op == SLJIT_MOV) + return SLJIT_SUCCESS; +#else + if (src == SLJIT_RETURN_REG && (op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI)) + return SLJIT_SUCCESS; +#endif + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); +} + +/* CPU description section */ + +#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 32bit (" +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#define SLJIT_CPUINFO_PART1 " 64bit (" +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN) +#define SLJIT_CPUINFO_PART2 "little endian + " +#elif (defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN) +#define SLJIT_CPUINFO_PART2 "big endian + " +#else +#error "Internal error: CPU type info missing" +#endif + +#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) +#define SLJIT_CPUINFO_PART3 "unaligned)" +#else +#define SLJIT_CPUINFO_PART3 "aligned)" +#endif + +#define SLJIT_CPUINFO SLJIT_CPUINFO_PART1 SLJIT_CPUINFO_PART2 SLJIT_CPUINFO_PART3 + #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #include "sjx86c.c" #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -1086,6 +1198,7 @@ static SLJIT_INLINE void check_sljit_emit_const(struct sljit_compiler *compiler, #endif #if !(defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, int type, int src1, sljit_w src1w, int src2, sljit_w src2w) @@ -1152,6 +1265,32 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler #endif return sljit_emit_jump(compiler, condition | (type & 
SLJIT_REWRITABLE_JUMP)); } + +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + int flags, condition; + + check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w); + + condition = type & 0xff; + if (condition <= SLJIT_C_FLOAT_NOT_EQUAL) + flags = SLJIT_SET_E; + else + flags = SLJIT_SET_S; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + sljit_emit_fop1(compiler, SLJIT_FCMP | flags, src1, src1w, src2, src2w); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG) + compiler->skip_checks = 1; +#endif + return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); +} + #endif #else /* SLJIT_CONFIG_UNSUPPORTED */ @@ -1205,44 +1344,45 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(temporaries); - SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(temporaries); - SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct 
sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(op); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); SLJIT_ASSERT_STOP(); return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(dst); SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(args); SLJIT_UNUSED_ARG(temporaries); - SLJIT_UNUSED_ARG(generals); + SLJIT_UNUSED_ARG(saveds); SLJIT_UNUSED_ARG(local_size); SLJIT_ASSERT_STOP(); return SLJIT_ERR_UNSUPPORTED; @@ -1296,6 +1436,22 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int return SLJIT_ERR_UNSUPPORTED; } +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + SLJIT_ASSERT_STOP(); + return reg; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(instruction); + SLJIT_UNUSED_ARG(size); + SLJIT_ASSERT_STOP(); + return SLJIT_ERR_UNSUPPORTED; +} + SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void) { SLJIT_ASSERT_STOP(); @@ -1362,6 +1518,20 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler return NULL; } +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(src1); + SLJIT_UNUSED_ARG(src1w); + SLJIT_UNUSED_ARG(src2); + SLJIT_UNUSED_ARG(src2w); + SLJIT_ASSERT_STOP(); + return NULL; +} + 
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label) { SLJIT_UNUSED_ARG(jump); diff --git a/harbour/src/3rd/pcre/sjlir.h b/harbour/src/3rd/pcre/sjlir.h index c6595a6fe1..104afc05e9 100644 --- a/harbour/src/3rd/pcre/sjlir.h +++ b/harbour/src/3rd/pcre/sjlir.h @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -44,10 +44,10 @@ - The compiler is thread-safe Disadvantages: - Limited number of registers (only 6+4 integer registers, max 3+2 - temporary and max 3+2 general, and 4 floating point registers) + temporary, max 3+2 saved and 4 floating point registers) In practice: - This approach is very effective for interpreters - - One of the general registers typically points to a stack interface + - One of the saved registers typically points to a stack interface - It can jump to any exception handler anytime (even for another function. It is safe for SLJIT.) - Fast paths can be modified during runtime reflecting the changes @@ -64,6 +64,11 @@ #if !(defined SLJIT_NO_DEFAULT_CONFIG && SLJIT_NO_DEFAULT_CONFIG) #include "sjconf.h" #endif + +/* The following header file defines useful macros for fine tuning +sljit based code generators. They are listed in the begining +of sljitConfigInternal.h */ + #include "sjconfi.h" /* --------------------------------------------------------------------- */ @@ -99,14 +104,14 @@ #define SLJIT_TEMPORARY_EREG1 4 #define SLJIT_TEMPORARY_EREG2 5 -/* General (saved) registers preserve their values across function calls. */ -#define SLJIT_GENERAL_REG1 6 -#define SLJIT_GENERAL_REG2 7 -#define SLJIT_GENERAL_REG3 8 +/* Saved registers whose preserve their values across function calls. 
*/ +#define SLJIT_SAVED_REG1 6 +#define SLJIT_SAVED_REG2 7 +#define SLJIT_SAVED_REG3 8 /* Note: Extra Registers cannot be used for memory addressing. */ /* Note: on x86-32, these registers are emulated (using stack loads & stores). */ -#define SLJIT_GENERAL_EREG1 9 -#define SLJIT_GENERAL_EREG2 10 +#define SLJIT_SAVED_EREG1 9 +#define SLJIT_SAVED_EREG2 10 /* Read-only register (cannot be the destination of an operation). */ /* Note: SLJIT_MEM2( ... , SLJIT_LOCALS_REG) is not supported (x86 limitation). */ @@ -122,9 +127,11 @@ #define SLJIT_RETURN_REG SLJIT_TEMPORARY_REG1 -/* x86 prefers temporary registers for special purposes. If other - registers are used such purpose, it costs a little performance - drawback. It doesn't matter for other archs. */ +/* x86 prefers specific registers for special purposes. In case of shift + by register it supports only SLJIT_TEMPORARY_REG3 for shift argument + (which is the src2 argument of sljit_emit_op2). If another register is + used, sljit must exchange data between registers which cause a minor + slowdown. Other architectures has no such limitation. */ #define SLJIT_PREF_SHIFT_REG SLJIT_TEMPORARY_REG3 @@ -189,8 +196,8 @@ struct sljit_compiler { /* Used local registers. */ int temporaries; - /* Used general registers. */ - int generals; + /* Used saved registers. */ + int saveds; /* Local stack size. */ int local_size; /* Code size. */ @@ -201,7 +208,7 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) int args; int temporaries_start; - int generals_start; + int saveds_start; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -221,7 +228,7 @@ struct sljit_compiler { sljit_ub *cpool_unique; sljit_uw cpool_diff; sljit_uw cpool_fill; - /* General fields. */ + /* Other members. */ /* Contains pointer, "ldr pc, [...]" pairs. */ sljit_uw patches; #endif @@ -305,35 +312,56 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler /* Instruction generation. 
Returns with error code. */ /* - Entry instruction. The instruction has "args" number of arguments - and will use the first "general" number of general registers. - The arguments are passed into the general registers (arg1 to general_reg1, and so on). - Thus, "args" must be less or equal than "general". A local_size extra - stack space is allocated for the jit code (must be less or equal than - SLJIT_MAX_LOCAL_SIZE), which can accessed through SLJIT_LOCALS_REG (see - the notes there). SLJIT_LOCALS_REG is not necessary the real stack pointer! - It just points somewhere in the stack if local_size > 0 (!). Thus, the only - thing which is known that the memory area between SLJIT_LOCALS_REG and - SLJIT_LOCALS_REG + local_size is a valid stack area if local_size > 0 -*/ + The executable code is basically a function call from the viewpoint of + the C language. The function calls must obey to the ABI (Application + Binary Interface) of the platform, which specify the purpose of machine + registers and stack handling among other things. The sljit_emit_enter + function emits the necessary instructions for setting up a new context + for the executable code and moves function arguments to the saved + registers. The number of arguments are specified in the "args" + parameter and the first argument goes to SLJIT_SAVED_REG1, the second + goes to SLJIT_SAVED_REG2 and so on. The number of temporary and + saved registers are passed in "temporaries" and "saveds" arguments + respectively. Since the saved registers contains the arguments, + "args" must be less or equal than "saveds". The sljit_emit_enter + is also capable of allocating a stack space for local variables. The + "local_size" argument contains the size in bytes of this local area + and its staring address is stored in SLJIT_LOCALS_REG. However + the SLJIT_LOCALS_REG is not necessary the machine stack pointer. 
+ The memory bytes between SLJIT_LOCALS_REG (inclusive) and + SLJIT_LOCALS_REG + local_size (exclusive) can be modified freely + until the function returns. The stack space is uninitialized. -/* Note: multiple calls of this function overwrites the previous call. */ + Note: every call of sljit_emit_enter and sljit_set_context overwrites + the previous context. */ #define SLJIT_MAX_LOCAL_SIZE 65536 -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size); +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, + int args, int temporaries, int saveds, int local_size); -/* Since sljit_emit_return (and many asserts) uses variables which are initialized - by sljit_emit_enter, a simple return is not possible if these variables are not - initialized. sljit_fake_enter does not emit any instruction, just initialize - those variables. */ +/* The machine code has a context (which contains the local stack space size, + number of used registers, etc.) which initialized by sljit_emit_enter. Several + functions (like sljit_emit_return) requres this context to be able to generate + the appropriate code. However, some code fragments (like inline cache) may have + no normal entry point so their context is unknown for the compiler. Using the + function below we can specify thir context. + + Note: every call of sljit_emit_enter and sljit_set_context overwrites + the previous context. */ /* Note: multiple calls of this function overwrites the previous call. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, + int args, int temporaries, int saveds, int local_size); -/* Return from jit. See below the possible values for src and srcw. 
*/ -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw); +/* Return from machine code. The op argument can be SLJIT_UNUSED which means the + function does not return with anything or any opcode between SLJIT_MOV and + SLJIT_MOV_SI (see sljit_emit_op1). As for src and srcw they must be 0 if op + is SLJIT_UNUSED, otherwise see below the description about source and + destination arguments. */ +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, + int src, sljit_w srcw); /* Really fast calling method for utility functions inside sljit (see SLJIT_FAST_CALL). All registers and even the stack frame is passed to the callee. The return address is @@ -341,7 +369,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, use this as a return value later. */ /* Note: only for sljit specific, non ABI compilant calls. Fast, since only a few machine instructions - are needed. Excellent for small uility functions, where saving general registers and setting up + are needed. Excellent for small uility functions, where saving registers and setting up a new stack frame would cost too much performance. However, it is still possible to return to the address of the caller (or anywhere else). */ @@ -350,7 +378,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, /* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested, since many architectures do clever branch prediction on call / return instruction pairs. 
*/ -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size); +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size); SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw); /* @@ -365,15 +393,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compi */ /* - IMPORATNT NOTE: memory access MUST be naturally aligned. + IMPORATNT NOTE: memory access MUST be naturally aligned except + SLJIT_UNALIGNED macro is defined and its value is 1. + length | alignment ---------+----------- byte | 1 byte (not aligned) half | 2 byte (real_address & 0x1 == 0) int | 4 byte (real_address & 0x3 == 0) - sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE defined - | 8 byte if SLJIT_64BIT_ARCHITECTURE defined - (This is a strict requirement for embedded systems.) + sljit_w | 4 byte if SLJIT_32BIT_ARCHITECTURE is defined and its value is 1 + | 8 byte if SLJIT_64BIT_ARCHITECTURE is defined and its value is 1 Note: different architectures have different addressing limitations Thus sljit may generate several instructions for other addressing modes @@ -445,6 +474,24 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compi Note: may or may not cause an extra cycle wait it can even decrease the runtime in a few cases. */ #define SLJIT_NOP 1 +/* Flags: may destroy flags + Unsigned multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2. + Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */ +#define SLJIT_UMUL 2 +/* Flags: may destroy flags + Signed multiplication of SLJIT_TEMPORARY_REG1 and SLJIT_TEMPORARY_REG2. 
+ Result goes to SLJIT_TEMPORARY_REG2:SLJIT_TEMPORARY_REG1 (high:low) word */ +#define SLJIT_SMUL 3 +/* Flags: I | may destroy flags + Unsigned divide of the value in SLJIT_TEMPORARY_REG1 by the value in SLJIT_TEMPORARY_REG2. + The result is placed in SLJIT_TEMPORARY_REG1 and the remainder goes to SLJIT_TEMPORARY_REG2. + Note: if SLJIT_TEMPORARY_REG2 contains 0, the behaviour is undefined. */ +#define SLJIT_UDIV 4 +/* Flags: I | may destroy flags + Signed divide of the value in SLJIT_TEMPORARY_REG1 by the value in SLJIT_TEMPORARY_REG2. + The result is placed in SLJIT_TEMPORARY_REG1 and the remainder goes to SLJIT_TEMPORARY_REG2. + Note: if SLJIT_TEMPORARY_REG2 contains 0, the behaviour is undefined. */ +#define SLJIT_SDIV 5 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op); @@ -457,100 +504,136 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int SH = unsgined half (16 bit) */ /* Flags: - (never set any flags) */ -#define SLJIT_MOV 2 +#define SLJIT_MOV 6 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_UB 3 +#define SLJIT_MOV_UB 7 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_SB 4 +#define SLJIT_MOV_SB 8 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_UH 5 +#define SLJIT_MOV_UH 9 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_SH 6 +#define SLJIT_MOV_SH 10 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_UI 7 +#define SLJIT_MOV_UI 11 /* Flags: - (never set any flags) */ -#define SLJIT_MOV_SI 8 +#define SLJIT_MOV_SI 12 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU 9 +#define SLJIT_MOVU 13 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_UB 10 +#define SLJIT_MOVU_UB 14 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_SB 11 +#define SLJIT_MOVU_SB 15 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_UH 12 +#define SLJIT_MOVU_UH 16 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_SH 13 +#define SLJIT_MOVU_SH 17 /* Flags: - 
(never set any flags) */ -#define SLJIT_MOVU_UI 14 +#define SLJIT_MOVU_UI 18 /* Flags: - (never set any flags) */ -#define SLJIT_MOVU_SI 15 +#define SLJIT_MOVU_SI 19 /* Flags: I | E | K */ -#define SLJIT_NOT 16 +#define SLJIT_NOT 20 /* Flags: I | E | O | K */ -#define SLJIT_NEG 17 +#define SLJIT_NEG 21 /* Count leading zeroes Flags: I | E | K */ -#define SLJIT_CLZ 18 +#define SLJIT_CLZ 22 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw); /* Flags: I | E | O | C | K */ -#define SLJIT_ADD 19 +#define SLJIT_ADD 23 /* Flags: I | C | K */ -#define SLJIT_ADDC 20 +#define SLJIT_ADDC 24 /* Flags: I | E | S | U | O | C | K */ -#define SLJIT_SUB 21 +#define SLJIT_SUB 25 /* Flags: I | C | K */ -#define SLJIT_SUBC 22 -/* Note: integer mul */ -/* Flags: I | O (see SLJIT_C_MUL_*) | K */ -#define SLJIT_MUL 23 +#define SLJIT_SUBC 26 +/* Note: integer mul + Flags: I | O (see SLJIT_C_MUL_*) | K */ +#define SLJIT_MUL 27 /* Flags: I | E | K */ -#define SLJIT_AND 24 +#define SLJIT_AND 28 /* Flags: I | E | K */ -#define SLJIT_OR 25 +#define SLJIT_OR 29 /* Flags: I | E | K */ -#define SLJIT_XOR 26 -/* Flags: I | E | K */ -#define SLJIT_SHL 27 -/* Flags: I | E | K */ -#define SLJIT_LSHR 28 -/* Flags: I | E | K */ -#define SLJIT_ASHR 29 +#define SLJIT_XOR 30 +/* Flags: I | E | K + Let bit_length be the length of the shift operation: 32 or 64. + If src2 is immediate, src2w is masked by (bit_length - 1). + Otherwise, if the content of src2 is outside the range from 0 + to bit_length - 1, the operation is undefined. */ +#define SLJIT_SHL 31 +/* Flags: I | E | K + Let bit_length be the length of the shift operation: 32 or 64. + If src2 is immediate, src2w is masked by (bit_length - 1). + Otherwise, if the content of src2 is outside the range from 0 + to bit_length - 1, the operation is undefined. */ +#define SLJIT_LSHR 32 +/* Flags: I | E | K + Let bit_length be the length of the shift operation: 32 or 64. 
+ If src2 is immediate, src2w is masked by (bit_length - 1). + Otherwise, if the content of src2 is outside the range from 0 + to bit_length - 1, the operation is undefined. */ +#define SLJIT_ASHR 33 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src1, sljit_w src1w, int src2, sljit_w src2w); +/* The following function is a helper function for sljit_emit_op_custom. + It returns with the real machine register index of any SLJIT_TEMPORARY + SLJIT_SAVED or SLJIT_LOCALS register. + Note: it returns with -1 for virtual registers (all EREGs on x86-32). + Note: register returned by SLJIT_LOCALS_REG is not necessary the real + stack pointer register of the target architecture. */ + +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg); + +/* Any instruction can be inserted into the instruction stream by + sljit_emit_op_custom. It has a similar purpose as inline assembly. + The size parameter must match to the instruction size of the target + architecture: + + x86: 0 < size <= 15. The instruction argument can be byte aligned. + Thumb2: if size == 2, the instruction argument must be 2 byte aligned. + if size == 4, the instruction argument must be 4 byte aligned. + Otherwise: size must be 4 and instruction argument must be 4 byte aligned. */ + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size); + +/* Returns with non-zero if fpu is available. */ + SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void); /* Note: dst is the left and src is the right operand for SLJIT_FCMP. Note: NaN check is always performed. If SLJIT_C_FLOAT_NAN is set, the comparison result is unpredictable. 
Flags: E | S (see SLJIT_C_FLOAT_*) */ -#define SLJIT_FCMP 30 +#define SLJIT_FCMP 34 /* Flags: - (never set any flags) */ -#define SLJIT_FMOV 31 +#define SLJIT_FMOV 35 /* Flags: - (never set any flags) */ -#define SLJIT_FNEG 32 +#define SLJIT_FNEG 36 /* Flags: - (never set any flags) */ -#define SLJIT_FABS 33 +#define SLJIT_FABS 37 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src, sljit_w srcw); /* Flags: - (never set any flags) */ -#define SLJIT_FADD 34 +#define SLJIT_FADD 38 /* Flags: - (never set any flags) */ -#define SLJIT_FSUB 35 +#define SLJIT_FSUB 39 /* Flags: - (never set any flags) */ -#define SLJIT_FMUL 36 +#define SLJIT_FMUL 40 /* Flags: - (never set any flags) */ -#define SLJIT_FDIV 37 +#define SLJIT_FDIV 41 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, @@ -610,11 +693,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi Flags: destroy all flags for calls. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type); -/* Basic arithmetic comparison. In most architectures it is equal to - an SLJIT_SUB operation (with SLJIT_UNUSED destination) followed by a - sljit_emit_jump. However some architectures (i.e: MIPS) may employ - special optimizations here. It is suggested to use this comparison - form when flags are unimportant. +/* Basic arithmetic comparison. In most architectures it is implemented as + an SLJIT_SUB operation (with SLJIT_UNUSED destination and setting + appropriate flags) followed by a sljit_emit_jump. However some + architectures (i.e: MIPS) may employ special optimizations here. It is + suggested to use this comparison form when appropriate. type must be between SLJIT_C_EQUAL and SLJIT_C_SIG_LESS_EQUAL type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP or SLJIT_INT_OP Flags: destroy flags. 
*/ @@ -622,6 +705,20 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler int src1, sljit_w src1w, int src2, sljit_w src2w); +/* Basic floating point comparison. In most architectures it is implemented as + an SLJIT_FCMP operation (setting appropriate flags) followed by a + sljit_emit_jump. However some architectures (i.e: MIPS) may employ + special optimizations here. It is suggested to use this comparison form + when appropriate. + type must be between SLJIT_C_FLOAT_EQUAL and SLJIT_C_FLOAT_NOT_NAN + type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP + Flags: destroy flags. + Note: if either operand is NaN, the behaviour is undefined for + type <= SLJIT_C_FLOAT_LESS_EQUAL. */ +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w); + /* Set the destination of the jump to this label. */ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sljit_label* label); /* Only for jumps defined with SLJIT_REWRITABLE_JUMP flag. @@ -667,7 +764,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constan /* --------------------------------------------------------------------- */ #define SLJIT_MAJOR_VERSION 0 -#define SLJIT_MINOR_VERSION 82 +#define SLJIT_MINOR_VERSION 87 /* Get the human readable name of the platfrom. Can be useful for debugging on platforms like ARM, where ARM and @@ -675,7 +772,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constan SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void); /* Portble helper function to get an offset of a member. */ -#define SLJIT_OFFSETOF(base, member) ((sljit_w)(&((base*)0x10)->member) - 0x10) +#define SLJIT_OFFSETOF(base, member) ((sljit_w)(&((base*)0x10)->member) - 0x10) #if (defined SLJIT_UTIL_GLOBAL_LOCK && SLJIT_UTIL_GLOBAL_LOCK) /* This global lock is useful to compile common functions. 
*/ diff --git a/harbour/src/3rd/pcre/sjmips32.c b/harbour/src/3rd/pcre/sjmips32.c index d8b7db4956..c0cc8b58bb 100644 --- a/harbour/src/3rd/pcre/sjmips32.c +++ b/harbour/src/3rd/pcre/sjmips32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: diff --git a/harbour/src/3rd/pcre/sjmipsc.c b/harbour/src/3rd/pcre/sjmipsc.c index 87c19c6678..74f04d2da6 100644 --- a/harbour/src/3rd/pcre/sjmipsc.c +++ b/harbour/src/3rd/pcre/sjmipsc.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -26,11 +26,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() { -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) - return "mips-32"; -#else -#error "mips-64 is not yet supported" -#endif + return "MIPS" SLJIT_CPUINFO; } /* Latest MIPS architecture. 
*/ @@ -109,7 +105,10 @@ typedef sljit_ui sljit_ins; #define BREAK (HI(0) | LO(13)) #define C_UN_D (HI(17) | FMT_D | LO(49)) #define C_UEQ_D (HI(17) | FMT_D | LO(51)) +#define C_ULE_D (HI(17) | FMT_D | LO(55)) #define C_ULT_D (HI(17) | FMT_D | LO(53)) +#define DIV (HI(0) | LO(26)) +#define DIVU (HI(0) | LO(27)) #define DIV_D (HI(17) | FMT_D | LO(3)) #define J (HI(2)) #define JAL (HI(3)) @@ -128,6 +127,7 @@ typedef sljit_ui sljit_ins; #define MOVZ (HI(0) | LO(10)) #define MUL_D (HI(17) | FMT_D | LO(2)) #define MULT (HI(0) | LO(24)) +#define MULTU (HI(0) | LO(25)) #define NOP (HI(0) | LO(0)) #define NOR (HI(0) | LO(39)) #define OR (HI(0) | LO(37)) @@ -455,18 +455,18 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, int src1, sljit_w src1w, int src2, sljit_w src2w); -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { sljit_ins base; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->has_locals = local_size > 0; - local_size += (generals + 2 + 4) * sizeof(sljit_w); + local_size += (saveds + 2 + 4) * sizeof(sljit_w); local_size = (local_size + 15) & ~0xf; compiler->local_size = local_size; @@ -486,56 +486,54 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * (int)sizeof(sljit_w)), MOVABLE_INS)); if (compiler->has_locals) FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), MOVABLE_INS)); - if (generals >= 1) - 
FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), MOVABLE_INS)); - if (generals >= 2) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), MOVABLE_INS)); - if (generals >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), MOVABLE_INS)); - if (generals >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), MOVABLE_INS)); - if (generals >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_GENERAL_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (saveds >= 1) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (saveds >= 2) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (saveds >= 3) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (saveds >= 4) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), MOVABLE_INS)); + if (saveds >= 5) + FAIL_IF(push_inst(compiler, STACK_STORE | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), MOVABLE_INS)); if (compiler->has_locals) FAIL_IF(push_inst(compiler, ADDIU_W | S(REAL_STACK_PTR) | T(SLJIT_LOCALS_REG) | IMM(4 * sizeof(sljit_w)), DR(SLJIT_LOCALS_REG))); if (args >= 1) - FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_GENERAL_REG1), DR(SLJIT_GENERAL_REG1))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(4) | TA(0) | D(SLJIT_SAVED_REG1), DR(SLJIT_SAVED_REG1))); if (args >= 2) - FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_GENERAL_REG2), 
DR(SLJIT_GENERAL_REG2))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(5) | TA(0) | D(SLJIT_SAVED_REG2), DR(SLJIT_SAVED_REG2))); if (args >= 3) - FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_GENERAL_REG3), DR(SLJIT_GENERAL_REG3))); + FAIL_IF(push_inst(compiler, ADDU_W | SA(6) | TA(0) | D(SLJIT_SAVED_REG3), DR(SLJIT_SAVED_REG3))); return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { CHECK_ERROR_VOID(); - check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->has_locals = local_size > 0; - local_size += (generals + 2 + 4) * sizeof(sljit_w); + local_size += (saveds + 2 + 4) * sizeof(sljit_w); compiler->local_size = (local_size + 15) & ~0xf; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { int local_size; sljit_ins base; CHECK_ERROR(); - check_sljit_emit_return(compiler, src, srcw); + check_sljit_emit_return(compiler, op, src, srcw); + + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); local_size = compiler->local_size; - - if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, SLJIT_RETURN_REG, 0, TMP_REG1, 0, src, srcw)); - if (local_size <= SIMM_MAX) base = S(REAL_STACK_PTR); else { @@ -546,16 +544,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, } FAIL_IF(push_inst(compiler, STACK_LOAD | base | TA(RETURN_ADDR_REG) | IMM(local_size - 1 * 
(int)sizeof(sljit_w)), RETURN_ADDR_REG)); - if (compiler->generals >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_EREG2))); - if (compiler->generals >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_EREG1))); - if (compiler->generals >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_REG3))); - if (compiler->generals >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_REG2))); - if (compiler->generals >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_GENERAL_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), DR(SLJIT_GENERAL_REG1))); + if (compiler->saveds >= 5) + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG2) | IMM(local_size - 7 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG2))); + if (compiler->saveds >= 4) + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_EREG1) | IMM(local_size - 6 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_EREG1))); + if (compiler->saveds >= 3) + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG3) | IMM(local_size - 5 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG3))); + if (compiler->saveds >= 2) + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG2) | IMM(local_size - 4 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG2))); + if (compiler->saveds >= 1) + FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_SAVED_REG1) | IMM(local_size - 3 * (int)sizeof(sljit_w)), DR(SLJIT_SAVED_REG1))); if (compiler->has_locals) FAIL_IF(push_inst(compiler, STACK_LOAD | base | T(SLJIT_LOCALS_REG) | IMM(local_size - 2 * (int)sizeof(sljit_w)), DR(SLJIT_LOCALS_REG))); @@ -929,6 +927,20 @@ SLJIT_API_FUNC_ATTRIBUTE int 
sljit_emit_op0(struct sljit_compiler *compiler, int return push_inst(compiler, BREAK, UNMOVABLE_INS); case SLJIT_NOP: return push_inst(compiler, NOP, UNMOVABLE_INS); + case SLJIT_UMUL: + case SLJIT_SMUL: + FAIL_IF(push_inst(compiler, (op == SLJIT_UMUL ? MULTU : MULT) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1))); + return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)); + case SLJIT_UDIV: + case SLJIT_SDIV: +#if !(defined SLJIT_MIPS_32_64 && SLJIT_MIPS_32_64) + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); +#endif + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIV ? DIVU : DIV) | S(SLJIT_TEMPORARY_REG1) | T(SLJIT_TEMPORARY_REG2), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_TEMPORARY_REG1), DR(SLJIT_TEMPORARY_REG1))); + return push_inst(compiler, MFHI | D(SLJIT_TEMPORARY_REG2), DR(SLJIT_TEMPORARY_REG2)); } return SLJIT_SUCCESS; @@ -1054,6 +1066,22 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int #endif } +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + check_sljit_get_register_index(reg); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + CHECK_ERROR(); + check_sljit_emit_op_custom(compiler, instruction, size); + SLJIT_ASSERT(size == 4); + + return push_inst(compiler, *(sljit_ins*)instruction, UNMOVABLE_INS); +} + /* --------------------------------------------------------------------- */ /* Floating point operators */ /* --------------------------------------------------------------------- */ @@ -1232,16 +1260,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in /* Other instructions */ /* --------------------------------------------------------------------- */ 
-SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->has_locals = local_size > 0; - local_size += (generals + 2 + 4) * sizeof(sljit_w); + local_size += (saveds + 2 + 4) * sizeof(sljit_w); compiler->local_size = (local_size + 15) & ~0xf; if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) @@ -1555,6 +1583,81 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler #undef RESOLVE_IMM1 #undef RESOLVE_IMM2 +SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, int type, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + struct sljit_jump *jump; + sljit_ins inst; + int if_true; + + CHECK_ERROR_PTR(); + check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w); + + compiler->cache_arg = 0; + compiler->cache_argw = 0; + + if (src1 > SLJIT_FLOAT_REG4) { + PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG1, 1, src1, src1w)); + src1 = TMP_FREG1; + } + if (src2 > SLJIT_FLOAT_REG4) { + PTR_FAIL_IF(emit_fpu_data_transfer(compiler, TMP_FREG2, 1, src2, src2w)); + src2 = TMP_FREG2; + } + + jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); + PTR_FAIL_IF(!jump); + set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); + jump->flags |= IS_BIT16_COND; + type &= 0xff; + + switch (type) { + case SLJIT_C_FLOAT_EQUAL: + inst = C_UEQ_D; + if_true = 1; + break; + case SLJIT_C_FLOAT_NOT_EQUAL: + 
inst = C_UEQ_D; + if_true = 0; + break; + case SLJIT_C_FLOAT_LESS: + inst = C_ULT_D; + if_true = 1; + break; + case SLJIT_C_FLOAT_GREATER_EQUAL: + inst = C_ULT_D; + if_true = 0; + break; + case SLJIT_C_FLOAT_GREATER: + inst = C_ULE_D; + if_true = 0; + break; + case SLJIT_C_FLOAT_LESS_EQUAL: + inst = C_ULE_D; + if_true = 1; + break; + case SLJIT_C_FLOAT_NAN: + inst = C_UN_D; + if_true = 1; + break; + case SLJIT_C_FLOAT_NOT_NAN: + default: /* Make compilers happy. */ + inst = C_UN_D; + if_true = 0; + break; + } + + PTR_FAIL_IF(push_inst(compiler, inst | FT(src2) | FS(src1), UNMOVABLE_INS)); + /* Intentionally the other opcode. */ + PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS)); + PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); + jump->addr = compiler->size; + PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); + return jump; +} + #undef JUMP_LENGTH #undef BR_Z #undef BR_NZ diff --git a/harbour/src/3rd/pcre/sjppc32.c b/harbour/src/3rd/pcre/sjppc32.c index 4b01a769ae..82d0508ac1 100644 --- a/harbour/src/3rd/pcre/sjppc32.c +++ b/harbour/src/3rd/pcre/sjppc32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -31,6 +31,9 @@ static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm) if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm)); + FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); return (imm & 0xffff) ? 
push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; } @@ -44,10 +47,12 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, switch (op) { case SLJIT_ADD: if (flags & ALT_FORM1) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); } if (flags & ALT_FORM2) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); } @@ -55,6 +60,11 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); + return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); + } if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); @@ -69,22 +79,28 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, case SLJIT_SUB: if (flags & ALT_FORM1) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM2) { + if (flags & (ALT_FORM2 | ALT_FORM3)) { SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm); + if (flags & ALT_FORM2) + FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); + if (flags & ALT_FORM3) + return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); + return SLJIT_SUCCESS; } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); + if (flags & (ALT_FORM4 | ALT_FORM5)) { + if (flags & ALT_FORM4) + FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); + if (flags & ALT_FORM5) + FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); + return SLJIT_SUCCESS; } - if (flags & ALT_FORM4) - return push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2)); if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); - if (flags & ALT_FORM5) + if (flags & ALT_FORM6) FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); diff --git a/harbour/src/3rd/pcre/sjppc64.c b/harbour/src/3rd/pcre/sjppc64.c index e3da625adf..cc2ae37eb9 100644 --- a/harbour/src/3rd/pcre/sjppc64.c +++ b/harbour/src/3rd/pcre/sjppc64.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -49,6 +49,9 @@ static int load_immediate(struct sljit_compiler *compiler, int reg, sljit_w imm) if (imm <= SIMM_MAX && imm >= SIMM_MIN) return push_inst(compiler, ADDI | D(reg) | A(0) | IMM(imm)); + if (!(imm & ~0xffff)) + return push_inst(compiler, ORI | S(ZERO_REG) | A(reg) | IMM(imm)); + if (imm <= SLJIT_W(0x7fffffff) && imm >= SLJIT_W(-0x80000000)) { FAIL_IF(push_inst(compiler, ADDIS | D(reg) | A(0) | IMM(imm >> 16))); return (imm & 0xffff) ? push_inst(compiler, ORI | S(reg) | A(reg) | IMM(imm)) : SLJIT_SUCCESS; @@ -146,12 +149,12 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, switch (op) { case SLJIT_ADD: if (flags & ALT_FORM1) { - /* Flags not set: BIN_IMM_EXTS unnecessary. */ + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); } if (flags & ALT_FORM2) { - /* Flags not set: BIN_IMM_EXTS unnecessary. */ + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); } @@ -160,6 +163,11 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, BIN_IMM_EXTS(); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } + if (flags & ALT_FORM4) { + /* Flags does not set: BIN_IMM_EXTS unnecessary. 
*/ + FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); + return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); + } if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); BIN_EXTS(); @@ -176,24 +184,29 @@ static SLJIT_INLINE int emit_single_op(struct sljit_compiler *compiler, int op, case SLJIT_SUB: if (flags & ALT_FORM1) { - /* Flags not set: BIN_IMM_EXTS unnecessary. */ + /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM2) { + if (flags & (ALT_FORM2 | ALT_FORM3)) { SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + if (flags & ALT_FORM2) + FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (flags & ALT_FORM3) + return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + return SLJIT_SUCCESS; } - if (flags & ALT_FORM3) { - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + if (flags & (ALT_FORM4 | ALT_FORM5)) { + if (flags & ALT_FORM4) + FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (flags & ALT_FORM5) + return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); + return SLJIT_SUCCESS; } - if (flags & ALT_FORM4) - return push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 
0 : 1)) | A(src1) | B(src2)); if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); BIN_EXTS(); - if (flags & ALT_FORM5) + if (flags & ALT_FORM6) FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); diff --git a/harbour/src/3rd/pcre/sjppcc.c b/harbour/src/3rd/pcre/sjppcc.c index 9ecaf10795..1b948947c9 100644 --- a/harbour/src/3rd/pcre/sjppcc.c +++ b/harbour/src/3rd/pcre/sjppcc.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -26,11 +26,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() { -#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - return "ppc-32"; -#else - return "ppc-64"; -#endif + return "PowerPC" SLJIT_CPUINFO; } /* Length of an instruction word. @@ -75,9 +71,9 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) /* Instruction bit sections. OE and Rc flag (see ALT_SET_FLAGS). */ -#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 15) | ((flags & ALT_SET_FLAGS) >> 5)) +#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS)) /* Rc flag (see ALT_SET_FLAGS). 
*/ -#define RC(flags) ((flags & ALT_SET_FLAGS) >> 15) +#define RC(flags) ((flags & ALT_SET_FLAGS) >> 10) #define HI(opcode) ((opcode) << 26) #define LO(opcode) ((opcode) << 1) @@ -97,10 +93,15 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define BLR (HI(19) | LO(16) | (0x14 << 21)) #define CNTLZD (HI(31) | LO(58)) #define CNTLZW (HI(31) | LO(26)) +#define CMP (HI(31) | LO(0)) #define CMPI (HI(11)) #define CMPL (HI(31) | LO(32)) #define CMPLI (HI(10)) #define CROR (HI(19) | LO(449)) +#define DIVD (HI(31) | LO(489)) +#define DIVDU (HI(31) | LO(457)) +#define DIVW (HI(31) | LO(491)) +#define DIVWU (HI(31) | LO(459)) #define EXTSB (HI(31) | LO(954)) #define EXTSH (HI(31) | LO(922)) #define EXTSW (HI(31) | LO(986)) @@ -123,6 +124,10 @@ static void ppc_cache_flush(sljit_ins *from, sljit_ins *to) #define MTCTR (HI(31) | LO(467) | 0x90000) #define MTLR (HI(31) | LO(467) | 0x80000) #define MTXER (HI(31) | LO(467) | 0x10000) +#define MULHD (HI(31) | LO(73)) +#define MULHDU (HI(31) | LO(9)) +#define MULHW (HI(31) | LO(75)) +#define MULHWU (HI(31) | LO(11)) #define MULLD (HI(31) | LO(233)) #define MULLI (HI(7)) #define MULLW (HI(31) | LO(235)) @@ -382,32 +387,32 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* Other inp_flags. */ -#define ARG_TEST 0x0100 -#define ALT_FORM1 0x0200 -#define ALT_FORM2 0x0400 -#define ALT_FORM3 0x0800 -#define ALT_FORM4 0x1000 -#define ALT_FORM5 0x2000 +#define ARG_TEST 0x000100 /* Integer opertion and set flags -> requires exts on 64 bit systems. */ -#define ALT_SIGN_EXT 0x4000 +#define ALT_SIGN_EXT 0x000200 /* This flag affects the RC() and OERC() macros. */ -#define ALT_SET_FLAGS 0x8000 +#define ALT_SET_FLAGS 0x000400 +#define ALT_FORM1 0x010000 +#define ALT_FORM2 0x020000 +#define ALT_FORM3 0x040000 +#define ALT_FORM4 0x080000 +#define ALT_FORM5 0x100000 +#define ALT_FORM6 0x200000 - /* Source and destination is register. 
*/ -#define REG_DEST 0x0001 -#define REG1_SOURCE 0x0002 -#define REG2_SOURCE 0x0004 - /* getput_arg_fast returned true. */ -#define FAST_DEST 0x0008 - /* Multiple instructions are required. */ -#define SLOW_DEST 0x0010 -/* ALT_FORM1 0x0200 - ALT_FORM2 0x0400 - ALT_FORM3 0x0800 - ALT_FORM4 0x1000 - ALT_FORM5 0x2000 - ALT_SIGN_EXT 0x4000 - ALT_SET_FLAGS 0x8000 */ +/* Source and destination is register. */ +#define REG_DEST 0x000001 +#define REG1_SOURCE 0x000002 +#define REG2_SOURCE 0x000004 +/* getput_arg_fast returned true. */ +#define FAST_DEST 0x000008 +/* Multiple instructions are required. */ +#define SLOW_DEST 0x000010 +/* +ALT_SIGN_EXT 0x000200 +ALT_SET_FLAGS 0x000400 +ALT_FORM1 0x010000 +... +ALT_FORM6 0x200000 */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) #include "sjppc32.c" @@ -428,43 +433,43 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, int src1, sljit_w src1w, int src2, sljit_w src2w); -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->has_locals = local_size > 0; FAIL_IF(push_inst(compiler, MFLR | D(0))); if (compiler->has_locals) FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(-(int)(sizeof(sljit_w))) )); FAIL_IF(push_inst(compiler, STACK_STORE | S(ZERO_REG) | A(REAL_STACK_PTR) | IMM(-2 * (int)(sizeof(sljit_w))) )); - if (generals >= 1) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) )); - if (generals >= 
2) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) )); - if (generals >= 3) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) )); - if (generals >= 4) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) )); - if (generals >= 5) - FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_GENERAL_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) )); + if (saveds >= 1) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) )); + if (saveds >= 2) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) )); + if (saveds >= 3) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) )); + if (saveds >= 4) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) )); + if (saveds >= 5) + FAIL_IF(push_inst(compiler, STACK_STORE | S(SLJIT_SAVED_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) )); FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(REAL_STACK_PTR) | IMM(sizeof(sljit_w)) )); FAIL_IF(push_inst(compiler, ADDI | D(ZERO_REG) | A(0) | 0)); if (args >= 1) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(SLJIT_GENERAL_REG1) | B(SLJIT_TEMPORARY_REG1))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(SLJIT_SAVED_REG1) | B(SLJIT_TEMPORARY_REG1))); if (args >= 2) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG2) | A(SLJIT_GENERAL_REG2) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG2) | A(SLJIT_SAVED_REG2) | B(SLJIT_TEMPORARY_REG2))); if (args >= 3) - FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG3) | 
A(SLJIT_GENERAL_REG3) | B(SLJIT_TEMPORARY_REG3))); + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG3) | A(SLJIT_SAVED_REG3) | B(SLJIT_TEMPORARY_REG3))); #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - compiler->local_size = (2 + generals + 2) * sizeof(sljit_w) + local_size; + compiler->local_size = (2 + saveds + 2) * sizeof(sljit_w) + local_size; #else - compiler->local_size = (2 + generals + 7 + 8) * sizeof(sljit_w) + local_size; + compiler->local_size = (2 + saveds + 7 + 8) * sizeof(sljit_w) + local_size; #endif compiler->local_size = (compiler->local_size + 15) & ~0xf; @@ -491,30 +496,29 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { CHECK_ERROR_VOID(); - check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->has_locals = local_size > 0; #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - compiler->local_size = (2 + generals + 2) * sizeof(sljit_w) + local_size; + compiler->local_size = (2 + saveds + 2) * sizeof(sljit_w) + local_size; #else - compiler->local_size = (2 + generals + 7 + 8) * sizeof(sljit_w) + local_size; + compiler->local_size = (2 + saveds + 7 + 8) * sizeof(sljit_w) + local_size; #endif compiler->local_size = (compiler->local_size + 15) & ~0xf; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { CHECK_ERROR(); - 
check_sljit_emit_return(compiler, src, srcw); + check_sljit_emit_return(compiler, op, src, srcw); - if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) - FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, SLJIT_RETURN_REG, 0, TMP_REG1, 0, src, srcw)); + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size <= SIMM_MAX) FAIL_IF(push_inst(compiler, ADDI | D(REAL_STACK_PTR) | A(REAL_STACK_PTR) | IMM(compiler->local_size))); @@ -524,16 +528,16 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, } FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(REAL_STACK_PTR) | IMM(sizeof(sljit_w)))); - if (compiler->generals >= 5) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) )); - if (compiler->generals >= 4) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) )); - if (compiler->generals >= 3) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) )); - if (compiler->generals >= 2) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_REG2) | A(REAL_STACK_PTR) | IMM(-4 * (int)(sizeof(sljit_w))) )); - if (compiler->generals >= 1) - FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_GENERAL_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) )); + if (compiler->saveds >= 5) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG2) | A(REAL_STACK_PTR) | IMM(-7 * (int)(sizeof(sljit_w))) )); + if (compiler->saveds >= 4) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_EREG1) | A(REAL_STACK_PTR) | IMM(-6 * (int)(sizeof(sljit_w))) )); + if (compiler->saveds >= 3) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG3) | A(REAL_STACK_PTR) | IMM(-5 * (int)(sizeof(sljit_w))) )); + if (compiler->saveds >= 2) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG2) | A(REAL_STACK_PTR) | IMM(-4 
* (int)(sizeof(sljit_w))) )); + if (compiler->saveds >= 1) + FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_SAVED_REG1) | A(REAL_STACK_PTR) | IMM(-3 * (int)(sizeof(sljit_w))) )); FAIL_IF(push_inst(compiler, STACK_LOAD | D(ZERO_REG) | A(REAL_STACK_PTR) | IMM(-2 * (int)(sizeof(sljit_w))) )); if (compiler->has_locals) FAIL_IF(push_inst(compiler, STACK_LOAD | D(SLJIT_LOCALS_REG) | A(REAL_STACK_PTR) | IMM(-(int)(sizeof(sljit_w))) )); @@ -893,7 +897,7 @@ static int emit_op(struct sljit_compiler *compiler, int op, int inp_flags, int src1_r; int src2_r; int sugg_src2_r = TMP_REG2; - int flags = inp_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); + int flags = inp_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); compiler->cache_arg = 0; compiler->cache_argw = 0; @@ -1028,12 +1032,38 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int CHECK_ERROR(); check_sljit_emit_op0(compiler, op); - op = GET_OPCODE(op); - switch (op) { + switch (GET_OPCODE(op)) { case SLJIT_BREAKPOINT: case SLJIT_NOP: return push_inst(compiler, NOP); break; + case SLJIT_UMUL: + case SLJIT_SMUL: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? MULHDU : MULHD) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)); +#else + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, (GET_OPCODE(op) == SLJIT_UMUL ? 
MULHWU : MULHW) | D(SLJIT_TEMPORARY_REG2) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2)); +#endif + case SLJIT_UDIV: + case SLJIT_SDIV: + FAIL_IF(push_inst(compiler, OR | S(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG1))); +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + if (op & SLJIT_INT_OP) { + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1)); + } + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? DIVDU : DIVD) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1)); +#else + FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_UDIV ? 
DIVWU : DIVW) | D(SLJIT_TEMPORARY_REG1) | A(TMP_REG1) | B(SLJIT_TEMPORARY_REG2))); + FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG1) | B(SLJIT_TEMPORARY_REG2))); + return push_inst(compiler, SUBF | D(SLJIT_TEMPORARY_REG2) | A(SLJIT_TEMPORARY_REG2) | B(TMP_REG1)); +#endif } return SLJIT_SUCCESS; @@ -1138,6 +1168,14 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int #define TEST_UH_IMM(src, srcw) \ (((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000)) +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) +#define TEST_ADD_IMM(src, srcw) \ + (((src) & SLJIT_IMM) && (srcw) <= SLJIT_W(0x7fff7fff) && (srcw) >= SLJIT_W(-0x80000000)) +#else +#define TEST_ADD_IMM(src, srcw) \ + ((src) & SLJIT_IMM) +#endif + #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) #define TEST_UI_IMM(src, srcw) \ (((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff)) @@ -1177,7 +1215,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int switch (GET_OPCODE(op)) { case SLJIT_ADD: - if (!GET_FLAGS(op)) { + if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1194,6 +1232,15 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int compiler->imm = (src1w >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } + /* Range between -1 and -32768 is covered above. 
*/ + if (TEST_ADD_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (TEST_ADD_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + } } if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) { if (TEST_SL_IMM(src2, src2w)) { @@ -1211,7 +1258,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int return emit_op(compiler, SLJIT_ADDC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: - if (!GET_FLAGS(op)) { + if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, -src2w)) { compiler->imm = (-src2w) & 0xffff; return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1224,25 +1271,37 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int compiler->imm = ((-src2w) >> 16) & 0xffff; return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } - } - if (dst == SLJIT_UNUSED && !(GET_FLAGS(op) & ~(SLJIT_SET_E | SLJIT_SET_S))) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + /* Range between -1 and -32768 is covered above. 
*/ + if (TEST_ADD_IMM(src2, -src2w)) { + compiler->imm = -src2w & 0xffffffff; + return emit_op(compiler, SLJIT_ADD, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } } - if (dst == SLJIT_UNUSED && GET_FLAGS(op) == SLJIT_SET_U) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { + if (!(op & SLJIT_SET_U)) { + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { + compiler->imm = src1w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + } } - return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) { + /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); + } + if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) { + compiler->imm = src2w; + return emit_op(compiler, SLJIT_SUB, inp_flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, inp_flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? 
ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); } if (!(op & (SLJIT_SET_E | SLJIT_SET_S | SLJIT_SET_U | SLJIT_SET_O))) { if (TEST_SL_IMM(src2, -src2w)) { @@ -1251,7 +1310,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int } } /* We know ALT_SIGN_EXT is set if it is an SLJIT_INT_OP on 64 bit systems. */ - return emit_op(compiler, SLJIT_SUB, inp_flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM5), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUB, inp_flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: return emit_op(compiler, SLJIT_SUBC, inp_flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); @@ -1324,6 +1383,22 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + check_sljit_get_register_index(reg); + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + CHECK_ERROR(); + check_sljit_emit_op_custom(compiler, instruction, size); + SLJIT_ASSERT(size == 4); + + return push_inst(compiler, *(sljit_ins*)instruction); +} + /* --------------------------------------------------------------------- */ /* Floating point operators */ /* --------------------------------------------------------------------- */ @@ -1475,19 +1550,19 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, in /* Other instructions */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int 
local_size) { CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->has_locals = local_size > 0; #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) - compiler->local_size = (2 + generals + 2) * sizeof(sljit_w) + local_size; + compiler->local_size = (2 + saveds + 2) * sizeof(sljit_w) + local_size; #else - compiler->local_size = (2 + generals + 7 + 8) * sizeof(sljit_w) + local_size; + compiler->local_size = (2 + saveds + 7 + 8) * sizeof(sljit_w) + local_size; #endif compiler->local_size = (compiler->local_size + 15) & ~0xf; diff --git a/harbour/src/3rd/pcre/sjutils.c b/harbour/src/3rd/pcre/sjutils.c index 49d7aaa23a..98beaa0b5e 100644 --- a/harbour/src/3rd/pcre/sjutils.c +++ b/harbour/src/3rd/pcre/sjutils.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: diff --git a/harbour/src/3rd/pcre/sjx8632.c b/harbour/src/3rd/pcre/sjx8632.c index 69ac8fe60e..68bca8441a 100644 --- a/harbour/src/3rd/pcre/sjx8632.c +++ b/harbour/src/3rd/pcre/sjx8632.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -63,23 +63,23 @@ static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size; sljit_ub *buf; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->args = args; compiler->flags_saved = 0; #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) - size = 1 + (generals <= 3 ? generals : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); + size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); #else - size = 1 + (generals <= 3 ? generals : 3) + (args > 0 ? (2 + args * 3) : 0); + size = 1 + (saveds <= 3 ? saveds : 3) + (args > 0 ? 
(2 + args * 3) : 0); #endif buf = (sljit_ub*)ensure_buf(compiler, 1 + size); FAIL_IF(!buf); @@ -92,42 +92,42 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i *buf++ = 0xc4 | (reg_map[TMP_REGISTER] << 3); } #endif - if (generals > 2) - PUSH_REG(reg_map[SLJIT_GENERAL_REG3]); - if (generals > 1) - PUSH_REG(reg_map[SLJIT_GENERAL_REG2]); - if (generals > 0) - PUSH_REG(reg_map[SLJIT_GENERAL_REG1]); + if (saveds > 2) + PUSH_REG(reg_map[SLJIT_SAVED_REG3]); + if (saveds > 1) + PUSH_REG(reg_map[SLJIT_SAVED_REG2]); + if (saveds > 0) + PUSH_REG(reg_map[SLJIT_SAVED_REG1]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (args > 0) { *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | reg_map[SLJIT_TEMPORARY_REG3]; + *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[SLJIT_TEMPORARY_REG3]; } if (args > 1) { *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG2] << 3) | reg_map[SLJIT_TEMPORARY_REG2]; + *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[SLJIT_TEMPORARY_REG2]; } if (args > 2) { *buf++ = 0x8b; - *buf++ = 0x44 | (reg_map[SLJIT_GENERAL_REG3] << 3); + *buf++ = 0x44 | (reg_map[SLJIT_SAVED_REG3] << 3); *buf++ = 0x24; - *buf++ = sizeof(sljit_w) * (3 + 2); /* generals >= 3 as well. */ + *buf++ = sizeof(sljit_w) * (3 + 2); /* saveds >= 3 as well. 
*/ } #else if (args > 0) { *buf++ = 0x8b; - *buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG1] << 3) | reg_map[TMP_REGISTER]; + *buf++ = 0x40 | (reg_map[SLJIT_SAVED_REG1] << 3) | reg_map[TMP_REGISTER]; *buf++ = sizeof(sljit_w) * 2; } if (args > 1) { *buf++ = 0x8b; - *buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG2] << 3) | reg_map[TMP_REGISTER]; + *buf++ = 0x40 | (reg_map[SLJIT_SAVED_REG2] << 3) | reg_map[TMP_REGISTER]; *buf++ = sizeof(sljit_w) * 3; } if (args > 2) { *buf++ = 0x8b; - *buf++ = 0x40 | (reg_map[SLJIT_GENERAL_REG3] << 3) | reg_map[TMP_REGISTER]; + *buf++ = 0x40 | (reg_map[SLJIT_SAVED_REG3] << 3) | reg_map[TMP_REGISTER]; *buf++ = sizeof(sljit_w) * 4; } #endif @@ -136,9 +136,9 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i compiler->temporaries_start = local_size; if (temporaries > 3) local_size += (temporaries - 3) * sizeof(sljit_uw); - compiler->generals_start = local_size; - if (generals > 3) - local_size += (generals - 3) * sizeof(sljit_uw); + compiler->saveds_start = local_size; + if (saveds > 3) + local_size += (saveds - 3) * sizeof(sljit_uw); #ifdef _WIN32 if (local_size > 1024) { @@ -152,47 +152,43 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, local_size); - /* Mov arguments to general registers. 
*/ return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { CHECK_ERROR_VOID(); - check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->args = args; compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); compiler->temporaries_start = compiler->local_size; if (temporaries > 3) compiler->local_size += (temporaries - 3) * sizeof(sljit_uw); - compiler->generals_start = compiler->local_size; - if (generals > 3) - compiler->local_size += (generals - 3) * sizeof(sljit_uw); + compiler->saveds_start = compiler->local_size; + if (saveds > 3) + compiler->local_size += (saveds - 3) * sizeof(sljit_uw); } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { int size; sljit_ub *buf; CHECK_ERROR(); - check_sljit_emit_return(compiler, src, srcw); + check_sljit_emit_return(compiler, op, src, srcw); SLJIT_ASSERT(compiler->args >= 0); compiler->flags_saved = 0; - CHECK_EXTRA_REGS(src, srcw, (void)0); - - if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) - FAIL_IF(emit_mov(compiler, SLJIT_RETURN_REG, 0, src, srcw)); + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) FAIL_IF(emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05, SLJIT_LOCALS_REG, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, compiler->local_size)); - size = 2 + (compiler->generals <= 3 ? compiler->generals : 3); + size = 2 + (compiler->saveds <= 3 ? 
compiler->saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) size += 2; @@ -205,12 +201,12 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, INC_SIZE(size); - if (compiler->generals > 0) - POP_REG(reg_map[SLJIT_GENERAL_REG1]); - if (compiler->generals > 1) - POP_REG(reg_map[SLJIT_GENERAL_REG2]); - if (compiler->generals > 2) - POP_REG(reg_map[SLJIT_GENERAL_REG3]); + if (compiler->saveds > 0) + POP_REG(reg_map[SLJIT_SAVED_REG1]); + if (compiler->saveds > 1) + POP_REG(reg_map[SLJIT_SAVED_REG2]); + if (compiler->saveds > 2) + POP_REG(reg_map[SLJIT_SAVED_REG3]); POP_REG(reg_map[TMP_REGISTER]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) if (compiler->args > 2) @@ -435,23 +431,23 @@ static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { sljit_ub *buf; CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->args = args; compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); compiler->temporaries_start = compiler->local_size; if (temporaries > 3) compiler->local_size += (temporaries - 3) * sizeof(sljit_uw); - compiler->generals_start = compiler->local_size; - if (generals > 3) - compiler->local_size += (generals - 3) * sizeof(sljit_uw); + compiler->saveds_start = compiler->local_size; + if (saveds > 3) + compiler->local_size 
+= (saveds - 3) * sizeof(sljit_uw); CHECK_EXTRA_REGS(dst, dstw, (void)0); diff --git a/harbour/src/3rd/pcre/sjx8664.c b/harbour/src/3rd/pcre/sjx8664.c index 5f577f3ac6..40d875b841 100644 --- a/harbour/src/3rd/pcre/sjx8664.c +++ b/harbour/src/3rd/pcre/sjx8664.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -86,24 +86,24 @@ static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type) return code_ptr; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int size, pushed_size; sljit_ub *buf; CHECK_ERROR(); - check_sljit_emit_enter(compiler, args, temporaries, generals, local_size); + check_sljit_emit_enter(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->flags_saved = 0; - size = generals; + size = saveds; /* Including the return address saved by the call instruction. */ - pushed_size = (generals + 1) * sizeof(sljit_w); + pushed_size = (saveds + 1) * sizeof(sljit_w); #ifndef _WIN64 - if (generals >= 2) - size += generals - 1; + if (saveds >= 2) + size += saveds - 1; #else /* Saving the virtual stack pointer. 
*/ compiler->has_locals = local_size > 0; @@ -111,8 +111,8 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i size += 2; pushed_size += sizeof(sljit_w); } - if (generals >= 4) - size += generals - 3; + if (saveds >= 4) + size += saveds - 3; if (temporaries >= 5) { size += (5 - 4) * 2; pushed_size += sizeof(sljit_w); @@ -124,37 +124,37 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i FAIL_IF(!buf); INC_SIZE(size); - if (generals >= 5) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_EREG2] >= 8, general_ereg2_is_hireg); + if (saveds >= 5) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG2] >= 8, saved_ereg2_is_hireg); *buf++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_GENERAL_EREG2]); + PUSH_REG(reg_lmap[SLJIT_SAVED_EREG2]); } - if (generals >= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_EREG1] >= 8, general_ereg1_is_hireg); + if (saveds >= 4) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_EREG1] >= 8, saved_ereg1_is_hireg); *buf++ = REX_B; - PUSH_REG(reg_lmap[SLJIT_GENERAL_EREG1]); + PUSH_REG(reg_lmap[SLJIT_SAVED_EREG1]); } - if (generals >= 3) { + if (saveds >= 3) { #ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_REG3] >= 8, general_reg3_is_hireg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] >= 8, saved_reg3_is_hireg); *buf++ = REX_B; #else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_REG3] < 8, general_reg3_is_loreg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG3] < 8, saved_reg3_is_loreg); #endif - PUSH_REG(reg_lmap[SLJIT_GENERAL_REG3]); + PUSH_REG(reg_lmap[SLJIT_SAVED_REG3]); } - if (generals >= 2) { + if (saveds >= 2) { #ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_REG2] >= 8, general_reg2_is_hireg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] >= 8, saved_reg2_is_hireg); *buf++ = REX_B; #else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_REG2] < 8, general_reg2_is_loreg); + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG2] < 8, saved_reg2_is_loreg); #endif - 
PUSH_REG(reg_lmap[SLJIT_GENERAL_REG2]); + PUSH_REG(reg_lmap[SLJIT_SAVED_REG2]); } - if (generals >= 1) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_GENERAL_REG1] < 8, general_reg1_is_loreg); - PUSH_REG(reg_lmap[SLJIT_GENERAL_REG1]); + if (saveds >= 1) { + SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SAVED_REG1] < 8, saved_reg1_is_loreg); + PUSH_REG(reg_lmap[SLJIT_SAVED_REG1]); } #ifdef _WIN64 if (temporaries >= 5) { @@ -173,33 +173,33 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i if (args > 0) { *buf++ = REX_W; *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | 0x7; + *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x7; } if (args > 1) { *buf++ = REX_W | REX_R; *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_lmap[SLJIT_GENERAL_REG2] << 3) | 0x6; + *buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG2] << 3) | 0x6; } if (args > 2) { *buf++ = REX_W | REX_R; *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_lmap[SLJIT_GENERAL_REG3] << 3) | 0x2; + *buf++ = 0xc0 | (reg_lmap[SLJIT_SAVED_REG3] << 3) | 0x2; } #else if (args > 0) { *buf++ = REX_W; *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG1] << 3) | 0x1; + *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG1] << 3) | 0x1; } if (args > 1) { *buf++ = REX_W; *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG2] << 3) | 0x2; + *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG2] << 3) | 0x2; } if (args > 2) { *buf++ = REX_W | REX_B; *buf++ = 0x8b; - *buf++ = 0xc0 | (reg_map[SLJIT_GENERAL_REG3] << 3) | 0x0; + *buf++ = 0xc0 | (reg_map[SLJIT_SAVED_REG3] << 3) | 0x0; } #endif } @@ -269,21 +269,20 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, i } #endif - /* Mov arguments to general registers. 
*/ return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_context(struct sljit_compiler *compiler, int args, int temporaries, int saveds, int local_size) { int pushed_size; CHECK_ERROR_VOID(); - check_sljit_fake_enter(compiler, args, temporaries, generals, local_size); + check_sljit_set_context(compiler, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; /* Including the return address saved by the call instruction. */ - pushed_size = (generals + 1) * sizeof(sljit_w); + pushed_size = (saveds + 1) * sizeof(sljit_w); #ifdef _WIN64 compiler->has_locals = local_size > 0; if (local_size > 0) @@ -297,20 +296,16 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, #endif } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int op, int src, sljit_w srcw) { int size; sljit_ub *buf; CHECK_ERROR(); - check_sljit_emit_return(compiler, src, srcw); + check_sljit_emit_return(compiler, op, src, srcw); compiler->flags_saved = 0; - - if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) { - compiler->mode32 = 0; - FAIL_IF(emit_mov(compiler, SLJIT_RETURN_REG, 0, src, srcw)); - } + FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); if (compiler->local_size > 0) { if (compiler->local_size <= 127) { @@ -333,15 +328,15 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, } } - size = 1 + compiler->generals; + size = 1 + compiler->saveds; #ifndef _WIN64 - if (compiler->generals >= 2) - size += compiler->generals - 1; + if (compiler->saveds >= 2) + size += compiler->saveds - 1; #else if (compiler->has_locals) size += 2; - if (compiler->generals >= 4) - size += 
compiler->generals - 3; + if (compiler->saveds >= 4) + size += compiler->saveds - 3; if (compiler->temporaries >= 5) size += (5 - 4) * 2; #endif @@ -360,27 +355,27 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, POP_REG(reg_lmap[SLJIT_TEMPORARY_EREG2]); } #endif - if (compiler->generals >= 1) - POP_REG(reg_map[SLJIT_GENERAL_REG1]); - if (compiler->generals >= 2) { + if (compiler->saveds >= 1) + POP_REG(reg_map[SLJIT_SAVED_REG1]); + if (compiler->saveds >= 2) { #ifndef _WIN64 *buf++ = REX_B; #endif - POP_REG(reg_lmap[SLJIT_GENERAL_REG2]); + POP_REG(reg_lmap[SLJIT_SAVED_REG2]); } - if (compiler->generals >= 3) { + if (compiler->saveds >= 3) { #ifndef _WIN64 *buf++ = REX_B; #endif - POP_REG(reg_lmap[SLJIT_GENERAL_REG3]); + POP_REG(reg_lmap[SLJIT_SAVED_REG3]); } - if (compiler->generals >= 4) { + if (compiler->saveds >= 4) { *buf++ = REX_B; - POP_REG(reg_lmap[SLJIT_GENERAL_EREG1]); + POP_REG(reg_lmap[SLJIT_SAVED_EREG1]); } - if (compiler->generals >= 5) { + if (compiler->saveds >= 5) { *buf++ = REX_B; - POP_REG(reg_lmap[SLJIT_GENERAL_EREG2]); + POP_REG(reg_lmap[SLJIT_SAVED_EREG2]); } RET(); @@ -508,7 +503,7 @@ static sljit_ub* emit_x86_instruction(struct sljit_compiler *compiler, int size, inst_size += 4; } else if (flags & EX86_SHIFT_INS) { - imma &= 0x3f; + imma &= compiler->mode32 ? 
0x1f : 0x3f; if (imma != 1) { inst_size ++; flags |= EX86_BYTE_ARG; @@ -676,15 +671,15 @@ static SLJIT_INLINE int call_with_args(struct sljit_compiler *compiler, int type return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size) +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int saveds, int local_size) { sljit_ub *buf; CHECK_ERROR(); - check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size); + check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, saveds, local_size); compiler->temporaries = temporaries; - compiler->generals = generals; + compiler->saveds = saveds; compiler->local_size = (local_size + sizeof(sljit_uw) - 1) & ~(sizeof(sljit_uw) - 1); #ifdef _WIN64 compiler->local_size += 4 * sizeof(sljit_w); @@ -802,52 +797,36 @@ static int emit_mov_int(struct sljit_compiler *compiler, int sign, return SLJIT_SUCCESS; /* Empty instruction. */ if (src & SLJIT_IMM) { - if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) { + if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw); + FAIL_IF(!code); + *code = 0xc7; + return SLJIT_SUCCESS; + } return emit_load_imm64(compiler, dst, srcw); + } compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(int)srcw, dst, dstw); + code = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_w)(sljit_i)srcw, dst, dstw); FAIL_IF(!code); *code = 0xc7; compiler->mode32 = 0; return SLJIT_SUCCESS; } - dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_GENERAL_REG3) ? dst : TMP_REGISTER; + dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_SAVED_REG3) ? 
dst : TMP_REGISTER; - if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_GENERAL_REG3)) + if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_SAVED_REG3)) dst_r = src; else { if (sign) { code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); FAIL_IF(!code); *code++ = 0x63; - } - else { - if (dst_r == src) { - compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, 0); - FAIL_IF(!code); - *code++ = 0x8b; - compiler->mode32 = 0; - } - /* xor reg, reg. */ - code = emit_x86_instruction(compiler, 1, dst_r, 0, dst_r, 0); - FAIL_IF(!code); - *code++ = 0x33; - if (dst_r != src) { - compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); - FAIL_IF(!code); - *code++ = 0x8b; - compiler->mode32 = 0; - } - else { - compiler->mode32 = 1; - code = emit_x86_instruction(compiler, 1, src, 0, TMP_REGISTER, 0); - FAIL_IF(!code); - *code++ = 0x8b; - compiler->mode32 = 0; - } + } else { + compiler->mode32 = 1; + FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); + compiler->mode32 = 0; } } diff --git a/harbour/src/3rd/pcre/sjx86c.c b/harbour/src/3rd/pcre/sjx86c.c index ebe819de1f..06f204421c 100644 --- a/harbour/src/3rd/pcre/sjx86c.c +++ b/harbour/src/3rd/pcre/sjx86c.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2010 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -26,11 +26,7 @@ SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name() { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - return "x86-32"; -#else - return "x86-64"; -#endif + return "x86" SLJIT_CPUINFO; } /* @@ -80,8 +76,8 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ do; \ } \ - else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \ - w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \ + else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \ + w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \ p = SLJIT_MEM1(SLJIT_LOCALS_REG); \ do; \ } @@ -95,7 +91,7 @@ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = { /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present Note: avoid to use r12 and r13 for memory addessing - therefore r12 is better for GENERAL_EREG than GENERAL_REG. */ + therefore r12 is better for SAVED_EREG than SAVED_REG. */ #ifndef _WIN64 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. 
*/ static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = { @@ -474,32 +470,6 @@ static void SLJIT_CALL sljit_touch_stack(sljit_w local_size) #include "sjx8664.c" #endif -SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) -{ - sljit_ub *buf; - - CHECK_ERROR(); - check_sljit_emit_op0(compiler, op); - - op = GET_OPCODE(op); - switch (op) { - case SLJIT_BREAKPOINT: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); - INC_SIZE(1); - *buf = 0xcc; - break; - case SLJIT_NOP: - buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!buf); - INC_SIZE(1); - *buf = 0x90; - break; - } - - return SLJIT_SUCCESS; -} - static int emit_mov(struct sljit_compiler *compiler, int dst, sljit_w dstw, int src, sljit_w srcw) @@ -568,6 +538,142 @@ static int emit_mov(struct sljit_compiler *compiler, #define EMIT_MOV(compiler, dst, dstw, src, srcw) \ FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op) +{ + sljit_ub *buf; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + int size; +#endif + + CHECK_ERROR(); + check_sljit_emit_op0(compiler, op); + + switch (GET_OPCODE(op)) { + case SLJIT_BREAKPOINT: + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0xcc; + break; + case SLJIT_NOP: + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0x90; + break; + case SLJIT_UMUL: + case SLJIT_SMUL: + case SLJIT_UDIV: + case SLJIT_SDIV: + compiler->flags_saved = 0; +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifdef _WIN64 + SLJIT_COMPILE_ASSERT( + reg_map[SLJIT_TEMPORARY_REG1] == 0 + && reg_map[SLJIT_TEMPORARY_REG2] == 2 + && reg_map[TMP_REGISTER] > 7, + invalid_register_assignment_for_div_mul); +#else + SLJIT_COMPILE_ASSERT( + reg_map[SLJIT_TEMPORARY_REG1] == 0 + && reg_map[SLJIT_TEMPORARY_REG2] < 7 + && reg_map[TMP_REGISTER] == 2, + 
invalid_register_assignment_for_div_mul); +#endif + compiler->mode32 = op & SLJIT_INT_OP; +#endif + + op = GET_OPCODE(op); + if (op == SLJIT_UDIV) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); + buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0); +#else + buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0); +#endif + FAIL_IF(!buf); + *buf = 0x33; + } + + if (op == SLJIT_SDIV) { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) + EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0); +#endif + + /* CDQ instruction */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0x99; +#else + if (compiler->mode32) { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 1); + FAIL_IF(!buf); + INC_SIZE(1); + *buf = 0x99; + } else { + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + INC_SIZE(2); + *buf++ = REX_W; + *buf = 0x99; + } +#endif + } + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + buf = (sljit_ub*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!buf); + INC_SIZE(2); + *buf++ = 0xf7; + *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]); +#else +#ifdef _WIN64 + size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2; +#else + size = (!compiler->mode32) ? 3 : 2; +#endif + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + INC_SIZE(size); +#ifdef _WIN64 + if (!compiler->mode32) + *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0); + else if (op >= SLJIT_UDIV) + *buf++ = REX_B; + *buf++ = 0xf7; + *buf = 0xc0 | ((op >= SLJIT_UDIV) ? 
reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]); +#else + if (!compiler->mode32) + *buf++ = REX_W; + *buf++ = 0xf7; + *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2]; +#endif +#endif + switch (op) { + case SLJIT_UMUL: + *buf |= 4 << 3; + break; + case SLJIT_SMUL: + *buf |= 5 << 3; + break; + case SLJIT_UDIV: + *buf |= 6 << 3; + break; + case SLJIT_SDIV: + *buf |= 7 << 3; + break; + } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) + EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0); +#endif + break; + } + + return SLJIT_SUCCESS; +} + #define ENCODE_PREFIX(prefix) \ do { \ code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \ @@ -853,6 +959,7 @@ static int emit_clz(struct sljit_compiler *compiler, int op, sljit_ub* code; int dst_r; + SLJIT_UNUSED_ARG(op); if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { /* Just set the zero flag. */ EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw); @@ -1718,21 +1825,19 @@ static int emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); } else { - /* This case is really difficult, since ecx can be used for - addressing as well, and we must ensure to work even in that case. */ + /* This case is really difficult, since ecx itself may used for + addressing, and we must ensure to work even in that case. */ + EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); #else /* [esp - 4] is reserved for eflags. 
*/ EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0); #endif - - EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0); FAIL_IF(!code); *code |= mode; - #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); #else @@ -1745,6 +1850,41 @@ static int emit_shift(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } +static int emit_shift_with_flags(struct sljit_compiler *compiler, + sljit_ub mode, int set_flags, + int dst, sljit_w dstw, + int src1, sljit_w src1w, + int src2, sljit_w src2w) +{ + /* The CPU does not set flags if the shift count is 0. */ + if (src2 & SLJIT_IMM) { +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0)) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); +#else + if ((src2w & 0x1f) != 0) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); +#endif + if (!set_flags) + return emit_mov(compiler, dst, dstw, src1, src1w); + /* OR dst, src, 0 */ + return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d, + dst, dstw, src1, src1w, SLJIT_IMM, 0); + } + + if (!set_flags) + return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w); + + if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)) + FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0)); + + FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w)); + + if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) + return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0); + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int src1, sljit_w src1w, @@ -1824,19 +1964,46 @@ 
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: - return emit_shift(compiler, 0x4 << 3, + return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_LSHR: - return emit_shift(compiler, 0x5 << 3, + return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ASHR: - return emit_shift(compiler, 0x7 << 3, + return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); } return SLJIT_SUCCESS; } +SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg) +{ + check_sljit_get_register_index(reg); +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2 + || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2) + return -1; +#endif + return reg_map[reg]; +} + +SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler, + void *instruction, int size) +{ + sljit_ub *buf; + + CHECK_ERROR(); + check_sljit_emit_op_custom(compiler, instruction, size); + SLJIT_ASSERT(size > 0 && size < 16); + + buf = (sljit_ub*)ensure_buf(compiler, 1 + size); + FAIL_IF(!buf); + INC_SIZE(size); + SLJIT_MEMMOVE(buf, instruction, size); + return SLJIT_SUCCESS; +} + /* --------------------------------------------------------------------- */ /* Floating point operators */ /* --------------------------------------------------------------------- */ @@ -2582,7 +2749,7 @@ SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compil *buf++ = 0x0f; *buf++ = 0xb6; - if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS) + if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS) *buf = 0xC0 | (reg_map[dst] << 3); else { *buf = 0xC0;