2008-08-31 10:53 UTC+0200 Viktor Szakats (harbour.01 syenar hu)

* contrib/hbole/oleauto.prg
   * contrib/hbw32/w32_tole.prg
     * MessageBox() calls converted to Alert()s.
       A #translate was added to convert them back to MessageBox()
       for now, since I'm not sure why we should use MessageBox()es here.
       IMO it'd be better to throw RTEs, or handle these cases more 
       "natively". Opinions?

   * contrib/hbole/oleauto.prg
     + Modified to generate RTE when the object could not 
       be created, so that it works more like OLE in hbw32.lib, 
       and the test doesn't crash in this case. I don't have 
       Excel, and this is what I could fix without it.

   * contrib/hbw32/w32_tole.prg
   * contrib/hbw32/w32_ole.c
     ! Typos, minor formatting.

   * common.mak
   * source/hbpcre/Makefile
   * source/hbpcre/cnv_hb2o.bat
   * source/hbpcre/cnv_o2hb.bat
   - source/hbpcre/ucpinter.h
   - source/hbpcre/ucptable.h
   - source/hbpcre/pcrefind.c
   + source/hbpcre/pcreucd.c
   * source/hbpcre/pcrevutf.c
   * source/hbpcre/pcre.h
   * source/hbpcre/pcreconf.c
   * source/hbpcre/pcreinfo.c
   * source/hbpcre/pcremktb.c
   * source/hbpcre/config.h
   * source/hbpcre/pcrenewl.c
   * source/hbpcre/pcreoutf.c
   * source/hbpcre/chartabs.c
   * source/hbpcre/pcrerefc.c
   * source/hbpcre/pcreget.c
   * source/hbpcre/pcrefinf.c
   * source/hbpcre/pcretryf.c
   * source/hbpcre/pcreexec.c
   * source/hbpcre/pcreinal.h
   * source/hbpcre/pcretabs.c
   * source/hbpcre/pcredfa.c
   * source/hbpcre/pcrever.c
   * source/hbpcre/pcrecomp.c
   * source/hbpcre/pcrexcls.c
   * source/hbpcre/pcrestud.c
   * source/hbpcre/ucp.h
     + Updated to PCRE 7.8 RC1
       (will be updated to final pretty soon)
       This version contains the small patches we
       had so far kept locally in Harbour. Thanks to
       Philip Hazel for applying them.
This commit is contained in:
Viktor Szakats
2008-08-31 09:07:57 +00:00
parent ffdf363493
commit edc7abc792
34 changed files with 3152 additions and 3673 deletions

View File

@@ -8,6 +8,61 @@
2008-12-31 13:59 UTC+0100 Foo Bar (foo.bar foobar.org)
*/
2008-08-31 10:53 UTC+0200 Viktor Szakats (harbour.01 syenar hu)
* contrib/hbole/oleauto.prg
* contrib/hbw32/w32_tole.prg
* MessageBox() calls converted to Alert()s.
A #translate was added to convert them back to MessageBox()
for now, since I'm not sure why we should use MessageBox()es here.
IMO it'd be better to throw RTEs, or handle these cases more
"natively". Opinions?
* contrib/hbole/oleauto.prg
+ Modified to generate RTE when the object could not
be created, so that it works more like OLE in hbw32.lib,
and the test doesn't crash in this case. I don't have
Excel, and this is what I could fix without it.
* contrib/hbw32/w32_tole.prg
* contrib/hbw32/w32_ole.c
! Typos, minor formatting.
* common.mak
* source/hbpcre/Makefile
* source/hbpcre/cnv_hb2o.bat
* source/hbpcre/cnv_o2hb.bat
- source/hbpcre/ucpinter.h
- source/hbpcre/ucptable.h
- source/hbpcre/pcrefind.c
+ source/hbpcre/pcreucd.c
* source/hbpcre/pcrevutf.c
* source/hbpcre/pcre.h
* source/hbpcre/pcreconf.c
* source/hbpcre/pcreinfo.c
* source/hbpcre/pcremktb.c
* source/hbpcre/config.h
* source/hbpcre/pcrenewl.c
* source/hbpcre/pcreoutf.c
* source/hbpcre/chartabs.c
* source/hbpcre/pcrerefc.c
* source/hbpcre/pcreget.c
* source/hbpcre/pcrefinf.c
* source/hbpcre/pcretryf.c
* source/hbpcre/pcreexec.c
* source/hbpcre/pcreinal.h
* source/hbpcre/pcretabs.c
* source/hbpcre/pcredfa.c
* source/hbpcre/pcrever.c
* source/hbpcre/pcrecomp.c
* source/hbpcre/pcrexcls.c
* source/hbpcre/pcrestud.c
* source/hbpcre/ucp.h
+ Updated to PCRE 7.8 RC1
(will be updated to final pretty soon)
This version contains the small patches we
had so far kept locally in Harbour. Thanks to
Philip Hazel for applying them.
2008-08-30 11:58 UTC+0200 Viktor Szakats (harbour.01 syenar hu)
* include/hbsetup.ch
+ Added HB_LEGACY_OFF macro to turn off HB_LEGACY_LEVEL.

View File

@@ -760,7 +760,6 @@ PCRE_LIB_OBJS = \
$(OBJ_DIR)\pcreconf$(OBJEXT) \
$(OBJ_DIR)\pcredfa$(OBJEXT) \
$(OBJ_DIR)\pcreexec$(OBJEXT) \
$(OBJ_DIR)\pcrefind$(OBJEXT) \
$(OBJ_DIR)\pcrefinf$(OBJEXT) \
$(OBJ_DIR)\pcreget$(OBJEXT) \
$(OBJ_DIR)\pcreinfo$(OBJEXT) \
@@ -771,6 +770,7 @@ PCRE_LIB_OBJS = \
$(OBJ_DIR)\pcrestud$(OBJEXT) \
$(OBJ_DIR)\pcretabs$(OBJEXT) \
$(OBJ_DIR)\pcretryf$(OBJEXT) \
$(OBJ_DIR)\pcreucd$(OBJEXT) \
$(OBJ_DIR)\pcrever$(OBJEXT) \
$(OBJ_DIR)\pcrevutf$(OBJEXT) \
$(OBJ_DIR)\pcrexcls$(OBJEXT) \

View File

@@ -53,7 +53,11 @@
#include "hbclass.ch"
#include "common.ch"
#include "error.ch"
#translate Alert( <x> ) => MessageBox( 0, <x>, "OLE Error", 0 )
#define EG_OLEEXCEPTION 1001
CLASS TOleAuto
@@ -73,7 +77,16 @@ ENDCLASS
//--------------------------------------------------------------------
STATIC PROCEDURE THROW( oError )
LOCAL lError := Eval( ErrorBlock(), oError )
IF !HB_ISLOGICAL( lError ) .OR. lError
__ErrInHandler()
ENDIF
Break( oError )
RETURN
METHOD New( uObj ) CLASS TOleAuto
LOCAL oErr
IF ISCHARACTER( uObj )
::hObj := CreateOleObject( uObj )
@@ -81,6 +94,22 @@ METHOD New( uObj ) CLASS TOleAuto
::hObj := uObj
ENDIF
IF Empty( ::hObj )
oErr := ErrorNew()
oErr:Args := hb_AParams()
oErr:CanDefault := .F.
oErr:CanRetry := .F.
oErr:CanSubstitute := .T.
oErr:Description := Ole2TxtError()
oErr:GenCode := EG_OLEEXCEPTION
oErr:Operation := ProcName()
oErr:Severity := ES_ERROR
oErr:SubCode := -1
oErr:SubSystem := "TOleAuto"
RETURN Throw( oErr )
ENDIF
RETURN Self
METHOD GetActiveObject( cClass ) CLASS TOleAuto
@@ -89,8 +118,8 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
::hObj := GetOleObject( cClass )
// ::cClassName := cClass
ELSE
MessageBox( 0, "Invalid parameter type to constructor TOleAuto():GetActiveObject()!", "OLE Interface", 0 )
::hObj := 0
Alert( "OLE interface: Invalid parameter type to constructor TOleAuto():GetActiveObject()" )
::hObj := NIL
ENDIF
RETURN Self
@@ -131,7 +160,7 @@ METHOD Invoke( cMethod, uParam1, uParam2, uParam3, uParam4, uParam5, uParam6 ) C
OLEShowException()
RETURN Self
ELSEIF OleError() != 0
MessageBox( 0, cMethod + ": " + Ole2TxtError(), "OLE Error", 0 )
Alert( "OLE error: " + cMethod + ": " + Ole2TxtError() )
ENDIF
RETURN uObj
@@ -157,7 +186,7 @@ METHOD Set( cProperty, uParam1, uParam2, uParam3, uParam4, uParam5, uParam6 ) CL
IF Ole2TxtError() == "DISP_E_EXCEPTION"
OLEShowException()
ELSEIF OleError() != 0
MessageBox( 0, cProperty + ": " + Ole2TxtError(), "OLE Error", 0 )
Alert( "OLE error: " + cProperty + ": " + Ole2TxtError() )
ENDIF
RETURN nil
@@ -192,7 +221,7 @@ METHOD Get( cProperty, uParam1, uParam2, uParam3, uParam4, uParam5, uParam6 ) CL
IF OleIsObject()
RETURN TOleAuto():New( uObj )
ELSEIF OleError() != 0
MessageBox( 0, cProperty + ": " + Ole2TxtError(), "OLE Error", 0 )
Alert( "OLE error: " + cProperty + ": " + Ole2TxtError() )
ENDIF
ENDIF

View File

@@ -121,7 +121,7 @@ static BOOL s_bInit = FALSE;
/* ----------------------------------------------------------------------- */
#define EG_OLEEXECPTION 1001
#define EG_OLEEXCEPTION 1001
#define HB_STRING_ALLOC( p, l ) hb_itemReSizeString( p, l )
static void hb_itemPushForward( PHB_ITEM pItem )
@@ -1623,7 +1623,7 @@ static void OleThrowError( void )
/*HB_TRACE(HB_TR_INFO, ("Desc: '%s'\n", sDescription));*/
pReturn = hb_errRT_SubstParams( hb_parcx( -1 ), EG_OLEEXECPTION, (ULONG) s_nOleError, sDescription, hb_itemGetSymbol( hb_stackBaseItem() )->szName );
pReturn = hb_errRT_SubstParams( hb_parcx( -1 ), EG_OLEEXCEPTION, (ULONG) s_nOleError, sDescription, hb_itemGetSymbol( hb_stackBaseItem() )->szName );
if( fFree )
{

View File

@@ -63,9 +63,11 @@
#include "hbclass.ch"
#include "error.ch"
#translate Alert( <x> ) => MessageBox( 0, <x>, "OLE Error", 0 )
#ifndef __XHARBOUR__
#define EG_OLEEXECPTION 1001
#define EG_OLEEXCEPTION 1001
#xcommand TRY => BEGIN SEQUENCE WITH s_bBreak
#xcommand CATCH [<!oErr!>] => RECOVER [USING <oErr>] <-oErr->
@@ -76,7 +78,7 @@ static s_bBreak := { |oErr| break( oErr ) }
STATIC PROCEDURE THROW( oError )
LOCAL lError := Eval( ErrorBlock(), oError )
IF !HB_ISLOGICAL( lError ) .OR. lError
__ErrInHandler()
__ErrInHandler()
ENDIF
Break( oError )
RETURN
@@ -211,7 +213,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
LOCAL oErr
// Hack incase OLE Server already created and New() is attempted as an OLE Method.
// Hack in case OLE Server already created and New() is attempted as an OLE Method.
IF ::hObj != NIL
RETURN HB_ExecFromArray( Self, "_New", HB_aParams() )
ENDIF
@@ -227,7 +229,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
oErr:CanRetry := .F.
oErr:CanSubstitute := .T.
oErr:Description := OLEExceptionDescription()
oErr:GenCode := EG_OLEEXECPTION
oErr:GenCode := EG_OLEEXCEPTION
oErr:Operation := ProcName()
oErr:Severity := ES_ERROR
oErr:SubCode := -1
@@ -241,7 +243,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
oErr:CanRetry := .F.
oErr:CanSubstitute := .T.
oErr:Description := Ole2TxtError()
oErr:GenCode := EG_OLEEXECPTION
oErr:GenCode := EG_OLEEXCEPTION
oErr:Operation := ProcName()
oErr:Severity := ES_ERROR
oErr:SubCode := -1
@@ -267,7 +269,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
oErr:CanDefault := .F.
oErr:CanRetry := .F.
oErr:CanSubstitute := .T.
oErr:Description := "Invalid argument to contrustor!"
oErr:Description := "Invalid argument to contructor!"
oErr:GenCode := 0
oErr:Operation := ProcName()
oErr:Severity := ES_ERROR
@@ -309,7 +311,7 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
oErr:CanRetry := .F.
oErr:CanSubstitute := .T.
oErr:Description := OLEExceptionDescription()
oErr:GenCode := EG_OLEEXECPTION
oErr:GenCode := EG_OLEEXCEPTION
oErr:Operation := ProcName()
oErr:Severity := ES_ERROR
oErr:SubCode := -1
@@ -323,7 +325,7 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
oErr:CanRetry := .F.
oErr:CanSubstitute := .T.
oErr:Description := Ole2TxtError()
oErr:GenCode := EG_OLEEXECPTION
oErr:GenCode := EG_OLEEXCEPTION
oErr:Operation := ProcName()
oErr:Severity := ES_ERROR
oErr:SubCode := -1
@@ -335,7 +337,7 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
::cClassName := cClass
ELSE
MessageBox( 0, "Invalid parameter type to constructor TOleAuto():GetActiveObject()!", "OLE Interface", 0 )
Alert( "OLE interface: Invalid parameter type to constructor TOleAuto():GetActiveObject()" )
::hObj := 0
ENDIF

View File

@@ -11,7 +11,6 @@ C_SOURCES=\
pcreconf.c \
pcredfa.c \
pcreexec.c \
pcrefind.c \
pcrefinf.c \
pcreget.c \
pcreinfo.c \
@@ -22,6 +21,7 @@ C_SOURCES=\
pcrestud.c \
pcretabs.c \
pcretryf.c \
pcreucd.c \
pcrever.c \
pcrevutf.c \
pcrexcls.c \

View File

@@ -20,7 +20,7 @@ and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#if 1
#if 2875
#include "_hbconf.h"
#endif

View File

@@ -26,8 +26,6 @@ copy config.h ori_dst\config.h.generic
copy pcre.h ori_dst\pcre.h.generic
copy pcreinal.h ori_dst\pcre_internal.h
copy ucp.h ori_dst\ucp.h
copy ucpinter.h ori_dst\ucpinternal.h
copy ucptable.h ori_dst\ucptable.h
copy chartabs.c ori_dst\pcre_chartables.c.dist
copy pcrecomp.c ori_dst\pcre_compile.c
copy pcreconf.c ori_dst\pcre_config.c
@@ -44,7 +42,7 @@ copy pcrerefc.c ori_dst\pcre_refcount.c
copy pcrestud.c ori_dst\pcre_study.c
copy pcretabs.c ori_dst\pcre_tables.c
copy pcretryf.c ori_dst\pcre_try_flipped.c
copy pcrefind.c ori_dst\pcre_ucp_searchfuncs.c
copy pcreucd.c ori_dst\pcre_ucd.c
copy pcrevutf.c ori_dst\pcre_valid_utf8.c
copy pcrever.c ori_dst\pcre_version.c
copy pcrexcls.c ori_dst\pcre_xclass.c

View File

@@ -33,8 +33,6 @@ copy ori_src\config.h.generic config.h
copy ori_src\pcre.h.generic pcre.h
copy ori_src\pcre_internal.h pcreinal.h
copy ori_src\ucp.h ucp.h
copy ori_src\ucpinternal.h ucpinter.h
copy ori_src\ucptable.h ucptable.h
copy ori_src\pcre_chartables.c.dist chartabs.c
copy ori_src\pcre_compile.c pcrecomp.c
copy ori_src\pcre_config.c pcreconf.c
@@ -51,7 +49,7 @@ copy ori_src\pcre_refcount.c pcrerefc.c
copy ori_src\pcre_study.c pcrestud.c
copy ori_src\pcre_tables.c pcretabs.c
copy ori_src\pcre_try_flipped.c pcretryf.c
copy ori_src\pcre_ucp_searchfuncs.c pcrefind.c
copy ori_src\pcre_ucd.c pcreucd.c
copy ori_src\pcre_valid_utf8.c pcrevutf.c
copy ori_src\pcre_version.c pcrever.c
copy ori_src\pcre_xclass.c pcrexcls.c

View File

@@ -238,13 +238,13 @@ them both to 0; an emulation function will be used. */
#define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 7.7"
#define PACKAGE_STRING "PCRE 7.8-RC1"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre"
/* Define to the version of this package. */
#define PACKAGE_VERSION "7.7"
#define PACKAGE_VERSION "7.8-RC1"
/* If you are compiling for a system other than a Unix-like system or
@@ -297,7 +297,7 @@ them both to 0; an emulation function will be used. */
/* Version number of package */
#ifndef VERSION
#define VERSION "7.7"
#define VERSION "7.8-RC1"
#endif
/* Define to empty if `const' does not conform to ANSI C. */

View File

@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* The current PCRE version information. */
#define PCRE_MAJOR 7
#define PCRE_MINOR 7
#define PCRE_PRERELEASE
#define PCRE_DATE 2008-05-07
#define PCRE_MINOR 8
#define PCRE_PRERELEASE -RC1
#define PCRE_DATE 2008-08-25
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate

View File

@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
supporting internal functions that are not used by other modules. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -1451,8 +1451,7 @@ for (;;)
break;
}
#else
/* pacify warnings */
(void)(utf8);
(void)(utf8); /* Keep compiler happy by referencing function argument */
#endif
}
}
@@ -1547,8 +1546,7 @@ for (;;)
break;
}
#else
/* pacify warnings */
(void)(utf8);
(void)(utf8); /* Keep compiler happy by referencing function argument */
#endif
}
}
@@ -2021,7 +2019,7 @@ get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
unsigned int c, othercase, next;
for (c = *cptr; c <= d; c++)
{ if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
{ if ((othercase = UCD_OTHERCASE(c)) != c) break; }
if (c > d) return FALSE;
@@ -2030,7 +2028,7 @@ next = othercase + 1;
for (++c; c <= d; c++)
{
if (_pcre_ucp_othercase(c) != next) break;
if (UCD_OTHERCASE(c) != next) break;
next++;
}
@@ -2141,8 +2139,7 @@ if (next >= 0) switch(op_code)
#ifdef SUPPORT_UTF8
if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
#else
/* pacify warnings */
(void)(utf8_char);
(void)(utf8_char); /* Keep compiler happy by referencing function argument */
#endif
return item != next;
@@ -2161,7 +2158,7 @@ if (next >= 0) switch(op_code)
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase((unsigned int)next);
othercase = UCD_OTHERCASE((unsigned int)next);
#else
othercase = NOTACHAR;
#endif
@@ -2182,7 +2179,7 @@ if (next >= 0) switch(op_code)
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase(next);
othercase = UCD_OTHERCASE(next);
#else
othercase = NOTACHAR;
#endif
@@ -3348,7 +3345,7 @@ for (;; ptr++)
if ((options & PCRE_CASELESS) != 0)
{
unsigned int othercase;
if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
if ((othercase = UCD_OTHERCASE(c)) != c)
{
*class_utf8data++ = XCL_SINGLE;
class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
@@ -4929,10 +4926,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
both phases.
If we are not at the pattern start, compile code to change the ims
options if this setting actually changes any of them. We also pass the
new setting back so that it can be put at the start of any following
branches, and when this group ends (if we are in a group), a resetting
item can be compiled. */
options if this setting actually changes any of them, and reset the
greedy defaults and the case value for firstbyte and reqbyte. */
if (*ptr == ')')
{
@@ -4940,7 +4935,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
{
cd->external_options = newoptions;
options = newoptions;
}
else
{
@@ -4949,17 +4943,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
*code++ = OP_OPT;
*code++ = newoptions & PCRE_IMS;
}
/* Change options at this level, and pass them back for use
in subsequent branches. Reset the greedy defaults and the case
value for firstbyte and reqbyte. */
*optionsptr = options = newoptions;
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
}
/* Change options at this level, and pass them back for use
in subsequent branches. When not at the start of the pattern, this
information is also necessary so that a resetting item can be
compiled at the end of a group (if we are in a group). */
*optionsptr = options = newoptions;
previous = NULL; /* This item can't be repeated */
continue; /* It is complete */
}
@@ -5953,7 +5947,7 @@ Returns: pointer to compiled data block, or NULL on error,
with errorptr and erroroffset set
*/
PCRE_EXP_DEFN pcre *
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
@@ -5961,7 +5955,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
}
PCRE_EXP_DEFN pcre *
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
{

View File

@@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains the external function pcre_config(). */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -62,7 +62,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
{
switch (what)

View File

@@ -44,7 +44,7 @@ FSM). This is NOT Perl- compatible, but it has advantages in certain
applications. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -512,9 +512,6 @@ for (;;)
const uschar *code;
int state_offset = current_state->offset;
int count, codevalue;
#ifdef SUPPORT_UCP
int chartype, script;
#endif
#ifdef DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -825,7 +822,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
const ucd_record * prop = GET_UCD(c);
switch(code[1])
{
case PT_ANY:
@@ -833,19 +830,19 @@ for (;;)
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[2];
OK = _pcre_ucp_gentype[prop->chartype] == code[2];
break;
case PT_PC:
OK = chartype == code[2];
OK = prop->chartype == code[2];
break;
case PT_SC:
OK = script == code[2];
OK = prop->script == code[2];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -994,7 +991,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
const ucd_record * prop = GET_UCD(c);
switch(code[2])
{
case PT_ANY:
@@ -1002,19 +999,19 @@ for (;;)
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[3];
OK = _pcre_ucp_gentype[prop->chartype] == code[3];
break;
case PT_PC:
OK = chartype == code[3];
OK = prop->chartype == code[3];
break;
case PT_SC:
OK = script == code[3];
OK = prop->script == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1043,7 +1040,7 @@ for (;;)
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1057,7 +1054,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1216,7 +1213,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
const ucd_record * prop = GET_UCD(c);
switch(code[2])
{
case PT_ANY:
@@ -1224,19 +1221,19 @@ for (;;)
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[3];
OK = _pcre_ucp_gentype[prop->chartype] == code[3];
break;
case PT_PC:
OK = chartype == code[3];
OK = prop->chartype == code[3];
break;
case PT_SC:
OK = script == code[3];
OK = prop->script == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1274,7 +1271,7 @@ for (;;)
QS2:
ADD_ACTIVE(state_offset + 2, 0);
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1289,7 +1286,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1463,7 +1460,7 @@ for (;;)
if (clen > 0)
{
BOOL OK;
int category = _pcre_ucp_findprop(c, &chartype, &script);
const ucd_record * prop = GET_UCD(c);
switch(code[4])
{
case PT_ANY:
@@ -1471,19 +1468,19 @@ for (;;)
break;
case PT_LAMP:
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
break;
case PT_GC:
OK = category == code[5];
OK = _pcre_ucp_gentype[prop->chartype] == code[5];
break;
case PT_PC:
OK = chartype == code[5];
OK = prop->chartype == code[5];
break;
case PT_SC:
OK = script == code[5];
OK = prop->script == code[5];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1516,7 +1513,7 @@ for (;;)
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }
count = current_state->count; /* Number already matched */
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1530,7 +1527,7 @@ for (;;)
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1710,7 +1707,7 @@ for (;;)
other case of the character. */
#ifdef SUPPORT_UCP
othercase = _pcre_ucp_othercase(c);
othercase = UCD_OTHERCASE(c);
#else
othercase = NOTACHAR;
#endif
@@ -1735,7 +1732,7 @@ for (;;)
to wait for them to pass before continuing. */
case OP_EXTUNI:
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1743,7 +1740,7 @@ for (;;)
{
int nclen = 1;
GETCHARLEN(c, nptr, nclen);
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
if (UCD_CATEGORY(c) != ucp_M) break;
ncount++;
nptr += nclen;
}
@@ -1911,7 +1908,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1949,7 +1946,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1985,7 +1982,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2017,7 +2014,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2052,7 +2049,7 @@ for (;;)
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
otherd = _pcre_ucp_othercase(d);
otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2508,7 +2505,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
< -1 => some kind of unexpected problem
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
@@ -2736,7 +2733,18 @@ for (;;)
if (firstline)
{
const uschar *t = current_subject;
USPTR t = current_subject;
#ifdef SUPPORT_UTF8
if (utf8)
{
while (t < md->end_subject && !IS_NEWLINE(t))
{
t++;
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
}
}
else
#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
@@ -2758,7 +2766,20 @@ for (;;)
{
if (current_subject > md->start_subject + start_offset)
{
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
#ifdef SUPPORT_UTF8
if (utf8)
{
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
{
current_subject++;
while(current_subject < end_subject &&
(*current_subject & 0xc0) == 0x80)
current_subject++;
}
}
else
#endif
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
current_subject++;
/* If we have just passed a CR and the newline option is ANY or

View File

@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
possible. There are also some static supporting functions. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -158,13 +158,39 @@ printf("\n");
if (length > md->end_subject - eptr) return FALSE;
/* Separate the caselesss case for speed */
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
properly if Unicode properties are supported. Otherwise, we can check only
ASCII characters. */
if ((ims & PCRE_CASELESS) != 0)
{
#ifdef SUPPORT_UTF8
#ifdef SUPPORT_UCP
if (md->utf8)
{
USPTR endptr = eptr + length;
while (eptr < endptr)
{
int c, d;
GETCHARINC(c, eptr);
GETCHARINC(d, p);
if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
}
}
else
#endif
#endif
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
is no UCP support. */
while (length-- > 0)
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
}
/* In the caseful case, we can just compare the bytes, whether or not we
are in UTF-8 mode. */
else
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
@@ -1653,8 +1679,7 @@ for (;;)
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
const ucd_record * prop = GET_UCD(c);
switch(ecode[1])
{
@@ -1663,24 +1688,24 @@ for (;;)
break;
case PT_LAMP:
if ((chartype == ucp_Lu ||
chartype == ucp_Ll ||
chartype == ucp_Lt) == (op == OP_NOTPROP))
if ((prop->chartype == ucp_Lu ||
prop->chartype == ucp_Ll ||
prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_GC:
if ((ecode[2] != category) == (op == OP_PROP))
if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_PC:
if ((ecode[2] != chartype) == (op == OP_PROP))
if ((ecode[2] != prop->chartype) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
case PT_SC:
if ((ecode[2] != script) == (op == OP_PROP))
if ((ecode[2] != prop->script) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
@@ -1699,8 +1724,7 @@ for (;;)
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
int category = UCD_CATEGORY(c);
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -1709,7 +1733,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
category = _pcre_ucp_findprop(c, &chartype, &script);
category = UCD_CATEGORY(c);
if (category != ucp_M) break;
eptr += len;
}
@@ -2174,7 +2198,7 @@ for (;;)
if (fc != dc)
{
#ifdef SUPPORT_UCP
if (dc != _pcre_ucp_othercase(fc))
if (dc != UCD_OTHERCASE(fc))
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -2265,7 +2289,7 @@ for (;;)
#ifdef SUPPORT_UCP
unsigned int othercase;
if ((ims & PCRE_CASELESS) != 0 &&
(othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
(othercase = UCD_OTHERCASE(fc)) != fc)
oclength = _pcre_ord2utf8(othercase, occhars);
else oclength = 0;
#endif /* SUPPORT_UCP */
@@ -2585,10 +2609,11 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
if (fi >= max || eptr >= md->end_subject || fc == d)
RRETURN(MATCH_NOMATCH);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
}
else
@@ -2694,9 +2719,9 @@ for (;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (fi >= max || eptr >= md->end_subject || fc == d)
RRETURN(MATCH_NOMATCH);
if (fc == d) RRETURN(MATCH_NOMATCH);
}
}
else
@@ -2870,7 +2895,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@@ -2883,7 +2908,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2894,7 +2919,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2905,7 +2930,7 @@ for (;;)
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2924,7 +2949,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -2933,7 +2958,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -3349,7 +3374,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3364,7 +3389,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -3377,7 +3402,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -3390,7 +3415,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -3412,7 +3437,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -3421,7 +3446,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -3739,7 +3764,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3754,7 +3779,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
break;
eptr+= len;
@@ -3767,7 +3792,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
break;
eptr+= len;
@@ -3780,7 +3805,7 @@ for (;;)
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
break;
eptr+= len;
@@ -3809,7 +3834,7 @@ for (;;)
{
if (eptr >= md->end_subject) break;
GETCHARINCTEST(c, eptr);
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
while (eptr < md->end_subject)
{
@@ -3818,7 +3843,7 @@ for (;;)
{
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -3840,7 +3865,7 @@ for (;;)
BACKCHAR(eptr);
GETCHARLEN(c, eptr, len);
}
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr--;
}
@@ -4360,7 +4385,7 @@ Returns: > 0 => success; value is the number of elements filled in
< -1 => some kind of unexpected problem
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
@@ -4672,31 +4697,53 @@ for(;;)
if (firstline)
{
USPTR t = start_match;
#ifdef SUPPORT_UTF8
if (utf8)
{
while (t < md->end_subject && !IS_NEWLINE(t))
{
t++;
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
}
}
else
#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
/* Now test for a unique first byte */
/* Now advance to a unique first byte if there is one. */
if (first_byte >= 0)
{
if (first_byte_caseless)
while (start_match < end_subject &&
md->lcc[*start_match] != first_byte)
{ NEXTCHAR(start_match); }
while (start_match < end_subject && md->lcc[*start_match] != first_byte)
start_match++;
else
while (start_match < end_subject && *start_match != first_byte)
{ NEXTCHAR(start_match); }
start_match++;
}
/* Or to just after a linebreak for a multiline match if possible */
/* Or to just after a linebreak for a multiline match */
else if (startline)
{
if (start_match > md->start_subject + start_offset)
{
while (start_match <= end_subject && !WAS_NEWLINE(start_match))
{ NEXTCHAR(start_match); }
#ifdef SUPPORT_UTF8
if (utf8)
{
while (start_match < end_subject && !WAS_NEWLINE(start_match))
{
start_match++;
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
start_match++;
}
}
else
#endif
while (start_match < end_subject && !WAS_NEWLINE(start_match))
start_match++;
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
and we are now at a LF, advance the match position by one more character.
@@ -4710,16 +4757,15 @@ for(;;)
}
}
/* Or to a non-unique first char after study */
/* Or to a non-unique first byte after study */
else if (start_bits != NULL)
{
while (start_match < end_subject)
{
register unsigned int c = *start_match;
if ((start_bits[c/8] & (1 << (c&7))) == 0)
{ NEXTCHAR(start_match); }
else break;
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
else break;
}
}

View File

@@ -1,179 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains code for searching the table of Unicode character
properties. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
#include "ucp.h" /* Category definitions */
#include "ucpinter.h" /* Internal table details */
#include "ucptable.h" /* The table itself */
/* Table to translate from particular type value to the general value.
It is indexed by the detailed character type (the value that
_pcre_ucp_findprop() stores through type_ptr) and yields the general
category: ucp_C, ucp_L, ucp_M, ucp_N, ucp_P, ucp_S or ucp_Z. The comment on
each row names the detailed types that the row's entries stand for.
NOTE(review): the row comments may not list the types in exactly the enum
order used in ucp.h - confirm against that header before relying on them. */
static const int ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/*************************************************
* Search table and return type *
*************************************************/
/* Look up the Unicode properties of a character by a binary chop over
ucp_table. Three values are produced: the general category (ucp_C, ucp_L,
etc.) as the function result, plus the detailed character type (ucp_Lu,
ucp_Nd, etc.) and the script (ucp_Latin, etc.) through the two pointers.

Arguments:
  c           the character value
  type_ptr    the detailed character type is returned here
  script_ptr  the script is returned here

Returns:      the character type category; if no table entry covers c, the
              outputs are set to ucp_Cn and ucp_Common and ucp_C is returned
*/

int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
int lo = 0;
int hi = sizeof(ucp_table)/sizeof(cnode);

/* Invariant: any entry that covers c lies in the half-open range [lo, hi).
The masked table expressions are deliberately repeated instead of being
cached in locals: the original author measured (gcc 3.4.4 on Linux) that
caching them made the search a lot slower. */

while (lo < hi)
  {
  const int probe = (lo + hi) >> 1;

  /* c is below this entry's first code point: continue in the lower half. */

  if (c < (ucp_table[probe].f0 & f0_charmask))
    {
    hi = probe;
    continue;
    }

  /* A hit is either an exact match on the entry's code point, or, for a
  range entry, any value up to code point + range offset. */

  if (c == (ucp_table[probe].f0 & f0_charmask) ||
      ((ucp_table[probe].f0 & f0_rangeflag) != 0 &&
       c <= (ucp_table[probe].f0 & f0_charmask) +
            (ucp_table[probe].f1 & f1_rangemask)))
    {
    *script_ptr = (ucp_table[probe].f0 & f0_scriptmask) >> f0_scriptshift;
    *type_ptr = (ucp_table[probe].f1 & f1_typemask) >> f1_typeshift;
    return ucp_gentype[*type_ptr];
    }

  /* c is above everything this entry covers: continue in the upper half. */

  lo = probe + 1;
  }

/* Nothing in the table covers c. */

*type_ptr = ucp_Cn;
*script_ptr = ucp_Common;
return ucp_C;
}
/*************************************************
* Search table and return other case *
*************************************************/
/* Find the "other case" partner of a character by a binary chop over
ucp_table. A partner exists only when c is matched by a single-character
(non-range) entry whose case offset is non-zero.

Arguments:
  c           the character value

Returns:      the other case, or NOTACHAR if the matching entry is a range
              or has a zero case offset; (unsigned int) -1 if no table entry
              covers c
*/

unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
int lo = 0;
int hi = sizeof(ucp_table)/sizeof(cnode);

/* Invariant: any entry that covers c lies in the half-open range [lo, hi).
The masked table expressions are deliberately repeated instead of being
cached in locals: the original author measured (gcc 3.4.4 on Linux) that
caching them made the search a lot slower. */

while (lo < hi)
  {
  const int probe = (lo + hi) >> 1;

  /* c is below this entry's first code point: continue in the lower half. */

  if (c < (ucp_table[probe].f0 & f0_charmask))
    {
    hi = probe;
    continue;
    }

  /* A hit is either an exact match on the entry's code point, or, for a
  range entry, any value up to code point + range offset. */

  if (c == (ucp_table[probe].f0 & f0_charmask) ||
      ((ucp_table[probe].f0 & f0_rangeflag) != 0 &&
       c <= (ucp_table[probe].f0 & f0_charmask) +
            (ucp_table[probe].f1 & f1_rangemask)))
    {
    int delta;

    /* Range entries carry a range length, not a case offset. */

    if ((ucp_table[probe].f0 & f0_rangeflag) != 0) return NOTACHAR;

    /* The case offset is a signed value held in the low bits of f1; when
    any of the f1_caseneg bits is set, OR-ing in f1_caseneg sign-extends it
    to a negative int. */

    delta = ucp_table[probe].f1 & f1_casemask;
    if ((delta & f1_caseneg) != 0) delta |= f1_caseneg;
    return (delta == 0)? NOTACHAR : c + delta;
    }

  /* c is above everything this entry covers: continue in the upper half. */

  lo = probe + 1;
  }

/* Nothing in the table covers c. */

return (unsigned int) -1;
}
/* End of pcre_ucp_searchfuncs.c */

View File

@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
information about a compiled pattern. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -65,7 +65,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{

View File

@@ -43,7 +43,7 @@ from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -65,7 +65,7 @@ Returns: the number of the named parentheses, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
{
int rc;
@@ -114,7 +114,7 @@ Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
{
@@ -231,7 +231,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
{
@@ -276,7 +276,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
{
@@ -308,7 +308,7 @@ Returns: if successful: 0
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
{
@@ -353,7 +353,7 @@ Argument: the result of a previous pcre_get_substring_list()
Returns: nothing
*/
void
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
{
(pcre_free)((void *)pointer);
@@ -386,7 +386,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
{
@@ -433,7 +433,7 @@ Returns: if successful:
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
{
@@ -456,7 +456,7 @@ Argument: the result of a previous pcre_get_substring()
Returns: nothing
*/
void
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
{
(pcre_free)((void *)pointer);

View File

@@ -132,6 +132,20 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
# endif
#endif
/* When compiling with the MSVC compiler, it is sometimes necessary to include
a "calling convention" before exported function names. (This is secondhand
information; I know nothing about MSVC myself). For example, something like
void __cdecl function(....)
might be needed. In order to make this easy, all the exported functions have
PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
set, we ensure here that it has no effect. */
#ifndef PCRE_CALL_CONVENTION
#define PCRE_CALL_CONVENTION
#endif
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
cannot determine these outside the compilation (e.g. by running a program as
part of "configure") because PCRE is often cross-compiled for use on other
@@ -140,16 +154,20 @@ preprocessor time in standard C environments. */
#if USHRT_MAX == 65535
typedef unsigned short pcre_uint16;
typedef short pcre_int16;
#elif UINT_MAX == 65535
typedef unsigned int pcre_uint16;
typedef int pcre_int16;
#else
#error Cannot determine a type for 16-bit unsigned integers
#endif
#if UINT_MAX == 4294967295
typedef unsigned int pcre_uint32;
typedef int pcre_int32;
#elif ULONG_MAX == 4294967295
typedef unsigned long int pcre_uint32;
typedef long int pcre_int32;
#else
#error Cannot determine a type for 32-bit unsigned integers
#endif
@@ -363,7 +381,6 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
support is omitted, we don't even define it. */
#ifndef SUPPORT_UTF8
#define NEXTCHAR(p) p++;
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
@@ -373,13 +390,6 @@ support is omitted, we don't even define it. */
#else /* SUPPORT_UTF8 */
/* Advance a character pointer one byte in non-UTF-8 mode and by one character
in UTF-8 mode. */
#define NEXTCHAR(p) \
p++; \
if (utf8) { while((*p & 0xc0) == 0x80) p++; }
/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */
@@ -549,7 +559,8 @@ variable-length repeat, or a anything other than literal characters. */
#define REQ_CASELESS 0x0100 /* indicates caselessness */
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
/* Miscellaneous definitions */
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
environments where these macros are defined elsewhere. */
#ifndef FALSE
typedef int BOOL;
@@ -1128,13 +1139,38 @@ extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
extern int _pcre_ord2utf8(int, uschar *);
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
const pcre_study_data *, pcre_study_data *);
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
extern unsigned int _pcre_ucp_othercase(const unsigned int);
extern int _pcre_valid_utf8(const uschar *, int);
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
int *, BOOL);
extern BOOL _pcre_xclass(int, const uschar *);
/* Unicode character database (UCD). Each ucd_record describes the properties
shared by one or more characters; the record for a character is located by
the GET_UCD() macro via the two stage tables declared here. */
typedef struct {
uschar script; /* script value, as read by UCD_SCRIPT() */
uschar chartype; /* detailed type, as read by UCD_CHARTYPE(); also used to index _pcre_ucp_gentype */
pcre_int32 other_case; /* signed offset that UCD_OTHERCASE() adds to the character value */
} ucd_record;
/* The records themselves; GET_UCD() indexes this with a stage 2 entry. */
extern const ucd_record _pcre_ucd_records[];
/* Stage 1: indexed by character value / UCD_BLOCK_SIZE; selects a stage 2 block. */
extern const uschar _pcre_ucd_stage1[];
/* Stage 2: indexed by (stage 1 entry * UCD_BLOCK_SIZE) + character value %
UCD_BLOCK_SIZE; the entry is an index into _pcre_ucd_records. */
extern const pcre_uint16 _pcre_ucd_stage2[];
/* Maps a detailed character type to its general category (see UCD_CATEGORY). */
extern const int _pcre_ucp_gentype[];
/* UCD access macros. GET_UCD(ch) yields a pointer to the ucd_record for the
character value ch, using a two-stage lookup: stage 1 is indexed by
ch / UCD_BLOCK_SIZE and selects a block, and stage 2 is indexed by that block
number times UCD_BLOCK_SIZE plus ch % UCD_BLOCK_SIZE and gives the record
index. The other macros read a single property from that record.

Every use of the argument and every expansion is parenthesised, so that an
expression argument (for example "c + 1") or an expansion embedded in a larger
expression is grouped as the caller intended. The argument is evaluated more
than once, so it must not have side effects. */
#define UCD_BLOCK_SIZE 128
#define GET_UCD(ch) (_pcre_ucd_records + \
_pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \
UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
#define UCD_CHARTYPE(ch) (GET_UCD(ch)->chartype)
#define UCD_SCRIPT(ch) (GET_UCD(ch)->script)
#define UCD_CATEGORY(ch) (_pcre_ucp_gentype[UCD_CHARTYPE(ch)])
#define UCD_OTHERCASE(ch) ((ch) + GET_UCD(ch)->other_case)
#endif
/* End of pcre_internal.h */

View File

@@ -43,7 +43,7 @@ information about a compiled pattern. However, use of this function is now
deprecated, as it has been superseded by pcre_fullinfo(). */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -72,7 +72,7 @@ Returns: number of capturing subpatterns
or negative values on error
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;

View File

@@ -45,7 +45,7 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
# if 1
# if 2875
# include "_hbconf.h"
# endif
# include "pcreinal.h"

View File

@@ -47,7 +47,7 @@ and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#if 1
#if 2875
#include "_hbconf.h"
#endif

View File

@@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -78,11 +78,10 @@ for (j = i; j > 0; j--)
*buffer = _pcre_utf8_table2[i] | cvalue;
return i + 1;
#else
/* pacify warnings */
(void)(cvalue);
(void)(buffer);
return 0; /* Keep compiler happy; this function won't ever be */
#endif /* called when SUPPORT_UTF8 is not defined. */
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
return 0;
#endif
}
/* End of pcre_ord2utf8.c */

View File

@@ -44,7 +44,7 @@ pattern data block. This might be helpful in applications where the block is
shared by different users. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -68,7 +68,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
PCRE_EXP_DEFN int
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;

View File

@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
supporting functions. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -220,6 +220,7 @@ do
/* SKIPZERO skips the bracket. */
case OP_SKIPZERO:
tcode++;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@@ -503,7 +504,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
NULL on error or if no optimization possible
*/
PCRE_EXP_DEFN pcre_extra *
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];

View File

@@ -44,7 +44,7 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* Table to translate from particular type value to the general value.
It is indexed by the detailed character type (the UCD_CATEGORY() macro
indexes it with UCD_CHARTYPE(ch)) and yields the general category: ucp_C,
ucp_L, ucp_M, ucp_N, ucp_P, ucp_S or ucp_Z. The comment on each row names the
detailed types that the row's entries stand for.
NOTE(review): the row comments may not list the types in exactly the enum
order used in ucp.h - confirm against that header before relying on them. */
const int _pcre_ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
@@ -94,7 +107,10 @@ field of each entry. However, that leads to a large number of relocations when
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
data is unlikely. */
data are unlikely.
July 2008: There is now a script called maint/GenerateUtt.py which can be used
to generate this data instead of maintaining it entirely by hand. */
const char _pcre_utt_names[] =
"Any\0"
@@ -108,8 +124,10 @@ const char _pcre_utt_names[] =
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
"Carian\0"
"Cc\0"
"Cf\0"
"Cham\0"
"Cherokee\0"
"Cn\0"
"Co\0"
@@ -136,12 +154,14 @@ const char _pcre_utt_names[] =
"Inherited\0"
"Kannada\0"
"Katakana\0"
"Kayah_Li\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
"Lepcha\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
@@ -149,6 +169,8 @@ const char _pcre_utt_names[] =
"Lo\0"
"Lt\0"
"Lu\0"
"Lycian\0"
"Lydian\0"
"M\0"
"Malayalam\0"
"Mc\0"
@@ -163,6 +185,7 @@ const char _pcre_utt_names[] =
"Nl\0"
"No\0"
"Ogham\0"
"Ol_Chiki\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
@@ -177,14 +200,17 @@ const char _pcre_utt_names[] =
"Pi\0"
"Po\0"
"Ps\0"
"Rejang\0"
"Runic\0"
"S\0"
"Saurashtra\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
"Sundanese\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
@@ -197,6 +223,7 @@ const char _pcre_utt_names[] =
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
"Vai\0"
"Yi\0"
"Z\0"
"Zl\0"
@@ -204,111 +231,122 @@ const char _pcre_utt_names[] =
"Zs\0";
const ucp_type_table _pcre_utt[] = {
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_PC, ucp_Cc },
{ 94, PT_PC, ucp_Cf },
{ 97, PT_SC, ucp_Cherokee },
{ 106, PT_PC, ucp_Cn },
{ 109, PT_PC, ucp_Co },
{ 112, PT_SC, ucp_Common },
{ 119, PT_SC, ucp_Coptic },
{ 126, PT_PC, ucp_Cs },
{ 129, PT_SC, ucp_Cuneiform },
{ 139, PT_SC, ucp_Cypriot },
{ 147, PT_SC, ucp_Cyrillic },
{ 156, PT_SC, ucp_Deseret },
{ 164, PT_SC, ucp_Devanagari },
{ 175, PT_SC, ucp_Ethiopic },
{ 184, PT_SC, ucp_Georgian },
{ 193, PT_SC, ucp_Glagolitic },
{ 204, PT_SC, ucp_Gothic },
{ 211, PT_SC, ucp_Greek },
{ 217, PT_SC, ucp_Gujarati },
{ 226, PT_SC, ucp_Gurmukhi },
{ 235, PT_SC, ucp_Han },
{ 239, PT_SC, ucp_Hangul },
{ 246, PT_SC, ucp_Hanunoo },
{ 254, PT_SC, ucp_Hebrew },
{ 261, PT_SC, ucp_Hiragana },
{ 270, PT_SC, ucp_Inherited },
{ 280, PT_SC, ucp_Kannada },
{ 288, PT_SC, ucp_Katakana },
{ 297, PT_SC, ucp_Kharoshthi },
{ 308, PT_SC, ucp_Khmer },
{ 314, PT_GC, ucp_L },
{ 316, PT_LAMP, 0 },
{ 319, PT_SC, ucp_Lao },
{ 323, PT_SC, ucp_Latin },
{ 329, PT_SC, ucp_Limbu },
{ 335, PT_SC, ucp_Linear_B },
{ 344, PT_PC, ucp_Ll },
{ 347, PT_PC, ucp_Lm },
{ 350, PT_PC, ucp_Lo },
{ 353, PT_PC, ucp_Lt },
{ 356, PT_PC, ucp_Lu },
{ 359, PT_GC, ucp_M },
{ 361, PT_SC, ucp_Malayalam },
{ 371, PT_PC, ucp_Mc },
{ 374, PT_PC, ucp_Me },
{ 377, PT_PC, ucp_Mn },
{ 380, PT_SC, ucp_Mongolian },
{ 390, PT_SC, ucp_Myanmar },
{ 398, PT_GC, ucp_N },
{ 400, PT_PC, ucp_Nd },
{ 403, PT_SC, ucp_New_Tai_Lue },
{ 415, PT_SC, ucp_Nko },
{ 419, PT_PC, ucp_Nl },
{ 422, PT_PC, ucp_No },
{ 425, PT_SC, ucp_Ogham },
{ 431, PT_SC, ucp_Old_Italic },
{ 442, PT_SC, ucp_Old_Persian },
{ 454, PT_SC, ucp_Oriya },
{ 460, PT_SC, ucp_Osmanya },
{ 468, PT_GC, ucp_P },
{ 470, PT_PC, ucp_Pc },
{ 473, PT_PC, ucp_Pd },
{ 476, PT_PC, ucp_Pe },
{ 479, PT_PC, ucp_Pf },
{ 482, PT_SC, ucp_Phags_Pa },
{ 491, PT_SC, ucp_Phoenician },
{ 502, PT_PC, ucp_Pi },
{ 505, PT_PC, ucp_Po },
{ 508, PT_PC, ucp_Ps },
{ 511, PT_SC, ucp_Runic },
{ 517, PT_GC, ucp_S },
{ 519, PT_PC, ucp_Sc },
{ 522, PT_SC, ucp_Shavian },
{ 530, PT_SC, ucp_Sinhala },
{ 538, PT_PC, ucp_Sk },
{ 541, PT_PC, ucp_Sm },
{ 544, PT_PC, ucp_So },
{ 547, PT_SC, ucp_Syloti_Nagri },
{ 560, PT_SC, ucp_Syriac },
{ 567, PT_SC, ucp_Tagalog },
{ 575, PT_SC, ucp_Tagbanwa },
{ 584, PT_SC, ucp_Tai_Le },
{ 591, PT_SC, ucp_Tamil },
{ 597, PT_SC, ucp_Telugu },
{ 604, PT_SC, ucp_Thaana },
{ 611, PT_SC, ucp_Thai },
{ 616, PT_SC, ucp_Tibetan },
{ 624, PT_SC, ucp_Tifinagh },
{ 633, PT_SC, ucp_Ugaritic },
{ 642, PT_SC, ucp_Yi },
{ 645, PT_GC, ucp_Z },
{ 647, PT_PC, ucp_Zl },
{ 650, PT_PC, ucp_Zp },
{ 653, PT_PC, ucp_Zs }
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_SC, ucp_Carian },
{ 98, PT_PC, ucp_Cc },
{ 101, PT_PC, ucp_Cf },
{ 104, PT_SC, ucp_Cham },
{ 109, PT_SC, ucp_Cherokee },
{ 118, PT_PC, ucp_Cn },
{ 121, PT_PC, ucp_Co },
{ 124, PT_SC, ucp_Common },
{ 131, PT_SC, ucp_Coptic },
{ 138, PT_PC, ucp_Cs },
{ 141, PT_SC, ucp_Cuneiform },
{ 151, PT_SC, ucp_Cypriot },
{ 159, PT_SC, ucp_Cyrillic },
{ 168, PT_SC, ucp_Deseret },
{ 176, PT_SC, ucp_Devanagari },
{ 187, PT_SC, ucp_Ethiopic },
{ 196, PT_SC, ucp_Georgian },
{ 205, PT_SC, ucp_Glagolitic },
{ 216, PT_SC, ucp_Gothic },
{ 223, PT_SC, ucp_Greek },
{ 229, PT_SC, ucp_Gujarati },
{ 238, PT_SC, ucp_Gurmukhi },
{ 247, PT_SC, ucp_Han },
{ 251, PT_SC, ucp_Hangul },
{ 258, PT_SC, ucp_Hanunoo },
{ 266, PT_SC, ucp_Hebrew },
{ 273, PT_SC, ucp_Hiragana },
{ 282, PT_SC, ucp_Inherited },
{ 292, PT_SC, ucp_Kannada },
{ 300, PT_SC, ucp_Katakana },
{ 309, PT_SC, ucp_Kayah_Li },
{ 318, PT_SC, ucp_Kharoshthi },
{ 329, PT_SC, ucp_Khmer },
{ 335, PT_GC, ucp_L },
{ 337, PT_LAMP, 0 },
{ 340, PT_SC, ucp_Lao },
{ 344, PT_SC, ucp_Latin },
{ 350, PT_SC, ucp_Lepcha },
{ 357, PT_SC, ucp_Limbu },
{ 363, PT_SC, ucp_Linear_B },
{ 372, PT_PC, ucp_Ll },
{ 375, PT_PC, ucp_Lm },
{ 378, PT_PC, ucp_Lo },
{ 381, PT_PC, ucp_Lt },
{ 384, PT_PC, ucp_Lu },
{ 387, PT_SC, ucp_Lycian },
{ 394, PT_SC, ucp_Lydian },
{ 401, PT_GC, ucp_M },
{ 403, PT_SC, ucp_Malayalam },
{ 413, PT_PC, ucp_Mc },
{ 416, PT_PC, ucp_Me },
{ 419, PT_PC, ucp_Mn },
{ 422, PT_SC, ucp_Mongolian },
{ 432, PT_SC, ucp_Myanmar },
{ 440, PT_GC, ucp_N },
{ 442, PT_PC, ucp_Nd },
{ 445, PT_SC, ucp_New_Tai_Lue },
{ 457, PT_SC, ucp_Nko },
{ 461, PT_PC, ucp_Nl },
{ 464, PT_PC, ucp_No },
{ 467, PT_SC, ucp_Ogham },
{ 473, PT_SC, ucp_Ol_Chiki },
{ 482, PT_SC, ucp_Old_Italic },
{ 493, PT_SC, ucp_Old_Persian },
{ 505, PT_SC, ucp_Oriya },
{ 511, PT_SC, ucp_Osmanya },
{ 519, PT_GC, ucp_P },
{ 521, PT_PC, ucp_Pc },
{ 524, PT_PC, ucp_Pd },
{ 527, PT_PC, ucp_Pe },
{ 530, PT_PC, ucp_Pf },
{ 533, PT_SC, ucp_Phags_Pa },
{ 542, PT_SC, ucp_Phoenician },
{ 553, PT_PC, ucp_Pi },
{ 556, PT_PC, ucp_Po },
{ 559, PT_PC, ucp_Ps },
{ 562, PT_SC, ucp_Rejang },
{ 569, PT_SC, ucp_Runic },
{ 575, PT_GC, ucp_S },
{ 577, PT_SC, ucp_Saurashtra },
{ 588, PT_PC, ucp_Sc },
{ 591, PT_SC, ucp_Shavian },
{ 599, PT_SC, ucp_Sinhala },
{ 607, PT_PC, ucp_Sk },
{ 610, PT_PC, ucp_Sm },
{ 613, PT_PC, ucp_So },
{ 616, PT_SC, ucp_Sundanese },
{ 626, PT_SC, ucp_Syloti_Nagri },
{ 639, PT_SC, ucp_Syriac },
{ 646, PT_SC, ucp_Tagalog },
{ 654, PT_SC, ucp_Tagbanwa },
{ 663, PT_SC, ucp_Tai_Le },
{ 670, PT_SC, ucp_Tamil },
{ 676, PT_SC, ucp_Telugu },
{ 683, PT_SC, ucp_Thaana },
{ 690, PT_SC, ucp_Thai },
{ 695, PT_SC, ucp_Tibetan },
{ 703, PT_SC, ucp_Tifinagh },
{ 712, PT_SC, ucp_Ugaritic },
{ 721, PT_SC, ucp_Vai },
{ 725, PT_SC, ucp_Yi },
{ 728, PT_GC, ucp_Z },
{ 730, PT_PC, ucp_Zl },
{ 733, PT_PC, ucp_Zp },
{ 736, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);

View File

@@ -43,7 +43,7 @@ see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#if 1
#if 2875
#include "_hbconf.h"
#endif

File diff suppressed because it is too large Load Diff

View File

@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
string that identifies the PCRE version that is in use. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -79,7 +79,7 @@ I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
PCRE_EXP_DEFN const char *
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?

View File

@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
strings. */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -155,8 +155,7 @@ for (p = string; length-- > 0; p++)
}
}
#else
/* pacify warnings */
(void)(string);
(void)(string); /* Keep picky compilers happy */
(void)(length);
#endif

View File

@@ -43,7 +43,7 @@ class (one that contains characters whose values are > 255). It is used by both
pcre_exec() and pcre_def_exec(). */
#if 1
#if 2875
#include "_hbconf.h"
#endif
@@ -104,8 +104,7 @@ while ((t = *data++) != XCL_END)
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
const ucd_record * prop = GET_UCD(c);
switch(*data)
{
@@ -114,20 +113,20 @@ while ((t = *data++) != XCL_END)
break;
case PT_LAMP:
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt) ==
(t == XCL_PROP)) return !negated;
break;
case PT_GC:
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no

View File

@@ -121,11 +121,24 @@ enum {
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
ucp_Balinese, /* New for Unicode 5.0.0 */
ucp_Cuneiform, /* New for Unicode 5.0.0 */
ucp_Nko, /* New for Unicode 5.0.0 */
ucp_Phags_Pa, /* New for Unicode 5.0.0 */
ucp_Phoenician /* New for Unicode 5.0.0 */
/* New for Unicode 5.0: */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Phags_Pa,
ucp_Phoenician,
/* New for Unicode 5.1: */
ucp_Carian,
ucp_Cham,
ucp_Kayah_Li,
ucp_Lepcha,
ucp_Lycian,
ucp_Lydian,
ucp_Ol_Chiki,
ucp_Rejang,
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai
};
#endif

View File

@@ -1,92 +0,0 @@
/*************************************************
* Unicode Property Table handler *
*************************************************/
#ifndef _UCPINTERNAL_H
#define _UCPINTERNAL_H
/* Internal header file defining the layout of the bits in each pair of 32-bit
words that form a data item in the table. */
typedef struct cnode {
/* f0: script number (most significant byte), range flag (0x00800000) and
code point (0x001fffff bits); decode with the f0_* masks. */
pcre_uint32 f0;
/* f1: character type (0xfc000000 bits) and, in the 0x0000ffff bits, either
the unsigned offset to the top of a range or the signed offset to the
"other case" partner; decode with the f1_* masks. */
pcre_uint32 f1;
} cnode;
/* Things for the f0 field */
#define f0_scriptmask 0xff000000 /* Mask for script field */
#define f0_scriptshift 24 /* Shift for script value */
#define f0_rangeflag 0x00800000 /* Flag for a range item */
#define f0_charmask 0x001fffff /* Mask for code point value */
/* Things for the f1 field */
#define f1_typemask 0xfc000000 /* Mask for char type field */
#define f1_typeshift 26 /* Shift for the type field */
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
#define f1_casemask 0x0000ffff /* Mask for a case offset */
#define f1_caseneg 0xffff8000 /* Bits for negation */
/* The data consists of a vector of structures of type cnode. The two unsigned
32-bit integers are used as follows:
(f0) (1) The most significant byte holds the script number. The numbers are
defined by the enum in ucp.h.
(2) The 0x00800000 bit is set if this entry defines a range of characters.
It is not set if this entry defines a single character.
(3) The 0x00600000 bits are spare.
(4) The 0x001fffff bits contain the code point. No Unicode code point will
ever be greater than 0x0010ffff, so this should be OK for ever.
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
defined by an enum in ucp.h.
(2) The 0x03ff0000 bits are spare.
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
range if this entry defines a range, OR the *signed* offset to the
character's "other case" partner if this entry defines a single
character. There is no partner if the value is zero.
-------------------------------------------------------------------------------
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
-------------------------------------------------------------------------------
| | | | |
| | |-> spare | |-> spare
| | |
| |-> spare |-> spare
|
|-> range flag
The upper/lower casing information is set only for characters that come in
pairs. The non-one-to-one mappings in the Unicode data are ignored.
When searching the data, proceed as follows:
(1) Set up for a binary chop search.
(2) If the top is not greater than the bottom, the character is not in the
table. Its type must therefore be "Cn" ("Undefined").
(3) Find the middle vector element.
(4) Extract the code point and compare. If equal, we are done.
(5) If the test character is smaller, set the top to the current point, and
goto (2).
(6) If the current entry defines a range, compute the last character by adding
the offset, and see if the test character is within the range. If it is,
we are done.
(7) Otherwise, set the bottom to one element past the current point and goto
(2).
*/
#endif /* _UCPINTERNAL_H */
/* End of ucpinternal.h */

File diff suppressed because it is too large Load Diff