2008-08-31 10:53 UTC+0200 Viktor Szakats (harbour.01 syenar hu)
* contrib/hbole/oleauto.prg
* contrib/hbw32/w32_tole.prg
* MessageBox() calls converted to Alert()s.
A #translate was added for them to be reconverted to MessageBox(),
since I'm not sure why we should use MessageBox()es here.
IMO it'd be better to throw RTEs, or handle these cases more
"natively". Opinions?
* contrib/hbole/oleauto.prg
+ Modified to generate RTE when the object could not
be created, so that it works more like OLE in hbw32.lib,
and the test doesn't crash in this case. I don't have
Excel, and this is what I could fix without it.
* contrib/hbw32/w32_tole.prg
* contrib/hbw32/w32_ole.c
! Typos, minor formatting.
* common.mak
* source/hbpcre/Makefile
* source/hbpcre/cnv_hb2o.bat
* source/hbpcre/cnv_o2hb.bat
- source/hbpcre/ucpinter.h
- source/hbpcre/ucptable.h
- source/hbpcre/pcrefind.c
+ source/hbpcre/pcreucd.c
* source/hbpcre/pcrevutf.c
* source/hbpcre/pcre.h
* source/hbpcre/pcreconf.c
* source/hbpcre/pcreinfo.c
* source/hbpcre/pcremktb.c
* source/hbpcre/config.h
* source/hbpcre/pcrenewl.c
* source/hbpcre/pcreoutf.c
* source/hbpcre/chartabs.c
* source/hbpcre/pcrerefc.c
* source/hbpcre/pcreget.c
* source/hbpcre/pcrefinf.c
* source/hbpcre/pcretryf.c
* source/hbpcre/pcreexec.c
* source/hbpcre/pcreinal.h
* source/hbpcre/pcretabs.c
* source/hbpcre/pcredfa.c
* source/hbpcre/pcrever.c
* source/hbpcre/pcrecomp.c
* source/hbpcre/pcrexcls.c
* source/hbpcre/pcrestud.c
* source/hbpcre/ucp.h
+ Updated to PCRE 7.8 RC1
(will be updated to final pretty soon)
This version contains the small patches that we
had so far kept locally in Harbour. Thanks to
Philip Hazel for applying them.
This commit is contained in:
@@ -8,6 +8,61 @@
|
||||
2008-12-31 13:59 UTC+0100 Foo Bar (foo.bar foobar.org)
|
||||
*/
|
||||
|
||||
2008-08-31 10:53 UTC+0200 Viktor Szakats (harbour.01 syenar hu)
|
||||
* contrib/hbole/oleauto.prg
|
||||
* contrib/hbw32/w32_tole.prg
|
||||
* MessageBox() calls converted to Alert()s.
|
||||
A #translate was added for them to be reconverted to MessageBox(),
|
||||
since I'm not sure why we should use MessageBox()es here.
|
||||
IMO it'd be better to throw RTEs, or handle these cases more
|
||||
"natively". Opinions?
|
||||
|
||||
* contrib/hbole/oleauto.prg
|
||||
+ Modified to generate RTE when the object could not
|
||||
be created, so that it works more like OLE in hbw32.lib,
|
||||
and the test doesn't crash in this case. I don't have
|
||||
Excel, and this is what I could fix without it.
|
||||
|
||||
* contrib/hbw32/w32_tole.prg
|
||||
* contrib/hbw32/w32_ole.c
|
||||
! Typos, minor formatting.
|
||||
|
||||
* common.mak
|
||||
* source/hbpcre/Makefile
|
||||
* source/hbpcre/cnv_hb2o.bat
|
||||
* source/hbpcre/cnv_o2hb.bat
|
||||
- source/hbpcre/ucpinter.h
|
||||
- source/hbpcre/ucptable.h
|
||||
- source/hbpcre/pcrefind.c
|
||||
+ source/hbpcre/pcreucd.c
|
||||
* source/hbpcre/pcrevutf.c
|
||||
* source/hbpcre/pcre.h
|
||||
* source/hbpcre/pcreconf.c
|
||||
* source/hbpcre/pcreinfo.c
|
||||
* source/hbpcre/pcremktb.c
|
||||
* source/hbpcre/config.h
|
||||
* source/hbpcre/pcrenewl.c
|
||||
* source/hbpcre/pcreoutf.c
|
||||
* source/hbpcre/chartabs.c
|
||||
* source/hbpcre/pcrerefc.c
|
||||
* source/hbpcre/pcreget.c
|
||||
* source/hbpcre/pcrefinf.c
|
||||
* source/hbpcre/pcretryf.c
|
||||
* source/hbpcre/pcreexec.c
|
||||
* source/hbpcre/pcreinal.h
|
||||
* source/hbpcre/pcretabs.c
|
||||
* source/hbpcre/pcredfa.c
|
||||
* source/hbpcre/pcrever.c
|
||||
* source/hbpcre/pcrecomp.c
|
||||
* source/hbpcre/pcrexcls.c
|
||||
* source/hbpcre/pcrestud.c
|
||||
* source/hbpcre/ucp.h
|
||||
+ Updated to PCRE 7.8 RC1
|
||||
(will be updated to final pretty soon)
|
||||
This version contains the small patches we
|
||||
so far had locally in Harbour. Thanks to
|
||||
Philip Hazel for applying them.
|
||||
|
||||
2008-08-30 11:58 UTC+0200 Viktor Szakats (harbour.01 syenar hu)
|
||||
* include/hbsetup.ch
|
||||
+ Added HB_LEGACY_OFF macro to turn off HB_LEGACY_LEVEL.
|
||||
|
||||
@@ -760,7 +760,6 @@ PCRE_LIB_OBJS = \
|
||||
$(OBJ_DIR)\pcreconf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcredfa$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreexec$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrefind$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrefinf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreget$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreinfo$(OBJEXT) \
|
||||
@@ -771,6 +770,7 @@ PCRE_LIB_OBJS = \
|
||||
$(OBJ_DIR)\pcrestud$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcretabs$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcretryf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreucd$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrever$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrevutf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrexcls$(OBJEXT) \
|
||||
|
||||
@@ -53,7 +53,11 @@
|
||||
|
||||
#include "hbclass.ch"
|
||||
#include "common.ch"
|
||||
#include "error.ch"
|
||||
|
||||
#translate Alert( <x> ) => MessageBox( 0, <x>, "OLE Error", 0 )
|
||||
|
||||
#define EG_OLEEXCEPTION 1001
|
||||
|
||||
CLASS TOleAuto
|
||||
|
||||
@@ -73,7 +77,16 @@ ENDCLASS
|
||||
|
||||
//--------------------------------------------------------------------
|
||||
|
||||
STATIC PROCEDURE THROW( oError )
|
||||
LOCAL lError := Eval( ErrorBlock(), oError )
|
||||
IF !HB_ISLOGICAL( lError ) .OR. lError
|
||||
__ErrInHandler()
|
||||
ENDIF
|
||||
Break( oError )
|
||||
RETURN
|
||||
|
||||
METHOD New( uObj ) CLASS TOleAuto
|
||||
LOCAL oErr
|
||||
|
||||
IF ISCHARACTER( uObj )
|
||||
::hObj := CreateOleObject( uObj )
|
||||
@@ -81,6 +94,22 @@ METHOD New( uObj ) CLASS TOleAuto
|
||||
::hObj := uObj
|
||||
ENDIF
|
||||
|
||||
IF Empty( ::hObj )
|
||||
oErr := ErrorNew()
|
||||
oErr:Args := hb_AParams()
|
||||
oErr:CanDefault := .F.
|
||||
oErr:CanRetry := .F.
|
||||
oErr:CanSubstitute := .T.
|
||||
oErr:Description := Ole2TxtError()
|
||||
oErr:GenCode := EG_OLEEXCEPTION
|
||||
oErr:Operation := ProcName()
|
||||
oErr:Severity := ES_ERROR
|
||||
oErr:SubCode := -1
|
||||
oErr:SubSystem := "TOleAuto"
|
||||
|
||||
RETURN Throw( oErr )
|
||||
ENDIF
|
||||
|
||||
RETURN Self
|
||||
|
||||
METHOD GetActiveObject( cClass ) CLASS TOleAuto
|
||||
@@ -89,8 +118,8 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
|
||||
::hObj := GetOleObject( cClass )
|
||||
// ::cClassName := cClass
|
||||
ELSE
|
||||
MessageBox( 0, "Invalid parameter type to constructor TOleAuto():GetActiveObject()!", "OLE Interface", 0 )
|
||||
::hObj := 0
|
||||
Alert( "OLE interface: Invalid parameter type to constructor TOleAuto():GetActiveObject()" )
|
||||
::hObj := NIL
|
||||
ENDIF
|
||||
|
||||
RETURN Self
|
||||
@@ -131,7 +160,7 @@ METHOD Invoke( cMethod, uParam1, uParam2, uParam3, uParam4, uParam5, uParam6 ) C
|
||||
OLEShowException()
|
||||
RETURN Self
|
||||
ELSEIF OleError() != 0
|
||||
MessageBox( 0, cMethod + ": " + Ole2TxtError(), "OLE Error", 0 )
|
||||
Alert( "OLE error: " + cMethod + ": " + Ole2TxtError() )
|
||||
ENDIF
|
||||
|
||||
RETURN uObj
|
||||
@@ -157,7 +186,7 @@ METHOD Set( cProperty, uParam1, uParam2, uParam3, uParam4, uParam5, uParam6 ) CL
|
||||
IF Ole2TxtError() == "DISP_E_EXCEPTION"
|
||||
OLEShowException()
|
||||
ELSEIF OleError() != 0
|
||||
MessageBox( 0, cProperty + ": " + Ole2TxtError(), "OLE Error", 0 )
|
||||
Alert( "OLE error: " + cProperty + ": " + Ole2TxtError() )
|
||||
ENDIF
|
||||
|
||||
RETURN nil
|
||||
@@ -192,7 +221,7 @@ METHOD Get( cProperty, uParam1, uParam2, uParam3, uParam4, uParam5, uParam6 ) CL
|
||||
IF OleIsObject()
|
||||
RETURN TOleAuto():New( uObj )
|
||||
ELSEIF OleError() != 0
|
||||
MessageBox( 0, cProperty + ": " + Ole2TxtError(), "OLE Error", 0 )
|
||||
Alert( "OLE error: " + cProperty + ": " + Ole2TxtError() )
|
||||
ENDIF
|
||||
ENDIF
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ static BOOL s_bInit = FALSE;
|
||||
|
||||
/* ----------------------------------------------------------------------- */
|
||||
|
||||
#define EG_OLEEXECPTION 1001
|
||||
#define EG_OLEEXCEPTION 1001
|
||||
#define HB_STRING_ALLOC( p, l ) hb_itemReSizeString( p, l )
|
||||
|
||||
static void hb_itemPushForward( PHB_ITEM pItem )
|
||||
@@ -1623,7 +1623,7 @@ static void OleThrowError( void )
|
||||
|
||||
/*HB_TRACE(HB_TR_INFO, ("Desc: '%s'\n", sDescription));*/
|
||||
|
||||
pReturn = hb_errRT_SubstParams( hb_parcx( -1 ), EG_OLEEXECPTION, (ULONG) s_nOleError, sDescription, hb_itemGetSymbol( hb_stackBaseItem() )->szName );
|
||||
pReturn = hb_errRT_SubstParams( hb_parcx( -1 ), EG_OLEEXCEPTION, (ULONG) s_nOleError, sDescription, hb_itemGetSymbol( hb_stackBaseItem() )->szName );
|
||||
|
||||
if( fFree )
|
||||
{
|
||||
|
||||
@@ -63,9 +63,11 @@
|
||||
#include "hbclass.ch"
|
||||
#include "error.ch"
|
||||
|
||||
#translate Alert( <x> ) => MessageBox( 0, <x>, "OLE Error", 0 )
|
||||
|
||||
#ifndef __XHARBOUR__
|
||||
|
||||
#define EG_OLEEXECPTION 1001
|
||||
#define EG_OLEEXCEPTION 1001
|
||||
|
||||
#xcommand TRY => BEGIN SEQUENCE WITH s_bBreak
|
||||
#xcommand CATCH [<!oErr!>] => RECOVER [USING <oErr>] <-oErr->
|
||||
@@ -76,7 +78,7 @@ static s_bBreak := { |oErr| break( oErr ) }
|
||||
STATIC PROCEDURE THROW( oError )
|
||||
LOCAL lError := Eval( ErrorBlock(), oError )
|
||||
IF !HB_ISLOGICAL( lError ) .OR. lError
|
||||
__ErrInHandler()
|
||||
__ErrInHandler()
|
||||
ENDIF
|
||||
Break( oError )
|
||||
RETURN
|
||||
@@ -211,7 +213,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
|
||||
|
||||
LOCAL oErr
|
||||
|
||||
// Hack incase OLE Server already created and New() is attempted as an OLE Method.
|
||||
// Hack in case OLE Server already created and New() is attempted as an OLE Method.
|
||||
IF ::hObj != NIL
|
||||
RETURN HB_ExecFromArray( Self, "_New", HB_aParams() )
|
||||
ENDIF
|
||||
@@ -227,7 +229,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
|
||||
oErr:CanRetry := .F.
|
||||
oErr:CanSubstitute := .T.
|
||||
oErr:Description := OLEExceptionDescription()
|
||||
oErr:GenCode := EG_OLEEXECPTION
|
||||
oErr:GenCode := EG_OLEEXCEPTION
|
||||
oErr:Operation := ProcName()
|
||||
oErr:Severity := ES_ERROR
|
||||
oErr:SubCode := -1
|
||||
@@ -241,7 +243,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
|
||||
oErr:CanRetry := .F.
|
||||
oErr:CanSubstitute := .T.
|
||||
oErr:Description := Ole2TxtError()
|
||||
oErr:GenCode := EG_OLEEXECPTION
|
||||
oErr:GenCode := EG_OLEEXCEPTION
|
||||
oErr:Operation := ProcName()
|
||||
oErr:Severity := ES_ERROR
|
||||
oErr:SubCode := -1
|
||||
@@ -267,7 +269,7 @@ METHOD New( uObj, cClass ) CLASS TOleAuto
|
||||
oErr:CanDefault := .F.
|
||||
oErr:CanRetry := .F.
|
||||
oErr:CanSubstitute := .T.
|
||||
oErr:Description := "Invalid argument to contrustor!"
|
||||
oErr:Description := "Invalid argument to contructor!"
|
||||
oErr:GenCode := 0
|
||||
oErr:Operation := ProcName()
|
||||
oErr:Severity := ES_ERROR
|
||||
@@ -309,7 +311,7 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
|
||||
oErr:CanRetry := .F.
|
||||
oErr:CanSubstitute := .T.
|
||||
oErr:Description := OLEExceptionDescription()
|
||||
oErr:GenCode := EG_OLEEXECPTION
|
||||
oErr:GenCode := EG_OLEEXCEPTION
|
||||
oErr:Operation := ProcName()
|
||||
oErr:Severity := ES_ERROR
|
||||
oErr:SubCode := -1
|
||||
@@ -323,7 +325,7 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
|
||||
oErr:CanRetry := .F.
|
||||
oErr:CanSubstitute := .T.
|
||||
oErr:Description := Ole2TxtError()
|
||||
oErr:GenCode := EG_OLEEXECPTION
|
||||
oErr:GenCode := EG_OLEEXCEPTION
|
||||
oErr:Operation := ProcName()
|
||||
oErr:Severity := ES_ERROR
|
||||
oErr:SubCode := -1
|
||||
@@ -335,7 +337,7 @@ METHOD GetActiveObject( cClass ) CLASS TOleAuto
|
||||
|
||||
::cClassName := cClass
|
||||
ELSE
|
||||
MessageBox( 0, "Invalid parameter type to constructor TOleAuto():GetActiveObject()!", "OLE Interface", 0 )
|
||||
Alert( "OLE interface: Invalid parameter type to constructor TOleAuto():GetActiveObject()" )
|
||||
::hObj := 0
|
||||
ENDIF
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ C_SOURCES=\
|
||||
pcreconf.c \
|
||||
pcredfa.c \
|
||||
pcreexec.c \
|
||||
pcrefind.c \
|
||||
pcrefinf.c \
|
||||
pcreget.c \
|
||||
pcreinfo.c \
|
||||
@@ -22,6 +21,7 @@ C_SOURCES=\
|
||||
pcrestud.c \
|
||||
pcretabs.c \
|
||||
pcretryf.c \
|
||||
pcreucd.c \
|
||||
pcrever.c \
|
||||
pcrevutf.c \
|
||||
pcrexcls.c \
|
||||
|
||||
@@ -20,7 +20,7 @@ and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -26,8 +26,6 @@ copy config.h ori_dst\config.h.generic
|
||||
copy pcre.h ori_dst\pcre.h.generic
|
||||
copy pcreinal.h ori_dst\pcre_internal.h
|
||||
copy ucp.h ori_dst\ucp.h
|
||||
copy ucpinter.h ori_dst\ucpinternal.h
|
||||
copy ucptable.h ori_dst\ucptable.h
|
||||
copy chartabs.c ori_dst\pcre_chartables.c.dist
|
||||
copy pcrecomp.c ori_dst\pcre_compile.c
|
||||
copy pcreconf.c ori_dst\pcre_config.c
|
||||
@@ -44,7 +42,7 @@ copy pcrerefc.c ori_dst\pcre_refcount.c
|
||||
copy pcrestud.c ori_dst\pcre_study.c
|
||||
copy pcretabs.c ori_dst\pcre_tables.c
|
||||
copy pcretryf.c ori_dst\pcre_try_flipped.c
|
||||
copy pcrefind.c ori_dst\pcre_ucp_searchfuncs.c
|
||||
copy pcreucd.c ori_dst\pcre_ucd.c
|
||||
copy pcrevutf.c ori_dst\pcre_valid_utf8.c
|
||||
copy pcrever.c ori_dst\pcre_version.c
|
||||
copy pcrexcls.c ori_dst\pcre_xclass.c
|
||||
|
||||
@@ -33,8 +33,6 @@ copy ori_src\config.h.generic config.h
|
||||
copy ori_src\pcre.h.generic pcre.h
|
||||
copy ori_src\pcre_internal.h pcreinal.h
|
||||
copy ori_src\ucp.h ucp.h
|
||||
copy ori_src\ucpinternal.h ucpinter.h
|
||||
copy ori_src\ucptable.h ucptable.h
|
||||
copy ori_src\pcre_chartables.c.dist chartabs.c
|
||||
copy ori_src\pcre_compile.c pcrecomp.c
|
||||
copy ori_src\pcre_config.c pcreconf.c
|
||||
@@ -51,7 +49,7 @@ copy ori_src\pcre_refcount.c pcrerefc.c
|
||||
copy ori_src\pcre_study.c pcrestud.c
|
||||
copy ori_src\pcre_tables.c pcretabs.c
|
||||
copy ori_src\pcre_try_flipped.c pcretryf.c
|
||||
copy ori_src\pcre_ucp_searchfuncs.c pcrefind.c
|
||||
copy ori_src\pcre_ucd.c pcreucd.c
|
||||
copy ori_src\pcre_valid_utf8.c pcrevutf.c
|
||||
copy ori_src\pcre_version.c pcrever.c
|
||||
copy ori_src\pcre_xclass.c pcrexcls.c
|
||||
|
||||
@@ -238,13 +238,13 @@ them both to 0; an emulation function will be used. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.7"
|
||||
#define PACKAGE_STRING "PCRE 7.8-RC1"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.7"
|
||||
#define PACKAGE_VERSION "7.8-RC1"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
@@ -297,7 +297,7 @@ them both to 0; an emulation function will be used. */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "7.7"
|
||||
#define VERSION "7.8-RC1"
|
||||
#endif
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
|
||||
@@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 7
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2008-05-07
|
||||
#define PCRE_MINOR 8
|
||||
#define PCRE_PRERELEASE -RC1
|
||||
#define PCRE_DATE 2008-08-25
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
|
||||
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting internal functions that are not used by other modules. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -1451,8 +1451,7 @@ for (;;)
|
||||
break;
|
||||
}
|
||||
#else
|
||||
/* pacify warnings */
|
||||
(void)(utf8);
|
||||
(void)(utf8); /* Keep compiler happy by referencing function argument */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -1547,8 +1546,7 @@ for (;;)
|
||||
break;
|
||||
}
|
||||
#else
|
||||
/* pacify warnings */
|
||||
(void)(utf8);
|
||||
(void)(utf8); /* Keep compiler happy by referencing function argument */
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -2021,7 +2019,7 @@ get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,
|
||||
unsigned int c, othercase, next;
|
||||
|
||||
for (c = *cptr; c <= d; c++)
|
||||
{ if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
|
||||
{ if ((othercase = UCD_OTHERCASE(c)) != c) break; }
|
||||
|
||||
if (c > d) return FALSE;
|
||||
|
||||
@@ -2030,7 +2028,7 @@ next = othercase + 1;
|
||||
|
||||
for (++c; c <= d; c++)
|
||||
{
|
||||
if (_pcre_ucp_othercase(c) != next) break;
|
||||
if (UCD_OTHERCASE(c) != next) break;
|
||||
next++;
|
||||
}
|
||||
|
||||
@@ -2141,8 +2139,7 @@ if (next >= 0) switch(op_code)
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
|
||||
#else
|
||||
/* pacify warnings */
|
||||
(void)(utf8_char);
|
||||
(void)(utf8_char); /* Keep compiler happy by referencing function argument */
|
||||
#endif
|
||||
return item != next;
|
||||
|
||||
@@ -2161,7 +2158,7 @@ if (next >= 0) switch(op_code)
|
||||
unsigned int othercase;
|
||||
if (next < 128) othercase = cd->fcc[next]; else
|
||||
#ifdef SUPPORT_UCP
|
||||
othercase = _pcre_ucp_othercase((unsigned int)next);
|
||||
othercase = UCD_OTHERCASE((unsigned int)next);
|
||||
#else
|
||||
othercase = NOTACHAR;
|
||||
#endif
|
||||
@@ -2182,7 +2179,7 @@ if (next >= 0) switch(op_code)
|
||||
unsigned int othercase;
|
||||
if (next < 128) othercase = cd->fcc[next]; else
|
||||
#ifdef SUPPORT_UCP
|
||||
othercase = _pcre_ucp_othercase(next);
|
||||
othercase = UCD_OTHERCASE(next);
|
||||
#else
|
||||
othercase = NOTACHAR;
|
||||
#endif
|
||||
@@ -3348,7 +3345,7 @@ for (;; ptr++)
|
||||
if ((options & PCRE_CASELESS) != 0)
|
||||
{
|
||||
unsigned int othercase;
|
||||
if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
|
||||
if ((othercase = UCD_OTHERCASE(c)) != c)
|
||||
{
|
||||
*class_utf8data++ = XCL_SINGLE;
|
||||
class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
|
||||
@@ -4929,10 +4926,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
both phases.
|
||||
|
||||
If we are not at the pattern start, compile code to change the ims
|
||||
options if this setting actually changes any of them. We also pass the
|
||||
new setting back so that it can be put at the start of any following
|
||||
branches, and when this group ends (if we are in a group), a resetting
|
||||
item can be compiled. */
|
||||
options if this setting actually changes any of them, and reset the
|
||||
greedy defaults and the case value for firstbyte and reqbyte. */
|
||||
|
||||
if (*ptr == ')')
|
||||
{
|
||||
@@ -4940,7 +4935,6 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
|
||||
{
|
||||
cd->external_options = newoptions;
|
||||
options = newoptions;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -4949,17 +4943,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
|
||||
*code++ = OP_OPT;
|
||||
*code++ = newoptions & PCRE_IMS;
|
||||
}
|
||||
|
||||
/* Change options at this level, and pass them back for use
|
||||
in subsequent branches. Reset the greedy defaults and the case
|
||||
value for firstbyte and reqbyte. */
|
||||
|
||||
*optionsptr = options = newoptions;
|
||||
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
|
||||
greedy_non_default = greedy_default ^ 1;
|
||||
req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||
}
|
||||
|
||||
/* Change options at this level, and pass them back for use
|
||||
in subsequent branches. When not at the start of the pattern, this
|
||||
information is also necessary so that a resetting item can be
|
||||
compiled at the end of a group (if we are in a group). */
|
||||
|
||||
*optionsptr = options = newoptions;
|
||||
previous = NULL; /* This item can't be repeated */
|
||||
continue; /* It is complete */
|
||||
}
|
||||
@@ -5953,7 +5947,7 @@ Returns: pointer to compiled data block, or NULL on error,
|
||||
with errorptr and erroroffset set
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre *
|
||||
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
|
||||
pcre_compile(const char *pattern, int options, const char **errorptr,
|
||||
int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
@@ -5961,7 +5955,7 @@ return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables);
|
||||
}
|
||||
|
||||
|
||||
PCRE_EXP_DEFN pcre *
|
||||
PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
|
||||
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
|
||||
const char **errorptr, int *erroroffset, const unsigned char *tables)
|
||||
{
|
||||
|
||||
@@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
|
||||
@@ -44,7 +44,7 @@ FSM). This is NOT Perl- compatible, but it has advantages in certain
|
||||
applications. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -512,9 +512,6 @@ for (;;)
|
||||
const uschar *code;
|
||||
int state_offset = current_state->offset;
|
||||
int count, codevalue;
|
||||
#ifdef SUPPORT_UCP
|
||||
int chartype, script;
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
|
||||
@@ -825,7 +822,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[1])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -833,19 +830,19 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[2];
|
||||
OK = _pcre_ucp_gentype[prop->chartype] == code[2];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[2];
|
||||
OK = prop->chartype == code[2];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[2];
|
||||
OK = prop->script == code[2];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@@ -994,7 +991,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1002,19 +999,19 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[3];
|
||||
OK = _pcre_ucp_gentype[prop->chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[3];
|
||||
OK = prop->chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[3];
|
||||
OK = prop->script == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@@ -1043,7 +1040,7 @@ for (;;)
|
||||
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
|
||||
count = current_state->count; /* Already matched */
|
||||
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -1057,7 +1054,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(nd) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@@ -1216,7 +1213,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[2])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1224,19 +1221,19 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[3];
|
||||
OK = _pcre_ucp_gentype[prop->chartype] == code[3];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[3];
|
||||
OK = prop->chartype == code[3];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[3];
|
||||
OK = prop->script == code[3];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@@ -1274,7 +1271,7 @@ for (;;)
|
||||
QS2:
|
||||
|
||||
ADD_ACTIVE(state_offset + 2, 0);
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -1289,7 +1286,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(nd) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@@ -1463,7 +1460,7 @@ for (;;)
|
||||
if (clen > 0)
|
||||
{
|
||||
BOOL OK;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
switch(code[4])
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -1471,19 +1468,19 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
|
||||
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
OK = category == code[5];
|
||||
OK = _pcre_ucp_gentype[prop->chartype] == code[5];
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
OK = chartype == code[5];
|
||||
OK = prop->chartype == code[5];
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
OK = script == code[5];
|
||||
OK = prop->script == code[5];
|
||||
break;
|
||||
|
||||
/* Should never occur, but keep compilers from grumbling. */
|
||||
@@ -1516,7 +1513,7 @@ for (;;)
|
||||
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
|
||||
{ ADD_ACTIVE(state_offset + 4, 0); }
|
||||
count = current_state->count; /* Number already matched */
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -1530,7 +1527,7 @@ for (;;)
|
||||
int nd;
|
||||
int ndlen = 1;
|
||||
GETCHARLEN(nd, nptr, ndlen);
|
||||
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(nd) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += ndlen;
|
||||
}
|
||||
@@ -1710,7 +1707,7 @@ for (;;)
|
||||
other case of the character. */
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
othercase = _pcre_ucp_othercase(c);
|
||||
othercase = UCD_OTHERCASE(c);
|
||||
#else
|
||||
othercase = NOTACHAR;
|
||||
#endif
|
||||
@@ -1735,7 +1732,7 @@ for (;;)
|
||||
to wait for them to pass before continuing. */
|
||||
|
||||
case OP_EXTUNI:
|
||||
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
|
||||
if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
|
||||
{
|
||||
const uschar *nptr = ptr + clen;
|
||||
int ncount = 0;
|
||||
@@ -1743,7 +1740,7 @@ for (;;)
|
||||
{
|
||||
int nclen = 1;
|
||||
GETCHARLEN(c, nptr, nclen);
|
||||
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
|
||||
if (UCD_CATEGORY(c) != ucp_M) break;
|
||||
ncount++;
|
||||
nptr += nclen;
|
||||
}
|
||||
@@ -1911,7 +1908,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -1949,7 +1946,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -1985,7 +1982,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -2017,7 +2014,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -2052,7 +2049,7 @@ for (;;)
|
||||
if (utf8 && d >= 128)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
otherd = _pcre_ucp_othercase(d);
|
||||
otherd = UCD_OTHERCASE(d);
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
else
|
||||
@@ -2508,7 +2505,7 @@ Returns: > 0 => number of match offset pairs placed in offsets
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
const char *subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount, int *workspace, int wscount)
|
||||
@@ -2736,7 +2733,18 @@ for (;;)
|
||||
|
||||
if (firstline)
|
||||
{
|
||||
const uschar *t = current_subject;
|
||||
USPTR t = current_subject;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (t < md->end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
@@ -2758,7 +2766,20 @@ for (;;)
|
||||
{
|
||||
if (current_subject > md->start_subject + start_offset)
|
||||
{
|
||||
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
|
||||
{
|
||||
current_subject++;
|
||||
while(current_subject < end_subject &&
|
||||
(*current_subject & 0xc0) == 0x80)
|
||||
current_subject++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
|
||||
current_subject++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or
|
||||
|
||||
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
pattern matching using an NFA algorithm, trying to mimic Perl as closely as
|
||||
possible. There are also some static supporting functions. */
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -158,13 +158,39 @@ printf("\n");
|
||||
|
||||
if (length > md->end_subject - eptr) return FALSE;
|
||||
|
||||
/* Separate the caselesss case for speed */
|
||||
/* Separate the caseless case for speed. In UTF-8 mode we can only do this
|
||||
properly if Unicode properties are supported. Otherwise, we can check only
|
||||
ASCII characters. */
|
||||
|
||||
if ((ims & PCRE_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
#ifdef SUPPORT_UCP
|
||||
if (md->utf8)
|
||||
{
|
||||
USPTR endptr = eptr + length;
|
||||
while (eptr < endptr)
|
||||
{
|
||||
int c, d;
|
||||
GETCHARINC(c, eptr);
|
||||
GETCHARINC(d, p);
|
||||
if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* The same code works when not in UTF-8 mode and in UTF-8 mode when there
|
||||
is no UCP support. */
|
||||
|
||||
while (length-- > 0)
|
||||
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
|
||||
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
|
||||
}
|
||||
|
||||
/* In the caseful case, we can just compare the bytes, whether or not we
|
||||
are in UTF-8 mode. */
|
||||
|
||||
else
|
||||
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
|
||||
|
||||
@@ -1653,8 +1679,7 @@ for (;;)
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
|
||||
switch(ecode[1])
|
||||
{
|
||||
@@ -1663,24 +1688,24 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu ||
|
||||
chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == (op == OP_NOTPROP))
|
||||
if ((prop->chartype == ucp_Lu ||
|
||||
prop->chartype == ucp_Ll ||
|
||||
prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((ecode[2] != category) == (op == OP_PROP))
|
||||
if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((ecode[2] != chartype) == (op == OP_PROP))
|
||||
if ((ecode[2] != prop->chartype) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((ecode[2] != script) == (op == OP_PROP))
|
||||
if ((ecode[2] != prop->script) == (op == OP_PROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
@@ -1699,8 +1724,7 @@ for (;;)
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
int category = UCD_CATEGORY(c);
|
||||
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@@ -1709,7 +1733,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
category = UCD_CATEGORY(c);
|
||||
if (category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@@ -2174,7 +2198,7 @@ for (;;)
|
||||
if (fc != dc)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
if (dc != _pcre_ucp_othercase(fc))
|
||||
if (dc != UCD_OTHERCASE(fc))
|
||||
#endif
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -2265,7 +2289,7 @@ for (;;)
|
||||
#ifdef SUPPORT_UCP
|
||||
unsigned int othercase;
|
||||
if ((ims & PCRE_CASELESS) != 0 &&
|
||||
(othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
|
||||
(othercase = UCD_OTHERCASE(fc)) != fc)
|
||||
oclength = _pcre_ord2utf8(othercase, occhars);
|
||||
else oclength = 0;
|
||||
#endif /* SUPPORT_UCP */
|
||||
@@ -2585,10 +2609,11 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(d, eptr);
|
||||
if (d < 256) d = md->lcc[d];
|
||||
if (fi >= max || eptr >= md->end_subject || fc == d)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (fc == d) RRETURN(MATCH_NOMATCH);
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -2694,9 +2719,9 @@ for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(d, eptr);
|
||||
if (fi >= max || eptr >= md->end_subject || fc == d)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
if (fc == d) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -2870,7 +2895,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == ucp_Lu ||
|
||||
prop_chartype == ucp_Ll ||
|
||||
prop_chartype == ucp_Lt) == prop_fail_result)
|
||||
@@ -2883,7 +2908,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if ((prop_category == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -2894,7 +2919,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -2905,7 +2930,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_script = UCD_SCRIPT(c);
|
||||
if ((prop_script == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -2924,7 +2949,7 @@ for (;;)
|
||||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@@ -2933,7 +2958,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@@ -3349,7 +3374,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == ucp_Lu ||
|
||||
prop_chartype == ucp_Ll ||
|
||||
prop_chartype == ucp_Lt) == prop_fail_result)
|
||||
@@ -3364,7 +3389,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if ((prop_category == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -3377,7 +3402,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -3390,7 +3415,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINC(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_script = UCD_SCRIPT(c);
|
||||
if ((prop_script == prop_value) == prop_fail_result)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
@@ -3412,7 +3437,7 @@ for (;;)
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@@ -3421,7 +3446,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@@ -3739,7 +3764,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == ucp_Lu ||
|
||||
prop_chartype == ucp_Ll ||
|
||||
prop_chartype == ucp_Lt) == prop_fail_result)
|
||||
@@ -3754,7 +3779,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if ((prop_category == prop_value) == prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
@@ -3767,7 +3792,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_chartype = UCD_CHARTYPE(c);
|
||||
if ((prop_chartype == prop_value) == prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
@@ -3780,7 +3805,7 @@ for (;;)
|
||||
int len = 1;
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARLEN(c, eptr, len);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_script = UCD_SCRIPT(c);
|
||||
if ((prop_script == prop_value) == prop_fail_result)
|
||||
break;
|
||||
eptr+= len;
|
||||
@@ -3809,7 +3834,7 @@ for (;;)
|
||||
{
|
||||
if (eptr >= md->end_subject) break;
|
||||
GETCHARINCTEST(c, eptr);
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category == ucp_M) break;
|
||||
while (eptr < md->end_subject)
|
||||
{
|
||||
@@ -3818,7 +3843,7 @@ for (;;)
|
||||
{
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr += len;
|
||||
}
|
||||
@@ -3840,7 +3865,7 @@ for (;;)
|
||||
BACKCHAR(eptr);
|
||||
GETCHARLEN(c, eptr, len);
|
||||
}
|
||||
prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
|
||||
prop_category = UCD_CATEGORY(c);
|
||||
if (prop_category != ucp_M) break;
|
||||
eptr--;
|
||||
}
|
||||
@@ -4360,7 +4385,7 @@ Returns: > 0 => success; value is the number of elements filled in
|
||||
< -1 => some kind of unexpected problem
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
|
||||
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
|
||||
int offsetcount)
|
||||
@@ -4672,31 +4697,53 @@ for(;;)
|
||||
if (firstline)
|
||||
{
|
||||
USPTR t = start_match;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (t < md->end_subject && !IS_NEWLINE(t))
|
||||
{
|
||||
t++;
|
||||
while (t < end_subject && (*t & 0xc0) == 0x80) t++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
|
||||
end_subject = t;
|
||||
}
|
||||
|
||||
/* Now test for a unique first byte */
|
||||
/* Now advance to a unique first byte if there is one. */
|
||||
|
||||
if (first_byte >= 0)
|
||||
{
|
||||
if (first_byte_caseless)
|
||||
while (start_match < end_subject &&
|
||||
md->lcc[*start_match] != first_byte)
|
||||
{ NEXTCHAR(start_match); }
|
||||
while (start_match < end_subject && md->lcc[*start_match] != first_byte)
|
||||
start_match++;
|
||||
else
|
||||
while (start_match < end_subject && *start_match != first_byte)
|
||||
{ NEXTCHAR(start_match); }
|
||||
start_match++;
|
||||
}
|
||||
|
||||
/* Or to just after a linebreak for a multiline match if possible */
|
||||
/* Or to just after a linebreak for a multiline match */
|
||||
|
||||
else if (startline)
|
||||
{
|
||||
if (start_match > md->start_subject + start_offset)
|
||||
{
|
||||
while (start_match <= end_subject && !WAS_NEWLINE(start_match))
|
||||
{ NEXTCHAR(start_match); }
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
{
|
||||
start_match++;
|
||||
while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
|
||||
start_match++;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
while (start_match < end_subject && !WAS_NEWLINE(start_match))
|
||||
start_match++;
|
||||
|
||||
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
|
||||
and we are now at a LF, advance the match position by one more character.
|
||||
@@ -4710,16 +4757,15 @@ for(;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* Or to a non-unique first char after study */
|
||||
/* Or to a non-unique first byte after study */
|
||||
|
||||
else if (start_bits != NULL)
|
||||
{
|
||||
while (start_match < end_subject)
|
||||
{
|
||||
register unsigned int c = *start_match;
|
||||
if ((start_bits[c/8] & (1 << (c&7))) == 0)
|
||||
{ NEXTCHAR(start_match); }
|
||||
else break;
|
||||
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
|
||||
else break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,179 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
#include "ucp.h" /* Category definitions */
|
||||
#include "ucpinter.h" /* Internal table details */
|
||||
#include "ucptable.h" /* The table itself */
|
||||
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
static const int ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return type *
|
||||
*************************************************/
|
||||
|
||||
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
type_ptr the detailed character type is returned here
|
||||
script_ptr the script is returned here
|
||||
|
||||
Returns: the character type category
|
||||
*/
|
||||
|
||||
int
|
||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||
{ /* Binary-search ucp_table for c; store its type and script, return its general category. */
|
||||
int bot = 0; /* lower bound of the search window (inclusive) */
|
||||
int top = sizeof(ucp_table)/sizeof(cnode); /* upper bound of the search window (exclusive) */
|
||||
int mid; /* index of the entry currently being probed */
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot) /* window is empty: no entry covers c, so report the defaults below */
|
||||
{
|
||||
*type_ptr = ucp_Cn;
|
||||
|
||||
*script_ptr = ucp_Common;
|
||||
|
||||
return ucp_C;
|
||||
|
||||
}
|
||||
mid = (bot + top) >> 1; /* probe the midpoint of the window */
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break; /* exact hit on the entry's character field */
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid; /* c sorts before this entry: keep the lower half */
|
||||
else
|
||||
|
||||
{
|
||||
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
||||
(ucp_table[mid].f1 & f1_rangemask)) break; /* entry is flagged as a range and c lies inside it */
|
||||
bot = mid + 1; /* c sorts after this entry: keep the upper half */
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Set the script and detailed type values, and
|
||||
return the general type. */
|
||||
|
||||
*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift; /* script is a bit-field of f0 */
|
||||
*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift; /* detailed type is a bit-field of f1 */
|
||||
|
||||
return ucp_gentype[*type_ptr]; /* map the detailed type to its general category via ucp_gentype */
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return other case *
|
||||
*************************************************/
|
||||
|
||||
/* If the given character is a letter, and there is another case for the
|
||||
letter, return the other case. Otherwise, return -1.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
|
||||
Returns: the other case or NOTACHAR if none
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
_pcre_ucp_othercase(const unsigned int c)
|
||||
{ /* Binary-search ucp_table for c and return c plus its stored case offset, if any. */
|
||||
int bot = 0; /* lower bound of the search window (inclusive) */
|
||||
int top = sizeof(ucp_table)/sizeof(cnode); /* upper bound of the search window (exclusive) */
|
||||
int mid, offset; /* probed index; case offset read from the matching entry */
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot) return (unsigned int) -1; /* window is empty: no entry covers c */
|
||||
mid = (bot + top) >> 1; /* probe the midpoint of the window */
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break; /* exact hit on the entry's character field */
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid; /* c sorts before this entry: keep the lower half */
|
||||
else
|
||||
|
||||
{
|
||||
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
||||
(ucp_table[mid].f1 & f1_rangemask)) break; /* entry is flagged as a range and c lies inside it */
|
||||
bot = mid + 1; /* c sorts after this entry: keep the upper half */
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
|
||||
return the other case if there is one, else NOTACHAR. */
|
||||
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
|
||||
|
||||
offset = ucp_table[mid].f1 & f1_casemask; /* case offset is a bit-field of f1 */
|
||||
if ((offset & f1_caseneg) != 0) offset |= f1_caseneg; /* NOTE(review): presumably sign-extends a negative offset; f1_caseneg is defined in ucpinter.h, not visible here -- confirm */
|
||||
return (offset == 0)? NOTACHAR : c + offset; /* a zero offset means the entry has no other case */
|
||||
}
|
||||
|
||||
|
||||
/* End of pcre_ucp_searchfuncs.c */
|
||||
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -65,7 +65,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
|
||||
@@ -43,7 +43,7 @@ from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -65,7 +65,7 @@ Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
{
|
||||
int rc;
|
||||
@@ -114,7 +114,7 @@ Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
@@ -231,7 +231,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
{
|
||||
@@ -276,7 +276,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
@@ -308,7 +308,7 @@ Returns: if successful: 0
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
{
|
||||
@@ -353,7 +353,7 @@ Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
@@ -386,7 +386,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
{
|
||||
@@ -433,7 +433,7 @@ Returns: if successful:
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
@@ -456,7 +456,7 @@ Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
||||
pcre_free_substring(const char *pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
|
||||
@@ -132,6 +132,20 @@ PCRE_EXP_DATA_DEFN only if they are not already set. */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* When compiling with the MSVC compiler, it is sometimes necessary to include
|
||||
a "calling convention" before exported function names. (This is secondhand
|
||||
information; I know nothing about MSVC myself). For example, something like
|
||||
|
||||
void __cdecl function(....)
|
||||
|
||||
might be needed. In order to make this easy, all the exported functions have
|
||||
PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
|
||||
set, we ensure here that it has no effect. */
|
||||
|
||||
#ifndef PCRE_CALL_CONVENTION
|
||||
#define PCRE_CALL_CONVENTION
|
||||
#endif
|
||||
|
||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||
cannot determine these outside the compilation (e.g. by running a program as
|
||||
part of "configure") because PCRE is often cross-compiled for use on other
|
||||
@@ -140,16 +154,20 @@ preprocessor time in standard C environments. */
|
||||
|
||||
#if USHRT_MAX == 65535
|
||||
typedef unsigned short pcre_uint16;
|
||||
typedef short pcre_int16;
|
||||
#elif UINT_MAX == 65535
|
||||
typedef unsigned int pcre_uint16;
|
||||
typedef int pcre_int16;
|
||||
#else
|
||||
#error Cannot determine a type for 16-bit unsigned integers
|
||||
#endif
|
||||
|
||||
#if UINT_MAX == 4294967295
|
||||
typedef unsigned int pcre_uint32;
|
||||
typedef int pcre_int32;
|
||||
#elif ULONG_MAX == 4294967295
|
||||
typedef unsigned long int pcre_uint32;
|
||||
typedef long int pcre_int32;
|
||||
#else
|
||||
#error Cannot determine a type for 32-bit unsigned integers
|
||||
#endif
|
||||
@@ -363,7 +381,6 @@ never be called in byte mode. To make sure it can never even appear when UTF-8
|
||||
support is omitted, we don't even define it. */
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define NEXTCHAR(p) p++;
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
@@ -373,13 +390,6 @@ support is omitted, we don't even define it. */
|
||||
|
||||
#else /* SUPPORT_UTF8 */
|
||||
|
||||
/* Advance a character pointer one byte in non-UTF-8 mode and by one character
|
||||
in UTF-8 mode. */
|
||||
|
||||
#define NEXTCHAR(p) \
|
||||
p++; \
|
||||
if (utf8) { while((*p & 0xc0) == 0x80) p++; }
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
@@ -549,7 +559,8 @@ variable-length repeat, or a anything other than literal characters. */
|
||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
||||
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
|
||||
environments where these macros are defined elsewhere. */
|
||||
|
||||
#ifndef FALSE
|
||||
typedef int BOOL;
|
||||
@@ -1128,13 +1139,38 @@ extern BOOL _pcre_is_newline(const uschar *, int, const uschar *,
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
|
||||
extern unsigned int _pcre_ucp_othercase(const unsigned int);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
|
||||
int *, BOOL);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
||||
|
||||
/* Unicode character database (UCD) */
|
||||
|
||||
typedef struct {
|
||||
uschar script;
|
||||
uschar chartype;
|
||||
pcre_int32 other_case;
|
||||
} ucd_record;
|
||||
|
||||
extern const ucd_record _pcre_ucd_records[];
|
||||
extern const uschar _pcre_ucd_stage1[];
|
||||
extern const pcre_uint16 _pcre_ucd_stage2[];
|
||||
extern const int _pcre_ucp_gentype[];
|
||||
|
||||
|
||||
/* UCD access macros */
|
||||
|
||||
#define UCD_BLOCK_SIZE 128
|
||||
#define GET_UCD(ch) (_pcre_ucd_records + \
|
||||
_pcre_ucd_stage2[_pcre_ucd_stage1[(ch) / UCD_BLOCK_SIZE] * \
|
||||
UCD_BLOCK_SIZE + ch % UCD_BLOCK_SIZE])
|
||||
|
||||
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
|
||||
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
|
||||
#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
|
||||
#define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)
|
||||
|
||||
#endif
|
||||
|
||||
/* End of pcre_internal.h */
|
||||
|
||||
@@ -43,7 +43,7 @@ information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -72,7 +72,7 @@ Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
|
||||
@@ -45,7 +45,7 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
# if 1
|
||||
# if 2875
|
||||
# include "_hbconf.h"
|
||||
# endif
|
||||
# include "pcreinal.h"
|
||||
|
||||
@@ -47,7 +47,7 @@ and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -78,11 +78,10 @@ for (j = i; j > 0; j--)
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
#else
|
||||
/* pacify warnings */
|
||||
(void)(cvalue);
|
||||
(void)(buffer);
|
||||
return 0; /* Keep compiler happy; this function won't ever be */
|
||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||
(void)(cvalue); /* Keep compiler happy; this function won't ever be */
|
||||
(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* End of pcre_ord2utf8.c */
|
||||
|
||||
@@ -44,7 +44,7 @@ pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -68,7 +68,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN int
|
||||
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
|
||||
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -220,6 +220,7 @@ do
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
tcode++;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
@@ -503,7 +504,7 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
PCRE_EXP_DEFN pcre_extra *
|
||||
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
|
||||
@@ -44,7 +44,7 @@ uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -87,6 +87,19 @@ const uschar _pcre_utf8_table4[] = {
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
const int _pcre_ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
@@ -94,7 +107,10 @@ field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data is unlikely. */
|
||||
data are unlikely.
|
||||
|
||||
July 2008: There is now a script called maint/GenerateUtt.py which can be used
|
||||
to generate this data instead of maintaining it entirely by hand. */
|
||||
|
||||
const char _pcre_utt_names[] =
|
||||
"Any\0"
|
||||
@@ -108,8 +124,10 @@ const char _pcre_utt_names[] =
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Carian\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cham\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
@@ -136,12 +154,14 @@ const char _pcre_utt_names[] =
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kayah_Li\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Lepcha\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
@@ -149,6 +169,8 @@ const char _pcre_utt_names[] =
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"Lycian\0"
|
||||
"Lydian\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
@@ -163,6 +185,7 @@ const char _pcre_utt_names[] =
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Ol_Chiki\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
@@ -177,14 +200,17 @@ const char _pcre_utt_names[] =
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Rejang\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Saurashtra\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Sundanese\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
@@ -197,6 +223,7 @@ const char _pcre_utt_names[] =
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Vai\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
@@ -204,111 +231,122 @@ const char _pcre_utt_names[] =
|
||||
"Zs\0";
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 106, PT_PC, ucp_Cn },
|
||||
{ 109, PT_PC, ucp_Co },
|
||||
{ 112, PT_SC, ucp_Common },
|
||||
{ 119, PT_SC, ucp_Coptic },
|
||||
{ 126, PT_PC, ucp_Cs },
|
||||
{ 129, PT_SC, ucp_Cuneiform },
|
||||
{ 139, PT_SC, ucp_Cypriot },
|
||||
{ 147, PT_SC, ucp_Cyrillic },
|
||||
{ 156, PT_SC, ucp_Deseret },
|
||||
{ 164, PT_SC, ucp_Devanagari },
|
||||
{ 175, PT_SC, ucp_Ethiopic },
|
||||
{ 184, PT_SC, ucp_Georgian },
|
||||
{ 193, PT_SC, ucp_Glagolitic },
|
||||
{ 204, PT_SC, ucp_Gothic },
|
||||
{ 211, PT_SC, ucp_Greek },
|
||||
{ 217, PT_SC, ucp_Gujarati },
|
||||
{ 226, PT_SC, ucp_Gurmukhi },
|
||||
{ 235, PT_SC, ucp_Han },
|
||||
{ 239, PT_SC, ucp_Hangul },
|
||||
{ 246, PT_SC, ucp_Hanunoo },
|
||||
{ 254, PT_SC, ucp_Hebrew },
|
||||
{ 261, PT_SC, ucp_Hiragana },
|
||||
{ 270, PT_SC, ucp_Inherited },
|
||||
{ 280, PT_SC, ucp_Kannada },
|
||||
{ 288, PT_SC, ucp_Katakana },
|
||||
{ 297, PT_SC, ucp_Kharoshthi },
|
||||
{ 308, PT_SC, ucp_Khmer },
|
||||
{ 314, PT_GC, ucp_L },
|
||||
{ 316, PT_LAMP, 0 },
|
||||
{ 319, PT_SC, ucp_Lao },
|
||||
{ 323, PT_SC, ucp_Latin },
|
||||
{ 329, PT_SC, ucp_Limbu },
|
||||
{ 335, PT_SC, ucp_Linear_B },
|
||||
{ 344, PT_PC, ucp_Ll },
|
||||
{ 347, PT_PC, ucp_Lm },
|
||||
{ 350, PT_PC, ucp_Lo },
|
||||
{ 353, PT_PC, ucp_Lt },
|
||||
{ 356, PT_PC, ucp_Lu },
|
||||
{ 359, PT_GC, ucp_M },
|
||||
{ 361, PT_SC, ucp_Malayalam },
|
||||
{ 371, PT_PC, ucp_Mc },
|
||||
{ 374, PT_PC, ucp_Me },
|
||||
{ 377, PT_PC, ucp_Mn },
|
||||
{ 380, PT_SC, ucp_Mongolian },
|
||||
{ 390, PT_SC, ucp_Myanmar },
|
||||
{ 398, PT_GC, ucp_N },
|
||||
{ 400, PT_PC, ucp_Nd },
|
||||
{ 403, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 415, PT_SC, ucp_Nko },
|
||||
{ 419, PT_PC, ucp_Nl },
|
||||
{ 422, PT_PC, ucp_No },
|
||||
{ 425, PT_SC, ucp_Ogham },
|
||||
{ 431, PT_SC, ucp_Old_Italic },
|
||||
{ 442, PT_SC, ucp_Old_Persian },
|
||||
{ 454, PT_SC, ucp_Oriya },
|
||||
{ 460, PT_SC, ucp_Osmanya },
|
||||
{ 468, PT_GC, ucp_P },
|
||||
{ 470, PT_PC, ucp_Pc },
|
||||
{ 473, PT_PC, ucp_Pd },
|
||||
{ 476, PT_PC, ucp_Pe },
|
||||
{ 479, PT_PC, ucp_Pf },
|
||||
{ 482, PT_SC, ucp_Phags_Pa },
|
||||
{ 491, PT_SC, ucp_Phoenician },
|
||||
{ 502, PT_PC, ucp_Pi },
|
||||
{ 505, PT_PC, ucp_Po },
|
||||
{ 508, PT_PC, ucp_Ps },
|
||||
{ 511, PT_SC, ucp_Runic },
|
||||
{ 517, PT_GC, ucp_S },
|
||||
{ 519, PT_PC, ucp_Sc },
|
||||
{ 522, PT_SC, ucp_Shavian },
|
||||
{ 530, PT_SC, ucp_Sinhala },
|
||||
{ 538, PT_PC, ucp_Sk },
|
||||
{ 541, PT_PC, ucp_Sm },
|
||||
{ 544, PT_PC, ucp_So },
|
||||
{ 547, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 560, PT_SC, ucp_Syriac },
|
||||
{ 567, PT_SC, ucp_Tagalog },
|
||||
{ 575, PT_SC, ucp_Tagbanwa },
|
||||
{ 584, PT_SC, ucp_Tai_Le },
|
||||
{ 591, PT_SC, ucp_Tamil },
|
||||
{ 597, PT_SC, ucp_Telugu },
|
||||
{ 604, PT_SC, ucp_Thaana },
|
||||
{ 611, PT_SC, ucp_Thai },
|
||||
{ 616, PT_SC, ucp_Tibetan },
|
||||
{ 624, PT_SC, ucp_Tifinagh },
|
||||
{ 633, PT_SC, ucp_Ugaritic },
|
||||
{ 642, PT_SC, ucp_Yi },
|
||||
{ 645, PT_GC, ucp_Z },
|
||||
{ 647, PT_PC, ucp_Zl },
|
||||
{ 650, PT_PC, ucp_Zp },
|
||||
{ 653, PT_PC, ucp_Zs }
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_SC, ucp_Carian },
|
||||
{ 98, PT_PC, ucp_Cc },
|
||||
{ 101, PT_PC, ucp_Cf },
|
||||
{ 104, PT_SC, ucp_Cham },
|
||||
{ 109, PT_SC, ucp_Cherokee },
|
||||
{ 118, PT_PC, ucp_Cn },
|
||||
{ 121, PT_PC, ucp_Co },
|
||||
{ 124, PT_SC, ucp_Common },
|
||||
{ 131, PT_SC, ucp_Coptic },
|
||||
{ 138, PT_PC, ucp_Cs },
|
||||
{ 141, PT_SC, ucp_Cuneiform },
|
||||
{ 151, PT_SC, ucp_Cypriot },
|
||||
{ 159, PT_SC, ucp_Cyrillic },
|
||||
{ 168, PT_SC, ucp_Deseret },
|
||||
{ 176, PT_SC, ucp_Devanagari },
|
||||
{ 187, PT_SC, ucp_Ethiopic },
|
||||
{ 196, PT_SC, ucp_Georgian },
|
||||
{ 205, PT_SC, ucp_Glagolitic },
|
||||
{ 216, PT_SC, ucp_Gothic },
|
||||
{ 223, PT_SC, ucp_Greek },
|
||||
{ 229, PT_SC, ucp_Gujarati },
|
||||
{ 238, PT_SC, ucp_Gurmukhi },
|
||||
{ 247, PT_SC, ucp_Han },
|
||||
{ 251, PT_SC, ucp_Hangul },
|
||||
{ 258, PT_SC, ucp_Hanunoo },
|
||||
{ 266, PT_SC, ucp_Hebrew },
|
||||
{ 273, PT_SC, ucp_Hiragana },
|
||||
{ 282, PT_SC, ucp_Inherited },
|
||||
{ 292, PT_SC, ucp_Kannada },
|
||||
{ 300, PT_SC, ucp_Katakana },
|
||||
{ 309, PT_SC, ucp_Kayah_Li },
|
||||
{ 318, PT_SC, ucp_Kharoshthi },
|
||||
{ 329, PT_SC, ucp_Khmer },
|
||||
{ 335, PT_GC, ucp_L },
|
||||
{ 337, PT_LAMP, 0 },
|
||||
{ 340, PT_SC, ucp_Lao },
|
||||
{ 344, PT_SC, ucp_Latin },
|
||||
{ 350, PT_SC, ucp_Lepcha },
|
||||
{ 357, PT_SC, ucp_Limbu },
|
||||
{ 363, PT_SC, ucp_Linear_B },
|
||||
{ 372, PT_PC, ucp_Ll },
|
||||
{ 375, PT_PC, ucp_Lm },
|
||||
{ 378, PT_PC, ucp_Lo },
|
||||
{ 381, PT_PC, ucp_Lt },
|
||||
{ 384, PT_PC, ucp_Lu },
|
||||
{ 387, PT_SC, ucp_Lycian },
|
||||
{ 394, PT_SC, ucp_Lydian },
|
||||
{ 401, PT_GC, ucp_M },
|
||||
{ 403, PT_SC, ucp_Malayalam },
|
||||
{ 413, PT_PC, ucp_Mc },
|
||||
{ 416, PT_PC, ucp_Me },
|
||||
{ 419, PT_PC, ucp_Mn },
|
||||
{ 422, PT_SC, ucp_Mongolian },
|
||||
{ 432, PT_SC, ucp_Myanmar },
|
||||
{ 440, PT_GC, ucp_N },
|
||||
{ 442, PT_PC, ucp_Nd },
|
||||
{ 445, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 457, PT_SC, ucp_Nko },
|
||||
{ 461, PT_PC, ucp_Nl },
|
||||
{ 464, PT_PC, ucp_No },
|
||||
{ 467, PT_SC, ucp_Ogham },
|
||||
{ 473, PT_SC, ucp_Ol_Chiki },
|
||||
{ 482, PT_SC, ucp_Old_Italic },
|
||||
{ 493, PT_SC, ucp_Old_Persian },
|
||||
{ 505, PT_SC, ucp_Oriya },
|
||||
{ 511, PT_SC, ucp_Osmanya },
|
||||
{ 519, PT_GC, ucp_P },
|
||||
{ 521, PT_PC, ucp_Pc },
|
||||
{ 524, PT_PC, ucp_Pd },
|
||||
{ 527, PT_PC, ucp_Pe },
|
||||
{ 530, PT_PC, ucp_Pf },
|
||||
{ 533, PT_SC, ucp_Phags_Pa },
|
||||
{ 542, PT_SC, ucp_Phoenician },
|
||||
{ 553, PT_PC, ucp_Pi },
|
||||
{ 556, PT_PC, ucp_Po },
|
||||
{ 559, PT_PC, ucp_Ps },
|
||||
{ 562, PT_SC, ucp_Rejang },
|
||||
{ 569, PT_SC, ucp_Runic },
|
||||
{ 575, PT_GC, ucp_S },
|
||||
{ 577, PT_SC, ucp_Saurashtra },
|
||||
{ 588, PT_PC, ucp_Sc },
|
||||
{ 591, PT_SC, ucp_Shavian },
|
||||
{ 599, PT_SC, ucp_Sinhala },
|
||||
{ 607, PT_PC, ucp_Sk },
|
||||
{ 610, PT_PC, ucp_Sm },
|
||||
{ 613, PT_PC, ucp_So },
|
||||
{ 616, PT_SC, ucp_Sundanese },
|
||||
{ 626, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 639, PT_SC, ucp_Syriac },
|
||||
{ 646, PT_SC, ucp_Tagalog },
|
||||
{ 654, PT_SC, ucp_Tagbanwa },
|
||||
{ 663, PT_SC, ucp_Tai_Le },
|
||||
{ 670, PT_SC, ucp_Tamil },
|
||||
{ 676, PT_SC, ucp_Telugu },
|
||||
{ 683, PT_SC, ucp_Thaana },
|
||||
{ 690, PT_SC, ucp_Thai },
|
||||
{ 695, PT_SC, ucp_Tibetan },
|
||||
{ 703, PT_SC, ucp_Tifinagh },
|
||||
{ 712, PT_SC, ucp_Ugaritic },
|
||||
{ 721, PT_SC, ucp_Vai },
|
||||
{ 725, PT_SC, ucp_Yi },
|
||||
{ 728, PT_GC, ucp_Z },
|
||||
{ 730, PT_PC, ucp_Zl },
|
||||
{ 733, PT_PC, ucp_Zp },
|
||||
{ 736, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
@@ -43,7 +43,7 @@ see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
|
||||
2610
harbour/source/hbpcre/pcreucd.c
Normal file
2610
harbour/source/hbpcre/pcreucd.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -79,7 +79,7 @@ I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_EXP_DEFN const char *
|
||||
PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
|
||||
pcre_version(void)
|
||||
{
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
|
||||
@@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
strings. */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -155,8 +155,7 @@ for (p = string; length-- > 0; p++)
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* pacify warnings */
|
||||
(void)(string);
|
||||
(void)(string); /* Keep picky compilers happy */
|
||||
(void)(length);
|
||||
#endif
|
||||
|
||||
|
||||
@@ -43,7 +43,7 @@ class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_def_exec(). */
|
||||
|
||||
|
||||
#if 1
|
||||
#if 2875
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
@@ -104,8 +104,7 @@ while ((t = *data++) != XCL_END)
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
const ucd_record * prop = GET_UCD(c);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
@@ -114,20 +113,20 @@ while ((t = *data++) != XCL_END)
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
||||
if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt) ==
|
||||
(t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||
if ((data[1] == _pcre_ucp_gentype[prop->chartype]) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
if ((data[1] == prop->chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||
if ((data[1] == prop->script) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
|
||||
@@ -121,11 +121,24 @@ enum {
|
||||
ucp_Tifinagh,
|
||||
ucp_Ugaritic,
|
||||
ucp_Yi,
|
||||
ucp_Balinese, /* New for Unicode 5.0.0 */
|
||||
ucp_Cuneiform, /* New for Unicode 5.0.0 */
|
||||
ucp_Nko, /* New for Unicode 5.0.0 */
|
||||
ucp_Phags_Pa, /* New for Unicode 5.0.0 */
|
||||
ucp_Phoenician /* New for Unicode 5.0.0 */
|
||||
/* New for Unicode 5.0: */
|
||||
ucp_Balinese,
|
||||
ucp_Cuneiform,
|
||||
ucp_Nko,
|
||||
ucp_Phags_Pa,
|
||||
ucp_Phoenician,
|
||||
/* New for Unicode 5.1: */
|
||||
ucp_Carian,
|
||||
ucp_Cham,
|
||||
ucp_Kayah_Li,
|
||||
ucp_Lepcha,
|
||||
ucp_Lycian,
|
||||
ucp_Lydian,
|
||||
ucp_Ol_Chiki,
|
||||
ucp_Rejang,
|
||||
ucp_Saurashtra,
|
||||
ucp_Sundanese,
|
||||
ucp_Vai
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
/*************************************************
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
#ifndef _UCPINTERNAL_H
|
||||
#define _UCPINTERNAL_H
|
||||
|
||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||
words that form a data item in the table. */
|
||||
|
||||
typedef struct cnode {
|
||||
pcre_uint32 f0;
|
||||
pcre_uint32 f1;
|
||||
} cnode;
|
||||
|
||||
/* Things for the f0 field */
|
||||
|
||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||
#define f0_scriptshift 24 /* Shift for script value */
|
||||
#define f0_rangeflag 0x00800000 /* Flag for a range item */
|
||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||
|
||||
/* Things for the f1 field */
|
||||
|
||||
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
||||
#define f1_typeshift 26 /* Shift for the type field */
|
||||
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
||||
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
||||
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
||||
|
||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||
32-bit integers are used as follows:
|
||||
|
||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||
defined by the enum in ucp.h.
|
||||
|
||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||
It is not set if this entry defines a single character
|
||||
|
||||
(3) The 0x00600000 bits are spare.
|
||||
|
||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||
|
||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||
defined by an enum in ucp.h.
|
||||
|
||||
(2) The 0x03ff0000 bits are spare.
|
||||
|
||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||
range if this entry defines a range, OR the *signed* offset to the
|
||||
character's "other case" partner if this entry defines a single
|
||||
character. There is no partner if the value is zero.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||
-------------------------------------------------------------------------------
|
||||
| | | | |
|
||||
| | |-> spare | |-> spare
|
||||
| | |
|
||||
| |-> spare |-> spare
|
||||
|
|
||||
|-> range flag
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||
|
||||
When searching the data, proceed as follows:
|
||||
|
||||
(1) Set up for a binary chop search.
|
||||
|
||||
(2) If the top is not greater than the bottom, the character is not in the
|
||||
table. Its type must therefore be "Cn" ("Undefined").
|
||||
|
||||
(3) Find the middle vector element.
|
||||
|
||||
(4) Extract the code point and compare. If equal, we are done.
|
||||
|
||||
(5) If the test character is smaller, set the top to the current point, and
|
||||
goto (2).
|
||||
|
||||
(6) If the current entry defines a range, compute the last character by adding
|
||||
the offset, and see if the test character is within the range. If it is,
|
||||
we are done.
|
||||
|
||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||
(2).
|
||||
*/
|
||||
|
||||
#endif /* _UCPINTERNAL_H */
|
||||
|
||||
/* End of ucpinternal.h */
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user