2007-05-31 15:10 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
* harbour/common.mak
* harbour/makefile.bc
* harbour/makefile.vc
* harbour/bin/hb-func.sh
* harbour/config/bsd/gcc.cf
* harbour/config/darwin/gcc.cf
* harbour/config/hpux/gcc.cf
* harbour/config/linux/gcc.cf
* harbour/config/sunos/gcc.cf
* harbour/source/Makefile
+ harbour/source/hbpcre/ChangeLog
+ harbour/source/hbpcre/Makefile
+ harbour/source/hbpcre/chartabs.c
+ harbour/source/hbpcre/config.h
+ harbour/source/hbpcre/dftables.c
+ harbour/source/hbpcre/pcre.h
+ harbour/source/hbpcre/pcrecomp.c
+ harbour/source/hbpcre/pcreconf.c
+ harbour/source/hbpcre/pcredfa.c
+ harbour/source/hbpcre/pcreexec.c
+ harbour/source/hbpcre/pcrefind.c
+ harbour/source/hbpcre/pcrefinf.c
+ harbour/source/hbpcre/pcreget.c
+ harbour/source/hbpcre/pcreglob.c
+ harbour/source/hbpcre/pcreinal.h
+ harbour/source/hbpcre/pcreinfo.c
+ harbour/source/hbpcre/pcremktb.c
+ harbour/source/hbpcre/pcreoutf.c
+ harbour/source/hbpcre/pcreprni.c
+ harbour/source/hbpcre/pcrerefc.c
+ harbour/source/hbpcre/pcrestud.c
+ harbour/source/hbpcre/pcretabs.c
+ harbour/source/hbpcre/pcretryf.c
+ harbour/source/hbpcre/pcrever.c
+ harbour/source/hbpcre/pcrevutf.c
+ harbour/source/hbpcre/pcrexcls.c
+ harbour/source/hbpcre/ucp.h
+ harbour/source/hbpcre/ucpinter.h
+ harbour/source/hbpcre/ucptable.c
+ added HBPCRE library - based on older xHarbour 6.3 version
filenames changed to 8.3 DOS format
* harbour/include/hbregex.h
* harbour/source/rtl/Makefile
* harbour/source/rtl/hbregex.c
+ harbour/source/rtl/hbregexc.c
* divided harbour regular expression functions into two files
Now the low-level regular expression library is not linked with the
application unless the user uses or REQUESTs one of the HB_REGEX*
functions. It also means that DBOI_SKIPREGEX will not work when the
regex module is not linked.
+ added support for built-in regular expression library
+ added ulLen parameter to hb_regexMatch() to support strings with
embedded 0
* harbour/contrib/bmdbfcdx/bmdbfcdx1.c
* harbour/source/rdd/dbfcdx/dbfcdx1.c
* harbour/source/rdd/dbfntx/dbfntx1.c
* harbour/source/rtl/strmatch.c
* use new hb_regexMatch() format
* harbour/utils/hbrun/Makefile
+ added hbpcre to linked library list
This commit is contained in:
@@ -8,6 +8,71 @@
|
||||
2002-12-01 13:30 UTC+0100 Foo Bar <foo.bar@foobar.org>
|
||||
*/
|
||||
|
||||
2007-05-31 15:10 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
* harbour/common.mak
|
||||
* harbour/makefile.bc
|
||||
* harbour/makefile.vc
|
||||
* harbour/bin/hb-func.sh
|
||||
* harbour/config/bsd/gcc.cf
|
||||
* harbour/config/darwin/gcc.cf
|
||||
* harbour/config/hpux/gcc.cf
|
||||
* harbour/config/linux/gcc.cf
|
||||
* harbour/config/sunos/gcc.cf
|
||||
* harbour/source/Makefile
|
||||
+ harbour/source/hbpcre/ChangeLog
|
||||
+ harbour/source/hbpcre/Makefile
|
||||
+ harbour/source/hbpcre/chartabs.c
|
||||
+ harbour/source/hbpcre/config.h
|
||||
+ harbour/source/hbpcre/dftables.c
|
||||
+ harbour/source/hbpcre/pcre.h
|
||||
+ harbour/source/hbpcre/pcrecomp.c
|
||||
+ harbour/source/hbpcre/pcreconf.c
|
||||
+ harbour/source/hbpcre/pcredfa.c
|
||||
+ harbour/source/hbpcre/pcreexec.c
|
||||
+ harbour/source/hbpcre/pcrefind.c
|
||||
+ harbour/source/hbpcre/pcrefinf.c
|
||||
+ harbour/source/hbpcre/pcreget.c
|
||||
+ harbour/source/hbpcre/pcreglob.c
|
||||
+ harbour/source/hbpcre/pcreinal.h
|
||||
+ harbour/source/hbpcre/pcreinfo.c
|
||||
+ harbour/source/hbpcre/pcremktb.c
|
||||
+ harbour/source/hbpcre/pcreoutf.c
|
||||
+ harbour/source/hbpcre/pcreprni.c
|
||||
+ harbour/source/hbpcre/pcrerefc.c
|
||||
+ harbour/source/hbpcre/pcrestud.c
|
||||
+ harbour/source/hbpcre/pcretabs.c
|
||||
+ harbour/source/hbpcre/pcretryf.c
|
||||
+ harbour/source/hbpcre/pcrever.c
|
||||
+ harbour/source/hbpcre/pcrevutf.c
|
||||
+ harbour/source/hbpcre/pcrexcls.c
|
||||
+ harbour/source/hbpcre/ucp.h
|
||||
+ harbour/source/hbpcre/ucpinter.h
|
||||
+ harbour/source/hbpcre/ucptable.c
|
||||
+ added HBPCRE library - based on older xHarbour 6.3 version
|
||||
filenames changed to 8.3 DOS format
|
||||
|
||||
* harbour/include/hbregex.h
|
||||
* harbour/source/rtl/Makefile
|
||||
* harbour/source/rtl/hbregex.c
|
||||
+ harbour/source/rtl/hbregexc.c
|
||||
* divided harbour regular expression functions into two files
|
||||
Now the low-level regular expression library is not linked with the
|
||||
application unless the user uses or REQUESTs one of the HB_REGEX*
|
||||
functions. It also means that DBOI_SKIPREGEX will not work when the
|
||||
regex module is not linked.
|
||||
+ added support for built-in regular expression library
|
||||
+ added ulLen parameter to hb_regexMatch() to support strings with
|
||||
embedded 0
|
||||
|
||||
* harbour/contrib/bmdbfcdx/bmdbfcdx1.c
|
||||
* harbour/source/rdd/dbfcdx/dbfcdx1.c
|
||||
* harbour/source/rdd/dbfntx/dbfntx1.c
|
||||
* harbour/source/rtl/strmatch.c
|
||||
* use new hb_regexMatch() format
|
||||
|
||||
* harbour/utils/hbrun/Makefile
|
||||
+ added hbpcre to linked library list
|
||||
|
||||
2007-05-30 01:35 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
* harbour/source/main/harbour.c
|
||||
- removed unnecessary code for HB_PARANOID_MEM_CHECK macro
|
||||
|
||||
@@ -64,7 +64,7 @@ mk_hbgetlibs()
|
||||
{
|
||||
if [ -z "$@" ]
|
||||
then
|
||||
echo -n "vm pp rtl rdd dbffpt dbfcdx dbfntx hsx hbsix usrrdd ${HB_DB_DRVEXT} macro common lang codepage gtcrs gtsln gtxvt gtxwc gtalleg gtcgi gtstd gtpca gtwin gtwvt gtdos gtos2 debug profiler compiler"
|
||||
echo -n "vm pp rtl rdd dbffpt dbfcdx dbfntx hsx hbsix usrrdd ${HB_DB_DRVEXT} macro common lang codepage gtcrs gtsln gtxvt gtxwc gtalleg gtcgi gtstd gtpca gtwin gtwvt gtdos gtos2 debug profiler compiler hbpcre"
|
||||
else
|
||||
echo -n "$@"
|
||||
fi
|
||||
@@ -119,7 +119,10 @@ mk_hbtools()
|
||||
HB_CRS_LIB=""
|
||||
HB_SLN_LIB=""
|
||||
if [ "${C_USR//-DHB_PCRE_REGEX/}" != "${C_USR}" ]; then
|
||||
HB_SYS_LIBS="-lpcreposix -lpcre ${HB_SYS_LIBS}"
|
||||
HB_SYS_LIBS="-lpcre ${HB_SYS_LIBS}"
|
||||
hb_libs="${hb_libs//hbpcre/}"
|
||||
elif [ "${C_USR//-DHB_POSIX_REGEX/}" = "${C_USR}" ]; then
|
||||
hb_libs="${hb_libs//hbpcre/}"
|
||||
fi
|
||||
if [ "${HB_COMPILER}" = "mingw32" ]; then
|
||||
HB_SYS_LIBS="${HB_SYS_LIBS} -luser32 -lwinspool -lgdi32 -lcomctl32 -lcomdlg32 -lole32 -loleaut32 -luuid -lwsock32 -lws2_32"
|
||||
|
||||
@@ -388,6 +388,7 @@ RTL_LIB_OBJS = \
|
||||
$(OBJ_DIR)\hbinet.obj \
|
||||
$(OBJ_DIR)\hbrandom.obj \
|
||||
$(OBJ_DIR)\hbregex.obj \
|
||||
$(OBJ_DIR)\hbregexc.obj \
|
||||
$(OBJ_DIR)\hbtoken.obj \
|
||||
$(OBJ_DIR)\idle.obj \
|
||||
$(OBJ_DIR)\inkey.obj \
|
||||
@@ -603,6 +604,29 @@ LANG_LIB_OBJS = \
|
||||
$(OBJ_DIR)\msgzhb5.obj \
|
||||
$(OBJ_DIR)\msgzhgb.obj \
|
||||
|
||||
#**********************************************************
|
||||
PCRE_LIB_OBJS = \
|
||||
$(OBJ_DIR)\chartabs.obj \
|
||||
$(OBJ_DIR)\pcrecomp.obj \
|
||||
$(OBJ_DIR)\pcreconf.obj \
|
||||
$(OBJ_DIR)\pcredfa.obj \
|
||||
$(OBJ_DIR)\pcreexec.obj \
|
||||
$(OBJ_DIR)\pcrefinf.obj \
|
||||
$(OBJ_DIR)\pcreget.obj \
|
||||
$(OBJ_DIR)\pcreglob.obj \
|
||||
$(OBJ_DIR)\pcreinfo.obj \
|
||||
$(OBJ_DIR)\pcremktb.obj \
|
||||
$(OBJ_DIR)\pcreoutf.obj \
|
||||
$(OBJ_DIR)\pcreprni.obj \
|
||||
$(OBJ_DIR)\pcrerefc.obj \
|
||||
$(OBJ_DIR)\pcrestud.obj \
|
||||
$(OBJ_DIR)\pcretabs.obj \
|
||||
$(OBJ_DIR)\pcretryf.obj \
|
||||
$(OBJ_DIR)\pcrefind.obj \
|
||||
$(OBJ_DIR)\pcrevutf.obj \
|
||||
$(OBJ_DIR)\pcrever.obj \
|
||||
$(OBJ_DIR)\pcrexcls.obj \
|
||||
|
||||
#**********************************************************
|
||||
|
||||
CODEPAGE_LIB_OBJS = \
|
||||
@@ -899,6 +923,7 @@ TMP_DLL_OBJS = \
|
||||
$(DEBUG_LIB_OBJS) \
|
||||
$(LANG_LIB_OBJS) \
|
||||
$(CODEPAGE_LIB_OBJS) \
|
||||
$(PCRE_LIB_OBJS) \
|
||||
$(RDD_LIB_OBJS) \
|
||||
$(DBFNTX_LIB_OBJS) \
|
||||
$(DBFCDX_LIB_OBJS) \
|
||||
|
||||
@@ -82,7 +82,7 @@ endif
|
||||
endif
|
||||
|
||||
ifneq ($(findstring -DHB_PCRE_REGEX, $(C_USR)),)
|
||||
LINKLIBS += -lpcreposix -lpcre
|
||||
LINKLIBS += -lpcre
|
||||
endif
|
||||
|
||||
LINKLIBS += -lm -Wl,--end-group
|
||||
|
||||
@@ -106,7 +106,7 @@ endif
|
||||
endif
|
||||
|
||||
ifneq ($(findstring -DHB_PCRE_REGEX, $(C_USR)),)
|
||||
LINKLIBS += -lpcreposix -lpcre
|
||||
LINKLIBS += -lpcre
|
||||
endif
|
||||
|
||||
LINKLIBS += -lm
|
||||
|
||||
@@ -89,7 +89,7 @@ endif
|
||||
endif
|
||||
|
||||
ifneq ($(findstring -DHB_PCRE_REGEX, $(C_USR)),)
|
||||
LINKLIBS += -lpcreposix -lpcre
|
||||
LINKLIBS += -lpcre
|
||||
endif
|
||||
|
||||
LINKLIBS += -lm -lrt
|
||||
|
||||
@@ -102,7 +102,7 @@ endif
|
||||
endif
|
||||
|
||||
ifneq ($(findstring -DHB_PCRE_REGEX, $(C_USR)),)
|
||||
LINKLIBS += -lpcreposix -lpcre
|
||||
LINKLIBS += -lpcre
|
||||
endif
|
||||
|
||||
LINKLIBS += -lm -ldl -Wl,--end-group
|
||||
|
||||
@@ -89,7 +89,7 @@ endif
|
||||
endif
|
||||
|
||||
ifneq ($(findstring -DHB_PCRE_REGEX, $(C_USR)),)
|
||||
LINKLIBS += -lpcreposix -lpcre
|
||||
LINKLIBS += -lpcre
|
||||
endif
|
||||
|
||||
LINKLIBS += -lm -lrt
|
||||
|
||||
@@ -5538,7 +5538,7 @@ static BOOL hb_cdxRegexMatch( CDXAREAP pArea, PHB_REGEX pRegEx, LPCDXKEY pKey )
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( pArea );
|
||||
#endif
|
||||
return hb_regexMatch( pRegEx, szKey, FALSE );
|
||||
return hb_regexMatch( pRegEx, szKey, pKey->len, FALSE );
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -57,47 +57,64 @@
|
||||
|
||||
#if defined( _HB_REGEX_INTERNAL_ )
|
||||
|
||||
#if defined( __BORLANDC__ )
|
||||
# if __BORLANDC__ >= 0x550 && !defined( HB_PCRE_REGEX_BCC )
|
||||
# define HB_PCRE_REGEX_BCC
|
||||
# endif
|
||||
#elif ( defined( OS_UNIX_COMPATIBLE ) && !defined( __WATCOMC__ ) ) || \
|
||||
defined( __DJGPP__ )
|
||||
# if !defined( HB_POSIX_REGEX ) && !defined( HB_PCRE_REGEX )
|
||||
# define HB_POSIX_REGEX
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined( HB_PCRE_REGEX_BCC )
|
||||
# include <pcre.h>
|
||||
# include <pcreposi.h>
|
||||
# if !defined( HB_PCRE_REGEX )
|
||||
# define HB_PCRE_REGEX
|
||||
# undef HB_PCRE_REGEX
|
||||
# if !defined( HB_POSIX_REGEX )
|
||||
# define HB_POSIX_REGEX
|
||||
# endif
|
||||
#elif defined( HB_PCRE_REGEX )
|
||||
# include <pcre.h>
|
||||
# include <pcreposix.h>
|
||||
# undef HB_POSIX_REGEX
|
||||
#elif defined( HB_POSIX_REGEX )
|
||||
# include <sys/types.h>
|
||||
# include <regex.h>
|
||||
#else
|
||||
# undef _HB_REGEX_INTERNAL_
|
||||
# define HB_PCRE_REGEX
|
||||
# if defined(__XCC__) || defined(__LCC__)
|
||||
# include "source\hbpcre\pcre.h"
|
||||
# else
|
||||
# include "../source/hbpcre/pcre.h"
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#endif /* _HB_REGEX_INTERNAL_ */
|
||||
|
||||
#if defined( _HB_REGEX_INTERNAL_ )
|
||||
|
||||
typedef struct
|
||||
{
|
||||
regex_t reg;
|
||||
regmatch_t aMatches[1];
|
||||
BOOL fFree;
|
||||
int iCFlags;
|
||||
int iFlags;
|
||||
int iEFlags;
|
||||
#if defined( HB_PCRE_REGEX )
|
||||
pcre * re_pcre;
|
||||
#elif defined( HB_POSIX_REGEX )
|
||||
regex_t reg;
|
||||
#endif
|
||||
} HB_REGEX;
|
||||
typedef HB_REGEX * PHB_REGEX;
|
||||
|
||||
#if defined( HB_PCRE_REGEX )
|
||||
#define HB_REGMATCH int
|
||||
#define HB_REGMATCH_SIZE( n ) ( ( n + 1 ) * 3 )
|
||||
#define HB_REGMATCH_SO( p, n ) ( p )[ ( n ) * 2 ]
|
||||
#define HB_REGMATCH_EO( p, n ) ( p )[ ( n ) * 2 + 1 ]
|
||||
#elif defined( HB_POSIX_REGEX )
|
||||
#define HB_REGMATCH regmatch_t
|
||||
#define HB_REGMATCH_SIZE( n ) ( n )
|
||||
#define HB_REGMATCH_SO( p, n ) ( p )[ n ].rm_so
|
||||
#define HB_REGMATCH_EO( p, n ) ( p )[ n ].rm_eo
|
||||
#else
|
||||
#define HB_REGMATCH int
|
||||
#define HB_REGMATCH_SIZE( n ) ( ( n ) * 2 )
|
||||
#define HB_REGMATCH_SO( p, n ) ( p )[ ( n ) * 2 ]
|
||||
#define HB_REGMATCH_EO( p, n ) ( p )[ ( n ) * 2 + 1 ]
|
||||
#endif
|
||||
|
||||
typedef void ( * HB_REG_FREE )( PHB_REGEX );
|
||||
typedef int ( * HB_REG_COMP )( PHB_REGEX, const char * );
|
||||
typedef int ( * HB_REG_EXEC )( PHB_REGEX, const char *, ULONG, int, HB_REGMATCH * );
|
||||
|
||||
extern void hb_regexInit( HB_REG_FREE pFree, HB_REG_COMP pComp, HB_REG_EXEC pExec );
|
||||
extern HB_GARBAGE_FUNC( hb_regexRelease );
|
||||
|
||||
#ifndef REG_EXTENDED
|
||||
# define REG_EXTENDED 0x00
|
||||
#endif
|
||||
@@ -117,6 +134,7 @@ typedef void * PHB_REGEX;
|
||||
#define HBREG_NOTEOL 0x08
|
||||
#define HBREG_EXTENDED 0x10
|
||||
#define HBREG_NOSUB 0x20
|
||||
#define HBREG_DOTALL 0x40
|
||||
|
||||
#ifndef REGEX_MAX_GROUPS
|
||||
# define REGEX_MAX_GROUPS 16
|
||||
@@ -124,10 +142,10 @@ typedef void * PHB_REGEX;
|
||||
|
||||
HB_EXTERN_BEGIN
|
||||
|
||||
extern HB_EXPORT PHB_REGEX hb_regexCompile( const char *szRegEx, ULONG ulLen, int iFlags );
|
||||
extern HB_EXPORT PHB_REGEX hb_regexCompile( const char * szRegEx, ULONG ulLen, int iFlags );
|
||||
extern HB_EXPORT PHB_REGEX hb_regexGet( PHB_ITEM pRegExItm, int iFlags );
|
||||
extern HB_EXPORT void hb_regexFree( PHB_REGEX pRegEx );
|
||||
extern HB_EXPORT BOOL hb_regexMatch( PHB_REGEX pRegEx, const char *szString, BOOL fFull );
|
||||
extern HB_EXPORT BOOL hb_regexMatch( PHB_REGEX pRegEx, const char * szString, ULONG UlLen, BOOL fFull );
|
||||
|
||||
HB_EXTERN_END
|
||||
|
||||
|
||||
@@ -354,6 +354,13 @@ $(CODEPAGE_LIB) : $(CODEPAGE_LIB_OBJS)
|
||||
+)
|
||||
!
|
||||
#**********************************************************
|
||||
$(PCRE_LIB) : $(PCRE_LIB_OBJS)
|
||||
IF EXIST "$(PCRE_LIB)" $(DEL) "$(PCRE_LIB)" > NUL
|
||||
$(MKLIB) "$(PCRE_LIB)" $(ARFLAGS) @&&!
|
||||
+$(**: = &^
|
||||
+)
|
||||
!
|
||||
#**********************************************************
|
||||
$(RDD_LIB) :: BasicExes
|
||||
$(RDD_LIB) :: $(RDD_LIB_OBJS)
|
||||
IF EXIST "$(RDD_LIB)" $(DEL) "$(RDD_LIB)" > NUL
|
||||
|
||||
@@ -754,6 +754,9 @@ $(LANG_LIB) : $(LANG_LIB_OBJS)
|
||||
$(CODEPAGE_LIB) : $(CODEPAGE_LIB_OBJS)
|
||||
$(MKLIB) /out:$@ $**
|
||||
#**********************************************************
|
||||
$(PCRE_LIB) : $(PCRE_LIB_OBJS)
|
||||
$(MKLIB) /out:$@ $**
|
||||
#**********************************************************
|
||||
$(RDD_LIB) : $(RDD_LIB_OBJS)
|
||||
$(MKLIB) /out:$@ $**
|
||||
#**********************************************************
|
||||
|
||||
@@ -34,6 +34,7 @@ DIRS=\
|
||||
codepage \
|
||||
lang \
|
||||
rdd \
|
||||
hbpcre \
|
||||
debug \
|
||||
|
||||
endif
|
||||
|
||||
43
harbour/source/hbpcre/ChangeLog
Normal file
43
harbour/source/hbpcre/ChangeLog
Normal file
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Harbour Project - http://www.harbour-project.org
|
||||
* xHarbour Project - http://www.xharbour.org
|
||||
*
|
||||
* Please leave here information about any modifications to the original
|
||||
* PCRE files. It will make upgrading the PCRE library much easier
|
||||
* and safer in the future.
|
||||
*
|
||||
*
|
||||
* Use this format for the entry headers:
|
||||
* YYYY-MM-DD HH:MM UTC[-|+]hhmm Your Full Name <your_email@address>
|
||||
* For example:
|
||||
* 2002-12-01 23:12 UTC+0100 Foo Bar <foo.bar@foobar.org>
|
||||
*
|
||||
*/
|
||||
|
||||
2007-05-31 15:10 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
* initial upload to Harbour CVS
|
||||
* filenames changed to keep 8.3 DOS format
|
||||
|
||||
2006-01-31 22:00 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
* Makefile
|
||||
* config.h
|
||||
* moved SUPPORT_UCP and SUPPORT_UTF8 setting to reduce command line
|
||||
overhead in DOS builds
|
||||
|
||||
2005-09-27 10:50 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
* pcreglob.c
|
||||
* minor modification to avoid G++ warning messages
|
||||
|
||||
2005-09-01 12:30 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
+ ChangeLog
|
||||
+ added change log for PCRE files
|
||||
* pcredfa.c
|
||||
* removed unused variables: is_repeated, endcode
|
||||
* chartabs.c
|
||||
* dftables.c
|
||||
* add extern declaration for _pcre_default_tables - some C++ compilers
|
||||
may need it
|
||||
31
harbour/source/hbpcre/Makefile
Normal file
31
harbour/source/hbpcre/Makefile
Normal file
@@ -0,0 +1,31 @@
|
||||
#
|
||||
# $Id$
|
||||
#
|
||||
|
||||
ROOT = ../../
|
||||
|
||||
C_SOURCES=\
|
||||
chartabs.c \
|
||||
pcrecomp.c \
|
||||
pcreconf.c \
|
||||
pcredfa.c \
|
||||
pcreexec.c \
|
||||
pcrefinf.c \
|
||||
pcreget.c \
|
||||
pcreglob.c \
|
||||
pcreinfo.c \
|
||||
pcremktb.c \
|
||||
pcreoutf.c \
|
||||
pcreprni.c \
|
||||
pcrerefc.c \
|
||||
pcrestud.c \
|
||||
pcretabs.c \
|
||||
pcretryf.c \
|
||||
pcrefind.c \
|
||||
pcrevutf.c \
|
||||
pcrever.c \
|
||||
pcrexcls.c \
|
||||
|
||||
LIBNAME=hbpcre
|
||||
|
||||
include $(TOP)$(ROOT)config/lib.cf
|
||||
184
harbour/source/hbpcre/chartabs.c
Normal file
184
harbour/source/hbpcre/chartabs.c
Normal file
@@ -0,0 +1,184 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file is automatically written by the dftables auxiliary
|
||||
program. If you edit it by hand, you might like to edit the Makefile to
|
||||
prevent its ever being regenerated.
|
||||
|
||||
This file contains the default tables for characters with codes less than
|
||||
128 (ASCII characters). These tables are used when no external tables are
|
||||
passed to PCRE. */
|
||||
|
||||
extern const unsigned char _pcre_default_tables[];
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table is a case flipping table. */
|
||||
|
||||
0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39,
|
||||
40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55,
|
||||
56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 97, 98, 99,100,101,102,103,
|
||||
104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,
|
||||
120,121,122, 91, 92, 93, 94, 95,
|
||||
96, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87,
|
||||
88, 89, 90,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,
|
||||
136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,
|
||||
152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,
|
||||
168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,
|
||||
184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,
|
||||
200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,
|
||||
216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,
|
||||
232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes that have their own
|
||||
maps are: space, xdigit, digit, upper, lower, word, graph
|
||||
print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
|
||||
0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
|
||||
0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
|
||||
0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
|
||||
/* This table identifies various classes of character by individual bits:
|
||||
0x01 white space character
|
||||
0x02 letter
|
||||
0x04 decimal digit
|
||||
0x08 hexadecimal digit
|
||||
0x10 alphanumeric or '_'
|
||||
0x80 regular expression metacharacter or binary zero
|
||||
*/
|
||||
|
||||
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
|
||||
0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
|
||||
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
|
||||
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
|
||||
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
|
||||
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of chartables.c */
|
||||
116
harbour/source/hbpcre/config.h
Normal file
116
harbour/source/hbpcre/config.h
Normal file
@@ -0,0 +1,116 @@
|
||||
|
||||
/* On Unix systems config.in is converted by configure into config.h. PCRE is
|
||||
written in Standard C, but there are a few non-standard things it can cope
|
||||
with, allowing it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
On a non-Unix system you should just copy this file into config.h, and set up
|
||||
the macros the way you need them. You should normally change the definitions of
|
||||
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
|
||||
works, these cannot be made the defaults. If your system has bcopy() and not
|
||||
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
|
||||
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
|
||||
function will be used. */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use "configure",
|
||||
this can be done via --enable-ebcdic. */
|
||||
|
||||
#ifndef EBCDIC
|
||||
#define EBCDIC 0
|
||||
#endif
|
||||
|
||||
/* If you are compiling for a system that needs some magic to be inserted
|
||||
before the definition of an exported function, define this macro to contain the
|
||||
relevant magic. It appears at the start of every exported function. */
|
||||
|
||||
#define EXPORT
|
||||
|
||||
/* Define to empty if the "const" keyword does not work. */
|
||||
|
||||
#undef const
|
||||
|
||||
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
|
||||
|
||||
#undef size_t
|
||||
|
||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
||||
doesn't have the strerror() or memmove() functions that should be present in
|
||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
||||
normally be defined with the value 1 for other systems, but unfortunately we
|
||||
can't make this the default because "configure" files generated by autoconf
|
||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
||||
found. */
|
||||
|
||||
#define HAVE_STRERROR 1
|
||||
#define HAVE_MEMMOVE 1
|
||||
|
||||
/* There are some non-Unix systems that don't even have bcopy(). If this macro
|
||||
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
||||
HAVE_BCOPY is not relevant. */
|
||||
|
||||
#define HAVE_BCOPY 0
|
||||
|
||||
/* The value of NEWLINE determines the newline character. The default is to
|
||||
leave it up to the compiler, but some sites want to force a particular value.
|
||||
On Unix systems, "configure" can be used to override this default. */
|
||||
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE '\n'
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
|
||||
longer patterns in extreme cases. On Unix systems, "configure" can be used to
|
||||
override this default. */
|
||||
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the match()
|
||||
function can be called during a single execution of pcre_exec(). (There is a
|
||||
runtime method of setting a different limit.) The limit exists in order to
|
||||
catch runaway regular expressions that take for ever to determine that they do
|
||||
not match. The default is set very large so that it does not accidentally catch
|
||||
legitimate cases. On Unix systems, "configure" can be used to override this
|
||||
default. */
|
||||
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE requires
|
||||
three integers per substring, whereas the POSIX interface provides only two. If
|
||||
the number of expected substrings is small, the wrapper function uses space on
|
||||
the stack, because this is faster than using malloc() for each call. The
|
||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
||||
THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
||||
*/
|
||||
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited size.
|
||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
||||
function; instead it creates its own stack by steam using pcre_recurse_malloc
|
||||
to get memory. For more detail, see comments and other stuff just above the
|
||||
match() function. On Unix systems, "configure" can be used to set this in the
|
||||
Makefile (use --disable-stack-for-recursion). */
|
||||
|
||||
/* #define NO_RECURSE */
|
||||
|
||||
/* xHarbour stuff - default definitions */
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define SUPPORT_UTF8
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UCP
|
||||
#define SUPPORT_UCP
|
||||
#endif
|
||||
|
||||
/* End */
|
||||
173
harbour/source/hbpcre/dftables.c
Normal file
173
harbour/source/hbpcre/dftables.c
Normal file
@@ -0,0 +1,173 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing default
|
||||
character tables for PCRE. The tables are built according to the default C
|
||||
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||
make use of its code from here in order to be consistent. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
#define DFTABLES /* pcremktb.c notices this */
|
||||
#include "pcremktb.c"
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
FILE *f;
|
||||
const unsigned char *tables = pcre_maketables();
|
||||
const unsigned char *base_of_tables = tables;
|
||||
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
f = fopen(argv[1], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are two fprintf() calls here, because gcc in pedantic mode complains
|
||||
about the very long string otherwise. */
|
||||
|
||||
fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file is automatically written by the dftables auxiliary \n"
|
||||
"program. If you edit it by hand, you might like to edit the Makefile to \n"
|
||||
"prevent its ever being regenerated.\n\n");
|
||||
fprintf(f,
|
||||
"This file contains the default tables for characters with codes less than\n"
|
||||
"128 (ASCII characters). These tables are used when no external tables are\n"
|
||||
"passed to PCRE. */\n\n"
|
||||
"extern const unsigned char _pcre_default_tables[];\n"
|
||||
"const unsigned char _pcre_default_tables[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table contains bit maps for various character classes.\n"
|
||||
"Each map is 32 bytes long and the bits run from the least\n"
|
||||
"significant end of each byte. The classes that have their own\n"
|
||||
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
||||
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) fprintf(f, "\n");
|
||||
fprintf(f, "\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x hexadecimal digit\n"
|
||||
" 0x%02x alphanumeric or '_'\n"
|
||||
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
||||
ctype_meta);
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
fprintf(f, " /* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
|
||||
fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of chartabs.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of dftables.c */
|
||||
258
harbour/source/hbpcre/pcre.h
Normal file
258
harbour/source/hbpcre/pcre.h
Normal file
@@ -0,0 +1,258 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* In its original form, this is the .in file that is transformed by
|
||||
"configure" into pcre.h.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The file pcre.h is built by "configure". Do not edit it; instead
|
||||
make changes to pcre.in. */
|
||||
|
||||
#define PCRE_MAJOR 6
|
||||
#define PCRE_MINOR 3
|
||||
#define PCRE_DATE 15-Aug-2005
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* For other operating systems, we use the standard "extern". */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Have to include stdlib.h in order to ensure that size_t is defined;
|
||||
it is needed here for malloc. */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Allow for C++ users */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Options */
|
||||
|
||||
#define PCRE_CASELESS 0x00000001
|
||||
#define PCRE_MULTILINE 0x00000002
|
||||
#define PCRE_DOTALL 0x00000004
|
||||
#define PCRE_EXTENDED 0x00000008
|
||||
#define PCRE_ANCHORED 0x00000010
|
||||
#define PCRE_DOLLAR_ENDONLY 0x00000020
|
||||
#define PCRE_EXTRA 0x00000040
|
||||
#define PCRE_NOTBOL 0x00000080
|
||||
#define PCRE_NOTEOL 0x00000100
|
||||
#define PCRE_UNGREEDY 0x00000200
|
||||
#define PCRE_NOTEMPTY 0x00000400
|
||||
#define PCRE_UTF8 0x00000800
|
||||
#define PCRE_NO_AUTO_CAPTURE 0x00001000
|
||||
#define PCRE_NO_UTF8_CHECK 0x00002000
|
||||
#define PCRE_AUTO_CALLOUT 0x00004000
|
||||
#define PCRE_PARTIAL 0x00008000
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
#define PCRE_ERROR_NOMATCH (-1)
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
|
||||
#define PCRE_ERROR_BADUTF8 (-10)
|
||||
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
|
||||
#define PCRE_ERROR_PARTIAL (-12)
|
||||
#define PCRE_ERROR_BADPARTIAL (-13)
|
||||
#define PCRE_ERROR_INTERNAL (-14)
|
||||
#define PCRE_ERROR_BADCOUNT (-15)
|
||||
#define PCRE_ERROR_DFA_UITEM (-16)
|
||||
#define PCRE_ERROR_DFA_UCOND (-17)
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
#define PCRE_INFO_OPTIONS 0
|
||||
#define PCRE_INFO_SIZE 1
|
||||
#define PCRE_INFO_CAPTURECOUNT 2
|
||||
#define PCRE_INFO_BACKREFMAX 3
|
||||
#define PCRE_INFO_FIRSTBYTE 4
|
||||
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
|
||||
#define PCRE_INFO_FIRSTTABLE 5
|
||||
#define PCRE_INFO_LASTLITERAL 6
|
||||
#define PCRE_INFO_NAMEENTRYSIZE 7
|
||||
#define PCRE_INFO_NAMECOUNT 8
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
|
||||
/* Request types for pcre_config() */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
#define PCRE_CONFIG_LINK_SIZE 2
|
||||
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
|
||||
/* Bit flags for the pcre_extra structure */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
|
||||
/* Optional per-call data block; `flags` says which of the other fields the caller has filled in. */
typedef struct pcre_extra {
|
||||
unsigned long int flags; /* Bit mask of PCRE_EXTRA_* values: which fields below are set */
|
||||
void *study_data; /* Opaque data from pcre_study(); pcre_fullinfo() reads it only when PCRE_EXTRA_STUDY_DATA is set */
|
||||
unsigned long int match_limit; /* Maximum number of calls to match(); presumably gated by PCRE_EXTRA_MATCH_LIMIT - confirm */
|
||||
void *callout_data; /* Data passed back in callouts; presumably gated by PCRE_EXTRA_CALLOUT_DATA - confirm */
|
||||
const unsigned char *tables; /* Pointer to character tables; presumably gated by PCRE_EXTRA_TABLES - confirm */
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. */
|
||||
|
||||
/* Argument block handed (by pointer) to the pcre_callout hook; fields are grouped by the block version that introduced them. */
typedef struct pcre_callout_block {
|
||||
int version; /* Identifies version of block, i.e. which of the field groups below are present */
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
const char *subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
int capture_top; /* Max current capture */
|
||||
int capture_last; /* Most recently closed capture */
|
||||
void *callout_data; /* Data passed in with the call (presumably pcre_extra.callout_data - confirm) */
|
||||
/* ------------------- Added for Version 1 -------------------------- */
|
||||
int pattern_position; /* Offset to next item in the pattern */
|
||||
int next_item_length; /* Length of next item in the pattern */
|
||||
/* ------------------------------------------------------------------ */
|
||||
} pcre_callout_block;
|
||||
|
||||
/* Indirection for store get and free functions. These can be set to
|
||||
alternative malloc/free functions if required. Special ones are used in the
|
||||
non-recursive case for "frames". There is also an optional callout function
|
||||
that is triggered by the (?C) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
||||
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_DATA_SCOPE void *pcre_malloc(size_t);
|
||||
PCRE_DATA_SCOPE void pcre_free(void *);
|
||||
PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
|
||||
PCRE_DATA_SCOPE void pcre_stack_free(void *);
|
||||
PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *,
|
||||
int, int, int, int *, int);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* End of pcre.h */
|
||||
5059
harbour/source/hbpcre/pcrecomp.c
Normal file
5059
harbour/source/hbpcre/pcrecomp.c
Normal file
File diff suppressed because it is too large
Load Diff
112
harbour/source/hbpcre/pcreconf.c
Normal file
112
harbour/source/hbpcre/pcreconf.c
Normal file
@@ -0,0 +1,112 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about what features are configured *
|
||||
*************************************************/
|
||||
|
||||
/* This function has an extensible interface so that additional items can be
|
||||
added compatibly.
|
||||
|
||||
Arguments:
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_CONFIG_UTF8:
|
||||
#ifdef SUPPORT_UTF8
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_UNICODE_PROPERTIES:
|
||||
#ifdef SUPPORT_UCP
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_NEWLINE:
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = LINK_SIZE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:
|
||||
*((int *)where) = POSIX_MALLOC_THRESHOLD;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT:
|
||||
*((unsigned int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
#else
|
||||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcreconf.c */
|
||||
1924
harbour/source/hbpcre/pcredfa.c
Normal file
1924
harbour/source/hbpcre/pcredfa.c
Normal file
File diff suppressed because it is too large
Load Diff
3632
harbour/source/hbpcre/pcreexec.c
Normal file
3632
harbour/source/hbpcre/pcreexec.c
Normal file
File diff suppressed because it is too large
Load Diff
161
harbour/source/hbpcre/pcrefind.c
Normal file
161
harbour/source/hbpcre/pcrefind.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
/* Copyright (c) University of Cambridge 2004 */
|
||||
|
||||
/* This little library provides a fast way of obtaining the basic Unicode
|
||||
properties of a character, using a compact binary tree that occupies less than
|
||||
100K bytes.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
#include "ucp.h" /* Exported interface */
|
||||
#include "ucpinter.h" /* Internal table details */
|
||||
#include "ucptable.c" /* The table itself */
|
||||
|
||||
|
||||
/* In some environments, external functions have to be preceded by some magic.
|
||||
In my world (Unix), they do not. Use a macro to deal with this. */
|
||||
|
||||
#ifndef EXPORT
|
||||
#define EXPORT
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return data *
|
||||
*************************************************/
|
||||
|
||||
/* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
type_ptr the detailed character type is returned here
|
||||
case_ptr for letters, the opposite case is returned here, if there
|
||||
is one, else zero
|
||||
|
||||
Returns: the character type category or -1 if not found
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
ucp_findchar(const int c, int *type_ptr, int *case_ptr)
|
||||
{
|
||||
cnode *node = ucp_table;
|
||||
register int cc = c;
|
||||
int case_offset;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
|
||||
if (cc == d) break;
|
||||
if (cc < d)
|
||||
{
|
||||
if ((node->f0 & f0_leftexists) == 0) return -1;
|
||||
node ++;
|
||||
}
|
||||
else
|
||||
{
|
||||
register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
|
||||
if (roffset == 0) return -1;
|
||||
node += 1 << (roffset - 1);
|
||||
}
|
||||
}
|
||||
|
||||
switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
|
||||
{
|
||||
case ucp_Cc:
|
||||
case ucp_Cf:
|
||||
case ucp_Cn:
|
||||
case ucp_Co:
|
||||
case ucp_Cs:
|
||||
return ucp_C;
|
||||
break;
|
||||
|
||||
case ucp_Ll:
|
||||
case ucp_Lu:
|
||||
case_offset = node->f2 & f2_casemask;
|
||||
if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
|
||||
*case_ptr = (case_offset == 0)? 0 : cc + case_offset;
|
||||
return ucp_L;
|
||||
|
||||
case ucp_Lm:
|
||||
case ucp_Lo:
|
||||
case ucp_Lt:
|
||||
*case_ptr = 0;
|
||||
return ucp_L;
|
||||
break;
|
||||
|
||||
case ucp_Mc:
|
||||
case ucp_Me:
|
||||
case ucp_Mn:
|
||||
return ucp_M;
|
||||
break;
|
||||
|
||||
case ucp_Nd:
|
||||
case ucp_Nl:
|
||||
case ucp_No:
|
||||
return ucp_N;
|
||||
break;
|
||||
|
||||
case ucp_Pc:
|
||||
case ucp_Pd:
|
||||
case ucp_Pe:
|
||||
case ucp_Pf:
|
||||
case ucp_Pi:
|
||||
case ucp_Ps:
|
||||
case ucp_Po:
|
||||
return ucp_P;
|
||||
break;
|
||||
|
||||
case ucp_Sc:
|
||||
case ucp_Sk:
|
||||
case ucp_Sm:
|
||||
case ucp_So:
|
||||
return ucp_S;
|
||||
break;
|
||||
|
||||
case ucp_Zl:
|
||||
case ucp_Zp:
|
||||
case ucp_Zs:
|
||||
return ucp_Z;
|
||||
break;
|
||||
|
||||
default: /* "Should never happen" */
|
||||
return -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcrefind.c */
|
||||
149
harbour/source/hbpcre/pcrefinf.c
Normal file
149
harbour/source/hbpcre/pcrefinf.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_fullinfo(), which returns
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is a newer "info" function which has an extensible interface so
|
||||
that additional items can be added compatibly.
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
extra_data points to extra data, or NULL
|
||||
what what information is required
|
||||
where where to put the information
|
||||
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
pcre_study_data internal_study;
|
||||
const real_pcre *re = (const real_pcre *)argument_re;
|
||||
const pcre_study_data *study = NULL;
|
||||
|
||||
if (re == NULL || where == NULL) return PCRE_ERROR_NULL;
|
||||
|
||||
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)
|
||||
study = (const pcre_study_data *)extra_data->study_data;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
|
||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||
if (study != NULL) study = &internal_study;
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
case PCRE_INFO_OPTIONS:
|
||||
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_SIZE:
|
||||
*((size_t *)where) = re->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_STUDYSIZE:
|
||||
*((size_t *)where) = (study == NULL)? 0 : study->size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_CAPTURECOUNT:
|
||||
*((int *)where) = re->top_bracket;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_BACKREFMAX:
|
||||
*((int *)where) = re->top_backref;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
block, not the internal copy (with flipped integer fields). */
|
||||
|
||||
case PCRE_INFO_FIRSTTABLE:
|
||||
*((const uschar **)where) =
|
||||
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
|
||||
((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
*((int *)where) = re->name_entry_size;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMECOUNT:
|
||||
*((int *)where) = re->name_count;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMETABLE:
|
||||
*((const uschar **)where) = (const uschar *)re + re->name_table_offset;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_DEFAULT_TABLES:
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcrefinf.c */
|
||||
352
harbour/source/hbpcre/pcreget.c
Normal file
352
harbour/source/hbpcre/pcreget.c
Normal file
@@ -0,0 +1,352 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some convenience functions for extracting substrings
|
||||
from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the two extraction functions below, as well
|
||||
as being generally available.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose number is required
|
||||
|
||||
Returns: the number of the named parentheses, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0) return (entry[0] << 8) + entry[1];
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer.
|
||||
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
||||
in the string.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, char *buffer, int size)
|
||||
{
|
||||
int yield;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(buffer, subject + ovector[stringnumber], yield);
|
||||
buffer[yield] = 0;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to given buffer *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
buffer where to put the substring
|
||||
size the size of the buffer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy all captured strings to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function gets one chunk of store and builds a list of pointers and all
|
||||
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
listptr set to point to the list of pointers
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
||||
const char ***listptr)
|
||||
{
|
||||
int i;
|
||||
int size = sizeof(char *);
|
||||
int double_count = stringcount * 2;
|
||||
char **stringlist;
|
||||
char *p;
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
||||
|
||||
stringlist = (char **)(pcre_malloc)(size);
|
||||
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = (const char **)stringlist;
|
||||
p = (char *)(stringlist + stringcount + 1);
|
||||
|
||||
for (i = 0; i < double_count; i += 2)
|
||||
{
|
||||
int len = ovector[i+1] - ovector[i];
|
||||
memcpy(p, subject + ovector[i], len);
|
||||
*stringlist++ = p;
|
||||
p += len;
|
||||
*p++ = 0;
|
||||
}
|
||||
|
||||
*stringlist = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring_list *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring_list()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring_list(const char **pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a piece of new
|
||||
store
|
||||
|
||||
Arguments:
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the substring
|
||||
|
||||
Returns: if successful:
|
||||
the length of the string, not including the zero that
|
||||
is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
||||
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
||||
int stringnumber, const char **stringptr)
|
||||
{
|
||||
int yield;
|
||||
char *substring;
|
||||
if (stringnumber < 0 || stringnumber >= stringcount)
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
stringnumber *= 2;
|
||||
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
||||
substring = (char *)(pcre_malloc)(yield + 1);
|
||||
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
||||
memcpy(substring, subject + ovector[stringnumber], yield);
|
||||
substring[yield] = 0;
|
||||
*stringptr = substring;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy named captured string to new store *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
subject the subject string that was matched
|
||||
ovector pointer to the offsets table
|
||||
stringcount the number of substrings that were captured
|
||||
(i.e. the yield of the pcre_exec call, unless
|
||||
that was zero, in which case it should be 1/3
|
||||
of the offset table size)
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer
|
||||
|
||||
Returns: if successful:
|
||||
the length of the copied string, not including the zero
|
||||
that is put on the end; can be zero
|
||||
if not successful:
|
||||
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
||||
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free store obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function exists for the benefit of people calling PCRE from non-C
|
||||
programs that can call its functions, but not free() or (pcre_free)() directly.
|
||||
|
||||
Argument: the result of a previous pcre_get_substring()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
void
|
||||
pcre_free_substring(const char *pointer)
|
||||
{
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcreget.c */
|
||||
72
harbour/source/hbpcre/pcreglob.c
Normal file
72
harbour/source/hbpcre/pcreglob.c
Normal file
@@ -0,0 +1,72 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains global variables that are exported by the PCRE library.
|
||||
PCRE is thread-clean and doesn't use any global variables in the normal sense.
|
||||
However, it calls memory allocation and freeing functions via the four
|
||||
indirections below, and it can optionally do callouts, using the fifth
|
||||
indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
void *(*pcre_malloc)(size_t) = malloc;
|
||||
void (*pcre_free)(void *) = free;
|
||||
void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
void (*pcre_stack_free)(void *) = free;
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
}
|
||||
#else
|
||||
void *(*pcre_malloc)(size_t) = malloc;
|
||||
void (*pcre_free)(void *) = free;
|
||||
void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
void (*pcre_stack_free)(void *) = free;
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* End of pcreglob.c */
|
||||
912
harbour/source/hbpcre/pcreinal.h
Normal file
912
harbour/source/hbpcre/pcreinal.h
Normal file
@@ -0,0 +1,912 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This header contains definitions that are shared between the different
|
||||
modules, but which are not relevant to the exported API. This includes some
|
||||
functions whose names all begin with "_pcre_". */
|
||||
|
||||
|
||||
/* Define DEBUG to get debugging output on stdout. */
|
||||
|
||||
/****
|
||||
#define DEBUG
|
||||
****/
|
||||
|
||||
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
|
||||
inline, and there are *still* stupid compilers about that don't like indented
|
||||
pre-processor statements, or at least there were when I first wrote this. After
|
||||
all, it had only been about 10 years then... */
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DPRINTF(p) printf p
|
||||
#else
|
||||
#define DPRINTF(p) /*nothing*/
|
||||
#endif
|
||||
|
||||
|
||||
/* Get the definitions provided by running "configure" */
|
||||
|
||||
#include "config.h"
|
||||
|
||||
/* Standard C headers plus the external interface definition. The only time
|
||||
setjmp and stdarg are used is when NO_RECURSE is set. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdarg.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifndef PCRE_SPY
|
||||
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */
|
||||
#endif
|
||||
|
||||
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
|
||||
cannot determine these outside the compilation (e.g. by running a program as
|
||||
part of "configure") because PCRE is often cross-compiled for use on other
|
||||
systems. Instead we make use of the maximum sizes that are available at
|
||||
preprocessor time in standard C environments. */
|
||||
|
||||
#if USHRT_MAX == 65535
|
||||
typedef unsigned short pcre_uint16;
|
||||
#elif UINT_MAX == 65535
|
||||
typedef unsigned int pcre_uint16;
|
||||
#else
|
||||
#error Cannot determine a type for 16-bit unsigned integers
|
||||
#endif
|
||||
|
||||
#if UINT_MAX == 4294967295
|
||||
typedef unsigned int pcre_uint32;
|
||||
#elif ULONG_MAX == 4294967295
|
||||
typedef unsigned long int pcre_uint32;
|
||||
#else
|
||||
#error Cannot determine a type for 32-bit unsigned integers
|
||||
#endif
|
||||
|
||||
/* All character handling must be done as unsigned characters. Otherwise there
|
||||
are problems with top-bit-set characters and functions such as isspace().
|
||||
However, we leave the interface to the outside world as char *, because that
|
||||
should make things easier for callers. We define a short type for unsigned char
|
||||
to save lots of typing. I tried "uchar", but it causes problems on Digital
|
||||
Unix, where it is defined in sys/types, so use "uschar" instead. */
|
||||
|
||||
typedef unsigned char uschar;
|
||||
|
||||
/* Include the public PCRE header */
|
||||
|
||||
#include "pcre.h"
|
||||
|
||||
/* Include the (copy of) the public ucp header, changing the external name into
|
||||
a private one. This does no harm, even if we aren't compiling UCP support. */
|
||||
|
||||
#define ucp_findchar _pcre_ucp_findchar
|
||||
#include "ucp.h"
|
||||
|
||||
/* When compiling for use with the Virtual Pascal compiler, these functions
|
||||
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
|
||||
option on the command line. */
|
||||
|
||||
#ifdef VPCOMPAT
|
||||
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
|
||||
#define memcpy(d,s,n) _memcpy(d,s,n)
|
||||
#define memmove(d,s,n) _memmove(d,s,n)
|
||||
#define memset(s,c,n) _memset(s,c,n)
|
||||
#else /* VPCOMPAT */
|
||||
|
||||
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(),
|
||||
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY
|
||||
is set. Otherwise, include an emulating function for those systems that have
|
||||
neither (there are some non-Unix environments where this is the case). This assumes
|
||||
that all calls to memmove are moving strings upwards in store, which is the
|
||||
case in PCRE. */
|
||||
|
||||
#if ! HAVE_MEMMOVE
|
||||
#undef memmove /* some systems may have a macro */
|
||||
#if HAVE_BCOPY
|
||||
#define memmove(a, b, c) bcopy(b, a, c)
|
||||
#else /* HAVE_BCOPY */
|
||||
/* Emulation of memmove() for systems that have neither memmove() nor bcopy().
The regions may overlap: the copy runs from the top down when the destination
lies above the source, and from the bottom up otherwise, so source bytes are
always read before they can be overwritten.

The function is static because this header is included by several source
files; an external definition here would be duplicated in each of them.

Arguments:
  dest        start of the destination region
  src         start of the source region
  n           number of bytes to copy

Returns:      the start of the destination region, as memmove() does
*/

static void *
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
{
size_t i;

if (dest > src)
  {
  /* Moving upwards in store: start at the end and work backwards. After n
  decrements dest is back at the start of the destination region. */
  dest += n;
  src += n;
  for (i = 0; i < n; ++i) *(--dest) = *(--src);
  return (void *)dest;
  }

/* Moving downwards (or not moving at all): a forward copy is safe. */

for (i = 0; i < n; ++i) dest[i] = src[i];
return (void *)dest;
}
|
||||
#define memmove(a, b, c) pcre_memmove(a, b, c)
|
||||
#endif /* not HAVE_BCOPY */
|
||||
#endif /* not HAVE_MEMMOVE */
|
||||
#endif /* not VPCOMPAT */
|
||||
|
||||
|
||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
|
||||
in big-endian order) by default. These are used, for example, to link from the
|
||||
start of a subpattern to its alternatives and its end. The use of 2 bytes per
|
||||
offset limits the size of the compiled regex to around 64K, which is big enough
|
||||
for almost everybody. However, I received a request for an even bigger limit.
|
||||
For this reason, and also to make the code easier to maintain, the storing and
|
||||
loading of offsets from the byte string is now handled by the macros that are
|
||||
defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2 in
|
||||
the config.h file, but can be overridden by using -D on the command line. This
|
||||
is automated on Unix systems via the "configure" command. */
|
||||
|
||||
/* PUT(a,n,d) stores the offset d at a[n] onwards in LINK_SIZE bytes, most
significant byte first; GET(a,n) reads it back. MAX_PATTERN_SIZE is the
corresponding limit for each link size. The array argument is parenthesized in
PUT exactly as it is in GET, and each PUT is one parenthesized comma
expression, so the macros stay correct when handed a pointer expression or
used inside a larger expression. */

#if LINK_SIZE == 2

#define PUT(a,n,d)   \
  (((a)[n] = (d) >> 8), \
  ((a)[(n)+1] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define MAX_PATTERN_SIZE (1 << 16)


#elif LINK_SIZE == 3

#define PUT(a,n,d)       \
  (((a)[n] = (d) >> 16),    \
  ((a)[(n)+1] = (d) >> 8), \
  ((a)[(n)+2] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 16) | ((a)[(n)+1] << 8) | (a)[(n)+2])

#define MAX_PATTERN_SIZE (1 << 24)


#elif LINK_SIZE == 4

#define PUT(a,n,d)        \
  (((a)[n] = (d) >> 24),     \
  ((a)[(n)+1] = (d) >> 16), \
  ((a)[(n)+2] = (d) >> 8),  \
  ((a)[(n)+3] = (d) & 255))

#define GET(a,n) \
  (((a)[n] << 24) | ((a)[(n)+1] << 16) | ((a)[(n)+2] << 8) | (a)[(n)+3])

#define MAX_PATTERN_SIZE (1 << 30)   /* Keep it positive */


#else
#error LINK_SIZE must be either 2, 3, or 4
#endif
|
||||
|
||||
|
||||
/* Convenience macro defined in terms of the others */
|
||||
|
||||
#define PUTINC(a,n,d) PUT(a,n,d), a += LINK_SIZE
|
||||
|
||||
|
||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
offsets changes. These are used for repeat counts and for other things such as
capturing parenthesis numbers in back references.

PUT2 stores d at a[n] and a[n+1], most significant byte first; GET2 reads the
value back; PUT2INC stores at a[n] and then advances a by 2. PUT2 and PUT2INC
each expand to one parenthesized comma expression rather than to separate
statements, so they remain a single statement when used as the unbraced body
of an if or a loop. */

#define PUT2(a,n,d)   \
  (((a)[n] = (d) >> 8), \
  ((a)[(n)+1] = (d) & 255))

#define GET2(a,n) \
  (((a)[n] << 8) | (a)[(n)+1])

#define PUT2INC(a,n,d)  (PUT2(a,n,d), (a) += 2)
|
||||
|
||||
|
||||
/* When UTF-8 encoding is being used, a character is no longer just a single
|
||||
byte. The macros for character handling generate simple sequences when used in
|
||||
byte-mode, and more complicated ones for UTF-8 characters. */
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define GETCHAR(c, eptr) c = *eptr;
|
||||
#define GETCHARTEST(c, eptr) c = *eptr;
|
||||
#define GETCHARINC(c, eptr) c = *eptr++;
|
||||
#define GETCHARINCTEST(c, eptr) c = *eptr++;
|
||||
#define GETCHARLEN(c, eptr, len) c = *eptr;
|
||||
#define BACKCHAR(eptr)
|
||||
|
||||
#else /* SUPPORT_UTF8 */
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer. This is called when
|
||||
we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHAR(c, eptr) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xc0) == 0xc0) \
|
||||
{ \
|
||||
int gcii; \
|
||||
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||
int gcss = 6*gcaa; \
|
||||
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||
for (gcii = 1; gcii <= gcaa; gcii++) \
|
||||
{ \
|
||||
gcss -= 6; \
|
||||
c |= (eptr[gcii] & 0x3f) << gcss; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the
|
||||
pointer. */
|
||||
|
||||
#define GETCHARTEST(c, eptr) \
|
||||
c = *eptr; \
|
||||
if (utf8 && (c & 0xc0) == 0xc0) \
|
||||
{ \
|
||||
int gcii; \
|
||||
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||
int gcss = 6*gcaa; \
|
||||
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||
for (gcii = 1; gcii <= gcaa; gcii++) \
|
||||
{ \
|
||||
gcss -= 6; \
|
||||
c |= (eptr[gcii] & 0x3f) << gcss; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
||||
know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARINC(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if ((c & 0xc0) == 0xc0) \
|
||||
{ \
|
||||
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||
int gcss = 6*gcaa; \
|
||||
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||
while (gcaa-- > 0) \
|
||||
{ \
|
||||
gcss -= 6; \
|
||||
c |= (*eptr++ & 0x3f) << gcss; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Get the next character, testing for UTF-8 mode, and advancing the pointer */
|
||||
|
||||
#define GETCHARINCTEST(c, eptr) \
|
||||
c = *eptr++; \
|
||||
if (utf8 && (c & 0xc0) == 0xc0) \
|
||||
{ \
|
||||
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||
int gcss = 6*gcaa; \
|
||||
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||
while (gcaa-- > 0) \
|
||||
{ \
|
||||
gcss -= 6; \
|
||||
c |= (*eptr++ & 0x3f) << gcss; \
|
||||
} \
|
||||
}
|
||||
|
||||
/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
||||
if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
||||
|
||||
#define GETCHARLEN(c, eptr, len) \
|
||||
c = *eptr; \
|
||||
if ((c & 0xc0) == 0xc0) \
|
||||
{ \
|
||||
int gcii; \
|
||||
int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \
|
||||
int gcss = 6*gcaa; \
|
||||
c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
|
||||
for (gcii = 1; gcii <= gcaa; gcii++) \
|
||||
{ \
|
||||
gcss -= 6; \
|
||||
c |= (eptr[gcii] & 0x3f) << gcss; \
|
||||
} \
|
||||
len += gcaa; \
|
||||
}
|
||||
|
||||
/* If the pointer is not at the start of a character, move it back until
|
||||
it is. Called only in UTF-8 mode. */
|
||||
|
||||
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* In case there is no definition of offsetof() provided - though any proper
|
||||
Standard C system should have one. */
|
||||
|
||||
#ifndef offsetof
|
||||
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
|
||||
#endif
|
||||
|
||||
|
||||
/* These are the public options that can change during matching. */
|
||||
|
||||
#define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL)
|
||||
|
||||
/* Private options flags start at the most significant end of the four bytes,
|
||||
but skip the top bit so we can use ints for convenience without getting tangled
|
||||
with negative values. The public options defined in pcre.h start at the least
|
||||
significant end. Make sure they don't overlap! */
|
||||
|
||||
#define PCRE_FIRSTSET 0x40000000 /* first_byte is set */
|
||||
#define PCRE_REQCHSET 0x20000000 /* req_byte is set */
|
||||
#define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */
|
||||
#define PCRE_ICHANGED 0x08000000 /* i option changes within regex */
|
||||
#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */
|
||||
|
||||
/* Options for the "extra" block produced by pcre_study(). */
|
||||
|
||||
#define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */
|
||||
|
||||
/* Masks for identifying the public options that are permitted at compile
|
||||
time, run time, or study time, respectively. */
|
||||
|
||||
#define PUBLIC_OPTIONS \
|
||||
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
|
||||
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
|
||||
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE)
|
||||
|
||||
#define PUBLIC_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL)
|
||||
|
||||
#define PUBLIC_DFA_EXEC_OPTIONS \
|
||||
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
|
||||
PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART)
|
||||
|
||||
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. Also used
|
||||
to detect whether a pattern was compiled on a host of different endianness. */
|
||||
|
||||
#define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */
|
||||
|
||||
/* Negative values for the firstchar and reqchar variables */
|
||||
|
||||
#define REQ_UNSET (-2)
|
||||
#define REQ_NONE (-1)
|
||||
|
||||
/* The maximum remaining length of subject we are prepared to search for a
|
||||
req_byte match. */
|
||||
|
||||
#define REQ_BYTE_MAX 1000
|
||||
|
||||
/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
|
||||
variable-length repeat, or anything other than literal characters. */
|
||||
|
||||
#define REQ_CASELESS 0x0100 /* indicates caselessness */
|
||||
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
|
||||
|
||||
/* Miscellaneous definitions */
|
||||
|
||||
/* 30/08/2005 - <maurilio.longo@libero.it>
|
||||
Use xharbour typedefs if they exist, ie, this file
|
||||
has been included after xharbour ones
|
||||
*/
|
||||
#ifndef FALSE
|
||||
typedef int BOOL;
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
|
||||
/* Escape items that are just an encoding of a particular data value. Note that
|
||||
ESC_n is defined as yet another macro, which is set in config.h to either \n
|
||||
(the default) or \r (which some people want). */
|
||||
|
||||
#ifndef ESC_e
|
||||
#define ESC_e 27
|
||||
#endif
|
||||
|
||||
#ifndef ESC_f
|
||||
#define ESC_f '\f'
|
||||
#endif
|
||||
|
||||
#ifndef ESC_n
|
||||
#define ESC_n NEWLINE
|
||||
#endif
|
||||
|
||||
#ifndef ESC_r
|
||||
#define ESC_r '\r'
|
||||
#endif
|
||||
|
||||
/* We can't officially use ESC_t because it is a POSIX reserved identifier
|
||||
(presumably because of all the others like size_t). */
|
||||
|
||||
#ifndef ESC_tee
|
||||
#define ESC_tee '\t'
|
||||
#endif
|
||||
|
||||
/* These are escaped items that aren't just an encoding of a particular data
|
||||
value such as \n. They must have non-zero values, as check_escape() returns
|
||||
their negation. Also, they must appear in the same order as in the opcode
|
||||
definitions below, up to ESC_z. There's a dummy for OP_ANY because it
|
||||
corresponds to "." rather than an escape sequence. The final one must be
|
||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
||||
tests in the code for an escape greater than ESC_b and less than ESC_Z to
|
||||
detect the types that may be repeated. These are the types that consume
|
||||
characters. If any new escapes are put in between that don't consume a
|
||||
character, that code will have to change. */
|
||||
|
||||
enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W,
|
||||
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E,
|
||||
ESC_Q, ESC_REF };
|
||||
|
||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
||||
contain UTF-8 characters with values greater than 255. */
|
||||
|
||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
||||
|
||||
#define XCL_END 0 /* Marks end of individual items */
|
||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
||||
#define XCL_PROP 3 /* Unicode property (one property code) follows */
|
||||
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */
|
||||
|
||||
|
||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
||||
OP_EOD must correspond in order to the list of escapes immediately above.
|
||||
Note that whenever this list is updated, the two macro definitions that follow
|
||||
must also be updated to match. */
|
||||
|
||||
enum {
|
||||
OP_END, /* 0 End of pattern */
|
||||
|
||||
/* Values corresponding to backslashed metacharacters */
|
||||
|
||||
OP_SOD, /* 1 Start of data: \A */
|
||||
OP_SOM, /* 2 Start of match (subject + offset): \G */
|
||||
OP_NOT_WORD_BOUNDARY, /* 3 \B */
|
||||
OP_WORD_BOUNDARY, /* 4 \b */
|
||||
OP_NOT_DIGIT, /* 5 \D */
|
||||
OP_DIGIT, /* 6 \d */
|
||||
OP_NOT_WHITESPACE, /* 7 \S */
|
||||
OP_WHITESPACE, /* 8 \s */
|
||||
OP_NOT_WORDCHAR, /* 9 \W */
|
||||
OP_WORDCHAR, /* 10 \w */
|
||||
OP_ANY, /* 11 Match any character */
|
||||
OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */
|
||||
OP_NOTPROP, /* 13 \P (not Unicode property) */
|
||||
OP_PROP, /* 14 \p (Unicode property) */
|
||||
OP_EXTUNI, /* 15 \X (extended Unicode sequence */
|
||||
OP_EODN, /* 16 End of data or \n at end of data: \Z. */
|
||||
OP_EOD, /* 17 End of data: \z */
|
||||
|
||||
OP_OPT, /* 18 Set runtime options */
|
||||
OP_CIRC, /* 19 Start of line - varies with multiline switch */
|
||||
OP_DOLL, /* 20 End of line - varies with multiline switch */
|
||||
OP_CHAR, /* 21 Match one character, casefully */
|
||||
OP_CHARNC, /* 22 Match one character, caselessly */
|
||||
OP_NOT, /* 23 Match anything but the following char */
|
||||
|
||||
OP_STAR, /* 24 The maximizing and minimizing versions of */
|
||||
OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */
|
||||
OP_PLUS, /* 26 the minimizing one second. */
|
||||
OP_MINPLUS, /* 27 This first set applies to single characters */
|
||||
OP_QUERY, /* 28 */
|
||||
OP_MINQUERY, /* 29 */
|
||||
OP_UPTO, /* 30 From 0 to n matches */
|
||||
OP_MINUPTO, /* 31 */
|
||||
OP_EXACT, /* 32 Exactly n matches */
|
||||
|
||||
OP_NOTSTAR, /* 33 The maximizing and minimizing versions of */
|
||||
OP_NOTMINSTAR, /* 34 all these opcodes must come in pairs, with */
|
||||
OP_NOTPLUS, /* 35 the minimizing one second. */
|
||||
OP_NOTMINPLUS, /* 36 This set applies to "not" single characters */
|
||||
OP_NOTQUERY, /* 37 */
|
||||
OP_NOTMINQUERY, /* 38 */
|
||||
OP_NOTUPTO, /* 39 From 0 to n matches */
|
||||
OP_NOTMINUPTO, /* 40 */
|
||||
OP_NOTEXACT, /* 41 Exactly n matches */
|
||||
|
||||
OP_TYPESTAR, /* 42 The maximizing and minimizing versions of */
|
||||
OP_TYPEMINSTAR, /* 43 all these opcodes must come in pairs, with */
|
||||
OP_TYPEPLUS, /* 44 the minimizing one second. These codes must */
|
||||
OP_TYPEMINPLUS, /* 45 be in exactly the same order as those above. */
|
||||
OP_TYPEQUERY, /* 46 This set applies to character types such as \d */
|
||||
OP_TYPEMINQUERY, /* 47 */
|
||||
OP_TYPEUPTO, /* 48 From 0 to n matches */
|
||||
OP_TYPEMINUPTO, /* 49 */
|
||||
OP_TYPEEXACT, /* 50 Exactly n matches */
|
||||
|
||||
OP_CRSTAR, /* 51 The maximizing and minimizing versions of */
|
||||
OP_CRMINSTAR, /* 52 all these opcodes must come in pairs, with */
|
||||
OP_CRPLUS, /* 53 the minimizing one second. These codes must */
|
||||
OP_CRMINPLUS, /* 54 be in exactly the same order as those above. */
|
||||
OP_CRQUERY, /* 55 These are for character classes and back refs */
|
||||
OP_CRMINQUERY, /* 56 */
|
||||
OP_CRRANGE, /* 57 These are different to the three sets above. */
|
||||
OP_CRMINRANGE, /* 58 */
|
||||
|
||||
OP_CLASS, /* 59 Match a character class, chars < 256 only */
|
||||
OP_NCLASS, /* 60 Same, but the bitmap was created from a negative
|
||||
class - the difference is relevant only when a UTF-8
|
||||
character > 255 is encountered. */
|
||||
|
||||
OP_XCLASS, /* 61 Extended class for handling UTF-8 chars within the
|
||||
class. This does both positive and negative. */
|
||||
|
||||
OP_REF, /* 62 Match a back reference */
|
||||
OP_RECURSE, /* 63 Match a numbered subpattern (possibly recursive) */
|
||||
OP_CALLOUT, /* 64 Call out to external function if provided */
|
||||
|
||||
OP_ALT, /* 65 Start of alternation */
|
||||
OP_KET, /* 66 End of group that doesn't have an unbounded repeat */
|
||||
OP_KETRMAX, /* 67 These two must remain together and in this */
|
||||
OP_KETRMIN, /* 68 order. They are for groups the repeat for ever. */
|
||||
|
||||
/* The assertions must come before ONCE and COND */
|
||||
|
||||
OP_ASSERT, /* 69 Positive lookahead */
|
||||
OP_ASSERT_NOT, /* 70 Negative lookahead */
|
||||
OP_ASSERTBACK, /* 71 Positive lookbehind */
|
||||
OP_ASSERTBACK_NOT, /* 72 Negative lookbehind */
|
||||
OP_REVERSE, /* 73 Move pointer back - used in lookbehind assertions */
|
||||
|
||||
/* ONCE and COND must come after the assertions, with ONCE first, as there's
|
||||
a test for >= ONCE for a subpattern that isn't an assertion. */
|
||||
|
||||
OP_ONCE, /* 74 Once matched, don't back up into the subpattern */
|
||||
OP_COND, /* 75 Conditional group */
|
||||
OP_CREF, /* 76 Used to hold an extraction string number (cond ref) */
|
||||
|
||||
OP_BRAZERO, /* 77 These two must remain together and in this */
|
||||
OP_BRAMINZERO, /* 78 order. */
|
||||
|
||||
OP_BRANUMBER, /* 79 Used for extracting brackets whose number is greater
|
||||
than can fit into an opcode. */
|
||||
|
||||
OP_BRA /* 80 This and greater values are used for brackets that
|
||||
extract substrings up to EXTRACT_BASIC_MAX. After
|
||||
that, use is made of OP_BRANUMBER. */
|
||||
};
|
||||
|
||||
/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
|
||||
study.c that all opcodes are less than 128 in value. This makes handling UTF-8
|
||||
character sequences easier. */
|
||||
|
||||
/* The highest extraction number before we have to start using additional
|
||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
||||
opcodes. */
|
||||
|
||||
#define EXTRACT_BASIC_MAX 100
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
for debugging. The macro is referenced only in pcreprni.c. */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
"End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \
|
||||
"\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \
|
||||
"notprop", "prop", "extuni", \
|
||||
"\\Z", "\\z", \
|
||||
"Opt", "^", "$", "char", "charnc", "not", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
|
||||
"*", "*?", "+", "+?", "?", "??", "{", "{", \
|
||||
"class", "nclass", "xclass", "Ref", "Recurse", "Callout", \
|
||||
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", \
|
||||
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cond ref",\
|
||||
"Brazero", "Braminzero", "Branumber", "Bra"
|
||||
|
||||
|
||||
/* This macro defines the length of fixed length operations in the compiled
|
||||
regex. The lengths are used when searching for specific things, and also in the
|
||||
debugging printing of a compiled regex. We use a macro so that it can be
|
||||
defined close to the definitions of the opcodes themselves.
|
||||
|
||||
As things have been extended, some of these are no longer fixed lengths, but are
|
||||
minima instead. For example, the length of a single-character repeat may vary
|
||||
in UTF-8 mode. The code that uses this table must know about such things. */
|
||||
|
||||
#define OP_LENGTHS \
|
||||
1, /* End */ \
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \
|
||||
1, 1, /* Any, Anybyte */ \
|
||||
2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \
|
||||
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \
|
||||
2, /* Char - the minimum length */ \
|
||||
2, /* Charnc - the minimum length */ \
|
||||
2, /* not */ \
|
||||
/* Positive single-char repeats ** These are */ \
|
||||
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \
|
||||
4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \
|
||||
/* Negative single-char repeats - only for chars < 256 */ \
|
||||
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* NOT upto, minupto, exact */ \
|
||||
/* Positive type repeats */ \
|
||||
2, 2, 2, 2, 2, 2, /* Type *, *?, +, +?, ?, ?? */ \
|
||||
4, 4, 4, /* Type upto, minupto, exact */ \
|
||||
/* Character class & ref repeats */ \
|
||||
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
|
||||
5, 5, /* CRRANGE, CRMINRANGE */ \
|
||||
33, /* CLASS */ \
|
||||
33, /* NCLASS */ \
|
||||
0, /* XCLASS - variable length */ \
|
||||
3, /* REF */ \
|
||||
1+LINK_SIZE, /* RECURSE */ \
|
||||
2+2*LINK_SIZE, /* CALLOUT */ \
|
||||
1+LINK_SIZE, /* Alt */ \
|
||||
1+LINK_SIZE, /* Ket */ \
|
||||
1+LINK_SIZE, /* KetRmax */ \
|
||||
1+LINK_SIZE, /* KetRmin */ \
|
||||
1+LINK_SIZE, /* Assert */ \
|
||||
1+LINK_SIZE, /* Assert not */ \
|
||||
1+LINK_SIZE, /* Assert behind */ \
|
||||
1+LINK_SIZE, /* Assert behind not */ \
|
||||
1+LINK_SIZE, /* Reverse */ \
|
||||
1+LINK_SIZE, /* Once */ \
|
||||
1+LINK_SIZE, /* COND */ \
|
||||
3, /* CREF */ \
|
||||
1, 1, /* BRAZERO, BRAMINZERO */ \
|
||||
3, /* BRANUMBER */ \
|
||||
1+LINK_SIZE /* BRA */ \
|
||||
|
||||
|
||||
/* A magic value for OP_CREF to indicate the "in recursion" condition. */
|
||||
|
||||
#define CREF_RECURSE 0xffff
|
||||
|
||||
/* Error code numbers. They are given names so that they can more easily be
|
||||
tracked. */
|
||||
|
||||
enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9,
|
||||
ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
|
||||
ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29,
|
||||
ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
|
||||
ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 };
|
||||
|
||||
/* The real format of the start of the pcre block; the index of names and the
|
||||
code vector run on as long as necessary after the end. We store an explicit
|
||||
offset to the name table so that if a regex is compiled on one host, saved, and
|
||||
then run on another where the size of pointers is different, all might still
|
||||
be well. For the case of compiled-on-4 and run-on-8, we include an extra
|
||||
pointer that is always NULL. For future-proofing, a few dummy fields were
|
||||
originally included - even though you can never get this planning right - but
|
||||
there is only one left now.
|
||||
|
||||
NOTE NOTE NOTE:
|
||||
Because people can now save and re-use compiled patterns, any additions to this
|
||||
structure should be made at the end, and something earlier (e.g. a new
|
||||
flag in the options or one of the dummy fields) should indicate that the new
|
||||
fields are present. Currently PCRE always sets the dummy fields to zero.
|
||||
NOTE NOTE NOTE:
|
||||
*/
|
||||
|
||||
typedef struct real_pcre {
|
||||
pcre_uint32 magic_number; /* Checked against MAGIC_NUMBER to reject junk or detect a byte-flipped pattern */
|
||||
pcre_uint32 size; /* Total that was malloced */
|
||||
pcre_uint32 options; /* Public option bits plus private flags (PCRE_FIRSTSET, PCRE_STARTLINE, ...) */
|
||||
pcre_uint32 dummy1; /* For future use, maybe */
|
||||
|
||||
pcre_uint16 top_bracket; /* Returned by pcre_info() as the number of capturing subpatterns */
|
||||
pcre_uint16 top_backref; /* Presumably the maximum back reference (cf. compile_data.top_backref) - confirm */
|
||||
pcre_uint16 first_byte; /* Meaningful only when PCRE_FIRSTSET is set in options */
|
||||
pcre_uint16 req_byte; /* Meaningful only when PCRE_REQCHSET is set in options */
|
||||
pcre_uint16 name_table_offset; /* Offset to name table that follows */
|
||||
pcre_uint16 name_entry_size; /* Size of any name items */
|
||||
pcre_uint16 name_count; /* Number of name items */
|
||||
pcre_uint16 ref_count; /* Reference count */
|
||||
|
||||
const unsigned char *tables; /* Pointer to tables or NULL for std */
|
||||
const unsigned char *nullpad; /* NULL padding */
|
||||
} real_pcre;
|
||||
|
||||
/* The format of the block used to store data from pcre_study(). The same
|
||||
remark (see NOTE above) about extending this structure applies. */
|
||||
|
||||
typedef struct pcre_study_data {
|
||||
pcre_uint32 size; /* Total that was malloced */
|
||||
pcre_uint32 options;
|
||||
uschar start_bits[32];
|
||||
} pcre_study_data;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing the compiling, so that they are thread-safe. */
|
||||
|
||||
typedef struct compile_data {
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *fcc; /* Points to case-flipping table */
|
||||
const uschar *cbits; /* Points to character type table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
const uschar *start_code; /* The start of the compiled code */
|
||||
const uschar *start_pattern; /* The start of the pattern */
|
||||
uschar *name_table; /* The name/number table */
|
||||
int names_found; /* Number of entries so far */
|
||||
int name_entry_size; /* Size of each entry */
|
||||
int top_backref; /* Maximum back reference */
|
||||
unsigned int backref_map; /* Bitmap of low back refs */
|
||||
int req_varyopt; /* "After variable item" flag for reqbyte */
|
||||
BOOL nopartial; /* Set TRUE if partial won't work */
|
||||
} compile_data;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion. */
|
||||
|
||||
typedef struct branch_chain {
|
||||
struct branch_chain *outer;
|
||||
uschar *current;
|
||||
} branch_chain;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern. */
|
||||
|
||||
typedef struct recursion_info {
|
||||
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
|
||||
int group_num; /* Number of group that was called */
|
||||
const uschar *after_call; /* "Return value": points after the call in the expr */
|
||||
const uschar *save_start; /* Old value of md->start_match */
|
||||
int *offset_save; /* Pointer to start of saved offsets */
|
||||
int saved_max; /* Number of saved offsets */
|
||||
} recursion_info;
|
||||
|
||||
/* When compiling in a mode that doesn't use recursive calls to match(),
|
||||
a structure is used to remember local variables on the heap. It is defined in
|
||||
pcre.c, close to the match() function, so that it is easy to keep it in step
|
||||
with any changes of local variable. However, the pointer to the current frame
|
||||
must be saved in some "static" place over a longjmp(). We declare the
|
||||
structure here so that we can put a pointer in the match_data structure.
|
||||
NOTE: This isn't used for a "normal" compilation of pcre. */
|
||||
|
||||
struct heapframe;
|
||||
|
||||
/* Structure for passing "static" information around between the functions
|
||||
doing traditional NFA matching, so that they are thread-safe. */
|
||||
|
||||
typedef struct match_data {
|
||||
unsigned long int match_call_count; /* As it says */
|
||||
unsigned long int match_limit;/* As it says */
|
||||
int *offset_vector; /* Offset vector */
|
||||
int offset_end; /* One past the end */
|
||||
int offset_max; /* The maximum usable for return data */
|
||||
const uschar *lcc; /* Points to lower casing table */
|
||||
const uschar *ctypes; /* Points to table of type maps */
|
||||
BOOL offset_overflow; /* Set if too many extractions */
|
||||
BOOL notbol; /* NOTBOL flag */
|
||||
BOOL noteol; /* NOTEOL flag */
|
||||
BOOL utf8; /* UTF8 flag */
|
||||
BOOL endonly; /* Dollar not before final \n */
|
||||
BOOL notempty; /* Empty string match not wanted */
|
||||
BOOL partial; /* PARTIAL flag */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
const uschar *start_code; /* For use when recursing */
|
||||
const uschar *start_subject; /* Start of the subject string */
|
||||
const uschar *end_subject; /* End of the subject string */
|
||||
const uschar *start_match; /* Start of this match attempt */
|
||||
const uschar *end_match_ptr; /* Subject position at end match */
|
||||
int end_offset_top; /* Highwater mark at end of match */
|
||||
int capture_last; /* Most recent capture number */
|
||||
int start_offset; /* The start offset value */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
struct heapframe *thisframe; /* Used only when compiling for no recursion */
|
||||
} match_data;
|
||||
|
||||
/* A similar structure is used for the same purpose by the DFA matching
|
||||
functions. */
|
||||
|
||||
typedef struct dfa_match_data {
|
||||
const uschar *start_code; /* Start of the compiled pattern */
|
||||
const uschar *start_subject; /* Start of the subject string */
|
||||
const uschar *end_subject; /* End of subject string */
|
||||
const uschar *tables; /* Character tables */
|
||||
int moptions; /* Match options */
|
||||
int poptions; /* Pattern options */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
} dfa_match_data;
|
||||
|
||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
||||
|
||||
#define ctype_space 0x01
|
||||
#define ctype_letter 0x02
|
||||
#define ctype_digit 0x04
|
||||
#define ctype_xdigit 0x08
|
||||
#define ctype_word 0x10 /* alphameric or '_' */
|
||||
#define ctype_meta 0x80 /* regexp meta char or zero (end pattern) */
|
||||
|
||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
||||
of bits for a class map. Some classes are built by combining these tables. */
|
||||
|
||||
#define cbit_space 0 /* [:space:] or \s */
|
||||
#define cbit_xdigit 32 /* [:xdigit:] */
|
||||
#define cbit_digit 64 /* [:digit:] or \d */
|
||||
#define cbit_upper 96 /* [:upper:] */
|
||||
#define cbit_lower 128 /* [:lower:] */
|
||||
#define cbit_word 160 /* [:word:] or \w */
|
||||
#define cbit_graph 192 /* [:graph:] */
|
||||
#define cbit_print 224 /* [:print:] */
|
||||
#define cbit_punct 256 /* [:punct:] */
|
||||
#define cbit_cntrl 288 /* [:cntrl:] */
|
||||
#define cbit_length 320 /* Length of the cbits table */
|
||||
|
||||
/* Offsets of the various tables from the base tables pointer, and
|
||||
total length. */
|
||||
|
||||
#define lcc_offset 0
|
||||
#define fcc_offset 256
|
||||
#define cbits_offset 512
|
||||
#define ctypes_offset (cbits_offset + cbit_length)
|
||||
#define tables_length (ctypes_offset + 256)
|
||||
|
||||
/* Layout of the UCP type table that translates property names into codes for
|
||||
ucp_findchar(). */
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
int value;
|
||||
} ucp_type_table;
|
||||
|
||||
|
||||
/* Internal shared data tables. These are tables that are used by more than one
|
||||
of the exported public functions. They have to be "external" in the C sense,
|
||||
but are not part of the PCRE public API. The data for these tables is in the
|
||||
pcretabs.c module. */
|
||||
|
||||
extern const int _pcre_utf8_table1[];
|
||||
extern const int _pcre_utf8_table2[];
|
||||
extern const int _pcre_utf8_table3[];
|
||||
extern const uschar _pcre_utf8_table4[];
|
||||
|
||||
extern const int _pcre_utf8_table1_size;
|
||||
|
||||
extern const ucp_type_table _pcre_utt[];
|
||||
extern const int _pcre_utt_size;
|
||||
|
||||
extern const uschar _pcre_default_tables[];
|
||||
|
||||
extern const uschar _pcre_OP_lengths[];
|
||||
|
||||
|
||||
/* Internal shared functions. These are functions that are used by more than
|
||||
one of the exported public functions. They have to be "external" in the C
|
||||
sense, but are not part of the PCRE public API. */
|
||||
|
||||
extern int _pcre_ord2utf8(int, uschar *);
|
||||
extern void _pcre_printint(pcre *, FILE *);
|
||||
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *,
|
||||
const pcre_study_data *, pcre_study_data *);
|
||||
extern int _pcre_ucp_findchar(const int, int *, int *);
|
||||
extern int _pcre_valid_utf8(const uschar *, int);
|
||||
extern BOOL _pcre_xclass(int, const uschar *);
|
||||
|
||||
/* End of pcreinal.h */
|
||||
89
harbour/source/hbpcre/pcreinfo.c
Normal file
89
harbour/source/hbpcre/pcreinfo.c
Normal file
@@ -0,0 +1,89 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_info(), which gives some
|
||||
information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* (Obsolete) Return info about compiled pattern *
|
||||
*************************************************/
|
||||
|
||||
/* This is the original "info" function. It picks potentially useful data out
|
||||
of the private structure, but its interface was too rigid. It remains for
|
||||
backwards compatibility. The public options are passed back in an int - though
|
||||
the re->options field has been expanded to a long int, all the public options
|
||||
are at the low end of it, and so even on 16-bit systems this will still be OK.
|
||||
Therefore, I haven't changed the API for pcre_info().
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
optptr where to pass back the options
|
||||
first_byte where to pass back the first character,
|
||||
or -1 if multiline and all branches start ^,
|
||||
or -2 otherwise
|
||||
|
||||
Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
const real_pcre *re = (const real_pcre *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
|
||||
if (re == NULL) return PCRE_ERROR_BADMAGIC;
|
||||
}
|
||||
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||
if (first_byte != NULL)
|
||||
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
return re->top_bracket;
|
||||
}
|
||||
|
||||
/* End of pcreinfo.c */
|
||||
145
harbour/source/hbpcre/pcremktb.c
Normal file
145
harbour/source/hbpcre/pcremktb.c
Normal file
@@ -0,0 +1,145 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_maketables(), which builds
|
||||
character tables for PCRE in the current locale. The file is compiled on its
|
||||
own as part of the PCRE library. However, it is also included in the
|
||||
compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
#include "pcreinal.h"
|
||||
#endif
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create PCRE character tables *
|
||||
*************************************************/
|
||||
|
||||
/* This function builds a set of character tables for use by PCRE and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via pcre_malloc(), but when compiled
|
||||
inside dftables, use malloc().
|
||||
|
||||
Arguments: none
|
||||
Returns: pointer to the contiguous block of data
|
||||
*/
|
||||
|
||||
const unsigned char *
|
||||
pcre_maketables(void)
|
||||
{
|
||||
unsigned char *yield, *p;
|
||||
int i;
|
||||
|
||||
#ifndef DFTABLES
|
||||
yield = (unsigned char*)(pcre_malloc)(tables_length);
|
||||
#else
|
||||
yield = (unsigned char*)malloc(tables_length);
|
||||
#endif
|
||||
|
||||
if (yield == NULL) return NULL;
|
||||
p = yield;
|
||||
|
||||
/* First comes the lower casing table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
/* Next the case-flipping table */
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort
|
||||
on exclusive ones - in some locales things may be different. Note that the
|
||||
table for "space" includes everything "isspace" gives, including VT in the
|
||||
default locale. This makes it work for the POSIX class [:space:]. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i))
|
||||
{
|
||||
p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (isupper(i))
|
||||
{
|
||||
p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (islower(i))
|
||||
{
|
||||
p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
if (isgraph(i)) p[cbit_graph + i/8] |= 1 << (i&7);
|
||||
if (isprint(i)) p[cbit_print + i/8] |= 1 << (i&7);
|
||||
if (ispunct(i)) p[cbit_punct + i/8] |= 1 << (i&7);
|
||||
if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1 << (i&7);
|
||||
}
|
||||
p += cbit_length;
|
||||
|
||||
/* Finally, the character type table. In this, we exclude VT from the white
|
||||
space chars, because Perl doesn't recognize it as such for \s and for comments
|
||||
within regexes. */
|
||||
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
int x = 0;
|
||||
if (i != 0x0b && isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isxdigit(i)) x += ctype_xdigit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
|
||||
/* Note: strchr includes the terminating zero in the characters it considers.
|
||||
In this instance, that is ok because we want binary zero to be flagged as a
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of pcremktb.c */
|
||||
78
harbour/source/hbpcre/pcreoutf.c
Normal file
78
harbour/source/hbpcre/pcreoutf.c
Normal file
@@ -0,0 +1,78 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Convert character value to UTF-8 *
|
||||
*************************************************/
|
||||
|
||||
/* This function takes an integer value in the range 0 - 0x7fffffff
|
||||
and encodes it as a UTF-8 character in 0 to 6 bytes.
|
||||
|
||||
Arguments:
|
||||
cvalue the character value
|
||||
buffer pointer to buffer for result - at least 6 bytes long
|
||||
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||
{
|
||||
register int i, j;
|
||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||
buffer += i;
|
||||
for (j = i; j > 0; j--)
|
||||
{
|
||||
*buffer-- = 0x80 | (cvalue & 0x3f);
|
||||
cvalue >>= 6;
|
||||
}
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
}
|
||||
|
||||
/* End of pcreoutf.c */
|
||||
451
harbour/source/hbpcre/pcreprni.c
Normal file
451
harbour/source/hbpcre/pcreprni.c
Normal file
@@ -0,0 +1,451 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print single- or multi-byte character *
|
||||
*************************************************/
|
||||
|
||||
static int
|
||||
print_char(FILE *f, uschar *ptr, BOOL utf8)
|
||||
{
|
||||
int c = *ptr;
|
||||
|
||||
if (!utf8 || (c & 0xc0) != 0xc0)
|
||||
{
|
||||
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & _pcre_utf8_table3[a]) << s;
|
||||
for (i = 1; i <= a; i++)
|
||||
{
|
||||
/* This is a check for malformed UTF-8; it should only occur if the sanity
|
||||
check has been turned off. Rather than swallow random bytes, just stop if
|
||||
we hit a bad one. Print it with \X instead of \x as an indication. */
|
||||
|
||||
if ((ptr[i] & 0xc0) != 0x80)
|
||||
{
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return i - 1;
|
||||
}
|
||||
|
||||
/* The byte is OK */
|
||||
|
||||
s -= 6;
|
||||
c |= (ptr[i] & 0x3f) << s;
|
||||
}
|
||||
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* Look up the printable name of a Unicode property value.

When SUPPORT_UCP is defined, the _pcre_utt table is searched from its last
entry backwards for an entry whose value equals the argument. Without
SUPPORT_UCP there is no table and the placeholder is always returned.

Argument:    property   the property value to look up
Returns:     the property's name, or "??" if it is not found
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* Start at the last valid index. Starting at _pcre_utt_size itself would read
one element past the end of the table (this assumes _pcre_utt_size is the
number of entries in _pcre_utt - confirm against its definition). */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)property;   /* unused in this configuration */
return "??";
#endif
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* Make this function work for a regex with integers either byte order.
|
||||
However, we assume that what we are passed is a compiled regex. */
|
||||
|
||||
EXPORT void
|
||||
_pcre_printint(pcre *external_re, FILE *f)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)external_re;
|
||||
uschar *codestart, *code;
|
||||
BOOL utf8;
|
||||
|
||||
unsigned int options = re->options;
|
||||
int offset = re->name_table_offset;
|
||||
int count = re->name_count;
|
||||
int size = re->name_entry_size;
|
||||
|
||||
if (re->magic_number != MAGIC_NUMBER)
|
||||
{
|
||||
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
|
||||
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
|
||||
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
|
||||
options = ((options << 24) & 0xff000000) |
|
||||
((options << 8) & 0x00ff0000) |
|
||||
((options >> 8) & 0x0000ff00) |
|
||||
((options >> 24) & 0x000000ff);
|
||||
}
|
||||
|
||||
code = codestart = (uschar *)re + offset + count * size;
|
||||
utf8 = (options & PCRE_UTF8) != 0;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
uschar *ccode;
|
||||
int c;
|
||||
int extra = 0;
|
||||
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
|
||||
if (*code >= OP_BRA)
|
||||
{
|
||||
if (*code - OP_BRA > EXTRACT_BASIC_MAX)
|
||||
fprintf(f, "%3d Bra extra\n", GET(code, 1));
|
||||
else
|
||||
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
|
||||
code += _pcre_OP_lengths[OP_BRA];
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
case OP_END:
|
||||
fprintf(f, " %s\n", OP_names[*code]);
|
||||
fprintf(f, "------------------------------------------------------------------\n");
|
||||
return;
|
||||
|
||||
case OP_OPT:
|
||||
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CHAR:
|
||||
{
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_CHARNC:
|
||||
{
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_ALT:
|
||||
case OP_KET:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
case OP_REVERSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_BRANUMBER:
|
||||
printf("%3d %s", GET2(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
if (GET2(code, 1) == CREF_RECURSE)
|
||||
fprintf(f, " Cond recurse");
|
||||
else
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
fprintf(f, " ");
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
fprintf(f, "%s", OP_names[code[1]]);
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[2]));
|
||||
extra = 1;
|
||||
}
|
||||
}
|
||||
else extra = print_char(f, code+1, utf8);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
fprintf(f, " ");
|
||||
extra = print_char(f, code+3, utf8);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT) fprintf(f, ",");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO) fprintf(f, "?");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
fprintf(f, " %s", OP_names[code[3]]);
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[4]));
|
||||
extra = 1;
|
||||
}
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
|
||||
else fprintf(f, " [^\\x%02x]{", c);
|
||||
if (*code != OP_NOTEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REF:
|
||||
fprintf(f, " \\%d", GET2(code,1));
|
||||
ccode = code + _pcre_OP_lengths[*code];
|
||||
goto CLASS_REF_REPEAT;
|
||||
|
||||
case OP_CALLOUT:
|
||||
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
|
||||
GET(code, 2 + LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
|
||||
break;
|
||||
|
||||
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
|
||||
having this code always here, and it makes it less messy without all those
|
||||
#ifdefs. */
|
||||
|
||||
case OP_CLASS:
|
||||
case OP_NCLASS:
|
||||
case OP_XCLASS:
|
||||
{
|
||||
int i, min, max;
|
||||
BOOL printmap;
|
||||
|
||||
fprintf(f, " [");
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
extra = GET(code, 1);
|
||||
ccode = code + LINK_SIZE + 1;
|
||||
printmap = (*ccode & XCL_MAP) != 0;
|
||||
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
|
||||
}
|
||||
else
|
||||
{
|
||||
printmap = TRUE;
|
||||
ccode = code + 1;
|
||||
}
|
||||
|
||||
/* Print a bit map */
|
||||
|
||||
if (printmap)
|
||||
{
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((ccode[i/8] & (1 << (i&7))) != 0)
|
||||
{
|
||||
int j;
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(f, "\\");
|
||||
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
}
|
||||
ccode += 32;
|
||||
}
|
||||
|
||||
/* For an XCLASS there is always some additional data */
|
||||
|
||||
if (*code == OP_XCLASS)
|
||||
{
|
||||
int ch;
|
||||
while ((ch = *ccode++) != XCL_END)
|
||||
{
|
||||
if (ch == XCL_PROP)
|
||||
{
|
||||
fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
|
||||
}
|
||||
else if (ch == XCL_NOTPROP)
|
||||
{
|
||||
fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
|
||||
}
|
||||
else
|
||||
{
|
||||
ccode += 1 + print_char(f, ccode, TRUE);
|
||||
if (ch == XCL_RANGE)
|
||||
{
|
||||
fprintf(f, "-");
|
||||
ccode += 1 + print_char(f, ccode, TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Indicate a non-UTF8 class which was created by negation */
|
||||
|
||||
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
|
||||
|
||||
/* Handle repeats after a class or a back reference */
|
||||
|
||||
CLASS_REF_REPEAT:
|
||||
switch(*ccode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRPLUS:
|
||||
case OP_CRMINPLUS:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
fprintf(f, "%s", OP_names[*ccode]);
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
min = GET2(ccode,1);
|
||||
max = GET2(ccode,3);
|
||||
if (max == 0) fprintf(f, "{%d,}", min);
|
||||
else fprintf(f, "{%d,%d}", min, max);
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
/* Anything else is just an item with no data*/
|
||||
|
||||
default:
|
||||
fprintf(f, " %s", OP_names[*code]);
|
||||
break;
|
||||
}
|
||||
|
||||
code += _pcre_OP_lengths[*code] + extra;
|
||||
fprintf(f, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcreprni.c */
|
||||
77
harbour/source/hbpcre/pcrerefc.c
Normal file
77
harbour/source/hbpcre/pcrerefc.c
Normal file
@@ -0,0 +1,77 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_refcount(), which is an
|
||||
auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Maintain reference count *
|
||||
*************************************************/
|
||||
|
||||
/* The reference count is a 16-bit field, initialized to zero. It is not
|
||||
possible to transfer a non-zero count from one host to a different host that
|
||||
has a different byte order - though I can't see why anyone in their right mind
|
||||
would ever want to do that!
|
||||
|
||||
Arguments:
|
||||
argument_re points to compiled code
|
||||
adjust value to add to the count
|
||||
|
||||
Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
if (re == NULL) return PCRE_ERROR_NULL;
|
||||
re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||
(adjust + re->ref_count > 65535)? 65535 :
|
||||
re->ref_count + adjust;
|
||||
return re->ref_count;
|
||||
}
|
||||
|
||||
/* End of pcrerefc.c */
|
||||
482
harbour/source/hbpcre/pcrestud.c
Normal file
482
harbour/source/hbpcre/pcrestud.c
Normal file
@@ -0,0 +1,482 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_study(), along with local
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
|
||||
/* Given a character, set its bit in the table, and also the bit for the other
|
||||
version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
start_bits points to the bit map
|
||||
c is the character
|
||||
caseless the caseless flag
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
|
||||
{
|
||||
start_bits[c/8] |= (1 << (c&7));
|
||||
if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||
start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting chars *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression and attempts to build a
|
||||
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
||||
goes by, we may be able to get more clever at doing this.
|
||||
|
||||
Arguments:
|
||||
code points to an expression
|
||||
start_bits points to a 32-byte table, initialized to 0
|
||||
caseless the current state of the caseless flag
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: TRUE if table built, FALSE otherwise
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
code. Apparently IBM isn't going to fix the problem, and we would rather not
|
||||
disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get). */
|
||||
|
||||
volatile int dummy;
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + 1 + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next)
|
||||
{
|
||||
/* If a branch starts with a bracket or a positive lookahead assertion,
|
||||
recurse to set bits from within them. That's all for this branch. */
|
||||
|
||||
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
||||
{
|
||||
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
try_next = FALSE;
|
||||
}
|
||||
|
||||
else switch(*tcode)
|
||||
{
|
||||
default:
|
||||
return FALSE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
case OP_CALLOUT:
|
||||
tcode += 2 + 2*LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over extended extraction bracket number */
|
||||
|
||||
case OP_BRANUMBER:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
|
||||
case OP_OPT:
|
||||
caseless = (tcode[1] & PCRE_CASELESS) != 0;
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* BRAZERO does the bracket, but carries on. */
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
dummy = 1;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
tcode += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
set_bit(start_bits, tcode[3], caseless, cd);
|
||||
tcode += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
|
||||
case OP_EXACT: /* Fall through */
|
||||
tcode += 2;
|
||||
|
||||
case OP_CHAR:
|
||||
case OP_CHARNC:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* Single character type sets the bits and stops */
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* One or more character type fudges the pointer and restarts, knowing
|
||||
it will hit a single character type and stop there. */
|
||||
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Zero or more repeats of character types set the bits and then
|
||||
try again. */
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
tcode += 2; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
return FALSE;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
case OP_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
break;
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_word];
|
||||
break;
|
||||
|
||||
case OP_WORDCHAR:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_word];
|
||||
break;
|
||||
}
|
||||
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Character class where all the information is in a bit map: set the
|
||||
bits and either carry on or not, according to the repeat count. If it was
|
||||
a negative class, and we are operating with UTF-8 characters, any byte
|
||||
with a value >= 0xc4 is a potentially valid starter because it starts a
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
{
|
||||
tcode++;
|
||||
|
||||
/* In UTF-8 mode, the bits in a bit map correspond to character
|
||||
values, not to byte values. However, the bit map we are constructing is
|
||||
for byte values. So we have to do a conversion for characters whose
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
for (c = 128; c < 256; c++)
|
||||
{
|
||||
if ((tcode[c/8] && (1 << (c&7))) != 0)
|
||||
{
|
||||
int d = (c >> 6) | 0xc0; /* Set bit for this starter */
|
||||
start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */
|
||||
c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
|
||||
/* Advance past the bit map, and act on what follows */
|
||||
|
||||
tcode += 32;
|
||||
switch (*tcode)
|
||||
{
|
||||
case OP_CRSTAR:
|
||||
case OP_CRMINSTAR:
|
||||
case OP_CRQUERY:
|
||||
case OP_CRMINQUERY:
|
||||
tcode++;
|
||||
break;
|
||||
|
||||
case OP_CRRANGE:
|
||||
case OP_CRMINRANGE:
|
||||
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
|
||||
else try_next = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
try_next = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break; /* End of bitmap class handling */
|
||||
|
||||
} /* End of switch */
|
||||
} /* End of try_next loop */
|
||||
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Study a compiled expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching. It returns a pcre_extra block
|
||||
which then gets handed back to pcre_exec().
|
||||
|
||||
Arguments:
|
||||
re points to the compiled expression
|
||||
options contains option bits
|
||||
errorptr points to where to place error messages;
|
||||
set NULL unless error
|
||||
|
||||
Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
appropriate flag set;
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
EXPORT pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
  uschar start_bits[32];
  pcre_extra *extra;
  pcre_study_data *study;
  const uschar *tables;
  const real_pcre *re = (const real_pcre *)external_re;
  uschar *code;
  compile_data compile_block;

  *errorptr = NULL;

  /* Reject a NULL or foreign block before touching any other field of it. */

  if (re == NULL || re->magic_number != MAGIC_NUMBER)
  {
    *errorptr = "argument is not a compiled regular expression";
    return NULL;
  }

  if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
  {
    *errorptr = "unknown or incorrect option bit(s) set";
    return NULL;
  }

  /* The compiled code follows the name table. This is computed only now,
  after re has been validated, so that a NULL argument is never
  dereferenced. */

  code = (uschar *)re + re->name_table_offset +
    (re->name_count * re->name_entry_size);

  /* For an anchored pattern, or an unanchored pattern that has a first char,
  or a multiline pattern that matches only at "line starts", no further
  processing at present. */

  if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
    return NULL;

  /* Set the character tables in the block that is passed around. When the
  pattern carries no tables of its own, ask pcre_fullinfo() for the default
  ones. */

  tables = re->tables;
  if (tables == NULL)
    (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
      (void *)(&tables));

  compile_block.lcc = tables + lcc_offset;
  compile_block.fcc = tables + fcc_offset;
  compile_block.cbits = tables + cbits_offset;
  compile_block.ctypes = tables + ctypes_offset;

  /* See if we can find a fixed set of initial characters for the pattern. */

  memset(start_bits, 0, sizeof(start_bits));
  if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
        (re->options & PCRE_UTF8) != 0, &compile_block))
    return NULL;

  /* Get a pcre_extra block and a pcre_study_data block in one allocation.
  The study data is put in the latter, which is pointed to by the former,
  which may also get additional data set later by the calling program. The
  size of pcre_study_data is saved in a field so that it can be returned via
  pcre_fullinfo(). */

  extra = (pcre_extra *)(pcre_malloc)
    (sizeof(pcre_extra) + sizeof(pcre_study_data));

  if (extra == NULL)
  {
    *errorptr = "failed to get memory";
    return NULL;
  }

  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));
  extra->flags = PCRE_EXTRA_STUDY_DATA;
  extra->study_data = study;

  study->size = sizeof(pcre_study_data);
  study->options = PCRE_STUDY_MAPPED;
  memcpy(study->start_bits, start_bits, sizeof(start_bits));

  return extra;
}
|
||||
|
||||
/* End of pcrestud.c */
|
||||
129
harbour/source/hbpcre/pcretabs.c
Normal file
129
harbour/source/hbpcre/pcretabs.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in pcreinal.h. */
|
||||
|
||||
/* The whole initializer comes from the OP_LENGTHS macro; nothing is listed
here by hand. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Tables for UTF-8 support *
|
||||
*************************************************/
|
||||
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
const int _pcre_utf8_table1[] = {
  0x7f,         /* 1 byte  */
  0x7ff,        /* 2 bytes */
  0xffff,       /* 3 bytes */
  0x1fffff,     /* 4 bytes */
  0x3ffffff,    /* 5 bytes */
  0x7fffffff    /* 6 bytes */
};

const int _pcre_utf8_table1_size =
  sizeof(_pcre_utf8_table1) / sizeof(_pcre_utf8_table1[0]);

/* Both tables below are indexed by the number of additional bytes in the
character: table2 holds the indicator bits to set in the first byte, table3
the mask for the data bits carried in the first byte. */

const int _pcre_utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};

const int _pcre_utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};
|
||||
|
||||
/* Table of the number of extra characters, indexed by the first character
|
||||
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
|
||||
0x3d. */
|
||||
|
||||
const uschar _pcre_utf8_table4[] = {
  /* index 0x00 - 0x1f: one extra byte */
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  /* index 0x20 - 0x2f: two extra bytes */
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  /* index 0x30 - 0x37: three; 0x38 - 0x3b: four; 0x3c - 0x3f: five */
  3,3,3,3,3,3,3,3, 4,4,4,4, 5,5,5,5
};
|
||||
|
||||
/* This table translates Unicode property names into code values for the
|
||||
ucp_findchar() function. It is used by pcretest as well as by the library
|
||||
functions. */
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ "C", 128 + ucp_C },
|
||||
{ "Cc", ucp_Cc },
|
||||
{ "Cf", ucp_Cf },
|
||||
{ "Cn", ucp_Cn },
|
||||
{ "Co", ucp_Co },
|
||||
{ "Cs", ucp_Cs },
|
||||
{ "L", 128 + ucp_L },
|
||||
{ "Ll", ucp_Ll },
|
||||
{ "Lm", ucp_Lm },
|
||||
{ "Lo", ucp_Lo },
|
||||
{ "Lt", ucp_Lt },
|
||||
{ "Lu", ucp_Lu },
|
||||
{ "M", 128 + ucp_M },
|
||||
{ "Mc", ucp_Mc },
|
||||
{ "Me", ucp_Me },
|
||||
{ "Mn", ucp_Mn },
|
||||
{ "N", 128 + ucp_N },
|
||||
{ "Nd", ucp_Nd },
|
||||
{ "Nl", ucp_Nl },
|
||||
{ "No", ucp_No },
|
||||
{ "P", 128 + ucp_P },
|
||||
{ "Pc", ucp_Pc },
|
||||
{ "Pd", ucp_Pd },
|
||||
{ "Pe", ucp_Pe },
|
||||
{ "Pf", ucp_Pf },
|
||||
{ "Pi", ucp_Pi },
|
||||
{ "Po", ucp_Po },
|
||||
{ "Ps", ucp_Ps },
|
||||
{ "S", 128 + ucp_S },
|
||||
{ "Sc", ucp_Sc },
|
||||
{ "Sk", ucp_Sk },
|
||||
{ "Sm", ucp_Sm },
|
||||
{ "So", ucp_So },
|
||||
{ "Z", 128 + ucp_Z },
|
||||
{ "Zl", ucp_Zl },
|
||||
{ "Zp", ucp_Zp },
|
||||
{ "Zs", ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
/* End of pcretabs.c */
|
||||
132
harbour/source/hbpcre/pcretryf.c
Normal file
132
harbour/source/hbpcre/pcretryf.c
Normal file
@@ -0,0 +1,132 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that tests a compiled pattern to
|
||||
see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Flip bytes in an integer *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called when the magic number in a regex doesn't match, in
|
||||
order to flip its bytes to see if we are dealing with a pattern that was
|
||||
compiled on a host of different endianness. If so, this function is used to
|
||||
flip other byte values.
|
||||
|
||||
Arguments:
|
||||
value the number to flip
|
||||
n the number of bytes to flip (assumed to be 2 or 4)
|
||||
|
||||
Returns: the flipped value
|
||||
*/
|
||||
|
||||
/* Reverse the byte order of the low 2 bytes (n == 2) or the low 4 bytes
(any other n) of value and return the result.

The work is done in unsigned long: shifting a byte value of 0x80 or more
left by 24 in a signed 32-bit long would overflow, which is undefined
behaviour. Bits above those being flipped are discarded, as before. */

static long int
byteflip(long int value, int n)
{
  const unsigned long bits = (unsigned long)value;
  unsigned long flipped;

  if (n == 2)
  {
    flipped = ((bits & 0x00ffUL) << 8) | ((bits & 0xff00UL) >> 8);
  }
  else
  {
    flipped = ((bits & 0x000000ffUL) << 24) |
              ((bits & 0x0000ff00UL) <<  8) |
              ((bits & 0x00ff0000UL) >>  8) |
              ((bits & 0xff000000UL) >> 24);
  }

  return (long int)flipped;
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Test for a byte-flipped compiled regex *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from
|
||||
pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that
|
||||
is, it was compiled on a system of opposite endianness. The function is called
|
||||
only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped,
|
||||
we flip all the relevant values into a different data block, and return it.
|
||||
|
||||
Arguments:
|
||||
re points to the regex
|
||||
study points to study data, or NULL
|
||||
internal_re points to a new regex block
|
||||
internal_study points to a new study block
|
||||
|
||||
Returns: the new block if it is indeed a byte-flipped regex
|
||||
NULL if it is not
|
||||
*/
|
||||
|
||||
EXPORT real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
  const pcre_study_data *study, pcre_study_data *internal_study)
{
/* Local shorthands: store into dst->field the byte-flipped src->field, using
the field's own width to choose between a 2-byte and a 4-byte flip. */
#define FLIP_FIELD(dst, src, field) \
  (dst)->field = byteflip((src)->field, sizeof((src)->field))
#define FLIP_FIELD16(dst, src, field) \
  (dst)->field = (pcre_uint16)byteflip((src)->field, sizeof((src)->field))

  /* If the flipped magic number is still not recognized, this is not a
  byte-flipped regex. */

  if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
    return NULL;

  /* Start from a straight copy so that the fields not flipped below are
  carried over, then overwrite each flipped field. */

  *internal_re = *re;

  FLIP_FIELD(internal_re, re, size);
  FLIP_FIELD(internal_re, re, options);

  FLIP_FIELD16(internal_re, re, top_bracket);
  FLIP_FIELD16(internal_re, re, top_backref);
  FLIP_FIELD16(internal_re, re, first_byte);
  FLIP_FIELD16(internal_re, re, req_byte);
  FLIP_FIELD16(internal_re, re, name_table_offset);
  FLIP_FIELD16(internal_re, re, name_entry_size);
  FLIP_FIELD16(internal_re, re, name_count);

  /* The study block is optional; it gets the same copy-then-flip treatment. */

  if (study != NULL)
  {
    *internal_study = *study;
    FLIP_FIELD(internal_study, study, size);
    FLIP_FIELD(internal_study, study, options);
  }

#undef FLIP_FIELD16
#undef FLIP_FIELD

  return internal_re;
}
|
||||
|
||||
/* End of pcretryf.c */
|
||||
61
harbour/source/hbpcre/pcrever.c
Normal file
61
harbour/source/hbpcre/pcrever.c
Normal file
@@ -0,0 +1,61 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains the external function pcre_version(), which returns a
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return version string *
|
||||
*************************************************/
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
EXPORT const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcrever.c */
|
||||
130
harbour/source/hbpcre/pcrevutf.c
Normal file
130
harbour/source/hbpcre/pcrevutf.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function for validating UTF-8 character
|
||||
strings. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Validate a UTF-8 string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called (optionally) at the start of compile or match, to
|
||||
validate that a supposed UTF-8 string is actually valid. The early check means
|
||||
that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying
|
||||
an invalid string are then undefined.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
|
||||
Returns: < 0 if the string is a valid UTF-8 string
|
||||
>= 0 otherwise; the value is the offset of the bad byte
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
_pcre_valid_utf8(const uschar *string, int length)
|
||||
{
|
||||
register const uschar *p;
|
||||
|
||||
if (length < 0)
|
||||
{
|
||||
for (p = string; *p != 0; p++);
|
||||
length = p - string;
|
||||
}
|
||||
|
||||
for (p = string; length-- > 0; p++)
|
||||
{
|
||||
register int ab;
|
||||
register int c = *p;
|
||||
if (c < 128) continue;
|
||||
if ((c & 0xc0) != 0xc0) return p - string;
|
||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab) return p - string;
|
||||
length -= ab;
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
|
||||
/* Check for overlong sequences for each different length */
|
||||
switch (ab)
|
||||
{
|
||||
/* Check for xx00 000x */
|
||||
case 1:
|
||||
if ((c & 0x3e) == 0) return p - string;
|
||||
continue; /* We know there aren't any more bytes to check */
|
||||
|
||||
/* Check for 1110 0000, xx0x xxxx */
|
||||
case 2:
|
||||
if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
|
||||
break;
|
||||
|
||||
/* Check for 1111 0000, xx00 xxxx */
|
||||
case 3:
|
||||
if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
|
||||
break;
|
||||
|
||||
/* Check for 1111 1000, xx00 0xxx */
|
||||
case 4:
|
||||
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
|
||||
break;
|
||||
|
||||
/* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
|
||||
case 5:
|
||||
if (c == 0xfe || c == 0xff ||
|
||||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||
while (--ab > 0)
|
||||
{
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* End of pcrevutf.c */
|
||||
121
harbour/source/hbpcre/pcrexcls.c
Normal file
121
harbour/source/hbpcre/pcrexcls.c
Normal file
@@ -0,0 +1,121 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an internal function that is used to match an extended
|
||||
class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Match character against an XCLASS *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called to match a character against an extended class that
|
||||
might contain values > 255.
|
||||
|
||||
Arguments:
|
||||
c the character
|
||||
data points to the flag byte of the XCLASS data
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
EXPORT BOOL
|
||||
_pcre_xclass(int c, const uschar *data)
|
||||
{
|
||||
int t;
|
||||
BOOL negated = (*data & XCL_NOT) != 0;
|
||||
|
||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
||||
not, we still carry on, because there may be ranges that start below 256 in the
|
||||
additional data. */
|
||||
|
||||
if (c < 256)
|
||||
{
|
||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
||||
return !negated; /* char found */
|
||||
}
|
||||
|
||||
/* First skip the bit map if present. Then match against the list of Unicode
|
||||
properties or large chars or ranges that end with a large char. We won't ever
|
||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
||||
|
||||
if ((*data++ & XCL_MAP) != 0) data += 32;
|
||||
|
||||
while ((t = *data++) != XCL_END)
|
||||
{
|
||||
int x, y;
|
||||
if (t == XCL_SINGLE)
|
||||
{
|
||||
GETCHARINC(x, data);
|
||||
if (c == x) return !negated;
|
||||
}
|
||||
else if (t == XCL_RANGE)
|
||||
{
|
||||
GETCHARINC(x, data);
|
||||
GETCHARINC(y, data);
|
||||
if (c >= x && c <= y) return !negated;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, othercase;
|
||||
int rqdtype = *data++;
|
||||
int category = ucp_findchar(c, &chartype, &othercase);
|
||||
if (rqdtype >= 128)
|
||||
{
|
||||
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;
|
||||
}
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcrexcls.c */
|
||||
60
harbour/source/hbpcre/ucp.h
Normal file
60
harbour/source/hbpcre/ucp.h
Normal file
@@ -0,0 +1,60 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* These are the character categories that are returned by ucp_findchar */
|
||||
|
||||
enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
|
||||
|
||||
/* These are the detailed character types that are returned by ucp_findchar */
|
||||
|
||||
enum {
  /* Other */
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */

  /* Letter */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */

  /* Mark */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */

  /* Number */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */

  /* Punctuation */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */

  /* Symbol */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */

  /* Separator */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
|
||||
|
||||
extern int ucp_findchar(const int, int *, int *);
|
||||
|
||||
#endif
|
||||
|
||||
/* End of ucp.h */
|
||||
91
harbour/source/hbpcre/ucpinter.h
Normal file
91
harbour/source/hbpcre/ucpinter.h
Normal file
@@ -0,0 +1,91 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
/* Internal header file defining the layout of compact nodes in the tree. */
|
||||
|
||||
typedef struct cnode {
  unsigned short f0;    /* flag and type bits plus character high bits */
  unsigned short f1;
  unsigned short f2;    /* right offset bits plus case offset */
} cnode;

/* Masks and shift for the f0 field */

#define f0_leftexists   0x8000   /* Left child exists */
#define f0_typemask     0x3f00   /* Type bits */
#define f0_typeshift         8   /* Type shift */
#define f0_chhmask      0x00ff   /* Character high bits */

/* Masks and shift for the f2 field */

#define f2_rightmask    0xf000   /* Mask for right offset bits */
#define f2_rightshift       12   /* Shift for right offset */
#define f2_casemask     0x0fff   /* Mask for case offset */
|
||||
|
||||
/* The tree consists of a vector of structures of type cnode, with the root
|
||||
node as the first element. The three short ints (16-bits) are used as follows:
|
||||
|
||||
(f0) (1) The 0x8000 bit of f0 is set if a left child exists. The child's node
|
||||
is the next node in the vector.
|
||||
(2) The 0x4000 bit of f0 is spare.
|
||||
(3) The 0x3f00 bits of f0 contain the character type; this is a number
|
||||
defined by the enumeration in ucp.h (e.g. ucp_Lu).
|
||||
(4) The bottom 8 bits of f0 contain the most significant byte of the
|
||||
character's 24-bit codepoint.
|
||||
|
||||
(f1) (1) The f1 field contains the two least significant bytes of the
|
||||
codepoint.
|
||||
|
||||
(f2) (1) The 0xf000 bits of f2 contain zero if there is no right child of this
|
||||
node. Otherwise, they contain one plus the exponent of the power of
|
||||
two of the offset to the right node (e.g. a value of 3 means 8). The
|
||||
units of the offset are node items.
|
||||
|
||||
(2) The 0x0fff bits of f2 contain the signed offset from this character to
|
||||
its alternate cased value. They are zero if there is no such
|
||||
character.
|
||||
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
||.|.| type (6) | ms char (8) || ls char (16) ||....| case offset (12) ||
|
||||
-----------------------------------------------------------------------------
|
||||
| | |
|
||||
| |-> spare |
|
||||
| exponent of right
|
||||
|-> left child exists child offset
|
||||
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. There are (at present) four non-one-to-one mappings in the Unicode data.
|
||||
These are ignored. They are:
|
||||
|
||||
1FBE Greek Prosgegrammeni (lower, with upper -> capital iota)
|
||||
2126 Ohm
|
||||
212A Kelvin
|
||||
212B Angstrom
|
||||
|
||||
Certainly for the last three, having an alternate case would seem to be a
|
||||
mistake. I don't know any Greek, so cannot comment on the first one.
|
||||
|
||||
|
||||
When searching the tree, proceed as follows:
|
||||
|
||||
(1) Start at the first node.
|
||||
|
||||
(2) Extract the character value from f1 and the bottom 8 bits of f0;
|
||||
|
||||
(3) Compare with the character being sought. If equal, we are done.
|
||||
|
||||
(4) If the test character is smaller, inspect the f0_leftexists flag. If it is
|
||||
not set, the character is not in the tree. If it is set, move to the next
|
||||
node, and go to (2).
|
||||
|
||||
(5) If the test character is bigger, extract the f2_rightmask bits from f2, and
|
||||
shift them right by f2_rightshift. If the result is zero, the character is
|
||||
not in the tree. Otherwise, calculate the number of nodes to skip by
|
||||
shifting the value 1 left by this number minus one. Go to (2).
|
||||
*/
|
||||
|
||||
|
||||
/* End of ucpinter.h */
|
||||
15105
harbour/source/hbpcre/ucptable.c
Normal file
15105
harbour/source/hbpcre/ucptable.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -5521,7 +5521,7 @@ static BOOL hb_cdxRegexMatch( CDXAREAP pArea, PHB_REGEX pRegEx, LPCDXKEY pKey )
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( pArea );
|
||||
#endif
|
||||
return hb_regexMatch( pRegEx, szKey, FALSE );
|
||||
return hb_regexMatch( pRegEx, szKey, pKey->len, FALSE );
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -4289,7 +4289,7 @@ static BOOL hb_ntxRegexMatch( LPTAGINFO pTag, PHB_REGEX pRegEx, char * szKey )
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( pTag );
|
||||
#endif
|
||||
return hb_regexMatch( pRegEx, szKey, FALSE );
|
||||
return hb_regexMatch( pRegEx, szKey, pTag->KeyLength, FALSE );
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -56,6 +56,7 @@ C_SOURCES=\
|
||||
hbinet.c \
|
||||
hbrandom.c \
|
||||
hbregex.c \
|
||||
hbregexc.c \
|
||||
hbtoken.c \
|
||||
idle.c \
|
||||
inkey.c \
|
||||
|
||||
@@ -56,113 +56,100 @@
|
||||
#include "hbregex.h"
|
||||
#include "hbapiitm.h"
|
||||
#include "hbapierr.h"
|
||||
#include "hbinit.h"
|
||||
|
||||
/* This releases regex when called from the garbage collector */
|
||||
static HB_GARBAGE_FUNC( hb_regexRelease )
|
||||
static void hb_regfree( PHB_REGEX pRegEx )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
PHB_REGEX pRegEx = ( PHB_REGEX ) Cargo;
|
||||
#if defined( HB_PCRE_REGEX )
|
||||
( pcre_free )( pRegEx->re_pcre );
|
||||
#elif defined( HB_POSIX_REGEX )
|
||||
regfree( &pRegEx->reg );
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( Cargo );
|
||||
#endif
|
||||
}
|
||||
|
||||
PHB_REGEX hb_regexCompile( const char *szRegEx, ULONG ulLen, int iFlags )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
PHB_REGEX pRegEx;
|
||||
|
||||
HB_SYMBOL_UNUSED( ulLen );
|
||||
|
||||
pRegEx = ( PHB_REGEX ) hb_gcAlloc( sizeof( HB_REGEX ), hb_regexRelease );
|
||||
hb_gcLock( pRegEx );
|
||||
memset( pRegEx, 0, sizeof( HB_REGEX ) );
|
||||
pRegEx->fFree = TRUE;
|
||||
pRegEx->iCFlags = REG_EXTENDED |
|
||||
( ( iFlags & HBREG_ICASE ) ? REG_ICASE : 0 ) |
|
||||
( ( iFlags & HBREG_NEWLINE ) ? REG_NEWLINE : 0 ) |
|
||||
( ( iFlags & HBREG_NOSUB ) ? REG_NOSUB : 0 );
|
||||
pRegEx->iEFlags = ( ( iFlags & HBREG_NOTBOL ) ? REG_NOTBOL : 0 ) |
|
||||
( ( iFlags & HBREG_NOTEOL ) ? REG_NOTBOL : 0 );
|
||||
|
||||
if( regcomp( &pRegEx->reg, szRegEx, pRegEx->iCFlags ) != 0 )
|
||||
{
|
||||
hb_gcFree( pRegEx );
|
||||
pRegEx = NULL;
|
||||
}
|
||||
|
||||
return pRegEx;
|
||||
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( szRegEx );
|
||||
HB_SYMBOL_UNUSED( ulLen );
|
||||
HB_SYMBOL_UNUSED( iFlags );
|
||||
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
PHB_REGEX hb_regexGet( PHB_ITEM pRegExItm, int iFlags )
|
||||
{
|
||||
PHB_REGEX pRegEx = NULL;
|
||||
|
||||
if( pRegExItm )
|
||||
{
|
||||
if( HB_IS_POINTER( pRegExItm ) )
|
||||
{
|
||||
pRegEx = ( PHB_REGEX ) hb_itemGetPtrGC( pRegExItm, hb_regexRelease );
|
||||
}
|
||||
else if( HB_IS_STRING( pRegExItm ) )
|
||||
{
|
||||
ULONG ulLen = hb_itemGetCLen( pRegExItm );
|
||||
char * szRegEx = hb_itemGetCPtr( pRegExItm );
|
||||
if( ulLen > 0 )
|
||||
pRegEx = hb_regexCompile( szRegEx, ulLen, iFlags );
|
||||
}
|
||||
}
|
||||
|
||||
if( !pRegEx )
|
||||
hb_errRT_BASE_SubstR( EG_ARG, 3012, "Invalid Regular expression", &hb_errFuncName, 1, pRegExItm );
|
||||
|
||||
return pRegEx;
|
||||
}
|
||||
|
||||
void hb_regexFree( PHB_REGEX pRegEx )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
if( pRegEx && pRegEx->fFree )
|
||||
{
|
||||
regfree( &pRegEx->reg );
|
||||
hb_gcFree( pRegEx );
|
||||
}
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( pRegEx );
|
||||
#endif
|
||||
}
|
||||
|
||||
BOOL hb_regexMatch( PHB_REGEX pRegEx, const char *szString, BOOL fFull )
|
||||
static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
BOOL fMatch;
|
||||
#if defined( HB_PCRE_REGEX )
|
||||
const unsigned char * pCharTable = NULL;
|
||||
const char *szError = NULL;
|
||||
int iErrOffset = 0;
|
||||
int iCFlags = ( ( pRegEx->iFlags & HBREG_ICASE ) ? PCRE_CASELESS : 0 ) |
|
||||
( ( pRegEx->iFlags & HBREG_NEWLINE ) ? PCRE_MULTILINE : 0 ) |
|
||||
( ( pRegEx->iFlags & HBREG_DOTALL ) ? PCRE_DOTALL : 0 );
|
||||
|
||||
fMatch = regexec( &pRegEx->reg, szString, 1, pRegEx->aMatches, pRegEx->iEFlags ) == 0;
|
||||
pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) |
|
||||
( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 );
|
||||
|
||||
return fMatch && ( !fFull ||
|
||||
( pRegEx->aMatches[0].rm_so == 0 &&
|
||||
pRegEx->aMatches[0].rm_eo == (int) strlen( szString ) ) );
|
||||
pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError,
|
||||
&iErrOffset, pCharTable );
|
||||
return pRegEx->re_pcre ? 0 : -1;
|
||||
#elif defined( HB_POSIX_REGEX )
|
||||
int iCFlags = REG_EXTENDED |
|
||||
( ( pRegEx->iFlags & HBREG_ICASE ) ? REG_ICASE : 0 ) |
|
||||
( ( pRegEx->iFlags & HBREG_NEWLINE ) ? REG_NEWLINE : 0 ) |
|
||||
( ( pRegEx->iFlags & HBREG_NOSUB ) ? REG_NOSUB : 0 );
|
||||
pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? REG_NOTBOL : 0 ) |
|
||||
( ( pRegEx->iFlags & HBREG_NOTEOL ) ? REG_NOTEOL : 0 );
|
||||
return regcomp( &pRegEx->reg, szRegEx, iCFlags );
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( pRegEx );
|
||||
HB_SYMBOL_UNUSED( szRegEx );
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Run the compiled expression pRegEx over the first ulLen bytes of szString.
 *
 * aMatches receives the match offsets and must have room for
 * HB_REGMATCH_SIZE( iMatches ) elements; read the results back with
 * HB_REGMATCH_SO() / HB_REGMATCH_EO().
 *
 * Returns a value greater than zero when the expression matched (the number
 * of match slots that were filled) and a value less than or equal to zero
 * otherwise.
 */
static int hb_regexec( PHB_REGEX pRegEx, const char * szString, ULONG ulLen,
                       int iMatches, HB_REGMATCH * aMatches )
{
#if defined( HB_PCRE_REGEX )
   int iResult;

   /* pcre_exec() wants the number of elements in the offsets vector, not the
      number of groups. Callers allocate HB_REGMATCH_SIZE( iMatches ) elements,
      so pass exactly that; a smaller value makes PCRE drop captures. */
   iResult = pcre_exec( pRegEx->re_pcre, NULL /* pcre_extra */,
                        szString, ulLen, 0 /* startoffset */,
                        pRegEx->iEFlags, aMatches,
                        HB_REGMATCH_SIZE( iMatches ) );
   /* 0 means "matched, but more groups than the vector can hold":
      every requested slot has been filled. */
   if( iResult == 0 )
      iResult = iMatches;
   return iResult;
#elif defined( HB_POSIX_REGEX )
   char * szBuffer = NULL;
   int iResult, i;

   /* regexec() takes no length argument, so the subject has to end with a
      NUL byte exactly at ulLen; work on a private copy when it does not. */
   if( szString[ ulLen ] != 0 )
   {
      szBuffer = hb_strndup( szString, ulLen );
      szString = szBuffer;
   }

   /* Mark every slot as unused so the filled ones can be counted below. */
   for( i = 0; i < iMatches; i++ )
      HB_REGMATCH_EO( aMatches, i ) = -1;

   iResult = regexec( &pRegEx->reg, szString, iMatches, aMatches, pRegEx->iEFlags );
   if( iResult == 0 )
   {
      /* Result is the index of the last filled slot plus one. */
      for( i = 0; i < iMatches; i++ )
      {
         if( HB_REGMATCH_EO( aMatches, i ) != -1 )
            iResult = i + 1;
      }
   }
   else
      iResult = -1;

   if( szBuffer )
      hb_xfree( szBuffer );

   return iResult;
#else
   /* No regex engine compiled in: nothing can ever match. */
   HB_SYMBOL_UNUSED( pRegEx );
   HB_SYMBOL_UNUSED( szString );
   HB_SYMBOL_UNUSED( ulLen );
   HB_SYMBOL_UNUSED( iMatches );
   HB_SYMBOL_UNUSED( aMatches );
   return -1;
#endif
}
|
||||
|
||||
|
||||
HB_FUNC( HB_REGEXCOMP )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
ULONG ulLen = hb_parclen( 1 );
|
||||
|
||||
if( ulLen == 0 )
|
||||
@@ -186,7 +173,6 @@ HB_FUNC( HB_REGEXCOMP )
|
||||
hb_gcUnlock( pRegEx );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
HB_FUNC( HB_ISREGEX )
|
||||
@@ -196,10 +182,8 @@ HB_FUNC( HB_ISREGEX )
|
||||
|
||||
HB_FUNC( HB_ATX )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
char * pszString, * pszCopy = NULL;
|
||||
char * pszString;
|
||||
ULONG ulLen, ulStart, ulEnd;
|
||||
regmatch_t aMatches[ 1 ];
|
||||
PHB_REGEX pRegEx;
|
||||
PHB_ITEM pString;
|
||||
int iPCount = hb_pcount();
|
||||
@@ -223,24 +207,20 @@ HB_FUNC( HB_ATX )
|
||||
|
||||
if( ulLen && ulStart <= ulLen && ulStart <= ulEnd )
|
||||
{
|
||||
if( ulEnd > 0 && ulEnd < ulLen && pszString[ ulEnd ] != 0 )
|
||||
{
|
||||
if( ulStart > 1 )
|
||||
{
|
||||
pszString += ulStart - 1;
|
||||
ulEnd -= ulStart - 1;
|
||||
}
|
||||
pszCopy = ( char * ) hb_xgrab( ulEnd + 1 );
|
||||
memcpy( pszCopy, pszString, ulEnd );
|
||||
pszCopy[ ulEnd ] = '\0';
|
||||
pszString = pszCopy;
|
||||
}
|
||||
HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( 1 ) ];
|
||||
|
||||
if( regexec( &pRegEx->reg, pszString, 1, aMatches, 0 ) == 0 )
|
||||
if( ulEnd < ulLen )
|
||||
ulLen = ulEnd;
|
||||
if( ulStart )
|
||||
{
|
||||
ulStart = aMatches[0].rm_so + 1;
|
||||
ulLen = aMatches[0].rm_eo - aMatches[0].rm_so;
|
||||
hb_retclen( pszString + aMatches[0].rm_so, ulLen );
|
||||
--ulStart;
|
||||
ulLen -= ulStart;
|
||||
}
|
||||
if( hb_regexec( pRegEx, pszString + ulStart, ulLen, 1, aMatches ) > 0 )
|
||||
{
|
||||
ulStart += HB_REGMATCH_SO( aMatches, 0 ) + 1;
|
||||
ulLen = HB_REGMATCH_EO( aMatches, 0 ) - HB_REGMATCH_SO( aMatches, 0 );
|
||||
hb_retclen( pszString + ulStart - 1, ulLen );
|
||||
}
|
||||
else
|
||||
ulStart = ulLen = 0;
|
||||
@@ -249,22 +229,17 @@ HB_FUNC( HB_ATX )
|
||||
ulStart = ulLen = 0;
|
||||
|
||||
hb_regexFree( pRegEx );
|
||||
if( pszCopy )
|
||||
hb_xfree( pszCopy );
|
||||
|
||||
if( iPCount > 3 )
|
||||
{
|
||||
hb_stornl( ulStart, 4 );
|
||||
if( iPCount > 4 )
|
||||
hb_stornl( ulLen, 5 );
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static BOOL hb_regex( int iRequest )
|
||||
{
|
||||
#ifdef _HB_REGEX_INTERNAL_
|
||||
regmatch_t aMatches[ REGEX_MAX_GROUPS ];
|
||||
HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( REGEX_MAX_GROUPS ) ];
|
||||
PHB_ITEM pRetArray, pMatch, pString;
|
||||
int i, iMatches, iMaxMatch;
|
||||
BOOL fResult = FALSE;
|
||||
@@ -287,30 +262,22 @@ static BOOL hb_regex( int iRequest )
|
||||
|
||||
pszString = hb_itemGetCPtr( pString );
|
||||
ulLen = hb_itemGetCLen( pString );
|
||||
iMatches = 0;
|
||||
iMaxMatch = iRequest == 0 || iRequest == 4 || iRequest == 5 ?
|
||||
REGEX_MAX_GROUPS : 1;
|
||||
aMatches[0].rm_so = 0;
|
||||
aMatches[0].rm_eo = ulLen;
|
||||
if( regexec( &pRegEx->reg, pszString, iMaxMatch, aMatches, 0 ) == 0 )
|
||||
iMatches = hb_regexec( pRegEx, pszString, ulLen, iMaxMatch, aMatches );
|
||||
if( iMatches > 0 )
|
||||
{
|
||||
switch ( iRequest )
|
||||
{
|
||||
case 0:
|
||||
/* Count sucessful matches */
|
||||
for( i = 0; i < iMaxMatch; i++ )
|
||||
{
|
||||
if( aMatches[i].rm_eo != -1 )
|
||||
iMatches = i;
|
||||
}
|
||||
iMatches++;
|
||||
pRetArray = hb_itemArrayNew( iMatches );
|
||||
for( i = 0; i < iMatches; i++ )
|
||||
{
|
||||
if( aMatches[i].rm_eo > -1 )
|
||||
if( HB_REGMATCH_EO( aMatches, i ) > -1 )
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pRetArray, i + 1 ),
|
||||
pszString + aMatches[i].rm_so,
|
||||
aMatches[i].rm_eo - aMatches[i].rm_so );
|
||||
pszString + HB_REGMATCH_SO( aMatches, i ),
|
||||
HB_REGMATCH_EO( aMatches, i ) -
|
||||
HB_REGMATCH_SO( aMatches, i ) );
|
||||
else
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pRetArray, i + 1 ), "", 0 );
|
||||
}
|
||||
@@ -319,8 +286,8 @@ static BOOL hb_regex( int iRequest )
|
||||
break;
|
||||
|
||||
case 1: /* LIKE */
|
||||
fResult = aMatches[0].rm_so == 0 &&
|
||||
( ULONG ) aMatches[0].rm_eo == ulLen;
|
||||
fResult = HB_REGMATCH_SO( aMatches, 0 ) == 0 &&
|
||||
( ULONG ) HB_REGMATCH_EO( aMatches, 0 ) == ulLen;
|
||||
break;
|
||||
|
||||
case 2: /* MATCH ( HAS ) */
|
||||
@@ -331,16 +298,18 @@ static BOOL hb_regex( int iRequest )
|
||||
iMaxMatch = hb_parni( 5 );
|
||||
pRetArray = hb_itemArrayNew( 0 );
|
||||
pMatch = hb_itemNew( NULL );
|
||||
iMatches = 0;
|
||||
do
|
||||
{
|
||||
hb_itemPutCL( pMatch, pszString, aMatches[0].rm_so );
|
||||
hb_itemPutCL( pMatch, pszString, HB_REGMATCH_SO( aMatches, 0 ) );
|
||||
hb_arrayAddForward( pRetArray, pMatch );
|
||||
ulLen -= aMatches[0].rm_eo;
|
||||
pszString += aMatches[ 0 ].rm_eo;
|
||||
ulLen -= HB_REGMATCH_EO( aMatches, 0 );
|
||||
pszString += HB_REGMATCH_EO( aMatches, 0 );
|
||||
iMatches++;
|
||||
}
|
||||
while( aMatches[0].rm_eo && ulLen && ( iMaxMatch == 0 || iMatches < iMaxMatch ) &&
|
||||
regexec( &pRegEx->reg, pszString, 1, aMatches, 0 ) == 0 );
|
||||
while( HB_REGMATCH_EO( aMatches, 0 ) > 0 && ulLen &&
|
||||
( iMaxMatch == 0 || iMatches < iMaxMatch ) &&
|
||||
hb_regexec( pRegEx, pszString, ulLen, 1, aMatches ) > 0 );
|
||||
|
||||
/* last match must be done also in case that pszString is empty;
|
||||
this would mean an empty split field at the end of the string */
|
||||
@@ -356,27 +325,22 @@ static BOOL hb_regex( int iRequest )
|
||||
break;
|
||||
|
||||
case 4: /* results AND positions */
|
||||
/* Count sucessful matches */
|
||||
for( i = 0; i < iMaxMatch; i++ )
|
||||
{
|
||||
if( aMatches[i].rm_eo != -1 )
|
||||
iMatches = i;
|
||||
}
|
||||
iMatches++;
|
||||
pRetArray = hb_itemArrayNew( iMatches );
|
||||
|
||||
for( i = 0; i < iMatches; i++ )
|
||||
{
|
||||
int iSO = HB_REGMATCH_SO( aMatches, i ),
|
||||
iEO = HB_REGMATCH_EO( aMatches, i );
|
||||
pMatch = hb_arrayGetItemPtr( pRetArray, i + 1 );
|
||||
hb_arrayNew( pMatch, 3 );
|
||||
if ( aMatches[i].rm_eo != -1 )
|
||||
if( iEO != -1 )
|
||||
{
|
||||
/* matched string */
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + aMatches[i].rm_so, aMatches[i].rm_eo - aMatches[i].rm_so );
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + iSO, iEO - iSO );
|
||||
/* begin of match */
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), aMatches[i].rm_so + 1 );
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), iSO + 1 );
|
||||
/* End of match */
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), aMatches[i].rm_eo );
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), iEO );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -397,19 +361,12 @@ static BOOL hb_regex( int iRequest )
|
||||
BOOL fOnlyMatch = !ISLOG( 7 ) || hb_parl( 7 ); /* if TRUE returns only matches and sub-matches, not positions */
|
||||
ULONG ulOffSet = 0;
|
||||
int iCount = 0;
|
||||
int iSO, iEO;
|
||||
|
||||
/* Set new array */
|
||||
pRetArray = hb_itemArrayNew( 0 );
|
||||
do
|
||||
{
|
||||
/* Count sucessful matches */
|
||||
for( i = 0; i < iMaxMatch; i++ )
|
||||
{
|
||||
if( aMatches[i].rm_eo != -1 )
|
||||
iMatches = i;
|
||||
}
|
||||
iMatches++;
|
||||
|
||||
/* If I want all matches */
|
||||
if( iGetMatch == 0 || // Check boundaries
|
||||
( iGetMatch < 0 || iGetMatch > iMatches ) )
|
||||
@@ -417,18 +374,20 @@ static BOOL hb_regex( int iRequest )
|
||||
pAtxArray = hb_itemArrayNew( iMatches );
|
||||
for( i = 0; i < iMatches; i++ )
|
||||
{
|
||||
iSO = HB_REGMATCH_SO( aMatches, i );
|
||||
iEO = HB_REGMATCH_EO( aMatches, i );
|
||||
pMatch = hb_arrayGetItemPtr( pAtxArray, i + 1 );
|
||||
if( !fOnlyMatch )
|
||||
{
|
||||
hb_arrayNew( pMatch, 3 );
|
||||
if ( aMatches[i].rm_eo != -1 )
|
||||
if ( iEO != -1 )
|
||||
{
|
||||
/* matched string */
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + aMatches[i].rm_so, aMatches[i].rm_eo - aMatches[i].rm_so );
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + iSO, iEO - iSO );
|
||||
/* begin of match */
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), ulOffSet + aMatches[i].rm_so + 1 );
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), ulOffSet + iSO + 1 );
|
||||
/* End of match */
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), ulOffSet + aMatches[i].rm_eo );
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), ulOffSet + iEO );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -439,9 +398,9 @@ static BOOL hb_regex( int iRequest )
|
||||
}
|
||||
else
|
||||
{
|
||||
if( aMatches[i].rm_eo != -1 )
|
||||
if( iEO != -1 )
|
||||
/* matched string */
|
||||
hb_itemPutCL( pMatch, pszString + aMatches[i].rm_so, aMatches[i].rm_eo - aMatches[i].rm_so );
|
||||
hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
|
||||
else
|
||||
hb_itemPutCL( pMatch, "", 0 );
|
||||
}
|
||||
@@ -452,18 +411,20 @@ static BOOL hb_regex( int iRequest )
|
||||
else /* Here I get only single matches */
|
||||
{
|
||||
i = iGetMatch - 1;
|
||||
iSO = HB_REGMATCH_SO( aMatches, i );
|
||||
iEO = HB_REGMATCH_EO( aMatches, i );
|
||||
pMatch = hb_itemNew( NULL );
|
||||
if( !fOnlyMatch )
|
||||
{
|
||||
hb_arrayNew( pMatch, 3 );
|
||||
if( aMatches[i].rm_eo != -1 )
|
||||
if( iEO != -1 )
|
||||
{
|
||||
/* matched string */
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + aMatches[i].rm_so, aMatches[i].rm_eo - aMatches[i].rm_so );
|
||||
hb_itemPutCL( hb_arrayGetItemPtr( pMatch, 1 ), pszString + iSO, iEO - iSO );
|
||||
/* begin of match */
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), ulOffSet + aMatches[i].rm_so + 1 );
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 2 ), ulOffSet + iSO + 1 );
|
||||
/* End of match */
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), ulOffSet + aMatches[i].rm_eo );
|
||||
hb_itemPutNI( hb_arrayGetItemPtr( pMatch, 3 ), ulOffSet + iEO );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -474,9 +435,9 @@ static BOOL hb_regex( int iRequest )
|
||||
}
|
||||
else
|
||||
{
|
||||
if( aMatches[i].rm_eo != -1 )
|
||||
if( iEO != -1 )
|
||||
/* matched string */
|
||||
hb_itemPutCL( pMatch, pszString + aMatches[i].rm_so, aMatches[i].rm_eo - aMatches[i].rm_so );
|
||||
hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
|
||||
else
|
||||
hb_itemPutCL( pMatch, "", 0 );
|
||||
}
|
||||
@@ -484,13 +445,16 @@ static BOOL hb_regex( int iRequest )
|
||||
hb_itemRelease( pMatch );
|
||||
}
|
||||
|
||||
ulLen -= aMatches[0].rm_eo;
|
||||
pszString += aMatches[ 0 ].rm_eo;
|
||||
ulOffSet += aMatches[0].rm_eo;
|
||||
iEO = HB_REGMATCH_EO( aMatches, 0 );
|
||||
if( iEO == -1 )
|
||||
break;
|
||||
ulLen -= iEO;
|
||||
pszString += iEO;
|
||||
ulOffSet += iEO;
|
||||
iCount++;
|
||||
}
|
||||
while( aMatches[0].rm_eo && ulLen && ( iMax == 0 || iCount < iMax ) &&
|
||||
regexec( &pRegEx->reg, pszString, iMaxMatch, aMatches, 0 ) == 0 );
|
||||
while( iEO && ulLen && ( iMax == 0 || iCount < iMax ) &&
|
||||
( iMatches = hb_regexec( pRegEx, pszString, ulLen, iMaxMatch, aMatches ) ) > 0 );
|
||||
hb_itemRelease( hb_itemReturnForward( pRetArray ) );
|
||||
fResult = TRUE;
|
||||
break;
|
||||
@@ -507,10 +471,6 @@ static BOOL hb_regex( int iRequest )
|
||||
|
||||
hb_regexFree( pRegEx );
|
||||
return fResult;
|
||||
#else
|
||||
HB_SYMBOL_UNUSED( iRequest );
|
||||
return FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Returns array of Match + Sub-Matches. */
|
||||
@@ -567,3 +527,31 @@ HB_FUNC( HB_REGEXALL )
|
||||
{
|
||||
hb_regex( 5 );
|
||||
}
|
||||
|
||||
#if defined( HB_PCRE_REGEX )
|
||||
/* Allocation hook with the signature PCRE expects (size_t argument);
   it simply forwards the request to hb_xgrab(). */
static void * hb_pcre_grab( size_t size )
{
   void * pBlock = hb_xgrab( size );

   return pBlock;
}
|
||||
#endif
|
||||
|
||||
HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ )
|
||||
#if defined( HB_PCRE_REGEX )
|
||||
pcre_malloc = hb_pcre_grab;
|
||||
pcre_free = hb_xfree;
|
||||
#endif
|
||||
hb_regexInit( hb_regfree, hb_regcomp, hb_regexec );
|
||||
HB_CALL_ON_STARTUP_END( _hb_regex_init_ )
|
||||
|
||||
#if defined(HB_PRAGMA_STARTUP)
|
||||
#pragma startup _hb_regex_init_
|
||||
#elif defined(HB_MSC_STARTUP)
|
||||
#if _MSC_VER >= 1010
|
||||
#pragma data_seg( ".CRT$XIY" )
|
||||
#pragma comment( linker, "/Merge:.CRT=.data" )
|
||||
#else
|
||||
#pragma data_seg( "XIY" )
|
||||
#endif
|
||||
static HB_$INITSYM hb_vm_auto_regex_init_ = _hb_regex_init_;
|
||||
#pragma data_seg()
|
||||
#endif
|
||||
|
||||
164
harbour/source/rtl/hbregexc.c
Normal file
164
harbour/source/rtl/hbregexc.c
Normal file
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Harbour Project source code:
|
||||
*
|
||||
*
|
||||
* Copyright 2007 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
|
||||
* www - http://www.harbour-project.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this software; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
||||
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
|
||||
*
|
||||
* As a special exception, the Harbour Project gives permission for
|
||||
* additional uses of the text contained in its release of Harbour.
|
||||
*
|
||||
* The exception is that, if you link the Harbour libraries with other
|
||||
* files to produce an executable, this does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public License.
|
||||
* Your use of that executable is in no way restricted on account of
|
||||
* linking the Harbour library code into it.
|
||||
*
|
||||
* This exception does not however invalidate any other reasons why
|
||||
* the executable file might be covered by the GNU General Public License.
|
||||
*
|
||||
* This exception applies only to the code released by the Harbour
|
||||
* Project under the name Harbour. If you copy code from other
|
||||
* Harbour Project or Free Software Foundation releases into a copy of
|
||||
* Harbour, as the General Public License permits, the exception does
|
||||
* not apply to the code that you add in this way. To avoid misleading
|
||||
* anyone as to the status of such modified files, you must delete
|
||||
* this exception notice from them.
|
||||
*
|
||||
* If you write modifications of your own for Harbour, it is your choice
|
||||
* whether to permit this exception to apply to your modifications.
|
||||
* If you do not wish that, delete this exception notice.
|
||||
*
|
||||
*/
|
||||
|
||||
/* #define HB_PCRE_REGEX */
|
||||
|
||||
#define _HB_REGEX_INTERNAL_
|
||||
#include "hbregex.h"
|
||||
#include "hbapiitm.h"
|
||||
#include "hbapierr.h"
|
||||
|
||||
/* Default "free" handler: there is no engine state to release, so the
   argument is only marked as used to keep compilers quiet. */
static void hb_regfree( PHB_REGEX pRegEx )
{
   HB_SYMBOL_UNUSED( pRegEx );
}
|
||||
|
||||
/* Default "compile" handler: always reports failure (-1), so
   hb_regexCompile() yields NULL for every pattern. */
static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
{
   HB_SYMBOL_UNUSED( szRegEx );
   HB_SYMBOL_UNUSED( pRegEx );

   return -1;
}
|
||||
|
||||
/* Default "execute" handler: never matches; it returns -1 and leaves
   aMatches untouched. */
static int hb_regexec( PHB_REGEX pRegEx, const char * szString, ULONG ulLen,
                       int iMatches, HB_REGMATCH * aMatches )
{
   HB_SYMBOL_UNUSED( aMatches );
   HB_SYMBOL_UNUSED( iMatches );
   HB_SYMBOL_UNUSED( ulLen );
   HB_SYMBOL_UNUSED( szString );
   HB_SYMBOL_UNUSED( pRegEx );

   return -1;
}
|
||||
|
||||
static HB_REG_FREE s_reg_free = hb_regfree;
|
||||
static HB_REG_COMP s_reg_comp = hb_regcomp;
|
||||
static HB_REG_EXEC s_reg_exec = hb_regexec;
|
||||
|
||||
void hb_regexInit( HB_REG_FREE pFree, HB_REG_COMP pComp, HB_REG_EXEC pExec )
|
||||
{
|
||||
s_reg_free = pFree;
|
||||
s_reg_comp = pComp;
|
||||
s_reg_exec = pExec;
|
||||
}
|
||||
|
||||
/* This releases regex when called from the garbage collector */
|
||||
HB_GARBAGE_FUNC( hb_regexRelease )
|
||||
{
|
||||
( s_reg_free )( ( PHB_REGEX ) Cargo );
|
||||
}
|
||||
|
||||
/*
 * Compile szRegEx with the installed engine.
 *
 * szRegEx - pattern, handed to the engine as a C string
 * ulLen   - pattern length; not used at present
 * iFlags  - HBREG_* flags, stored in the new structure for the engine
 *
 * Returns a zero-filled, GC-allocated and locked HB_REGEX with fFree set,
 * or NULL when the engine's compile handler reports a non-zero result.
 */
PHB_REGEX hb_regexCompile( const char *szRegEx, ULONG ulLen, int iFlags )
{
   PHB_REGEX pNew = ( PHB_REGEX ) hb_gcAlloc( sizeof( HB_REGEX ), hb_regexRelease );

   HB_SYMBOL_UNUSED( ulLen );

   hb_gcLock( pNew );
   memset( pNew, 0, sizeof( HB_REGEX ) );
   pNew->fFree = TRUE;
   pNew->iFlags = iFlags;

   if( ( s_reg_comp )( pNew, szRegEx ) == 0 )
      return pNew;

   /* Compilation failed: drop the half-built block. */
   hb_gcFree( pNew );
   return NULL;
}
|
||||
|
||||
/*
 * Obtain a compiled expression from an item.
 *
 * A pointer item is asked for its hb_regexRelease-tagged GC pointer; a
 * non-empty string item is compiled with iFlags. Any other input, or a
 * failure in either path, is reported through hb_errRT_BASE_SubstR()
 * and NULL is returned.
 */
PHB_REGEX hb_regexGet( PHB_ITEM pRegExItm, int iFlags )
{
   PHB_REGEX pFound = NULL;

   if( pRegExItm && HB_IS_POINTER( pRegExItm ) )
   {
      pFound = ( PHB_REGEX ) hb_itemGetPtrGC( pRegExItm, hb_regexRelease );
   }
   else if( pRegExItm && HB_IS_STRING( pRegExItm ) )
   {
      ULONG ulSize = hb_itemGetCLen( pRegExItm );
      char * szPattern = hb_itemGetCPtr( pRegExItm );

      if( ulSize > 0 )
         pFound = hb_regexCompile( szPattern, ulSize, iFlags );
   }

   if( pFound == NULL )
      hb_errRT_BASE_SubstR( EG_ARG, 3012, "Invalid Regular expression", &hb_errFuncName, 1, pRegExItm );

   return pFound;
}
|
||||
|
||||
/* Release a compiled expression, but only when it carries the fFree mark;
   NULL and unmarked expressions are left alone. */
void hb_regexFree( PHB_REGEX pRegEx )
{
   if( pRegEx == NULL || !pRegEx->fFree )
      return;

   /* engine data first, then the GC block itself */
   ( s_reg_free )( pRegEx );
   hb_gcFree( pRegEx );
}
|
||||
|
||||
/*
 * Test the first ulLen bytes of szString against pRegEx.
 *
 * With fFull set, the match must start at offset 0 and end at ulLen;
 * otherwise a match anywhere is enough.
 */
BOOL hb_regexMatch( PHB_REGEX pRegEx, const char *szString, ULONG ulLen, BOOL fFull )
{
   HB_REGMATCH aFound[ HB_REGMATCH_SIZE( 1 ) ];

   if( ( s_reg_exec )( pRegEx, szString, ulLen, 1, aFound ) <= 0 )
      return FALSE;

   if( !fFull )
      return TRUE;

   return HB_REGMATCH_SO( aFound, 0 ) == 0 &&
          HB_REGMATCH_EO( aFound, 0 ) == ( int ) ulLen;
}
|
||||
@@ -282,7 +282,7 @@ BOOL hb_strMatchRegExp( const char * szString, const char * szPattern )
|
||||
if( pRegEx )
|
||||
{
|
||||
BOOL fMatch;
|
||||
fMatch = hb_regexMatch( pRegEx, szString, TRUE );
|
||||
fMatch = hb_regexMatch( pRegEx, szString, strlen( szString ), TRUE );
|
||||
hb_regexFree( pRegEx );
|
||||
return fMatch;
|
||||
}
|
||||
|
||||
@@ -26,5 +26,12 @@ LIBS=\
|
||||
pp \
|
||||
compiler \
|
||||
common \
|
||||
hbpcre \
|
||||
|
||||
ifeq ($(findstring -DHB_PCRE_REGEX, $(C_USR)),)
|
||||
ifeq ($(findstring -DHB_POSIX_REGEX, $(C_USR)),)
|
||||
LIBS += hbpcre
|
||||
endif
|
||||
endif
|
||||
|
||||
include $(TOP)$(ROOT)config/bin.cf
|
||||
|
||||
Reference in New Issue
Block a user