2008-05-26 16:59 UTC+0100 Viktor Szakats (harbour.01 syenar hu)
* common.mak
* source/hbpcre/Makefile
* source/hbpcre/chartabs.c -- changes
* source/hbpcre/config.h
* source/hbpcre/pcre.h
* source/hbpcre/pcrecomp.c
* source/hbpcre/pcreconf.c
* source/hbpcre/pcredfa.c
* source/hbpcre/pcreexec.c
* source/hbpcre/pcrefind.c
* source/hbpcre/pcrefinf.c
* source/hbpcre/pcreget.c
* source/hbpcre/pcreinal.h
* source/hbpcre/pcreinfo.c
* source/hbpcre/pcremktb.c
* source/hbpcre/pcreoutf.c
* source/hbpcre/pcrerefc.c
* source/hbpcre/pcrestud.c
* source/hbpcre/pcretabs.c
* source/hbpcre/pcretryf.c
* source/hbpcre/pcrever.c
* source/hbpcre/pcrevutf.c
* source/hbpcre/pcrexcls.c
* source/hbpcre/ucp.h
* source/hbpcre/ucpinter.h
- source/hbpcre/pcreglob.c -- renames
+ source/hbpcre/_hbpcreg.c
- source/hbpcre/pcreprni.c
+ source/hbpcre/pcreprni.h
- source/hbpcre/ucptable.c
+ source/hbpcre/ucptable.h
+ source/hbpcre/_hbconf.h -- new files
+ source/hbpcre/pcrenewl.c
- source/hbpcre/dftables.c -- deletion
+ Updated to PCRE 7.7 (from 6.3)
; Original code not (yet) modified, so some warnings
may appear in foreign code.
; Please test.
+ source/hbpcre/cnv_hb2o.bat
+ source/hbpcre/cnv_o2hb.bat
+ Added batch files to make the conversion from
original PCRE source files into Harbour.
This commit is contained in:
@@ -8,6 +8,51 @@
|
||||
2008-12-31 13:59 UTC+0100 Foo Bar <foo.bar@foobar.org>
|
||||
*/
|
||||
|
||||
2008-05-26 16:59 UTC+0100 Viktor Szakats (harbour.01 syenar hu)
|
||||
* common.mak
|
||||
* source/hbpcre/Makefile
|
||||
* source/hbpcre/chartabs.c -- changes
|
||||
* source/hbpcre/config.h
|
||||
* source/hbpcre/pcre.h
|
||||
* source/hbpcre/pcrecomp.c
|
||||
* source/hbpcre/pcreconf.c
|
||||
* source/hbpcre/pcredfa.c
|
||||
* source/hbpcre/pcreexec.c
|
||||
* source/hbpcre/pcrefind.c
|
||||
* source/hbpcre/pcrefinf.c
|
||||
* source/hbpcre/pcreget.c
|
||||
* source/hbpcre/pcreinal.h
|
||||
* source/hbpcre/pcreinfo.c
|
||||
* source/hbpcre/pcremktb.c
|
||||
* source/hbpcre/pcreoutf.c
|
||||
* source/hbpcre/pcrerefc.c
|
||||
* source/hbpcre/pcrestud.c
|
||||
* source/hbpcre/pcretabs.c
|
||||
* source/hbpcre/pcretryf.c
|
||||
* source/hbpcre/pcrever.c
|
||||
* source/hbpcre/pcrevutf.c
|
||||
* source/hbpcre/pcrexcls.c
|
||||
* source/hbpcre/ucp.h
|
||||
* source/hbpcre/ucpinter.h
|
||||
- source/hbpcre/pcreglob.c -- renames
|
||||
+ source/hbpcre/_hbpcreg.c
|
||||
- source/hbpcre/pcreprni.c
|
||||
+ source/hbpcre/pcreprni.h
|
||||
- source/hbpcre/ucptable.c
|
||||
+ source/hbpcre/ucptable.h
|
||||
+ source/hbpcre/_hbconf.h -- new files
|
||||
+ source/hbpcre/pcrenewl.c
|
||||
- source/hbpcre/dftables.c -- deletion
|
||||
+ Updated to PCRE 7.7 (from 6.3)
|
||||
; Original code not (yet) modified, so some warnings
|
||||
may appear in foreign code.
|
||||
; Please test.
|
||||
|
||||
+ source/hbpcre/cnv_hb2o.bat
|
||||
+ source/hbpcre/cnv_o2hb.bat
|
||||
+ Added batch files to make the conversion from
|
||||
original PCRE source files into Harbour.
|
||||
|
||||
2008-05-26 12:23 UTC+0100 Viktor Szakats (harbour.01 syenar hu)
|
||||
* contrib/hbclipsm/gauge.c
|
||||
! Fixed GPF in GaugeNew(). Old bug.
|
||||
|
||||
@@ -745,25 +745,25 @@ LANG_LIB_OBJS = \
|
||||
|
||||
#**********************************************************
|
||||
PCRE_LIB_OBJS = \
|
||||
$(OBJ_DIR)\_hbpcreg$(OBJEXT) \
|
||||
$(OBJ_DIR)\chartabs$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrecomp$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreconf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcredfa$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreexec$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrefind$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrefinf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreget$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreglob$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreinfo$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcremktb$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrenewl$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreoutf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcreprni$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrerefc$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrestud$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcretabs$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcretryf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrefind$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrevutf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrever$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrevutf$(OBJEXT) \
|
||||
$(OBJ_DIR)\pcrexcls$(OBJEXT) \
|
||||
|
||||
#**********************************************************
|
||||
|
||||
@@ -5,25 +5,25 @@
|
||||
ROOT = ../../
|
||||
|
||||
C_SOURCES=\
|
||||
_hbpcreg.c \
|
||||
chartabs.c \
|
||||
pcrecomp.c \
|
||||
pcreconf.c \
|
||||
pcredfa.c \
|
||||
pcredfa.c \
|
||||
pcreexec.c \
|
||||
pcrefind.c \
|
||||
pcrefinf.c \
|
||||
pcreget.c \
|
||||
pcreglob.c \
|
||||
pcreget.c \
|
||||
pcreinfo.c \
|
||||
pcremktb.c \
|
||||
pcrenewl.c \
|
||||
pcreoutf.c \
|
||||
pcreprni.c \
|
||||
pcrerefc.c \
|
||||
pcrestud.c \
|
||||
pcretabs.c \
|
||||
pcretryf.c \
|
||||
pcrefind.c \
|
||||
pcrever.c \
|
||||
pcrevutf.c \
|
||||
pcrever.c \
|
||||
pcrexcls.c \
|
||||
|
||||
LIBNAME=hbpcre
|
||||
|
||||
22
harbour/source/hbpcre/_hbconf.h
Normal file
22
harbour/source/hbpcre/_hbconf.h
Normal file
@@ -0,0 +1,22 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
#ifndef HB_CONFIG_H
|
||||
#define HB_CONFIG_H
|
||||
|
||||
#define PCRE_STATIC
|
||||
|
||||
#if defined( _MSC_VER )
|
||||
#pragma warning( push, 0 )
|
||||
#endif
|
||||
|
||||
#if defined( __BORLANDC__ )
|
||||
#pragma warn -use
|
||||
#pragma warn -csu
|
||||
#pragma warn -aus
|
||||
#endif
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#endif
|
||||
@@ -1,3 +1,10 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/* See pcre_globals.c in PCRE source package for the original of
|
||||
this file. We need to override it, so we've made it local. */
|
||||
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
@@ -6,7 +13,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -46,32 +53,32 @@ indirection. These values can be changed by the caller, but are shared between
|
||||
all threads. However, when compiling for Virtual Pascal, things are done
|
||||
differently, and global variables are not used (see pcre.in). */
|
||||
|
||||
|
||||
#include "hbapi.h"
|
||||
#include "pcreinal.h"
|
||||
|
||||
/* Allocation wrapper: forwards the requested size to hb_xgrab() and returns
   its result. It has the void *( size_t ) shape and is used below as the
   initializer for the pcre_malloc and pcre_stack_malloc function pointers.
   NOTE(review): presumably needed because hb_xgrab()'s own prototype does not
   match that pointer type exactly - confirm against hbapi.h. */
static void * hb_pcre_grab( size_t size )
|
||||
{
|
||||
return hb_xgrab( size );
|
||||
}
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
void *(*pcre_malloc)(size_t) = hb_pcre_grab;
|
||||
void (*pcre_free)(void *) = hb_xfree;
|
||||
void *(*pcre_stack_malloc)(size_t) = hb_pcre_grab;
|
||||
void (*pcre_stack_free)(void *) = hb_xfree;
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
}
|
||||
#else
|
||||
void *(*pcre_malloc)(size_t) = hb_pcre_grab;
|
||||
void (*pcre_free)(void *) = hb_xfree;
|
||||
void *(*pcre_stack_malloc)(size_t) = hb_pcre_grab;
|
||||
void (*pcre_stack_free)(void *) = hb_xfree;
|
||||
int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#ifdef TRUE
|
||||
#undef TRUE
|
||||
#endif
|
||||
#ifdef FALSE
|
||||
#undef FALSE
|
||||
#endif
|
||||
|
||||
/* End of pcreglob.c */
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
|
||||
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
|
||||
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
|
||||
#endif
|
||||
|
||||
/* End of pcre_globals.c */
|
||||
@@ -2,15 +2,30 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* This file is automatically written by the dftables auxiliary
|
||||
program. If you edit it by hand, you might like to edit the Makefile to
|
||||
prevent its ever being regenerated.
|
||||
/* This file contains character tables that are used when no external tables
|
||||
are passed to PCRE by the application that calls it. The tables are used only
|
||||
for characters whose code values are less than 256.
|
||||
|
||||
This file contains the default tables for characters with codes less than
|
||||
128 (ASCII characters). These tables are used when no external tables are
|
||||
passed to PCRE. */
|
||||
This is a default version of the tables that assumes ASCII encoding. A program
|
||||
called dftables (which is distributed with PCRE) can be used to build
|
||||
alternative versions of this file. This is necessary if you are running in an
|
||||
EBCDIC environment, or if you want to default to a different encoding, for
|
||||
example ISO-8859-1. When dftables is run, it creates these tables in the
|
||||
current locale. If PCRE is configured with --enable-rebuild-chartables, this
|
||||
happens automatically.
|
||||
|
||||
The following #includes are present because without them gcc 4.x may remove the
|
||||
array definition from the final binary if PCRE is built into a static library
|
||||
and dead code stripping is activated. This leads to link errors. Pulling in the
|
||||
header ensures that the array gets flagged as "someone outside this compilation
|
||||
unit might reference this" and so it will always be supplied to the linker. */
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
extern const unsigned char _pcre_default_tables[];
|
||||
const unsigned char _pcre_default_tables[] = {
|
||||
|
||||
/* This table is a lower casing table. */
|
||||
@@ -83,11 +98,10 @@ const unsigned char _pcre_default_tables[] = {
|
||||
240,241,242,243,244,245,246,247,
|
||||
248,249,250,251,252,253,254,255,
|
||||
|
||||
/* This table contains bit maps for various character classes.
|
||||
Each map is 32 bytes long and the bits run from the least
|
||||
significant end of each byte. The classes that have their own
|
||||
maps are: space, xdigit, digit, upper, lower, word, graph
|
||||
print, punct, and cntrl. Other classes are built from combinations. */
|
||||
/* This table contains bit maps for various character classes. Each map is 32
|
||||
bytes long and the bits run from the least significant end of each byte. The
|
||||
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
|
||||
graph, print, punct, and cntrl. Other classes are built from combinations. */
|
||||
|
||||
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
@@ -159,7 +173,7 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
|
||||
0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10, /* X - _ */
|
||||
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
|
||||
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
|
||||
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
|
||||
@@ -181,4 +195,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
|
||||
|
||||
/* End of chartables.c */
|
||||
/* End of pcre_chartables.c */
|
||||
|
||||
59
harbour/source/hbpcre/cnv_hb2o.bat
Normal file
59
harbour/source/hbpcre/cnv_hb2o.bat
Normal file
@@ -0,0 +1,59 @@
|
||||
@echo off
|
||||
rem
|
||||
rem $Id$
|
||||
rem
|
||||
|
||||
rem Tested with PCRE 7.7
|
||||
|
||||
rem NOTE: Purpose of this script is to take the PCRE files
|
||||
rem in Harbour repo and convert them back to the filenames
|
||||
rem used in the original PCRE source distribution.
|
||||
rem This is to aid finding local modifications and
|
||||
rem apply them after a PCRE source update.
|
||||
rem [vszakats]
|
||||
rem
|
||||
rem This tool uses 'GNU gsar' for search and replace.
|
||||
rem
|
||||
rem DISCLAIMER: This tool is targeted only to Harbour core
|
||||
rem maintainers. If you're not one of them you
|
||||
rem don't have to mess with this tool.
|
||||
|
||||
md ori_dst
|
||||
del ori_dst\*.* /Y
|
||||
|
||||
copy config.h ori_dst\config.h.generic
|
||||
copy pcre.h ori_dst\pcre.h.generic
|
||||
copy pcreinal.h ori_dst\pcre_internal.h
|
||||
copy ucp.h ori_dst\ucp.h
|
||||
copy ucpinter.h ori_dst\ucpinternal.h
|
||||
copy ucptable.h ori_dst\ucptable.h
|
||||
copy chartabs.c ori_dst\pcre_chartables.c.dist
|
||||
copy pcrecomp.c ori_dst\pcre_compile.c
|
||||
copy pcreconf.c ori_dst\pcre_config.c
|
||||
copy pcredfa.c ori_dst\pcre_dfa_exec.c
|
||||
copy pcreexec.c ori_dst\pcre_exec.c
|
||||
copy pcrefinf.c ori_dst\pcre_fullinfo.c
|
||||
copy pcreget.c ori_dst\pcre_get.c
|
||||
copy pcreinfo.c ori_dst\pcre_info.c
|
||||
copy pcremktb.c ori_dst\pcre_maketables.c
|
||||
copy pcrenewl.c ori_dst\pcre_newline.c
|
||||
copy pcreoutf.c ori_dst\pcre_ord2utf8.c
|
||||
copy pcreprni.h ori_dst\pcre_printint.src
|
||||
copy pcrerefc.c ori_dst\pcre_refcount.c
|
||||
copy pcrestud.c ori_dst\pcre_study.c
|
||||
copy pcretabs.c ori_dst\pcre_tables.c
|
||||
copy pcretryf.c ori_dst\pcre_try_flipped.c
|
||||
copy pcrefind.c ori_dst\pcre_ucp_searchfuncs.c
|
||||
copy pcrevutf.c ori_dst\pcre_valid_utf8.c
|
||||
copy pcrever.c ori_dst\pcre_version.c
|
||||
copy pcrexcls.c ori_dst\pcre_xclass.c
|
||||
|
||||
cd ori_dst
|
||||
|
||||
gsar -o -s":x22pcreinal.h:x22" -r":x22pcre_internal.h:x22" *.*
|
||||
gsar -o -s":x22ucpinter.h:x22" -r":x22ucpinternal.h:x22" *.*
|
||||
gsar -o -s":x22_hbconf.h:x22" -r":x22config.h:x22" *.*
|
||||
gsar -o -s":x22pcreprni.h:x22" -r":x22pcre_printint.src:x22" *.*
|
||||
gsar -o -s"if 1" -r"ifdef HAVE_CONFIG_H" *.*
|
||||
|
||||
cd ..
|
||||
70
harbour/source/hbpcre/cnv_o2hb.bat
Normal file
70
harbour/source/hbpcre/cnv_o2hb.bat
Normal file
@@ -0,0 +1,70 @@
|
||||
@echo off
|
||||
rem
|
||||
rem $Id$
|
||||
rem
|
||||
|
||||
rem Tested with PCRE 7.7
|
||||
|
||||
rem NOTE: Purpose of this script is to take the original
|
||||
rem PCRE files from their source distribution and convert
|
||||
rem them to the short filenames we use here in Harbour.
|
||||
rem Short filenames are needed for full DJGPP support.
|
||||
rem Some other automated modifications are also done
|
||||
rem to help compiling the sources "as-is", to try to
|
||||
rem avoid any manual editing on these foreign sources.
|
||||
rem [vszakats]
|
||||
rem
|
||||
rem This tool uses 'GNU gsar' for search and replace
|
||||
rem and 'GNU unix2dos' for line ending conversion.
|
||||
rem
|
||||
rem DISCLAIMER: This tool is targeted only to Harbour core
|
||||
rem maintainers. If you're not one of them you
|
||||
rem don't have to mess with this tool.
|
||||
|
||||
attrib +R _hbconf.h
|
||||
attrib +R _hbpcreg.c
|
||||
del *.c
|
||||
del *.h
|
||||
attrib -R _hbconf.h
|
||||
attrib -R _hbpcreg.c
|
||||
|
||||
copy ori_src\config.h.generic config.h
|
||||
copy ori_src\pcre.h.generic pcre.h
|
||||
copy ori_src\pcre_internal.h pcreinal.h
|
||||
copy ori_src\ucp.h ucp.h
|
||||
copy ori_src\ucpinternal.h ucpinter.h
|
||||
copy ori_src\ucptable.h ucptable.h
|
||||
copy ori_src\pcre_chartables.c.dist chartabs.c
|
||||
copy ori_src\pcre_compile.c pcrecomp.c
|
||||
copy ori_src\pcre_config.c pcreconf.c
|
||||
copy ori_src\pcre_dfa_exec.c pcredfa.c
|
||||
copy ori_src\pcre_exec.c pcreexec.c
|
||||
copy ori_src\pcre_fullinfo.c pcrefinf.c
|
||||
copy ori_src\pcre_get.c pcreget.c
|
||||
copy ori_src\pcre_info.c pcreinfo.c
|
||||
copy ori_src\pcre_maketables.c pcremktb.c
|
||||
copy ori_src\pcre_newline.c pcrenewl.c
|
||||
copy ori_src\pcre_ord2utf8.c pcreoutf.c
|
||||
copy ori_src\pcre_printint.src pcreprni.h
|
||||
copy ori_src\pcre_refcount.c pcrerefc.c
|
||||
copy ori_src\pcre_study.c pcrestud.c
|
||||
copy ori_src\pcre_tables.c pcretabs.c
|
||||
copy ori_src\pcre_try_flipped.c pcretryf.c
|
||||
copy ori_src\pcre_ucp_searchfuncs.c pcrefind.c
|
||||
copy ori_src\pcre_valid_utf8.c pcrevutf.c
|
||||
copy ori_src\pcre_version.c pcrever.c
|
||||
copy ori_src\pcre_xclass.c pcrexcls.c
|
||||
|
||||
unix2dos *.c
|
||||
unix2dos *.h
|
||||
|
||||
gsar -o -s":x22pcre_printint.src:x22" -r":x22pcreprni.h:x22" *.c
|
||||
gsar -o -s":x22pcre_printint.src:x22" -r":x22pcreprni.h:x22" *.h
|
||||
gsar -o -s":x22pcre_internal.h:x22" -r":x22pcreinal.h:x22" *.c
|
||||
gsar -o -s":x22pcre_internal.h:x22" -r":x22pcreinal.h:x22" *.h
|
||||
gsar -o -s":x22ucpinternal.h:x22" -r":x22ucpinter.h:x22" *.c
|
||||
gsar -o -s":x22ucpinternal.h:x22" -r":x22ucpinter.h:x22" *.h
|
||||
gsar -o -s":x22config.h:x22" -r":x22_hbconf.h:x22" *.c
|
||||
gsar -o -s":x22config.h:x22" -r":x22_hbconf.h:x22" *.h
|
||||
gsar -o -s":x22_hbconf.h:x22" -r":x22config.h:x22" _hbconf.h
|
||||
gsar -o -s"ifdef HAVE_CONFIG_H" -r"if 1" *.c
|
||||
@@ -1,116 +1,307 @@
|
||||
/* config.h. Generated from config.h.in by configure. */
|
||||
/* config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* On Unix systems config.in is converted by configure into config.h. PCRE is
|
||||
written in Standard C, but there are a few non-standard things it can cope
|
||||
with, allowing it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
On a non-Unix system you should just copy this file into config.h, and set up
|
||||
the macros the way you need them. You should normally change the definitions of
|
||||
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
|
||||
works, these cannot be made the defaults. If your system has bcopy() and not
|
||||
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
|
||||
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
|
||||
function will be used. */
|
||||
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
|
||||
Some other environments also support the use of "configure". PCRE is written in
|
||||
Standard C, but there are a few non-standard things it can cope with, allowing
|
||||
it to run on SunOS4 and other "close to standard" systems.
|
||||
|
||||
If you are going to build PCRE "by hand" on a system without "configure" you
|
||||
should copy the distributed config.h.generic to config.h, and then set up the
|
||||
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
|
||||
all of your compile commands, so that config.h is included at the start of
|
||||
every source.
|
||||
|
||||
Alternatively, you can avoid editing by using -D on the compiler command line
|
||||
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
|
||||
|
||||
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
|
||||
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
|
||||
them both to 0; an emulation function will be used. */
|
||||
|
||||
/* By default, the \R escape sequence matches any Unicode line ending
|
||||
character or sequence of characters. If BSR_ANYCRLF is defined, this is
|
||||
changed so that backslash-R matches only CR, LF, or CRLF. The build-time
|
||||
default can be overridden by the user of PCRE at runtime. On systems that
|
||||
support it, "configure" can be used to override the default. */
|
||||
/* #undef BSR_ANYCRLF */
|
||||
|
||||
/* If you are compiling for a system that uses EBCDIC instead of ASCII
|
||||
character codes, define this macro as 1. On systems that can use "configure",
|
||||
this can be done via --enable-ebcdic. */
|
||||
character codes, define this macro as 1. On systems that can use
|
||||
"configure", this can be done via --enable-ebcdic. */
|
||||
/* #undef EBCDIC */
|
||||
|
||||
#ifndef EBCDIC
|
||||
#define EBCDIC 0
|
||||
/* Define to 1 if you have the `bcopy' function. */
|
||||
#ifndef HAVE_BCOPY
|
||||
#define HAVE_BCOPY 1
|
||||
#endif
|
||||
|
||||
/* If you are compiling for a system that needs some magic to be inserted
|
||||
before the definition of an exported function, define this macro to contain the
|
||||
relevant magic. It appears at the start of every exported function. */
|
||||
/* Define to 1 if you have the <bits/type_traits.h> header file. */
|
||||
/* #undef HAVE_BITS_TYPE_TRAITS_H */
|
||||
|
||||
#define EXPORT
|
||||
/* Define to 1 if you have the <bzlib.h> header file. */
|
||||
#ifndef HAVE_BZLIB_H
|
||||
#define HAVE_BZLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to empty if the "const" keyword does not work. */
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
#ifndef HAVE_DIRENT_H
|
||||
#define HAVE_DIRENT_H 1
|
||||
#endif
|
||||
|
||||
#undef const
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#ifndef HAVE_DLFCN_H
|
||||
#define HAVE_DLFCN_H 1
|
||||
#endif
|
||||
|
||||
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#ifndef HAVE_INTTYPES_H
|
||||
#define HAVE_INTTYPES_H 1
|
||||
#endif
|
||||
|
||||
#undef size_t
|
||||
/* Define to 1 if you have the <limits.h> header file. */
|
||||
#ifndef HAVE_LIMITS_H
|
||||
#define HAVE_LIMITS_H 1
|
||||
#endif
|
||||
|
||||
/* The following two definitions are mainly for the benefit of SunOS4, which
|
||||
doesn't have the strerror() or memmove() functions that should be present in
|
||||
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
|
||||
normally be defined with the value 1 for other systems, but unfortunately we
|
||||
can't make this the default because "configure" files generated by autoconf
|
||||
will only change 0 to 1; they won't change 1 to 0 if the functions are not
|
||||
found. */
|
||||
/* Define to 1 if the system has the type `long long'. */
|
||||
#ifndef HAVE_LONG_LONG
|
||||
#define HAVE_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `memmove' function. */
|
||||
#ifndef HAVE_MEMMOVE
|
||||
#define HAVE_MEMMOVE 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#ifndef HAVE_MEMORY_H
|
||||
#define HAVE_MEMORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <readline/history.h> header file. */
|
||||
#ifndef HAVE_READLINE_HISTORY_H
|
||||
#define HAVE_READLINE_HISTORY_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||
#ifndef HAVE_READLINE_READLINE_H
|
||||
#define HAVE_READLINE_READLINE_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#ifndef HAVE_STDINT_H
|
||||
#define HAVE_STDINT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#ifndef HAVE_STDLIB_H
|
||||
#define HAVE_STDLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#ifndef HAVE_STRERROR
|
||||
#define HAVE_STRERROR 1
|
||||
#define HAVE_MEMMOVE 1
|
||||
|
||||
/* There are some non-Unix systems that don't even have bcopy(). If this macro
|
||||
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
|
||||
HAVE_BCOPY is not relevant. */
|
||||
|
||||
#define HAVE_BCOPY 0
|
||||
|
||||
/* The value of NEWLINE determines the newline character. The default is to
|
||||
leave it up to the compiler, but some sites want to force a particular value.
|
||||
On Unix systems, "configure" can be used to override this default. */
|
||||
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE '\n'
|
||||
#endif
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store
|
||||
links as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
|
||||
longer patterns in extreme cases. On Unix systems, "configure" can be used to
|
||||
override this default. */
|
||||
/* Define to 1 if you have the <string> header file. */
|
||||
#ifndef HAVE_STRING
|
||||
#define HAVE_STRING 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#ifndef HAVE_STRINGS_H
|
||||
#define HAVE_STRINGS_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#ifndef HAVE_STRING_H
|
||||
#define HAVE_STRING_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `strtoll' function. */
|
||||
/* #undef HAVE_STRTOLL */
|
||||
|
||||
/* Define to 1 if you have the `strtoq' function. */
|
||||
#ifndef HAVE_STRTOQ
|
||||
#define HAVE_STRTOQ 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#ifndef HAVE_SYS_STAT_H
|
||||
#define HAVE_SYS_STAT_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#ifndef HAVE_SYS_TYPES_H
|
||||
#define HAVE_SYS_TYPES_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <type_traits.h> header file. */
|
||||
/* #undef HAVE_TYPE_TRAITS_H */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#ifndef HAVE_UNISTD_H
|
||||
#define HAVE_UNISTD_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if the system has the type `unsigned long long'. */
|
||||
#ifndef HAVE_UNSIGNED_LONG_LONG
|
||||
#define HAVE_UNSIGNED_LONG_LONG 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the <windows.h> header file. */
|
||||
/* #undef HAVE_WINDOWS_H */
|
||||
|
||||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#ifndef HAVE_ZLIB_H
|
||||
#define HAVE_ZLIB_H 1
|
||||
#endif
|
||||
|
||||
/* Define to 1 if you have the `_strtoi64' function. */
|
||||
/* #undef HAVE__STRTOI64 */
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
|
||||
for longer patterns in extreme cases. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef LINK_SIZE
|
||||
#define LINK_SIZE 2
|
||||
#define LINK_SIZE 2
|
||||
#endif
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the match()
|
||||
function can be called during a single execution of pcre_exec(). (There is a
|
||||
runtime method of setting a different limit.) The limit exists in order to
|
||||
catch runaway regular expressions that take for ever to determine that they do
|
||||
not match. The default is set very large so that it does not accidentally catch
|
||||
legitimate cases. On Unix systems, "configure" can be used to override this
|
||||
default. */
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can be called during a single execution of
|
||||
pcre_exec(). There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take
|
||||
for ever to determine that they do not match. The default is set very large
|
||||
so that it does not accidentally catch legitimate cases. On systems that
|
||||
support it, "configure" can be used to override this default. */
|
||||
#ifndef MATCH_LIMIT
|
||||
#define MATCH_LIMIT 10000000
|
||||
#endif
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE requires
|
||||
three integers per substring, whereas the POSIX interface provides only two. If
|
||||
the number of expected substrings is small, the wrapper function uses space on
|
||||
the stack, because this is faster than using malloc() for each call. The
|
||||
threshold above which the stack is no longer used is defined by POSIX_MALLOC_
|
||||
THRESHOLD. On Unix systems, "configure" can be used to override this default.
|
||||
*/
|
||||
/* The above limit applies to all calls of match(), whether or not they
|
||||
increase the recursion depth. In some environments it is desirable to limit
|
||||
the depth of recursive calls of match() more strictly, in order to restrict
|
||||
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
|
||||
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
|
||||
match(). To have any useful effect, it must be less than the value of
|
||||
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
|
||||
a runtime method for setting a different limit. On systems that support it,
|
||||
"configure" can be used to override the default. */
|
||||
#ifndef MATCH_LIMIT_RECURSION
|
||||
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_COUNT
|
||||
#define MAX_NAME_COUNT 10000
|
||||
#endif
|
||||
|
||||
/* This limit is parameterized just in case anybody ever wants to change it.
|
||||
Care must be taken if it is increased, because it guards against integer
|
||||
overflow caused by enormously large patterns. */
|
||||
#ifndef MAX_NAME_SIZE
|
||||
#define MAX_NAME_SIZE 32
|
||||
#endif
|
||||
|
||||
/* The value of NEWLINE determines the newline character sequence. On systems
|
||||
that support it, "configure" can be used to override the default, which is
|
||||
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
|
||||
(ANYCRLF). */
|
||||
#ifndef NEWLINE
|
||||
#define NEWLINE 10
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited
|
||||
size. Define NO_RECURSE to get a version that doesn't use recursion in the
|
||||
match() function; instead it creates its own stack by steam using
|
||||
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
|
||||
the comments and other stuff just above the match() function. On systems
|
||||
that support it, "configure" can be used to set this in the Makefile (use
|
||||
--disable-stack-for-recursion). */
|
||||
/* #undef NO_RECURSE */
|
||||
|
||||
/* Name of package */
|
||||
#define PACKAGE "pcre"
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#define PACKAGE_BUGREPORT ""
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#define PACKAGE_NAME "PCRE"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE 7.7"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "7.7"
|
||||
|
||||
|
||||
/* If you are compiling for a system other than a Unix-like system or
|
||||
Win32, and it needs some magic to be inserted before the definition
|
||||
of a function that is exported by the library, define this macro to
|
||||
contain the relevant magic. If you do not define this macro, it
|
||||
defaults to "extern" for a C compiler and "extern C" for a C++
|
||||
compiler on non-Win32 systems. This macro appears at the start of
|
||||
every exported function that is part of the external API. It does
|
||||
not appear on functions that are "external" in the C sense, but
|
||||
which are internal to the library. */
|
||||
/* #undef PCRE_EXP_DEFN */
|
||||
|
||||
/* Define if linking statically (TODO: make nice with Libtool) */
|
||||
/* #undef PCRE_STATIC */
|
||||
|
||||
/* When calling PCRE via the POSIX interface, additional working storage is
|
||||
required for holding the pointers to capturing substrings because PCRE
|
||||
requires three integers per substring, whereas the POSIX interface provides
|
||||
only two. If the number of expected substrings is small, the wrapper
|
||||
function uses space on the stack, because this is faster than using
|
||||
malloc() for each call. The threshold above which the stack is no longer
|
||||
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
|
||||
"configure" can be used to override this default. */
|
||||
#ifndef POSIX_MALLOC_THRESHOLD
|
||||
#define POSIX_MALLOC_THRESHOLD 10
|
||||
#endif
|
||||
|
||||
/* PCRE uses recursive function calls to handle backtracking while matching.
|
||||
This can sometimes be a problem on systems that have stacks of limited size.
|
||||
Define NO_RECURSE to get a version that doesn't use recursion in the match()
|
||||
function; instead it creates its own stack by steam using pcre_recurse_malloc
|
||||
to get memory. For more detail, see comments and other stuff just above the
|
||||
match() function. On Unix systems, "configure" can be used to set this in the
|
||||
Makefile (use --disable-stack-for-recursion). */
|
||||
|
||||
/* #define NO_RECURSE */
|
||||
|
||||
/* xHarbour stuff - default definitions */
|
||||
#ifndef SUPPORT_UTF8
|
||||
#define SUPPORT_UTF8
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#ifndef STDC_HEADERS
|
||||
#define STDC_HEADERS 1
|
||||
#endif
|
||||
|
||||
#ifndef SUPPORT_UCP
|
||||
#define SUPPORT_UCP
|
||||
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
|
||||
handle .bz2 files. */
|
||||
/* #undef SUPPORT_LIBBZ2 */
|
||||
|
||||
/* Define to allow pcretest to be linked with libreadline. */
|
||||
/* #undef SUPPORT_LIBREADLINE */
|
||||
|
||||
/* Define to allow pcregrep to be linked with libz, so that it is able to
|
||||
handle .gz files. */
|
||||
/* #undef SUPPORT_LIBZ */
|
||||
|
||||
/* Define to enable support for Unicode properties */
|
||||
/* #undef SUPPORT_UCP */
|
||||
|
||||
/* Define to enable support for the UTF-8 Unicode encoding. */
|
||||
/* #undef SUPPORT_UTF8 */
|
||||
|
||||
/* Version number of package */
|
||||
#ifndef VERSION
|
||||
#define VERSION "7.7"
|
||||
#endif
|
||||
|
||||
/* End */
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */
|
||||
/* #undef size_t */
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This is a freestanding support program to generate a file containing default
|
||||
character tables for PCRE. The tables are built according to the default C
|
||||
locale. Now that pcre_maketables is a function visible to the outside world, we
|
||||
make use of its code from here in order to be consistent. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
#define DFTABLES /* pcremktb.c notices this */
|
||||
#include "pcremktb.c"
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int i;
|
||||
FILE *f;
|
||||
const unsigned char *tables = pcre_maketables();
|
||||
const unsigned char *base_of_tables = tables;
|
||||
|
||||
if (argc != 2)
|
||||
{
|
||||
fprintf(stderr, "dftables: one filename argument is required\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
f = fopen(argv[1], "wb");
|
||||
if (f == NULL)
|
||||
{
|
||||
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* There are two fprintf() calls here, because gcc in pedantic mode complains
|
||||
about the very long string otherwise. */
|
||||
|
||||
fprintf(f,
|
||||
"/*************************************************\n"
|
||||
"* Perl-Compatible Regular Expressions *\n"
|
||||
"*************************************************/\n\n"
|
||||
"/* This file is automatically written by the dftables auxiliary \n"
|
||||
"program. If you edit it by hand, you might like to edit the Makefile to \n"
|
||||
"prevent its ever being regenerated.\n\n");
|
||||
fprintf(f,
|
||||
"This file contains the default tables for characters with codes less than\n"
|
||||
"128 (ASCII characters). These tables are used when no external tables are\n"
|
||||
"passed to PCRE. */\n\n"
|
||||
"extern const unsigned char _pcre_default_tables[];\n"
|
||||
"const unsigned char _pcre_default_tables[] = {\n\n"
|
||||
"/* This table is a lower casing table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f, "/* This table is a case flipping table. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
|
||||
fprintf(f, "%3d", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table contains bit maps for various character classes.\n"
|
||||
"Each map is 32 bytes long and the bits run from the least\n"
|
||||
"significant end of each byte. The classes that have their own\n"
|
||||
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
|
||||
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < cbit_length; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
if ((i & 31) == 0) fprintf(f, "\n");
|
||||
fprintf(f, "\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != cbit_length - 1) fprintf(f, ",");
|
||||
}
|
||||
fprintf(f, ",\n\n");
|
||||
|
||||
fprintf(f,
|
||||
"/* This table identifies various classes of character by individual bits:\n"
|
||||
" 0x%02x white space character\n"
|
||||
" 0x%02x letter\n"
|
||||
" 0x%02x decimal digit\n"
|
||||
" 0x%02x hexadecimal digit\n"
|
||||
" 0x%02x alphanumeric or '_'\n"
|
||||
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
|
||||
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
|
||||
ctype_meta);
|
||||
|
||||
fprintf(f, " ");
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if ((i & 7) == 0 && i != 0)
|
||||
{
|
||||
fprintf(f, " /* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n ");
|
||||
}
|
||||
fprintf(f, "0x%02x", *tables++);
|
||||
if (i != 255) fprintf(f, ",");
|
||||
}
|
||||
|
||||
fprintf(f, "};/* ");
|
||||
if (isprint(i-8)) fprintf(f, " %c -", i-8);
|
||||
else fprintf(f, "%3d-", i-8);
|
||||
if (isprint(i-1)) fprintf(f, " %c ", i-1);
|
||||
else fprintf(f, "%3d", i-1);
|
||||
fprintf(f, " */\n\n/* End of chartabs.c */\n");
|
||||
|
||||
fclose(f);
|
||||
free((void *)base_of_tables);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of dftables.c */
|
||||
@@ -2,10 +2,10 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* In its original form, this is the .in file that is transformed by
|
||||
"configure" into pcre.h.
|
||||
/* This is the public header file for the PCRE library, to be #included by
|
||||
applications that call the PCRE functions.
|
||||
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,34 +39,48 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef _PCRE_H
|
||||
#define _PCRE_H
|
||||
|
||||
/* The file pcre.h is built by "configure". Do not edit it; instead
|
||||
make changes to pcre.in. */
|
||||
/* The current PCRE version information. */
|
||||
|
||||
#define PCRE_MAJOR 6
|
||||
#define PCRE_MINOR 3
|
||||
#define PCRE_DATE 15-Aug-2005
|
||||
#define PCRE_MAJOR 7
|
||||
#define PCRE_MINOR 7
|
||||
#define PCRE_PRERELEASE
|
||||
#define PCRE_DATE 2008-05-07
|
||||
|
||||
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE, the appropriate
|
||||
export setting is defined in pcre_internal.h, which includes this file. So we
|
||||
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
|
||||
|
||||
#ifdef _WIN32
|
||||
# ifdef PCRE_DEFINITION
|
||||
# ifdef DLL_EXPORT
|
||||
# define PCRE_DATA_SCOPE __declspec(dllexport)
|
||||
#if defined(_WIN32) && !defined(PCRE_STATIC)
|
||||
# ifndef PCRE_EXP_DECL
|
||||
# define PCRE_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
|
||||
# endif
|
||||
# else
|
||||
# ifndef PCRE_STATIC
|
||||
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN __declspec(dllimport)
|
||||
# endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* For other operating systems, we use the standard "extern". */
|
||||
/* By default, we use the standard "extern" declarations. */
|
||||
|
||||
#ifndef PCRE_DATA_SCOPE
|
||||
#ifndef PCRE_EXP_DECL
|
||||
# ifdef __cplusplus
|
||||
# define PCRE_DATA_SCOPE extern "C"
|
||||
# define PCRE_EXP_DECL extern "C"
|
||||
# else
|
||||
# define PCRE_DATA_SCOPE extern
|
||||
# define PCRE_EXP_DECL extern
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
# ifndef PCRECPP_EXP_DECL
|
||||
# define PCRECPP_EXP_DECL extern
|
||||
# endif
|
||||
# ifndef PCRECPP_EXP_DEFN
|
||||
# define PCRECPP_EXP_DEFN
|
||||
# endif
|
||||
#endif
|
||||
|
||||
@@ -102,6 +116,15 @@ extern "C" {
|
||||
#define PCRE_DFA_SHORTEST 0x00010000
|
||||
#define PCRE_DFA_RESTART 0x00020000
|
||||
#define PCRE_FIRSTLINE 0x00040000
|
||||
#define PCRE_DUPNAMES 0x00080000
|
||||
#define PCRE_NEWLINE_CR 0x00100000
|
||||
#define PCRE_NEWLINE_LF 0x00200000
|
||||
#define PCRE_NEWLINE_CRLF 0x00300000
|
||||
#define PCRE_NEWLINE_ANY 0x00400000
|
||||
#define PCRE_NEWLINE_ANYCRLF 0x00500000
|
||||
#define PCRE_BSR_ANYCRLF 0x00800000
|
||||
#define PCRE_BSR_UNICODE 0x01000000
|
||||
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
|
||||
|
||||
/* Exec-time and get/set-time error codes */
|
||||
|
||||
@@ -109,7 +132,8 @@ extern "C" {
|
||||
#define PCRE_ERROR_NULL (-2)
|
||||
#define PCRE_ERROR_BADOPTION (-3)
|
||||
#define PCRE_ERROR_BADMAGIC (-4)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
|
||||
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
|
||||
#define PCRE_ERROR_NOMEMORY (-6)
|
||||
#define PCRE_ERROR_NOSUBSTRING (-7)
|
||||
#define PCRE_ERROR_MATCHLIMIT (-8)
|
||||
@@ -125,6 +149,9 @@ extern "C" {
|
||||
#define PCRE_ERROR_DFA_UMLIMIT (-18)
|
||||
#define PCRE_ERROR_DFA_WSSIZE (-19)
|
||||
#define PCRE_ERROR_DFA_RECURSE (-20)
|
||||
#define PCRE_ERROR_RECURSIONLIMIT (-21)
|
||||
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
|
||||
#define PCRE_ERROR_BADNEWLINE (-23)
|
||||
|
||||
/* Request types for pcre_fullinfo() */
|
||||
|
||||
@@ -141,8 +168,12 @@ extern "C" {
|
||||
#define PCRE_INFO_NAMETABLE 9
|
||||
#define PCRE_INFO_STUDYSIZE 10
|
||||
#define PCRE_INFO_DEFAULT_TABLES 11
|
||||
#define PCRE_INFO_OKPARTIAL 12
|
||||
#define PCRE_INFO_JCHANGED 13
|
||||
#define PCRE_INFO_HASCRORLF 14
|
||||
|
||||
/* Request types for pcre_config() */
|
||||
/* Request types for pcre_config(). Do not re-arrange, in order to remain
|
||||
compatible. */
|
||||
|
||||
#define PCRE_CONFIG_UTF8 0
|
||||
#define PCRE_CONFIG_NEWLINE 1
|
||||
@@ -151,19 +182,31 @@ extern "C" {
|
||||
#define PCRE_CONFIG_MATCH_LIMIT 4
|
||||
#define PCRE_CONFIG_STACKRECURSE 5
|
||||
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
|
||||
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
|
||||
#define PCRE_CONFIG_BSR 8
|
||||
|
||||
/* Bit flags for the pcre_extra structure */
|
||||
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
|
||||
these bits, just add new ones on the end, in order to remain compatible. */
|
||||
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_STUDY_DATA 0x0001
|
||||
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
|
||||
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
|
||||
#define PCRE_EXTRA_TABLES 0x0008
|
||||
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
|
||||
|
||||
/* Types */
|
||||
|
||||
struct real_pcre; /* declaration; the definition is private */
|
||||
typedef struct real_pcre pcre;
|
||||
|
||||
/* When PCRE is compiled as a C++ library, the subject pointer type can be
|
||||
replaced with a custom type. For conventional use, the public interface is a
|
||||
const char *. */
|
||||
|
||||
#ifndef PCRE_SPTR
|
||||
#define PCRE_SPTR const char *
|
||||
#endif
|
||||
|
||||
/* The structure for passing additional data to pcre_exec(). This is defined in
|
||||
such a way as to be extensible. Always add new fields at the end, in order to
|
||||
remain compatible. */
|
||||
@@ -174,6 +217,7 @@ typedef struct pcre_extra {
|
||||
unsigned long int match_limit; /* Maximum number of calls to match() */
|
||||
void *callout_data; /* Data passed back in callouts */
|
||||
const unsigned char *tables; /* Pointer to character tables */
|
||||
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
|
||||
} pcre_extra;
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
@@ -186,7 +230,7 @@ typedef struct pcre_callout_block {
|
||||
/* ------------------------ Version 0 ------------------------------- */
|
||||
int callout_number; /* Number compiled into pattern */
|
||||
int *offset_vector; /* The offset vector */
|
||||
const char *subject; /* The subject being matched */
|
||||
PCRE_SPTR subject; /* The subject being matched */
|
||||
int subject_length; /* The length of the subject */
|
||||
int start_match; /* Offset to start of this match attempt */
|
||||
int current_position; /* Where we currently are in the subject */
|
||||
@@ -206,50 +250,52 @@ that is triggered by the (?) regex item. For Virtual Pascal, these definitions
|
||||
have to take another form. */
|
||||
|
||||
#ifndef VPCOMPAT
|
||||
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_free)(void *);
|
||||
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
|
||||
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
|
||||
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_free)(void *);
|
||||
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
|
||||
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
|
||||
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
|
||||
#else /* VPCOMPAT */
|
||||
PCRE_DATA_SCOPE void *pcre_malloc(size_t);
|
||||
PCRE_DATA_SCOPE void pcre_free(void *);
|
||||
PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
|
||||
PCRE_DATA_SCOPE void pcre_stack_free(void *);
|
||||
PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
|
||||
PCRE_EXP_DECL void *pcre_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_free(void *);
|
||||
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
|
||||
PCRE_EXP_DECL void pcre_stack_free(void *);
|
||||
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
|
||||
#endif /* VPCOMPAT */
|
||||
|
||||
/* Exported PCRE functions */
|
||||
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
|
||||
const unsigned char *);
|
||||
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
|
||||
int *, const unsigned char *);
|
||||
PCRE_DATA_SCOPE int pcre_config(int, void *);
|
||||
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_config(int, void *);
|
||||
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, char *, int);
|
||||
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
|
||||
int);
|
||||
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
|
||||
const char *, int, int, int, int *, int , int *, int);
|
||||
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *,
|
||||
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
|
||||
int, int, int, int *, int);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
|
||||
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
|
||||
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
PCRE_EXP_DECL void pcre_free_substring(const char *);
|
||||
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
|
||||
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
|
||||
void *);
|
||||
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
|
||||
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
|
||||
int *, int, const char *, const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
|
||||
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
|
||||
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
|
||||
char **, char **);
|
||||
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
|
||||
const char **);
|
||||
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
|
||||
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
|
||||
const char ***);
|
||||
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
|
||||
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
|
||||
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_DATA_SCOPE const char *pcre_version(void);
|
||||
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
|
||||
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
|
||||
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
|
||||
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
|
||||
PCRE_EXP_DECL const char *pcre_version(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,6 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This module contains the external function pcre_config(). */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -58,7 +62,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_config(int what, void *where)
|
||||
{
|
||||
switch (what)
|
||||
@@ -83,6 +87,14 @@ switch (what)
|
||||
*((int *)where) = NEWLINE;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_BSR:
|
||||
#ifdef BSR_ANYCRLF
|
||||
*((int *)where) = 1;
|
||||
#else
|
||||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_LINK_SIZE:
|
||||
*((int *)where) = LINK_SIZE;
|
||||
break;
|
||||
@@ -95,6 +107,10 @@ switch (what)
|
||||
*((unsigned int *)where) = MATCH_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
|
||||
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
|
||||
break;
|
||||
|
||||
case PCRE_CONFIG_STACKRECURSE:
|
||||
#ifdef NO_RECURSE
|
||||
*((int *)where) = 0;
|
||||
@@ -109,4 +125,4 @@ switch (what)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcreconf.c */
|
||||
/* End of pcre_config.c */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,12 +1,12 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* Copyright (c) University of Cambridge 2004 */
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
/* This little library provides a fast way of obtaining the basic Unicode
|
||||
properties of a character, using a compact binary tree that occupies less than
|
||||
100K bytes.
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -37,117 +37,143 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains code for searching the table of Unicode character
|
||||
properties. */
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
#include "ucp.h" /* Exported interface */
|
||||
#include "ucpinter.h" /* Internal table details */
|
||||
#include "ucptable.c" /* The table itself */
|
||||
#include "ucp.h" /* Category definitions */
|
||||
#include "ucpinter.h" /* Internal table details */
|
||||
#include "ucptable.h" /* The table itself */
|
||||
|
||||
|
||||
/* In some environments, external functions have to be preceded by some magic.
|
||||
In my world (Unix), they do not. Use a macro to deal with this. */
|
||||
/* Table to translate from particular type value to the general value. */
|
||||
|
||||
#ifndef EXPORT
|
||||
#define EXPORT
|
||||
#endif
|
||||
static const int ucp_gentype[] = {
|
||||
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
|
||||
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
|
||||
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
|
||||
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
|
||||
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
|
||||
ucp_P, ucp_P, /* Ps, Po */
|
||||
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
|
||||
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
|
||||
};
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return data *
|
||||
* Search table and return type *
|
||||
*************************************************/
|
||||
|
||||
/* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc.
|
||||
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
|
||||
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
type_ptr the detailed character type is returned here
|
||||
case_ptr for letters, the opposite case is returned here, if there
|
||||
is one, else zero
|
||||
script_ptr the script is returned here
|
||||
|
||||
Returns: the character type category or -1 if not found
|
||||
Returns: the character type category
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
ucp_findchar(const int c, int *type_ptr, int *case_ptr)
|
||||
int
|
||||
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
|
||||
{
|
||||
cnode *node = ucp_table;
|
||||
register int cc = c;
|
||||
int case_offset;
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
||||
int mid;
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
|
||||
if (cc == d) break;
|
||||
if (cc < d)
|
||||
if (top <= bot)
|
||||
{
|
||||
if ((node->f0 & f0_leftexists) == 0) return -1;
|
||||
node ++;
|
||||
*type_ptr = ucp_Cn;
|
||||
*script_ptr = ucp_Common;
|
||||
return ucp_C;
|
||||
}
|
||||
mid = (bot + top) >> 1;
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
|
||||
else
|
||||
{
|
||||
register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
|
||||
if (roffset == 0) return -1;
|
||||
node += 1 << (roffset - 1);
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
||||
(ucp_table[mid].f1 & f1_rangemask)) break;
|
||||
bot = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
|
||||
{
|
||||
case ucp_Cc:
|
||||
case ucp_Cf:
|
||||
case ucp_Cn:
|
||||
case ucp_Co:
|
||||
case ucp_Cs:
|
||||
return ucp_C;
|
||||
/* Found an entry in the table. Set the script and detailed type values, and
|
||||
return the general type. */
|
||||
|
||||
case ucp_Ll:
|
||||
case ucp_Lu:
|
||||
case_offset = node->f2 & f2_casemask;
|
||||
if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
|
||||
*case_ptr = (case_offset == 0)? 0 : cc + case_offset;
|
||||
return ucp_L;
|
||||
*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
|
||||
*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
|
||||
|
||||
case ucp_Lm:
|
||||
case ucp_Lo:
|
||||
case ucp_Lt:
|
||||
*case_ptr = 0;
|
||||
return ucp_L;
|
||||
|
||||
case ucp_Mc:
|
||||
case ucp_Me:
|
||||
case ucp_Mn:
|
||||
return ucp_M;
|
||||
|
||||
case ucp_Nd:
|
||||
case ucp_Nl:
|
||||
case ucp_No:
|
||||
return ucp_N;
|
||||
|
||||
case ucp_Pc:
|
||||
case ucp_Pd:
|
||||
case ucp_Pe:
|
||||
case ucp_Pf:
|
||||
case ucp_Pi:
|
||||
case ucp_Ps:
|
||||
case ucp_Po:
|
||||
return ucp_P;
|
||||
|
||||
case ucp_Sc:
|
||||
case ucp_Sk:
|
||||
case ucp_Sm:
|
||||
case ucp_So:
|
||||
return ucp_S;
|
||||
|
||||
case ucp_Zl:
|
||||
case ucp_Zp:
|
||||
case ucp_Zs:
|
||||
return ucp_Z;
|
||||
|
||||
default: /* "Should never happen" */
|
||||
return -1;
|
||||
}
|
||||
return ucp_gentype[*type_ptr];
|
||||
}
|
||||
|
||||
/* End of pcrefind.c */
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Search table and return other case *
|
||||
*************************************************/
|
||||
|
||||
/* If the given character is a letter, and there is another case for the
|
||||
letter, return the other case. Otherwise, return -1.
|
||||
|
||||
Arguments:
|
||||
c the character value
|
||||
|
||||
Returns: the other case or NOTACHAR if none
|
||||
*/
|
||||
|
||||
unsigned int
|
||||
_pcre_ucp_othercase(const unsigned int c)
|
||||
{
|
||||
int bot = 0;
|
||||
int top = sizeof(ucp_table)/sizeof(cnode);
|
||||
int mid, offset;
|
||||
|
||||
/* The table is searched using a binary chop. You might think that using
|
||||
intermediate variables to hold some of the common expressions would speed
|
||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
||||
makes things a lot slower. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (top <= bot) return -1;
|
||||
mid = (bot + top) >> 1;
|
||||
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
|
||||
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
|
||||
else
|
||||
{
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
|
||||
c <= (ucp_table[mid].f0 & f0_charmask) +
|
||||
(ucp_table[mid].f1 & f1_rangemask)) break;
|
||||
bot = mid + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
|
||||
return the other case if there is one, else NOTACHAR. */
|
||||
|
||||
if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
|
||||
|
||||
offset = ucp_table[mid].f1 & f1_casemask;
|
||||
if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
|
||||
return (offset == 0)? NOTACHAR : c + offset;
|
||||
}
|
||||
|
||||
|
||||
/* End of pcre_ucp_searchfuncs.c */
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/*PCRE is a library of functions to support regular expressions whose syntax
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
information about a compiled pattern. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -61,7 +65,7 @@ Arguments:
|
||||
Returns: 0 if data returned, negative on error
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
|
||||
void *where)
|
||||
{
|
||||
@@ -106,8 +110,8 @@ switch (what)
|
||||
|
||||
case PCRE_INFO_FIRSTBYTE:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
break;
|
||||
|
||||
/* Make sure we pass back the pointer to the bit vector in the external
|
||||
@@ -121,7 +125,7 @@ switch (what)
|
||||
|
||||
case PCRE_INFO_LASTLITERAL:
|
||||
*((int *)where) =
|
||||
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_NAMEENTRYSIZE:
|
||||
@@ -140,10 +144,22 @@ switch (what)
|
||||
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
|
||||
break;
|
||||
|
||||
case PCRE_INFO_OKPARTIAL:
|
||||
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_JCHANGED:
|
||||
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
|
||||
break;
|
||||
|
||||
case PCRE_INFO_HASCRORLF:
|
||||
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
default: return PCRE_ERROR_BADOPTION;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* End of pcrefinf.c */
|
||||
/* End of pcre_fullinfo.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,10 @@ from the subject string after a regex match has succeeded. The original idea
|
||||
for these functions came from Scott Wimer. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -50,8 +54,8 @@ for these functions came from Scott Wimer. */
|
||||
* Find number for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function is used by the two extraction functions below, as well
|
||||
as being generally available.
|
||||
/* This function is used by the get_first_set() function below, as well
|
||||
as being generally available. It assumes that names are unique.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@@ -93,6 +97,113 @@ return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This is used by the get_first_set() function below, as well as being
|
||||
generally available. It is used when duplicated names are permitted.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name whose entries are required
|
||||
firstptr where to put the pointer to the first entry
|
||||
lastptr where to put the pointer to the last entry
|
||||
|
||||
Returns: the length of each entry, or a negative number
|
||||
(PCRE_ERROR_NOSUBSTRING) if not found
|
||||
*/
|
||||
|
||||
int
|
||||
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
||||
char **firstptr, char **lastptr)
|
||||
{
|
||||
int rc;
|
||||
int entrysize;
|
||||
int top, bot;
|
||||
uschar *nametable, *lastentry;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
||||
return rc;
|
||||
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
||||
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
||||
return rc;
|
||||
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
||||
return rc;
|
||||
|
||||
lastentry = nametable + entrysize * (top - 1);
|
||||
bot = 0;
|
||||
while (top > bot)
|
||||
{
|
||||
int mid = (top + bot) / 2;
|
||||
uschar *entry = nametable + entrysize*mid;
|
||||
int c = strcmp(stringname, (char *)(entry + 2));
|
||||
if (c == 0)
|
||||
{
|
||||
uschar *first = entry;
|
||||
uschar *last = entry;
|
||||
while (first > nametable)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
||||
first -= entrysize;
|
||||
}
|
||||
while (last < lastentry)
|
||||
{
|
||||
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
*firstptr = (char *)first;
|
||||
*lastptr = (char *)last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
}
|
||||
|
||||
return PCRE_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Find first set of multiple named strings *
|
||||
*************************************************/
|
||||
|
||||
/* This function allows for duplicate names in the table of named substrings.
|
||||
It returns the number of the first one that was set in a pattern match.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
stringname the name of the capturing substring
|
||||
ovector the vector of matched substrings
|
||||
|
||||
Returns: the number of the first that is set,
|
||||
or the number of the last one if none are set,
|
||||
or a negative number on error
|
||||
*/
|
||||
|
||||
static int
|
||||
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
||||
{
|
||||
const real_pcre *re = (const real_pcre *)code;
|
||||
int entrysize;
|
||||
char *first, *last;
|
||||
uschar *entry;
|
||||
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
||||
return pcre_get_stringnumber(code, stringname);
|
||||
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
||||
if (entrysize <= 0) return entrysize;
|
||||
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
||||
{
|
||||
int n = (entry[0] << 8) + entry[1];
|
||||
if (ovector[n*2] >= 0) return n;
|
||||
}
|
||||
return (first[0] << 8) + first[1];
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy captured string to given buffer *
|
||||
*************************************************/
|
||||
@@ -142,7 +253,8 @@ return yield;
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring into a given buffer,
|
||||
identifying it by name.
|
||||
identifying it by name. If the regex permits duplicate names, the first
|
||||
substring that is set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@@ -168,7 +280,7 @@ int
|
||||
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, char *buffer, int size)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
||||
}
|
||||
@@ -299,7 +411,8 @@ return yield;
|
||||
*************************************************/
|
||||
|
||||
/* This function copies a single captured substring, identified by name, into
|
||||
new store.
|
||||
new store. If the regex permits duplicate names, the first substring that is
|
||||
set is chosen.
|
||||
|
||||
Arguments:
|
||||
code the compiled regex
|
||||
@@ -324,7 +437,7 @@ int
|
||||
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
||||
int stringcount, const char *stringname, const char **stringptr)
|
||||
{
|
||||
int n = pcre_get_stringnumber(code, stringname);
|
||||
int n = get_first_set(code, stringname, ovector);
|
||||
if (n <= 0) return n;
|
||||
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
||||
}
|
||||
@@ -349,4 +462,4 @@ pcre_free_substring(const char *pointer)
|
||||
(pcre_free)((void *)pointer);
|
||||
}
|
||||
|
||||
/* End of pcreget.c */
|
||||
/* End of pcre_get.c */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,10 @@ information about a compiled pattern. However, use of this function is now
|
||||
deprecated, as it has been superseded by pcre_fullinfo(). */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -68,7 +72,7 @@ Returns: number of capturing subpatterns
|
||||
or negative values on error
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
|
||||
{
|
||||
real_pcre internal_re;
|
||||
@@ -81,9 +85,9 @@ if (re->magic_number != MAGIC_NUMBER)
|
||||
}
|
||||
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
|
||||
if (first_byte != NULL)
|
||||
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
|
||||
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
|
||||
return re->top_bracket;
|
||||
}
|
||||
|
||||
/* End of pcreinfo.c */
|
||||
/* End of pcre_info.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -45,7 +45,10 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||
|
||||
|
||||
#ifndef DFTABLES
|
||||
#include "pcreinal.h"
|
||||
# if 1
|
||||
# include "_hbconf.h"
|
||||
# endif
|
||||
# include "pcreinal.h"
|
||||
#endif
|
||||
|
||||
|
||||
@@ -86,29 +89,22 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
|
||||
|
||||
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
|
||||
|
||||
/* Then the character class tables. Don't try to be clever and save effort
|
||||
on exclusive ones - in some locales things may be different. Note that the
|
||||
table for "space" includes everything "isspace" gives, including VT in the
|
||||
default locale. This makes it work for the POSIX class [:space:]. */
|
||||
/* Then the character class tables. Don't try to be clever and save effort on
|
||||
exclusive ones - in some locales things may be different. Note that the table
|
||||
for "space" includes everything "isspace" gives, including VT in the default
|
||||
locale. This makes it work for the POSIX class [:space:]. Note also that it is
|
||||
possible for a character to be alnum or alpha without being lower or upper,
|
||||
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
|
||||
least under Debian Linux's locales as of 12/2005). So we must test for alnum
|
||||
specially. */
|
||||
|
||||
memset(p, 0, cbit_length);
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
if (isdigit(i))
|
||||
{
|
||||
p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (isupper(i))
|
||||
{
|
||||
p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (islower(i))
|
||||
{
|
||||
p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
p[cbit_word + i/8] |= 1 << (i&7);
|
||||
}
|
||||
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
|
||||
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
|
||||
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
|
||||
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
|
||||
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
|
||||
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
|
||||
@@ -137,9 +133,11 @@ for (i = 0; i < 256; i++)
|
||||
meta-character, which in this sense is any character that terminates a run
|
||||
of data characters. */
|
||||
|
||||
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
|
||||
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
|
||||
*p++ = x;
|
||||
}
|
||||
|
||||
return yield;
|
||||
}
|
||||
|
||||
/* End of pcremktb.c */
|
||||
/* End of pcre_maketables.c */
|
||||
|
||||
164
harbour/source/hbpcre/pcrenewl.c
Normal file
164
harbour/source/hbpcre/pcrenewl.c
Normal file
@@ -0,0 +1,164 @@
|
||||
/*************************************************
|
||||
* Perl-Compatible Regular Expressions *
|
||||
*************************************************/
|
||||
|
||||
/* PCRE is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of the University of Cambridge nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains internal functions for testing newlines when more than
|
||||
one kind of newline is to be recognized. When a newline is found, its length is
|
||||
returned. In principle, we could implement several newline "types", each
|
||||
referring to a different set of newline characters. At present, PCRE supports
|
||||
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
|
||||
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
|
||||
http://unicode.org/unicode/reports/tr18/. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is less than the end of the
|
||||
string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
endptr pointer to the end of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = 1; return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
/* NLTYPE_ANY */
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: *lenptr = 1; return TRUE; /* FF */
|
||||
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
|
||||
return TRUE; /* CR */
|
||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Check for newline at previous position *
|
||||
*************************************************/
|
||||
|
||||
/* It is guaranteed that the initial value of ptr is greater than the start of
|
||||
the string that is being processed.
|
||||
|
||||
Arguments:
|
||||
ptr pointer to possible newline
|
||||
type the newline type
|
||||
startptr pointer to the start of the string
|
||||
lenptr where to return the length
|
||||
utf8 TRUE if in utf8 mode
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
BOOL
|
||||
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
|
||||
int *lenptr, BOOL utf8)
|
||||
{
|
||||
int c;
|
||||
ptr--;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
BACKCHAR(ptr);
|
||||
GETCHAR(c, ptr);
|
||||
}
|
||||
else c = *ptr;
|
||||
#else /* no UTF-8 support */
|
||||
c = *ptr;
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
default: return FALSE;
|
||||
}
|
||||
|
||||
else switch(c)
|
||||
{
|
||||
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
|
||||
return TRUE; /* LF */
|
||||
case 0x000b: /* VT */
|
||||
case 0x000c: /* FF */
|
||||
case 0x000d: *lenptr = 1; return TRUE; /* CR */
|
||||
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: *lenptr = 3; return TRUE; /* PS */
|
||||
default: return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre_newline.c */
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -41,6 +41,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* This file contains a private PCRE function that converts an ordinal
|
||||
character value into a UTF8 string. */
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
@@ -59,9 +62,10 @@ Arguments:
|
||||
Returns: number of characters placed in the buffer
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
int
|
||||
_pcre_ord2utf8(int cvalue, uschar *buffer)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register int i, j;
|
||||
for (i = 0; i < _pcre_utf8_table1_size; i++)
|
||||
if (cvalue <= _pcre_utf8_table1[i]) break;
|
||||
@@ -73,6 +77,9 @@ for (j = i; j > 0; j--)
|
||||
}
|
||||
*buffer = _pcre_utf8_table2[i] | cvalue;
|
||||
return i + 1;
|
||||
#else
|
||||
return 0; /* Keep compiler happy; this function won't ever be */
|
||||
#endif /* called when SUPPORT_UTF8 is not defined. */
|
||||
}
|
||||
|
||||
/* End of pcreoutf.c */
|
||||
/* End of pcre_ord2utf8.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,17 +38,30 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* This module contains an PCRE private debugging function for printing out the
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. */
|
||||
local functions. This source file is used in two places:
|
||||
|
||||
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
|
||||
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
|
||||
|
||||
(2) It is always #included by pcretest.c, which can be asked to print out a
|
||||
compiled regex for debugging purposes. */
|
||||
|
||||
|
||||
#include "pcreinal.h"
|
||||
/* Macro that decides whether a character should be output as a literal or in
|
||||
hexadecimal. We don't use isprint() because that can vary from system to system
|
||||
(even without the use of locales) and we want the output always to be the same,
|
||||
for testing purposes. This macro is used in pcretest as well as in this file. */
|
||||
|
||||
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
|
||||
|
||||
/* The table of operator names. */
|
||||
|
||||
static const char *OP_names[] = { OP_NAME_LIST };
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Print single- or multi-byte character *
|
||||
*************************************************/
|
||||
@@ -58,9 +71,15 @@ print_char(FILE *f, uschar *ptr, BOOL utf8)
|
||||
{
|
||||
int c = *ptr;
|
||||
|
||||
#ifndef SUPPORT_UTF8
|
||||
utf8 = utf8; /* Avoid compiler warning */
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
|
||||
#else
|
||||
if (!utf8 || (c & 0xc0) != 0xc0)
|
||||
{
|
||||
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
@@ -89,6 +108,7 @@ else
|
||||
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -98,17 +118,19 @@ else
|
||||
*************************************************/
|
||||
|
||||
static const char *
|
||||
get_ucpname(int property)
|
||||
get_ucpname(int ptype, int pvalue)
|
||||
{
|
||||
#ifdef SUPPORT_UCP
|
||||
int i;
|
||||
for (i = _pcre_utt_size; i >= 0; i--)
|
||||
for (i = _pcre_utt_size - 1; i >= 0; i--)
|
||||
{
|
||||
if (property == _pcre_utt[i].value) break;
|
||||
if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
|
||||
}
|
||||
return (i >= 0)? _pcre_utt[i].name : "??";
|
||||
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
|
||||
#else
|
||||
return "??";
|
||||
/* It gets harder and harder to shut off unwanted compiler warnings. */
|
||||
ptype = ptype * pvalue;
|
||||
return (ptype == pvalue)? "??" : "??";
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -119,10 +141,13 @@ return "??";
|
||||
*************************************************/
|
||||
|
||||
/* Make this function work for a regex with integers in either byte order.
|
||||
However, we assume that what we are passed is a compiled regex. */
|
||||
However, we assume that what we are passed is a compiled regex. The
|
||||
print_lengths flag controls whether offsets and lengths of items are printed.
|
||||
They can be turned off from pcretest so that automatic tests on bytecode can be
|
||||
written that do not depend on the value of LINK_SIZE. */
|
||||
|
||||
EXPORT void
|
||||
_pcre_printint(pcre *external_re, FILE *f)
|
||||
static void
|
||||
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)external_re;
|
||||
uschar *codestart, *code;
|
||||
@@ -153,17 +178,10 @@ for(;;)
|
||||
int c;
|
||||
int extra = 0;
|
||||
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
|
||||
if (*code >= OP_BRA)
|
||||
{
|
||||
if (*code - OP_BRA > EXTRACT_BASIC_MAX)
|
||||
fprintf(f, "%3d Bra extra\n", GET(code, 1));
|
||||
else
|
||||
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
|
||||
code += _pcre_OP_lengths[OP_BRA];
|
||||
continue;
|
||||
}
|
||||
if (print_lengths)
|
||||
fprintf(f, "%3d ", (int)(code - codestart));
|
||||
else
|
||||
fprintf(f, " ");
|
||||
|
||||
switch(*code)
|
||||
{
|
||||
@@ -177,31 +195,36 @@ for(;;)
|
||||
break;
|
||||
|
||||
case OP_CHAR:
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
fprintf(f, " ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHAR);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CHARNC:
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
fprintf(f, " NC ");
|
||||
do
|
||||
{
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
code++;
|
||||
code += 1 + print_char(f, code, utf8);
|
||||
}
|
||||
while (*code == OP_CHARNC);
|
||||
fprintf(f, "\n");
|
||||
continue;
|
||||
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
|
||||
break;
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
case OP_ALT:
|
||||
@@ -212,41 +235,55 @@ for(;;)
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_BRANUMBER:
|
||||
printf("%3d %s", GET2(code, 1), OP_names[*code]);
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_CREF:
|
||||
if (GET2(code, 1) == CREF_RECURSE)
|
||||
fprintf(f, " Cond recurse");
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_RREF:
|
||||
c = GET2(code, 1);
|
||||
if (c == RREF_ANY)
|
||||
fprintf(f, " Cond recurse any");
|
||||
else
|
||||
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
|
||||
fprintf(f, " Cond recurse %d", c);
|
||||
break;
|
||||
|
||||
case OP_DEF:
|
||||
fprintf(f, " Cond def");
|
||||
break;
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEPLUS:
|
||||
case OP_TYPEMINPLUS:
|
||||
case OP_TYPEPOSPLUS:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " ");
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
fprintf(f, "%s", OP_names[code[1]]);
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[2]));
|
||||
extra = 1;
|
||||
fprintf(f, " %s ", get_ucpname(code[2], code[3]));
|
||||
extra = 2;
|
||||
}
|
||||
}
|
||||
else extra = print_char(f, code+1, utf8);
|
||||
@@ -256,41 +293,50 @@ for(;;)
|
||||
case OP_EXACT:
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
fprintf(f, " ");
|
||||
extra = print_char(f, code+3, utf8);
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_EXACT) fprintf(f, ",");
|
||||
if (*code != OP_EXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_MINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_POSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_TYPEEXACT:
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
fprintf(f, " %s", OP_names[code[3]]);
|
||||
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
|
||||
{
|
||||
fprintf(f, " %s ", get_ucpname(code[4]));
|
||||
extra = 1;
|
||||
fprintf(f, " %s ", get_ucpname(code[4], code[5]));
|
||||
extra = 2;
|
||||
}
|
||||
fprintf(f, "{");
|
||||
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_NOT:
|
||||
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
|
||||
c = code[1];
|
||||
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
break;
|
||||
|
||||
case OP_NOTSTAR:
|
||||
case OP_NOTMINSTAR:
|
||||
case OP_NOTPOSSTAR:
|
||||
case OP_NOTPLUS:
|
||||
case OP_NOTMINPLUS:
|
||||
case OP_NOTPOSPLUS:
|
||||
case OP_NOTQUERY:
|
||||
case OP_NOTMINQUERY:
|
||||
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
|
||||
case OP_NOTPOSQUERY:
|
||||
c = code[1];
|
||||
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
|
||||
else fprintf(f, " [^\\x%02x]", c);
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
@@ -298,15 +344,20 @@ for(;;)
|
||||
case OP_NOTEXACT:
|
||||
case OP_NOTUPTO:
|
||||
case OP_NOTMINUPTO:
|
||||
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
|
||||
case OP_NOTPOSUPTO:
|
||||
c = code[3];
|
||||
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
|
||||
else fprintf(f, " [^\\x%02x]{", c);
|
||||
if (*code != OP_NOTEXACT) fprintf(f, "0,");
|
||||
fprintf(f, "%d}", GET2(code,1));
|
||||
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
|
||||
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
|
||||
break;
|
||||
|
||||
case OP_RECURSE:
|
||||
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
|
||||
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
|
||||
else fprintf(f, " ");
|
||||
fprintf(f, "%s", OP_names[*code]);
|
||||
break;
|
||||
|
||||
case OP_REF:
|
||||
@@ -321,7 +372,7 @@ for(;;)
|
||||
|
||||
case OP_PROP:
|
||||
case OP_NOTPROP:
|
||||
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
|
||||
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
|
||||
break;
|
||||
|
||||
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
|
||||
@@ -362,12 +413,14 @@ for(;;)
|
||||
for (j = i+1; j < 256; j++)
|
||||
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
|
||||
if (i == '-' || i == ']') fprintf(f, "\\");
|
||||
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
|
||||
if (PRINTABLE(i)) fprintf(f, "%c", i);
|
||||
else fprintf(f, "\\x%02x", i);
|
||||
if (--j > i)
|
||||
{
|
||||
if (j != i + 1) fprintf(f, "-");
|
||||
if (j == '-' || j == ']') fprintf(f, "\\");
|
||||
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
|
||||
if (PRINTABLE(j)) fprintf(f, "%c", j);
|
||||
else fprintf(f, "\\x%02x", j);
|
||||
}
|
||||
i = j;
|
||||
}
|
||||
@@ -384,11 +437,15 @@ for(;;)
|
||||
{
|
||||
if (ch == XCL_PROP)
|
||||
{
|
||||
fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
|
||||
int ptype = *ccode++;
|
||||
int pvalue = *ccode++;
|
||||
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
|
||||
}
|
||||
else if (ch == XCL_NOTPROP)
|
||||
{
|
||||
fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
|
||||
int ptype = *ccode++;
|
||||
int pvalue = *ccode++;
|
||||
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -430,6 +487,12 @@ for(;;)
|
||||
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
|
||||
extra += _pcre_OP_lengths[*ccode];
|
||||
break;
|
||||
|
||||
/* Do nothing if it's not a repeat; this code stops picky compilers
|
||||
warning about the lack of a default code path. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -446,4 +509,4 @@ for(;;)
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcreprni.c */
|
||||
/* End of pcre_printint.src */
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,11 @@ auxiliary function that can be used to maintain a reference count in a compiled
|
||||
pattern data block. This might be helpful in applications where the block is
|
||||
shared by different users. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -63,7 +68,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
|
||||
a negative error number
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
PCRE_EXP_DEFN int
|
||||
pcre_refcount(pcre *argument_re, int adjust)
|
||||
{
|
||||
real_pcre *re = (real_pcre *)argument_re;
|
||||
@@ -74,4 +79,4 @@ re->ref_count = (-adjust > re->ref_count)? 0 :
|
||||
return re->ref_count;
|
||||
}
|
||||
|
||||
/* End of pcrerefc.c */
|
||||
/* End of pcre_refcount.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,9 +42,18 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
supporting functions. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Set a bit and maybe its alternate case *
|
||||
*************************************************/
|
||||
@@ -72,12 +81,16 @@ if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Create bitmap of starting chars *
|
||||
* Create bitmap of starting bytes *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression and attempts to build a
|
||||
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
|
||||
goes by, we may be able to get more clever at doing this.
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting bytes. As time goes
|
||||
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
|
||||
useful for parenthesized groups in patterns such as (a*)b where the group
|
||||
provides some optional starting bytes but scanning must continue at the outer
|
||||
level to find at least one mandatory byte. At the outermost level, this
|
||||
function fails unless the result is SSB_DONE.
|
||||
|
||||
Arguments:
|
||||
code points to an expression
|
||||
@@ -86,14 +99,24 @@ Arguments:
|
||||
utf8 TRUE if in UTF-8 mode
|
||||
cd the block with char table pointers
|
||||
|
||||
Returns: TRUE if table built, FALSE otherwise
|
||||
Returns: SSB_FAIL => Failed to find any starting bytes
|
||||
SSB_DONE => Found mandatory starting bytes
|
||||
SSB_CONTINUE => Found optional starting bytes
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
static int
|
||||
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
|
||||
BOOL utf8, compile_data *cd)
|
||||
{
|
||||
register int c;
|
||||
int yield = SSB_DONE;
|
||||
|
||||
#if 0
|
||||
/* ========================================================================= */
|
||||
/* The following comment and code were inserted in January 1999. In May 2006,
|
||||
when it was observed to cause compiler warnings about unused values, I took it
|
||||
out again. If anybody is still using OS/2, they will have to put it back
|
||||
manually. */
|
||||
|
||||
/* This next statement and the later reference to dummy are here in order to
|
||||
trick the optimizer of the IBM C compiler for OS/2 into generating correct
|
||||
@@ -102,28 +125,60 @@ disable optimization (in this module it actually makes a big difference, and
|
||||
the pcre module can use all the optimization it can get). */
|
||||
|
||||
volatile int dummy;
|
||||
/* ========================================================================= */
|
||||
#endif
|
||||
|
||||
do
|
||||
{
|
||||
const uschar *tcode = code + 1 + LINK_SIZE;
|
||||
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
|
||||
BOOL try_next = TRUE;
|
||||
|
||||
while (try_next)
|
||||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
/* If a branch starts with a bracket or a positive lookahead assertion,
|
||||
recurse to set bits from within them. That's all for this branch. */
|
||||
|
||||
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
|
||||
int rc;
|
||||
switch(*tcode)
|
||||
{
|
||||
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
try_next = FALSE;
|
||||
}
|
||||
/* Fail if we reach something we don't understand */
|
||||
|
||||
else switch(*tcode)
|
||||
{
|
||||
default:
|
||||
return FALSE;
|
||||
return SSB_FAIL;
|
||||
|
||||
/* If we hit a bracket or a positive lookahead assertion, recurse to set
|
||||
bits from within the subpattern. If it can't find anything, we have to
|
||||
give up. If it finds some mandatory character(s), we are done for this
|
||||
branch. Otherwise, carry on scanning after the subpattern. */
|
||||
|
||||
case OP_BRA:
|
||||
case OP_SBRA:
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_ONCE:
|
||||
case OP_ASSERT:
|
||||
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
|
||||
if (rc == SSB_FAIL) return SSB_FAIL;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
this branch, though we might have found something optional. For ALT, we
|
||||
continue with the next alternative, but we have to arrange that the final
|
||||
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
|
||||
return SSB_CONTINUE: if this is the top level, that indicates failure,
|
||||
but after a nested subpattern, it causes scanning to continue. */
|
||||
|
||||
case OP_ALT:
|
||||
yield = SSB_CONTINUE;
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
case OP_KET:
|
||||
case OP_KETRMAX:
|
||||
case OP_KETRMIN:
|
||||
return SSB_CONTINUE;
|
||||
|
||||
/* Skip over callout */
|
||||
|
||||
@@ -131,19 +186,13 @@ do
|
||||
tcode += 2 + 2*LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over extended extraction bracket number */
|
||||
|
||||
case OP_BRANUMBER:
|
||||
tcode += 3;
|
||||
break;
|
||||
|
||||
/* Skip over lookbehind and negative lookahead assertions */
|
||||
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Skip over an option setting, changing the caseless flag */
|
||||
@@ -157,23 +206,37 @@ do
|
||||
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
|
||||
return FALSE;
|
||||
dummy = 1; (void)dummy;
|
||||
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
|
||||
return SSB_FAIL;
|
||||
/* =========================================================================
|
||||
See the comment at the head of this function concerning the next line,
|
||||
which was an old fudge for the benefit of OS/2.
|
||||
dummy = 1;
|
||||
========================================================================= */
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1+LINK_SIZE;
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* SKIPZERO skips the bracket. */
|
||||
|
||||
case OP_SKIPZERO:
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
||||
/* Single-char * or ? sets the bit and tries the next item */
|
||||
|
||||
case OP_STAR:
|
||||
case OP_MINSTAR:
|
||||
case OP_POSSTAR:
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
tcode += 2;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
@@ -181,10 +244,12 @@ do
|
||||
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
set_bit(start_bits, tcode[3], caseless, cd);
|
||||
tcode += 4;
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
|
||||
if (utf8 && tcode[-1] >= 0xc0)
|
||||
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
|
||||
#endif
|
||||
break;
|
||||
|
||||
@@ -197,6 +262,7 @@ do
|
||||
case OP_CHARNC:
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
set_bit(start_bits, tcode[1], caseless, cd);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
@@ -215,15 +281,29 @@ do
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
@@ -256,16 +336,20 @@ do
|
||||
|
||||
case OP_TYPEUPTO:
|
||||
case OP_TYPEMINUPTO:
|
||||
case OP_TYPEPOSUPTO:
|
||||
tcode += 2; /* Fall through */
|
||||
|
||||
case OP_TYPESTAR:
|
||||
case OP_TYPEMINSTAR:
|
||||
case OP_TYPEPOSSTAR:
|
||||
case OP_TYPEQUERY:
|
||||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
switch(tcode[1])
|
||||
{
|
||||
case OP_ANY:
|
||||
return FALSE;
|
||||
case OP_ALLANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
for (c = 0; c < 32; c++)
|
||||
@@ -277,14 +361,28 @@ do
|
||||
start_bits[c] |= cd->cbits[c+cbit_digit];
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_NOT_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= ~cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= ~d;
|
||||
}
|
||||
break;
|
||||
|
||||
/* The cbit_space table has vertical tab as whitespace; we have to
|
||||
discard it. */
|
||||
|
||||
case OP_WHITESPACE:
|
||||
for (c = 0; c < 32; c++)
|
||||
start_bits[c] |= cd->cbits[c+cbit_space];
|
||||
{
|
||||
int d = cd->cbits[c+cbit_space];
|
||||
if (c == 1) d &= ~0x08;
|
||||
start_bits[c] |= d;
|
||||
}
|
||||
break;
|
||||
|
||||
case OP_NOT_WORDCHAR:
|
||||
@@ -308,11 +406,13 @@ do
|
||||
character with a value > 255. */
|
||||
|
||||
case OP_NCLASS:
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
|
||||
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
|
||||
}
|
||||
#endif
|
||||
/* Fall through */
|
||||
|
||||
case OP_CLASS:
|
||||
@@ -325,6 +425,7 @@ do
|
||||
value is > 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
if (utf8)
|
||||
{
|
||||
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
|
||||
@@ -342,6 +443,7 @@ do
|
||||
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
|
||||
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
|
||||
}
|
||||
@@ -377,7 +479,7 @@ do
|
||||
code += GET(code, 1); /* Advance to next branch */
|
||||
}
|
||||
while (*code == OP_ALT);
|
||||
return TRUE;
|
||||
return yield;
|
||||
}
|
||||
|
||||
|
||||
@@ -401,17 +503,16 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
|
||||
NULL on error or if no optimization possible
|
||||
*/
|
||||
|
||||
EXPORT pcre_extra *
|
||||
PCRE_EXP_DEFN pcre_extra *
|
||||
pcre_study(const pcre *external_re, int options, const char **errorptr)
|
||||
{
|
||||
uschar start_bits[32];
|
||||
pcre_extra *extra;
|
||||
pcre_study_data *study;
|
||||
const uschar *tables;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
uschar *code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
uschar *code;
|
||||
compile_data compile_block;
|
||||
const real_pcre *re = (const real_pcre *)external_re;
|
||||
|
||||
*errorptr = NULL;
|
||||
|
||||
@@ -427,11 +528,15 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
code = (uschar *)re + re->name_table_offset +
|
||||
(re->name_count * re->name_entry_size);
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first char, or
|
||||
a multiline pattern that matches only at "line starts", no further processing
|
||||
at present. */
|
||||
|
||||
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
if ((re->options & PCRE_ANCHORED) != 0 ||
|
||||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
|
||||
return NULL;
|
||||
|
||||
/* Set the character tables in the block that is passed around */
|
||||
@@ -449,8 +554,8 @@ compile_block.ctypes = tables + ctypes_offset;
|
||||
/* See if we can find a fixed set of initial characters for the pattern. */
|
||||
|
||||
memset(start_bits, 0, 32 * sizeof(uschar));
|
||||
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
(re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
|
||||
if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
|
||||
(re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
|
||||
|
||||
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
|
||||
the latter, which is pointed to by the former, which may also get additional
|
||||
@@ -479,4 +584,4 @@ memcpy(study->start_bits, start_bits, sizeof(start_bits));
|
||||
return extra;
|
||||
}
|
||||
|
||||
/* End of pcrestud.c */
|
||||
/* End of pcre_study.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,14 +39,20 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
/* This module contains some fixed tables that are used by more than one of the
|
||||
PCRE code modules. */
|
||||
PCRE code modules. The tables are also #included by the pcretest program, which
|
||||
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
|
||||
clashes with the library. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
|
||||
the definition is next to the definition of the opcodes in internal.h. */
|
||||
the definition is next to the definition of the opcodes in pcre_internal.h. */
|
||||
|
||||
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
|
||||
@@ -59,6 +65,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
|
||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
||||
character. */
|
||||
|
||||
#ifdef SUPPORT_UTF8
|
||||
|
||||
const int _pcre_utf8_table1[] =
|
||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
||||
|
||||
@@ -70,9 +78,8 @@ first byte of a character, indexed by the number of additional bytes. */
|
||||
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
||||
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
/* Table of the number of extra characters, indexed by the first character
|
||||
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
|
||||
0x3d. */
|
||||
/* Table of the number of extra bytes, indexed by the first byte masked with
|
||||
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
|
||||
|
||||
const uschar _pcre_utf8_table4[] = {
|
||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
||||
@@ -80,50 +87,232 @@ const uschar _pcre_utf8_table4[] = {
|
||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
||||
|
||||
/* This table translates Unicode property names into code values for the
|
||||
ucp_findchar() function. It is used by pcretest as well as by the library
|
||||
functions. */
|
||||
/* The pcre_utt[] table below translates Unicode property names into type and
|
||||
code values. It is searched by binary chop, so must be in collating sequence of
|
||||
name. Originally, the table contained pointers to the name strings in the first
|
||||
field of each entry. However, that leads to a large number of relocations when
|
||||
a shared library is dynamically loaded. A significant reduction is made by
|
||||
putting all the names into a single, large string and then using offsets in the
|
||||
table itself. Maintenance is more error-prone, but frequent changes to this
|
||||
data are unlikely. */
|
||||
|
||||
const char _pcre_utt_names[] =
|
||||
"Any\0"
|
||||
"Arabic\0"
|
||||
"Armenian\0"
|
||||
"Balinese\0"
|
||||
"Bengali\0"
|
||||
"Bopomofo\0"
|
||||
"Braille\0"
|
||||
"Buginese\0"
|
||||
"Buhid\0"
|
||||
"C\0"
|
||||
"Canadian_Aboriginal\0"
|
||||
"Cc\0"
|
||||
"Cf\0"
|
||||
"Cherokee\0"
|
||||
"Cn\0"
|
||||
"Co\0"
|
||||
"Common\0"
|
||||
"Coptic\0"
|
||||
"Cs\0"
|
||||
"Cuneiform\0"
|
||||
"Cypriot\0"
|
||||
"Cyrillic\0"
|
||||
"Deseret\0"
|
||||
"Devanagari\0"
|
||||
"Ethiopic\0"
|
||||
"Georgian\0"
|
||||
"Glagolitic\0"
|
||||
"Gothic\0"
|
||||
"Greek\0"
|
||||
"Gujarati\0"
|
||||
"Gurmukhi\0"
|
||||
"Han\0"
|
||||
"Hangul\0"
|
||||
"Hanunoo\0"
|
||||
"Hebrew\0"
|
||||
"Hiragana\0"
|
||||
"Inherited\0"
|
||||
"Kannada\0"
|
||||
"Katakana\0"
|
||||
"Kharoshthi\0"
|
||||
"Khmer\0"
|
||||
"L\0"
|
||||
"L&\0"
|
||||
"Lao\0"
|
||||
"Latin\0"
|
||||
"Limbu\0"
|
||||
"Linear_B\0"
|
||||
"Ll\0"
|
||||
"Lm\0"
|
||||
"Lo\0"
|
||||
"Lt\0"
|
||||
"Lu\0"
|
||||
"M\0"
|
||||
"Malayalam\0"
|
||||
"Mc\0"
|
||||
"Me\0"
|
||||
"Mn\0"
|
||||
"Mongolian\0"
|
||||
"Myanmar\0"
|
||||
"N\0"
|
||||
"Nd\0"
|
||||
"New_Tai_Lue\0"
|
||||
"Nko\0"
|
||||
"Nl\0"
|
||||
"No\0"
|
||||
"Ogham\0"
|
||||
"Old_Italic\0"
|
||||
"Old_Persian\0"
|
||||
"Oriya\0"
|
||||
"Osmanya\0"
|
||||
"P\0"
|
||||
"Pc\0"
|
||||
"Pd\0"
|
||||
"Pe\0"
|
||||
"Pf\0"
|
||||
"Phags_Pa\0"
|
||||
"Phoenician\0"
|
||||
"Pi\0"
|
||||
"Po\0"
|
||||
"Ps\0"
|
||||
"Runic\0"
|
||||
"S\0"
|
||||
"Sc\0"
|
||||
"Shavian\0"
|
||||
"Sinhala\0"
|
||||
"Sk\0"
|
||||
"Sm\0"
|
||||
"So\0"
|
||||
"Syloti_Nagri\0"
|
||||
"Syriac\0"
|
||||
"Tagalog\0"
|
||||
"Tagbanwa\0"
|
||||
"Tai_Le\0"
|
||||
"Tamil\0"
|
||||
"Telugu\0"
|
||||
"Thaana\0"
|
||||
"Thai\0"
|
||||
"Tibetan\0"
|
||||
"Tifinagh\0"
|
||||
"Ugaritic\0"
|
||||
"Yi\0"
|
||||
"Z\0"
|
||||
"Zl\0"
|
||||
"Zp\0"
|
||||
"Zs\0";
|
||||
|
||||
const ucp_type_table _pcre_utt[] = {
|
||||
{ "C", 128 + ucp_C },
|
||||
{ "Cc", ucp_Cc },
|
||||
{ "Cf", ucp_Cf },
|
||||
{ "Cn", ucp_Cn },
|
||||
{ "Co", ucp_Co },
|
||||
{ "Cs", ucp_Cs },
|
||||
{ "L", 128 + ucp_L },
|
||||
{ "Ll", ucp_Ll },
|
||||
{ "Lm", ucp_Lm },
|
||||
{ "Lo", ucp_Lo },
|
||||
{ "Lt", ucp_Lt },
|
||||
{ "Lu", ucp_Lu },
|
||||
{ "M", 128 + ucp_M },
|
||||
{ "Mc", ucp_Mc },
|
||||
{ "Me", ucp_Me },
|
||||
{ "Mn", ucp_Mn },
|
||||
{ "N", 128 + ucp_N },
|
||||
{ "Nd", ucp_Nd },
|
||||
{ "Nl", ucp_Nl },
|
||||
{ "No", ucp_No },
|
||||
{ "P", 128 + ucp_P },
|
||||
{ "Pc", ucp_Pc },
|
||||
{ "Pd", ucp_Pd },
|
||||
{ "Pe", ucp_Pe },
|
||||
{ "Pf", ucp_Pf },
|
||||
{ "Pi", ucp_Pi },
|
||||
{ "Po", ucp_Po },
|
||||
{ "Ps", ucp_Ps },
|
||||
{ "S", 128 + ucp_S },
|
||||
{ "Sc", ucp_Sc },
|
||||
{ "Sk", ucp_Sk },
|
||||
{ "Sm", ucp_Sm },
|
||||
{ "So", ucp_So },
|
||||
{ "Z", 128 + ucp_Z },
|
||||
{ "Zl", ucp_Zl },
|
||||
{ "Zp", ucp_Zp },
|
||||
{ "Zs", ucp_Zs }
|
||||
{ 0, PT_ANY, 0 },
|
||||
{ 4, PT_SC, ucp_Arabic },
|
||||
{ 11, PT_SC, ucp_Armenian },
|
||||
{ 20, PT_SC, ucp_Balinese },
|
||||
{ 29, PT_SC, ucp_Bengali },
|
||||
{ 37, PT_SC, ucp_Bopomofo },
|
||||
{ 46, PT_SC, ucp_Braille },
|
||||
{ 54, PT_SC, ucp_Buginese },
|
||||
{ 63, PT_SC, ucp_Buhid },
|
||||
{ 69, PT_GC, ucp_C },
|
||||
{ 71, PT_SC, ucp_Canadian_Aboriginal },
|
||||
{ 91, PT_PC, ucp_Cc },
|
||||
{ 94, PT_PC, ucp_Cf },
|
||||
{ 97, PT_SC, ucp_Cherokee },
|
||||
{ 106, PT_PC, ucp_Cn },
|
||||
{ 109, PT_PC, ucp_Co },
|
||||
{ 112, PT_SC, ucp_Common },
|
||||
{ 119, PT_SC, ucp_Coptic },
|
||||
{ 126, PT_PC, ucp_Cs },
|
||||
{ 129, PT_SC, ucp_Cuneiform },
|
||||
{ 139, PT_SC, ucp_Cypriot },
|
||||
{ 147, PT_SC, ucp_Cyrillic },
|
||||
{ 156, PT_SC, ucp_Deseret },
|
||||
{ 164, PT_SC, ucp_Devanagari },
|
||||
{ 175, PT_SC, ucp_Ethiopic },
|
||||
{ 184, PT_SC, ucp_Georgian },
|
||||
{ 193, PT_SC, ucp_Glagolitic },
|
||||
{ 204, PT_SC, ucp_Gothic },
|
||||
{ 211, PT_SC, ucp_Greek },
|
||||
{ 217, PT_SC, ucp_Gujarati },
|
||||
{ 226, PT_SC, ucp_Gurmukhi },
|
||||
{ 235, PT_SC, ucp_Han },
|
||||
{ 239, PT_SC, ucp_Hangul },
|
||||
{ 246, PT_SC, ucp_Hanunoo },
|
||||
{ 254, PT_SC, ucp_Hebrew },
|
||||
{ 261, PT_SC, ucp_Hiragana },
|
||||
{ 270, PT_SC, ucp_Inherited },
|
||||
{ 280, PT_SC, ucp_Kannada },
|
||||
{ 288, PT_SC, ucp_Katakana },
|
||||
{ 297, PT_SC, ucp_Kharoshthi },
|
||||
{ 308, PT_SC, ucp_Khmer },
|
||||
{ 314, PT_GC, ucp_L },
|
||||
{ 316, PT_LAMP, 0 },
|
||||
{ 319, PT_SC, ucp_Lao },
|
||||
{ 323, PT_SC, ucp_Latin },
|
||||
{ 329, PT_SC, ucp_Limbu },
|
||||
{ 335, PT_SC, ucp_Linear_B },
|
||||
{ 344, PT_PC, ucp_Ll },
|
||||
{ 347, PT_PC, ucp_Lm },
|
||||
{ 350, PT_PC, ucp_Lo },
|
||||
{ 353, PT_PC, ucp_Lt },
|
||||
{ 356, PT_PC, ucp_Lu },
|
||||
{ 359, PT_GC, ucp_M },
|
||||
{ 361, PT_SC, ucp_Malayalam },
|
||||
{ 371, PT_PC, ucp_Mc },
|
||||
{ 374, PT_PC, ucp_Me },
|
||||
{ 377, PT_PC, ucp_Mn },
|
||||
{ 380, PT_SC, ucp_Mongolian },
|
||||
{ 390, PT_SC, ucp_Myanmar },
|
||||
{ 398, PT_GC, ucp_N },
|
||||
{ 400, PT_PC, ucp_Nd },
|
||||
{ 403, PT_SC, ucp_New_Tai_Lue },
|
||||
{ 415, PT_SC, ucp_Nko },
|
||||
{ 419, PT_PC, ucp_Nl },
|
||||
{ 422, PT_PC, ucp_No },
|
||||
{ 425, PT_SC, ucp_Ogham },
|
||||
{ 431, PT_SC, ucp_Old_Italic },
|
||||
{ 442, PT_SC, ucp_Old_Persian },
|
||||
{ 454, PT_SC, ucp_Oriya },
|
||||
{ 460, PT_SC, ucp_Osmanya },
|
||||
{ 468, PT_GC, ucp_P },
|
||||
{ 470, PT_PC, ucp_Pc },
|
||||
{ 473, PT_PC, ucp_Pd },
|
||||
{ 476, PT_PC, ucp_Pe },
|
||||
{ 479, PT_PC, ucp_Pf },
|
||||
{ 482, PT_SC, ucp_Phags_Pa },
|
||||
{ 491, PT_SC, ucp_Phoenician },
|
||||
{ 502, PT_PC, ucp_Pi },
|
||||
{ 505, PT_PC, ucp_Po },
|
||||
{ 508, PT_PC, ucp_Ps },
|
||||
{ 511, PT_SC, ucp_Runic },
|
||||
{ 517, PT_GC, ucp_S },
|
||||
{ 519, PT_PC, ucp_Sc },
|
||||
{ 522, PT_SC, ucp_Shavian },
|
||||
{ 530, PT_SC, ucp_Sinhala },
|
||||
{ 538, PT_PC, ucp_Sk },
|
||||
{ 541, PT_PC, ucp_Sm },
|
||||
{ 544, PT_PC, ucp_So },
|
||||
{ 547, PT_SC, ucp_Syloti_Nagri },
|
||||
{ 560, PT_SC, ucp_Syriac },
|
||||
{ 567, PT_SC, ucp_Tagalog },
|
||||
{ 575, PT_SC, ucp_Tagbanwa },
|
||||
{ 584, PT_SC, ucp_Tai_Le },
|
||||
{ 591, PT_SC, ucp_Tamil },
|
||||
{ 597, PT_SC, ucp_Telugu },
|
||||
{ 604, PT_SC, ucp_Thaana },
|
||||
{ 611, PT_SC, ucp_Thai },
|
||||
{ 616, PT_SC, ucp_Tibetan },
|
||||
{ 624, PT_SC, ucp_Tifinagh },
|
||||
{ 633, PT_SC, ucp_Ugaritic },
|
||||
{ 642, PT_SC, ucp_Yi },
|
||||
{ 645, PT_GC, ucp_Z },
|
||||
{ 647, PT_PC, ucp_Zl },
|
||||
{ 650, PT_PC, ucp_Zp },
|
||||
{ 653, PT_PC, ucp_Zs }
|
||||
};
|
||||
|
||||
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
|
||||
|
||||
/* End of pcretabs.c */
|
||||
#endif /* SUPPORT_UTF8 */
|
||||
|
||||
/* End of pcre_tables.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,10 @@ see if it was compiled with the opposite endianness. If so, it uses an
|
||||
auxiliary local function to flip the appropriate bytes. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -62,8 +66,8 @@ Arguments:
|
||||
Returns: the flipped value
|
||||
*/
|
||||
|
||||
static long int
|
||||
byteflip(long int value, int n)
|
||||
static unsigned long int
|
||||
byteflip(unsigned long int value, int n)
|
||||
{
|
||||
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
|
||||
return ((value & 0x000000ff) << 24) |
|
||||
@@ -94,16 +98,17 @@ Returns: the new block if is is indeed a byte-flipped regex
|
||||
NULL if it is not
|
||||
*/
|
||||
|
||||
EXPORT real_pcre *
|
||||
real_pcre *
|
||||
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
|
||||
const pcre_study_data *study, pcre_study_data *internal_study)
|
||||
{
|
||||
if ((unsigned long) byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
||||
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
|
||||
return NULL;
|
||||
|
||||
*internal_re = *re; /* To copy other fields */
|
||||
internal_re->size = byteflip(re->size, sizeof(re->size));
|
||||
internal_re->options = byteflip(re->options, sizeof(re->options));
|
||||
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
|
||||
internal_re->top_bracket =
|
||||
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
|
||||
internal_re->top_backref =
|
||||
@@ -129,4 +134,4 @@ if (study != NULL)
|
||||
return internal_re;
|
||||
}
|
||||
|
||||
/* End of pcretryf.c */
|
||||
/* End of pcre_tryflipped.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
string that identifies the PCRE version that is in use. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -49,13 +53,38 @@ string that identifies the PCRE version that is in use. */
|
||||
* Return version string *
|
||||
*************************************************/
|
||||
|
||||
/* These macros are the standard way of turning unquoted text into C strings.
|
||||
They allow macros like PCRE_MAJOR to be defined without quotes, which is
|
||||
convenient for user programs that want to test its value. */
|
||||
|
||||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
EXPORT const char *
|
||||
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
|
||||
production releases. Originally, it was used naively in this code:
|
||||
|
||||
return XSTRING(PCRE_MAJOR)
|
||||
"." XSTRING(PCRE_MINOR)
|
||||
XSTRING(PCRE_PRERELEASE)
|
||||
" " XSTRING(PCRE_DATE);
|
||||
|
||||
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
|
||||
STRING(). The C standard states: "If (before argument substitution) any
|
||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out that gcc treats this case as a single empty string - which is what we
|
||||
really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. To cope with both ways of
|
||||
handling this, I had to resort to some messy hackery that does a test at run time.
|
||||
I could find no way of detecting that a macro is defined as an empty string at
|
||||
pre-processor time. This hack uses a standard trick for avoiding calling
|
||||
the STRING macro with an empty argument when doing the test. */
|
||||
|
||||
PCRE_EXP_DEFN const char *
|
||||
pcre_version(void)
|
||||
{
|
||||
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
|
||||
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
|
||||
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
|
||||
}
|
||||
|
||||
/* End of pcrevers.c */
|
||||
/* End of pcre_version.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
strings. */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -55,6 +59,13 @@ that subsequent code can assume it is dealing with a valid string. The check
|
||||
can be turned off for maximum performance, but the consequences of supplying
|
||||
an invalid string are then undefined.
|
||||
|
||||
Originally, this function checked according to RFC 2279, allowing for values in
|
||||
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
|
||||
the canonical format. Once somebody had pointed out RFC 3629 to me (it
|
||||
obsoletes 2279), additional restrictions were applied. The values are now
|
||||
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
|
||||
subrange 0xd800 to 0xdfff is excluded.
|
||||
|
||||
Arguments:
|
||||
string points to the string
|
||||
length length of string, or -1 if the string is zero-terminated
|
||||
@@ -63,9 +74,10 @@ Returns: < 0 if the string is a valid UTF-8 string
|
||||
>= 0 otherwise; the value is the offset of the bad byte
|
||||
*/
|
||||
|
||||
EXPORT int
|
||||
int
|
||||
_pcre_valid_utf8(const uschar *string, int length)
|
||||
{
|
||||
#ifdef SUPPORT_UTF8
|
||||
register const uschar *p;
|
||||
|
||||
if (length < 0)
|
||||
@@ -79,32 +91,49 @@ for (p = string; length-- > 0; p++)
|
||||
register int ab;
|
||||
register int c = *p;
|
||||
if (c < 128) continue;
|
||||
if ((c & 0xc0) != 0xc0) return p - string;
|
||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab) return p - string;
|
||||
if (c < 0xc0) return p - string;
|
||||
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
if (length < ab || ab > 3) return p - string;
|
||||
length -= ab;
|
||||
|
||||
/* Check top bits in the second byte */
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
|
||||
/* Check for overlong sequences for each different length */
|
||||
/* Check for overlong sequences for each different length, and for the
|
||||
excluded range 0xd800 to 0xdfff. */
|
||||
|
||||
switch (ab)
|
||||
{
|
||||
/* Check for xx00 000x */
|
||||
/* Check for xx00 000x (overlong sequence) */
|
||||
|
||||
case 1:
|
||||
if ((c & 0x3e) == 0) return p - string;
|
||||
continue; /* We know there aren't any more bytes to check */
|
||||
|
||||
/* Check for 1110 0000, xx0x xxxx */
|
||||
/* Check for 1110 0000, xx0x xxxx (overlong sequence) or
|
||||
1110 1101, 101x xxxx (0xd800 - 0xdfff) */
|
||||
|
||||
case 2:
|
||||
if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
|
||||
if ((c == 0xe0 && (*p & 0x20) == 0) ||
|
||||
(c == 0xed && *p >= 0xa0))
|
||||
return p - string;
|
||||
break;
|
||||
|
||||
/* Check for 1111 0000, xx00 xxxx */
|
||||
/* Check for 1111 0000, xx00 xxxx (overlong sequence) or
|
||||
greater than 0x0010ffff (f4 8f bf bf) */
|
||||
|
||||
case 3:
|
||||
if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
|
||||
if ((c == 0xf0 && (*p & 0x30) == 0) ||
|
||||
(c > 0xf4 ) ||
|
||||
(c == 0xf4 && *p > 0x8f))
|
||||
return p - string;
|
||||
break;
|
||||
|
||||
#if 0
|
||||
/* These cases can no longer occur, as we restrict to a maximum of four
|
||||
bytes nowadays. Leave the code here in case we ever want to add an option
|
||||
for longer sequences. */
|
||||
|
||||
/* Check for 1111 1000, xx00 0xxx */
|
||||
case 4:
|
||||
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
|
||||
@@ -115,6 +144,8 @@ for (p = string; length-- > 0; p++)
|
||||
if (c == 0xfe || c == 0xff ||
|
||||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
|
||||
break;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Check for valid bytes after the 2nd, if any; all must start 10 */
|
||||
@@ -123,8 +154,9 @@ for (p = string; length-- > 0; p++)
|
||||
if ((*(++p) & 0xc0) != 0x80) return p - string;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* End of pcrevutf.c */
|
||||
/* End of pcre_valid_utf8.c */
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Copyright (c) 1997-2005 University of Cambridge
|
||||
Copyright (c) 1997-2008 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,6 +43,10 @@ class (one that contains characters whose values are > 255). It is used by both
|
||||
pcre_exec() and pcre_dfa_exec(). */
|
||||
|
||||
|
||||
#if 1
|
||||
#include "_hbconf.h"
|
||||
#endif
|
||||
|
||||
#include "pcreinal.h"
|
||||
|
||||
|
||||
@@ -60,7 +64,7 @@ Arguments:
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
||||
EXPORT BOOL
|
||||
BOOL
|
||||
_pcre_xclass(int c, const uschar *data)
|
||||
{
|
||||
int t;
|
||||
@@ -100,17 +104,40 @@ while ((t = *data++) != XCL_END)
|
||||
#ifdef SUPPORT_UCP
|
||||
else /* XCL_PROP & XCL_NOTPROP */
|
||||
{
|
||||
int chartype, othercase;
|
||||
int rqdtype = *data++;
|
||||
int category = ucp_findchar(c, &chartype, &othercase);
|
||||
if (rqdtype >= 128)
|
||||
int chartype, script;
|
||||
int category = _pcre_ucp_findprop(c, &chartype, &script);
|
||||
|
||||
switch(*data)
|
||||
{
|
||||
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;
|
||||
case PT_ANY:
|
||||
if (t == XCL_PROP) return !negated;
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
|
||||
(t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
|
||||
break;
|
||||
|
||||
/* This should never occur, but compilers may mutter if there is no
|
||||
default. */
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
data += 2;
|
||||
}
|
||||
#endif /* SUPPORT_UCP */
|
||||
}
|
||||
@@ -118,4 +145,4 @@ while ((t = *data++) != XCL_END)
|
||||
return negated; /* char did not match */
|
||||
}
|
||||
|
||||
/* End of pcrexcls.c */
|
||||
/* End of pcre_xclass.c */
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
|
||||
#ifndef _UCP_H
|
||||
#define _UCP_H
|
||||
|
||||
/* These are the character categories that are returned by ucp_findchar */
|
||||
/* This file contains definitions of the property values that are returned by
|
||||
the function _pcre_ucp_findprop(). New values that are added for new releases
|
||||
of Unicode should always be at the end of each enum, for backwards
|
||||
compatibility. */
|
||||
|
||||
/* These are the general character categories. */
|
||||
|
||||
enum {
|
||||
ucp_C, /* Other */
|
||||
@@ -18,7 +22,7 @@ enum {
|
||||
ucp_Z /* Separator */
|
||||
};
|
||||
|
||||
/* These are the detailed character types that are returned by ucp_findchar */
|
||||
/* These are the particular character types. */
|
||||
|
||||
enum {
|
||||
ucp_Cc, /* Control */
|
||||
@@ -53,7 +57,76 @@ enum {
|
||||
ucp_Zs /* Space separator */
|
||||
};
|
||||
|
||||
extern int ucp_findchar(const int, int *, int *);
|
||||
/* These are the script identifications. */
|
||||
|
||||
enum {
|
||||
ucp_Arabic,
|
||||
ucp_Armenian,
|
||||
ucp_Bengali,
|
||||
ucp_Bopomofo,
|
||||
ucp_Braille,
|
||||
ucp_Buginese,
|
||||
ucp_Buhid,
|
||||
ucp_Canadian_Aboriginal,
|
||||
ucp_Cherokee,
|
||||
ucp_Common,
|
||||
ucp_Coptic,
|
||||
ucp_Cypriot,
|
||||
ucp_Cyrillic,
|
||||
ucp_Deseret,
|
||||
ucp_Devanagari,
|
||||
ucp_Ethiopic,
|
||||
ucp_Georgian,
|
||||
ucp_Glagolitic,
|
||||
ucp_Gothic,
|
||||
ucp_Greek,
|
||||
ucp_Gujarati,
|
||||
ucp_Gurmukhi,
|
||||
ucp_Han,
|
||||
ucp_Hangul,
|
||||
ucp_Hanunoo,
|
||||
ucp_Hebrew,
|
||||
ucp_Hiragana,
|
||||
ucp_Inherited,
|
||||
ucp_Kannada,
|
||||
ucp_Katakana,
|
||||
ucp_Kharoshthi,
|
||||
ucp_Khmer,
|
||||
ucp_Lao,
|
||||
ucp_Latin,
|
||||
ucp_Limbu,
|
||||
ucp_Linear_B,
|
||||
ucp_Malayalam,
|
||||
ucp_Mongolian,
|
||||
ucp_Myanmar,
|
||||
ucp_New_Tai_Lue,
|
||||
ucp_Ogham,
|
||||
ucp_Old_Italic,
|
||||
ucp_Old_Persian,
|
||||
ucp_Oriya,
|
||||
ucp_Osmanya,
|
||||
ucp_Runic,
|
||||
ucp_Shavian,
|
||||
ucp_Sinhala,
|
||||
ucp_Syloti_Nagri,
|
||||
ucp_Syriac,
|
||||
ucp_Tagalog,
|
||||
ucp_Tagbanwa,
|
||||
ucp_Tai_Le,
|
||||
ucp_Tamil,
|
||||
ucp_Telugu,
|
||||
ucp_Thaana,
|
||||
ucp_Thai,
|
||||
ucp_Tibetan,
|
||||
ucp_Tifinagh,
|
||||
ucp_Ugaritic,
|
||||
ucp_Yi,
|
||||
ucp_Balinese, /* New for Unicode 5.0.0 */
|
||||
ucp_Cuneiform, /* New for Unicode 5.0.0 */
|
||||
ucp_Nko, /* New for Unicode 5.0.0 */
|
||||
ucp_Phags_Pa, /* New for Unicode 5.0.0 */
|
||||
ucp_Phoenician /* New for Unicode 5.0.0 */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,91 +1,92 @@
|
||||
/*************************************************
|
||||
* libucp - Unicode Property Table handler *
|
||||
* Unicode Property Table handler *
|
||||
*************************************************/
|
||||
|
||||
/* Internal header file defining the layout of compact nodes in the tree. */
|
||||
#ifndef _UCPINTERNAL_H
|
||||
#define _UCPINTERNAL_H
|
||||
|
||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
||||
words that form a data item in the table. */
|
||||
|
||||
typedef struct cnode {
|
||||
unsigned short int f0;
|
||||
unsigned short int f1;
|
||||
unsigned short int f2;
|
||||
pcre_uint32 f0;
|
||||
pcre_uint32 f1;
|
||||
} cnode;
|
||||
|
||||
/* Things for the f0 field */
|
||||
|
||||
#define f0_leftexists 0x8000 /* Left child exists */
|
||||
#define f0_typemask 0x3f00 /* Type bits */
|
||||
#define f0_typeshift 8 /* Type shift */
|
||||
#define f0_chhmask 0x00ff /* Character high bits */
|
||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
||||
#define f0_scriptshift 24 /* Shift for script value */
|
||||
#define f0_rangeflag 0x00800000 /* Flag for a range item */
|
||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
||||
|
||||
/* Things for the f2 field */
|
||||
/* Things for the f1 field */
|
||||
|
||||
#define f2_rightmask 0xf000 /* Mask for right offset bits */
|
||||
#define f2_rightshift 12 /* Shift for right offset */
|
||||
#define f2_casemask 0x0fff /* Mask for case offset */
|
||||
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
||||
#define f1_typeshift 26 /* Shift for the type field */
|
||||
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
||||
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
||||
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
||||
|
||||
/* The tree consists of a vector of structures of type cnode, with the root
|
||||
node as the first element. The three short ints (16-bits) are used as follows:
|
||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
||||
32-bit integers are used as follows:
|
||||
|
||||
(f0) (1) The 0x8000 bit of f0 is set if a left child exists. The child's node
|
||||
is the next node in the vector.
|
||||
(2) The 0x4000 bits of f0 is spare.
|
||||
(3) The 0x3f00 bits of f0 contain the character type; this is a number
|
||||
defined by the enumeration in ucp.h (e.g. ucp_Lu).
|
||||
(4) The bottom 8 bits of f0 contain the most significant byte of the
|
||||
character's 24-bit codepoint.
|
||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
||||
defined by the enum in ucp.h.
|
||||
|
||||
(f1) (1) The f1 field contains the two least significant bytes of the
|
||||
codepoint.
|
||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
||||
It is not set if this entry defines a single character.
|
||||
|
||||
(f2) (1) The 0xf000 bits of f2 contain zero if there is no right child of this
|
||||
node. Otherwise, they contain one plus the exponent of the power of
|
||||
two of the offset to the right node (e.g. a value of 3 means 8). The
|
||||
units of the offset are node items.
|
||||
(3) The 0x00600000 bits are spare.
|
||||
|
||||
(2) The 0x0fff bits of f2 contain the signed offset from this character to
|
||||
its alternate cased value. They are zero if there is no such
|
||||
character.
|
||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
||||
|
||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
||||
defined by an enum in ucp.h.
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
||.|.| type (6) | ms char (8) || ls char (16) ||....| case offset (12) ||
|
||||
-----------------------------------------------------------------------------
|
||||
| | |
|
||||
| |-> spare |
|
||||
| exponent of right
|
||||
|-> left child exists child offset
|
||||
(2) The 0x03ff0000 bits are spare.
|
||||
|
||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
||||
range if this entry defines a range, OR the *signed* offset to the
|
||||
character's "other case" partner if this entry defines a single
|
||||
character. There is no partner if the value is zero.
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
||||
-------------------------------------------------------------------------------
|
||||
| | | | |
|
||||
| | |-> spare | |-> spare
|
||||
| | |
|
||||
| |-> spare |-> spare
|
||||
|
|
||||
|-> range flag
|
||||
|
||||
The upper/lower casing information is set only for characters that come in
|
||||
pairs. There are (at present) four non-one-to-one mappings in the Unicode data.
|
||||
These are ignored. They are:
|
||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
||||
|
||||
1FBE Greek Prosgegrammeni (lower, with upper -> capital iota)
|
||||
2126 Ohm
|
||||
212A Kelvin
|
||||
212B Angstrom
|
||||
When searching the data, proceed as follows:
|
||||
|
||||
Certainly for the last three, having an alternate case would seem to be a
|
||||
mistake. I don't know any Greek, so cannot comment on the first one.
|
||||
(1) Set up for a binary chop search.
|
||||
|
||||
(2) If the top is not greater than the bottom, the character is not in the
|
||||
table. Its type must therefore be "Cn" ("Undefined").
|
||||
|
||||
When searching the tree, proceed as follows:
|
||||
(3) Find the middle vector element.
|
||||
|
||||
(1) Start at the first node.
|
||||
(4) Extract the code point and compare. If equal, we are done.
|
||||
|
||||
(2) Extract the character value from f1 and the bottom 8 bits of f0;
|
||||
(5) If the test character is smaller, set the top to the current point, and
|
||||
goto (2).
|
||||
|
||||
(3) Compare with the character being sought. If equal, we are done.
|
||||
(6) If the current entry defines a range, compute the last character by adding
|
||||
the offset, and see if the test character is within the range. If it is,
|
||||
we are done.
|
||||
|
||||
(4) If the test character is smaller, inspect the f0_leftexists flag. If it is
|
||||
not set, the character is not in the tree. If it is set, move to the next
|
||||
node, and go to (2).
|
||||
|
||||
(5) If the test character is bigger, extract the f2_rightmask bits from f2, and
|
||||
shift them right by f2_rightshift. If the result is zero, the character is
|
||||
not in the tree. Otherwise, calculate the number of nodes to skip by
|
||||
shifting the value 1 left by this number minus one. Go to (2).
|
||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
||||
(2).
|
||||
*/
|
||||
|
||||
#endif /* _UCPINTERNAL_H */
|
||||
|
||||
/* End of internal.h */
|
||||
/* End of ucpinternal.h */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
3088
harbour/source/hbpcre/ucptable.h
Normal file
3088
harbour/source/hbpcre/ucptable.h
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user