2008-05-26 16:59 UTC+0100 Viktor Szakats (harbour.01 syenar hu)

* common.mak
   * source/hbpcre/Makefile
   * source/hbpcre/chartabs.c -- changes
   * source/hbpcre/config.h
   * source/hbpcre/pcre.h
   * source/hbpcre/pcrecomp.c
   * source/hbpcre/pcreconf.c
   * source/hbpcre/pcredfa.c
   * source/hbpcre/pcreexec.c
   * source/hbpcre/pcrefind.c
   * source/hbpcre/pcrefinf.c
   * source/hbpcre/pcreget.c
   * source/hbpcre/pcreinal.h
   * source/hbpcre/pcreinfo.c
   * source/hbpcre/pcremktb.c
   * source/hbpcre/pcreoutf.c
   * source/hbpcre/pcrerefc.c
   * source/hbpcre/pcrestud.c
   * source/hbpcre/pcretabs.c
   * source/hbpcre/pcretryf.c
   * source/hbpcre/pcrever.c
   * source/hbpcre/pcrevutf.c
   * source/hbpcre/pcrexcls.c
   * source/hbpcre/ucp.h
   * source/hbpcre/ucpinter.h
   - source/hbpcre/pcreglob.c -- renames
   + source/hbpcre/_hbpcreg.c
   - source/hbpcre/pcreprni.c
   + source/hbpcre/pcreprni.h
   - source/hbpcre/ucptable.c
   + source/hbpcre/ucptable.h
   + source/hbpcre/_hbconf.h  -- new files
   + source/hbpcre/pcrenewl.c
   - source/hbpcre/dftables.c -- deletion
     + Updated to PCRE 7.7 (from 6.3)
     ; Original code not (yet) modified, so some warnings 
       may appear in foreign code.
     ; Please test.

   + source/hbpcre/cnv_hb2o.bat
   + source/hbpcre/cnv_o2hb.bat
     + Added batch files to make to conversion from 
       original PCRE source files into Harbour.
This commit is contained in:
Viktor Szakats
2008-05-26 15:03:16 +00:00
parent 9b8db0c3de
commit 4425d27401
35 changed files with 11776 additions and 18788 deletions

View File

@@ -8,6 +8,51 @@
2008-12-31 13:59 UTC+0100 Foo Bar <foo.bar@foobar.org>
*/
2008-05-26 16:59 UTC+0100 Viktor Szakats (harbour.01 syenar hu)
* common.mak
* source/hbpcre/Makefile
* source/hbpcre/chartabs.c -- changes
* source/hbpcre/config.h
* source/hbpcre/pcre.h
* source/hbpcre/pcrecomp.c
* source/hbpcre/pcreconf.c
* source/hbpcre/pcredfa.c
* source/hbpcre/pcreexec.c
* source/hbpcre/pcrefind.c
* source/hbpcre/pcrefinf.c
* source/hbpcre/pcreget.c
* source/hbpcre/pcreinal.h
* source/hbpcre/pcreinfo.c
* source/hbpcre/pcremktb.c
* source/hbpcre/pcreoutf.c
* source/hbpcre/pcrerefc.c
* source/hbpcre/pcrestud.c
* source/hbpcre/pcretabs.c
* source/hbpcre/pcretryf.c
* source/hbpcre/pcrever.c
* source/hbpcre/pcrevutf.c
* source/hbpcre/pcrexcls.c
* source/hbpcre/ucp.h
* source/hbpcre/ucpinter.h
- source/hbpcre/pcreglob.c -- renames
+ source/hbpcre/_hbpcreg.c
- source/hbpcre/pcreprni.c
+ source/hbpcre/pcreprni.h
- source/hbpcre/ucptable.c
+ source/hbpcre/ucptable.h
+ source/hbpcre/_hbconf.h -- new files
+ source/hbpcre/pcrenewl.c
- source/hbpcre/dftables.c -- deletion
+ Updated to PCRE 7.7 (from 6.3)
; Original code not (yet) modified, so some warnings
may appear in foreign code.
; Please test.
+ source/hbpcre/cnv_hb2o.bat
+ source/hbpcre/cnv_o2hb.bat
+ Added batch files to make to conversion from
original PCRE source files into Harbour.
2008-05-26 12:23 UTC+0100 Viktor Szakats (harbour.01 syenar hu)
* contrib/hbclipsm/gauge.c
! Fixed GPF in GaugeNew(). Old bug.

View File

@@ -745,25 +745,25 @@ LANG_LIB_OBJS = \
#**********************************************************
PCRE_LIB_OBJS = \
$(OBJ_DIR)\_hbpcreg$(OBJEXT) \
$(OBJ_DIR)\chartabs$(OBJEXT) \
$(OBJ_DIR)\pcrecomp$(OBJEXT) \
$(OBJ_DIR)\pcreconf$(OBJEXT) \
$(OBJ_DIR)\pcredfa$(OBJEXT) \
$(OBJ_DIR)\pcreexec$(OBJEXT) \
$(OBJ_DIR)\pcrefind$(OBJEXT) \
$(OBJ_DIR)\pcrefinf$(OBJEXT) \
$(OBJ_DIR)\pcreget$(OBJEXT) \
$(OBJ_DIR)\pcreglob$(OBJEXT) \
$(OBJ_DIR)\pcreinfo$(OBJEXT) \
$(OBJ_DIR)\pcremktb$(OBJEXT) \
$(OBJ_DIR)\pcrenewl$(OBJEXT) \
$(OBJ_DIR)\pcreoutf$(OBJEXT) \
$(OBJ_DIR)\pcreprni$(OBJEXT) \
$(OBJ_DIR)\pcrerefc$(OBJEXT) \
$(OBJ_DIR)\pcrestud$(OBJEXT) \
$(OBJ_DIR)\pcretabs$(OBJEXT) \
$(OBJ_DIR)\pcretryf$(OBJEXT) \
$(OBJ_DIR)\pcrefind$(OBJEXT) \
$(OBJ_DIR)\pcrevutf$(OBJEXT) \
$(OBJ_DIR)\pcrever$(OBJEXT) \
$(OBJ_DIR)\pcrevutf$(OBJEXT) \
$(OBJ_DIR)\pcrexcls$(OBJEXT) \
#**********************************************************

View File

@@ -5,25 +5,25 @@
ROOT = ../../
C_SOURCES=\
_hbpcreg.c \
chartabs.c \
pcrecomp.c \
pcreconf.c \
pcredfa.c \
pcredfa.c \
pcreexec.c \
pcrefind.c \
pcrefinf.c \
pcreget.c \
pcreglob.c \
pcreget.c \
pcreinfo.c \
pcremktb.c \
pcrenewl.c \
pcreoutf.c \
pcreprni.c \
pcrerefc.c \
pcrestud.c \
pcretabs.c \
pcretryf.c \
pcrefind.c \
pcrever.c \
pcrevutf.c \
pcrever.c \
pcrexcls.c \
LIBNAME=hbpcre

View File

@@ -0,0 +1,22 @@
/*
* $Id$
*/
#ifndef HB_CONFIG_H
#define HB_CONFIG_H
#define PCRE_STATIC
#if defined( _MSC_VER )
#pragma warning( push, 0 )
#endif
#if defined( __BORLANDC__ )
#pragma warn -use
#pragma warn -csu
#pragma warn -aus
#endif
#include "config.h"
#endif

View File

@@ -1,3 +1,10 @@
/*
* $Id$
*/
/* See pcre_globals.c in PCRE source package for the original of
this file. We need to override it, so we've made it local. */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
@@ -6,7 +13,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -46,32 +53,32 @@ indirection. These values can be changed by the caller, but are shared between
all threads. However, when compiling for Virtual Pascal, things are done
differently, and global variables are not used (see pcre.in). */
#include "hbapi.h"
#include "pcreinal.h"
static void * hb_pcre_grab( size_t size )
{
return hb_xgrab( size );
}
#ifndef VPCOMPAT
#ifdef __cplusplus
extern "C"
{
void *(*pcre_malloc)(size_t) = hb_pcre_grab;
void (*pcre_free)(void *) = hb_xfree;
void *(*pcre_stack_malloc)(size_t) = hb_pcre_grab;
void (*pcre_stack_free)(void *) = hb_xfree;
int (*pcre_callout)(pcre_callout_block *) = NULL;
}
#else
void *(*pcre_malloc)(size_t) = hb_pcre_grab;
void (*pcre_free)(void *) = hb_xfree;
void *(*pcre_stack_malloc)(size_t) = hb_pcre_grab;
void (*pcre_stack_free)(void *) = hb_xfree;
int (*pcre_callout)(pcre_callout_block *) = NULL;
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
/* End of pcreglob.c */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
#ifndef VPCOMPAT
PCRE_EXP_DATA_DEFN void *(*pcre_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_free)(void *) = free;
PCRE_EXP_DATA_DEFN void *(*pcre_stack_malloc)(size_t) = malloc;
PCRE_EXP_DATA_DEFN void (*pcre_stack_free)(void *) = free;
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
#endif
/* End of pcre_globals.c */

View File

@@ -2,15 +2,30 @@
* Perl-Compatible Regular Expressions *
*************************************************/
/* This file is automatically written by the dftables auxiliary
program. If you edit it by hand, you might like to edit the Makefile to
prevent its ever being regenerated.
/* This file contains character tables that are used when no external tables
are passed to PCRE by the application that calls it. The tables are used only
for characters whose code values are less than 256.
This file contains the default tables for characters with codes less than
128 (ASCII characters). These tables are used when no external tables are
passed to PCRE. */
This is a default version of the tables that assumes ASCII encoding. A program
called dftables (which is distributed with PCRE) can be used to build
alternative versions of this file. This is necessary if you are running in an
EBCDIC environment, or if you want to default to a different encoding, for
example ISO-8859-1. When dftables is run, it creates these tables in the
current locale. If PCRE is configured with --enable-rebuild-chartables, this
happens automatically.
The following #includes are present because without the gcc 4.x may remove the
array definition from the final binary if PCRE is built into a static library
and dead code stripping is activated. This leads to link errors. Pulling in the
header ensures that the array gets flagged as "someone outside this compilation
unit might reference this" and so it will always be supplied to the linker. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
extern const unsigned char _pcre_default_tables[];
const unsigned char _pcre_default_tables[] = {
/* This table is a lower casing table. */
@@ -83,11 +98,10 @@ const unsigned char _pcre_default_tables[] = {
240,241,242,243,244,245,246,247,
248,249,250,251,252,253,254,255,
/* This table contains bit maps for various character classes.
Each map is 32 bytes long and the bits run from the least
significant end of each byte. The classes that have their own
maps are: space, xdigit, digit, upper, lower, word, graph
print, punct, and cntrl. Other classes are built from combinations. */
/* This table contains bit maps for various character classes. Each map is 32
bytes long and the bits run from the least significant end of each byte. The
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
graph, print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@@ -159,7 +173,7 @@ print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
0x12,0x12,0x12,0x80,0x00,0x00,0x80,0x10, /* X - _ */
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
@@ -181,4 +195,4 @@ print, punct, and cntrl. Other classes are built from combinations. */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
/* End of chartables.c */
/* End of pcre_chartables.c */

View File

@@ -0,0 +1,59 @@
@echo off
rem
rem $Id$
rem
rem Tested with PCRE 7.7
rem NOTE: Purpose of this script is to take the PCRE files
rem in Harbour repo and convert them back to the filenames
rem used in the original PCRE source distribution.
rem This is to aid finding local modifications and
rem apply them after a PCRE source update.
rem [vszakats]
rem
rem This tool uses 'GNU gsar' for search and replace.
rem
rem DISCLAIMER: This tool is targeted only to Harbour core
rem maintainers. If you're not one of them you
rem don't have to mess with this tool.
md ori_dst
del ori_dst\*.* /Y
copy config.h ori_dst\config.h.generic
copy pcre.h ori_dst\pcre.h.generic
copy pcreinal.h ori_dst\pcre_internal.h
copy ucp.h ori_dst\ucp.h
copy ucpinter.h ori_dst\ucpinternal.h
copy ucptable.h ori_dst\ucptable.h
copy chartabs.c ori_dst\pcre_chartables.c.dist
copy pcrecomp.c ori_dst\pcre_compile.c
copy pcreconf.c ori_dst\pcre_config.c
copy pcredfa.c ori_dst\pcre_dfa_exec.c
copy pcreexec.c ori_dst\pcre_exec.c
copy pcrefinf.c ori_dst\pcre_fullinfo.c
copy pcreget.c ori_dst\pcre_get.c
copy pcreinfo.c ori_dst\pcre_info.c
copy pcremktb.c ori_dst\pcre_maketables.c
copy pcrenewl.c ori_dst\pcre_newline.c
copy pcreoutf.c ori_dst\pcre_ord2utf8.c
copy pcreprni.h ori_dst\pcre_printint.src
copy pcrerefc.c ori_dst\pcre_refcount.c
copy pcrestud.c ori_dst\pcre_study.c
copy pcretabs.c ori_dst\pcre_tables.c
copy pcretryf.c ori_dst\pcre_try_flipped.c
copy pcrefind.c ori_dst\pcre_ucp_searchfuncs.c
copy pcrevutf.c ori_dst\pcre_valid_utf8.c
copy pcrever.c ori_dst\pcre_version.c
copy pcrexcls.c ori_dst\pcre_xclass.c
cd ori_dst
gsar -o -s":x22pcreinal.h:x22" -r":x22pcre_internal.h:x22" *.*
gsar -o -s":x22ucpinter.h:x22" -r":x22ucpinternal.h:x22" *.*
gsar -o -s":x22_hbconf.h:x22" -r":x22config.h:x22" *.*
gsar -o -s":x22pcreprni.h:x22" -r":x22pcre_printint.src:x22" *.*
gsar -o -s"if 1" -r"ifdef HAVE_CONFIG_H" *.*
cd ..

View File

@@ -0,0 +1,70 @@
@echo off
rem
rem $Id$
rem
rem Tested with PCRE 7.7
rem NOTE: Purpose of this script is to take the original
rem PCRE file from its source distribution and convert
rem them to the short filenames we use here in Harbour.
rem Short filenames are needed for full DJGPP support.
rem Some other automated modifications are also done
rem to help compiling the sources "as-is", to try to
rem avoid any manual editing on these foreign sources.
rem [vszakats]
rem
rem This tool uses 'GNU gsar' for search and replace.
rem and 'GNU unix2dos' for line ending conversion.
rem
rem DISCLAIMER: This tool is targeted only to Harbour core
rem maintainers. If you're not one of them you
rem don't have to mess with this tool.
attrib +R _hbconf.h
attrib +R _hbpcreg.c
del *.c
del *.h
attrib -R _hbconf.h
attrib -R _hbpcreg.c
copy ori_src\config.h.generic config.h
copy ori_src\pcre.h.generic pcre.h
copy ori_src\pcre_internal.h pcreinal.h
copy ori_src\ucp.h ucp.h
copy ori_src\ucpinternal.h ucpinter.h
copy ori_src\ucptable.h ucptable.h
copy ori_src\pcre_chartables.c.dist chartabs.c
copy ori_src\pcre_compile.c pcrecomp.c
copy ori_src\pcre_config.c pcreconf.c
copy ori_src\pcre_dfa_exec.c pcredfa.c
copy ori_src\pcre_exec.c pcreexec.c
copy ori_src\pcre_fullinfo.c pcrefinf.c
copy ori_src\pcre_get.c pcreget.c
copy ori_src\pcre_info.c pcreinfo.c
copy ori_src\pcre_maketables.c pcremktb.c
copy ori_src\pcre_newline.c pcrenewl.c
copy ori_src\pcre_ord2utf8.c pcreoutf.c
copy ori_src\pcre_printint.src pcreprni.h
copy ori_src\pcre_refcount.c pcrerefc.c
copy ori_src\pcre_study.c pcrestud.c
copy ori_src\pcre_tables.c pcretabs.c
copy ori_src\pcre_try_flipped.c pcretryf.c
copy ori_src\pcre_ucp_searchfuncs.c pcrefind.c
copy ori_src\pcre_valid_utf8.c pcrevutf.c
copy ori_src\pcre_version.c pcrever.c
copy ori_src\pcre_xclass.c pcrexcls.c
unix2dos *.c
unix2dos *.h
gsar -o -s":x22pcre_printint.src:x22" -r":x22pcreprni.h:x22" *.c
gsar -o -s":x22pcre_printint.src:x22" -r":x22pcreprni.h:x22" *.h
gsar -o -s":x22pcre_internal.h:x22" -r":x22pcreinal.h:x22" *.c
gsar -o -s":x22pcre_internal.h:x22" -r":x22pcreinal.h:x22" *.h
gsar -o -s":x22ucpinternal.h:x22" -r":x22ucpinter.h:x22" *.c
gsar -o -s":x22ucpinternal.h:x22" -r":x22ucpinter.h:x22" *.h
gsar -o -s":x22config.h:x22" -r":x22_hbconf.h:x22" *.c
gsar -o -s":x22config.h:x22" -r":x22_hbconf.h:x22" *.h
gsar -o -s":x22_hbconf.h:x22" -r":x22config.h:x22" _hbconf.h
gsar -o -s"ifdef HAVE_CONFIG_H" -r"if 1" *.c

View File

@@ -1,116 +1,307 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* On Unix systems config.in is converted by configure into config.h. PCRE is
written in Standard C, but there are a few non-standard things it can cope
with, allowing it to run on SunOS4 and other "close to standard" systems.
On a non-Unix system you should just copy this file into config.h, and set up
the macros the way you need them. You should normally change the definitions of
HAVE_STRERROR and HAVE_MEMMOVE to 1. Unfortunately, because of the way autoconf
works, these cannot be made the defaults. If your system has bcopy() and not
memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your
system has neither bcopy() nor memmove(), leave them both as 0; an emulation
function will be used. */
/* On Unix-like systems config.h.in is converted by "configure" into config.h.
Some other environments also support the use of "configure". PCRE is written in
Standard C, but there are a few non-standard things it can cope with, allowing
it to run on SunOS4 and other "close to standard" systems.
If you are going to build PCRE "by hand" on a system without "configure" you
should copy the distributed config.h.generic to config.h, and then set up the
macro definitions the way you need them. You must then add -DHAVE_CONFIG_H to
all of your compile commands, so that config.h is included at the start of
every source.
Alternatively, you can avoid editing by using -D on the compiler command line
to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H.
PCRE uses memmove() if HAVE_MEMMOVE is set to 1; otherwise it uses bcopy() if
HAVE_BCOPY is set to 1. If your system has neither bcopy() nor memmove(), set
them both to 0; an emulation function will be used. */
/* By default, the \R escape sequence matches any Unicode line ending
character or sequence of characters. If BSR_ANYCRLF is defined, this is
changed so that backslash-R matches only CR, LF, or CRLF. The build- time
default can be overridden by the user of PCRE at runtime. On systems that
support it, "configure" can be used to override the default. */
/* #undef BSR_ANYCRLF */
/* If you are compiling for a system that uses EBCDIC instead of ASCII
character codes, define this macro as 1. On systems that can use "configure",
this can be done via --enable-ebcdic. */
character codes, define this macro as 1. On systems that can use
"configure", this can be done via --enable-ebcdic. */
/* #undef EBCDIC */
#ifndef EBCDIC
#define EBCDIC 0
/* Define to 1 if you have the `bcopy' function. */
#ifndef HAVE_BCOPY
#define HAVE_BCOPY 1
#endif
/* If you are compiling for a system that needs some magic to be inserted
before the definition of an exported function, define this macro to contain the
relevant magic. It apears at the start of every exported function. */
/* Define to 1 if you have the <bits/type_traits.h> header file. */
/* #undef HAVE_BITS_TYPE_TRAITS_H */
#define EXPORT
/* Define to 1 if you have the <bzlib.h> header file. */
#ifndef HAVE_BZLIB_H
#define HAVE_BZLIB_H 1
#endif
/* Define to empty if the "const" keyword does not work. */
/* Define to 1 if you have the <dirent.h> header file. */
#ifndef HAVE_DIRENT_H
#define HAVE_DIRENT_H 1
#endif
#undef const
/* Define to 1 if you have the <dlfcn.h> header file. */
#ifndef HAVE_DLFCN_H
#define HAVE_DLFCN_H 1
#endif
/* Define to "unsigned" if <stddef.h> doesn't define size_t. */
/* Define to 1 if you have the <inttypes.h> header file. */
#ifndef HAVE_INTTYPES_H
#define HAVE_INTTYPES_H 1
#endif
#undef size_t
/* Define to 1 if you have the <limits.h> header file. */
#ifndef HAVE_LIMITS_H
#define HAVE_LIMITS_H 1
#endif
/* The following two definitions are mainly for the benefit of SunOS4, which
doesn't have the strerror() or memmove() functions that should be present in
all Standard C libraries. The macros HAVE_STRERROR and HAVE_MEMMOVE should
normally be defined with the value 1 for other systems, but unfortunately we
can't make this the default because "configure" files generated by autoconf
will only change 0 to 1; they won't change 1 to 0 if the functions are not
found. */
/* Define to 1 if the system has the type `long long'. */
#ifndef HAVE_LONG_LONG
#define HAVE_LONG_LONG 1
#endif
/* Define to 1 if you have the `memmove' function. */
#ifndef HAVE_MEMMOVE
#define HAVE_MEMMOVE 1
#endif
/* Define to 1 if you have the <memory.h> header file. */
#ifndef HAVE_MEMORY_H
#define HAVE_MEMORY_H 1
#endif
/* Define to 1 if you have the <readline/history.h> header file. */
#ifndef HAVE_READLINE_HISTORY_H
#define HAVE_READLINE_HISTORY_H 1
#endif
/* Define to 1 if you have the <readline/readline.h> header file. */
#ifndef HAVE_READLINE_READLINE_H
#define HAVE_READLINE_READLINE_H 1
#endif
/* Define to 1 if you have the <stdint.h> header file. */
#ifndef HAVE_STDINT_H
#define HAVE_STDINT_H 1
#endif
/* Define to 1 if you have the <stdlib.h> header file. */
#ifndef HAVE_STDLIB_H
#define HAVE_STDLIB_H 1
#endif
/* Define to 1 if you have the `strerror' function. */
#ifndef HAVE_STRERROR
#define HAVE_STRERROR 1
#define HAVE_MEMMOVE 1
/* There are some non-Unix systems that don't even have bcopy(). If this macro
is false, an emulation is used. If HAVE_MEMMOVE is set to 1, the value of
HAVE_BCOPY is not relevant. */
#define HAVE_BCOPY 0
/* The value of NEWLINE determines the newline character. The default is to
leave it up to the compiler, but some sites want to force a particular value.
On Unix systems, "configure" can be used to override this default. */
#ifndef NEWLINE
#define NEWLINE '\n'
#endif
/* The value of LINK_SIZE determines the number of bytes used to store
links as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows for
longer patterns in extreme cases. On Unix systems, "configure" can be used to
override this default. */
/* Define to 1 if you have the <string> header file. */
#ifndef HAVE_STRING
#define HAVE_STRING 1
#endif
/* Define to 1 if you have the <strings.h> header file. */
#ifndef HAVE_STRINGS_H
#define HAVE_STRINGS_H 1
#endif
/* Define to 1 if you have the <string.h> header file. */
#ifndef HAVE_STRING_H
#define HAVE_STRING_H 1
#endif
/* Define to 1 if you have the `strtoll' function. */
/* #undef HAVE_STRTOLL */
/* Define to 1 if you have the `strtoq' function. */
#ifndef HAVE_STRTOQ
#define HAVE_STRTOQ 1
#endif
/* Define to 1 if you have the <sys/stat.h> header file. */
#ifndef HAVE_SYS_STAT_H
#define HAVE_SYS_STAT_H 1
#endif
/* Define to 1 if you have the <sys/types.h> header file. */
#ifndef HAVE_SYS_TYPES_H
#define HAVE_SYS_TYPES_H 1
#endif
/* Define to 1 if you have the <type_traits.h> header file. */
/* #undef HAVE_TYPE_TRAITS_H */
/* Define to 1 if you have the <unistd.h> header file. */
#ifndef HAVE_UNISTD_H
#define HAVE_UNISTD_H 1
#endif
/* Define to 1 if the system has the type `unsigned long long'. */
#ifndef HAVE_UNSIGNED_LONG_LONG
#define HAVE_UNSIGNED_LONG_LONG 1
#endif
/* Define to 1 if you have the <windows.h> header file. */
/* #undef HAVE_WINDOWS_H */
/* Define to 1 if you have the <zlib.h> header file. */
#ifndef HAVE_ZLIB_H
#define HAVE_ZLIB_H 1
#endif
/* Define to 1 if you have the `_strtoi64' function. */
/* #undef HAVE__STRTOI64 */
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
However, PCRE can also be compiled to use 3 or 4 bytes instead. This allows
for longer patterns in extreme cases. On systems that support it,
"configure" can be used to override this default. */
#ifndef LINK_SIZE
#define LINK_SIZE 2
#define LINK_SIZE 2
#endif
/* The value of MATCH_LIMIT determines the default number of times the match()
function can be called during a single execution of pcre_exec(). (There is a
runtime method of setting a different limit.) The limit exists in order to
catch runaway regular expressions that take for ever to determine that they do
not match. The default is set very large so that it does not accidentally catch
legitimate cases. On Unix systems, "configure" can be used to override this
default default. */
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can be called during a single execution of
pcre_exec(). There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take
for ever to determine that they do not match. The default is set very large
so that it does not accidentally catch legitimate cases. On systems that
support it, "configure" can be used to override this default default. */
#ifndef MATCH_LIMIT
#define MATCH_LIMIT 10000000
#endif
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE requires
three integers per substring, whereas the POSIX interface provides only two. If
the number of expected substrings is small, the wrapper function uses space on
the stack, because this is faster than using malloc() for each call. The
threshold above which the stack is no longer use is defined by POSIX_MALLOC_
THRESHOLD. On Unix systems, "configure" can be used to override this default.
*/
/* The above limit applies to all calls of match(), whether or not they
increase the recursion depth. In some environments it is desirable to limit
the depth of recursive calls of match() more strictly, in order to restrict
the maximum amount of stack (or heap, if NO_RECURSE is defined) that is
used. The value of MATCH_LIMIT_RECURSION applies only to recursive calls of
match(). To have any useful effect, it must be less than the value of
MATCH_LIMIT. The default is to use the same value as MATCH_LIMIT. There is
a runtime method for setting a different limit. On systems that support it,
"configure" can be used to override the default. */
#ifndef MATCH_LIMIT_RECURSION
#define MATCH_LIMIT_RECURSION MATCH_LIMIT
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_COUNT
#define MAX_NAME_COUNT 10000
#endif
/* This limit is parameterized just in case anybody ever wants to change it.
Care must be taken if it is increased, because it guards against integer
overflow caused by enormously large patterns. */
#ifndef MAX_NAME_SIZE
#define MAX_NAME_SIZE 32
#endif
/* The value of NEWLINE determines the newline character sequence. On systems
that support it, "configure" can be used to override the default, which is
10. The possible values are 10 (LF), 13 (CR), 3338 (CRLF), -1 (ANY), or -2
(ANYCRLF). */
#ifndef NEWLINE
#define NEWLINE 10
#endif
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited
size. Define NO_RECURSE to get a version that doesn't use recursion in the
match() function; instead it creates its own stack by steam using
pcre_recurse_malloc() to obtain memory from the heap. For more detail, see
the comments and other stuff just above the match() function. On systems
that support it, "configure" can be used to set this in the Makefile (use
--disable-stack-for-recursion). */
/* #undef NO_RECURSE */
/* Name of package */
#define PACKAGE "pcre"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME "PCRE"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "PCRE 7.7"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "pcre"
/* Define to the version of this package. */
#define PACKAGE_VERSION "7.7"
/* If you are compiling for a system other than a Unix-like system or
Win32, and it needs some magic to be inserted before the definition
of a function that is exported by the library, define this macro to
contain the relevant magic. If you do not define this macro, it
defaults to "extern" for a C compiler and "extern C" for a C++
compiler on non-Win32 systems. This macro apears at the start of
every exported function that is part of the external API. It does
not appear on functions that are "external" in the C sense, but
which are internal to the library. */
/* #undef PCRE_EXP_DEFN */
/* Define if linking statically (TODO: make nice with Libtool) */
/* #undef PCRE_STATIC */
/* When calling PCRE via the POSIX interface, additional working storage is
required for holding the pointers to capturing substrings because PCRE
requires three integers per substring, whereas the POSIX interface provides
only two. If the number of expected substrings is small, the wrapper
function uses space on the stack, because this is faster than using
malloc() for each call. The threshold above which the stack is no longer
used is defined by POSIX_MALLOC_THRESHOLD. On systems that support it,
"configure" can be used to override this default. */
#ifndef POSIX_MALLOC_THRESHOLD
#define POSIX_MALLOC_THRESHOLD 10
#endif
/* PCRE uses recursive function calls to handle backtracking while matching.
This can sometimes be a problem on systems that have stacks of limited size.
Define NO_RECURSE to get a version that doesn't use recursion in the match()
function; instead it creates its own stack by steam using pcre_recurse_malloc
to get memory. For more detail, see comments and other stuff just above the
match() function. On Unix systems, "configure" can be used to set this in the
Makefile (use --disable-stack-for-recursion). */
/* #define NO_RECURSE */
/* xHarbour stuff - default definitions */
#ifndef SUPPORT_UTF8
#define SUPPORT_UTF8
/* Define to 1 if you have the ANSI C header files. */
#ifndef STDC_HEADERS
#define STDC_HEADERS 1
#endif
#ifndef SUPPORT_UCP
#define SUPPORT_UCP
/* Define to allow pcregrep to be linked with libbz2, so that it is able to
handle .bz2 files. */
/* #undef SUPPORT_LIBBZ2 */
/* Define to allow pcretest to be linked with libreadline. */
/* #undef SUPPORT_LIBREADLINE */
/* Define to allow pcregrep to be linked with libz, so that it is able to
handle .gz files. */
/* #undef SUPPORT_LIBZ */
/* Define to enable support for Unicode properties */
/* #undef SUPPORT_UCP */
/* Define to enable support for the UTF-8 Unicode encoding. */
/* #undef SUPPORT_UTF8 */
/* Version number of package */
#ifndef VERSION
#define VERSION "7.7"
#endif
/* End */
/* Define to empty if `const' does not conform to ANSI C. */
/* #undef const */
/* Define to `unsigned int' if <sys/types.h> does not define. */
/* #undef size_t */

View File

@@ -1,173 +0,0 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This is a freestanding support program to generate a file containing default
character tables for PCRE. The tables are built according to the default C
locale. Now that pcre_maketables is a function visible to the outside world, we
make use of its code from here in order to be consistent. */
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "pcreinal.h"
#define DFTABLES /* pcremktb.c notices this */
#include "pcremktb.c"
int main(int argc, char **argv)
{
int i;
FILE *f;
const unsigned char *tables = pcre_maketables();
const unsigned char *base_of_tables = tables;
if (argc != 2)
{
fprintf(stderr, "dftables: one filename argument is required\n");
return 1;
}
f = fopen(argv[1], "wb");
if (f == NULL)
{
fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]);
return 1;
}
/* There are two fprintf() calls here, because gcc in pedantic mode complains
about the very long string otherwise. */
fprintf(f,
"/*************************************************\n"
"* Perl-Compatible Regular Expressions *\n"
"*************************************************/\n\n"
"/* This file is automatically written by the dftables auxiliary \n"
"program. If you edit it by hand, you might like to edit the Makefile to \n"
"prevent its ever being regenerated.\n\n");
fprintf(f,
"This file contains the default tables for characters with codes less than\n"
"128 (ASCII characters). These tables are used when no external tables are\n"
"passed to PCRE. */\n\n"
"extern const unsigned char _pcre_default_tables[];\n"
"const unsigned char _pcre_default_tables[] = {\n\n"
"/* This table is a lower casing table. */\n\n");
fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
fprintf(f, ",\n\n");
fprintf(f, "/* This table is a case flipping table. */\n\n");
fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0) fprintf(f, "\n ");
fprintf(f, "%3d", *tables++);
if (i != 255) fprintf(f, ",");
}
fprintf(f, ",\n\n");
fprintf(f,
"/* This table contains bit maps for various character classes.\n"
"Each map is 32 bytes long and the bits run from the least\n"
"significant end of each byte. The classes that have their own\n"
"maps are: space, xdigit, digit, upper, lower, word, graph\n"
"print, punct, and cntrl. Other classes are built from combinations. */\n\n");
fprintf(f, " ");
for (i = 0; i < cbit_length; i++)
{
if ((i & 7) == 0 && i != 0)
{
if ((i & 31) == 0) fprintf(f, "\n");
fprintf(f, "\n ");
}
fprintf(f, "0x%02x", *tables++);
if (i != cbit_length - 1) fprintf(f, ",");
}
fprintf(f, ",\n\n");
fprintf(f,
"/* This table identifies various classes of character by individual bits:\n"
" 0x%02x white space character\n"
" 0x%02x letter\n"
" 0x%02x decimal digit\n"
" 0x%02x hexadecimal digit\n"
" 0x%02x alphanumeric or '_'\n"
" 0x%02x regular expression metacharacter or binary zero\n*/\n\n",
ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word,
ctype_meta);
fprintf(f, " ");
for (i = 0; i < 256; i++)
{
if ((i & 7) == 0 && i != 0)
{
fprintf(f, " /* ");
if (isprint(i-8)) fprintf(f, " %c -", i-8);
else fprintf(f, "%3d-", i-8);
if (isprint(i-1)) fprintf(f, " %c ", i-1);
else fprintf(f, "%3d", i-1);
fprintf(f, " */\n ");
}
fprintf(f, "0x%02x", *tables++);
if (i != 255) fprintf(f, ",");
}
fprintf(f, "};/* ");
if (isprint(i-8)) fprintf(f, " %c -", i-8);
else fprintf(f, "%3d-", i-8);
if (isprint(i-1)) fprintf(f, " %c ", i-1);
else fprintf(f, "%3d", i-1);
fprintf(f, " */\n\n/* End of chartabs.c */\n");
fclose(f);
free((void *)base_of_tables);
return 0;
}
/* End of dftables.c */

View File

@@ -2,10 +2,10 @@
* Perl-Compatible Regular Expressions *
*************************************************/
/* In its original form, this is the .in file that is transformed by
"configure" into pcre.h.
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -39,34 +39,48 @@ POSSIBILITY OF SUCH DAMAGE.
#ifndef _PCRE_H
#define _PCRE_H
/* The file pcre.h is build by "configure". Do not edit it; instead
make changes to pcre.in. */
/* The current PCRE version information. */
#define PCRE_MAJOR 6
#define PCRE_MINOR 3
#define PCRE_DATE 15-Aug-2005
#define PCRE_MAJOR 7
#define PCRE_MINOR 7
#define PCRE_PRERELEASE
#define PCRE_DATE 2008-05-07
/* Win32 uses DLL by default; it needs special stuff for exported functions. */
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
#ifdef _WIN32
# ifdef PCRE_DEFINITION
# ifdef DLL_EXPORT
# define PCRE_DATA_SCOPE __declspec(dllexport)
#if defined(_WIN32) && !defined(PCRE_STATIC)
# ifndef PCRE_EXP_DECL
# define PCRE_EXP_DECL extern __declspec(dllimport)
# endif
# ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
# endif
# else
# ifndef PCRE_STATIC
# define PCRE_DATA_SCOPE extern __declspec(dllimport)
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN __declspec(dllimport)
# endif
# endif
#endif
/* For other operating systems, we use the standard "extern". */
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE_DATA_SCOPE
#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_DATA_SCOPE extern "C"
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_DATA_SCOPE extern
# define PCRE_EXP_DECL extern
# endif
#endif
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
#endif
@@ -102,6 +116,15 @@ extern "C" {
#define PCRE_DFA_SHORTEST 0x00010000
#define PCRE_DFA_RESTART 0x00020000
#define PCRE_FIRSTLINE 0x00040000
#define PCRE_DUPNAMES 0x00080000
#define PCRE_NEWLINE_CR 0x00100000
#define PCRE_NEWLINE_LF 0x00200000
#define PCRE_NEWLINE_CRLF 0x00300000
#define PCRE_NEWLINE_ANY 0x00400000
#define PCRE_NEWLINE_ANYCRLF 0x00500000
#define PCRE_BSR_ANYCRLF 0x00800000
#define PCRE_BSR_UNICODE 0x01000000
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
/* Exec-time and get/set-time error codes */
@@ -109,7 +132,8 @@ extern "C" {
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_NODE (-5)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
@@ -125,6 +149,9 @@ extern "C" {
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
/* Request types for pcre_fullinfo() */
@@ -141,8 +168,12 @@ extern "C" {
#define PCRE_INFO_NAMETABLE 9
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
#define PCRE_INFO_OKPARTIAL 12
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
/* Request types for pcre_config() */
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
@@ -151,19 +182,31 @@ extern "C" {
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
/* Bit flags for the pcre_extra structure */
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such as way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
@@ -174,6 +217,7 @@ typedef struct pcre_extra {
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
@@ -186,7 +230,7 @@ typedef struct pcre_callout_block {
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
const char *subject; /* The subject being matched */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
@@ -206,50 +250,52 @@ that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#ifndef VPCOMPAT
PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t);
PCRE_DATA_SCOPE void (*pcre_free)(void *);
PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t);
PCRE_DATA_SCOPE void (*pcre_stack_free)(void *);
PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *);
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
#else /* VPCOMPAT */
PCRE_DATA_SCOPE void *pcre_malloc(size_t);
PCRE_DATA_SCOPE void pcre_free(void *);
PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t);
PCRE_DATA_SCOPE void pcre_stack_free(void *);
PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *);
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
#endif /* VPCOMPAT */
/* Exported PCRE functions */
PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *,
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **,
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_DATA_SCOPE int pcre_config(int, void *);
PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *,
PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *,
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
int);
PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *,
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *,
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_DATA_SCOPE void pcre_free_substring(const char *);
PCRE_DATA_SCOPE void pcre_free_substring_list(const char **);
PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int,
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *,
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *);
PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int,
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int,
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *);
PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void);
PCRE_DATA_SCOPE int pcre_refcount(pcre *, int);
PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_DATA_SCOPE const char *pcre_version(void);
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL const char *pcre_version(void);
#ifdef __cplusplus
} /* extern "C" */

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains the external function pcre_config(). */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -58,7 +62,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
EXPORT int
PCRE_EXP_DEFN int
pcre_config(int what, void *where)
{
switch (what)
@@ -83,6 +87,14 @@ switch (what)
*((int *)where) = NEWLINE;
break;
case PCRE_CONFIG_BSR:
#ifdef BSR_ANYCRLF
*((int *)where) = 1;
#else
*((int *)where) = 0;
#endif
break;
case PCRE_CONFIG_LINK_SIZE:
*((int *)where) = LINK_SIZE;
break;
@@ -95,6 +107,10 @@ switch (what)
*((unsigned int *)where) = MATCH_LIMIT;
break;
case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
*((unsigned int *)where) = MATCH_LIMIT_RECURSION;
break;
case PCRE_CONFIG_STACKRECURSE:
#ifdef NO_RECURSE
*((int *)where) = 0;
@@ -109,4 +125,4 @@ switch (what)
return 0;
}
/* End of pcreconf.c */
/* End of pcre_config.c */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +1,12 @@
/*************************************************
* libucp - Unicode Property Table handler *
* Perl-Compatible Regular Expressions *
*************************************************/
/* Copyright (c) University of Cambridge 2004 */
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
/* This little library provides a fast way of obtaining the basic Unicode
properties of a character, using a compact binary tree that occupies less than
100K bytes.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -37,117 +37,143 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains code for searching the table of Unicode character
properties. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
#include "ucp.h" /* Exported interface */
#include "ucpinter.h" /* Internal table details */
#include "ucptable.c" /* The table itself */
#include "ucp.h" /* Category definitions */
#include "ucpinter.h" /* Internal table details */
#include "ucptable.h" /* The table itself */
/* In some environments, external functions have to be preceded by some magic.
In my world (Unix), they do not. Use a macro to deal with this. */
/* Table to translate from particular type value to the general value. */
#ifndef EXPORT
#define EXPORT
#endif
static const int ucp_gentype[] = {
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
ucp_P, ucp_P, /* Ps, Po */
ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
};
/*************************************************
* Search table and return data *
* Search table and return type *
*************************************************/
/* Two values are returned: the category is ucp_C, ucp_L, etc. The detailed
character type is ucp_Lu, ucp_Nd, etc.
/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
Arguments:
c the character value
type_ptr the detailed character type is returned here
case_ptr for letters, the opposite case is returned here, if there
is one, else zero
script_ptr the script is returned here
Returns: the character type category or -1 if not found
Returns: the character type category
*/
EXPORT int
ucp_findchar(const int c, int *type_ptr, int *case_ptr)
int
_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
{
cnode *node = ucp_table;
register int cc = c;
int case_offset;
int bot = 0;
int top = sizeof(ucp_table)/sizeof(cnode);
int mid;
/* The table is searched using a binary chop. You might think that using
intermediate variables to hold some of the common expressions would speed
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
makes things a lot slower. */
for (;;)
{
register int d = node->f1 | ((node->f0 & f0_chhmask) << 16);
if (cc == d) break;
if (cc < d)
if (top <= bot)
{
if ((node->f0 & f0_leftexists) == 0) return -1;
node ++;
*type_ptr = ucp_Cn;
*script_ptr = ucp_Common;
return ucp_C;
}
mid = (bot + top) >> 1;
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
else
{
register int roffset = (node->f2 & f2_rightmask) >> f2_rightshift;
if (roffset == 0) return -1;
node += 1 << (roffset - 1);
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
c <= (ucp_table[mid].f0 & f0_charmask) +
(ucp_table[mid].f1 & f1_rangemask)) break;
bot = mid + 1;
}
}
switch ((*type_ptr = ((node->f0 & f0_typemask) >> f0_typeshift)))
{
case ucp_Cc:
case ucp_Cf:
case ucp_Cn:
case ucp_Co:
case ucp_Cs:
return ucp_C;
/* Found an entry in the table. Set the script and detailed type values, and
return the general type. */
case ucp_Ll:
case ucp_Lu:
case_offset = node->f2 & f2_casemask;
if ((case_offset & 0x0100) != 0) case_offset |= 0xfffff000;
*case_ptr = (case_offset == 0)? 0 : cc + case_offset;
return ucp_L;
*script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
*type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
case ucp_Lm:
case ucp_Lo:
case ucp_Lt:
*case_ptr = 0;
return ucp_L;
case ucp_Mc:
case ucp_Me:
case ucp_Mn:
return ucp_M;
case ucp_Nd:
case ucp_Nl:
case ucp_No:
return ucp_N;
case ucp_Pc:
case ucp_Pd:
case ucp_Pe:
case ucp_Pf:
case ucp_Pi:
case ucp_Ps:
case ucp_Po:
return ucp_P;
case ucp_Sc:
case ucp_Sk:
case ucp_Sm:
case ucp_So:
return ucp_S;
case ucp_Zl:
case ucp_Zp:
case ucp_Zs:
return ucp_Z;
default: /* "Should never happen" */
return -1;
}
return ucp_gentype[*type_ptr];
}
/* End of pcrefind.c */
/*************************************************
* Search table and return other case *
*************************************************/
/* If the given character is a letter, and there is another case for the
letter, return the other case. Otherwise, return -1.
Arguments:
c the character value
Returns: the other case or NOTACHAR if none
*/
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
int bot = 0;
int top = sizeof(ucp_table)/sizeof(cnode);
int mid, offset;
/* The table is searched using a binary chop. You might think that using
intermediate variables to hold some of the common expressions would speed
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
makes things a lot slower. */
for (;;)
{
if (top <= bot) return -1;
mid = (bot + top) >> 1;
if (c == (ucp_table[mid].f0 & f0_charmask)) break;
if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
else
{
if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
c <= (ucp_table[mid].f0 & f0_charmask) +
(ucp_table[mid].f1 & f1_rangemask)) break;
bot = mid + 1;
}
}
/* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
return the other case if there is one, else NOTACHAR. */
if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
offset = ucp_table[mid].f1 & f1_casemask;
if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
return (offset == 0)? NOTACHAR : c + offset;
}
/* End of pcre_ucp_searchfuncs.c */

View File

@@ -2,11 +2,11 @@
* Perl-Compatible Regular Expressions *
*************************************************/
/*PCRE is a library of functions to support regular expressions whose syntax
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
information about a compiled pattern. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -61,7 +65,7 @@ Arguments:
Returns: 0 if data returned, negative on error
*/
EXPORT int
PCRE_EXP_DEFN int
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{
@@ -106,8 +110,8 @@ switch (what)
case PCRE_INFO_FIRSTBYTE:
*((int *)where) =
((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
break;
/* Make sure we pass back the pointer to the bit vector in the external
@@ -121,7 +125,7 @@ switch (what)
case PCRE_INFO_LASTLITERAL:
*((int *)where) =
((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;
((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
break;
case PCRE_INFO_NAMEENTRYSIZE:
@@ -140,10 +144,22 @@ switch (what)
*((const uschar **)where) = (const uschar *)(_pcre_default_tables);
break;
case PCRE_INFO_OKPARTIAL:
*((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
break;
case PCRE_INFO_JCHANGED:
*((int *)where) = (re->flags & PCRE_JCHANGED) != 0;
break;
case PCRE_INFO_HASCRORLF:
*((int *)where) = (re->flags & PCRE_HASCRORLF) != 0;
break;
default: return PCRE_ERROR_BADOPTION;
}
return 0;
}
/* End of pcrefinf.c */
/* End of pcre_fullinfo.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,10 @@ from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -50,8 +54,8 @@ for these functions came from Scott Wimer. */
* Find number for named string *
*************************************************/
/* This function is used by the two extraction functions below, as well
as being generally available.
/* This function is used by the get_first_set() function below, as well
as being generally available. It assumes that names are unique.
Arguments:
code the compiled regex
@@ -93,6 +97,113 @@ return PCRE_ERROR_NOSUBSTRING;
/*************************************************
* Find (multiple) entries for named string *
*************************************************/
/* This is used by the get_first_set() function below, as well as being
generally available. It is used when duplicated names are permitted.
Arguments:
code the compiled regex
stringname the name whose entries required
firstptr where to put the pointer to the first entry
lastptr where to put the pointer to the last entry
Returns: the length of each entry, or a negative number
(PCRE_ERROR_NOSUBSTRING) if not found
*/
int
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
{
int rc;
int entrysize;
int top, bot;
uschar *nametable, *lastentry;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
return rc;
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
return rc;
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
return rc;
lastentry = nametable + entrysize * (top - 1);
bot = 0;
while (top > bot)
{
int mid = (top + bot) / 2;
uschar *entry = nametable + entrysize*mid;
int c = strcmp(stringname, (char *)(entry + 2));
if (c == 0)
{
uschar *first = entry;
uschar *last = entry;
while (first > nametable)
{
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
first -= entrysize;
}
while (last < lastentry)
{
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
last += entrysize;
}
*firstptr = (char *)first;
*lastptr = (char *)last;
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
}
return PCRE_ERROR_NOSUBSTRING;
}
/*************************************************
* Find first set of multiple named strings *
*************************************************/
/* This function allows for duplicate names in the table of named substrings.
It returns the number of the first one that was set in a pattern match.
Arguments:
code the compiled regex
stringname the name of the capturing substring
ovector the vector of matched substrings
Returns: the number of the first that is set,
or the number of the last one if none are set,
or a negative number on error
*/
static int
get_first_set(const pcre *code, const char *stringname, int *ovector)
{
const real_pcre *re = (const real_pcre *)code;
int entrysize;
char *first, *last;
uschar *entry;
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
return pcre_get_stringnumber(code, stringname);
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
if (entrysize <= 0) return entrysize;
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
{
int n = (entry[0] << 8) + entry[1];
if (ovector[n*2] >= 0) return n;
}
return (first[0] << 8) + first[1];
}
/*************************************************
* Copy captured string to given buffer *
*************************************************/
@@ -142,7 +253,8 @@ return yield;
*************************************************/
/* This function copies a single captured substring into a given buffer,
identifying it by name.
identifying it by name. If the regex permits duplicate names, the first
substring that is set is chosen.
Arguments:
code the compiled regex
@@ -168,7 +280,7 @@ int
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
{
int n = pcre_get_stringnumber(code, stringname);
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
}
@@ -299,7 +411,8 @@ return yield;
*************************************************/
/* This function copies a single captured substring, identified by name, into
new store.
new store. If the regex permits duplicate names, the first substring that is
set is chosen.
Arguments:
code the compiled regex
@@ -324,7 +437,7 @@ int
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
{
int n = pcre_get_stringnumber(code, stringname);
int n = get_first_set(code, stringname, ovector);
if (n <= 0) return n;
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
}
@@ -349,4 +462,4 @@ pcre_free_substring(const char *pointer)
(pcre_free)((void *)pointer);
}
/* End of pcreget.c */
/* End of pcre_get.c */

File diff suppressed because it is too large Load Diff

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,10 @@ information about a compiled pattern. However, use of this function is now
deprecated, as it has been superseded by pcre_fullinfo(). */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -68,7 +72,7 @@ Returns: number of capturing subpatterns
or negative values on error
*/
EXPORT int
PCRE_EXP_DEFN int
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;
@@ -81,9 +85,9 @@ if (re->magic_number != MAGIC_NUMBER)
}
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
if (first_byte != NULL)
*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
*first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
return re->top_bracket;
}
/* End of pcreinfo.c */
/* End of pcre_info.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -45,7 +45,10 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
#ifndef DFTABLES
#include "pcreinal.h"
# if 1
# include "_hbconf.h"
# endif
# include "pcreinal.h"
#endif
@@ -86,29 +89,22 @@ for (i = 0; i < 256; i++) *p++ = tolower(i);
for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i);
/* Then the character class tables. Don't try to be clever and save effort
on exclusive ones - in some locales things may be different. Note that the
table for "space" includes everything "isspace" gives, including VT in the
default locale. This makes it work for the POSIX class [:space:]. */
/* Then the character class tables. Don't try to be clever and save effort on
exclusive ones - in some locales things may be different. Note that the table
for "space" includes everything "isspace" gives, including VT in the default
locale. This makes it work for the POSIX class [:space:]. Note also that it is
possible for a character to be alnum or alpha without being lower or upper,
such as "male and female ordinals" (\xAA and \xBA) in the fr_FR locale (at
least under Debian Linux's locales as of 12/2005). So we must test for alnum
specially. */
memset(p, 0, cbit_length);
for (i = 0; i < 256; i++)
{
if (isdigit(i))
{
p[cbit_digit + i/8] |= 1 << (i&7);
p[cbit_word + i/8] |= 1 << (i&7);
}
if (isupper(i))
{
p[cbit_upper + i/8] |= 1 << (i&7);
p[cbit_word + i/8] |= 1 << (i&7);
}
if (islower(i))
{
p[cbit_lower + i/8] |= 1 << (i&7);
p[cbit_word + i/8] |= 1 << (i&7);
}
if (isdigit(i)) p[cbit_digit + i/8] |= 1 << (i&7);
if (isupper(i)) p[cbit_upper + i/8] |= 1 << (i&7);
if (islower(i)) p[cbit_lower + i/8] |= 1 << (i&7);
if (isalnum(i)) p[cbit_word + i/8] |= 1 << (i&7);
if (i == '_') p[cbit_word + i/8] |= 1 << (i&7);
if (isspace(i)) p[cbit_space + i/8] |= 1 << (i&7);
if (isxdigit(i))p[cbit_xdigit + i/8] |= 1 << (i&7);
@@ -137,9 +133,11 @@ for (i = 0; i < 256; i++)
meta-character, which in this sense is any character that terminates a run
of data characters. */
if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; }
if (strchr("\\*+?{^.$|()[", i) != 0) x += ctype_meta;
*p++ = x;
}
return yield;
}
/* End of pcremktb.c */
/* End of pcre_maketables.c */

View File

@@ -0,0 +1,164 @@
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains internal functions for testing newlines when more than
one kind of newline is to be recognized. When a newline is found, its length is
returned. In principle, we could implement several newline "types", each
referring to a different set of newline characters. At present, PCRE supports
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
and NLTYPE_ANY. The full list of Unicode newline characters is taken from
http://unicode.org/unicode/reports/tr18/. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
/*************************************************
* Check for newline at given position *
*************************************************/
/* It is guaranteed that the initial value of ptr is less than the end of the
string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
endptr pointer to the end of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
int *lenptr, BOOL utf8)
{
int c;
if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
{
case 0x000a: *lenptr = 1; return TRUE; /* LF */
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
return TRUE; /* CR */
default: return FALSE;
}
/* NLTYPE_ANY */
else switch(c)
{
case 0x000a: /* LF */
case 0x000b: /* VT */
case 0x000c: *lenptr = 1; return TRUE; /* FF */
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
return TRUE; /* CR */
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
default: return FALSE;
}
}
/*************************************************
* Check for newline at previous position *
*************************************************/
/* It is guaranteed that the initial value of ptr is greater than the start of
the string that is being processed.
Arguments:
ptr pointer to possible newline
type the newline type
startptr pointer to the start of the string
lenptr where to return the length
utf8 TRUE if in utf8 mode
Returns: TRUE or FALSE
*/
BOOL
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
int *lenptr, BOOL utf8)
{
int c;
ptr--;
#ifdef SUPPORT_UTF8
if (utf8)
{
BACKCHAR(ptr);
GETCHAR(c, ptr);
}
else c = *ptr;
#else /* no UTF-8 support */
c = *ptr;
#endif /* SUPPORT_UTF8 */
if (type == NLTYPE_ANYCRLF) switch(c)
{
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
return TRUE; /* LF */
case 0x000d: *lenptr = 1; return TRUE; /* CR */
default: return FALSE;
}
else switch(c)
{
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
return TRUE; /* LF */
case 0x000b: /* VT */
case 0x000c: /* FF */
case 0x000d: *lenptr = 1; return TRUE; /* CR */
case 0x0085: *lenptr = utf8? 2 : 1; return TRUE; /* NEL */
case 0x2028: /* LS */
case 0x2029: *lenptr = 3; return TRUE; /* PS */
default: return FALSE;
}
}
/* End of pcre_newline.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,9 @@ POSSIBILITY OF SUCH DAMAGE.
/* This file contains a private PCRE function that converts an ordinal
character value into a UTF8 string. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -59,9 +62,10 @@ Arguments:
Returns: number of characters placed in the buffer
*/
EXPORT int
int
_pcre_ord2utf8(int cvalue, uschar *buffer)
{
#ifdef SUPPORT_UTF8
register int i, j;
for (i = 0; i < _pcre_utf8_table1_size; i++)
if (cvalue <= _pcre_utf8_table1[i]) break;
@@ -73,6 +77,9 @@ for (j = i; j > 0; j--)
}
*buffer = _pcre_utf8_table2[i] | cvalue;
return i + 1;
#else
return 0; /* Keep compiler happy; this function won't ever be */
#endif /* called when SUPPORT_UTF8 is not defined. */
}
/* End of pcreoutf.c */
/* End of pcre_ord2utf8.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -38,17 +38,30 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains an PCRE private debugging function for printing out the
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. */
local functions. This source file is used in two places:
(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.
(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
#include "pcreinal.h"
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file. */
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
/* The table of operator names. */
static const char *OP_names[] = { OP_NAME_LIST };
/*************************************************
* Print single- or multi-byte character *
*************************************************/
@@ -58,9 +71,15 @@ print_char(FILE *f, uschar *ptr, BOOL utf8)
{
int c = *ptr;
#ifndef SUPPORT_UTF8
utf8 = utf8; /* Avoid compiler warning */
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
return 0;
#else
if (!utf8 || (c & 0xc0) != 0xc0)
{
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
return 0;
}
else
@@ -89,6 +108,7 @@ else
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
return a;
}
#endif
}
@@ -98,17 +118,19 @@ else
*************************************************/
static const char *
get_ucpname(int property)
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size; i >= 0; i--)
for (i = _pcre_utt_size - 1; i >= 0; i--)
{
if (property == _pcre_utt[i].value) break;
if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
}
return (i >= 0)? _pcre_utt[i].name : "??";
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
return "??";
/* It gets harder and harder to shut off unwanted compiler warnings. */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
}
@@ -119,10 +141,13 @@ return "??";
*************************************************/
/* Make this function work for a regex with integers either byte order.
However, we assume that what we are passed is a compiled regex. */
However, we assume that what we are passed is a compiled regex. The
print_lengths flag controls whether offsets and lengths of items are printed.
They can be turned off from pcretest so that automatic tests on bytecode can be
written that do not depend on the value of LINK_SIZE. */
EXPORT void
_pcre_printint(pcre *external_re, FILE *f)
static void
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
{
real_pcre *re = (real_pcre *)external_re;
uschar *codestart, *code;
@@ -153,17 +178,10 @@ for(;;)
int c;
int extra = 0;
fprintf(f, "%3d ", (int)(code - codestart));
if (*code >= OP_BRA)
{
if (*code - OP_BRA > EXTRACT_BASIC_MAX)
fprintf(f, "%3d Bra extra\n", GET(code, 1));
else
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
code += _pcre_OP_lengths[OP_BRA];
continue;
}
if (print_lengths)
fprintf(f, "%3d ", (int)(code - codestart));
else
fprintf(f, " ");
switch(*code)
{
@@ -177,31 +195,36 @@ for(;;)
break;
case OP_CHAR:
fprintf(f, " ");
do
{
fprintf(f, " ");
do
{
code++;
code += 1 + print_char(f, code, utf8);
}
while (*code == OP_CHAR);
fprintf(f, "\n");
continue;
code++;
code += 1 + print_char(f, code, utf8);
}
while (*code == OP_CHAR);
fprintf(f, "\n");
continue;
case OP_CHARNC:
fprintf(f, " NC ");
do
{
fprintf(f, " NC ");
do
{
code++;
code += 1 + print_char(f, code, utf8);
}
while (*code == OP_CHARNC);
fprintf(f, "\n");
continue;
code++;
code += 1 + print_char(f, code, utf8);
}
while (*code == OP_CHARNC);
fprintf(f, "\n");
continue;
case OP_CBRA:
case OP_SCBRA:
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
break;
case OP_BRA:
case OP_SBRA:
case OP_KETRMAX:
case OP_KETRMIN:
case OP_ALT:
@@ -212,41 +235,55 @@ for(;;)
case OP_ASSERTBACK_NOT:
case OP_ONCE:
case OP_COND:
case OP_SCOND:
case OP_REVERSE:
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
break;
case OP_BRANUMBER:
printf("%3d %s", GET2(code, 1), OP_names[*code]);
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_CREF:
if (GET2(code, 1) == CREF_RECURSE)
fprintf(f, " Cond recurse");
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
break;
case OP_RREF:
c = GET2(code, 1);
if (c == RREF_ANY)
fprintf(f, " Cond recurse any");
else
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
fprintf(f, " Cond recurse %d", c);
break;
case OP_DEF:
fprintf(f, " Cond def");
break;
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
fprintf(f, " ");
if (*code >= OP_TYPESTAR)
{
fprintf(f, "%s", OP_names[code[1]]);
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
{
fprintf(f, " %s ", get_ucpname(code[2]));
extra = 1;
fprintf(f, " %s ", get_ucpname(code[2], code[3]));
extra = 2;
}
}
else extra = print_char(f, code+1, utf8);
@@ -256,41 +293,50 @@ for(;;)
case OP_EXACT:
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
fprintf(f, " ");
extra = print_char(f, code+3, utf8);
fprintf(f, "{");
if (*code != OP_EXACT) fprintf(f, ",");
if (*code != OP_EXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_MINUPTO) fprintf(f, "?");
else if (*code == OP_POSUPTO) fprintf(f, "+");
break;
case OP_TYPEEXACT:
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
fprintf(f, " %s", OP_names[code[3]]);
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
{
fprintf(f, " %s ", get_ucpname(code[4]));
extra = 1;
fprintf(f, " %s ", get_ucpname(code[4], code[5]));
extra = 2;
}
fprintf(f, "{");
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
break;
case OP_NOT:
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
c = code[1];
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
else fprintf(f, " [^\\x%02x]", c);
break;
case OP_NOTSTAR:
case OP_NOTMINSTAR:
case OP_NOTPOSSTAR:
case OP_NOTPLUS:
case OP_NOTMINPLUS:
case OP_NOTPOSPLUS:
case OP_NOTQUERY:
case OP_NOTMINQUERY:
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
case OP_NOTPOSQUERY:
c = code[1];
if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
else fprintf(f, " [^\\x%02x]", c);
fprintf(f, "%s", OP_names[*code]);
break;
@@ -298,15 +344,20 @@ for(;;)
case OP_NOTEXACT:
case OP_NOTUPTO:
case OP_NOTMINUPTO:
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
case OP_NOTPOSUPTO:
c = code[3];
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
else fprintf(f, " [^\\x%02x]{", c);
if (*code != OP_NOTEXACT) fprintf(f, "0,");
fprintf(f, "%d}", GET2(code,1));
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
break;
case OP_RECURSE:
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
else fprintf(f, " ");
fprintf(f, "%s", OP_names[*code]);
break;
case OP_REF:
@@ -321,7 +372,7 @@ for(;;)
case OP_PROP:
case OP_NOTPROP:
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
break;
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
@@ -362,12 +413,14 @@ for(;;)
for (j = i+1; j < 256; j++)
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
if (i == '-' || i == ']') fprintf(f, "\\");
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
if (PRINTABLE(i)) fprintf(f, "%c", i);
else fprintf(f, "\\x%02x", i);
if (--j > i)
{
if (j != i + 1) fprintf(f, "-");
if (j == '-' || j == ']') fprintf(f, "\\");
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
if (PRINTABLE(j)) fprintf(f, "%c", j);
else fprintf(f, "\\x%02x", j);
}
i = j;
}
@@ -384,11 +437,15 @@ for(;;)
{
if (ch == XCL_PROP)
{
fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
int ptype = *ccode++;
int pvalue = *ccode++;
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
}
else if (ch == XCL_NOTPROP)
{
fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
int ptype = *ccode++;
int pvalue = *ccode++;
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
}
else
{
@@ -430,6 +487,12 @@ for(;;)
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
extra += _pcre_OP_lengths[*ccode];
break;
/* Do nothing if it's not a repeat; this code stops picky compilers
warning about the lack of a default code path. */
default:
break;
}
}
break;
@@ -446,4 +509,4 @@ for(;;)
}
}
/* End of pcreprni.c */
/* End of pcre_printint.src */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,11 @@ auxiliary function that can be used to maintain a reference count in a compiled
pattern data block. This might be helpful in applications where the block is
shared by different users. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -63,7 +68,7 @@ Returns: the (possibly updated) count value (a non-negative number), or
a negative error number
*/
EXPORT int
PCRE_EXP_DEFN int
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;
@@ -74,4 +79,4 @@ re->ref_count = (-adjust > re->ref_count)? 0 :
return re->ref_count;
}
/* End of pcrerefc.c */
/* End of pcre_refcount.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,9 +42,18 @@ POSSIBILITY OF SUCH DAMAGE.
supporting functions. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
/* Returns from set_start_bits() */
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
/*************************************************
* Set a bit and maybe its alternate case *
*************************************************/
@@ -72,12 +81,16 @@ if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
/*************************************************
* Create bitmap of starting chars *
* Create bitmap of starting bytes *
*************************************************/
/* This function scans a compiled unanchored expression and attempts to build a
bitmap of the set of initial characters. If it can't, it returns FALSE. As time
goes by, we may be able to get more clever at doing this.
/* This function scans a compiled unanchored expression recursively and
attempts to build a bitmap of the set of possible starting bytes. As time goes
by, we may be able to get more clever at doing this. The SSB_CONTINUE return is
useful for parenthesized groups in patterns such as (a*)b where the group
provides some optional starting bytes but scanning must continue at the outer
level to find at least one mandatory byte. At the outermost level, this
function fails unless the result is SSB_DONE.
Arguments:
code points to an expression
@@ -86,14 +99,24 @@ Arguments:
utf8 TRUE if in UTF-8 mode
cd the block with char table pointers
Returns: TRUE if table built, FALSE otherwise
Returns: SSB_FAIL => Failed to find any starting bytes
SSB_DONE => Found mandatory starting bytes
SSB_CONTINUE => Found optional starting bytes
*/
static BOOL
static int
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
BOOL utf8, compile_data *cd)
{
register int c;
int yield = SSB_DONE;
#if 0
/* ========================================================================= */
/* The following comment and code was inserted in January 1999. In May 2006,
when it was observed to cause compiler warnings about unused values, I took it
out again. If anybody is still using OS/2, they will have to put it back
manually. */
/* This next statement and the later reference to dummy are here in order to
trick the optimizer of the IBM C compiler for OS/2 into generating correct
@@ -102,28 +125,60 @@ disable optimization (in this module it actually makes a big difference, and
the pcre module can use all the optimization it can get). */
volatile int dummy;
/* ========================================================================= */
#endif
do
{
const uschar *tcode = code + 1 + LINK_SIZE;
const uschar *tcode = code + (((int)*code == OP_CBRA)? 3:1) + LINK_SIZE;
BOOL try_next = TRUE;
while (try_next)
while (try_next) /* Loop for items in this branch */
{
/* If a branch starts with a bracket or a positive lookahead assertion,
recurse to set bits from within them. That's all for this branch. */
if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
int rc;
switch(*tcode)
{
if (!set_start_bits(tcode, start_bits, caseless, utf8, cd))
return FALSE;
try_next = FALSE;
}
/* Fail if we reach something we don't understand */
else switch(*tcode)
{
default:
return FALSE;
return SSB_FAIL;
/* If we hit a bracket or a positive lookahead assertion, recurse to set
bits from within the subpattern. If it can't find anything, we have to
give up. If it finds some mandatory character(s), we are done for this
branch. Otherwise, carry on scanning after the subpattern. */
case OP_BRA:
case OP_SBRA:
case OP_CBRA:
case OP_SCBRA:
case OP_ONCE:
case OP_ASSERT:
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);
if (rc == SSB_FAIL) return SSB_FAIL;
if (rc == SSB_DONE) try_next = FALSE; else
{
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
}
break;
/* If we hit ALT or KET, it means we haven't found anything mandatory in
this branch, though we might have found something optional. For ALT, we
continue with the next alternative, but we have to arrange that the final
result from subpattern is SSB_CONTINUE rather than SSB_DONE. For KET,
return SSB_CONTINUE: if this is the top level, that indicates failure,
but after a nested subpattern, it causes scanning to continue. */
case OP_ALT:
yield = SSB_CONTINUE;
try_next = FALSE;
break;
case OP_KET:
case OP_KETRMAX:
case OP_KETRMIN:
return SSB_CONTINUE;
/* Skip over callout */
@@ -131,19 +186,13 @@ do
tcode += 2 + 2*LINK_SIZE;
break;
/* Skip over extended extraction bracket number */
case OP_BRANUMBER:
tcode += 3;
break;
/* Skip over lookbehind and negative lookahead assertions */
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1+LINK_SIZE;
tcode += 1 + LINK_SIZE;
break;
/* Skip over an option setting, changing the caseless flag */
@@ -157,23 +206,37 @@ do
case OP_BRAZERO:
case OP_BRAMINZERO:
if (!set_start_bits(++tcode, start_bits, caseless, utf8, cd))
return FALSE;
dummy = 1; (void)dummy;
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)
return SSB_FAIL;
/* =========================================================================
See the comment at the head of this function concerning the next line,
which was an old fudge for the benefit of OS/2.
dummy = 1;
========================================================================= */
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1+LINK_SIZE;
tcode += 1 + LINK_SIZE;
break;
/* SKIPZERO skips the bracket. */
case OP_SKIPZERO:
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
/* Single-char * or ? sets the bit and tries the next item */
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
set_bit(start_bits, tcode[1], caseless, cd);
tcode += 2;
#ifdef SUPPORT_UTF8
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
if (utf8 && tcode[-1] >= 0xc0)
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
#endif
break;
@@ -181,10 +244,12 @@ do
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
set_bit(start_bits, tcode[3], caseless, cd);
tcode += 4;
#ifdef SUPPORT_UTF8
if (utf8) while ((*tcode & 0xc0) == 0x80) tcode++;
if (utf8 && tcode[-1] >= 0xc0)
tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];
#endif
break;
@@ -197,6 +262,7 @@ do
case OP_CHARNC:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
set_bit(start_bits, tcode[1], caseless, cd);
try_next = FALSE;
break;
@@ -215,15 +281,29 @@ do
try_next = FALSE;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= ~d;
}
try_next = FALSE;
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= d;
}
try_next = FALSE;
break;
@@ -256,16 +336,20 @@ do
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
tcode += 2; /* Fall through */
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
switch(tcode[1])
{
case OP_ANY:
return FALSE;
case OP_ALLANY:
return SSB_FAIL;
case OP_NOT_DIGIT:
for (c = 0; c < 32; c++)
@@ -277,14 +361,28 @@ do
start_bits[c] |= cd->cbits[c+cbit_digit];
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_NOT_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= ~cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= ~d;
}
break;
/* The cbit_space table has vertical tab as whitespace; we have to
discard it. */
case OP_WHITESPACE:
for (c = 0; c < 32; c++)
start_bits[c] |= cd->cbits[c+cbit_space];
{
int d = cd->cbits[c+cbit_space];
if (c == 1) d &= ~0x08;
start_bits[c] |= d;
}
break;
case OP_NOT_WORDCHAR:
@@ -308,11 +406,13 @@ do
character with a value > 255. */
case OP_NCLASS:
#ifdef SUPPORT_UTF8
if (utf8)
{
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */
memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */
}
#endif
/* Fall through */
case OP_CLASS:
@@ -325,6 +425,7 @@ do
value is > 127. In fact, there are only two possible starting bytes for
characters in the range 128 - 255. */
#ifdef SUPPORT_UTF8
if (utf8)
{
for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
@@ -342,6 +443,7 @@ do
/* In non-UTF-8 mode, the two bit maps are completely compatible. */
else
#endif
{
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
}
@@ -377,7 +479,7 @@ do
code += GET(code, 1); /* Advance to next branch */
}
while (*code == OP_ALT);
return TRUE;
return yield;
}
@@ -401,17 +503,16 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the
NULL on error or if no optimization possible
*/
EXPORT pcre_extra *
PCRE_EXP_DEFN pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
pcre_extra *extra;
pcre_study_data *study;
const uschar *tables;
const real_pcre *re = (const real_pcre *)external_re;
uschar *code = (uschar *)re + re->name_table_offset +
(re->name_count * re->name_entry_size);
uschar *code;
compile_data compile_block;
const real_pcre *re = (const real_pcre *)external_re;
*errorptr = NULL;
@@ -427,11 +528,15 @@ if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
return NULL;
}
code = (uschar *)re + re->name_table_offset +
(re->name_count * re->name_entry_size);
/* For an anchored pattern, or an unanchored pattern that has a first char, or
a multiline pattern that matches only at "line starts", no further processing
at present. */
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
if ((re->options & PCRE_ANCHORED) != 0 ||
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
return NULL;
/* Set the character tables in the block that is passed around */
@@ -449,8 +554,8 @@ compile_block.ctypes = tables + ctypes_offset;
/* See if we can find a fixed set of initial characters for the pattern. */
memset(start_bits, 0, 32 * sizeof(uschar));
if (!set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
(re->options & PCRE_UTF8) != 0, &compile_block)) return NULL;
if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
(re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in
the latter, which is pointed to by the former, which may also get additional
@@ -479,4 +584,4 @@ memcpy(study->start_bits, start_bits, sizeof(start_bits));
return extra;
}
/* End of pcrestud.c */
/* End of pcre_study.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -39,14 +39,20 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. */
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
the definition is next to the definition of the opcodes in internal.h. */
the definition is next to the definition of the opcodes in pcre_internal.h. */
const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
@@ -59,6 +65,8 @@ const uschar _pcre_OP_lengths[] = { OP_LENGTHS };
/* These are the breakpoints for different numbers of bytes in a UTF-8
character. */
#ifdef SUPPORT_UTF8
const int _pcre_utf8_table1[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
@@ -70,9 +78,8 @@ first byte of a character, indexed by the number of additional bytes. */
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Table of the number of extra characters, indexed by the first character
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
0x3d. */
/* Table of the number of extra bytes, indexed by the first byte masked with
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
const uschar _pcre_utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -80,50 +87,232 @@ const uschar _pcre_utf8_table4[] = {
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* This table translates Unicode property names into code values for the
ucp_findchar() function. It is used by pcretest as well as by the library
functions. */
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
field of each entry. However, that leads to a large number of relocations when
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
data is unlikely. */
const char _pcre_utt_names[] =
"Any\0"
"Arabic\0"
"Armenian\0"
"Balinese\0"
"Bengali\0"
"Bopomofo\0"
"Braille\0"
"Buginese\0"
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
"Cc\0"
"Cf\0"
"Cherokee\0"
"Cn\0"
"Co\0"
"Common\0"
"Coptic\0"
"Cs\0"
"Cuneiform\0"
"Cypriot\0"
"Cyrillic\0"
"Deseret\0"
"Devanagari\0"
"Ethiopic\0"
"Georgian\0"
"Glagolitic\0"
"Gothic\0"
"Greek\0"
"Gujarati\0"
"Gurmukhi\0"
"Han\0"
"Hangul\0"
"Hanunoo\0"
"Hebrew\0"
"Hiragana\0"
"Inherited\0"
"Kannada\0"
"Katakana\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
"Lm\0"
"Lo\0"
"Lt\0"
"Lu\0"
"M\0"
"Malayalam\0"
"Mc\0"
"Me\0"
"Mn\0"
"Mongolian\0"
"Myanmar\0"
"N\0"
"Nd\0"
"New_Tai_Lue\0"
"Nko\0"
"Nl\0"
"No\0"
"Ogham\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
"Osmanya\0"
"P\0"
"Pc\0"
"Pd\0"
"Pe\0"
"Pf\0"
"Phags_Pa\0"
"Phoenician\0"
"Pi\0"
"Po\0"
"Ps\0"
"Runic\0"
"S\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
"Tagbanwa\0"
"Tai_Le\0"
"Tamil\0"
"Telugu\0"
"Thaana\0"
"Thai\0"
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
"Yi\0"
"Z\0"
"Zl\0"
"Zp\0"
"Zs\0";
const ucp_type_table _pcre_utt[] = {
{ "C", 128 + ucp_C },
{ "Cc", ucp_Cc },
{ "Cf", ucp_Cf },
{ "Cn", ucp_Cn },
{ "Co", ucp_Co },
{ "Cs", ucp_Cs },
{ "L", 128 + ucp_L },
{ "Ll", ucp_Ll },
{ "Lm", ucp_Lm },
{ "Lo", ucp_Lo },
{ "Lt", ucp_Lt },
{ "Lu", ucp_Lu },
{ "M", 128 + ucp_M },
{ "Mc", ucp_Mc },
{ "Me", ucp_Me },
{ "Mn", ucp_Mn },
{ "N", 128 + ucp_N },
{ "Nd", ucp_Nd },
{ "Nl", ucp_Nl },
{ "No", ucp_No },
{ "P", 128 + ucp_P },
{ "Pc", ucp_Pc },
{ "Pd", ucp_Pd },
{ "Pe", ucp_Pe },
{ "Pf", ucp_Pf },
{ "Pi", ucp_Pi },
{ "Po", ucp_Po },
{ "Ps", ucp_Ps },
{ "S", 128 + ucp_S },
{ "Sc", ucp_Sc },
{ "Sk", ucp_Sk },
{ "Sm", ucp_Sm },
{ "So", ucp_So },
{ "Z", 128 + ucp_Z },
{ "Zl", ucp_Zl },
{ "Zp", ucp_Zp },
{ "Zs", ucp_Zs }
{ 0, PT_ANY, 0 },
{ 4, PT_SC, ucp_Arabic },
{ 11, PT_SC, ucp_Armenian },
{ 20, PT_SC, ucp_Balinese },
{ 29, PT_SC, ucp_Bengali },
{ 37, PT_SC, ucp_Bopomofo },
{ 46, PT_SC, ucp_Braille },
{ 54, PT_SC, ucp_Buginese },
{ 63, PT_SC, ucp_Buhid },
{ 69, PT_GC, ucp_C },
{ 71, PT_SC, ucp_Canadian_Aboriginal },
{ 91, PT_PC, ucp_Cc },
{ 94, PT_PC, ucp_Cf },
{ 97, PT_SC, ucp_Cherokee },
{ 106, PT_PC, ucp_Cn },
{ 109, PT_PC, ucp_Co },
{ 112, PT_SC, ucp_Common },
{ 119, PT_SC, ucp_Coptic },
{ 126, PT_PC, ucp_Cs },
{ 129, PT_SC, ucp_Cuneiform },
{ 139, PT_SC, ucp_Cypriot },
{ 147, PT_SC, ucp_Cyrillic },
{ 156, PT_SC, ucp_Deseret },
{ 164, PT_SC, ucp_Devanagari },
{ 175, PT_SC, ucp_Ethiopic },
{ 184, PT_SC, ucp_Georgian },
{ 193, PT_SC, ucp_Glagolitic },
{ 204, PT_SC, ucp_Gothic },
{ 211, PT_SC, ucp_Greek },
{ 217, PT_SC, ucp_Gujarati },
{ 226, PT_SC, ucp_Gurmukhi },
{ 235, PT_SC, ucp_Han },
{ 239, PT_SC, ucp_Hangul },
{ 246, PT_SC, ucp_Hanunoo },
{ 254, PT_SC, ucp_Hebrew },
{ 261, PT_SC, ucp_Hiragana },
{ 270, PT_SC, ucp_Inherited },
{ 280, PT_SC, ucp_Kannada },
{ 288, PT_SC, ucp_Katakana },
{ 297, PT_SC, ucp_Kharoshthi },
{ 308, PT_SC, ucp_Khmer },
{ 314, PT_GC, ucp_L },
{ 316, PT_LAMP, 0 },
{ 319, PT_SC, ucp_Lao },
{ 323, PT_SC, ucp_Latin },
{ 329, PT_SC, ucp_Limbu },
{ 335, PT_SC, ucp_Linear_B },
{ 344, PT_PC, ucp_Ll },
{ 347, PT_PC, ucp_Lm },
{ 350, PT_PC, ucp_Lo },
{ 353, PT_PC, ucp_Lt },
{ 356, PT_PC, ucp_Lu },
{ 359, PT_GC, ucp_M },
{ 361, PT_SC, ucp_Malayalam },
{ 371, PT_PC, ucp_Mc },
{ 374, PT_PC, ucp_Me },
{ 377, PT_PC, ucp_Mn },
{ 380, PT_SC, ucp_Mongolian },
{ 390, PT_SC, ucp_Myanmar },
{ 398, PT_GC, ucp_N },
{ 400, PT_PC, ucp_Nd },
{ 403, PT_SC, ucp_New_Tai_Lue },
{ 415, PT_SC, ucp_Nko },
{ 419, PT_PC, ucp_Nl },
{ 422, PT_PC, ucp_No },
{ 425, PT_SC, ucp_Ogham },
{ 431, PT_SC, ucp_Old_Italic },
{ 442, PT_SC, ucp_Old_Persian },
{ 454, PT_SC, ucp_Oriya },
{ 460, PT_SC, ucp_Osmanya },
{ 468, PT_GC, ucp_P },
{ 470, PT_PC, ucp_Pc },
{ 473, PT_PC, ucp_Pd },
{ 476, PT_PC, ucp_Pe },
{ 479, PT_PC, ucp_Pf },
{ 482, PT_SC, ucp_Phags_Pa },
{ 491, PT_SC, ucp_Phoenician },
{ 502, PT_PC, ucp_Pi },
{ 505, PT_PC, ucp_Po },
{ 508, PT_PC, ucp_Ps },
{ 511, PT_SC, ucp_Runic },
{ 517, PT_GC, ucp_S },
{ 519, PT_PC, ucp_Sc },
{ 522, PT_SC, ucp_Shavian },
{ 530, PT_SC, ucp_Sinhala },
{ 538, PT_PC, ucp_Sk },
{ 541, PT_PC, ucp_Sm },
{ 544, PT_PC, ucp_So },
{ 547, PT_SC, ucp_Syloti_Nagri },
{ 560, PT_SC, ucp_Syriac },
{ 567, PT_SC, ucp_Tagalog },
{ 575, PT_SC, ucp_Tagbanwa },
{ 584, PT_SC, ucp_Tai_Le },
{ 591, PT_SC, ucp_Tamil },
{ 597, PT_SC, ucp_Telugu },
{ 604, PT_SC, ucp_Thaana },
{ 611, PT_SC, ucp_Thai },
{ 616, PT_SC, ucp_Tibetan },
{ 624, PT_SC, ucp_Tifinagh },
{ 633, PT_SC, ucp_Ugaritic },
{ 642, PT_SC, ucp_Yi },
{ 645, PT_GC, ucp_Z },
{ 647, PT_PC, ucp_Zl },
{ 650, PT_PC, ucp_Zp },
{ 653, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
/* End of pcretabs.c */
#endif /* SUPPORT_UTF8 */
/* End of pcre_tables.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,10 @@ see if it was compiled with the opposite endianness. If so, it uses an
auxiliary local function to flip the appropriate bytes. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -62,8 +66,8 @@ Arguments:
Returns: the flipped value
*/
static long int
byteflip(long int value, int n)
static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
@@ -94,16 +98,17 @@ Returns: the new block if is is indeed a byte-flipped regex
NULL if it is not
*/
EXPORT real_pcre *
real_pcre *
_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re,
const pcre_study_data *study, pcre_study_data *internal_study)
{
if ((unsigned long) byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)
return NULL;
*internal_re = *re; /* To copy other fields */
internal_re->size = byteflip(re->size, sizeof(re->size));
internal_re->options = byteflip(re->options, sizeof(re->options));
internal_re->flags = (pcre_uint16)byteflip(re->flags, sizeof(re->flags));
internal_re->top_bracket =
(pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket));
internal_re->top_backref =
@@ -129,4 +134,4 @@ if (study != NULL)
return internal_re;
}
/* End of pcretryf.c */
/* End of pcre_tryflipped.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
string that identifies the PCRE version that is in use. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -49,13 +53,38 @@ string that identifies the PCRE version that is in use. */
* Return version string *
*************************************************/
/* These macros are the standard way of turning unquoted text into C strings.
They allow macros like PCRE_MAJOR to be defined without quotes, which is
convenient for user programs that want to test its value. */
#define STRING(a) # a
#define XSTRING(s) STRING(s)
EXPORT const char *
/* A problem turned up with PCRE_PRERELEASE, which is defined empty for
production releases. Originally, it was used naively in this code:
return XSTRING(PCRE_MAJOR)
"." XSTRING(PCRE_MINOR)
XSTRING(PCRE_PRERELEASE)
" " XSTRING(PCRE_DATE);
However, when PCRE_PRERELEASE is empty, this leads to an attempted expansion of
STRING(). The C standard states: "If (before argument substitution) any
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what we
really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. To cope with both ways of
handling this, I had resort to some messy hackery that does a test at run time.
I could find no way of detecting that a macro is defined as an empty string at
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
PCRE_EXP_DEFN const char *
pcre_version(void)
{
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
XSTRING(PCRE_MAJOR.PCRE_MINOR PCRE_DATE) :
XSTRING(PCRE_MAJOR.PCRE_MINOR) XSTRING(PCRE_PRERELEASE PCRE_DATE);
}
/* End of pcrevers.c */
/* End of pcre_version.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
strings. */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -55,6 +59,13 @@ that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but the consequences of supplying
an invalid string are then undefined.
Originally, this function checked according to RFC 2279, allowing for values in
the range 0 to 0x7fffffff, up to 6 bytes long, but ensuring that they were in
the canonical format. Once somebody had pointed out RFC 3629 to me (it
obsoletes 2279), additional restrictions were applied. The values are now
limited to be between 0 and 0x0010ffff, no more than 4 bytes long, and the
subrange 0xd000 to 0xdfff is excluded.
Arguments:
string points to the string
length length of string, or -1 if the string is zero-terminated
@@ -63,9 +74,10 @@ Returns: < 0 if the string is a valid UTF-8 string
>= 0 otherwise; the value is the offset of the bad byte
*/
EXPORT int
int
_pcre_valid_utf8(const uschar *string, int length)
{
#ifdef SUPPORT_UTF8
register const uschar *p;
if (length < 0)
@@ -79,32 +91,49 @@ for (p = string; length-- > 0; p++)
register int ab;
register int c = *p;
if (c < 128) continue;
if ((c & 0xc0) != 0xc0) return p - string;
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
if (length < ab) return p - string;
if (c < 0xc0) return p - string;
ab = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
if (length < ab || ab > 3) return p - string;
length -= ab;
/* Check top bits in the second byte */
if ((*(++p) & 0xc0) != 0x80) return p - string;
/* Check for overlong sequences for each different length */
/* Check for overlong sequences for each different length, and for the
excluded range 0xd000 to 0xdfff. */
switch (ab)
{
/* Check for xx00 000x */
/* Check for xx00 000x (overlong sequence) */
case 1:
if ((c & 0x3e) == 0) return p - string;
continue; /* We know there aren't any more bytes to check */
/* Check for 1110 0000, xx0x xxxx */
/* Check for 1110 0000, xx0x xxxx (overlong sequence) or
1110 1101, 1010 xxxx (0xd000 - 0xdfff) */
case 2:
if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
if ((c == 0xe0 && (*p & 0x20) == 0) ||
(c == 0xed && *p >= 0xa0))
return p - string;
break;
/* Check for 1111 0000, xx00 xxxx */
/* Check for 1111 0000, xx00 xxxx (overlong sequence) or
greater than 0x0010ffff (f4 8f bf bf) */
case 3:
if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
if ((c == 0xf0 && (*p & 0x30) == 0) ||
(c > 0xf4 ) ||
(c == 0xf4 && *p > 0x8f))
return p - string;
break;
#if 0
/* These cases can no longer occur, as we restrict to a maximum of four
bytes nowadays. Leave the code here in case we ever want to add an option
for longer sequences. */
/* Check for 1111 1000, xx00 0xxx */
case 4:
if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
@@ -115,6 +144,8 @@ for (p = string; length-- > 0; p++)
if (c == 0xfe || c == 0xff ||
(c == 0xfc && (*p & 0x3c) == 0)) return p - string;
break;
#endif
}
/* Check for valid bytes after the 2nd, if any; all must start 10 */
@@ -123,8 +154,9 @@ for (p = string; length-- > 0; p++)
if ((*(++p) & 0xc0) != 0x80) return p - string;
}
}
#endif
return -1;
}
/* End of pcrevutf.c */
/* End of pcre_valid_utf8.c */

View File

@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Copyright (c) 1997-2005 University of Cambridge
Copyright (c) 1997-2008 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,10 @@ class (one that contains characters whose values are > 255). It is used by both
pcre_exec() and pcre_def_exec(). */
#if 1
#include "_hbconf.h"
#endif
#include "pcreinal.h"
@@ -60,7 +64,7 @@ Arguments:
Returns: TRUE if character matches, else FALSE
*/
EXPORT BOOL
BOOL
_pcre_xclass(int c, const uschar *data)
{
int t;
@@ -100,17 +104,40 @@ while ((t = *data++) != XCL_END)
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
int chartype, othercase;
int rqdtype = *data++;
int category = ucp_findchar(c, &chartype, &othercase);
if (rqdtype >= 128)
int chartype, script;
int category = _pcre_ucp_findprop(c, &chartype, &script);
switch(*data)
{
if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;
}
else
{
if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;
case PT_ANY:
if (t == XCL_PROP) return !negated;
break;
case PT_LAMP:
if ((chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt) ==
(t == XCL_PROP)) return !negated;
break;
case PT_GC:
if ((data[1] == category) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
if ((data[1] == chartype) == (t == XCL_PROP)) return !negated;
break;
case PT_SC:
if ((data[1] == script) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
default. */
default:
return FALSE;
}
data += 2;
}
#endif /* SUPPORT_UCP */
}
@@ -118,4 +145,4 @@ while ((t = *data++) != XCL_END)
return negated; /* char did not match */
}
/* End of pcrexcls.c */
/* End of pcre_xclass.c */

View File

@@ -1,12 +1,16 @@
/*************************************************
* libucp - Unicode Property Table handler *
* Unicode Property Table handler *
*************************************************/
#ifndef _UCP_H
#define _UCP_H
/* These are the character categories that are returned by ucp_findchar */
/* This file contains definitions of the property values that are returned by
the function _pcre_ucp_findprop(). New values that are added for new releases
of Unicode should always be at the end of each enum, for backwards
compatibility. */
/* These are the general character categories. */
enum {
ucp_C, /* Other */
@@ -18,7 +22,7 @@ enum {
ucp_Z /* Separator */
};
/* These are the detailed character types that are returned by ucp_findchar */
/* These are the particular character types. */
enum {
ucp_Cc, /* Control */
@@ -53,7 +57,76 @@ enum {
ucp_Zs /* Space separator */
};
extern int ucp_findchar(const int, int *, int *);
/* These are the script identifications. */
enum {
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Yi,
ucp_Balinese, /* New for Unicode 5.0.0 */
ucp_Cuneiform, /* New for Unicode 5.0.0 */
ucp_Nko, /* New for Unicode 5.0.0 */
ucp_Phags_Pa, /* New for Unicode 5.0.0 */
ucp_Phoenician /* New for Unicode 5.0.0 */
};
#endif

View File

@@ -1,91 +1,92 @@
/*************************************************
* libucp - Unicode Property Table handler *
* Unicode Property Table handler *
*************************************************/
/* Internal header file defining the layout of compact nodes in the tree. */
#ifndef _UCPINTERNAL_H
#define _UCPINTERNAL_H
/* Internal header file defining the layout of the bits in each pair of 32-bit
words that form a data item in the table. */
typedef struct cnode {
unsigned short int f0;
unsigned short int f1;
unsigned short int f2;
pcre_uint32 f0;
pcre_uint32 f1;
} cnode;
/* Things for the f0 field */
#define f0_leftexists 0x8000 /* Left child exists */
#define f0_typemask 0x3f00 /* Type bits */
#define f0_typeshift 8 /* Type shift */
#define f0_chhmask 0x00ff /* Character high bits */
#define f0_scriptmask 0xff000000 /* Mask for script field */
#define f0_scriptshift 24 /* Shift for script value */
#define f0_rangeflag 0x00800000 /* Flag for a range item */
#define f0_charmask 0x001fffff /* Mask for code point value */
/* Things for the f2 field */
/* Things for the f1 field */
#define f2_rightmask 0xf000 /* Mask for right offset bits */
#define f2_rightshift 12 /* Shift for right offset */
#define f2_casemask 0x0fff /* Mask for case offset */
#define f1_typemask 0xfc000000 /* Mask for char type field */
#define f1_typeshift 26 /* Shift for the type field */
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
#define f1_casemask 0x0000ffff /* Mask for a case offset */
#define f1_caseneg 0xffff8000 /* Bits for negation */
/* The tree consists of a vector of structures of type cnode, with the root
node as the first element. The three short ints (16-bits) are used as follows:
/* The data consists of a vector of structures of type cnode. The two unsigned
32-bit integers are used as follows:
(f0) (1) The 0x8000 bit of f0 is set if a left child exists. The child's node
is the next node in the vector.
(2) The 0x4000 bits of f0 is spare.
(3) The 0x3f00 bits of f0 contain the character type; this is a number
defined by the enumeration in ucp.h (e.g. ucp_Lu).
(4) The bottom 8 bits of f0 contain the most significant byte of the
character's 24-bit codepoint.
(f0) (1) The most significant byte holds the script number. The numbers are
defined by the enum in ucp.h.
(f1) (1) The f1 field contains the two least significant bytes of the
codepoint.
(2) The 0x00800000 bit is set if this entry defines a range of characters.
It is not set if this entry defines a single character
(f2) (1) The 0xf000 bits of f2 contain zero if there is no right child of this
node. Otherwise, they contain one plus the exponent of the power of
two of the offset to the right node (e.g. a value of 3 means 8). The
units of the offset are node items.
(3) The 0x00600000 bits are spare.
(2) The 0x0fff bits of f2 contain the signed offset from this character to
its alternate cased value. They are zero if there is no such
character.
(4) The 0x001fffff bits contain the code point. No Unicode code point will
ever be greater than 0x0010ffff, so this should be OK for ever.
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
defined by an enum in ucp.h.
-----------------------------------------------------------------------------
||.|.| type (6) | ms char (8) || ls char (16) ||....| case offset (12) ||
-----------------------------------------------------------------------------
| | |
| |-> spare |
| exponent of right
|-> left child exists child offset
(2) The 0x03ff0000 bits are spare.
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
range if this entry defines a range, OR the *signed* offset to the
character's "other case" partner if this entry defines a single
character. There is no partner if the value is zero.
-------------------------------------------------------------------------------
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
-------------------------------------------------------------------------------
| | | | |
| | |-> spare | |-> spare
| | |
| |-> spare |-> spare
|
|-> range flag
The upper/lower casing information is set only for characters that come in
pairs. There are (at present) four non-one-to-one mappings in the Unicode data.
These are ignored. They are:
pairs. The non-one-to-one mappings in the Unicode data are ignored.
1FBE Greek Prosgegrammeni (lower, with upper -> capital iota)
2126 Ohm
212A Kelvin
212B Angstrom
When searching the data, proceed as follows:
Certainly for the last three, having an alternate case would seem to be a
mistake. I don't know any Greek, so cannot comment on the first one.
(1) Set up for a binary chop search.
(2) If the top is not greater than the bottom, the character is not in the
table. Its type must therefore be "Cn" ("Undefined").
When searching the tree, proceed as follows:
(3) Find the middle vector element.
(1) Start at the first node.
(4) Extract the code point and compare. If equal, we are done.
(2) Extract the character value from f1 and the bottom 8 bits of f0;
(5) If the test character is smaller, set the top to the current point, and
goto (2).
(3) Compare with the character being sought. If equal, we are done.
(6) If the current entry defines a range, compute the last character by adding
the offset, and see if the test character is within the range. If it is,
we are done.
(4) If the test character is smaller, inspect the f0_leftexists flag. If it is
not set, the character is not in the tree. If it is set, move to the next
node, and go to (2).
(5) If the test character is bigger, extract the f2_rightmask bits from f2, and
shift them right by f2_rightshift. If the result is zero, the character is
not in the tree. Otherwise, calculate the number of nodes to skip by
shifting the value 1 left by this number minus one. Go to (2).
(7) Otherwise, set the bottom to one element past the current point and goto
(2).
*/
#endif /* _UCPINTERNAL_H */
/* End of internal.h */
/* End of ucpinternal.h */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff