* common.mak
* source/hbpcre/Makefile
* source/hbpcre/chartabs.c -- changes
* source/hbpcre/config.h
* source/hbpcre/pcre.h
* source/hbpcre/pcrecomp.c
* source/hbpcre/pcreconf.c
* source/hbpcre/pcredfa.c
* source/hbpcre/pcreexec.c
* source/hbpcre/pcrefind.c
* source/hbpcre/pcrefinf.c
* source/hbpcre/pcreget.c
* source/hbpcre/pcreinal.h
* source/hbpcre/pcreinfo.c
* source/hbpcre/pcremktb.c
* source/hbpcre/pcreoutf.c
* source/hbpcre/pcrerefc.c
* source/hbpcre/pcrestud.c
* source/hbpcre/pcretabs.c
* source/hbpcre/pcretryf.c
* source/hbpcre/pcrever.c
* source/hbpcre/pcrevutf.c
* source/hbpcre/pcrexcls.c
* source/hbpcre/ucp.h
* source/hbpcre/ucpinter.h
- source/hbpcre/pcreglob.c -- renames
+ source/hbpcre/_hbpcreg.c
- source/hbpcre/pcreprni.c
+ source/hbpcre/pcreprni.h
- source/hbpcre/ucptable.c
+ source/hbpcre/ucptable.h
+ source/hbpcre/_hbconf.h -- new files
+ source/hbpcre/pcrenewl.c
- source/hbpcre/dftables.c -- deletion
+ Updated to PCRE 7.7 (from 6.3)
; Original code not (yet) modified, so some warnings
may appear in foreign code.
; Please test.
+ source/hbpcre/cnv_hb2o.bat
+ source/hbpcre/cnv_o2hb.bat
+ Added batch files to make the conversion from
original PCRE source files into Harbour.
93 lines
3.4 KiB
C
93 lines
3.4 KiB
C
/*************************************************
*           Unicode Property Table handler       *
*************************************************/
|
|
|
|
#ifndef _UCPINTERNAL_H
|
|
#define _UCPINTERNAL_H
|
|
|
|
/* Internal header file defining the layout of the bits in each pair of 32-bit
words that form a data item in the table. */
|
|
|
|
typedef struct cnode {
|
|
pcre_uint32 f0;
|
|
pcre_uint32 f1;
|
|
} cnode;
|
|
|
|
/* Things for the f0 field: the script number occupies the most significant
byte, followed by the range flag, two spare bits, and the code point in the
low 21 bits. */

#define f0_scriptmask   0xff000000  /* Mask for script field */
#define f0_scriptshift          24  /* Shift for script value */
#define f0_rangeflag    0x00800000  /* Flag for a range item */
#define f0_charmask     0x001fffff  /* Mask for code point value */
|
|
|
|
/* Things for the f1 field: the character type occupies the top 6 bits and the
low 16 bits hold an offset. For a range entry the offset is the unsigned
distance to the top of the range; for a single character it is the *signed*
distance to the "other case" partner (zero means no partner). */

#define f1_typemask     0xfc000000  /* Mask for char type field */
#define f1_typeshift            26  /* Shift for the type field */
#define f1_rangemask    0x0000ffff  /* Mask for a range offset */
#define f1_casemask     0x0000ffff  /* Mask for a case offset */
#define f1_caseneg      0xffff8000  /* Bits for negation (bit 15 and above) */
|
|
|
|
/* The data consists of a vector of structures of type cnode. The two unsigned
32-bit integers are used as follows:

(f0) (1) The most significant byte holds the script number. The numbers are
         defined by the enum in ucp.h.

     (2) The 0x00800000 bit is set if this entry defines a range of characters.
         It is not set if this entry defines a single character.

     (3) The 0x00600000 bits are spare.

     (4) The 0x001fffff bits contain the code point. No Unicode code point will
         ever be greater than 0x0010ffff, so this should be OK for ever.

(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
         defined by an enum in ucp.h.

     (2) The 0x03ff0000 bits are spare.

     (3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
         range if this entry defines a range, OR the *signed* offset to the
         character's "other case" partner if this entry defines a single
         character. There is no partner if the value is zero.

-------------------------------------------------------------------------------
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
-------------------------------------------------------------------------------
              | | |                              | |
              | | |-> spare                      | |-> spare
              | |                                |
              | |-> spare                        |-> spare
              |
              |-> range flag

The upper/lower casing information is set only for characters that come in
pairs. The non-one-to-one mappings in the Unicode data are ignored.

When searching the data, proceed as follows:

(1) Set up for a binary chop search.

(2) If the top is not greater than the bottom, the character is not in the
    table. Its type must therefore be "Cn" ("Undefined").

(3) Find the middle vector element.

(4) Extract the code point and compare. If equal, we are done.

(5) If the test character is smaller, set the top to the current point, and
    goto (2).

(6) If the current entry defines a range, compute the last character by adding
    the offset, and see if the test character is within the range. If it is,
    we are done.

(7) Otherwise, set the bottom to one element past the current point and goto
    (2).
*/
|
|
|
|
#endif /* _UCPINTERNAL_H */
|
|
|
|
/* End of ucpinternal.h */
|