2025-09-09 13:50 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
* include/hbdefs.h
+ added new types HB_WCHAR16 and HB_WCHAR32, existing type HB_WCHAR
is mapped to HB_WCHAR16 (just like before)
* include/hbapicdp.h
* src/harbour.def
* src/rtl/cdpapi.c
+ added new C functions for encoding and decoding UTF-8 string using
which HB_WCHAR32:
int hb_cdpU32CharToUTF8( char * szUTF8, HB_WCHAR32 wc );
HB_BOOL hb_cdpUTF8GetU32( const char * pSrc, HB_SIZE nLen,
HB_SIZE * pnIndex, HB_WCHAR32 * pWC );
HB_BOOL hb_cdpUTF8GetUCS( const char * pSrc, HB_SIZE nLen,
HB_SIZE * pnIndex, HB_WCHAR32 * pWC );
HB_BOOL hb_cdpUTF8GetU16( const char * pSrc, HB_SIZE nLen,
HB_SIZE * pnIndex, HB_WCHAR16 * pWC );
HB_BOOL hb_cdpUTF8Validate( const char * pSrc, HB_SIZE nLen );
They support full UCS and are much more restrictive against errors and
wrong UTF-8 encoding, i.e. now overlong encoding is forbidden.
The wrong characters are translated to 0xFFFD and later if such
character does not exist in final CP to '?' ASCII character.
* declaration of the following UTF-8 C functions have been changed to
operate on HB_WCHAR32 instead of HB_WCHAR:
int hb_cdpUTF8CharSize( HB_WCHAR32 wc );
HB_WCHAR32 hb_cdpUTF8StringPeek( const char * pSrc, HB_SIZE nLen,
HB_SIZE nPos );
* the following C functions have been changed to internally operate on
HB_WCHAR32 instead of HB_WCHAR:
hb_cdpUTF8StringLength()
hb_cdpUTF8StringAt()
hb_cdpUTF8StringSubstr()
* the following C functions have been changed to use new hb_cdpUTF8GetU*()
instead of step by step decoding with hb_cdpUTF8ToU16NextChar()
hb_cdpStrToUTF8Disp()
hb_cdpUTF8AsStrLen()
hb_cdpUTF8ToStr()
hb_cdpStrToU16()
hb_cdpUtf8Char()
* use HB_CDP_ERROR_* macros to mark wrong encoding
* src/rtl/cdpapihb.c
* the following UTF-8 C functions have been changed to operate on
HB_WCHAR32 instead of HB_WCHAR:
hb_utf8Chr()
hb_utf8Asc()
hb_utf8Poke()
hb_utf8Peek()
Other UTF-8 PRG functions have been adopted to HB_WCHAR32 by changes
in corresponding C functions.
* src/codepage/cp_utf8.c
* use new function hb_cdpUTF8GetU16() to decode UTF-8 strings in UTF8EX CP
* src/rtl/arc4.c
+ added new macro HB_NO_SYSCTL which allow to disable sysctl() in Linux
builds for GLIBC < 2.30
This commit is contained in:
@@ -401,6 +401,19 @@ extern HB_EXPORT void hb_vmSetCDP( PHB_CODEPAGE pCDP );
|
||||
*/
|
||||
#define HB_MAX_CHAR_LEN 8
|
||||
|
||||
/* UCS maximal character value */
|
||||
#define HB_CDP_UNICODE_MAX 0x10FFFF
|
||||
|
||||
/* UTF-16 surrogates for mapping U+010000 to U+10FFFF characters */
|
||||
#define HB_CDP_SURROGATE_FIRST 0xD800
|
||||
#define HB_CDP_SURROGATE_LAST 0xDFFF
|
||||
#define HB_CDP_SURROGATE_HIGH 0xD800
|
||||
#define HB_CDP_SURROGATE_LOW 0xDC00
|
||||
|
||||
/* character codes to replace sequences with wrong encoding or translation */
|
||||
#define HB_CDP_ERROR_UNICHAR 0xFFFD /* <?> */
|
||||
#define HB_CDP_ERROR_ASCCHAR 0x3F /* ? */
|
||||
|
||||
/* codepage uses simple binary sorting */
|
||||
#define HB_CDP_ISBINSORT( cdp ) ( ( ( cdp )->type & HB_CDP_TYPE_BINSORT ) != 0 )
|
||||
/* codepage uses custom string decoding */
|
||||
@@ -473,7 +486,7 @@ extern HB_EXPORT HB_BOOL hb_cdpGetFromUTF8( PHB_CODEPAGE cdp, HB_UCHAR ch,
|
||||
|
||||
extern HB_EXPORT HB_SIZE hb_cdpUTF8StringLength( const char * pSrc, HB_SIZE nLen );
|
||||
extern HB_EXPORT HB_SIZE hb_cdpUTF8StringAt( const char * szNeedle, HB_SIZE nLenN, const char * szHaystack, HB_SIZE nLenH, HB_SIZE nStart, HB_SIZE nEnd, HB_BOOL fReverse );
|
||||
extern HB_EXPORT HB_WCHAR hb_cdpUTF8StringPeek( const char * pSrc, HB_SIZE nLen, HB_SIZE nPos );
|
||||
extern HB_EXPORT HB_WCHAR32 hb_cdpUTF8StringPeek( const char * pSrc, HB_SIZE nLen, HB_SIZE nPos );
|
||||
extern HB_EXPORT char * hb_cdpUTF8StringSubstr( const char * pSrc, HB_SIZE nLen, HB_SIZE nFrom, HB_SIZE nCount, HB_SIZE * pnDest );
|
||||
|
||||
extern HB_EXPORT HB_SIZE hb_cdpUTF8AsStrLen( PHB_CODEPAGE cdp, const char * pSrc, HB_SIZE nSrc, HB_SIZE nMax );
|
||||
@@ -491,10 +504,14 @@ extern HB_EXPORT HB_WCHAR * hb_cdpnStrDupU16( PHB_CODEPAGE cdp, int iEndian, c
|
||||
|
||||
extern HB_EXPORT HB_WCHAR hb_cdpGetU16Ctrl( HB_WCHAR wc );
|
||||
|
||||
extern HB_EXPORT int hb_cdpUTF8CharSize( HB_WCHAR wc );
|
||||
extern HB_EXPORT int hb_cdpUTF8CharSize( HB_WCHAR32 wc );
|
||||
extern HB_EXPORT int hb_cdpU32CharToUTF8( char * szUTF8, HB_WCHAR32 wc );
|
||||
extern HB_EXPORT int hb_cdpU16CharToUTF8( char * szUTF8, HB_WCHAR wc );
|
||||
extern HB_EXPORT HB_BOOL hb_cdpUTF8ToU16NextChar( HB_UCHAR ucChar, int * n, HB_WCHAR * pwc );
|
||||
|
||||
extern HB_EXPORT HB_BOOL hb_cdpUTF8GetU32( const char * pSrc, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR32 * pWC );
|
||||
extern HB_EXPORT HB_BOOL hb_cdpUTF8GetUCS( const char * pSrc, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR32 * pWC );
|
||||
extern HB_EXPORT HB_BOOL hb_cdpUTF8GetU16( const char * pSrc, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR16 * pWC );
|
||||
extern HB_EXPORT HB_BOOL hb_cdpUTF8Validate( const char * pSrc, HB_SIZE nLen );
|
||||
|
||||
extern HB_EXPORT PHB_ITEM hb_itemDeserializeCP( const char ** pBufferPtr, HB_SIZE * pnSize, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut );
|
||||
extern HB_EXPORT char * hb_itemSerializeCP( PHB_ITEM pItem, int iFlags, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut, HB_SIZE * pnSize );
|
||||
|
||||
@@ -639,10 +639,18 @@ typedef HB_U32 HB_FATTR;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined( HB_OS_WIN )
|
||||
#if defined( HB_OS_WIN ) || defined( HB_OS_DOS ) || defined( HB_OS_OS2 )
|
||||
typedef wchar_t HB_WCHAR;
|
||||
typedef wchar_t HB_WCHAR16;
|
||||
typedef HB_I32 HB_WCHAR32;
|
||||
#elif defined( __WATCOMC__ )
|
||||
typedef unsigned short HB_WCHAR;
|
||||
typedef unsigned short HB_WCHAR16;
|
||||
typedef HB_I32 HB_WCHAR32;
|
||||
#else
|
||||
typedef unsigned short HB_WCHAR;
|
||||
typedef unsigned short HB_WCHAR16;
|
||||
typedef wchar_t HB_WCHAR32;
|
||||
#endif
|
||||
|
||||
/* maximum length of double number in decimal representation:
|
||||
|
||||
Reference in New Issue
Block a user