2011-04-13 11:07 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
* harbour/include/hbapicdp.h
* harbour/include/hbcdpreg.h
* harbour/src/rtl/cdpapi.c
* harbour/src/rtl/cdpapihb.c
+ added support for user-defined character encodings.
Now it's possible to easily create and register in HVM CPs using any
encoding, so I expect that users interested in adding support for
some exotic character encoding will create such a mapping instead
of asking for it.
% moved support for codepages using multibyte characters mapped to
single Unicode values to user-defined encoding - it nicely simplifies
the code and eliminates unnecessary overhead in other CPs.
* allow choosing UTF8 as HVM CP
* harbour/src/rtl/idle.c
! fixed idle mode flag resetting
* harbour/src/rdd/hbsix/sxutil.c
! fixed SX_SLIMFAST() results when nested quoting with (") and (') is
used
+ harbour/tests/big5_gen.prg
+ added code to generate C source with conversion tables between
BIG5 and UCS16 using data defined by Unicode, Inc. in BIG5.TXT
+ harbour/src/codepage/cp_utf8.c
+ added alternative UTF8 Harbour CP (UTF8ASC) as an example of a Harbour
user-defined codepage using multibyte character encoding
* harbour/src/codepage/Makefile
+ harbour/src/codepage/cp_u16le.c
+ added Harbour codepage using UTF16 little endian encoding
* harbour/src/codepage/Makefile
+ harbour/src/codepage/big5.c
+ harbour/src/codepage/cp_big5.c
+ added BIG5 Harbour CP. It can be used with programs using Harbour
STR API with automatic translations.
; This CP needs really big translation tables. I added code which
performs some very simple compression, which reduced the raw size from
176100 bytes to 77354, but that is still a large 77KB, so maybe we should
think about moving this CP to another Harbour codepage library which
is not part of the harbour shared library harbour*{.dll|.so|.dyn|...}
Alternatively I can try to reduce the static size to about 30KB and
then build the necessary tables dynamically at runtime when they are
first used, though in such a case I will need an additional 177KB
of dynamic memory instead of the 77KB of static memory used by the
current code.
This commit is contained in:
@@ -16,6 +16,56 @@
|
||||
The license applies to all entries newer than 2009-04-28.
|
||||
*/
|
||||
|
||||
2011-04-13 11:07 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
|
||||
* harbour/include/hbapicdp.h
|
||||
* harbour/include/hbcdpreg.h
|
||||
* harbour/src/rtl/cdpapi.c
|
||||
* harbour/src/rtl/cdpapihb.c
|
||||
+ added support for user defined character encoding.
|
||||
Now it's possible to easy create and register in HVM CPs using any
|
||||
encoding so I expected that users interested in adding support for
|
||||
some exotic character encoding will create such mapping instead
|
||||
of asking for it.
|
||||
% moved support for codepages using multibyte characters mapped to
|
||||
single unicode values to user defined encoding - it nicely simplify
|
||||
the code and eliminate unnecessary overhead in other CPs.
|
||||
* allow to chose UTF8 as HVM CP
|
||||
|
||||
* harbour/src/rtl/idle.c
|
||||
! fixed idle mode flag resetting
|
||||
|
||||
* harbour/src/rdd/hbsix/sxutil.c
|
||||
! fixed SX_SLIMFAST() results when nested quoting with (") and (') is
|
||||
used
|
||||
|
||||
+ harbour/tests/big5_gen.prg
|
||||
+ added code to generate C source with conversion tables between
|
||||
BIG5 and UCS16 using data defined by Unicode, Inc. in BIG5.TXT
|
||||
|
||||
+ harbour/src/codepage/cp_utf8.c
|
||||
+ added alternative UTF8 Harbour CP (UTF8ASC) as an example Harbour
|
||||
user defined codapged using multibyte character encoding
|
||||
|
||||
* harbour/src/codepage/Makefile
|
||||
+ harbour/src/codepage/cp_u16le.c
|
||||
+ added Harbour codepage using UTF16 little endian encoding
|
||||
|
||||
* harbour/src/codepage/Makefile
|
||||
+ harbour/src/codepage/big5.c
|
||||
+ harbour/src/codepage/cp_big5.c
|
||||
+ added BIG5 Harbour CP. It can be used with programs using Harbour
|
||||
STR API with automatic translations.
|
||||
; This CP needs really big translation tables. I added code which
|
||||
makes some very simple compression which reduced raw size from
|
||||
176100 bytes to 77354 but it's still large 77KB so maybe we should
|
||||
think about moving this CP to other Harbour codpage library which
|
||||
is not part of harbour shared library harbour*{.dll|.so|.dyn|...}
|
||||
Alternatively I can try to reduce static size to about 30KB and
|
||||
then build necessary tables dynamically at runtime when they are
|
||||
used first time though in such case I will need additional 177KB
|
||||
of dynamic memory instead of 77KB of static memory used by current
|
||||
code.
|
||||
|
||||
2011-04-13 04:56 UTC+0200 Viktor Szakats (harbour.01 syenar.hu)
|
||||
* src/rtl/fstemp.c
|
||||
! hb_fsTempDir(): due to misplaced else branche it returned
|
||||
|
||||
@@ -77,6 +77,26 @@ HB_EXTERN_BEGIN
|
||||
typedef unsigned short HB_WCHAR;
|
||||
#endif
|
||||
|
||||
|
||||
/* forward declaration */
|
||||
struct _HB_CODEPAGE;
|
||||
|
||||
#define HB_CODEPAGE_PTR struct _HB_CODEPAGE *
|
||||
|
||||
#define HB_CDPCHAR_GET( c, s, n, i, w ) (c)->wcharGet( c, s, n, i, w )
|
||||
#define HB_CDPCHAR_PUT( c, s, n, i, w ) (c)->wcharPut( c, s, n, i, w )
|
||||
#define HB_CDPCHAR_LEN( c, w ) (c)->wcharLen( c, w )
|
||||
|
||||
#define HB_CDP_GET_FUNC( func ) HB_BOOL func( HB_CODEPAGE_PTR cdp, const char * pSrc, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR * wc )
|
||||
typedef HB_CDP_GET_FUNC( ( * PHB_CDP_GET_FUNC ) );
|
||||
|
||||
#define HB_CDP_PUT_FUNC( func ) HB_BOOL func( HB_CODEPAGE_PTR cdp, char * pDst, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR wc )
|
||||
typedef HB_CDP_PUT_FUNC( ( * PHB_CDP_PUT_FUNC ) );
|
||||
|
||||
#define HB_CDP_LEN_FUNC( func ) int func( HB_CODEPAGE_PTR cdp, HB_WCHAR wc )
|
||||
typedef HB_CDP_LEN_FUNC( ( * PHB_CDP_LEN_FUNC ) );
|
||||
|
||||
|
||||
typedef struct _HB_UNITABLE
|
||||
{
|
||||
const char * uniID;
|
||||
@@ -106,6 +126,10 @@ typedef struct _HB_CODEPAGE
|
||||
const HB_UCHAR * sort;
|
||||
const HB_UCHAR * acc;
|
||||
int nACSort;
|
||||
HB_BOOL fCustom;
|
||||
PHB_CDP_GET_FUNC wcharGet;
|
||||
PHB_CDP_PUT_FUNC wcharPut;
|
||||
PHB_CDP_LEN_FUNC wcharLen;
|
||||
int nMulti;
|
||||
int nMultiUC;
|
||||
PHB_MULTICHAR multi;
|
||||
@@ -312,6 +336,15 @@ extern HB_UNITABLE hb_uniTbl_646YU;
|
||||
extern HB_EXPORT PHB_CODEPAGE hb_vmCDP( void );
|
||||
extern HB_EXPORT void hb_vmSetCDP( PHB_CODEPAGE pCDP );
|
||||
|
||||
|
||||
/* character flags */
|
||||
#define HB_CDP_DIGIT 0x01
|
||||
#define HB_CDP_ALPHA 0x02
|
||||
#define HB_CDP_LOWER 0x04
|
||||
#define HB_CDP_UPPER 0x08
|
||||
#define HB_CDP_MULTI1 0x10
|
||||
#define HB_CDP_MULTI2 0x20
|
||||
|
||||
/* accented character sorting */
|
||||
#define HB_CDP_ACSORT_NONE 0 /* no special sorting for accented
|
||||
characters */
|
||||
@@ -344,6 +377,7 @@ extern HB_EXPORT HB_BOOL hb_cdpRegisterNew( const char * id,
|
||||
const char * pszLower,
|
||||
unsigned int nACSort,
|
||||
unsigned int nCaseSort );
|
||||
extern HB_EXPORT void hb_cdpBuildTransTable( PHB_UNITABLE uniTable );
|
||||
extern HB_EXPORT void hb_cdpReleaseAll( void );
|
||||
extern HB_EXPORT const char * hb_cdpID( void );
|
||||
extern HB_EXPORT PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp );
|
||||
|
||||
@@ -61,6 +61,18 @@ HB_CALL_ON_STARTUP_BEGIN( HB_MACRONAME_JOIN( _hb_codepage_Init_, HB_CP_ID ) )
|
||||
#endif
|
||||
|
||||
#if defined( HB_CP_RAW )
|
||||
#if !defined( HB_CP_CUSTOM )
|
||||
#if defined( HB_CP_GET_FUNC ) && \
|
||||
defined( HB_CP_PUT_FUNC ) && \
|
||||
defined( HB_CP_LEN_FUNC )
|
||||
#define HB_CP_CUSTOM HB_TRUE
|
||||
#else
|
||||
#define HB_CP_CUSTOM HB_FALSE
|
||||
#define HB_CP_GET_FUNC NULL
|
||||
#define HB_CP_PUT_FUNC NULL
|
||||
#define HB_CP_LEN_FUNC NULL
|
||||
#endif
|
||||
#endif
|
||||
static HB_CODEPAGE s_codePage =
|
||||
{
|
||||
HB_MACRO2STRING( HB_CP_ID ),
|
||||
@@ -72,12 +84,19 @@ HB_CALL_ON_STARTUP_BEGIN( HB_MACRONAME_JOIN( _hb_codepage_Init_, HB_CP_ID ) )
|
||||
s_sort,
|
||||
NULL,
|
||||
HB_CDP_ACSORT_NONE,
|
||||
HB_CP_CUSTOM,
|
||||
HB_CP_GET_FUNC,
|
||||
HB_CP_PUT_FUNC,
|
||||
HB_CP_LEN_FUNC,
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
};
|
||||
#if defined( HB_CP_INIT )
|
||||
HB_CP_INIT( &s_codePage );
|
||||
#endif
|
||||
hb_cdpRegisterRaw( &s_codePage );
|
||||
#else
|
||||
#ifndef HB_CP_CSSORT
|
||||
|
||||
@@ -5,6 +5,9 @@
|
||||
ROOT := ../../
|
||||
|
||||
C_SOURCES := \
|
||||
cp_big5.c \
|
||||
cp_utf8.c \
|
||||
cp_u16le.c \
|
||||
cpbg866.c \
|
||||
cpbgiso.c \
|
||||
cpbgmik.c \
|
||||
|
||||
4897
harbour/src/codepage/big5.c
Normal file
4897
harbour/src/codepage/big5.c
Normal file
File diff suppressed because it is too large
Load Diff
188
harbour/src/codepage/cp_big5.c
Normal file
188
harbour/src/codepage/cp_big5.c
Normal file
@@ -0,0 +1,188 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Harbour Project source code:
|
||||
* example of Harbour codepage using BIG5 encoding
|
||||
*
|
||||
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
|
||||
* www - http://www.harbour-project.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this software; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
||||
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
|
||||
*
|
||||
* As a special exception, the Harbour Project gives permission for
|
||||
* additional uses of the text contained in its release of Harbour.
|
||||
*
|
||||
* The exception is that, if you link the Harbour libraries with other
|
||||
* files to produce an executable, this does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public License.
|
||||
* Your use of that executable is in no way restricted on account of
|
||||
* linking the Harbour library code into it.
|
||||
*
|
||||
* This exception does not however invalidate any other reasons why
|
||||
* the executable file might be covered by the GNU General Public License.
|
||||
*
|
||||
* This exception applies only to the code released by the Harbour
|
||||
* Project under the name Harbour. If you copy code from other
|
||||
* Harbour Project or Free Software Foundation releases into a copy of
|
||||
* Harbour, as the General Public License permits, the exception does
|
||||
* not apply to the code that you add in this way. To avoid misleading
|
||||
* anyone as to the status of such modified files, you must delete
|
||||
* this exception notice from them.
|
||||
*
|
||||
* If you write modifications of your own for Harbour, it is your choice
|
||||
* whether to permit this exception to apply to your modifications.
|
||||
* If you do not wish that, delete this exception notice.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "hbapi.h"
|
||||
#include "hbapicdp.h"
|
||||
|
||||
#include "big5.c"
|
||||
|
||||
static HB_CDP_GET_FUNC( BIG5_get )
|
||||
{
|
||||
*wc = 0;
|
||||
if( *pnIndex < nLen )
|
||||
{
|
||||
HB_UCHAR uc = pSrc[ ( * pnIndex )++ ];
|
||||
|
||||
if( uc >= ( HB_BIG5_FIRST >> 8 ) && uc <= ( HB_BIG5_LAST >> 8 ) &&
|
||||
*pnIndex < nLen )
|
||||
{
|
||||
*wc = s_big5_to_ucs16( ( ( int ) uc << 8 ) | ( HB_UCHAR ) pSrc[ * pnIndex ] );
|
||||
if( *wc )
|
||||
{
|
||||
( * pnIndex )++;
|
||||
return HB_TRUE;
|
||||
}
|
||||
}
|
||||
*wc = cdp->uniTable->uniCodes[ uc ];
|
||||
if( *wc == 0 )
|
||||
*wc = uc;
|
||||
return HB_TRUE;
|
||||
}
|
||||
return HB_FALSE;
|
||||
}
|
||||
|
||||
static HB_CDP_PUT_FUNC( BIG5_put )
|
||||
{
|
||||
if( * pnIndex < nLen )
|
||||
{
|
||||
HB_USHORT b5 = s_ucs16_to_big5( wc );
|
||||
|
||||
if( b5 )
|
||||
{
|
||||
if( * pnIndex + 1 < nLen )
|
||||
{
|
||||
HB_PUT_BE_UINT16( &pDst[ ( * pnIndex ) ], b5 );
|
||||
* pnIndex += 2;
|
||||
return HB_TRUE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( cdp->uniTable->uniTrans == NULL )
|
||||
hb_cdpBuildTransTable( cdp->uniTable );
|
||||
|
||||
if( wc <= cdp->uniTable->wcMax &&
|
||||
cdp->uniTable->uniTrans[ wc ] )
|
||||
pDst[ ( * pnIndex )++ ] = cdp->uniTable->uniTrans[ wc ];
|
||||
else
|
||||
pDst[ ( * pnIndex )++ ] = wc >= 0x100 ? '?' : ( HB_UCHAR ) wc;
|
||||
return HB_TRUE;
|
||||
}
|
||||
}
|
||||
return HB_FALSE;
|
||||
}
|
||||
|
||||
static HB_CDP_LEN_FUNC( BIG5_len )
|
||||
{
|
||||
HB_USHORT b5 = s_ucs16_to_big5( wc );
|
||||
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
|
||||
return b5 ? 2 : 1;
|
||||
}
|
||||
|
||||
/*
 * Allocate one 0x300 byte buffer with hb_xgrab() and split it into
 * three 0x100 byte tables stored in cdp: character flags, upper case
 * mapping and lower case mapping. Each table is indexed by byte value
 * and filled using the HB_IS*() / HB_TO*() macros.
 * The buffer pointer is kept in cdp->buffer; this function does not
 * release it.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pTables = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   HB_UCHAR * pFlags = pTables;
   HB_UCHAR * pUpper = pTables + 0x100;
   HB_UCHAR * pLower = pTables + 0x200;
   int ch;

   cdp->buffer = pTables;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( ch = 0; ch < 0x100; ++ch )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( ch ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( ch ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( ch ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( ch ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ ch ] = ucFlags;
      pUpper[ ch ] = ( HB_UCHAR ) HB_TOUPPER( ch );
      pLower[ ch ] = ( HB_UCHAR ) HB_TOLOWER( ch );
   }

   /* disabled self-check: reports BIG5 codes which do not survive the
      BIG5 -> UCS16 -> BIG5 round trip */
#if 0
   for( ch = 0; ch < 0x10000; ++ch )
   {
      HB_WCHAR wc = s_big5_to_ucs16( ch );
      if( wc )
      {
         if( ch != s_ucs16_to_big5( wc ) )
         {
            printf( "irreversible translation: (BIG5)%04X -> U+%04X -> (BIG5)%04X\r\n",
                    ch, wc, s_ucs16_to_big5( wc ) );
            fflush(stdout);
         }
      }
   }
#endif
}
|
||||
|
||||
/*
 * Codepage description for hbcdpreg.h, which must be included after all
 * of the definitions below.
 */

/* use the raw registration path ( hb_cdpRegisterRaw() ) of hbcdpreg.h */
#define HB_CP_RAW

/* codepage identity and base unicode table */
#define HB_CP_ID           BIG5
#define HB_CP_INFO         "BIG5-5"
#define HB_CP_UNITB        HB_UNITB_437

/* called by the registration code with the address of the codepage
   structure before it is registered */
#define HB_CP_INIT         hb_cp_init

/* character conversion callbacks; when all three are defined hbcdpreg.h
   marks the codepage as a custom one */
#define HB_CP_GET_FUNC     BIG5_get
#define HB_CP_PUT_FUNC     BIG5_put
#define HB_CP_LEN_FUNC     BIG5_len

/* no dedicated callbacks for these operations
   NOTE(review): they are not referenced by the visible part of
   hbcdpreg.h - confirm they are still needed */
#define HB_CP_CMP_FUNC     NULL
#define HB_CP_FLAG_FUNC    NULL
#define HB_CP_UPPER_FUNC   NULL
#define HB_CP_LOWER_FUNC   NULL

/* no static tables: flags, upper and lower are set by hb_cp_init() */
#define s_flags            NULL
#define s_upper            NULL
#define s_lower            NULL
#define s_sort             NULL

/* include CP registration code */
#include "hbcdpreg.h"
|
||||
141
harbour/src/codepage/cp_u16le.c
Normal file
141
harbour/src/codepage/cp_u16le.c
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Harbour Project source code:
|
||||
* example of Harbour codepage using UTF-16 little endian encoding
|
||||
*
|
||||
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
|
||||
* www - http://www.harbour-project.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this software; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
||||
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
|
||||
*
|
||||
* As a special exception, the Harbour Project gives permission for
|
||||
* additional uses of the text contained in its release of Harbour.
|
||||
*
|
||||
* The exception is that, if you link the Harbour libraries with other
|
||||
* files to produce an executable, this does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public License.
|
||||
* Your use of that executable is in no way restricted on account of
|
||||
* linking the Harbour library code into it.
|
||||
*
|
||||
* This exception does not however invalidate any other reasons why
|
||||
* the executable file might be covered by the GNU General Public License.
|
||||
*
|
||||
* This exception applies only to the code released by the Harbour
|
||||
* Project under the name Harbour. If you copy code from other
|
||||
* Harbour Project or Free Software Foundation releases into a copy of
|
||||
* Harbour, as the General Public License permits, the exception does
|
||||
* not apply to the code that you add in this way. To avoid misleading
|
||||
* anyone as to the status of such modified files, you must delete
|
||||
* this exception notice from them.
|
||||
*
|
||||
* If you write modifications of your own for Harbour, it is your choice
|
||||
* whether to permit this exception to apply to your modifications.
|
||||
* If you do not wish that, delete this exception notice.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "hbapi.h"
|
||||
#include "hbapicdp.h"
|
||||
|
||||
static HB_CDP_GET_FUNC( UTF16LE_get )
|
||||
{
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
|
||||
if( * pnIndex + 1 < nLen )
|
||||
{
|
||||
* wc = HB_GET_LE_UINT16( &pSrc[ * pnIndex ] );
|
||||
* pnIndex += 2;
|
||||
return HB_TRUE;
|
||||
}
|
||||
else
|
||||
*wc = 0;
|
||||
return HB_FALSE;
|
||||
}
|
||||
|
||||
static HB_CDP_PUT_FUNC( UTF16LE_put )
|
||||
{
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
|
||||
if( * pnIndex + 1 < nLen )
|
||||
{
|
||||
HB_PUT_LE_UINT16( &pDst[ * pnIndex ], wc );
|
||||
* pnIndex += 2;
|
||||
return HB_TRUE;
|
||||
}
|
||||
return HB_FALSE;
|
||||
}
|
||||
|
||||
static HB_CDP_LEN_FUNC( UTF16LE_len )
|
||||
{
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
HB_SYMBOL_UNUSED( wc );
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
/*
 * Allocate one 0x300 byte buffer with hb_xgrab() and split it into
 * three 0x100 byte tables stored in cdp: character flags, upper case
 * mapping and lower case mapping. Each table is indexed by byte value
 * and filled using the HB_IS*() / HB_TO*() macros.
 * The buffer pointer is kept in cdp->buffer; this function does not
 * release it.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pTables = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   HB_UCHAR * pFlags = pTables;
   HB_UCHAR * pUpper = pTables + 0x100;
   HB_UCHAR * pLower = pTables + 0x200;
   int ch;

   cdp->buffer = pTables;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( ch = 0; ch < 0x100; ++ch )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( ch ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( ch ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( ch ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( ch ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ ch ] = ucFlags;
      pUpper[ ch ] = ( HB_UCHAR ) HB_TOUPPER( ch );
      pLower[ ch ] = ( HB_UCHAR ) HB_TOLOWER( ch );
   }
}
|
||||
|
||||
/*
 * Codepage description for hbcdpreg.h, which must be included after all
 * of the definitions below.
 */

/* use the raw registration path ( hb_cdpRegisterRaw() ) of hbcdpreg.h */
#define HB_CP_RAW

/* codepage identity and base unicode table */
#define HB_CP_ID           UTF16LE
#define HB_CP_INFO         "UTF-16 little endian"
#define HB_CP_UNITB        HB_UNITB_437

/* called by the registration code with the address of the codepage
   structure before it is registered */
#define HB_CP_INIT         hb_cp_init

/* character conversion callbacks; when all three are defined hbcdpreg.h
   marks the codepage as a custom one */
#define HB_CP_GET_FUNC     UTF16LE_get
#define HB_CP_PUT_FUNC     UTF16LE_put
#define HB_CP_LEN_FUNC     UTF16LE_len

/* no dedicated callbacks for these operations
   NOTE(review): they are not referenced by the visible part of
   hbcdpreg.h - confirm they are still needed */
#define HB_CP_CMP_FUNC     NULL
#define HB_CP_FLAG_FUNC    NULL
#define HB_CP_UPPER_FUNC   NULL
#define HB_CP_LOWER_FUNC   NULL

/* no static tables: flags, upper and lower are set by hb_cp_init() */
#define s_flags            NULL
#define s_upper            NULL
#define s_lower            NULL
#define s_sort             NULL

/* include CP registration code */
#include "hbcdpreg.h"
|
||||
149
harbour/src/codepage/cp_utf8.c
Normal file
149
harbour/src/codepage/cp_utf8.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Harbour Project source code:
|
||||
* example of Harbour codepage using UTF8 encoding
|
||||
*
|
||||
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
|
||||
* www - http://www.harbour-project.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this software; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
||||
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
|
||||
*
|
||||
* As a special exception, the Harbour Project gives permission for
|
||||
* additional uses of the text contained in its release of Harbour.
|
||||
*
|
||||
* The exception is that, if you link the Harbour libraries with other
|
||||
* files to produce an executable, this does not by itself cause the
|
||||
* resulting executable to be covered by the GNU General Public License.
|
||||
* Your use of that executable is in no way restricted on account of
|
||||
* linking the Harbour library code into it.
|
||||
*
|
||||
* This exception does not however invalidate any other reasons why
|
||||
* the executable file might be covered by the GNU General Public License.
|
||||
*
|
||||
* This exception applies only to the code released by the Harbour
|
||||
* Project under the name Harbour. If you copy code from other
|
||||
* Harbour Project or Free Software Foundation releases into a copy of
|
||||
* Harbour, as the General Public License permits, the exception does
|
||||
* not apply to the code that you add in this way. To avoid misleading
|
||||
* anyone as to the status of such modified files, you must delete
|
||||
* this exception notice from them.
|
||||
*
|
||||
* If you write modifications of your own for Harbour, it is your choice
|
||||
* whether to permit this exception to apply to your modifications.
|
||||
* If you do not wish that, delete this exception notice.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "hbapi.h"
|
||||
#include "hbapicdp.h"
|
||||
|
||||
static HB_CDP_GET_FUNC( UTF8_get )
|
||||
{
|
||||
HB_SIZE nIndex = *pnIndex;
|
||||
int n = 0;
|
||||
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
|
||||
*wc = 0;
|
||||
while( nIndex < nLen )
|
||||
{
|
||||
if( hb_cdpUTF8ToU16NextChar( ( HB_UCHAR ) pSrc[ nIndex++ ], &n, wc ) )
|
||||
{
|
||||
if( n == 0 )
|
||||
{
|
||||
*pnIndex = nIndex;
|
||||
return HB_TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return HB_FALSE;
|
||||
}
|
||||
|
||||
static HB_CDP_PUT_FUNC( UTF8_put )
|
||||
{
|
||||
int i = hb_cdpUTF8CharSize( wc );
|
||||
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
|
||||
if( *pnIndex + i <= nLen )
|
||||
{
|
||||
hb_cdpU16CharToUTF8( &pDst[ *pnIndex ], wc );
|
||||
*pnIndex += i;
|
||||
return HB_TRUE;
|
||||
}
|
||||
return HB_FALSE;
|
||||
}
|
||||
|
||||
static HB_CDP_LEN_FUNC( UTF8_len )
|
||||
{
|
||||
HB_SYMBOL_UNUSED( cdp );
|
||||
|
||||
return hb_cdpUTF8CharSize( wc );
|
||||
}
|
||||
|
||||
/*
 * Allocate one 0x300 byte buffer with hb_xgrab() and split it into
 * three 0x100 byte tables stored in cdp: character flags, upper case
 * mapping and lower case mapping. Each table is indexed by byte value
 * and filled using the HB_IS*() / HB_TO*() macros.
 * The buffer pointer is kept in cdp->buffer; this function does not
 * release it.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pTables = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   HB_UCHAR * pFlags = pTables;
   HB_UCHAR * pUpper = pTables + 0x100;
   HB_UCHAR * pLower = pTables + 0x200;
   int ch;

   cdp->buffer = pTables;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( ch = 0; ch < 0x100; ++ch )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( ch ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( ch ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( ch ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( ch ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ ch ] = ucFlags;
      pUpper[ ch ] = ( HB_UCHAR ) HB_TOUPPER( ch );
      pLower[ ch ] = ( HB_UCHAR ) HB_TOLOWER( ch );
   }
}
|
||||
|
||||
/*
 * Codepage description for hbcdpreg.h, which must be included after all
 * of the definitions below.
 */

/* use the raw registration path ( hb_cdpRegisterRaw() ) of hbcdpreg.h */
#define HB_CP_RAW

/* codepage identity and base unicode table */
#define HB_CP_ID           UTF8ASC
#define HB_CP_INFO         "UTF-8 ASCII letters"
#define HB_CP_UNITB        HB_UNITB_437

/* called by the registration code with the address of the codepage
   structure before it is registered */
#define HB_CP_INIT         hb_cp_init

/* character conversion callbacks; when all three are defined hbcdpreg.h
   marks the codepage as a custom one */
#define HB_CP_GET_FUNC     UTF8_get
#define HB_CP_PUT_FUNC     UTF8_put
#define HB_CP_LEN_FUNC     UTF8_len

/* no dedicated callbacks for these operations
   NOTE(review): they are not referenced by the visible part of
   hbcdpreg.h - confirm they are still needed */
#define HB_CP_CMP_FUNC     NULL
#define HB_CP_FLAG_FUNC    NULL
#define HB_CP_UPPER_FUNC   NULL
#define HB_CP_LOWER_FUNC   NULL

/* no static tables: flags, upper and lower are set by hb_cp_init() */
#define s_flags            NULL
#define s_upper            NULL
#define s_lower            NULL
#define s_sort             NULL

/* include CP registration code */
#include "hbcdpreg.h"
|
||||
@@ -72,13 +72,14 @@ HB_FUNC( SX_SLIMFAST )
|
||||
{
|
||||
if( c == cQuote )
|
||||
cQuote = 0;
|
||||
else if( c == '"' || c == '\'' )
|
||||
cQuote = c;
|
||||
else if( !cQuote )
|
||||
{
|
||||
if( c == ' ' && nDst && szDst[nDst - 1] == ' ' )
|
||||
if( c == '"' || c == '\'' )
|
||||
cQuote = c;
|
||||
else if( c == ' ' && nDst && szDst[nDst - 1] == ' ' )
|
||||
continue;
|
||||
c = ( char ) hb_charUpper( ( HB_UCHAR ) c );
|
||||
else
|
||||
c = ( char ) hb_charUpper( ( HB_UCHAR ) c );
|
||||
}
|
||||
szDst[nDst++] = c;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -159,7 +159,9 @@ HB_FUNC( HB_TRANSLATE )
|
||||
PHB_CODEPAGE cdpIn = szIdIn ? hb_cdpFindExt( szIdIn ) : hb_vmCDP();
|
||||
PHB_CODEPAGE cdpOut = szIdOut ? hb_cdpFindExt( szIdOut ) : hb_vmCDP();
|
||||
|
||||
if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable )
|
||||
if( cdpIn && cdpOut && cdpIn != cdpOut &&
|
||||
( cdpIn->uniTable != cdpOut->uniTable ||
|
||||
cdpIn->fCustom || cdpOut->fCustom ) )
|
||||
{
|
||||
char * szResult = hb_cdpnDup( hb_parc( 1 ), &nLen, cdpIn, cdpOut );
|
||||
hb_retclen_buffer( szResult, nLen );
|
||||
|
||||
@@ -131,8 +131,8 @@ void hb_idleState( void )
|
||||
pIdleData->fCollectGarbage = HB_TRUE;
|
||||
}
|
||||
}
|
||||
pIdleData->fIamIdle = HB_FALSE;
|
||||
}
|
||||
pIdleData->fIamIdle = HB_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
265
harbour/tests/big5_gen.prg
Normal file
265
harbour/tests/big5_gen.prg
Normal file
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* $Id$
|
||||
*/
|
||||
|
||||
/*
|
||||
* Harbour Project source code:
|
||||
* code to generate C source with conversion tables between BIG5 and UCS16
|
||||
* using data defined by Unicode, Inc. in BIG5.TXT
|
||||
*
|
||||
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
|
||||
* www - http://harbour-project.org
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * Read the BIG5 <-> Unicode mapping from "BIG5.TXT" in the current
 * directory, build compressed lookup tables for both directions and
 * write the generated C source to "big5.c".
 *
 * Mapping lines are the ones starting with "0x" that contain a second
 * "0x" at column 7 or later; the four hex digits after each prefix are
 * the BIG5 code and the Unicode value. Progress and table statistics
 * are printed to the console. Nothing is written when a mapping line
 * cannot be parsed or when the input contains no mappings at all.
 */
proc main()
   local cLine, aVal, aVal2, aValU, aValU2, hVal, aInd, ;
         n, nn, nBG5, nU16, nMin, nMax, nUMin, nUMax, cResult, nBit

   /* direct lookup tables indexed by code value, 0 means "no mapping" */
   aVal := afill( array( 0x10000 ), 0 )
   aValU := afill( array( 0x10000 ), 0 )
   nMin := nUMin := 0xFFFF
   nMax := nUMax := 0x0000

   /* CR characters are dropped and lines are split on LF so the input is
      parsed the same way whatever line ending convention the file or the
      host platform uses */
   for each cLine in hb_aTokens( strtran( hb_memoRead( "BIG5.TXT" ), chr( 13 ) ), chr( 10 ) )
      if cLine = "0x" .and. ( n := hb_at( "0x", cLine, 7 ) ) != 0
         nBG5 := hb_hexToNum( substr( cLine, 3, 4 ) )
         nU16 := hb_hexToNum( substr( cLine, n + 2, 4 ) )
         if nBG5 == 0 .or. nU16 == 0
            ? "unrecognized line:", cLine:__enumIndex()
            return
         elseif nBG5 > 0xFFFF .or. nU16 > 0xFFFF
            ? "wrong character range at line:", cLine:__enumIndex()
            return
         endif
         aVal[ nBG5 ] := nU16
         if nBG5 > nMax
            nMax := nBG5
         endif
         if nBG5 < nMin
            nMin := nBG5
         endif
         if nU16 > nUMax
            nUMax := nU16
         endif
         if nU16 < nUMin
            nUMin := nU16
         endif
      endif
   next

   /* a missing, empty or unparsable input file leaves the ranges in
      their initial state; generating code from it would produce zero
      length C arrays */
   if nMin > nMax
      ? "no BIG5 mappings found in BIG5.TXT"
      return
   endif

   /* reverse table: Unicode value -> BIG5 code */
   for n := 1 to len( aVal )
      if aVal[ n ] != 0
         aValU[ aVal[ n ] ] := n
      endif
   next

   ? "BIG5->UCS16 tables."
   n := min_size( aVal, nMin, nMax, @nBit )
   ? "raw size:", hb_ntos( ( nMax - nMin + 1 ) * 2 )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBit ) ), "byte blocks"
   calc_size( aVal, nMin, nMax, nBit, @hVal, @aInd, @nn )
   aVal2 := hash_to_array( hVal )

   cResult := ;
      "/*" + hb_eol() + ;
      " * $Id$" + hb_eol() + ;
      " */" + hb_eol() + ;
      hb_eol() + ;
      "/*" + hb_eol() + ;
      " * Harbour Project source code:" + hb_eol() + ;
      " * BIG5 <-> UCS16 conversion tables" + hb_eol() + ;
      " * code generated automatically by tests/big5_gen.prg" + hb_eol() + ;
      " *" + hb_eol() + ;
      " * Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>" + hb_eol() + ;
      " * www - http://harbour-project.org" + hb_eol() + ;
      " *" + hb_eol() + ;
      " */" + hb_eol() + ;
      hb_eol()

   cResult += '#include "hbapi.h"' + hb_eol()
   cResult += hb_eol()
   cResult += "#define HB_BIG5_FIRST 0x" + hb_numToHex( nMin, 4 ) + hb_eol()
   cResult += "#define HB_BIG5_LAST 0x" + hb_numToHex( nMax, 4 ) + hb_eol()
   cResult += "#define HB_BIG5_BITS " + hb_ntos( nBit ) + hb_eol()
   cResult += hb_eol()

   cResult += array_to_code( aInd, "s_big5index", nn )
   cResult += hb_eol()
   cResult += array_to_code( aVal2, "s_big5_ucs16", 2 )
   cResult += hb_eol()
   cResult += index_func( "s_big5_to_ucs16", "s_big5index", "s_big5_ucs16", ;
                          "HB_BIG5_FIRST", "HB_BIG5_LAST", "HB_BIG5_BITS" )

   /* verify the compressed tables against the direct one */
   check_conv( aVal, aInd, aVal2, nMin, nMax, nBit )

   ?
   ? "UCS16->BIG5 tables."
   n := min_size( aValU, nUMin, nUMax, @nBit )
   ? "raw size:", hb_ntos( ( nUMax - nUMin + 1 ) * 2 )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBit ) ), "byte blocks"
   calc_size( aValU, nUMin, nUMax, nBit, @hVal, @aInd, @nn )
   aValU2 := hash_to_array( hVal )

   cResult += hb_eol()
   cResult += "#define HB_U16_FIRST 0x" + hb_numToHex( nUMin, 4 ) + hb_eol()
   cResult += "#define HB_U16_LAST 0x" + hb_numToHex( nUMax, 4 ) + hb_eol()
   cResult += "#define HB_U16_BITS " + hb_ntos( nBit ) + hb_eol()
   cResult += hb_eol()
   cResult += array_to_code( aInd, "s_ucs16index", nn )
   cResult += hb_eol()
   cResult += array_to_code( aValU2, "s_ucs16_big5", 2 )
   cResult += hb_eol()
   cResult += index_func( "s_ucs16_to_big5", "s_ucs16index", "s_ucs16_big5", ;
                          "HB_U16_FIRST", "HB_U16_LAST", "HB_U16_BITS" )

   check_conv( aValU, aInd, aValU2, nUMin, nUMax, nBit )

   hb_memowrit( "big5.c", cResult )
   return
|
||||
|
||||
/*
 * Return C source text defining a static const array called cName with
 * the values of aVal written as hex literals.
 *
 * nn is the element size in bytes: 1 produces an HB_BYTE array with up
 * to 12 values per row, any other value produces an HB_USHORT array
 * with up to 8 values per row. Each literal has nn * 2 hex digits.
 */
static function array_to_code( aVal, cName, nn )
   local cResult, nCol, n, nPerRow

   /* row width depends only on the element size */
   nPerRow := iif( nn == 1, 12, 8 )

   cResult := "static const " + ;
              iif( nn == 1, "HB_BYTE", "HB_USHORT" ) + " " + ;
              cName + "[ " + hb_ntos( len( aVal ) ) + " ] =" + hb_eol()
   cResult += "{" + hb_eol()
   nCol := 0
   for n := 1 to len( aVal )
      if ++nCol > nPerRow
         /* row is full: close it and start the next one */
         nCol := 1
         cResult += ","
         cResult += hb_eol()
         cResult += " "
      elseif n == 1
         cResult += " "
      else
         cResult += ", "
      endif
      cResult += "0x"
      cResult += hb_numToHex( aVal[ n ], nn * 2 )
   next
   cResult += hb_eol()
   cResult += "};" + hb_eol()

   return cResult
|
||||
|
||||
/*
 * Flatten the values of hVal into one array of numbers: every value is
 * a string which is cut into 2 byte pieces, each decoded with bin2w().
 * Values are processed in the enumeration order of the hash.
 */
static function hash_to_array( hVal )
   local aWords := {}, cBlock, nPos

   for each cBlock in hVal
      nPos := 1
      while nPos <= len( cBlock )
         aadd( aWords, bin2w( substr( cBlock, nPos, 2 ) ) )
         nPos += 2
      enddo
   next

   return aWords
|
||||
|
||||
/*
 * Try block sizes of 2^1 .. 2^16 entries with calc_size() and return
 * the smallest total table size found. The exponent giving that size
 * is stored in nBit, which callers pass by reference; on equal sizes
 * the smallest exponent wins.
 */
function min_size( aVal, nMin, nMax, nBit )
   local nTry, nCost, nBest := 0xFFFFFF

   for nTry := 1 to 16
      nCost := calc_size( aVal, nMin, nMax, nTry )
      if nCost < nBest
         nBest := nCost
         nBit := nTry
      endif
   next

   return nBest
|
||||
|
||||
/*
 * Split aVal[ nMin .. nMax ] into blocks of 2^nBit entries, keep only
 * one copy of each distinct block and return the resulting size in
 * bytes: index entries plus the stored blocks.
 *
 * Output parameters (meaningful when passed by reference):
 *   hVal - order preserving hash of the distinct blocks; each entry is
 *          stored as a string of i2bin() encoded values and is its own
 *          key
 *   aInd - for every block of the input, the 0-based position of its
 *          contents in hVal
 *   nn   - size of one index entry in bytes: 2 when aInd has more than
 *          256 items, otherwise 1
 */
function calc_size( aVal, nMin, nMax, nBit, hVal, aInd, nn )
   local nBlockLen := int( 2 ^ nBit ) * 2
   local cBlock := "", cKnown, nCode, nTotal

   aInd := {}
   hVal := {=>}
   hb_hKeepOrder( hVal, .t. )

   for nCode := nMin to nMax
      cBlock += i2bin( aVal[ nCode ] )
      if len( cBlock ) == nBlockLen
         /* assigning an existing key keeps its position */
         hVal[ cBlock ] := cBlock
         aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
         cBlock := ""
      endif
   next

   if ! cBlock == ""
      /* incomplete last block: reuse the first stored block starting
         with the same data ( "=" compares only the leading part ) */
      for each cKnown in hVal
         if cKnown = cBlock
            cBlock := cKnown
            exit
         endif
      next
      hVal[ cBlock ] := cBlock
      aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
   endif

   nn := iif( len( aInd ) > 256, 2, 1 )
   nTotal := len( aInd ) * nn
   for each cKnown in hVal
      nTotal += len( cKnown )
   next

   return nTotal
|
||||
|
||||
/*
 * Return C source text of a static lookup function named cName.
 *
 * The generated function takes an int, subtracts cMin from it and, when
 * the result is within 0 .. ( cMax - cMin ), returns the item of
 * cNameConv selected through the block index cNameInd using cBit as
 * the block size exponent; otherwise it returns 0. cMin, cMax and cBit
 * are inserted into the code as text.
 */
static function index_func( cName, cNameInd, cNameConv, cMin, cMax, cBit )
   local cCode, cEol := hb_eol()

   cCode := "static HB_USHORT " + cName + "( int n )" + cEol
   cCode += "{" + cEol
   cCode += " n -= " + cMin + ";" + cEol
   cCode += " if( n >= 0 && n <= ( " + cMax + " - " + cMin + ") )" + cEol
   cCode += " {" + cEol
   cCode += " return " + cNameConv + "[ ( " + cNameInd + ;
            "[ n >> " + cBit + " ] << " + cBit + " ) +" + cEol
   /* continuation line of the index expression */
   cCode += space( len( cNameInd ) + 16 ) + ;
            "( n & ( ( 1 << " + cBit + " ) - 1 ) ) ];" + cEol
   cCode += " }" + cEol
   cCode += " return 0;" + cEol
   cCode += "}" + cEol

   return cCode
|
||||
|
||||
/*
 * Look up n in the compressed tables: aInd holds 0-based block numbers
 * and aVal2 the block contents, with 2^nBit entries per block.
 * Returns 0 when n is outside nMin .. nMax.
 */
static function conv_get( n, aInd, aVal2, nMin, nMax, nBit )
   local nBlock, nOffset

   if n < nMin .or. n > nMax
      return 0
   endif

   nBlock := 2 ^ nBit
   nOffset := n - nMin

   /* block number comes from the index, position inside the block from
      the remainder; + 1 converts to 1-based array indexes */
   return aVal2[ aInd[ int( nOffset / nBlock ) + 1 ] * nBlock + nOffset % nBlock + 1 ]
|
||||
|
||||
/*
 * Compare every item of the direct table aVal with the value decoded
 * from the compressed tables by conv_get(). The first mismatch is
 * reported on the console and followed by BREAK.
 * NOTE(review): no BEGIN SEQUENCE is visible in this file, so BREAK
 * presumably terminates the program here instead of leaving the loop -
 * confirm this is intended.
 */
static function check_conv( aVal, aInd, aVal2, nMin, nMax, nBit )
   local nExpected, nCode, nDecoded

   for each nExpected in aVal
      nCode := nExpected:__enumIndex()
      nDecoded := conv_get( nCode, aInd, aVal2, nMin, nMax, nBit )
      if nExpected != nDecoded
         ? "Wrong decoding:", nCode, nExpected, nDecoded, len( aVal ), nMax, hb_eol()
         break
      endif
   next

   return nil
|
||||
Reference in New Issue
Block a user