2011-04-13 11:07 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)

* harbour/include/hbapicdp.h
  * harbour/include/hbcdpreg.h
  * harbour/src/rtl/cdpapi.c
  * harbour/src/rtl/cdpapihb.c
    + added support for user defined character encoding.
      Now it's possible to easily create and register in HVM CPs using any
      encoding, so I expect that users interested in adding support for
      some exotic character encoding will create such a mapping instead
      of asking for it.
    % moved support for codepages using multibyte characters mapped to
      single unicode values to user defined encoding - it nicely simplifies
      the code and eliminates unnecessary overhead in other CPs.
    * allow choosing UTF8 as HVM CP

  * harbour/src/rtl/idle.c
    ! fixed idle mode flag resetting

  * harbour/src/rdd/hbsix/sxutil.c
    ! fixed SX_SLIMFAST() results when nested quoting with (") and (') is
      used

  + harbour/tests/big5_gen.prg
    + added code to generate C source with conversion tables between
      BIG5 and UCS16 using data defined by Unicode, Inc. in BIG5.TXT

  + harbour/src/codepage/cp_utf8.c
    + added alternative UTF8 Harbour CP (UTF8ASC) as an example Harbour
      user defined codepage using multibyte character encoding

  * harbour/src/codepage/Makefile
  + harbour/src/codepage/cp_u16le.c
    + added Harbour codepage using UTF16 little endian encoding

  * harbour/src/codepage/Makefile
  + harbour/src/codepage/big5.c
  + harbour/src/codepage/cp_big5.c
    + added BIG5 Harbour CP. It can be used with programs using Harbour
      STR API with automatic translations.
    ; This CP needs really big translation tables. I added code which
      makes some very simple compression which reduced raw size from
      176100 bytes to 77354 but it's still large (77KB), so maybe we should
      think about moving this CP to another Harbour codepage library which
      is not part of harbour shared library harbour*{.dll|.so|.dyn|...}
      Alternatively I can try to reduce static size to about 30KB and
      then build necessary tables dynamically at runtime when they are
      used for the first time, though in such case I will need additional
      177KB of dynamic memory instead of 77KB of static memory used by
      current code.
This commit is contained in:
Przemyslaw Czerpak
2011-04-13 09:07:47 +00:00
parent 1b73c1bf54
commit 30a8610407
13 changed files with 6388 additions and 584 deletions

View File

@@ -16,6 +16,56 @@
The license applies to all entries newer than 2009-04-28.
*/
2011-04-13 11:07 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
* harbour/include/hbapicdp.h
* harbour/include/hbcdpreg.h
* harbour/src/rtl/cdpapi.c
* harbour/src/rtl/cdpapihb.c
+ added support for user defined character encoding.
Now it's possible to easily create and register in HVM CPs using any
encoding, so I expect that users interested in adding support for
some exotic character encoding will create such a mapping instead
of asking for it.
% moved support for codepages using multibyte characters mapped to
single unicode values to user defined encoding - it nicely simplifies
the code and eliminates unnecessary overhead in other CPs.
* allow choosing UTF8 as HVM CP
* harbour/src/rtl/idle.c
! fixed idle mode flag resetting
* harbour/src/rdd/hbsix/sxutil.c
! fixed SX_SLIMFAST() results when nested quoting with (") and (') is
used
+ harbour/tests/big5_gen.prg
+ added code to generate C source with conversion tables between
BIG5 and UCS16 using data defined by Unicode, Inc. in BIG5.TXT
+ harbour/src/codepage/cp_utf8.c
+ added alternative UTF8 Harbour CP (UTF8ASC) as an example Harbour
user defined codepage using multibyte character encoding
* harbour/src/codepage/Makefile
+ harbour/src/codepage/cp_u16le.c
+ added Harbour codepage using UTF16 little endian encoding
* harbour/src/codepage/Makefile
+ harbour/src/codepage/big5.c
+ harbour/src/codepage/cp_big5.c
+ added BIG5 Harbour CP. It can be used with programs using Harbour
STR API with automatic translations.
; This CP needs really big translation tables. I added code which
makes some very simple compression which reduced raw size from
176100 bytes to 77354 but it's still large (77KB), so maybe we should
think about moving this CP to another Harbour codepage library which
is not part of harbour shared library harbour*{.dll|.so|.dyn|...}
Alternatively I can try to reduce static size to about 30KB and
then build necessary tables dynamically at runtime when they are
used for the first time, though in such case I will need additional
177KB of dynamic memory instead of 77KB of static memory used by
current code.
2011-04-13 04:56 UTC+0200 Viktor Szakats (harbour.01 syenar.hu)
* src/rtl/fstemp.c
! hb_fsTempDir(): due to misplaced else branche it returned

View File

@@ -77,6 +77,26 @@ HB_EXTERN_BEGIN
typedef unsigned short HB_WCHAR;
#endif
/* forward declaration */
struct _HB_CODEPAGE;
#define HB_CODEPAGE_PTR struct _HB_CODEPAGE *
#define HB_CDPCHAR_GET( c, s, n, i, w ) (c)->wcharGet( c, s, n, i, w )
#define HB_CDPCHAR_PUT( c, s, n, i, w ) (c)->wcharPut( c, s, n, i, w )
#define HB_CDPCHAR_LEN( c, w ) (c)->wcharLen( c, w )
#define HB_CDP_GET_FUNC( func ) HB_BOOL func( HB_CODEPAGE_PTR cdp, const char * pSrc, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR * wc )
typedef HB_CDP_GET_FUNC( ( * PHB_CDP_GET_FUNC ) );
#define HB_CDP_PUT_FUNC( func ) HB_BOOL func( HB_CODEPAGE_PTR cdp, char * pDst, HB_SIZE nLen, HB_SIZE * pnIndex, HB_WCHAR wc )
typedef HB_CDP_PUT_FUNC( ( * PHB_CDP_PUT_FUNC ) );
#define HB_CDP_LEN_FUNC( func ) int func( HB_CODEPAGE_PTR cdp, HB_WCHAR wc )
typedef HB_CDP_LEN_FUNC( ( * PHB_CDP_LEN_FUNC ) );
typedef struct _HB_UNITABLE
{
const char * uniID;
@@ -106,6 +126,10 @@ typedef struct _HB_CODEPAGE
const HB_UCHAR * sort;
const HB_UCHAR * acc;
int nACSort;
HB_BOOL fCustom;
PHB_CDP_GET_FUNC wcharGet;
PHB_CDP_PUT_FUNC wcharPut;
PHB_CDP_LEN_FUNC wcharLen;
int nMulti;
int nMultiUC;
PHB_MULTICHAR multi;
@@ -312,6 +336,15 @@ extern HB_UNITABLE hb_uniTbl_646YU;
extern HB_EXPORT PHB_CODEPAGE hb_vmCDP( void );
extern HB_EXPORT void hb_vmSetCDP( PHB_CODEPAGE pCDP );
/* character flags */
#define HB_CDP_DIGIT 0x01
#define HB_CDP_ALPHA 0x02
#define HB_CDP_LOWER 0x04
#define HB_CDP_UPPER 0x08
#define HB_CDP_MULTI1 0x10
#define HB_CDP_MULTI2 0x20
/* accented character sorting */
#define HB_CDP_ACSORT_NONE 0 /* no special sorting for accented
characters */
@@ -344,6 +377,7 @@ extern HB_EXPORT HB_BOOL hb_cdpRegisterNew( const char * id,
const char * pszLower,
unsigned int nACSort,
unsigned int nCaseSort );
extern HB_EXPORT void hb_cdpBuildTransTable( PHB_UNITABLE uniTable );
extern HB_EXPORT void hb_cdpReleaseAll( void );
extern HB_EXPORT const char * hb_cdpID( void );
extern HB_EXPORT PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp );

View File

@@ -61,6 +61,18 @@ HB_CALL_ON_STARTUP_BEGIN( HB_MACRONAME_JOIN( _hb_codepage_Init_, HB_CP_ID ) )
#endif
#if defined( HB_CP_RAW )
#if !defined( HB_CP_CUSTOM )
#if defined( HB_CP_GET_FUNC ) && \
defined( HB_CP_PUT_FUNC ) && \
defined( HB_CP_LEN_FUNC )
#define HB_CP_CUSTOM HB_TRUE
#else
#define HB_CP_CUSTOM HB_FALSE
#define HB_CP_GET_FUNC NULL
#define HB_CP_PUT_FUNC NULL
#define HB_CP_LEN_FUNC NULL
#endif
#endif
static HB_CODEPAGE s_codePage =
{
HB_MACRO2STRING( HB_CP_ID ),
@@ -72,12 +84,19 @@ HB_CALL_ON_STARTUP_BEGIN( HB_MACRONAME_JOIN( _hb_codepage_Init_, HB_CP_ID ) )
s_sort,
NULL,
HB_CDP_ACSORT_NONE,
HB_CP_CUSTOM,
HB_CP_GET_FUNC,
HB_CP_PUT_FUNC,
HB_CP_LEN_FUNC,
0,
0,
NULL,
NULL,
NULL,
};
#if defined( HB_CP_INIT )
HB_CP_INIT( &s_codePage );
#endif
hb_cdpRegisterRaw( &s_codePage );
#else
#ifndef HB_CP_CSSORT

View File

@@ -5,6 +5,9 @@
ROOT := ../../
C_SOURCES := \
cp_big5.c \
cp_utf8.c \
cp_u16le.c \
cpbg866.c \
cpbgiso.c \
cpbgmik.c \

4897
harbour/src/codepage/big5.c Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,188 @@
/*
* $Id$
*/
/*
* Harbour Project source code:
* example of Harbour codepage using BIG5 encoding
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://www.harbour-project.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
*
* As a special exception, the Harbour Project gives permission for
* additional uses of the text contained in its release of Harbour.
*
* The exception is that, if you link the Harbour libraries with other
* files to produce an executable, this does not by itself cause the
* resulting executable to be covered by the GNU General Public License.
* Your use of that executable is in no way restricted on account of
* linking the Harbour library code into it.
*
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*
* This exception applies only to the code released by the Harbour
* Project under the name Harbour. If you copy code from other
* Harbour Project or Free Software Foundation releases into a copy of
* Harbour, as the General Public License permits, the exception does
* not apply to the code that you add in this way. To avoid misleading
* anyone as to the status of such modified files, you must delete
* this exception notice from them.
*
* If you write modifications of your own for Harbour, it is your choice
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice.
*
*/
#include "hbapi.h"
#include "hbapicdp.h"
#include "big5.c"
/*
 * Decode the next character of a BIG5 encoded buffer.
 *
 * Reads from pSrc (nLen bytes long) starting at *pnIndex, stores the
 * decoded value in *wc and moves *pnIndex past the consumed bytes.
 * Returns HB_FALSE, with *wc set to 0, only when *pnIndex is already
 * at or beyond nLen; otherwise one or two bytes are consumed and
 * HB_TRUE is returned.
 */
static HB_CDP_GET_FUNC( BIG5_get )
{
   HB_UCHAR ucLead;

   *wc = 0;
   if( *pnIndex >= nLen )
      return HB_FALSE;

   ucLead = pSrc[ *pnIndex ];
   ++( *pnIndex );

   /* a byte inside the BIG5 lead byte range followed by at least one
      more byte is tried as a two byte sequence first */
   if( *pnIndex < nLen &&
       ucLead >= ( HB_BIG5_FIRST >> 8 ) &&
       ucLead <= ( HB_BIG5_LAST >> 8 ) )
   {
      *wc = s_big5_to_ucs16( ( ( int ) ucLead << 8 ) |
                             ( HB_UCHAR ) pSrc[ *pnIndex ] );
      if( *wc != 0 )
      {
         /* valid pair - the trail byte is consumed too */
         ++( *pnIndex );
         return HB_TRUE;
      }
   }

   /* single byte: translate through the codepage unicode table and
      fall back to the raw byte value when the table entry is 0 */
   *wc = cdp->uniTable->uniCodes[ ucLead ];
   if( *wc == 0 )
      *wc = ucLead;

   return HB_TRUE;
}
/*
 * Encode one wide character into a BIG5 buffer.
 *
 * Writes at pDst[ *pnIndex ] (buffer size nLen) and advances *pnIndex
 * by the number of bytes stored.  Returns HB_FALSE without writing
 * when the buffer is full, or when wc needs two bytes and only one
 * is left.
 */
static HB_CDP_PUT_FUNC( BIG5_put )
{
   HB_USHORT usBig5;

   if( *pnIndex >= nLen )
      return HB_FALSE;

   usBig5 = s_ucs16_to_big5( wc );
   if( usBig5 != 0 )
   {
      /* two byte BIG5 code, stored in big endian order */
      if( *pnIndex + 1 >= nLen )
         return HB_FALSE;

      HB_PUT_BE_UINT16( &pDst[ *pnIndex ], usBig5 );
      *pnIndex += 2;
      return HB_TRUE;
   }

   /* no BIG5 code: use the single byte translation table of the
      codepage, building it on first use */
   if( cdp->uniTable->uniTrans == NULL )
      hb_cdpBuildTransTable( cdp->uniTable );

   if( wc <= cdp->uniTable->wcMax && cdp->uniTable->uniTrans[ wc ] )
      pDst[ *pnIndex ] = cdp->uniTable->uniTrans[ wc ];
   else
      /* untranslatable: keep values below 0x100, else store '?' */
      pDst[ *pnIndex ] = wc >= 0x100 ? '?' : ( HB_UCHAR ) wc;
   ++( *pnIndex );

   return HB_TRUE;
}
/*
 * Number of bytes BIG5_put() stores for the given wide character:
 * 2 when s_ucs16_to_big5() returns a non zero code, otherwise 1.
 */
static HB_CDP_LEN_FUNC( BIG5_len )
{
   HB_SYMBOL_UNUSED( cdp );

   if( s_ucs16_to_big5( wc ) != 0 )
      return 2;

   return 1;
}
/*
 * Codepage initialization hook (passed to hbcdpreg.h as HB_CP_INIT).
 *
 * Allocates one 0x300 byte buffer and splits it into three 0x100 byte
 * tables (character flags, upper case map, lower case map) which are
 * filled for byte values 0..0xFF with the HB_IS*() / HB_TO*() macros.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pTables, * pFlags, * pUpper, * pLower;
   int i;

   pTables = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   pFlags = pTables;
   pUpper = pTables + 0x100;
   pLower = pTables + 0x200;

   cdp->buffer = pTables;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( i = 0; i < 0x100; ++i )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( i ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( i ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( i ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( i ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ i ] = ucFlags;
      pUpper[ i ] = ( HB_UCHAR ) HB_TOUPPER( i );
      pLower[ i ] = ( HB_UCHAR ) HB_TOLOWER( i );
   }

   /* disabled self-check: reports every BIG5 code which does not
      survive a BIG5 -> UCS16 -> BIG5 round trip */
#if 0
   for( i = 0; i < 0x10000; ++i )
   {
      HB_WCHAR wc = s_big5_to_ucs16( i );
      if( wc )
      {
         if( i != s_ucs16_to_big5( wc ) )
         {
            printf( "irreversible translation: (BIG5)%04X -> U+%04X -> (BIG5)%04X\r\n",
                    i, wc, s_ucs16_to_big5( wc ) );
            fflush(stdout);
         }
      }
   }
#endif
}
/* Codepage description consumed by hbcdpreg.h, which is included at the
   end of this block. */
/* HB_CP_RAW selects the hbcdpreg.h branch which fills a static
   HB_CODEPAGE structure and passes it to hb_cdpRegisterRaw(). */
#define HB_CP_RAW
/* stringified by hbcdpreg.h as the first field of the structure */
#define HB_CP_ID BIG5
#define HB_CP_INFO "BIG5-5"
/* NOTE(review): presumably selects the unicode table reachable as
   cdp->uniTable, which BIG5_get()/BIG5_put() use for single byte
   characters - confirm in hbcdpreg.h */
#define HB_CP_UNITB HB_UNITB_437
/* with all three of GET/PUT/LEN defined hbcdpreg.h sets HB_CP_CUSTOM
   to HB_TRUE and stores these functions in the structure */
#define HB_CP_GET_FUNC BIG5_get
#define HB_CP_PUT_FUNC BIG5_put
#define HB_CP_LEN_FUNC BIG5_len
/* NOTE(review): the four macros below are not referenced in the part
   of hbcdpreg.h visible in this change - confirm they are used */
#define HB_CP_CMP_FUNC NULL
#define HB_CP_FLAG_FUNC NULL
#define HB_CP_UPPER_FUNC NULL
#define HB_CP_LOWER_FUNC NULL
/* no static tables: hb_cp_init() allocates flags/upper/lower at startup;
   s_sort is used directly in the structure initializer */
#define s_flags NULL
#define s_upper NULL
#define s_lower NULL
#define s_sort NULL
/* called with the address of the structure before it is registered */
#define HB_CP_INIT hb_cp_init
/* include CP registration code */
#include "hbcdpreg.h"

View File

@@ -0,0 +1,141 @@
/*
* $Id$
*/
/*
* Harbour Project source code:
* example of Harbour codepage using UTF-16 little endian encoding
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://www.harbour-project.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
*
* As a special exception, the Harbour Project gives permission for
* additional uses of the text contained in its release of Harbour.
*
* The exception is that, if you link the Harbour libraries with other
* files to produce an executable, this does not by itself cause the
* resulting executable to be covered by the GNU General Public License.
* Your use of that executable is in no way restricted on account of
* linking the Harbour library code into it.
*
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*
* This exception applies only to the code released by the Harbour
* Project under the name Harbour. If you copy code from other
* Harbour Project or Free Software Foundation releases into a copy of
* Harbour, as the General Public License permits, the exception does
* not apply to the code that you add in this way. To avoid misleading
* anyone as to the status of such modified files, you must delete
* this exception notice from them.
*
* If you write modifications of your own for Harbour, it is your choice
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice.
*
*/
#include "hbapi.h"
#include "hbapicdp.h"
/*
 * Read the next 16-bit unit of a UTF-16 little endian buffer.
 *
 * When at least two bytes are available at *pnIndex the value read by
 * HB_GET_LE_UINT16() is stored in *wc, *pnIndex is advanced by 2 and
 * HB_TRUE is returned.  Otherwise *wc is set to 0, *pnIndex is left
 * untouched and HB_FALSE is returned.  The unit is returned as is.
 */
static HB_CDP_GET_FUNC( UTF16LE_get )
{
   HB_SYMBOL_UNUSED( cdp );

   if( *pnIndex + 1 >= nLen )
   {
      /* fewer than two bytes left */
      *wc = 0;
      return HB_FALSE;
   }

   *wc = HB_GET_LE_UINT16( &pSrc[ *pnIndex ] );
   *pnIndex += 2;

   return HB_TRUE;
}
/*
 * Store one wide character as a UTF-16 little endian unit.
 *
 * Needs two free bytes at pDst[ *pnIndex ]; when they are available
 * the value is written with HB_PUT_LE_UINT16(), *pnIndex is advanced
 * by 2 and HB_TRUE is returned.  Otherwise nothing is written and
 * HB_FALSE is returned.
 */
static HB_CDP_PUT_FUNC( UTF16LE_put )
{
   HB_SYMBOL_UNUSED( cdp );

   if( *pnIndex + 1 >= nLen )
      return HB_FALSE;

   HB_PUT_LE_UINT16( &pDst[ *pnIndex ], wc );
   *pnIndex += 2;

   return HB_TRUE;
}
/*
 * Encoded size of a wide character: always 2 bytes, whatever the
 * character is.
 */
static HB_CDP_LEN_FUNC( UTF16LE_len )
{
   HB_SYMBOL_UNUSED( wc );
   HB_SYMBOL_UNUSED( cdp );

   return 2;
}
/*
 * Codepage initialization hook (passed to hbcdpreg.h as HB_CP_INIT).
 *
 * Allocates one 0x300 byte buffer and splits it into three 0x100 byte
 * tables (character flags, upper case map, lower case map) which are
 * filled for byte values 0..0xFF with the HB_IS*() / HB_TO*() macros.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pTables, * pFlags, * pUpper, * pLower;
   int ch;

   pTables = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   pFlags = pTables;
   pUpper = pTables + 0x100;
   pLower = pTables + 0x200;

   cdp->buffer = pTables;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( ch = 0; ch < 0x100; ++ch )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( ch ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( ch ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( ch ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( ch ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ ch ] = ucFlags;
      pUpper[ ch ] = ( HB_UCHAR ) HB_TOUPPER( ch );
      pLower[ ch ] = ( HB_UCHAR ) HB_TOLOWER( ch );
   }
}
/* Codepage description consumed by hbcdpreg.h, which is included at the
   end of this block. */
/* HB_CP_RAW selects the hbcdpreg.h branch which fills a static
   HB_CODEPAGE structure and passes it to hb_cdpRegisterRaw(). */
#define HB_CP_RAW
/* stringified by hbcdpreg.h as the first field of the structure */
#define HB_CP_ID UTF16LE
#define HB_CP_INFO "UTF-16 little endian"
/* NOTE(review): presumably selects the unicode table attached to the
   codepage; the UTF16LE_*() functions do not read it - confirm */
#define HB_CP_UNITB HB_UNITB_437
/* with all three of GET/PUT/LEN defined hbcdpreg.h sets HB_CP_CUSTOM
   to HB_TRUE and stores these functions in the structure */
#define HB_CP_GET_FUNC UTF16LE_get
#define HB_CP_PUT_FUNC UTF16LE_put
#define HB_CP_LEN_FUNC UTF16LE_len
/* NOTE(review): the four macros below are not referenced in the part
   of hbcdpreg.h visible in this change - confirm they are used */
#define HB_CP_CMP_FUNC NULL
#define HB_CP_FLAG_FUNC NULL
#define HB_CP_UPPER_FUNC NULL
#define HB_CP_LOWER_FUNC NULL
/* no static tables: hb_cp_init() allocates flags/upper/lower at startup;
   s_sort is used directly in the structure initializer */
#define s_flags NULL
#define s_upper NULL
#define s_lower NULL
#define s_sort NULL
/* called with the address of the structure before it is registered */
#define HB_CP_INIT hb_cp_init
/* include CP registration code */
#include "hbcdpreg.h"

View File

@@ -0,0 +1,149 @@
/*
* $Id$
*/
/*
* Harbour Project source code:
* example of Harbour codepage using UTF8 encoding
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://www.harbour-project.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
*
* As a special exception, the Harbour Project gives permission for
* additional uses of the text contained in its release of Harbour.
*
* The exception is that, if you link the Harbour libraries with other
* files to produce an executable, this does not by itself cause the
* resulting executable to be covered by the GNU General Public License.
* Your use of that executable is in no way restricted on account of
* linking the Harbour library code into it.
*
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*
* This exception applies only to the code released by the Harbour
* Project under the name Harbour. If you copy code from other
* Harbour Project or Free Software Foundation releases into a copy of
* Harbour, as the General Public License permits, the exception does
* not apply to the code that you add in this way. To avoid misleading
* anyone as to the status of such modified files, you must delete
* this exception notice from them.
*
* If you write modifications of your own for Harbour, it is your choice
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice.
*
*/
#include "hbapi.h"
#include "hbapicdp.h"
static HB_CDP_GET_FUNC( UTF8_get )
{
HB_SIZE nIndex = *pnIndex;
int n = 0;
HB_SYMBOL_UNUSED( cdp );
*wc = 0;
while( nIndex < nLen )
{
if( hb_cdpUTF8ToU16NextChar( ( HB_UCHAR ) pSrc[ nIndex++ ], &n, wc ) )
{
if( n == 0 )
{
*pnIndex = nIndex;
return HB_TRUE;
}
}
}
return HB_FALSE;
}
/*
 * Encode one wide character as UTF-8.
 *
 * The size reported by hb_cdpUTF8CharSize() has to fit between
 * *pnIndex and nLen; when it does the character is written with
 * hb_cdpU16CharToUTF8(), *pnIndex is advanced by that size and
 * HB_TRUE is returned.  Otherwise nothing is written and HB_FALSE
 * is returned.
 */
static HB_CDP_PUT_FUNC( UTF8_put )
{
   int nSize = hb_cdpUTF8CharSize( wc );

   HB_SYMBOL_UNUSED( cdp );

   if( *pnIndex + nSize > nLen )
      return HB_FALSE;

   hb_cdpU16CharToUTF8( &pDst[ *pnIndex ], wc );
   *pnIndex += nSize;

   return HB_TRUE;
}
/*
 * Encoded size of a wide character, as reported by
 * hb_cdpUTF8CharSize().
 */
static HB_CDP_LEN_FUNC( UTF8_len )
{
   int nSize;

   HB_SYMBOL_UNUSED( cdp );

   nSize = hb_cdpUTF8CharSize( wc );

   return nSize;
}
/*
 * Codepage initialization hook (passed to hbcdpreg.h as HB_CP_INIT).
 *
 * Allocates one 0x300 byte buffer and splits it into three 0x100 byte
 * tables (character flags, upper case map, lower case map) which are
 * filled for byte values 0..0xFF with the HB_IS*() / HB_TO*() macros.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pTables, * pFlags, * pUpper, * pLower;
   int ch;

   pTables = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   pFlags = pTables;
   pUpper = pTables + 0x100;
   pLower = pTables + 0x200;

   cdp->buffer = pTables;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( ch = 0; ch < 0x100; ++ch )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( ch ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( ch ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( ch ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( ch ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ ch ] = ucFlags;
      pUpper[ ch ] = ( HB_UCHAR ) HB_TOUPPER( ch );
      pLower[ ch ] = ( HB_UCHAR ) HB_TOLOWER( ch );
   }
}
/* Codepage description consumed by hbcdpreg.h, which is included at the
   end of this block. */
/* HB_CP_RAW selects the hbcdpreg.h branch which fills a static
   HB_CODEPAGE structure and passes it to hb_cdpRegisterRaw(). */
#define HB_CP_RAW
/* stringified by hbcdpreg.h as the first field of the structure */
#define HB_CP_ID UTF8ASC
#define HB_CP_INFO "UTF-8 ASCII letters"
/* NOTE(review): presumably selects the unicode table attached to the
   codepage; the UTF8_*() functions do not read it - confirm */
#define HB_CP_UNITB HB_UNITB_437
/* with all three of GET/PUT/LEN defined hbcdpreg.h sets HB_CP_CUSTOM
   to HB_TRUE and stores these functions in the structure */
#define HB_CP_GET_FUNC UTF8_get
#define HB_CP_PUT_FUNC UTF8_put
#define HB_CP_LEN_FUNC UTF8_len
/* NOTE(review): the four macros below are not referenced in the part
   of hbcdpreg.h visible in this change - confirm they are used */
#define HB_CP_CMP_FUNC NULL
#define HB_CP_FLAG_FUNC NULL
#define HB_CP_UPPER_FUNC NULL
#define HB_CP_LOWER_FUNC NULL
/* no static tables: hb_cp_init() allocates flags/upper/lower at startup;
   s_sort is used directly in the structure initializer */
#define s_flags NULL
#define s_upper NULL
#define s_lower NULL
#define s_sort NULL
/* called with the address of the structure before it is registered */
#define HB_CP_INIT hb_cp_init
/* include CP registration code */
#include "hbcdpreg.h"

View File

@@ -72,13 +72,14 @@ HB_FUNC( SX_SLIMFAST )
{
if( c == cQuote )
cQuote = 0;
else if( c == '"' || c == '\'' )
cQuote = c;
else if( !cQuote )
{
if( c == ' ' && nDst && szDst[nDst - 1] == ' ' )
if( c == '"' || c == '\'' )
cQuote = c;
else if( c == ' ' && nDst && szDst[nDst - 1] == ' ' )
continue;
c = ( char ) hb_charUpper( ( HB_UCHAR ) c );
else
c = ( char ) hb_charUpper( ( HB_UCHAR ) c );
}
szDst[nDst++] = c;
}

File diff suppressed because it is too large Load Diff

View File

@@ -159,7 +159,9 @@ HB_FUNC( HB_TRANSLATE )
PHB_CODEPAGE cdpIn = szIdIn ? hb_cdpFindExt( szIdIn ) : hb_vmCDP();
PHB_CODEPAGE cdpOut = szIdOut ? hb_cdpFindExt( szIdOut ) : hb_vmCDP();
if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable )
if( cdpIn && cdpOut && cdpIn != cdpOut &&
( cdpIn->uniTable != cdpOut->uniTable ||
cdpIn->fCustom || cdpOut->fCustom ) )
{
char * szResult = hb_cdpnDup( hb_parc( 1 ), &nLen, cdpIn, cdpOut );
hb_retclen_buffer( szResult, nLen );

View File

@@ -131,8 +131,8 @@ void hb_idleState( void )
pIdleData->fCollectGarbage = HB_TRUE;
}
}
pIdleData->fIamIdle = HB_FALSE;
}
pIdleData->fIamIdle = HB_FALSE;
}
}

265
harbour/tests/big5_gen.prg Normal file
View File

@@ -0,0 +1,265 @@
/*
* $Id$
*/
/*
* Harbour Project source code:
* code to generate C source with conversion tables between BIG5 and UCS16
* using data defined by Unicode, Inc. in BIG5.TXT
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://harbour-project.org
*
*/
/*
 * Read BIG5.TXT from the current directory, build the BIG5->UCS16 and
 * UCS16->BIG5 mappings, compress them with calc_size() using the block
 * size chosen by min_size(), verify the compressed tables with
 * check_conv() and write the generated C source to big5.c.
 *
 * Only lines starting with "0x" which contain a second "0x" at
 * position 7 or later are parsed; all other lines are skipped.
 */
proc main()
   local cLine, aVal, aVal2, aValU, aValU2, hVal, aInd, ;
         n, nn, nBG5, nU16, nMin, nMax, nUMin, nUMax, cResult, nBit

   aVal := afill( array( 0x10000 ), 0 )
   aValU := afill( array( 0x10000 ), 0 )
   nMin := nUMin := 0xFFFF
   nMax := nUMax := 0x0000

   /* CR characters are dropped and the text is split on LF, so the
      input is tokenized the same way for LF and CRLF files whatever
      hb_eol() returns on the build platform */
   for each cLine in hb_aTokens( strtran( hb_memoRead( "BIG5.TXT" ), chr( 13 ) ), chr( 10 ) )
      if cLine = "0x" .and. ( n := hb_at( "0x", cLine, 7 ) ) != 0
         nBG5 := hb_hexToNum( substr( cLine, 3, 4 ) )
         nU16 := hb_hexToNum( substr( cLine, n + 2, 4 ) )
         if nBG5 == 0 .or. nU16 == 0
            ? "unrecognized line:", cLine:__enumIndex()
            return
         elseif nBG5 > 0xFFFF .or. nU16 > 0xFFFF
            ? "wrong character range at line:", cLine:__enumIndex()
            return
         endif
         aVal[ nBG5 ] := nU16
         if nBG5 > nMax
            nMax := nBG5
         endif
         if nBG5 < nMin
            nMin := nBG5
         endif
         if nU16 > nUMax
            nUMax := nU16
         endif
         if nU16 < nUMin
            nUMin := nU16
         endif
      endif
   next

   /* nothing parsed (missing, empty or unexpected input file): the
      ranges are still inverted, so do not generate a useless big5.c */
   if nMin > nMax
      ? "no BIG5 mappings found in BIG5.TXT"
      return
   endif

   /* reverse mapping: UCS16 value -> BIG5 code */
   for n := 1 to len( aVal )
      if aVal[ n ] != 0
         aValU[ aVal[ n ] ] := n
      endif
   next

#if 0
   cResult += "static const HB_USHORT s_big5uni[ " + hb_ntos( nMax - nMin + 1 ) + " ] =" + hb_eol()
   cResult += "{" + hb_eol()
   l := 0
   for n := nMin to nMax
      if ++l > 8
         l := 1
         cResult += ","
         cResult += hb_eol()
         cResult += " "
      elseif n == nMin
         cResult += " "
      else
         cResult += ", "
      endif
      cResult += "0x"
      cResult += hb_numToHex( aVal[ n ], 4 )
   next
   cResult += hb_eol()
   cResult += "};" + hb_eol()
#endif

   ? "BIG5->UCS16 tables."
   n := min_size( aVal, nMin, nMax, @nBit )
   ? "raw size:", hb_ntos( ( nMax - nMin + 1 ) * 2 )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBit ) ), "byte blocks"

   /* rebuild the tables for the selected block size */
   calc_size( aVal, nMin, nMax, nBit, @hVal, @aInd, @nn )
   aVal2 := hash_to_array( hVal )

   cResult := ;
      "/*" + hb_eol() + ;
      " * $Id$" + hb_eol() + ;
      " */" + hb_eol() + ;
      hb_eol() + ;
      "/*" + hb_eol() + ;
      " * Harbour Project source code:" + hb_eol() + ;
      " * BIG5 <-> UCS16 conversion tables" + hb_eol() + ;
      " * code generated automatically by tests/big5_gen.prg" + hb_eol() + ;
      " *" + hb_eol() + ;
      " * Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>" + hb_eol() + ;
      " * www - http://harbour-project.org" + hb_eol() + ;
      " *" + hb_eol() + ;
      " */" + hb_eol() + ;
      hb_eol()
   cResult += '#include "hbapi.h"' + hb_eol()
   cResult += hb_eol()
   cResult += "#define HB_BIG5_FIRST 0x" + hb_numToHex( nMin, 4 ) + hb_eol()
   cResult += "#define HB_BIG5_LAST 0x" + hb_numToHex( nMax, 4 ) + hb_eol()
   cResult += "#define HB_BIG5_BITS " + hb_ntos( nBit ) + hb_eol()
   cResult += hb_eol()
   cResult += array_to_code( aInd, "s_big5index", nn )
   cResult += hb_eol()
   cResult += array_to_code( aVal2, "s_big5_ucs16", 2 )
   cResult += hb_eol()
   cResult += index_func( "s_big5_to_ucs16", "s_big5index", "s_big5_ucs16", ;
                          "HB_BIG5_FIRST", "HB_BIG5_LAST", "HB_BIG5_BITS" )
   check_conv( aVal, aInd, aVal2, nMin, nMax, nBit )

   ?
   ? "UCS16->BIG5 tables."
   n := min_size( aValU, nUMin, nUMax, @nBit )
   ? "raw size:", hb_ntos( ( nUMax - nUMin + 1 ) * 2 )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBit ) ), "byte blocks"

   calc_size( aValU, nUMin, nUMax, nBit, @hVal, @aInd, @nn )
   aValU2 := hash_to_array( hVal )

   cResult += hb_eol()
   cResult += "#define HB_U16_FIRST 0x" + hb_numToHex( nUMin, 4 ) + hb_eol()
   cResult += "#define HB_U16_LAST 0x" + hb_numToHex( nUMax, 4 ) + hb_eol()
   cResult += "#define HB_U16_BITS " + hb_ntos( nBit ) + hb_eol()
   cResult += hb_eol()
   cResult += array_to_code( aInd, "s_ucs16index", nn )
   cResult += hb_eol()
   cResult += array_to_code( aValU2, "s_ucs16_big5", 2 )
   cResult += hb_eol()
   cResult += index_func( "s_ucs16_to_big5", "s_ucs16index", "s_ucs16_big5", ;
                          "HB_U16_FIRST", "HB_U16_LAST", "HB_U16_BITS" )
   check_conv( aValU, aInd, aValU2, nUMin, nUMax, nBit )

   /* report a failed write instead of finishing silently */
   if ! hb_memowrit( "big5.c", cResult )
      ? "cannot write big5.c"
   endif

   return
/*
 * Return the C source of a "static const" array definition.
 *
 *    aVal  - array of numeric values to emit
 *    cName - name of the generated C array
 *    nn    - item size in bytes: 1 emits HB_BYTE items, 12 per line,
 *            any other value emits HB_USHORT items, 8 per line;
 *            each item is written with nn * 2 hex digits
 */
static function array_to_code( aVal, cName, nn )
   local cResult, l, n

   cResult := "static const " + ;
              iif( nn == 1, "HB_BYTE", "HB_USHORT" ) + " " + ;
              cName + "[ " + hb_ntos( len( aVal ) ) + " ] =" + hb_eol()
   cResult += "{" + hb_eol()

   /* l counts the items already placed on the current output line */
   l := 0
   for n := 1 to len( aVal )
      if ++l > iif( nn == 1, 12, 8 )
         l := 1
         cResult += ","
         cResult += hb_eol()
         cResult += " "
      elseif n == 1
         cResult += " "
      else
         cResult += ", "
      endif
      cResult += "0x"
      cResult += hb_numToHex( aVal[ n ], nn * 2 )
   next
   cResult += hb_eol()
   cResult += "};" + hb_eol()

   /* no trailing ";" here: it is the line continuation character and
      would join the RETURN with the next physical line */
   return cResult
/*
 * Flatten the values of hVal into one array of numbers.
 *
 * Every hash value is a string which is cut into consecutive 2 byte
 * pieces; each piece is converted with bin2w() and appended to the
 * result in iteration order.
 */
static function hash_to_array( hVal )
   local aWords := {}, cBlock, nPos, nLen

   for each cBlock in hVal
      nLen := len( cBlock )
      nPos := 1
      do while nPos <= nLen
         aadd( aWords, bin2w( substr( cBlock, nPos, 2 ) ) )
         nPos += 2
      enddo
   next

   return aWords
/*
 * Try block sizes of 2^1 .. 2^16 items with calc_size() and return the
 * smallest total size found.  The exponent which gave that size is
 * stored in nBit (pass it by reference); nBit is not touched when no
 * result is below the initial limit of 0xFFFFFF.
 */
function min_size( aVal, nMin, nMax, nBit )
   local nBest := 0xFFFFFF, nTry, nBytes

   for nTry := 1 to 16
      nBytes := calc_size( aVal, nMin, nMax, nTry )
      if nBytes < nBest
         nBest := nBytes
         nBit := nTry
      endif
   next

   return nBest
/*
 * Split aVal[ nMin .. nMax ] into blocks of 2^nBit items, keep only
 * the unique blocks and return the total size in bytes of the index
 * plus the unique blocks.
 *
 * Output parameters (when passed by reference):
 *    hVal - hash of unique blocks in insertion order; each block is a
 *           string of i2bin() encoded items
 *    aInd - for every block of the input, the 0-based position of its
 *           content in hVal
 *    nn   - size of one index item: 2 when aInd has more than 256
 *           items, otherwise 1
 */
function calc_size( aVal, nMin, nMax, nBit, hVal, aInd, nn )
   local nBlockLen, nPos, cBlock, cSeen, nTotal

   /* block length in bytes: two bytes per item */
   nBlockLen := int( 2 ^ nBit ) * 2

   hVal := {=>}
   hb_hKeepOrder( hVal, .t. )
   aInd := {}
   cBlock := ""

   for nPos := nMin to nMax
      cBlock += i2bin( aVal[ nPos ] )
      if len( cBlock ) == nBlockLen
         hVal[ cBlock ] := cBlock
         aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
         cBlock := ""
      endif
   next

   /* incomplete last block: reuse the first stored block which the "="
      operator reports as matching it, else store it as a new block */
   if len( cBlock ) > 0
      for each cSeen in hVal
         if cSeen = cBlock
            cBlock := cSeen
            exit
         endif
      next
      hVal[ cBlock ] := cBlock
      aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
   endif

   nn := iif( len( aInd ) > 256, 2, 1 )

   nTotal := len( aInd ) * nn
   for each cSeen in hVal
      nTotal += len( cSeen )
   next

   return nTotal
/*
 * Return the C source of a lookup function for one pair of tables.
 *
 *    cName     - name of the generated C function
 *    cNameInd  - name of the index array
 *    cNameConv - name of the array with the unique blocks
 *    cMin/cMax - C expressions for the first and last supported code
 *    cBit      - C expression for the block size exponent
 *
 * The generated function returns 0 for codes outside cMin..cMax.
 */
static function index_func( cName, cNameInd, cNameConv, cMin, cMax, cBit )
   local cCode, cPad

   /* padding in front of the second line of the return expression */
   cPad := space( len( cNameInd ) + 16 )

   cCode := "static HB_USHORT " + cName + "( int n )" + hb_eol()
   cCode += "{" + hb_eol()
   cCode += " n -= " + cMin + ";" + hb_eol()
   cCode += " if( n >= 0 && n <= ( " + cMax + " - " + cMin + ") )" + hb_eol()
   cCode += " {" + hb_eol()
   cCode += " return " + cNameConv + "[ ( " + cNameInd + ;
            "[ n >> " + cBit + " ] << " + cBit + " ) +" + hb_eol()
   cCode += cPad + "( n & ( ( 1 << " + cBit + " ) - 1 ) ) ];" + hb_eol()
   cCode += " }" + hb_eol()
   cCode += " return 0;" + hb_eol()
   cCode += "}" + hb_eol()

   return cCode
/*
 * Look up code n in the compressed tables, the same way the generated
 * C function does.
 *
 *    aInd      - index array with 0-based block numbers
 *    aVal2     - flattened array of unique blocks
 *    nMin/nMax - first and last supported code
 *    nBit      - block size exponent (2^nBit items per block)
 *
 * Returns the table value, or 0 when n is outside nMin..nMax.
 */
static function conv_get( n, aInd, aVal2, nMin, nMax, nBit )
   local nDiv, nBlock

   if n >= nMin .and. n <= nMax
      nDiv := int( 2 ^ nBit )
      n -= nMin
      /* block number as an explicit integer: "/" gives a fractional
         result, which must not be used directly as an array index */
      nBlock := int( n / nDiv )
      return aVal2[ aInd[ nBlock + 1 ] * nDiv + n % nDiv + 1 ]
   endif

   return 0
/*
 * Verify the compressed tables: for every position of aVal the value
 * returned by conv_get() has to be equal to the original one.  The
 * first difference is printed and BREAK is executed.
 *
 * NOTE(review): there is no BEGIN SEQUENCE around the BREAK in this
 * file - confirm that aborting (rather than leaving the loop with
 * EXIT) is the intended reaction.
 */
static function check_conv( aVal, aInd, aVal2, nMin, nMax, nBit )
   local nCode, nDecoded, nCount

   nCount := len( aVal )
   for nCode := 1 to nCount
      nDecoded := conv_get( nCode, aInd, aVal2, nMin, nMax, nBit )
      if ! ( aVal[ nCode ] == nDecoded )
         ? "Wrong decoding:", nCode, aVal[ nCode ], nDecoded, nCount, nMax, hb_eol()
         break
      endif
   next

   return nil