* src/rtl/Makefile
* src/rtl/cdpapi.c
+ src/rtl/cdpapihb.c
* include/hbapicdp.h
+ Moved Harbour level functions to a separate file.
I had to rename and publish three static functions.
Eventually the low level part should go to hbcommon lib.
; TODO: HB_CDPLIST() is still to be moved; I couldn't find an elegant
low-level API to solve it.
2165 lines
58 KiB
C
2165 lines
58 KiB
C
/*
|
|
* $Id$
|
|
*/
|
|
|
|
/*
|
|
* Harbour Project source code:
|
|
* The CodePages API
|
|
*
|
|
* Copyright 2002 Alexander S.Kresin <alex@belacy.belgorod.su>
|
|
* Copyright 2009 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
|
|
* www - http://www.harbour-project.org
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this software; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
|
|
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
|
|
*
|
|
* As a special exception, the Harbour Project gives permission for
|
|
* additional uses of the text contained in its release of Harbour.
|
|
*
|
|
* The exception is that, if you link the Harbour libraries with other
|
|
* files to produce an executable, this does not by itself cause the
|
|
* resulting executable to be covered by the GNU General Public License.
|
|
* Your use of that executable is in no way restricted on account of
|
|
* linking the Harbour library code into it.
|
|
*
|
|
* This exception does not however invalidate any other reasons why
|
|
* the executable file might be covered by the GNU General Public License.
|
|
*
|
|
* This exception applies only to the code released by the Harbour
|
|
* Project under the name Harbour. If you copy code from other
|
|
* Harbour Project or Free Software Foundation releases into a copy of
|
|
* Harbour, as the General Public License permits, the exception does
|
|
* not apply to the code that you add in this way. To avoid misleading
|
|
* anyone as to the status of such modified files, you must delete
|
|
* this exception notice from them.
|
|
*
|
|
* If you write modifications of your own for Harbour, it is your choice
|
|
* whether to permit this exception to apply to your modifications.
|
|
* If you do not wish that, delete this exception notice.
|
|
*
|
|
*/
|
|
|
|
#include "hbapi.h"
|
|
#include "hbvm.h"
|
|
#include "hbapierr.h"
|
|
#include "hbapicdp.h"
|
|
#include "hbthread.h"
|
|
|
|
|
|
/* character flags: classification bits tested against cdp->flags[] */
#define HB_CDP_DIGIT    0x01
#define HB_CDP_ALPHA    0x02
#define HB_CDP_LOWER    0x04
#define HB_CDP_UPPER    0x08
#define HB_CDP_MULTI1   0x10  /* checked on the first byte of a two-byte multi-character */
#define HB_CDP_MULTI2   0x20  /* checked on the second byte of a two-byte multi-character */
|
|
|
|
|
|
/* MT macros: enter/leave the critical section guarding shared codepage data.
 * Both expansions already end with ';', so they are used as bare
 * statements (HB_CDP_LOCK / HB_CDP_UNLOCK without a trailing semicolon).
 */
#define HB_CDP_LOCK     hb_threadEnterCriticalSection( &s_cdpMtx );
#define HB_CDP_UNLOCK   hb_threadLeaveCriticalSection( &s_cdpMtx );
static HB_CRITICAL_NEW( s_cdpMtx );
|
|
|
|
|
|
#define NUMBER_OF_CHARS 256
|
|
|
|
static const HB_WCHAR s_uniCodes[ NUMBER_OF_CHARS ] =
|
|
{
|
|
0x0020, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
|
|
0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C,
|
|
0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8,
|
|
0x2191, 0x2193, 0x2192, 0x2190, 0x2319, 0x2194, 0x25B2, 0x25BC,
|
|
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
|
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
|
|
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
|
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
|
|
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
|
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
|
|
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
|
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
|
|
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
|
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
|
|
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
|
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
|
|
0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
|
|
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
|
|
0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
|
|
0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192,
|
|
0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
|
|
0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
|
|
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
|
|
0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
|
|
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
|
|
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
|
|
0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
|
|
0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
|
|
0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
|
|
0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
|
|
0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
|
|
0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0
|
|
};
|
|
|
|
HB_UNITABLE hb_uniTbl_437 = { HB_CPID_437, s_uniCodes, NULL, 0 };
|
|
|
|
static HB_CODEPAGE s_en_codepage =
|
|
{ "EN", "English CP-437", HB_UNITB_437, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL };
|
|
|
|
HB_UNITABLE hb_uniTbl_UTF8 = { HB_CPID_437, s_uniCodes, NULL, 0 };
|
|
|
|
/* pseudo codepage for translations only */
|
|
static HB_CODEPAGE s_utf8_codepage =
|
|
{ "UTF8", "UTF-8", &hb_uniTbl_UTF8, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL };
|
|
|
|
HB_CODEPAGE_ANNOUNCE( EN )
|
|
|
|
static PHB_CODEPAGE s_cdpList = NULL;
|
|
|
|
|
|
/*
|
|
* basic CP functions
|
|
*/
|
|
/* Returns non-zero when iChar is a digit. With a codepage the low 8 bits
 * of iChar index cdp->flags[]; without one HB_ISDIGIT() decides.
 */
HB_BOOL hb_cdpIsDigit( PHB_CODEPAGE cdp, int iChar )
{
   if( !cdp )
      return HB_ISDIGIT( iChar );

   return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_DIGIT ) != 0;
}
|
|
|
|
/* Returns non-zero when iChar is alphabetic. With a codepage the low 8 bits
 * of iChar index cdp->flags[]; without one HB_ISALPHA() decides.
 */
HB_BOOL hb_cdpIsAlpha( PHB_CODEPAGE cdp, int iChar )
{
   if( !cdp )
      return HB_ISALPHA( iChar );

   return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_ALPHA ) != 0;
}
|
|
|
|
/* Returns non-zero when iChar is a lower case letter. With a codepage the
 * low 8 bits of iChar index cdp->flags[]; without one HB_ISLOWER() decides.
 */
HB_BOOL hb_cdpIsLower( PHB_CODEPAGE cdp, int iChar )
{
   if( !cdp )
      return HB_ISLOWER( iChar );

   return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_LOWER ) != 0;
}
|
|
|
|
/* Returns non-zero when iChar is an upper case letter. With a codepage the
 * low 8 bits of iChar index cdp->flags[]; without one HB_ISUPPER() decides.
 */
HB_BOOL hb_cdpIsUpper( PHB_CODEPAGE cdp, int iChar )
{
   if( !cdp )
      return HB_ISUPPER( iChar );

   return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_UPPER ) != 0;
}
|
|
|
|
HB_BOOL hb_charIsDigit( int iChar )
|
|
{
|
|
PHB_CODEPAGE cdp = hb_vmCDP();
|
|
|
|
if( cdp )
|
|
return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_DIGIT ) != 0;
|
|
else
|
|
return HB_ISDIGIT( iChar );
|
|
}
|
|
|
|
HB_BOOL hb_charIsAlpha( int iChar )
|
|
{
|
|
PHB_CODEPAGE cdp = hb_vmCDP();
|
|
|
|
if( cdp )
|
|
return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_ALPHA ) != 0;
|
|
else
|
|
return HB_ISALPHA( iChar );
|
|
}
|
|
|
|
HB_BOOL hb_charIsLower( int iChar )
|
|
{
|
|
PHB_CODEPAGE cdp = hb_vmCDP();
|
|
|
|
if( cdp )
|
|
return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_LOWER ) != 0;
|
|
else
|
|
return HB_ISLOWER( iChar );
|
|
}
|
|
|
|
HB_BOOL hb_charIsUpper( int iChar )
|
|
{
|
|
PHB_CODEPAGE cdp = hb_vmCDP();
|
|
|
|
if( cdp )
|
|
return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_UPPER ) != 0;
|
|
else
|
|
return HB_ISUPPER( iChar );
|
|
}
|
|
|
|
int hb_charLower( int iChar )
|
|
{
|
|
PHB_CODEPAGE cdp = hb_vmCDP();
|
|
|
|
if( cdp )
|
|
return cdp->lower[ iChar & 0x0ff ];
|
|
else
|
|
return HB_TOLOWER( iChar );
|
|
}
|
|
|
|
int hb_charUpper( int iChar )
|
|
{
|
|
PHB_CODEPAGE cdp = hb_vmCDP();
|
|
|
|
if( cdp )
|
|
return cdp->upper[ iChar & 0x0ff ];
|
|
else
|
|
return HB_TOUPPER( iChar );
|
|
}
|
|
|
|
/* Lower-cases the first ulLen bytes of szText in place and returns szText.
 * The codepage returned by hb_vmCDP() supplies the mapping; without one
 * HB_TOLOWER() is applied byte by byte.
 */
char * hb_strLower( char * szText, HB_SIZE ulLen )
{
   HB_TRACE(HB_TR_DEBUG, ("hb_strLower(%s, %lu)", szText, ulLen));

   {
      PHB_CODEPAGE cdpVM = hb_vmCDP();
      char * pch = szText;
      HB_SIZE ulLeft;

      if( cdpVM )
      {
         for( ulLeft = ulLen; ulLeft; --ulLeft, ++pch )
            *pch = ( char ) cdpVM->lower[ ( unsigned char ) *pch ];
      }
      else
      {
         for( ulLeft = ulLen; ulLeft; --ulLeft, ++pch )
            *pch = HB_TOLOWER( *pch );
      }
   }

   return szText;
}
|
|
|
|
/* Upper-cases the first ulLen bytes of szText in place and returns szText.
 * The codepage returned by hb_vmCDP() supplies the mapping; without one
 * HB_TOUPPER() is applied byte by byte.
 */
char * hb_strUpper( char * szText, HB_SIZE ulLen )
{
   HB_TRACE(HB_TR_DEBUG, ("hb_strUpper(%s, %lu)", szText, ulLen));

   {
      PHB_CODEPAGE cdpVM = hb_vmCDP();
      char * pch = szText;
      HB_SIZE ulLeft;

      if( cdpVM )
      {
         for( ulLeft = ulLen; ulLeft; --ulLeft, ++pch )
            *pch = ( char ) cdpVM->upper[ ( unsigned char ) *pch ];
      }
      else
      {
         for( ulLeft = ulLen; ulLeft; --ulLeft, ++pch )
            *pch = HB_TOUPPER( *pch );
      }
   }

   return szText;
}
|
|
|
|
/*
|
|
* comparison
|
|
*/
|
|
/* Compares two single characters according to cdp.
 * Returns 0 for equal, -1 when cFirst sorts before cSecond, 1 otherwise.
 * The sort[] weights are used when present (and, for codepages with
 * multi-characters, only when both weights are non-zero); equal weights
 * are resolved with acc[] if available. Otherwise raw byte values decide.
 */
int hb_cdpchrcmp( char cFirst, char cSecond, PHB_CODEPAGE cdp )
{
   unsigned char uc1 = ( unsigned char ) cFirst;
   unsigned char uc2 = ( unsigned char ) cSecond;

   if( cFirst == cSecond )
      return 0;

   if( cdp->sort )
   {
      int iW1 = cdp->sort[ uc1 ];
      int iW2 = cdp->sort[ uc2 ];

      if( !cdp->nMulti || ( iW1 != 0 && iW2 != 0 ) )
      {
         if( iW1 == iW2 )
         {
            /* same sort weight: only the accent table can tell them apart */
            if( !cdp->acc )
               return 0;
            iW1 = cdp->acc[ uc1 ];
            iW2 = cdp->acc[ uc2 ];
         }
         return iW1 < iW2 ? -1 : 1;
      }
   }

   return uc1 < uc2 ? -1 : 1;
}
|
|
|
|
/* Looks up the two bytes at szChar in the multi-character table of cdp.
 * Returns the entry's sortUp weight when szChar[ 0 ] equals cFirst[ 0 ],
 * its sortLo weight when it equals cFirst[ 1 ], or 0 when no entry matches.
 */
static int hb_cdpMultiWeight( PHB_CODEPAGE cdp, const char * szChar )
{
   PHB_MULTICHAR pEntry = cdp->multi;
   int iLeft = cdp->nMulti;

   while( iLeft )
   {
      if( szChar[ 0 ] == pEntry->cFirst[ 0 ] ||
          szChar[ 0 ] == pEntry->cFirst[ 1 ] )
      {
         if( szChar[ 1 ] == pEntry->cLast[ 0 ] ||
             szChar[ 1 ] == pEntry->cLast[ 1 ] )
         {
            if( szChar[ 0 ] == pEntry->cFirst[ 0 ] )
               return pEntry->sortUp;
            return pEntry->sortLo;
         }
      }
      --iLeft;
      ++pEntry;
   }

   return 0;
}
|
|
|
|
int hb_cdpcmp( const char * szFirst, HB_SIZE ulLenFirst,
|
|
const char * szSecond, HB_SIZE ulLenSecond,
|
|
PHB_CODEPAGE cdp, HB_BOOL fExact )
|
|
{
|
|
int iRet = 0, iAcc = 0, n, n1, n2;
|
|
HB_SIZE ul, ulLen;
|
|
|
|
ulLen = ulLenFirst < ulLenSecond ? ulLenFirst : ulLenSecond;
|
|
if( cdp->sort == NULL )
|
|
{
|
|
iRet = memcmp( szFirst, szSecond, ulLen );
|
|
}
|
|
else if( cdp->nMulti )
|
|
{
|
|
for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul )
|
|
{
|
|
unsigned char u1 = ( unsigned char ) * szFirst;
|
|
unsigned char u2 = ( unsigned char ) * szSecond;
|
|
|
|
n1 = cdp->sort[ u1 ];
|
|
if( ( cdp->flags[ u1 ] & HB_CDP_MULTI1 ) != 0 &&
|
|
( ul < ulLenFirst - 1 ) &&
|
|
( cdp->flags[ ( unsigned char ) szFirst[ 1 ] ] & HB_CDP_MULTI2 ) != 0 )
|
|
{
|
|
n = hb_cdpMultiWeight( cdp, szFirst );
|
|
if( n != 0 )
|
|
{
|
|
n1 = n;
|
|
++szFirst;
|
|
if( --ulLenFirst < ulLen )
|
|
ulLen = ulLenFirst;
|
|
}
|
|
}
|
|
n2 = cdp->sort[ u2 ];
|
|
if( ( cdp->flags[ u2 ] & HB_CDP_MULTI1 ) != 0 &&
|
|
( ul < ulLenSecond - 1 ) &&
|
|
( cdp->flags[ ( unsigned char ) szSecond[ 1 ] ] & HB_CDP_MULTI2 ) != 0 )
|
|
{
|
|
n = hb_cdpMultiWeight( cdp, szSecond );
|
|
if( n != 0 )
|
|
{
|
|
n2 = n;
|
|
++szSecond;
|
|
if( --ulLenSecond < ulLen )
|
|
ulLen = ulLenSecond;
|
|
}
|
|
}
|
|
if( n1 != n2 )
|
|
{
|
|
if( n1 == 0 || n2 == 0 )
|
|
/* One of characters doesn't belong to the national characters */
|
|
iRet = ( u1 < u2 ) ? -1 : 1;
|
|
else
|
|
iRet = ( n1 < n2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
else if( u1 != u2 )
|
|
{
|
|
if( n1 == 0 )
|
|
{
|
|
iRet = ( u1 < u2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) )
|
|
{
|
|
if( cdp->acc )
|
|
iAcc = ( cdp->acc[ u1 ] < cdp->acc[ u2 ] ) ? -1 : 1;
|
|
else
|
|
iAcc = ( u1 < u2 ) ? -1 : 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul )
|
|
{
|
|
if( *szFirst != *szSecond )
|
|
{
|
|
n1 = ( unsigned char ) cdp->sort[ ( unsigned char ) * szFirst ];
|
|
n2 = ( unsigned char ) cdp->sort[ ( unsigned char ) * szSecond ];
|
|
if( n1 != n2 )
|
|
{
|
|
iRet = ( n1 < n2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) )
|
|
{
|
|
if( cdp->acc )
|
|
iAcc = ( cdp->acc[ ( unsigned char ) * szFirst ] <
|
|
cdp->acc[ ( unsigned char ) * szSecond ] ) ? -1 : 1;
|
|
else
|
|
iAcc = ( ( unsigned char ) * szFirst < ( unsigned char ) * szSecond ) ? -1 : 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( !iRet )
|
|
{
|
|
if( iAcc )
|
|
iRet = iAcc;
|
|
else if( ulLenSecond > ulLenFirst )
|
|
iRet = -1;
|
|
else if( fExact && ulLenSecond < ulLenFirst )
|
|
iRet = 1;
|
|
}
|
|
|
|
return iRet;
|
|
}
|
|
|
|
/* Case insensitive variant of the multi-character lookup: when the two
 * bytes at szChar match an entry (either cFirst[] byte followed by either
 * cLast[] byte) the entry's sortUp weight is returned, otherwise 0.
 */
static int hb_cdpMultiWeightI( PHB_CODEPAGE cdp, const char * szChar )
{
   PHB_MULTICHAR pEntry = cdp->multi;
   int iLeft = cdp->nMulti;

   while( iLeft )
   {
      if( szChar[ 0 ] == pEntry->cFirst[ 0 ] ||
          szChar[ 0 ] == pEntry->cFirst[ 1 ] )
      {
         if( szChar[ 1 ] == pEntry->cLast[ 0 ] ||
             szChar[ 1 ] == pEntry->cLast[ 1 ] )
            return pEntry->sortUp;
      }
      --iLeft;
      ++pEntry;
   }

   return 0;
}
|
|
|
|
int hb_cdpicmp( const char * szFirst, HB_SIZE ulLenFirst,
|
|
const char * szSecond, HB_SIZE ulLenSecond,
|
|
PHB_CODEPAGE cdp, HB_BOOL fExact )
|
|
{
|
|
int iRet = 0, iAcc = 0, n, n1, n2, u1, u2;
|
|
HB_SIZE ul, ulLen;
|
|
|
|
ulLen = ulLenFirst < ulLenSecond ? ulLenFirst : ulLenSecond;
|
|
if( cdp->sort == NULL )
|
|
{
|
|
for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul )
|
|
{
|
|
u1 = cdp->upper[ ( unsigned char ) * szFirst ];
|
|
u2 = cdp->upper[ ( unsigned char ) * szSecond ];
|
|
if( u1 != u2 )
|
|
{
|
|
iRet = ( u1 < u2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else if( cdp->nMulti )
|
|
{
|
|
for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul )
|
|
{
|
|
u1 = cdp->upper[ ( unsigned char ) * szFirst ];
|
|
u2 = cdp->upper[ ( unsigned char ) * szSecond ];
|
|
|
|
if( ( cdp->flags[ u1 ] & HB_CDP_MULTI1 ) != 0 &&
|
|
( ul < ulLenFirst - 1 ) &&
|
|
( cdp->flags[ ( unsigned char ) szFirst[ 1 ] ] & HB_CDP_MULTI2 ) != 0 )
|
|
{
|
|
n = hb_cdpMultiWeightI( cdp, szFirst );
|
|
if( n != 0 )
|
|
{
|
|
n1 = n;
|
|
++szFirst;
|
|
if( --ulLenFirst < ulLen )
|
|
ulLen = ulLenFirst;
|
|
}
|
|
else
|
|
n1 = cdp->sort[ u1 ];
|
|
}
|
|
else
|
|
n1 = cdp->sort[ u1 ];
|
|
|
|
if( ( cdp->flags[ u2 ] & HB_CDP_MULTI1 ) != 0 &&
|
|
( ul < ulLenSecond - 1 ) &&
|
|
( cdp->flags[ ( unsigned char ) szSecond[ 1 ] ] & HB_CDP_MULTI2 ) != 0 )
|
|
{
|
|
n = hb_cdpMultiWeightI( cdp, szSecond );
|
|
if( n != 0 )
|
|
{
|
|
n2 = n;
|
|
++szSecond;
|
|
if( --ulLenSecond < ulLen )
|
|
ulLen = ulLenSecond;
|
|
}
|
|
else
|
|
n2 = cdp->sort[ u2 ];
|
|
}
|
|
else
|
|
n2 = cdp->sort[ u2 ];
|
|
|
|
if( n1 != n2 )
|
|
{
|
|
if( n1 == 0 || n2 == 0 )
|
|
/* One of characters doesn't belong to the national characters */
|
|
iRet = ( u1 < u2 ) ? -1 : 1;
|
|
else
|
|
iRet = ( n1 < n2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
else if( u1 != u2 )
|
|
{
|
|
if( n1 == 0 )
|
|
{
|
|
iRet = ( u1 < u2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) )
|
|
{
|
|
if( cdp->acc )
|
|
iAcc = ( cdp->acc[ u1 ] < cdp->acc[ u2 ] ) ? -1 : 1;
|
|
else
|
|
iAcc = ( u1 < u2 ) ? -1 : 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul )
|
|
{
|
|
u1 = cdp->upper[ ( unsigned char ) * szFirst ];
|
|
u2 = cdp->upper[ ( unsigned char ) * szSecond ];
|
|
|
|
if( u1 != u2 )
|
|
{
|
|
n1 = ( unsigned char ) cdp->sort[ u1 ];
|
|
n2 = ( unsigned char ) cdp->sort[ u2 ];
|
|
if( n1 != n2 )
|
|
{
|
|
iRet = ( n1 < n2 ) ? -1 : 1;
|
|
break;
|
|
}
|
|
if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) )
|
|
{
|
|
if( cdp->acc )
|
|
iAcc = ( cdp->acc[ u1 ] < cdp->acc[ u2 ] ) ? -1 : 1;
|
|
else
|
|
iAcc = ( u1 < u2 ) ? -1 : 1;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( !iRet )
|
|
{
|
|
if( iAcc )
|
|
iRet = iAcc;
|
|
else if( ulLenSecond > ulLenFirst )
|
|
iRet = -1;
|
|
else if( fExact && ulLenSecond < ulLenFirst )
|
|
iRet = 1;
|
|
}
|
|
|
|
return iRet;
|
|
}
|
|
|
|
/*
|
|
* conversions
|
|
*/
|
|
/* Builds the reverse (unicode -> byte) table of uniTable, if it does not
 * exist yet. The table has wcMax + 1 entries, where wcMax is the largest
 * value in uniCodes[]; entries without a source byte stay 0.
 * The whole operation runs inside HB_CDP_LOCK / HB_CDP_UNLOCK and the
 * NULL test is repeated under the lock.
 */
static void hb_cdpBuildTransTable( PHB_UNITABLE uniTable )
{
   HB_WCHAR wcTop = 0;
   int iByte;

   HB_CDP_LOCK
   if( uniTable->uniTrans == NULL )
   {
      unsigned char * pTrans;

      /* find the highest unicode value to size the table */
      for( iByte = 0; iByte < 256; ++iByte )
      {
         if( uniTable->uniCodes[ iByte ] > wcTop )
            wcTop = uniTable->uniCodes[ iByte ];
      }

      pTrans = ( unsigned char * )
                     hb_xgrab( ( wcTop + 1 ) * sizeof( unsigned char ) );
      memset( pTrans, '\0', ( wcTop + 1 ) * sizeof( unsigned char ) );

      /* later bytes overwrite earlier ones sharing the same unicode value */
      for( iByte = 0; iByte < 256; ++iByte )
         pTrans[ uniTable->uniCodes[ iByte ] ] = ( unsigned char ) iByte;

      /* wcMax is stored before uniTrans; presumably because callers test
         uniTrans for NULL without holding the lock */
      uniTable->wcMax = wcTop;
      uniTable->uniTrans = pTrans;
   }
   HB_CDP_UNLOCK
}
|
|
|
|
/*
|
|
* UTF8 conversions
|
|
*/
|
|
/* Number of bytes needed to encode wc in UTF-8: 1 below 0x80,
 * 2 below 0x800, otherwise 3.
 */
int hb_cdpUTF8CharSize( HB_WCHAR wc )
{
   return wc < 0x0080 ? 1 : ( wc < 0x0800 ? 2 : 3 );
}
|
|
|
|
/* Encodes wc as UTF-8 into szUTF8 (no terminator is written) and
 * returns the number of bytes stored: 1, 2 or 3.
 */
int hb_cdpU16CharToUTF8( char * szUTF8, HB_WCHAR wc )
{
   if( wc < 0x0080 )
   {
      szUTF8[ 0 ] = ( char ) ( wc & 0xff );
      return 1;
   }

   if( wc < 0x0800 )
   {
      szUTF8[ 0 ] = ( char ) ( 0xc0 | ( ( wc >> 6 ) & 0x1f ) );
      szUTF8[ 1 ] = ( char ) ( 0x80 | ( wc & 0x3f ) );
      return 2;
   }

   /* wc <= 0xffff */
   szUTF8[ 0 ] = ( char ) ( 0xe0 | ( ( wc >> 12 ) & 0x0f ) );
   szUTF8[ 1 ] = ( char ) ( 0x80 | ( ( wc >> 6 ) & 0x3f ) );
   szUTF8[ 2 ] = ( char ) ( 0x80 | ( wc & 0x3f ) );
   return 3;
}
|
|
|
|
/* Incremental UTF-8 decoder: feeds one byte into the decoding state.
 *
 *   ucChar - next input byte
 *   n      - in/out: number of continuation bytes still expected;
 *            must be 0 before the first byte of a string
 *   pwc    - in/out: character value accumulated so far
 *
 * Returns HB_TRUE when the byte was accepted; the character in *pwc is
 * complete when *n is 0 afterwards. Returns HB_FALSE when a continuation
 * byte was expected but ucChar is not one. In that case the unfinished
 * sequence is dropped and *n is reset to 0, so the next byte starts a new
 * character instead of being rejected too (previously the state stayed
 * "pending" and all following non-continuation bytes were refused).
 *
 * Bytes that are not valid lead bytes (0x80-0xBF, 0xFE, 0xFF) are passed
 * through as single characters.
 */
HB_BOOL hb_cdpUTF8ToU16NextChar( unsigned char ucChar, int * n, HB_WCHAR * pwc )
{
   if( *n > 0 )
   {
      if( ( ucChar & 0xc0 ) != 0x80 )
      {
         /* broken sequence: resynchronize on the next byte */
         *n = 0;
         return HB_FALSE;
      }
      *pwc = ( *pwc << 6 ) | ( ucChar & 0x3f );
      ( *n )--;
      return HB_TRUE;
   }

   *n = 0;
   *pwc = ucChar;
   if( ucChar >= 0xc0 )
   {
      /* lead byte: keep its payload bits and note how many
         continuation bytes have to follow */
      if( ucChar < 0xe0 )
      {
         *pwc &= 0x1f;
         *n = 1;
      }
      else if( ucChar < 0xf0 )
      {
         *pwc &= 0x0f;
         *n = 2;
      }
      else if( ucChar < 0xf8 )
      {
         *pwc &= 0x07;
         *n = 3;
      }
      else if( ucChar < 0xfc )
      {
         *pwc &= 0x03;
         *n = 4;
      }
      else if( ucChar < 0xfe )
      {
         *pwc &= 0x01;
         *n = 5;
      }
   }
   return HB_TRUE;
}
|
|
|
|
/* Counts the characters completed by hb_cdpUTF8ToU16NextChar() while
 * scanning the ulLen bytes at pSrc.
 */
HB_SIZE hb_cdpUTF8StringLength( const char * pSrc, HB_SIZE ulLen )
{
   HB_SIZE ulIdx, ulChars = 0;
   HB_WCHAR wc = 0;
   int n = 0;

   for( ulIdx = 0; ulIdx < ulLen; ++ulIdx )
   {
      if( hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulIdx ], &n, &wc ) &&
          n == 0 )
         ++ulChars;
   }

   return ulChars;
}
|
|
|
|
/* Returns the value of the character following the first ulPos characters
 * of the UTF-8 string pSrc (ulLen bytes), or 0 when the string ends
 * before such a character is completed.
 */
HB_SIZE hb_cdpUTF8StringPeek( const char * pSrc, HB_SIZE ulLen, HB_SIZE ulPos )
{
   HB_SIZE ulIdx = 0;
   HB_WCHAR wc = 0;
   int n = 0;

   /* skip ulPos complete characters */
   while( ulIdx < ulLen && ulPos )
   {
      if( hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulIdx ], &n, &wc ) &&
          n == 0 )
         --ulPos;
      ++ulIdx;
   }

   /* decode the next one from a clean state */
   n = 0;
   for( ; ulIdx < ulLen; ++ulIdx )
   {
      if( hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulIdx ], &n, &wc ) &&
          n == 0 )
         return wc;
   }

   return 0;
}
|
|
|
|
/* caller must free the returned buffer if not NULL */
|
|
char * hb_cdpUTF8StringSubstr( const char * pSrc, HB_SIZE ulLen,
|
|
HB_SIZE ulFrom, HB_SIZE ulCount, HB_SIZE * pulDest )
|
|
{
|
|
HB_SIZE ul, ulCnt, ulDst = 0;
|
|
HB_WCHAR uc;
|
|
int n;
|
|
char * pDst = NULL;
|
|
|
|
if( ulCount && ulLen )
|
|
{
|
|
n = 0;
|
|
for( ul = 0; ul < ulLen && ulFrom; ++ul )
|
|
{
|
|
if( hb_cdpUTF8ToU16NextChar( pSrc[ ul ], &n, &uc ) )
|
|
{
|
|
if( n == 0 )
|
|
--ulFrom;
|
|
}
|
|
}
|
|
|
|
if( ul < ulLen )
|
|
{
|
|
ulFrom = ul;
|
|
ulCnt = ulCount;
|
|
n = 0;
|
|
do
|
|
{
|
|
if( hb_cdpUTF8ToU16NextChar( pSrc[ ul ], &n, &uc ) )
|
|
{
|
|
if( n == 0 )
|
|
--ulCnt;
|
|
}
|
|
}
|
|
while( ++ul < ulLen && ulCnt );
|
|
|
|
ulDst = ul - ulFrom;
|
|
pDst = ( char * ) hb_xgrab( ulDst + 1 );
|
|
memcpy( pDst, &pSrc[ ulFrom ], ulDst );
|
|
pDst[ ulDst ] = '\0';
|
|
}
|
|
}
|
|
|
|
if( pulDest )
|
|
*pulDest = ulDst;
|
|
|
|
return pDst;
|
|
}
|
|
|
|
/* Feeds one UTF-8 byte to hb_cdpUTF8ToU16NextChar() and, once a character
 * is complete (*n == 0), replaces *pwc with its byte in cdp when the
 * reverse table has a non-zero entry for it. Characters below 32 are
 * translated only when fCtrl is set. Returns the decoder's result.
 */
HB_BOOL hb_cdpGetFromUTF8( PHB_CODEPAGE cdp, HB_BOOL fCtrl, unsigned char ch,
                           int * n, HB_WCHAR * pwc )
{
   if( !hb_cdpUTF8ToU16NextChar( ch, n, pwc ) )
      return HB_FALSE;

   if( *n == 0 && cdp && ( fCtrl || *pwc >= 32 ) )
   {
      if( cdp->uniTable->uniTrans == NULL )
         hb_cdpBuildTransTable( cdp->uniTable );

      if( *pwc <= cdp->uniTable->wcMax &&
          cdp->uniTable->uniTrans[ *pwc ] )
         *pwc = cdp->uniTable->uniTrans[ *pwc ];
   }

   return HB_TRUE;
}
|
|
|
|
/* Computes how many bytes the ulSrc bytes at pSrc (encoded in cdp) need
 * when converted to UTF-8. Bytes below 32 are translated through the
 * unicode table only when fCtrl is set. Two-byte multi-characters of cdp
 * count as one unicode character. A non-zero ulMax stops the count before
 * the character that would exceed it.
 */
HB_SIZE hb_cdpStrAsUTF8Len( PHB_CODEPAGE cdp, HB_BOOL fCtrl,
                            const char * pSrc, HB_SIZE ulSrc,
                            HB_SIZE ulMax )
{
   const HB_WCHAR * uniCodes = cdp->uniTable->uniCodes;
   HB_SIZE ulIn, ulOut = 0;

   for( ulIn = 0; ulIn < ulSrc; ++ulIn )
   {
      unsigned char uc = ( unsigned char ) pSrc[ ulIn ];
      HB_WCHAR wc = uc;
      HB_SIZE ulChar;

      if( fCtrl || uc >= 32 )
      {
         wc = uniCodes[ uc ];
         if( cdp->nMultiUC &&
             ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 &&
             ulIn + 1 < ulSrc &&
             ( cdp->flags[ ( unsigned char ) pSrc[ ulIn + 1 ] ] & HB_CDP_MULTI2 ) != 0 )
         {
            const char chLead = pSrc[ ulIn ];
            const char chTail = pSrc[ ulIn + 1 ];
            int i;

            for( i = 0; i < cdp->nMulti; ++i )
            {
               if( chTail != cdp->multi[ i ].cLast[ 0 ] &&
                   chTail != cdp->multi[ i ].cLast[ 1 ] )
                  continue;

               if( chLead == cdp->multi[ i ].cFirst[ 0 ] )
                  wc = cdp->multi[ i ].wcUp;
               else if( chLead == cdp->multi[ i ].cFirst[ 1 ] )
                  wc = cdp->multi[ i ].wcLo;
               else
                  continue;

               /* both bytes consumed */
               ++ulIn;
               break;
            }
         }
      }

      ulChar = hb_cdpUTF8CharSize( wc );
      if( ulMax && ulOut + ulChar > ulMax )
         break;
      ulOut += ulChar;
   }

   return ulOut;
}
|
|
|
|
/* Converts the ulSrc bytes at pSrc (encoded in cdp) to UTF-8 in pDst,
 * writing at most ulDst bytes; a character that does not fit completely
 * ends the conversion. Bytes below 32 are translated through the unicode
 * table only when fCtrl is set. Two-byte multi-characters of cdp are
 * converted to their single unicode value. A terminating zero is stored
 * only when there is room left. Returns the number of bytes written.
 */
HB_SIZE hb_cdpStrToUTF8( PHB_CODEPAGE cdp, HB_BOOL fCtrl,
                         const char * pSrc, HB_SIZE ulSrc,
                         char * pDst, HB_SIZE ulDst )
{
   const HB_WCHAR * uniCodes = cdp->uniTable->uniCodes;
   HB_SIZE ulIn, ulOut = 0;

   for( ulIn = 0; ulIn < ulSrc && ulOut < ulDst; ++ulIn )
   {
      unsigned char uc = ( unsigned char ) pSrc[ ulIn ];
      HB_WCHAR wc = uc;
      HB_SIZE ulChar;

      if( fCtrl || uc >= 32 )
      {
         wc = uniCodes[ uc ];
         if( cdp->nMultiUC &&
             ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 &&
             ulIn + 1 < ulSrc &&
             ( cdp->flags[ ( unsigned char ) pSrc[ ulIn + 1 ] ] & HB_CDP_MULTI2 ) != 0 )
         {
            const char chLead = pSrc[ ulIn ];
            const char chTail = pSrc[ ulIn + 1 ];
            int i;

            for( i = 0; i < cdp->nMulti; ++i )
            {
               if( chTail != cdp->multi[ i ].cLast[ 0 ] &&
                   chTail != cdp->multi[ i ].cLast[ 1 ] )
                  continue;

               if( chLead == cdp->multi[ i ].cFirst[ 0 ] )
                  wc = cdp->multi[ i ].wcUp;
               else if( chLead == cdp->multi[ i ].cFirst[ 1 ] )
                  wc = cdp->multi[ i ].wcLo;
               else
                  continue;

               /* both bytes consumed */
               ++ulIn;
               break;
            }
         }
      }

      ulChar = hb_cdpUTF8CharSize( wc );
      if( ulOut + ulChar > ulDst )
         break;

      hb_cdpU16CharToUTF8( &pDst[ ulOut ], wc );
      ulOut += ulChar;
   }

   if( ulOut < ulDst )
      pDst[ ulOut ] = '\0';

   return ulOut;
}
|
|
|
|
/* Computes how many bytes the UTF-8 string pSrc (ulSrc bytes) needs when
 * converted to cdp. Every decoded character counts as one byte; a
 * character without a single byte mapping that matches a multi-character
 * entry of cdp counts as two. A non-zero ulMax caps the result.
 */
HB_SIZE hb_cdpUTF8AsStrLen( PHB_CODEPAGE cdp, HB_BOOL fCtrl,
                            const char * pSrc, HB_SIZE ulSrc,
                            HB_SIZE ulMax )
{
   unsigned char * uniTrans;
   HB_WCHAR wcMax, wc = 0;
   HB_SIZE ulIn, ulOut = 0;
   int n = 0;

   if( cdp->uniTable->uniTrans == NULL )
      hb_cdpBuildTransTable( cdp->uniTable );
   uniTrans = cdp->uniTable->uniTrans;
   wcMax = cdp->uniTable->wcMax;

   for( ulIn = 0; ulIn < ulSrc; ++ulIn )
   {
      /* wait until a character is complete */
      if( !hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulIn ], &n, &wc ) ||
          n != 0 )
         continue;

      ++ulOut;
      if( ulMax && ulOut >= ulMax )
         break;

      if( wc && cdp->nMultiUC && ( fCtrl || wc >= 32 ) &&
          ( wc > wcMax || uniTrans[ wc ] == 0 ) )
      {
         int i;

         for( i = 0; i < cdp->nMulti; ++i )
         {
            if( wc == cdp->multi[ i ].wcUp ||
                wc == cdp->multi[ i ].wcLo )
            {
               /* multi-character: needs a second byte */
               ++ulOut;
               break;
            }
         }
         if( ulMax && ulOut >= ulMax )
            break;
      }
   }

   return ulOut;
}
|
|
|
|
/* Converts the UTF-8 string pSrc (ulSrc bytes) to cdp, writing at most
 * ulDst bytes to pDst. For every decoded character, in this order:
 *  - values below 32 are copied unchanged unless fCtrl is set,
 *  - a non-zero reverse table entry gives the output byte,
 *  - a matching multi-character entry gives two bytes (the second one is
 *    dropped when the buffer is full),
 *  - otherwise the value itself is stored, or '?' when it is >= 0x100.
 * A terminating zero is stored only when there is room left.
 * Returns the number of bytes written.
 */
HB_SIZE hb_cdpUTF8ToStr( PHB_CODEPAGE cdp, HB_BOOL fCtrl,
                         const char * pSrc, HB_SIZE ulSrc,
                         char * pDst, HB_SIZE ulDst )
{
   unsigned char * uniTrans;
   HB_WCHAR wcMax, wc = 0;
   HB_SIZE ulIn, ulOut = 0;
   int n = 0;

   if( cdp->uniTable->uniTrans == NULL )
      hb_cdpBuildTransTable( cdp->uniTable );
   uniTrans = cdp->uniTable->uniTrans;
   wcMax = cdp->uniTable->wcMax;

   for( ulIn = 0; ulIn < ulSrc && ulOut < ulDst; ++ulIn )
   {
      /* wait until a character is complete */
      if( !hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulIn ], &n, &wc ) ||
          n != 0 )
         continue;

      if( !fCtrl && wc < 32 )
         pDst[ ulOut++ ] = ( unsigned char ) wc;
      else if( wc <= wcMax && uniTrans[ wc ] )
         pDst[ ulOut++ ] = uniTrans[ wc ];
      else
      {
         int fMulti = 0;

         if( wc && cdp->nMultiUC )
         {
            int i;

            for( i = 0; i < cdp->nMulti && !fMulti; ++i )
            {
               int iCase;  /* index into cFirst[]/cLast[]: 0 for wcUp, 1 for wcLo */

               if( wc == cdp->multi[ i ].wcUp )
                  iCase = 0;
               else if( wc == cdp->multi[ i ].wcLo )
                  iCase = 1;
               else
                  continue;

               pDst[ ulOut++ ] = cdp->multi[ i ].cFirst[ iCase ];
               if( ulOut < ulDst )
                  pDst[ ulOut++ ] = cdp->multi[ i ].cLast[ iCase ];
               fMulti = 1;
            }
         }

         if( !fMulti )
            pDst[ ulOut++ ] = wc >= 0x100 ? '?' : ( unsigned char ) wc;
      }
   }

   if( ulOut < ulDst )
      pDst[ ulOut ] = '\0';

   return ulOut;
}
|
|
|
|
/*
|
|
* U16 (hb wide char) conversions
|
|
*/
|
|
/* Returns the unicode value of byte ch in cdp. The byte itself is
 * returned when cdp is NULL, or when ch is below 32 and fCtrl is not set.
 */
HB_WCHAR hb_cdpGetU16( PHB_CODEPAGE cdp, HB_BOOL fCtrl, unsigned char ch )
{
   if( !cdp || ( !fCtrl && ch < 32 ) )
      return ch;

   return cdp->uniTable->uniCodes[ ch ];
}
|
|
|
|
/* Returns the byte representing unicode value wc in cdp. The reverse
 * table is consulted when cdp is given and wc is >= 32 (or fCtrl is set);
 * a value that is still >= 0x100 afterwards becomes '?'.
 */
unsigned char hb_cdpGetChar( PHB_CODEPAGE cdp, HB_BOOL fCtrl, HB_WCHAR wc )
{
   if( cdp && ( fCtrl || wc >= 32 ) )
   {
      if( cdp->uniTable->uniTrans == NULL )
         hb_cdpBuildTransTable( cdp->uniTable );

      if( wc <= cdp->uniTable->wcMax && cdp->uniTable->uniTrans[ wc ] )
         wc = cdp->uniTable->uniTrans[ wc ];
   }

   if( wc >= 0x100 )
      return '?';

   return ( unsigned char ) wc;
}
|
|
|
|
/* Computes how many wide characters the ulSrc bytes at pSrc (encoded in
 * cdp) give after conversion to U16. Each byte counts as one character,
 * except that a two-byte multi-character of cdp counts as one
 * (multi-characters are not recognised on bytes below 32 unless fCtrl
 * is set). A non-zero ulMax caps the result.
 *
 * The cap is now applied also when cdp has no multi-characters;
 * previously that path returned ulSrc even when it exceeded ulMax,
 * unlike the multi-character path.
 */
HB_SIZE hb_cdpStrAsU16Len( PHB_CODEPAGE cdp, HB_BOOL fCtrl,
                           const char * pSrc, HB_SIZE ulSrc,
                           HB_SIZE ulMax )
{
   if( cdp->nMultiUC )
   {
      HB_SIZE ulS, ulD;
      int i;

      for( ulS = ulD = 0; ulS < ulSrc; ++ulS )
      {
         unsigned char uc = ( unsigned char ) pSrc[ ulS ];

         if( ( fCtrl || uc >= 32 ) &&
             ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 &&
             ulS + 1 < ulSrc &&
             ( cdp->flags[ ( unsigned char ) pSrc[ ulS + 1 ] ] & HB_CDP_MULTI2 ) != 0 )
         {
            for( i = 0; i < cdp->nMulti; ++i )
            {
               if( ( pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 0 ] ||
                     pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 1 ] ) &&
                   ( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 0 ] ||
                     pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 1 ] ) )
               {
                  /* both bytes form one character */
                  ++ulS;
                  break;
               }
            }
         }
         ++ulD;
         if( ulMax && ulD >= ulMax )
            break;
      }
      return ulD;
   }

   /* one byte == one character; honour the limit here too */
   return ( ulMax && ulSrc > ulMax ) ? ulMax : ulSrc;
}
|
|
|
|
HB_SIZE hb_cdpStrToU16( PHB_CODEPAGE cdp, HB_BOOL fCtrl, int iEndian,
|
|
const char * pSrc, HB_SIZE ulSrc,
|
|
HB_WCHAR * pDst, HB_SIZE ulDst )
|
|
{
|
|
const HB_WCHAR * uniCodes;
|
|
HB_SIZE ulS, ulD;
|
|
int i;
|
|
|
|
uniCodes = cdp->uniTable->uniCodes;
|
|
for( ulS = ulD = 0; ulS < ulSrc && ulD < ulDst; ++ulS )
|
|
{
|
|
unsigned char uc = ( unsigned char ) pSrc[ ulS ];
|
|
HB_WCHAR wc;
|
|
|
|
if( !fCtrl && uc < 32 )
|
|
wc = uc;
|
|
else
|
|
{
|
|
wc = uniCodes[ uc ];
|
|
if( cdp->nMultiUC &&
|
|
( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 &&
|
|
ulS + 1 < ulSrc &&
|
|
( cdp->flags[ ( unsigned char ) pSrc[ ulS + 1 ] ] & HB_CDP_MULTI2 ) != 0 )
|
|
{
|
|
for( i = 0; i < cdp->nMulti; ++i )
|
|
{
|
|
if( pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 0 ] ||
|
|
pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 1 ] )
|
|
{
|
|
if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 0 ] )
|
|
{
|
|
wc = cdp->multi[ i ].wcUp;
|
|
++ulS;
|
|
break;
|
|
}
|
|
else if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 1 ] )
|
|
{
|
|
wc = cdp->multi[ i ].wcLo;
|
|
++ulS;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#if !defined( HB_BIG_ENDIAN ) && !defined( HB_LITTLE_ENDIAN )
|
|
if( iEndian == HB_CDP_ENDIAN_LITTLE )
|
|
HB_PUT_LE_UINT16( &pDst[ ulD ], wc );
|
|
else if( iEndian == HB_CDP_ENDIAN_BIG )
|
|
HB_PUT_BE_UINT16( &pDst[ ulD ], wc );
|
|
else
|
|
pDst[ ulD ] = wc;
|
|
++ulD;
|
|
#else
|
|
# if defined( HB_BIG_ENDIAN )
|
|
if( iEndian == HB_CDP_ENDIAN_LITTLE )
|
|
# else
|
|
if( iEndian == HB_CDP_ENDIAN_BIG )
|
|
# endif
|
|
wc = HB_SWAP_UINT16( wc );
|
|
pDst[ ulD++ ] = wc;
|
|
#endif
|
|
}
|
|
if( ulD < ulDst )
|
|
pDst[ ulD ] = '\0';
|
|
|
|
return ulD;
|
|
}
|
|
|
|
HB_SIZE hb_cdpU16AsStrLen( PHB_CODEPAGE cdp, HB_BOOL fCtrl,
|
|
const HB_WCHAR * pSrc, HB_SIZE ulSrc,
|
|
HB_SIZE ulMax )
|
|
{
|
|
unsigned char * uniTrans;
|
|
HB_WCHAR wcMax, wc;
|
|
HB_SIZE ulS, ulD;
|
|
int i;
|
|
|
|
if( cdp->uniTable->uniTrans == NULL )
|
|
hb_cdpBuildTransTable( cdp->uniTable );
|
|
uniTrans = cdp->uniTable->uniTrans;
|
|
wcMax = cdp->uniTable->wcMax;
|
|
|
|
for( ulS = ulD = 0; ulS < ulSrc; ++ulS )
|
|
{
|
|
wc = pSrc[ ulS ];
|
|
++ulD;
|
|
if( ulMax && ulD >= ulMax )
|
|
break;
|
|
if( wc && cdp->nMultiUC && ( fCtrl || wc >= 32 ) &&
|
|
( wc > wcMax || uniTrans[ wc ] == 0 ) )
|
|
{
|
|
for( i = 0; i < cdp->nMulti; ++i )
|
|
{
|
|
if( wc == cdp->multi[ i ].wcUp ||
|
|
wc == cdp->multi[ i ].wcLo )
|
|
{
|
|
++ulD;
|
|
break;
|
|
}
|
|
}
|
|
if( ulMax && ulD >= ulMax )
|
|
break;
|
|
}
|
|
}
|
|
|
|
return ulD;
|
|
}
|
|
|
|
HB_SIZE hb_cdpU16ToStr( PHB_CODEPAGE cdp, HB_BOOL fCtrl, int iEndian,
|
|
const HB_WCHAR * pSrc, HB_SIZE ulSrc,
|
|
char * pDst, HB_SIZE ulDst )
|
|
{
|
|
unsigned char * uniTrans;
|
|
HB_WCHAR wcMax, wc;
|
|
HB_SIZE ulS, ulD;
|
|
int i;
|
|
|
|
if( cdp->uniTable->uniTrans == NULL )
|
|
hb_cdpBuildTransTable( cdp->uniTable );
|
|
uniTrans = cdp->uniTable->uniTrans;
|
|
wcMax = cdp->uniTable->wcMax;
|
|
|
|
for( ulS = ulD = 0; ulS < ulSrc && ulD < ulDst; ++ulS )
|
|
{
|
|
#if !defined( HB_BIG_ENDIAN ) && !defined( HB_LITTLE_ENDIAN )
|
|
if( iEndian == HB_CDP_ENDIAN_LITTLE )
|
|
wc = HB_GET_LE_UINT16( &pSrc[ ulS ] );
|
|
else if( iEndian == HB_CDP_ENDIAN_BIG )
|
|
wc = HB_GET_BE_UINT16( &pSrc[ ulS ] );
|
|
else
|
|
wc = pSrc[ ulS ];
|
|
#else
|
|
wc = pSrc[ ulS ];
|
|
# if defined( HB_BIG_ENDIAN )
|
|
if( iEndian == HB_CDP_ENDIAN_LITTLE )
|
|
# else
|
|
if( iEndian == HB_CDP_ENDIAN_BIG )
|
|
# endif
|
|
wc = HB_SWAP_UINT16( wc );
|
|
#endif
|
|
if( !fCtrl && wc < 32 )
|
|
pDst[ ulD++ ] = ( unsigned char ) wc;
|
|
else if( wc <= wcMax && uniTrans[ wc ] )
|
|
pDst[ ulD++ ] = uniTrans[ wc ];
|
|
else
|
|
{
|
|
if( wc && cdp->nMultiUC )
|
|
{
|
|
for( i = 0; i < cdp->nMulti; ++i )
|
|
{
|
|
if( wc == cdp->multi[ i ].wcUp )
|
|
{
|
|
pDst[ ulD++ ] = cdp->multi[ i ].cFirst[ 0 ];
|
|
if( ulD < ulDst )
|
|
pDst[ ulD++ ] = cdp->multi[ i ].cLast[ 0 ];
|
|
break;
|
|
}
|
|
if( wc == cdp->multi[ i ].wcLo )
|
|
{
|
|
pDst[ ulD++ ] = cdp->multi[ i ].cFirst[ 1 ];
|
|
if( ulD < ulDst )
|
|
pDst[ ulD++ ] = cdp->multi[ i ].cLast[ 1 ];
|
|
break;
|
|
}
|
|
}
|
|
if( i < cdp->nMulti )
|
|
continue;
|
|
}
|
|
pDst[ ulD++ ] = wc >= 0x100 ? '?' : ( unsigned char ) wc;
|
|
}
|
|
}
|
|
|
|
if( ulD < ulDst )
|
|
pDst[ ulD ] = '\0';
|
|
|
|
return ulD;
|
|
}
|
|
|
|
|
|
/*
|
|
* CP translations
|
|
*/
|
|
/* Computes the length in bytes of pSrc (ulSrc bytes, encoded in cdpIn)
 * after translation to cdpOut. A non-zero ulMax caps the result.
 *
 * When either codepage is NULL or both share one unicode table nothing is
 * translated and the (capped) source length is returned. Translations
 * from/to the UTF8 pseudo codepage are delegated to hb_cdpUTF8AsStrLen()
 * and hb_cdpStrAsUTF8Len(). Otherwise the length differs from the source
 * length only through multi-characters: a two-byte multi-character of
 * cdpIn is read as one character, and a character which has no single
 * byte in cdpOut but matches one of its multi-characters needs two bytes.
 *
 * Fix: the "no single byte in cdpOut" test was inverted
 * (uniTrans[ wc ] instead of uniTrans[ wc ] == 0), so characters with a
 * value <= wcMax that need a multi-character were counted as one byte.
 * The condition now matches hb_cdpUTF8AsStrLen() and hb_cdpU16AsStrLen().
 */
HB_SIZE hb_cdpTransLen( const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax,
                        PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE ulSize;

   if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable )
   {
      if( cdpIn == &s_utf8_codepage )
         return hb_cdpUTF8AsStrLen( cdpOut, HB_FALSE, pSrc, ulSrc, ulMax );
      else if( cdpOut == &s_utf8_codepage )
         return hb_cdpStrAsUTF8Len( cdpIn, HB_FALSE, pSrc, ulSrc, ulMax );
      else
      {
         unsigned char * uniTrans;
         HB_WCHAR wcMax;

         if( cdpOut->uniTable->uniTrans == NULL )
            hb_cdpBuildTransTable( cdpOut->uniTable );
         uniTrans = cdpOut->uniTable->uniTrans;
         wcMax = cdpOut->uniTable->wcMax;

         if( cdpIn->nMultiUC || cdpOut->nMultiUC )
         {
            HB_SIZE ul;
            int i;

            for( ul = ulSize = 0; ul < ulSrc && ( ulMax == 0 || ulSize < ulMax ); ++ul, ++ulSize )
            {
               unsigned char uc = ( unsigned char ) pSrc[ ul ];
               HB_WCHAR wc = cdpIn->uniTable->uniCodes[ uc ];

               /* input side: join a two-byte multi-character */
               if( cdpIn->nMultiUC &&
                   ( cdpIn->flags[ uc ] & HB_CDP_MULTI1 ) != 0 &&
                   ul + 1 < ulSrc &&
                   ( cdpIn->flags[ ( unsigned char ) pSrc[ ul + 1 ] ] & HB_CDP_MULTI2 ) != 0 )
               {
                  for( i = 0; i < cdpIn->nMulti; ++i )
                  {
                     if( pSrc[ ul + 1 ] == cdpIn->multi[ i ].cLast[ 0 ] ||
                         pSrc[ ul + 1 ] == cdpIn->multi[ i ].cLast[ 1 ] )
                     {
                        if( pSrc[ ul ] == cdpIn->multi[ i ].cFirst[ 0 ] )
                        {
                           wc = cdpIn->multi[ i ].wcUp;
                           ++ul;
                           break;
                        }
                        else if( pSrc[ ul ] == cdpIn->multi[ i ].cFirst[ 1 ] )
                        {
                           wc = cdpIn->multi[ i ].wcLo;
                           ++ul;
                           break;
                        }
                     }
                  }
               }

               /* output side: a character without a single byte mapping
                  may need a two-byte multi-character */
               if( wc && ( wc > wcMax || uniTrans[ wc ] == 0 ) && cdpOut->nMultiUC &&
                   ( ulMax == 0 || ulSize + 1 < ulMax ) )
               {
                  for( i = 0; i < cdpOut->nMulti; ++i )
                  {
                     if( wc == cdpOut->multi[ i ].wcUp ||
                         wc == cdpOut->multi[ i ].wcLo )
                     {
                        ++ulSize;
                        break;
                     }
                  }
               }
            }
         }
         else
            ulSize = ( ulMax && ulSrc > ulMax ) ? ulMax : ulSrc;
      }
   }
   else
      ulSize = ( ulMax && ulSrc > ulMax ) ? ulMax : ulSrc;

   return ulSize;
}
|
|
|
|
/*
 * Translate ulSrc bytes at pSrc from cdpIn to cdpOut and store at most
 * ulDst bytes in pDst. Returns the number of bytes stored; a trailing
 * '\0' is appended only when the result is shorter than ulDst.
 *
 * When a codepage is missing or both share one Unicode table the data
 * is copied unchanged. Translations from / to the internal UTF8
 * codepage are delegated to hb_cdpUTF8ToStr() / hb_cdpStrToUTF8().
 */
HB_SIZE hb_cdpTransTo( const char * pSrc, HB_SIZE ulSrc,
                       char * pDst, HB_SIZE ulDst,
                       PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE ulSize;

   if( cdpIn == NULL || cdpOut == NULL || cdpIn->uniTable == cdpOut->uniTable )
   {
      /* nothing to translate - plain bounded copy */
      ulSize = ( ulSrc > ulDst ) ? ulDst : ulSrc;
      memcpy( pDst, pSrc, ulSize );
   }
   else if( cdpIn == &s_utf8_codepage )
      return hb_cdpUTF8ToStr( cdpOut, HB_FALSE, pSrc, ulSrc, pDst, ulDst );
   else if( cdpOut == &s_utf8_codepage )
      return hb_cdpStrToUTF8( cdpIn, HB_FALSE, pSrc, ulSrc, pDst, ulDst );
   else
   {
      unsigned char * pTrans;
      HB_WCHAR wcLimit;

      if( cdpOut->uniTable->uniTrans == NULL )
         hb_cdpBuildTransTable( cdpOut->uniTable );
      pTrans = cdpOut->uniTable->uniTrans;
      wcLimit = cdpOut->uniTable->wcMax;

      if( !cdpIn->nMultiUC && !cdpOut->nMultiUC )
      {
         /* simple case: one input byte gives exactly one output byte */
         HB_SIZE ulCount = ( ulSrc > ulDst ) ? ulDst : ulSrc;

         for( ulSize = 0; ulSize < ulCount; ++ulSize )
         {
            unsigned char ucOut = ( unsigned char ) pSrc[ ulSize ];
            HB_WCHAR wc = cdpIn->uniTable->uniCodes[ ucOut ];

            if( wc && wc <= wcLimit && pTrans[ wc ] )
               ucOut = pTrans[ wc ];
            pDst[ ulSize ] = ucOut;
         }
      }
      else
      {
         HB_SIZE ulPos;
         int iPos;

         for( ulPos = ulSize = 0; ulPos < ulSrc && ulSize < ulDst; ++ulPos, ++ulSize )
         {
            unsigned char ucOut = ( unsigned char ) pSrc[ ulPos ];
            HB_WCHAR wc = cdpIn->uniTable->uniCodes[ ucOut ];

            /* try to join two input bytes into one multi-character */
            if( cdpIn->nMultiUC && ulPos + 1 < ulSrc &&
                ( cdpIn->flags[ ucOut ] & HB_CDP_MULTI1 ) != 0 &&
                ( cdpIn->flags[ ( unsigned char ) pSrc[ ulPos + 1 ] ] & HB_CDP_MULTI2 ) != 0 )
            {
               char chFirst = pSrc[ ulPos ];
               char chLast = pSrc[ ulPos + 1 ];

               for( iPos = 0; iPos < cdpIn->nMulti; ++iPos )
               {
                  PHB_MULTICHAR pIn = &cdpIn->multi[ iPos ];

                  if( chLast != pIn->cLast[ 0 ] && chLast != pIn->cLast[ 1 ] )
                     continue;

                  if( chFirst == pIn->cFirst[ 0 ] )
                  {
                     wc = pIn->wcUp;
                     ++ulPos;
                     break;
                  }
                  if( chFirst == pIn->cFirst[ 1 ] )
                  {
                     wc = pIn->wcLo;
                     ++ulPos;
                     break;
                  }
               }
            }

            if( wc )
            {
               if( wc <= wcLimit && pTrans[ wc ] )
                  ucOut = pTrans[ wc ];
               else if( cdpOut->nMultiUC )
               {
                  /* no single byte form - look for a two byte sequence */
                  for( iPos = 0; iPos < cdpOut->nMulti; ++iPos )
                  {
                     PHB_MULTICHAR pOut = &cdpOut->multi[ iPos ];
                     int iCase;

                     if( wc == pOut->wcUp )
                        iCase = 0;
                     else if( wc == pOut->wcLo )
                        iCase = 1;
                     else
                        continue;

                     if( ulSize + 1 < ulDst )
                     {
                        pDst[ ulSize++ ] = pOut->cFirst[ iCase ];
                        ucOut = pOut->cLast[ iCase ];
                     }
                     else
                        /* room for one byte only: store the first one */
                        ucOut = pOut->cFirst[ iCase ];
                     break;
                  }
               }
            }

            pDst[ ulSize ] = ucOut;
         }
      }
   }

   if( ulSize < ulDst )
      pDst[ ulSize ] = '\0';

   return ulSize;
}
|
|
|
|
/*
 * Translate a single byte value from cdpIn to cdpOut.
 *
 * iChar is returned unchanged when a codepage is missing, when both
 * codepages share one Unicode table, when iChar is outside the range
 * 0..255, when fCtrl is true and iChar is below 32, or when cdpOut has
 * no single byte equivalent for the character.
 */
int hb_cdpTranslateChar( int iChar, HB_BOOL fCtrl, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   PHB_UNITABLE uniOut;
   HB_WCHAR wc;

   if( cdpIn == NULL || cdpOut == NULL || cdpIn->uniTable == cdpOut->uniTable )
      return iChar;

   if( iChar < ( fCtrl ? 32 : 0 ) || iChar >= 256 )
      return iChar;

   uniOut = cdpOut->uniTable;
   if( uniOut->uniTrans == NULL )
      hb_cdpBuildTransTable( uniOut );

   wc = cdpIn->uniTable->uniCodes[ iChar ];
   if( wc == 0 || wc > uniOut->wcMax )
      return iChar;

   if( uniOut->uniTrans[ wc ] )
      iChar = uniOut->uniTrans[ wc ];

   return iChar;
}
|
|
|
|
/*
 * Size of the translation of pSrc (ulSrc bytes) from cdpIn to cdpOut
 * without any upper limit: hb_cdpTransLen() called with ulMax == 0.
 */
HB_SIZE hb_cdpnDupLen( const char * pSrc, HB_SIZE ulSrc,
                       PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE ulLen = hb_cdpTransLen( pSrc, ulSrc, 0, cdpIn, cdpOut );

   return ulLen;
}
|
|
|
|
/*
 * Size of the translation of pSrc (ulSrc bytes) from cdpIn to cdpOut
 * limited to ulMax bytes; thin wrapper over hb_cdpTransLen().
 */
HB_SIZE hb_cdpnDup2Len( const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax,
                        PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE ulLen = hb_cdpTransLen( pSrc, ulSrc, ulMax, cdpIn, cdpOut );

   return ulLen;
}
|
|
|
|
/*
 * Return a newly allocated (hb_xgrab()), '\0' terminated translation of
 * pSrc from cdpIn to cdpOut.
 *
 *   pulLen - on entry the number of bytes in pSrc, on exit the number
 *            of bytes stored in the returned buffer (terminator not
 *            counted).
 *
 * The buffer is sized with hb_cdpTransLen(), but the length reported to
 * the caller and the position of the terminator are taken from the
 * value returned by hb_cdpTransTo(), so they always describe the bytes
 * really written even if both functions disagree for some input.
 */
char * hb_cdpnDup( const char * pSrc, HB_SIZE * pulLen,
                   PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   char * pDst;
   HB_SIZE ulDst;

   ulDst = hb_cdpTransLen( pSrc, *pulLen, 0, cdpIn, cdpOut );
   /* one extra byte is reserved for the terminator */
   pDst = ( char * ) hb_xgrab( ulDst + 1 );
   /* the translation is limited to ulDst bytes so the terminator below
    * always fits into the allocated block
    */
   ulDst = hb_cdpTransTo( pSrc, *pulLen, pDst, ulDst, cdpIn, cdpOut );
   pDst[ ulDst ] = '\0';
   *pulLen = ulDst;

   return pDst;
}
|
|
|
|
/*
 * Translate pSrc (ulSrc bytes) from cdpIn to cdpOut into the caller's
 * buffer pDst.
 *
 *   pulDst - on entry the capacity of pDst, on exit the number of
 *            bytes stored by hb_cdpTransTo().
 *
 * Always returns pDst.
 */
const char * hb_cdpnDup2( const char * pSrc, HB_SIZE ulSrc,
                          char * pDst, HB_SIZE * pulDst,
                          PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE ulDone = hb_cdpTransTo( pSrc, ulSrc, pDst, *pulDst, cdpIn, cdpOut );

   *pulDst = ulDone;

   return pDst;
}
|
|
|
|
/*
 * Translate pSrc (ulSrc bytes) from cdpIn to cdpOut reusing caller's
 * buffers when possible.
 *
 *   pDst    - destination buffer or NULL; when NULL the buffer kept in
 *             *pFree is used and, if that is NULL too and *pulSize is
 *             not 0, the translation is tried in place over pSrc.
 *   pulDst  - optional, receives the size of the result.
 *   pFree   - buffer owned by the caller; replaced by a new hb_xgrab()
 *             block (the previous one is released) when the
 *             destination is too small or in place translation cannot
 *             be used because the output is UTF8 or cdpOut has
 *             multi-character sequences.
 *   pulSize - capacity of the destination buffer, updated on
 *             reallocation.
 *
 * Returns pSrc itself when no translation is needed (missing codepage,
 * shared Unicode table or empty input), otherwise the buffer holding
 * the translated text.
 */
const char * hb_cdpnDup3( const char * pSrc, HB_SIZE ulSrc,
                          char * pDst, HB_SIZE * pulDst,
                          char ** pFree, HB_SIZE * pulSize,
                          PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   char * pOld = NULL;
   HB_SIZE ulLen;
   HB_BOOL fAlloc;

   if( cdpIn == NULL || cdpOut == NULL ||
       cdpIn->uniTable == cdpOut->uniTable || ulSrc == 0 )
   {
      if( pulDst )
         *pulDst = ulSrc;
      return pSrc;
   }

   ulLen = hb_cdpTransLen( pSrc, ulSrc, 0, cdpIn, cdpOut );

   if( pDst == NULL )
   {
      pDst = *pFree;
      if( pDst == NULL && *pulSize > 0 )
         pDst = ( char * ) pSrc;
   }

   if( ulLen >= *pulSize )
      fAlloc = HB_TRUE;
   else if( pDst == pSrc )
      fAlloc = ( cdpOut == &s_utf8_codepage || cdpOut->nMultiUC ) ? HB_TRUE : HB_FALSE;
   else
      fAlloc = HB_FALSE;

   if( fAlloc )
   {
      /* the old block is released after the translation */
      pOld = *pFree;
      pDst = *pFree = ( char * ) hb_xgrab( ulLen + 1 );
      *pulSize = ulLen + 1;
   }

   ulLen = hb_cdpTransTo( pSrc, ulSrc, pDst, *pulSize, cdpIn, cdpOut );

   if( pOld )
      hb_xfree( pOld );
   if( pulDst )
      *pulDst = ulLen;

   return pDst;
}
|
|
|
|
/*
 * Translate the '\0' terminated string pszSrc from cdpIn to cdpOut and
 * return the result produced by hb_cdpnDup().
 */
char * hb_cdpDup( const char * pszSrc, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE ulSize;

   ulSize = strlen( pszSrc );

   return hb_cdpnDup( pszSrc, &ulSize, cdpIn, cdpOut );
}
|
|
|
|
|
|
/*
|
|
* CP management
|
|
*/
|
|
static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info,
|
|
PHB_UNITABLE uniTable,
|
|
const char * pszUpper,
|
|
const char * pszLower,
|
|
unsigned int nACSort )
|
|
{
|
|
HB_BOOL lSort, fError;
|
|
int iMulti, iAcc, iAccUp, iAccLo, iSortUp, iSortLo, i;
|
|
const char * pup, * plo;
|
|
unsigned char ucUp, ucLo, ucUp2, ucLo2;
|
|
HB_SIZE ulSize, ul;
|
|
unsigned char * buffer, * flags, * upper, * lower, * sort, * acc;
|
|
unsigned char used[ 256 ];
|
|
PHB_CODEPAGE cdp;
|
|
PHB_MULTICHAR multi;
|
|
|
|
memset( used, '\0', sizeof( used ) );
|
|
|
|
iMulti = iAcc = iSortUp = iSortLo = 0;
|
|
fError = lSort = HB_FALSE;
|
|
|
|
ucUp2 = ucLo2 = 0;
|
|
pup = pszUpper;
|
|
plo = pszLower;
|
|
for( ;; )
|
|
{
|
|
ucUp = ( unsigned char ) *pup++;
|
|
ucLo = ( unsigned char ) *plo++;
|
|
if( ucUp == 0 || ucLo == 0 )
|
|
{
|
|
if( ucUp || ucLo )
|
|
fError = HB_TRUE;
|
|
break;
|
|
}
|
|
if( ucUp == '.' )
|
|
{
|
|
if( ucLo == '.' &&
|
|
pup[ 0 ] && pup[ 1 ] &&
|
|
( pup[ 2 ] == '.' || pup[ 2 ] == '=' ) &&
|
|
plo[ 0 ] && plo[ 1 ] &&
|
|
( plo[ 2 ] == '.' || plo[ 2 ] == '=' ) )
|
|
{
|
|
ucUp = ( unsigned char ) *pup;
|
|
ucLo = ( unsigned char ) *plo;
|
|
|
|
if( ( ucUp != ' ' || ucLo != ' ' ) &&
|
|
( ucUp == *pup || ( ucUp != ' ' && *pup != ' ' ) ) &&
|
|
( ucLo == *plo || ( ucLo != ' ' && *plo != ' ' ) ) )
|
|
{
|
|
if( ucUp != ' ' )
|
|
++iSortLo;
|
|
pup += 2;
|
|
plo += 2;
|
|
if( *pup == '=' )
|
|
{
|
|
do
|
|
++pup;
|
|
while( HB_ISXDIGIT( *pup ) );
|
|
}
|
|
if( *plo == '=' )
|
|
{
|
|
do
|
|
++plo;
|
|
while( HB_ISXDIGIT( *plo ) );
|
|
}
|
|
if( *pup == '.' && *plo == '.' )
|
|
{
|
|
lSort = HB_TRUE;
|
|
iMulti++;
|
|
pup++;
|
|
plo++;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
fError = HB_TRUE;
|
|
break;
|
|
}
|
|
if( ucUp == '~' )
|
|
{
|
|
if( ucLo != '~' || *pup == '\0' || *plo == '\0' )
|
|
{
|
|
fError = HB_TRUE;
|
|
break;
|
|
}
|
|
ucUp = ( unsigned char ) *pup++;
|
|
ucLo = ( unsigned char ) *plo++;
|
|
++iAcc;
|
|
}
|
|
if( used[ ucUp ] != 0 )
|
|
ucUp = ' ';
|
|
if( used[ ucLo ] != 0 )
|
|
ucLo = ' ';
|
|
if( ucUp == ' ' && ucLo == ' ' )
|
|
{
|
|
fError = HB_TRUE;
|
|
break;
|
|
}
|
|
if( ucUp != ' ' )
|
|
{
|
|
used[ ucUp ] = 1;
|
|
++iSortLo;
|
|
if( ucUp < ucUp2 )
|
|
lSort = HB_TRUE;
|
|
ucUp2 = ucUp;
|
|
}
|
|
if( ucLo != ' ' )
|
|
{
|
|
used[ ucLo ] = 1;
|
|
if( ucLo < ucLo2 )
|
|
lSort = HB_TRUE;
|
|
ucLo2 = ucLo;
|
|
}
|
|
}
|
|
|
|
if( iMulti > 64 )
|
|
fError = HB_TRUE;
|
|
|
|
if( fError || nACSort > HB_CDP_ACSORT_INTERLEAVED )
|
|
hb_errInternal( 9994, "Harbour CP (%s) initialization failure", id, NULL );
|
|
|
|
if( iAcc == 0 )
|
|
nACSort = HB_CDP_ACSORT_NONE;
|
|
else if( nACSort != HB_CDP_ACSORT_NONE )
|
|
lSort = HB_TRUE;
|
|
|
|
ulSize = 0x300;
|
|
if( lSort )
|
|
{
|
|
ulSize += 0x100;
|
|
if( nACSort == HB_CDP_ACSORT_INTERLEAVED )
|
|
ulSize += 0x100;
|
|
}
|
|
ul = ulSize;
|
|
ulSize += sizeof( HB_CODEPAGE );
|
|
if( iMulti )
|
|
ulSize += iMulti * sizeof( HB_MULTICHAR );
|
|
|
|
buffer = ( unsigned char * ) hb_xgrab( ulSize );
|
|
memset( buffer, '\0', ulSize );
|
|
cdp = ( PHB_CODEPAGE ) &buffer[ ul ];
|
|
cdp->buffer = buffer;
|
|
|
|
cdp->flags = flags = buffer;
|
|
buffer += 0x100;
|
|
cdp->upper = upper = buffer;
|
|
buffer += 0x100;
|
|
cdp->lower = lower = buffer;
|
|
buffer += 0x100;
|
|
sort = acc = NULL;
|
|
if( lSort )
|
|
{
|
|
cdp->sort = sort = buffer;
|
|
buffer += 0x100;
|
|
if( nACSort == HB_CDP_ACSORT_INTERLEAVED )
|
|
{
|
|
cdp->acc = acc = buffer;
|
|
buffer += 0x100;
|
|
}
|
|
}
|
|
if( iMulti )
|
|
cdp->multi = ( PHB_MULTICHAR ) &buffer[ sizeof( HB_CODEPAGE ) ];
|
|
|
|
cdp->id = id;
|
|
cdp->info = info;
|
|
cdp->uniTable = uniTable;
|
|
cdp->nACSort = nACSort;
|
|
cdp->nMulti = iMulti;
|
|
for( i = 0; i < 0x100; ++i )
|
|
{
|
|
if( HB_ISDIGIT( i ) )
|
|
flags[ i ] |= HB_CDP_DIGIT;
|
|
if( HB_ISALPHA( i ) )
|
|
flags[ i ] |= HB_CDP_ALPHA;
|
|
if( HB_ISUPPER( i ) )
|
|
flags[ i ] |= HB_CDP_UPPER;
|
|
if( HB_ISLOWER( i ) )
|
|
flags[ i ] |= HB_CDP_LOWER;
|
|
upper[ i ] = ( unsigned char ) HB_TOUPPER( i );
|
|
lower[ i ] = ( unsigned char ) HB_TOLOWER( i );
|
|
}
|
|
|
|
iAccUp = iAccLo = 0;
|
|
multi = cdp->multi;
|
|
pup = pszUpper;
|
|
plo = pszLower;
|
|
ucUp2 = ucLo2 = 255;
|
|
memset( used, '\0', sizeof( used ) );
|
|
while( *pup )
|
|
{
|
|
ucUp = ( unsigned char ) *pup++;
|
|
ucLo = ( unsigned char ) *plo++;
|
|
if( ucUp == '.' )
|
|
{
|
|
multi->cFirst[ 0 ] = *pup++;
|
|
multi->cLast [ 0 ] = *pup++;
|
|
multi->cFirst[ 1 ] = *plo++;
|
|
multi->cLast [ 1 ] = *plo++;
|
|
if( multi->cFirst[ 0 ] != ' ' )
|
|
{
|
|
flags[ ( unsigned char ) multi->cFirst[ 0 ] ] |= HB_CDP_MULTI1;
|
|
flags[ ( unsigned char ) multi->cLast [ 0 ] ] |= HB_CDP_MULTI2;
|
|
multi->sortUp = ++iSortUp;
|
|
}
|
|
if( multi->cFirst[ 1 ] != ' ' )
|
|
{
|
|
flags[ ( unsigned char ) multi->cFirst[ 1 ] ] |= HB_CDP_MULTI1;
|
|
flags[ ( unsigned char ) multi->cLast [ 1 ] ] |= HB_CDP_MULTI2;
|
|
multi->sortLo = ++iSortLo;
|
|
}
|
|
if( *pup == '=' )
|
|
{
|
|
++pup;
|
|
while( HB_ISXDIGIT( *pup ) )
|
|
{
|
|
multi->wcUp = ( multi->wcUp << 4 ) |
|
|
( *pup >= 'a' ? ( *pup - 'a' + 10 ) :
|
|
( *pup >= 'A' ? ( *pup - 'A' + 10 ) :
|
|
( *pup - '0' ) ) );
|
|
++pup;
|
|
}
|
|
}
|
|
pup++;
|
|
if( *plo == '=' )
|
|
{
|
|
++plo;
|
|
while( HB_ISXDIGIT( *plo ) )
|
|
{
|
|
multi->wcLo = ( multi->wcLo << 4 ) |
|
|
( *plo >= 'a' ? ( *plo - 'a' + 10 ) :
|
|
( *plo >= 'A' ? ( *plo - 'A' + 10 ) :
|
|
( *plo - '0' ) ) );
|
|
++plo;
|
|
}
|
|
}
|
|
plo++;
|
|
if( multi->wcUp || multi->wcLo )
|
|
cdp->nMultiUC++;
|
|
multi++;
|
|
}
|
|
else
|
|
{
|
|
iAcc = 0;
|
|
if( ucUp == '~' )
|
|
{
|
|
iAcc = 1;
|
|
ucUp = ( unsigned char ) *pup++;
|
|
ucLo = ( unsigned char ) *plo++;
|
|
}
|
|
if( ucUp != ' ' )
|
|
{
|
|
flags[ ucUp ] |= HB_CDP_ALPHA;
|
|
flags[ ucUp ] |= HB_CDP_UPPER;
|
|
if( ucLo != ' ' && ( used[ ucUp ] & HB_CDP_UPPER ) == 0 )
|
|
{
|
|
lower[ ucUp ] = ucLo;
|
|
used[ ucUp ] |= HB_CDP_UPPER;
|
|
}
|
|
if( sort )
|
|
{
|
|
if( sort[ ucUp ] == 0 )
|
|
{
|
|
if( iAcc && nACSort != HB_CDP_ACSORT_NONE )
|
|
++iAccUp;
|
|
sort[ ucUp ] = ++iSortUp - iAccUp;
|
|
if( acc )
|
|
acc[ ucUp ] = iSortUp;
|
|
if( ucUp2 > ucUp )
|
|
ucUp2 = ucUp;
|
|
}
|
|
}
|
|
}
|
|
if( ucLo != ' ' )
|
|
{
|
|
flags[ ucLo ] |= HB_CDP_ALPHA;
|
|
flags[ ucLo ] |= HB_CDP_LOWER;
|
|
if( ucUp != ' ' && ( used[ ucLo ] & HB_CDP_LOWER ) == 0 )
|
|
{
|
|
upper[ ucLo ] = ucUp;
|
|
used[ ucLo ] |= HB_CDP_LOWER;
|
|
}
|
|
if( sort )
|
|
{
|
|
if( sort[ ucLo ] == 0 )
|
|
{
|
|
if( iAcc && nACSort != HB_CDP_ACSORT_NONE )
|
|
++iAccLo;
|
|
sort[ ucLo ] = ++iSortLo - iAccLo;
|
|
if( acc )
|
|
acc[ ucLo ] = iSortLo;
|
|
if( ucLo2 > ucLo )
|
|
ucLo2 = ucLo;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if( sort )
|
|
{
|
|
int iUp, iLo, iSort1, iSort2, iSort3, iAdd;
|
|
|
|
if( iMulti > 0 )
|
|
{
|
|
if( iMulti > ucUp2 || iMulti > ucLo2 )
|
|
hb_errInternal( 9994, "Harbour CP (%s) initialization failure", id, NULL );
|
|
|
|
if( iMulti <= 32 )
|
|
iMulti = 33;
|
|
else
|
|
iMulti = 65;
|
|
}
|
|
else
|
|
iMulti = 1;
|
|
|
|
for( iUp = iLo = 0, i = iMulti; i < 256; ++i )
|
|
{
|
|
if( sort[ i ] == 0 )
|
|
{
|
|
if( i < ( int ) ucUp2 )
|
|
++iUp;
|
|
else if( i < ( int ) ucLo2 )
|
|
++iLo;
|
|
}
|
|
}
|
|
for( iSort1 = iSort2 = iSort3 = 0, i = iMulti; i < 256; ++i )
|
|
{
|
|
if( sort[ i ] == 0 )
|
|
{
|
|
if( i < ( int ) ucUp2 )
|
|
iAdd = ++iSort1;
|
|
else if( i < ( int ) ucLo2 )
|
|
iAdd = ++iSort2 + iSortUp + iUp;
|
|
else
|
|
iAdd = ++iSort3 + iUp + iSortLo + iLo;
|
|
}
|
|
else if( sort[ i ] <= iSortUp )
|
|
iAdd = iUp;
|
|
else
|
|
iAdd = iUp + iLo;
|
|
|
|
sort[ i ] += iAdd;
|
|
if( acc )
|
|
acc[ i ] += iAdd;
|
|
}
|
|
}
|
|
|
|
return cdp;
|
|
}
|
|
|
|
static PHB_CODEPAGE * hb_cdpFindPos( const char * id )
|
|
{
|
|
PHB_CODEPAGE * cdp_ptr;
|
|
|
|
if( s_cdpList == NULL )
|
|
{
|
|
unsigned char * flags, * upper, * lower;
|
|
int i;
|
|
|
|
s_en_codepage.buffer = ( unsigned char * ) hb_xgrab( 0x300 );
|
|
memset( s_en_codepage.buffer, '\0', 0x300 );
|
|
s_en_codepage.flags = flags = ( unsigned char * ) s_en_codepage.buffer;
|
|
s_en_codepage.upper = upper = ( unsigned char * ) s_en_codepage.buffer + 0x100;
|
|
s_en_codepage.lower = lower = ( unsigned char * ) s_en_codepage.buffer + 0x200;
|
|
for( i = 0; i < 0x100; ++i )
|
|
{
|
|
if( HB_ISDIGIT( i ) )
|
|
flags[ i ] |= HB_CDP_DIGIT;
|
|
if( HB_ISALPHA( i ) )
|
|
flags[ i ] |= HB_CDP_ALPHA;
|
|
if( HB_ISUPPER( i ) )
|
|
flags[ i ] |= HB_CDP_UPPER;
|
|
if( HB_ISLOWER( i ) )
|
|
flags[ i ] |= HB_CDP_LOWER;
|
|
upper[ i ] = ( unsigned char ) HB_TOUPPER( i );
|
|
lower[ i ] = ( unsigned char ) HB_TOLOWER( i );
|
|
}
|
|
s_utf8_codepage.flags = s_en_codepage.flags;
|
|
s_utf8_codepage.upper = s_en_codepage.upper;
|
|
s_utf8_codepage.lower = s_en_codepage.lower;
|
|
s_cdpList = &s_en_codepage;
|
|
}
|
|
|
|
cdp_ptr = &s_cdpList;
|
|
|
|
if( id )
|
|
{
|
|
while( *cdp_ptr )
|
|
{
|
|
if( strcmp( ( *cdp_ptr )->id, id ) == 0 )
|
|
break;
|
|
cdp_ptr = &( *cdp_ptr )->next;
|
|
}
|
|
}
|
|
|
|
return cdp_ptr;
|
|
}
|
|
|
|
/*
 * Append a ready to use codepage structure to the list of registered
 * codepages. Returns HB_FALSE when a codepage with the same id is
 * already registered.
 */
HB_BOOL hb_cdpRegisterRaw( PHB_CODEPAGE cdp )
{
   PHB_CODEPAGE * pSlot;

   HB_TRACE( HB_TR_DEBUG, ( "hb_cdpRegisterRaw(%p)", cdp ) );

   pSlot = hb_cdpFindPos( cdp->id );
   if( *pSlot != NULL )
      return HB_FALSE;

   *pSlot = cdp;

   return HB_TRUE;
}
|
|
|
|
HB_BOOL hb_cdpRegisterNew( const char * id, const char * info,
|
|
PHB_UNITABLE uniTable,
|
|
const char * pszUpper, const char * pszLower,
|
|
unsigned int nACSort )
|
|
{
|
|
PHB_CODEPAGE * cdp_ptr;
|
|
|
|
HB_TRACE( HB_TR_DEBUG, ( "hb_cdpRegisterNew(%s,%s,%s,%s,%d)", id, info, pszUpper, pszLower, nACSort ) );
|
|
|
|
cdp_ptr = hb_cdpFindPos( id );
|
|
if( *cdp_ptr == NULL )
|
|
{
|
|
*cdp_ptr = hb_buildCodePage( id, info, uniTable, pszUpper, pszLower, nACSort );
|
|
return *cdp_ptr != NULL;
|
|
}
|
|
return HB_FALSE;
|
|
}
|
|
|
|
void hb_cdpReleaseAll( void )
|
|
{
|
|
HB_TRACE( HB_TR_DEBUG, ( "hb_cdpReleaseAll()" ) );
|
|
|
|
while( s_cdpList )
|
|
{
|
|
void * buffer = s_cdpList->buffer;
|
|
if( s_cdpList->uniTable->uniTrans )
|
|
{
|
|
hb_xfree( s_cdpList->uniTable->uniTrans );
|
|
s_cdpList->uniTable->uniTrans = NULL;
|
|
}
|
|
s_cdpList = s_cdpList->next;
|
|
if( buffer )
|
|
hb_xfree( buffer );
|
|
}
|
|
}
|
|
|
|
PHB_CODEPAGE hb_cdpFind( const char * id )
|
|
{
|
|
HB_TRACE( HB_TR_DEBUG, ( "hb_cdpFind(%s)", id ) );
|
|
|
|
return id ? * hb_cdpFindPos( id ) : NULL;
|
|
}
|
|
|
|
PHB_CODEPAGE hb_cdpFindExt( const char * id )
|
|
{
|
|
PHB_CODEPAGE cdp = NULL;
|
|
|
|
HB_TRACE( HB_TR_DEBUG, ( "hb_cdpFindExt(%s)", id ) );
|
|
|
|
if( id )
|
|
{
|
|
cdp = * hb_cdpFindPos( id );
|
|
if( cdp == NULL && strcmp( id, "UTF8" ) == 0 )
|
|
return &s_utf8_codepage;
|
|
}
|
|
return cdp;
|
|
}
|
|
|
|
/*
 * Return the codepage reported by hb_vmCDP() and, when cdp is not
 * NULL, pass cdp to hb_vmSetCDP().
 */
PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp )
{
   PHB_CODEPAGE cdpPrev;

   HB_TRACE( HB_TR_DEBUG, ( "hb_cdpSelect(%p)", cdp ) );

   cdpPrev = hb_vmCDP();

   if( cdp != NULL )
      hb_vmSetCDP( cdp );

   return cdpPrev;
}
|
|
|
|
const char * hb_cdpID( void )
|
|
{
|
|
PHB_CODEPAGE cdp;
|
|
|
|
HB_TRACE( HB_TR_DEBUG, ( "hb_cdpID()" ) );
|
|
|
|
cdp = hb_vmCDP();
|
|
|
|
return cdp ? cdp->id : NULL;
|
|
}
|
|
|
|
const char * hb_cdpSelectID( const char * id )
|
|
{
|
|
const char * idOld;
|
|
|
|
HB_TRACE( HB_TR_DEBUG, ( "hb_cdpSelectID(%s)", id ) );
|
|
|
|
idOld = hb_cdpID();
|
|
hb_cdpSelect( hb_cdpFind( id ) );
|
|
|
|
return idOld;
|
|
}
|
|
|
|
#ifdef HB_LEGACY_LEVEL2
|
|
/*
 * Legacy API: translate nChars bytes of psz from cdpIn to cdpOut in
 * place. The result never exceeds nChars bytes: a longer translation
 * is truncated and a shorter one is padded with '\0' bytes up to
 * nChars. Nothing is done when a codepage is missing or both use the
 * same Unicode table.
 *
 * When the output can be longer than the input (UTF8 or a codepage
 * with multi-character sequences) the translation is done in a
 * temporary buffer. The buffer is allocated with one extra byte so
 * hb_xgrab() is never asked for a zero size block when the translated
 * size is 0 (e.g. for an empty input).
 */
void hb_cdpnTranslate( char * psz, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut, HB_SIZE nChars )
{
   if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable )
   {
      HB_SIZE ulDst = nChars;
      char * pDst = psz;

      if( cdpOut == &s_utf8_codepage || cdpOut->nMultiUC )
      {
         ulDst = hb_cdpTransLen( psz, nChars, 0, cdpIn, cdpOut );
         pDst = ( char * ) hb_xgrab( ulDst + 1 );
      }
      ulDst = hb_cdpTransTo( psz, nChars, pDst, ulDst, cdpIn, cdpOut );
      if( psz != pDst )
      {
         /* copy back only what fits into the caller's buffer */
         if( ulDst > nChars )
            ulDst = nChars;
         memcpy( psz, pDst, ulDst );
         hb_xfree( pDst );
      }
      if( ulDst < nChars )
         memset( psz + ulDst, '\0', nChars - ulDst );
   }
}
|
|
|
|
/*
 * Legacy API: translate the '\0' terminated string psz in place using
 * hb_cdpnTranslate() with the current string length.
 */
void hb_cdpTranslate( char * psz, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut )
{
   HB_SIZE nLen = strlen( psz );

   hb_cdpnTranslate( psz, cdpIn, cdpOut, nLen );
}
|
|
#endif
|
|
|
|
/* TOFIX: Move this to cdpapihb.c */
|
|
HB_FUNC( HB_CDPLIST )
|
|
{
|
|
PHB_CODEPAGE cdp;
|
|
int iCount;
|
|
|
|
cdp = s_cdpList;
|
|
iCount = 0;
|
|
while( cdp )
|
|
{
|
|
++iCount;
|
|
cdp = cdp->next;
|
|
}
|
|
|
|
hb_reta( iCount );
|
|
cdp = s_cdpList;
|
|
iCount = 0;
|
|
while( cdp )
|
|
{
|
|
hb_storvc( cdp->id, -1, ++iCount );
|
|
cdp = cdp->next;
|
|
}
|
|
}
|