/* * $Id$ */ /* * Harbour Project source code: * The CodePages API * * Copyright 2002 Alexander S.Kresin * Copyright 2009 Przemyslaw Czerpak * www - http://www.harbour-project.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this software; see the file COPYING. If not, write to * the Free Software Foundation, Inc., 59 Temple Place, Suite 330, * Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/). * * As a special exception, the Harbour Project gives permission for * additional uses of the text contained in its release of Harbour. * * The exception is that, if you link the Harbour libraries with other * files to produce an executable, this does not by itself cause the * resulting executable to be covered by the GNU General Public License. * Your use of that executable is in no way restricted on account of * linking the Harbour library code into it. * * This exception does not however invalidate any other reasons why * the executable file might be covered by the GNU General Public License. * * This exception applies only to the code released by the Harbour * Project under the name Harbour. If you copy code from other * Harbour Project or Free Software Foundation releases into a copy of * Harbour, as the General Public License permits, the exception does * not apply to the code that you add in this way. To avoid misleading * anyone as to the status of such modified files, you must delete * this exception notice from them. * * If you write modifications of your own for Harbour, it is your choice * whether to permit this exception to apply to your modifications. * If you do not wish that, delete this exception notice. * */ #include "hbapi.h" #include "hbvm.h" #include "hbapierr.h" #include "hbapicdp.h" #include "hbthread.h" /* character flags */ #define HB_CDP_DIGIT 0x01 #define HB_CDP_ALPHA 0x02 #define HB_CDP_LOWER 0x04 #define HB_CDP_UPPER 0x08 #define HB_CDP_MULTI1 0x10 #define HB_CDP_MULTI2 0x20 /* MT macros */ #define HB_CDP_LOCK hb_threadEnterCriticalSection( &s_cdpMtx ); #define HB_CDP_UNLOCK hb_threadLeaveCriticalSection( &s_cdpMtx ); static HB_CRITICAL_NEW( s_cdpMtx ); #define NUMBER_OF_CHARS 256 static const HB_WCHAR s_uniCodes[ NUMBER_OF_CHARS ] = { 0x0020, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022, 0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C, 0x25BA, 0x25C4, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8, 0x2191, 0x2193, 0x2192, 0x2190, 0x2319, 0x2194, 0x25B2, 0x25BC, 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192, 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510, 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F, 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567, 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B, 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0 }; HB_UNITABLE hb_uniTbl_437 = { HB_CPID_437, s_uniCodes, NULL, 0 }; static HB_CODEPAGE s_en_codepage = { "EN", "English CP-437", HB_UNITB_437, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL }; HB_UNITABLE hb_uniTbl_UTF8 = { HB_CPID_437, s_uniCodes, NULL, 0 }; /* pseudo codepage for translations only */ static HB_CODEPAGE s_utf8_codepage = { "UTF8", "UTF-8", &hb_uniTbl_UTF8, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL }; HB_CODEPAGE_ANNOUNCE( EN ) static PHB_CODEPAGE s_cdpList = NULL; /* * basic CP functions */ HB_BOOL hb_cdpIsDigit( PHB_CODEPAGE cdp, int iChar ) { if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_DIGIT ) != 0; else return HB_ISDIGIT( iChar ); } HB_BOOL hb_cdpIsAlpha( PHB_CODEPAGE cdp, int iChar ) { if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_ALPHA ) != 0; else return HB_ISALPHA( iChar ); } HB_BOOL hb_cdpIsLower( PHB_CODEPAGE cdp, int iChar ) { if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_LOWER ) != 0; else return HB_ISLOWER( iChar ); } HB_BOOL hb_cdpIsUpper( PHB_CODEPAGE cdp, int iChar ) { if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_UPPER ) != 0; else return HB_ISUPPER( iChar ); } HB_BOOL hb_charIsDigit( int iChar ) { PHB_CODEPAGE cdp = hb_vmCDP(); if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_DIGIT ) != 0; else return HB_ISDIGIT( iChar ); } HB_BOOL hb_charIsAlpha( int iChar ) { PHB_CODEPAGE cdp = hb_vmCDP(); if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_ALPHA ) != 0; else return HB_ISALPHA( iChar ); } HB_BOOL hb_charIsLower( int iChar ) { PHB_CODEPAGE cdp = hb_vmCDP(); if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_LOWER ) != 0; else return HB_ISLOWER( iChar ); } HB_BOOL hb_charIsUpper( int iChar ) { PHB_CODEPAGE cdp = hb_vmCDP(); if( cdp ) return ( cdp->flags[ iChar & 0x0ff ] & HB_CDP_UPPER ) != 0; else return HB_ISUPPER( iChar ); } int hb_charLower( int iChar ) { PHB_CODEPAGE cdp = hb_vmCDP(); if( cdp ) return cdp->lower[ iChar & 0x0ff ]; else return HB_TOLOWER( iChar ); } int hb_charUpper( int iChar ) { PHB_CODEPAGE cdp = hb_vmCDP(); if( cdp ) return cdp->upper[ iChar & 0x0ff ]; else return HB_TOUPPER( iChar ); } char * hb_strLower( char * szText, HB_SIZE ulLen ) { HB_TRACE(HB_TR_DEBUG, ("hb_strLower(%s, %lu)", szText, ulLen)); { PHB_CODEPAGE cdp = hb_vmCDP(); HB_SIZE u; if( cdp ) for( u = 0; u < ulLen; u++ ) szText[ u ] = ( char ) cdp->lower[ ( unsigned char ) szText[ u ] ]; else for( u = 0; u < ulLen; u++ ) szText[ u ] = HB_TOLOWER( szText[ u ] ); } return szText; } char * hb_strUpper( char * szText, HB_SIZE ulLen ) { HB_TRACE(HB_TR_DEBUG, ("hb_strUpper(%s, %lu)", szText, ulLen)); { PHB_CODEPAGE cdp = hb_vmCDP(); HB_SIZE u; if( cdp ) for( u = 0; u < ulLen; u++ ) szText[ u ] = ( char ) cdp->upper[ ( unsigned char ) szText[ u ] ]; else for( u = 0; u < ulLen; u++ ) szText[ u ] = HB_TOUPPER( szText[ u ] ); } return szText; } /* * comparison */ int hb_cdpchrcmp( char cFirst, char cSecond, PHB_CODEPAGE cdp ) { if( cFirst == cSecond ) return 0; if( cdp->sort ) { int n1 = cdp->sort[ ( unsigned char ) cFirst ], n2 = cdp->sort[ ( unsigned char ) cSecond ]; if( !cdp->nMulti || ( n1 != 0 && n2 != 0 ) ) { if( n1 == n2 ) { if( cdp->acc ) { n1 = cdp->acc[ ( unsigned char ) cFirst ]; n2 = cdp->acc[ ( unsigned char ) cSecond ]; } else return 0; } return ( n1 < n2 ) ? -1 : 1; } } return ( ( unsigned char ) cFirst < ( unsigned char ) cSecond ) ? -1 : 1; } static int hb_cdpMultiWeight( PHB_CODEPAGE cdp, const char * szChar ) { PHB_MULTICHAR pmulti = cdp->multi; int i; for( i = cdp->nMulti; i; --i, ++pmulti ) { if( ( szChar[ 0 ] == pmulti->cFirst[ 0 ] || szChar[ 0 ] == pmulti->cFirst[ 1 ] ) && ( szChar[ 1 ] == pmulti->cLast[ 0 ] || szChar[ 1 ] == pmulti->cLast[ 1 ] ) ) { return ( szChar[ 0 ] == pmulti->cFirst[ 0 ] ) ? pmulti->sortUp : pmulti->sortLo; } } return 0; } int hb_cdpcmp( const char * szFirst, HB_SIZE ulLenFirst, const char * szSecond, HB_SIZE ulLenSecond, PHB_CODEPAGE cdp, HB_BOOL fExact ) { int iRet = 0, iAcc = 0, n, n1, n2; HB_SIZE ul, ulLen; ulLen = ulLenFirst < ulLenSecond ? ulLenFirst : ulLenSecond; if( cdp->sort == NULL ) { iRet = memcmp( szFirst, szSecond, ulLen ); } else if( cdp->nMulti ) { for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul ) { unsigned char u1 = ( unsigned char ) * szFirst; unsigned char u2 = ( unsigned char ) * szSecond; n1 = cdp->sort[ u1 ]; if( ( cdp->flags[ u1 ] & HB_CDP_MULTI1 ) != 0 && ( ul < ulLenFirst - 1 ) && ( cdp->flags[ ( unsigned char ) szFirst[ 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { n = hb_cdpMultiWeight( cdp, szFirst ); if( n != 0 ) { n1 = n; ++szFirst; if( --ulLenFirst < ulLen ) ulLen = ulLenFirst; } } n2 = cdp->sort[ u2 ]; if( ( cdp->flags[ u2 ] & HB_CDP_MULTI1 ) != 0 && ( ul < ulLenSecond - 1 ) && ( cdp->flags[ ( unsigned char ) szSecond[ 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { n = hb_cdpMultiWeight( cdp, szSecond ); if( n != 0 ) { n2 = n; ++szSecond; if( --ulLenSecond < ulLen ) ulLen = ulLenSecond; } } if( n1 != n2 ) { if( n1 == 0 || n2 == 0 ) /* One of characters doesn't belong to the national characters */ iRet = ( u1 < u2 ) ? -1 : 1; else iRet = ( n1 < n2 ) ? -1 : 1; break; } else if( u1 != u2 ) { if( n1 == 0 ) { iRet = ( u1 < u2 ) ? -1 : 1; break; } if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) ) { if( cdp->acc ) iAcc = ( cdp->acc[ u1 ] < cdp->acc[ u2 ] ) ? -1 : 1; else iAcc = ( u1 < u2 ) ? -1 : 1; } } } } else { for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul ) { if( *szFirst != *szSecond ) { n1 = ( unsigned char ) cdp->sort[ ( unsigned char ) * szFirst ]; n2 = ( unsigned char ) cdp->sort[ ( unsigned char ) * szSecond ]; if( n1 != n2 ) { iRet = ( n1 < n2 ) ? -1 : 1; break; } if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) ) { if( cdp->acc ) iAcc = ( cdp->acc[ ( unsigned char ) * szFirst ] < cdp->acc[ ( unsigned char ) * szSecond ] ) ? -1 : 1; else iAcc = ( ( unsigned char ) * szFirst < ( unsigned char ) * szSecond ) ? -1 : 1; } } } } if( !iRet ) { if( iAcc ) iRet = iAcc; else if( ulLenSecond > ulLenFirst ) iRet = -1; else if( fExact && ulLenSecond < ulLenFirst ) iRet = 1; } return iRet; } static int hb_cdpMultiWeightI( PHB_CODEPAGE cdp, const char * szChar ) { PHB_MULTICHAR pmulti = cdp->multi; int i; for( i = cdp->nMulti; i; --i, ++pmulti ) { if( ( szChar[ 0 ] == pmulti->cFirst[ 0 ] || szChar[ 0 ] == pmulti->cFirst[ 1 ] ) && ( szChar[ 1 ] == pmulti->cLast[ 0 ] || szChar[ 1 ] == pmulti->cLast[ 1 ] ) ) { return pmulti->sortUp; } } return 0; } int hb_cdpicmp( const char * szFirst, HB_SIZE ulLenFirst, const char * szSecond, HB_SIZE ulLenSecond, PHB_CODEPAGE cdp, HB_BOOL fExact ) { int iRet = 0, iAcc = 0, n, n1, n2, u1, u2; HB_SIZE ul, ulLen; ulLen = ulLenFirst < ulLenSecond ? ulLenFirst : ulLenSecond; if( cdp->sort == NULL ) { for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul ) { u1 = cdp->upper[ ( unsigned char ) * szFirst ]; u2 = cdp->upper[ ( unsigned char ) * szSecond ]; if( u1 != u2 ) { iRet = ( u1 < u2 ) ? -1 : 1; break; } } } else if( cdp->nMulti ) { for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul ) { u1 = cdp->upper[ ( unsigned char ) * szFirst ]; u2 = cdp->upper[ ( unsigned char ) * szSecond ]; if( ( cdp->flags[ u1 ] & HB_CDP_MULTI1 ) != 0 && ( ul < ulLenFirst - 1 ) && ( cdp->flags[ ( unsigned char ) szFirst[ 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { n = hb_cdpMultiWeightI( cdp, szFirst ); if( n != 0 ) { n1 = n; ++szFirst; if( --ulLenFirst < ulLen ) ulLen = ulLenFirst; } else n1 = cdp->sort[ u1 ]; } else n1 = cdp->sort[ u1 ]; if( ( cdp->flags[ u2 ] & HB_CDP_MULTI1 ) != 0 && ( ul < ulLenSecond - 1 ) && ( cdp->flags[ ( unsigned char ) szSecond[ 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { n = hb_cdpMultiWeightI( cdp, szSecond ); if( n != 0 ) { n2 = n; ++szSecond; if( --ulLenSecond < ulLen ) ulLen = ulLenSecond; } else n2 = cdp->sort[ u2 ]; } else n2 = cdp->sort[ u2 ]; if( n1 != n2 ) { if( n1 == 0 || n2 == 0 ) /* One of characters doesn't belong to the national characters */ iRet = ( u1 < u2 ) ? -1 : 1; else iRet = ( n1 < n2 ) ? -1 : 1; break; } else if( u1 != u2 ) { if( n1 == 0 ) { iRet = ( u1 < u2 ) ? -1 : 1; break; } if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) ) { if( cdp->acc ) iAcc = ( cdp->acc[ u1 ] < cdp->acc[ u2 ] ) ? -1 : 1; else iAcc = ( u1 < u2 ) ? -1 : 1; } } } } else { for( ul = 0; ul < ulLen; ++szFirst, ++szSecond, ++ul ) { u1 = cdp->upper[ ( unsigned char ) * szFirst ]; u2 = cdp->upper[ ( unsigned char ) * szSecond ]; if( u1 != u2 ) { n1 = ( unsigned char ) cdp->sort[ u1 ]; n2 = ( unsigned char ) cdp->sort[ u2 ]; if( n1 != n2 ) { iRet = ( n1 < n2 ) ? -1 : 1; break; } if( iAcc == 0 && ( fExact || ( ulLenFirst == ulLenSecond && cdp->acc ) ) ) { if( cdp->acc ) iAcc = ( cdp->acc[ u1 ] < cdp->acc[ u2 ] ) ? -1 : 1; else iAcc = ( u1 < u2 ) ? -1 : 1; } } } } if( !iRet ) { if( iAcc ) iRet = iAcc; else if( ulLenSecond > ulLenFirst ) iRet = -1; else if( fExact && ulLenSecond < ulLenFirst ) iRet = 1; } return iRet; } /* * conversions */ static void hb_cdpBuildTransTable( PHB_UNITABLE uniTable ) { HB_WCHAR wcMax = 0; int i; HB_CDP_LOCK if( uniTable->uniTrans == NULL ) { unsigned char * uniTrans; for( i = 0; i < 256; ++i ) { HB_WCHAR wc = uniTable->uniCodes[ i ]; if( wc > wcMax ) wcMax = wc; } uniTrans = ( unsigned char * ) hb_xgrab( ( wcMax + 1 ) * sizeof( unsigned char ) ); memset( uniTrans, '\0', ( wcMax + 1 ) * sizeof( unsigned char ) ); for( i = 0; i < 256; ++i ) uniTrans[ uniTable->uniCodes[ i ] ] = ( unsigned char ) i; uniTable->wcMax = wcMax; uniTable->uniTrans = uniTrans; } HB_CDP_UNLOCK } /* * UTF8 conversions */ int hb_cdpUTF8CharSize( HB_WCHAR wc ) { if( wc < 0x0080 ) return 1; else if( wc < 0x0800 ) return 2; else /* if( wc <= 0xffff ) */ return 3; } int hb_cdpU16CharToUTF8( char * szUTF8, HB_WCHAR wc ) { int n; if( wc < 0x0080 ) { szUTF8[0] = wc & 0xff; n = 1; } else if( wc < 0x0800 ) { szUTF8[0] = 0xc0 | ( ( wc >> 6 ) & 0x1f ); szUTF8[1] = 0x80 | ( wc & 0x3f ); n = 2; } else /* if( wc <= 0xffff ) */ { szUTF8[0] = 0xe0 | ( ( wc >> 12 ) & 0x0f ); szUTF8[1] = 0x80 | ( ( wc >> 6 ) & 0x3f ); szUTF8[2] = 0x80 | ( wc & 0x3f ); n = 3; } /* else { n = 0; } */ return n; } HB_BOOL hb_cdpUTF8ToU16NextChar( unsigned char ucChar, int * n, HB_WCHAR * pwc ) { if( *n > 0 ) { if( ( ucChar & 0xc0 ) != 0x80 ) return HB_FALSE; *pwc = ( *pwc << 6 ) | ( ucChar & 0x3f ); ( *n )--; return HB_TRUE; } *n = 0; *pwc = ucChar; if( ucChar >= 0xc0 ) { if( ucChar < 0xe0 ) { *pwc &= 0x1f; *n = 1; } else if( ucChar < 0xf0 ) { *pwc &= 0x0f; *n = 2; } else if( ucChar < 0xf8 ) { *pwc &= 0x07; *n = 3; } else if( ucChar < 0xfc ) { *pwc &= 0x03; *n = 4; } else if( ucChar < 0xfe ) { *pwc &= 0x01; *n = 5; } } return HB_TRUE; } HB_SIZE hb_cdpUTF8StringLength( const char * pSrc, HB_SIZE ulLen ) { HB_SIZE ul, ulDst; HB_WCHAR uc; int n = 0; for( ul = ulDst = 0; ul < ulLen; ++ul ) { if( hb_cdpUTF8ToU16NextChar( ( UCHAR ) pSrc[ ul ], &n, &uc ) ) { if( n == 0 ) ++ulDst; } } return ulDst; } HB_SIZE hb_cdpUTF8StringPeek( const char * pSrc, HB_SIZE ulLen, HB_SIZE ulPos ) { if( ulLen ) { HB_SIZE ul; HB_WCHAR uc = 0; int n = 0; for( ul = 0; ul < ulLen && ulPos; ++ul ) { if( hb_cdpUTF8ToU16NextChar( ( UCHAR ) pSrc[ ul ], &n, &uc ) ) { if( n == 0 ) --ulPos; } } if( ul < ulLen ) { n = 0; do { if( hb_cdpUTF8ToU16NextChar( ( UCHAR ) pSrc[ ul ], &n, &uc ) ) { if( n == 0 ) return uc; } } while( ++ul < ulLen ); } } return 0; } /* caller must free the returned buffer if not NULL */ char * hb_cdpUTF8StringSubstr( const char * pSrc, HB_SIZE ulLen, HB_SIZE ulFrom, HB_SIZE ulCount, HB_SIZE * pulDest ) { HB_SIZE ul, ulCnt, ulDst = 0; HB_WCHAR uc; int n; char * pDst = NULL; if( ulCount && ulLen ) { n = 0; for( ul = 0; ul < ulLen && ulFrom; ++ul ) { if( hb_cdpUTF8ToU16NextChar( pSrc[ ul ], &n, &uc ) ) { if( n == 0 ) --ulFrom; } } if( ul < ulLen ) { ulFrom = ul; ulCnt = ulCount; n = 0; do { if( hb_cdpUTF8ToU16NextChar( pSrc[ ul ], &n, &uc ) ) { if( n == 0 ) --ulCnt; } } while( ++ul < ulLen && ulCnt ); ulDst = ul - ulFrom; pDst = ( char * ) hb_xgrab( ulDst + 1 ); memcpy( pDst, &pSrc[ ulFrom ], ulDst ); pDst[ ulDst ] = '\0'; } } if( pulDest ) *pulDest = ulDst; return pDst; } HB_BOOL hb_cdpGetFromUTF8( PHB_CODEPAGE cdp, HB_BOOL fCtrl, unsigned char ch, int * n, HB_WCHAR * pwc ) { if( hb_cdpUTF8ToU16NextChar( ch, n, pwc ) ) { if( *n == 0 && cdp && ( fCtrl || *pwc >= 32 ) ) { if( cdp->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdp->uniTable ); if( *pwc <= cdp->uniTable->wcMax ) { unsigned char uc = cdp->uniTable->uniTrans[ *pwc ]; if( uc ) *pwc = uc; } } return HB_TRUE; } return HB_FALSE; } HB_SIZE hb_cdpStrAsUTF8Len( PHB_CODEPAGE cdp, HB_BOOL fCtrl, const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax ) { const HB_WCHAR * uniCodes; HB_SIZE ulS, ulD, u; int i; uniCodes = cdp->uniTable->uniCodes; for( ulS = ulD = 0; ulS < ulSrc; ++ulS ) { unsigned char uc = ( unsigned char ) pSrc[ ulS ]; HB_WCHAR wc; if( !fCtrl && uc < 32 ) wc = uc; else { wc = uniCodes[ uc ]; if( cdp->nMultiUC && ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 && ulS + 1 < ulSrc && ( cdp->flags[ ( unsigned char ) pSrc[ ulS + 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { for( i = 0; i < cdp->nMulti; ++i ) { if( pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 0 ] || pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 1 ] ) { if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 0 ] ) { wc = cdp->multi[ i ].wcUp; ++ulS; break; } else if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 1 ] ) { wc = cdp->multi[ i ].wcLo; ++ulS; break; } } } } } u = hb_cdpUTF8CharSize( wc ); if( ulMax && ulD + u > ulMax ) break; ulD += u; } return ulD; } HB_SIZE hb_cdpStrToUTF8( PHB_CODEPAGE cdp, HB_BOOL fCtrl, const char * pSrc, HB_SIZE ulSrc, char * pDst, HB_SIZE ulDst ) { const HB_WCHAR * uniCodes; HB_SIZE ulS, ulD, u; int i; uniCodes = cdp->uniTable->uniCodes; for( ulS = ulD = 0; ulS < ulSrc && ulD < ulDst; ++ulS ) { unsigned char uc = ( unsigned char ) pSrc[ ulS ]; HB_WCHAR wc; if( !fCtrl && uc < 32 ) wc = uc; else { wc = uniCodes[ uc ]; if( cdp->nMultiUC && ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 && ulS + 1 < ulSrc && ( cdp->flags[ ( unsigned char ) pSrc[ ulS + 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { for( i = 0; i < cdp->nMulti; ++i ) { if( pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 0 ] || pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 1 ] ) { if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 0 ] ) { wc = cdp->multi[ i ].wcUp; ++ulS; break; } else if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 1 ] ) { wc = cdp->multi[ i ].wcLo; ++ulS; break; } } } } } u = hb_cdpUTF8CharSize( wc ); if( ulD + u <= ulDst ) { hb_cdpU16CharToUTF8( &pDst[ ulD ], wc ); ulD += u; } else break; } if( ulD < ulDst ) pDst[ ulD ] = '\0'; return ulD; } HB_SIZE hb_cdpUTF8AsStrLen( PHB_CODEPAGE cdp, HB_BOOL fCtrl, const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax ) { unsigned char * uniTrans; HB_WCHAR wcMax, wc = 0; HB_SIZE ulS, ulD; int n = 0, i; if( cdp->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdp->uniTable ); uniTrans = cdp->uniTable->uniTrans; wcMax = cdp->uniTable->wcMax; for( ulS = ulD = 0; ulS < ulSrc; ++ulS ) { if( hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulS ], &n, &wc ) ) { if( n == 0 ) { ++ulD; if( ulMax && ulD >= ulMax ) break; if( wc && cdp->nMultiUC && ( fCtrl || wc >= 32 ) && ( wc > wcMax || uniTrans[ wc ] == 0 ) ) { for( i = 0; i < cdp->nMulti; ++i ) { if( wc == cdp->multi[ i ].wcUp || wc == cdp->multi[ i ].wcLo ) { ++ulD; break; } } if( ulMax && ulD >= ulMax ) break; } } } } return ulD; } HB_SIZE hb_cdpUTF8ToStr( PHB_CODEPAGE cdp, HB_BOOL fCtrl, const char * pSrc, HB_SIZE ulSrc, char * pDst, HB_SIZE ulDst ) { unsigned char * uniTrans; HB_WCHAR wcMax, wc = 0; HB_SIZE ulS, ulD; int n = 0, i; if( cdp->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdp->uniTable ); uniTrans = cdp->uniTable->uniTrans; wcMax = cdp->uniTable->wcMax; for( ulS = ulD = 0; ulS < ulSrc && ulD < ulDst; ++ulS ) { if( hb_cdpUTF8ToU16NextChar( ( unsigned char ) pSrc[ ulS ], &n, &wc ) ) { if( n == 0 ) { if( !fCtrl && wc < 32 ) pDst[ ulD++ ] = ( unsigned char ) wc; else if( wc <= wcMax && uniTrans[ wc ] ) pDst[ ulD++ ] = uniTrans[ wc ]; else { if( wc && cdp->nMultiUC ) { for( i = 0; i < cdp->nMulti; ++i ) { if( wc == cdp->multi[ i ].wcUp ) { pDst[ ulD++ ] = cdp->multi[ i ].cFirst[ 0 ]; if( ulD < ulDst ) pDst[ ulD++ ] = cdp->multi[ i ].cLast[ 0 ]; break; } if( wc == cdp->multi[ i ].wcLo ) { pDst[ ulD++ ] = cdp->multi[ i ].cFirst[ 1 ]; if( ulD < ulDst ) pDst[ ulD++ ] = cdp->multi[ i ].cLast[ 1 ]; break; } } if( i < cdp->nMulti ) continue; } pDst[ ulD++ ] = wc >= 0x100 ? '?' : ( unsigned char ) wc; } } } } if( ulD < ulDst ) pDst[ ulD ] = '\0'; return ulD; } /* * U16 (hb wide char) conversions */ HB_WCHAR hb_cdpGetU16( PHB_CODEPAGE cdp, HB_BOOL fCtrl, unsigned char ch ) { if( cdp && ( fCtrl || ch >= 32 ) ) return cdp->uniTable->uniCodes[ ch ]; else return ch; } unsigned char hb_cdpGetChar( PHB_CODEPAGE cdp, HB_BOOL fCtrl, HB_WCHAR wc ) { if( cdp && ( fCtrl || wc >= 32 ) ) { if( cdp->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdp->uniTable ); if( wc <= cdp->uniTable->wcMax ) { unsigned char uc = cdp->uniTable->uniTrans[ wc ]; if( uc ) wc = uc; } } return wc >= 0x100 ? '?' : ( UCHAR ) wc; } HB_SIZE hb_cdpStrAsU16Len( PHB_CODEPAGE cdp, HB_BOOL fCtrl, const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax ) { if( cdp->nMultiUC ) { HB_SIZE ulS, ulD; int i; for( ulS = ulD = 0; ulS < ulSrc; ++ulS ) { unsigned char uc = ( unsigned char ) pSrc[ ulS ]; if( fCtrl || uc >= 32 ) { if( ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 && ulS + 1 < ulSrc && ( cdp->flags[ ( unsigned char ) pSrc[ ulS + 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { for( i = 0; i < cdp->nMulti; ++i ) { if( pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 0 ] || pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 1 ] ) { if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 0 ] ) { ++ulS; break; } else if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 1 ] ) { ++ulS; break; } } } } } ++ulD; if( ulMax && ulD >= ulMax ) break; } return ulD; } return ulSrc; } HB_SIZE hb_cdpStrToU16( PHB_CODEPAGE cdp, HB_BOOL fCtrl, int iEndian, const char * pSrc, HB_SIZE ulSrc, HB_WCHAR * pDst, HB_SIZE ulDst ) { const HB_WCHAR * uniCodes; HB_SIZE ulS, ulD; int i; uniCodes = cdp->uniTable->uniCodes; for( ulS = ulD = 0; ulS < ulSrc && ulD < ulDst; ++ulS ) { unsigned char uc = ( unsigned char ) pSrc[ ulS ]; HB_WCHAR wc; if( !fCtrl && uc < 32 ) wc = uc; else { wc = uniCodes[ uc ]; if( cdp->nMultiUC && ( cdp->flags[ uc ] & HB_CDP_MULTI1 ) != 0 && ulS + 1 < ulSrc && ( cdp->flags[ ( unsigned char ) pSrc[ ulS + 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { for( i = 0; i < cdp->nMulti; ++i ) { if( pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 0 ] || pSrc[ ulS + 1 ] == cdp->multi[ i ].cLast[ 1 ] ) { if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 0 ] ) { wc = cdp->multi[ i ].wcUp; ++ulS; break; } else if( pSrc[ ulS ] == cdp->multi[ i ].cFirst[ 1 ] ) { wc = cdp->multi[ i ].wcLo; ++ulS; break; } } } } } #if !defined( HB_BIG_ENDIAN ) && !defined( HB_LITTLE_ENDIAN ) if( iEndian == HB_CDP_ENDIAN_LITTLE ) HB_PUT_LE_UINT16( &pDst[ ulD ], wc ); else if( iEndian == HB_CDP_ENDIAN_BIG ) HB_PUT_BE_UINT16( &pDst[ ulD ], wc ); else pDst[ ulD ] = wc; ++ulD; #else # if defined( HB_BIG_ENDIAN ) if( iEndian == HB_CDP_ENDIAN_LITTLE ) # else if( iEndian == HB_CDP_ENDIAN_BIG ) # endif wc = HB_SWAP_UINT16( wc ); pDst[ ulD++ ] = wc; #endif } if( ulD < ulDst ) pDst[ ulD ] = '\0'; return ulD; } HB_SIZE hb_cdpU16AsStrLen( PHB_CODEPAGE cdp, HB_BOOL fCtrl, const HB_WCHAR * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax ) { unsigned char * uniTrans; HB_WCHAR wcMax, wc; HB_SIZE ulS, ulD; int i; if( cdp->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdp->uniTable ); uniTrans = cdp->uniTable->uniTrans; wcMax = cdp->uniTable->wcMax; for( ulS = ulD = 0; ulS < ulSrc; ++ulS ) { wc = pSrc[ ulS ]; ++ulD; if( ulMax && ulD >= ulMax ) break; if( wc && cdp->nMultiUC && ( fCtrl || wc >= 32 ) && ( wc > wcMax || uniTrans[ wc ] == 0 ) ) { for( i = 0; i < cdp->nMulti; ++i ) { if( wc == cdp->multi[ i ].wcUp || wc == cdp->multi[ i ].wcLo ) { ++ulD; break; } } if( ulMax && ulD >= ulMax ) break; } } return ulD; } HB_SIZE hb_cdpU16ToStr( PHB_CODEPAGE cdp, HB_BOOL fCtrl, int iEndian, const HB_WCHAR * pSrc, HB_SIZE ulSrc, char * pDst, HB_SIZE ulDst ) { unsigned char * uniTrans; HB_WCHAR wcMax, wc; HB_SIZE ulS, ulD; int i; if( cdp->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdp->uniTable ); uniTrans = cdp->uniTable->uniTrans; wcMax = cdp->uniTable->wcMax; for( ulS = ulD = 0; ulS < ulSrc && ulD < ulDst; ++ulS ) { #if !defined( HB_BIG_ENDIAN ) && !defined( HB_LITTLE_ENDIAN ) if( iEndian == HB_CDP_ENDIAN_LITTLE ) wc = HB_GET_LE_UINT16( &pSrc[ ulS ] ); else if( iEndian == HB_CDP_ENDIAN_BIG ) wc = HB_GET_BE_UINT16( &pSrc[ ulS ] ); else wc = pSrc[ ulS ]; #else wc = pSrc[ ulS ]; # if defined( HB_BIG_ENDIAN ) if( iEndian == HB_CDP_ENDIAN_LITTLE ) # else if( iEndian == HB_CDP_ENDIAN_BIG ) # endif wc = HB_SWAP_UINT16( wc ); #endif if( !fCtrl && wc < 32 ) pDst[ ulD++ ] = ( unsigned char ) wc; else if( wc <= wcMax && uniTrans[ wc ] ) pDst[ ulD++ ] = uniTrans[ wc ]; else { if( wc && cdp->nMultiUC ) { for( i = 0; i < cdp->nMulti; ++i ) { if( wc == cdp->multi[ i ].wcUp ) { pDst[ ulD++ ] = cdp->multi[ i ].cFirst[ 0 ]; if( ulD < ulDst ) pDst[ ulD++ ] = cdp->multi[ i ].cLast[ 0 ]; break; } if( wc == cdp->multi[ i ].wcLo ) { pDst[ ulD++ ] = cdp->multi[ i ].cFirst[ 1 ]; if( ulD < ulDst ) pDst[ ulD++ ] = cdp->multi[ i ].cLast[ 1 ]; break; } } if( i < cdp->nMulti ) continue; } pDst[ ulD++ ] = wc >= 0x100 ? '?' : ( unsigned char ) wc; } } if( ulD < ulDst ) pDst[ ulD ] = '\0'; return ulD; } /* * CP translations */ HB_SIZE hb_cdpTransLen( const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { HB_SIZE ulSize; if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable ) { if( cdpIn == &s_utf8_codepage ) return hb_cdpUTF8AsStrLen( cdpOut, HB_FALSE, pSrc, ulSrc, ulMax ); else if( cdpOut == &s_utf8_codepage ) return hb_cdpStrAsUTF8Len( cdpIn, HB_FALSE, pSrc, ulSrc, ulMax ); else { unsigned char * uniTrans; HB_WCHAR wcMax; if( cdpOut->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdpOut->uniTable ); uniTrans = cdpOut->uniTable->uniTrans; wcMax = cdpOut->uniTable->wcMax; if( cdpIn->nMultiUC || cdpOut->nMultiUC ) { HB_SIZE ul; int i; for( ul = ulSize = 0; ul < ulSrc && ( ulMax == 0 || ulSize < ulMax ); ++ul, ++ulSize ) { unsigned char uc = ( unsigned char ) pSrc[ ul ]; HB_WCHAR wc = cdpIn->uniTable->uniCodes[ uc ]; if( cdpIn->nMultiUC && ( cdpIn->flags[ uc ] & HB_CDP_MULTI1 ) != 0 && ul + 1 < ulSrc && ( cdpIn->flags[ ( unsigned char ) pSrc[ ul + 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { for( i = 0; i < cdpIn->nMulti; ++i ) { if( pSrc[ ul + 1 ] == cdpIn->multi[ i ].cLast[ 0 ] || pSrc[ ul + 1 ] == cdpIn->multi[ i ].cLast[ 1 ] ) { if( pSrc[ ul ] == cdpIn->multi[ i ].cFirst[ 0 ] ) { wc = cdpIn->multi[ i ].wcUp; ++ul; break; } else if( pSrc[ ul ] == cdpIn->multi[ i ].cFirst[ 1 ] ) { wc = cdpIn->multi[ i ].wcLo; ++ul; break; } } } } if( wc && ( wc > wcMax || uniTrans[ wc ] ) && cdpOut->nMultiUC && ( ulMax == 0 || ulSize + 1 < ulMax ) ) { for( i = 0; i < cdpOut->nMulti; ++i ) { if( wc == cdpOut->multi[ i ].wcUp || wc == cdpOut->multi[ i ].wcLo ) { ++ulSize; break; } } } } } else ulSize = ( ulMax && ulSrc > ulMax ) ? ulMax : ulSrc; } } else ulSize = ( ulMax && ulSrc > ulMax ) ? ulMax : ulSrc; return ulSize; } HB_SIZE hb_cdpTransTo( const char * pSrc, HB_SIZE ulSrc, char * pDst, HB_SIZE ulDst, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { HB_SIZE ulSize; if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable ) { if( cdpIn == &s_utf8_codepage ) return hb_cdpUTF8ToStr( cdpOut, HB_FALSE, pSrc, ulSrc, pDst, ulDst ); else if( cdpOut == &s_utf8_codepage ) return hb_cdpStrToUTF8( cdpIn, HB_FALSE, pSrc, ulSrc, pDst, ulDst ); else { unsigned char * uniTrans; HB_WCHAR wcMax; if( cdpOut->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdpOut->uniTable ); uniTrans = cdpOut->uniTable->uniTrans; wcMax = cdpOut->uniTable->wcMax; if( cdpIn->nMultiUC || cdpOut->nMultiUC ) { HB_SIZE ul; int i; for( ul = ulSize = 0; ul < ulSrc && ulSize < ulDst; ++ul, ++ulSize ) { unsigned char uc = ( unsigned char ) pSrc[ ul ]; HB_WCHAR wc = cdpIn->uniTable->uniCodes[ uc ]; if( cdpIn->nMultiUC && ( cdpIn->flags[ uc ] & HB_CDP_MULTI1 ) != 0 && ul + 1 < ulSrc && ( cdpIn->flags[ ( unsigned char ) pSrc[ ul + 1 ] ] & HB_CDP_MULTI2 ) != 0 ) { for( i = 0; i < cdpIn->nMulti; ++i ) { if( pSrc[ ul + 1 ] == cdpIn->multi[ i ].cLast[ 0 ] || pSrc[ ul + 1 ] == cdpIn->multi[ i ].cLast[ 1 ] ) { if( pSrc[ ul ] == cdpIn->multi[ i ].cFirst[ 0 ] ) { wc = cdpIn->multi[ i ].wcUp; ++ul; break; } else if( pSrc[ ul ] == cdpIn->multi[ i ].cFirst[ 1 ] ) { wc = cdpIn->multi[ i ].wcLo; ++ul; break; } } } } if( wc ) { if( wc <= wcMax && uniTrans[ wc ] ) uc = uniTrans[ wc ]; else if( cdpOut->nMultiUC ) { for( i = 0; i < cdpOut->nMulti; ++i ) { if( wc == cdpOut->multi[ i ].wcUp ) { if( ulSize + 1 < ulDst ) { pDst[ ulSize++ ] = cdpOut->multi[ i ].cFirst[ 0 ]; uc = cdpOut->multi[ i ].cLast[ 0 ]; } else uc = cdpOut->multi[ i ].cFirst[ 0 ]; break; } if( wc == cdpOut->multi[ i ].wcLo ) { if( ulSize + 1 < ulDst ) { pDst[ ulSize++ ] = cdpOut->multi[ i ].cFirst[ 1 ]; uc = cdpOut->multi[ i ].cLast[ 1 ]; } else uc = cdpOut->multi[ i ].cFirst[ 1 ]; break; } } } } pDst[ ulSize ] = uc; } } else { if( ulSrc > ulDst ) ulSrc = ulDst; for( ulSize = 0; ulSize < ulSrc; ++ulSize ) { unsigned char uc = ( unsigned char ) pSrc[ ulSize ]; HB_WCHAR wc = cdpIn->uniTable->uniCodes[ uc ]; if( wc && wc <= wcMax && uniTrans[ wc ] ) uc = uniTrans[ wc ]; pDst[ ulSize ] = uc; } } } } else { ulSize = ( ulSrc > ulDst ) ? ulDst : ulSrc; memcpy( pDst, pSrc, ulSize ); } if( ulSize < ulDst ) pDst[ ulSize ] = '\0'; return ulSize; } int hb_cdpTranslateChar( int iChar, HB_BOOL fCtrl, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable && iChar >= ( fCtrl ? 32 : 0 ) && iChar < 256 ) { HB_WCHAR wc; if( cdpOut->uniTable->uniTrans == NULL ) hb_cdpBuildTransTable( cdpOut->uniTable ); wc = cdpIn->uniTable->uniCodes[ iChar ]; if( wc && wc <= cdpOut->uniTable->wcMax && cdpOut->uniTable->uniTrans[ wc ] ) iChar = cdpOut->uniTable->uniTrans[ wc ]; } return iChar; } HB_SIZE hb_cdpnDupLen( const char * pSrc, HB_SIZE ulSrc, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { return hb_cdpTransLen( pSrc, ulSrc, 0, cdpIn, cdpOut ); } HB_SIZE hb_cdpnDup2Len( const char * pSrc, HB_SIZE ulSrc, HB_SIZE ulMax, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { return hb_cdpTransLen( pSrc, ulSrc, ulMax, cdpIn, cdpOut ); } char * hb_cdpnDup( const char * pSrc, HB_SIZE * pulLen, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { char * pDst; HB_SIZE ulDst; ulDst = hb_cdpTransLen( pSrc, *pulLen, 0, cdpIn, cdpOut ); pDst = ( char * ) hb_xgrab( ulDst + 1 ); hb_cdpTransTo( pSrc, *pulLen, pDst, ulDst + 1, cdpIn, cdpOut ); *pulLen = ulDst; return pDst; } const char * hb_cdpnDup2( const char * pSrc, HB_SIZE ulSrc, char * pDst, HB_SIZE * pulDst, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { * pulDst = hb_cdpTransTo( pSrc, ulSrc, pDst, *pulDst, cdpIn, cdpOut ); return pDst; } const char * hb_cdpnDup3( const char * pSrc, HB_SIZE ulSrc, char * pDst, HB_SIZE * pulDst, char ** pFree, HB_SIZE * pulSize, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable && ulSrc ) { char * pPrev = NULL; HB_SIZE ulDst = hb_cdpTransLen( pSrc, ulSrc, 0, cdpIn, cdpOut ); if( pDst == NULL ) { pDst = *pFree; if( pDst == NULL && *pulSize > 0 ) pDst = ( char * ) pSrc; } if( ulDst >= *pulSize || ( pDst == pSrc && ( cdpOut == &s_utf8_codepage || cdpOut->nMultiUC ) ) ) { pPrev = *pFree; pDst = *pFree = ( char * ) hb_xgrab( ulDst + 1 ); *pulSize = ulDst + 1; } ulDst = hb_cdpTransTo( pSrc, ulSrc, pDst, *pulSize, cdpIn, cdpOut ); if( pPrev ) hb_xfree( pPrev ); if( pulDst ) *pulDst = ulDst; return pDst; } if( pulDst ) *pulDst = ulSrc; return pSrc; } char * hb_cdpDup( const char * pszSrc, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { HB_SIZE ulLen = strlen( pszSrc ); return hb_cdpnDup( pszSrc, &ulLen, cdpIn, cdpOut ); } /* * CP management */ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, PHB_UNITABLE uniTable, const char * pszUpper, const char * pszLower, unsigned int nACSort ) { HB_BOOL lSort, fError; int iMulti, iAcc, iAccUp, iAccLo, iSortUp, iSortLo, i; const char * pup, * plo; unsigned char ucUp, ucLo, ucUp2, ucLo2; HB_SIZE ulSize, ul; unsigned char * buffer, * flags, * upper, * lower, * sort, * acc; unsigned char used[ 256 ]; PHB_CODEPAGE cdp; PHB_MULTICHAR multi; memset( used, '\0', sizeof( used ) ); iMulti = iAcc = iSortUp = iSortLo = 0; fError = lSort = HB_FALSE; ucUp2 = ucLo2 = 0; pup = pszUpper; plo = pszLower; for( ;; ) { ucUp = ( unsigned char ) *pup++; ucLo = ( unsigned char ) *plo++; if( ucUp == 0 || ucLo == 0 ) { if( ucUp || ucLo ) fError = HB_TRUE; break; } if( ucUp == '.' ) { if( ucLo == '.' && pup[ 0 ] && pup[ 1 ] && ( pup[ 2 ] == '.' || pup[ 2 ] == '=' ) && plo[ 0 ] && plo[ 1 ] && ( plo[ 2 ] == '.' || plo[ 2 ] == '=' ) ) { ucUp = ( unsigned char ) *pup; ucLo = ( unsigned char ) *plo; if( ( ucUp != ' ' || ucLo != ' ' ) && ( ucUp == *pup || ( ucUp != ' ' && *pup != ' ' ) ) && ( ucLo == *plo || ( ucLo != ' ' && *plo != ' ' ) ) ) { if( ucUp != ' ' ) ++iSortLo; pup += 2; plo += 2; if( *pup == '=' ) { do ++pup; while( HB_ISXDIGIT( *pup ) ); } if( *plo == '=' ) { do ++plo; while( HB_ISXDIGIT( *plo ) ); } if( *pup == '.' && *plo == '.' ) { lSort = HB_TRUE; iMulti++; pup++; plo++; continue; } } } fError = HB_TRUE; break; } if( ucUp == '~' ) { if( ucLo != '~' || *pup == '\0' || *plo == '\0' ) { fError = HB_TRUE; break; } ucUp = ( unsigned char ) *pup++; ucLo = ( unsigned char ) *plo++; ++iAcc; } if( used[ ucUp ] != 0 ) ucUp = ' '; if( used[ ucLo ] != 0 ) ucLo = ' '; if( ucUp == ' ' && ucLo == ' ' ) { fError = HB_TRUE; break; } if( ucUp != ' ' ) { used[ ucUp ] = 1; ++iSortLo; if( ucUp < ucUp2 ) lSort = HB_TRUE; ucUp2 = ucUp; } if( ucLo != ' ' ) { used[ ucLo ] = 1; if( ucLo < ucLo2 ) lSort = HB_TRUE; ucLo2 = ucLo; } } if( iMulti > 64 ) fError = HB_TRUE; if( fError || nACSort > HB_CDP_ACSORT_INTERLEAVED ) hb_errInternal( 9994, "Harbour CP (%s) initialization failure", id, NULL ); if( iAcc == 0 ) nACSort = HB_CDP_ACSORT_NONE; else if( nACSort != HB_CDP_ACSORT_NONE ) lSort = HB_TRUE; ulSize = 0x300; if( lSort ) { ulSize += 0x100; if( nACSort == HB_CDP_ACSORT_INTERLEAVED ) ulSize += 0x100; } ul = ulSize; ulSize += sizeof( HB_CODEPAGE ); if( iMulti ) ulSize += iMulti * sizeof( HB_MULTICHAR ); buffer = ( unsigned char * ) hb_xgrab( ulSize ); memset( buffer, '\0', ulSize ); cdp = ( PHB_CODEPAGE ) &buffer[ ul ]; cdp->buffer = buffer; cdp->flags = flags = buffer; buffer += 0x100; cdp->upper = upper = buffer; buffer += 0x100; cdp->lower = lower = buffer; buffer += 0x100; sort = acc = NULL; if( lSort ) { cdp->sort = sort = buffer; buffer += 0x100; if( nACSort == HB_CDP_ACSORT_INTERLEAVED ) { cdp->acc = acc = buffer; buffer += 0x100; } } if( iMulti ) cdp->multi = ( PHB_MULTICHAR ) &buffer[ sizeof( HB_CODEPAGE ) ]; cdp->id = id; cdp->info = info; cdp->uniTable = uniTable; cdp->nACSort = nACSort; cdp->nMulti = iMulti; for( i = 0; i < 0x100; ++i ) { if( HB_ISDIGIT( i ) ) flags[ i ] |= HB_CDP_DIGIT; if( HB_ISALPHA( i ) ) flags[ i ] |= HB_CDP_ALPHA; if( HB_ISUPPER( i ) ) flags[ i ] |= HB_CDP_UPPER; if( HB_ISLOWER( i ) ) flags[ i ] |= HB_CDP_LOWER; upper[ i ] = ( unsigned char ) HB_TOUPPER( i ); lower[ i ] = ( unsigned char ) HB_TOLOWER( i ); } iAccUp = iAccLo = 0; multi = cdp->multi; pup = pszUpper; plo = pszLower; ucUp2 = ucLo2 = 255; memset( used, '\0', sizeof( used ) ); while( *pup ) { ucUp = ( unsigned char ) *pup++; ucLo = ( unsigned char ) *plo++; if( ucUp == '.' ) { multi->cFirst[ 0 ] = *pup++; multi->cLast [ 0 ] = *pup++; multi->cFirst[ 1 ] = *plo++; multi->cLast [ 1 ] = *plo++; if( multi->cFirst[ 0 ] != ' ' ) { flags[ ( unsigned char ) multi->cFirst[ 0 ] ] |= HB_CDP_MULTI1; flags[ ( unsigned char ) multi->cLast [ 0 ] ] |= HB_CDP_MULTI2; multi->sortUp = ++iSortUp; } if( multi->cFirst[ 1 ] != ' ' ) { flags[ ( unsigned char ) multi->cFirst[ 1 ] ] |= HB_CDP_MULTI1; flags[ ( unsigned char ) multi->cLast [ 1 ] ] |= HB_CDP_MULTI2; multi->sortLo = ++iSortLo; } if( *pup == '=' ) { ++pup; while( HB_ISXDIGIT( *pup ) ) { multi->wcUp = ( multi->wcUp << 4 ) | ( *pup >= 'a' ? ( *pup - 'a' + 10 ) : ( *pup >= 'A' ? ( *pup - 'A' + 10 ) : ( *pup - '0' ) ) ); ++pup; } } pup++; if( *plo == '=' ) { ++plo; while( HB_ISXDIGIT( *plo ) ) { multi->wcLo = ( multi->wcLo << 4 ) | ( *plo >= 'a' ? ( *plo - 'a' + 10 ) : ( *plo >= 'A' ? ( *plo - 'A' + 10 ) : ( *plo - '0' ) ) ); ++plo; } } plo++; if( multi->wcUp || multi->wcLo ) cdp->nMultiUC++; multi++; } else { iAcc = 0; if( ucUp == '~' ) { iAcc = 1; ucUp = ( unsigned char ) *pup++; ucLo = ( unsigned char ) *plo++; } if( ucUp != ' ' ) { flags[ ucUp ] |= HB_CDP_ALPHA; flags[ ucUp ] |= HB_CDP_UPPER; if( ucLo != ' ' && ( used[ ucUp ] & HB_CDP_UPPER ) == 0 ) { lower[ ucUp ] = ucLo; used[ ucUp ] |= HB_CDP_UPPER; } if( sort ) { if( sort[ ucUp ] == 0 ) { if( iAcc && nACSort != HB_CDP_ACSORT_NONE ) ++iAccUp; sort[ ucUp ] = ++iSortUp - iAccUp; if( acc ) acc[ ucUp ] = iSortUp; if( ucUp2 > ucUp ) ucUp2 = ucUp; } } } if( ucLo != ' ' ) { flags[ ucLo ] |= HB_CDP_ALPHA; flags[ ucLo ] |= HB_CDP_LOWER; if( ucUp != ' ' && ( used[ ucLo ] & HB_CDP_LOWER ) == 0 ) { upper[ ucLo ] = ucUp; used[ ucLo ] |= HB_CDP_LOWER; } if( sort ) { if( sort[ ucLo ] == 0 ) { if( iAcc && nACSort != HB_CDP_ACSORT_NONE ) ++iAccLo; sort[ ucLo ] = ++iSortLo - iAccLo; if( acc ) acc[ ucLo ] = iSortLo; if( ucLo2 > ucLo ) ucLo2 = ucLo; } } } } } if( sort ) { int iUp, iLo, iSort1, iSort2, iSort3, iAdd; if( iMulti > 0 ) { if( iMulti > ucUp2 || iMulti > ucLo2 ) hb_errInternal( 9994, "Harbour CP (%s) initialization failure", id, NULL ); if( iMulti <= 32 ) iMulti = 33; else iMulti = 65; } else iMulti = 1; for( iUp = iLo = 0, i = iMulti; i < 256; ++i ) { if( sort[ i ] == 0 ) { if( i < ( int ) ucUp2 ) ++iUp; else if( i < ( int ) ucLo2 ) ++iLo; } } for( iSort1 = iSort2 = iSort3 = 0, i = iMulti; i < 256; ++i ) { if( sort[ i ] == 0 ) { if( i < ( int ) ucUp2 ) iAdd = ++iSort1; else if( i < ( int ) ucLo2 ) iAdd = ++iSort2 + iSortUp + iUp; else iAdd = ++iSort3 + iUp + iSortLo + iLo; } else if( sort[ i ] <= iSortUp ) iAdd = iUp; else iAdd = iUp + iLo; sort[ i ] += iAdd; if( acc ) acc[ i ] += iAdd; } } return cdp; } static PHB_CODEPAGE * hb_cdpFindPos( const char * id ) { PHB_CODEPAGE * cdp_ptr; if( s_cdpList == NULL ) { unsigned char * flags, * upper, * lower; int i; s_en_codepage.buffer = ( unsigned char * ) hb_xgrab( 0x300 ); memset( s_en_codepage.buffer, '\0', 0x300 ); s_en_codepage.flags = flags = ( unsigned char * ) s_en_codepage.buffer; s_en_codepage.upper = upper = ( unsigned char * ) s_en_codepage.buffer + 0x100; s_en_codepage.lower = lower = ( unsigned char * ) s_en_codepage.buffer + 0x200; for( i = 0; i < 0x100; ++i ) { if( HB_ISDIGIT( i ) ) flags[ i ] |= HB_CDP_DIGIT; if( HB_ISALPHA( i ) ) flags[ i ] |= HB_CDP_ALPHA; if( HB_ISUPPER( i ) ) flags[ i ] |= HB_CDP_UPPER; if( HB_ISLOWER( i ) ) flags[ i ] |= HB_CDP_LOWER; upper[ i ] = ( unsigned char ) HB_TOUPPER( i ); lower[ i ] = ( unsigned char ) HB_TOLOWER( i ); } s_utf8_codepage.flags = s_en_codepage.flags; s_utf8_codepage.upper = s_en_codepage.upper; s_utf8_codepage.lower = s_en_codepage.lower; s_cdpList = &s_en_codepage; } cdp_ptr = &s_cdpList; if( id ) { while( *cdp_ptr ) { if( strcmp( ( *cdp_ptr )->id, id ) == 0 ) break; cdp_ptr = &( *cdp_ptr )->next; } } return cdp_ptr; } HB_BOOL hb_cdpRegisterRaw( PHB_CODEPAGE cdp ) { PHB_CODEPAGE * cdp_ptr; HB_TRACE( HB_TR_DEBUG, ( "hb_cdpRegisterRaw(%p)", cdp ) ); cdp_ptr = hb_cdpFindPos( cdp->id ); if( *cdp_ptr == NULL ) { *cdp_ptr = cdp; return HB_TRUE; } return HB_FALSE; } HB_BOOL hb_cdpRegisterNew( const char * id, const char * info, PHB_UNITABLE uniTable, const char * pszUpper, const char * pszLower, unsigned int nACSort ) { PHB_CODEPAGE * cdp_ptr; HB_TRACE( HB_TR_DEBUG, ( "hb_cdpRegisterNew(%s,%s,%s,%s,%d)", id, info, pszUpper, pszLower, nACSort ) ); cdp_ptr = hb_cdpFindPos( id ); if( *cdp_ptr == NULL ) { *cdp_ptr = hb_buildCodePage( id, info, uniTable, pszUpper, pszLower, nACSort ); return *cdp_ptr != NULL; } return HB_FALSE; } void hb_cdpReleaseAll( void ) { HB_TRACE( HB_TR_DEBUG, ( "hb_cdpReleaseAll()" ) ); while( s_cdpList ) { void * buffer = s_cdpList->buffer; if( s_cdpList->uniTable->uniTrans ) { hb_xfree( s_cdpList->uniTable->uniTrans ); s_cdpList->uniTable->uniTrans = NULL; } s_cdpList = s_cdpList->next; if( buffer ) hb_xfree( buffer ); } } PHB_CODEPAGE hb_cdpFind( const char * id ) { HB_TRACE( HB_TR_DEBUG, ( "hb_cdpFind(%s)", id ) ); return id ? * hb_cdpFindPos( id ) : NULL; } PHB_CODEPAGE hb_cdpFindExt( const char * id ) { PHB_CODEPAGE cdp = NULL; HB_TRACE( HB_TR_DEBUG, ( "hb_cdpFindExt(%s)", id ) ); if( id ) { cdp = * hb_cdpFindPos( id ); if( cdp == NULL && strcmp( id, "UTF8" ) == 0 ) return &s_utf8_codepage; } return cdp; } PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp ) { PHB_CODEPAGE cdpOld; HB_TRACE( HB_TR_DEBUG, ( "hb_cdpSelect(%p)", cdp ) ); cdpOld = hb_vmCDP(); if( cdp ) hb_vmSetCDP( cdp ); return cdpOld; } const char * hb_cdpID( void ) { PHB_CODEPAGE cdp; HB_TRACE( HB_TR_DEBUG, ( "hb_cdpID()" ) ); cdp = hb_vmCDP(); return cdp ? cdp->id : NULL; } const char * hb_cdpSelectID( const char * id ) { const char * idOld; HB_TRACE( HB_TR_DEBUG, ( "hb_cdpSelectID(%s)", id ) ); idOld = hb_cdpID(); hb_cdpSelect( hb_cdpFind( id ) ); return idOld; } #ifdef HB_LEGACY_LEVEL2 void hb_cdpnTranslate( char * psz, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut, HB_SIZE nChars ) { if( cdpIn && cdpOut && cdpIn->uniTable != cdpOut->uniTable ) { HB_SIZE ulDst = nChars; char * pDst = psz; if( cdpOut == &s_utf8_codepage || cdpOut->nMultiUC ) { ulDst = hb_cdpTransLen( psz, nChars, 0, cdpIn, cdpOut ); pDst = ( char * ) hb_xgrab( ulDst ); } ulDst = hb_cdpTransTo( psz, nChars, pDst, ulDst, cdpIn, cdpOut ); if( psz != pDst ) { if( ulDst > nChars ) ulDst = nChars; memcpy( psz, pDst, ulDst ); hb_xfree( pDst ); } if( ulDst < nChars ) memset( psz + ulDst, '\0', nChars - ulDst ); } } void hb_cdpTranslate( char * psz, PHB_CODEPAGE cdpIn, PHB_CODEPAGE cdpOut ) { hb_cdpnTranslate( psz, cdpIn, cdpOut, strlen( psz ) ); } #endif /* TOFIX: Move this to cdpapihb.c */ HB_FUNC( HB_CDPLIST ) { PHB_CODEPAGE cdp; int iCount; cdp = s_cdpList; iCount = 0; while( cdp ) { ++iCount; cdp = cdp->next; } hb_reta( iCount ); cdp = s_cdpList; iCount = 0; while( cdp ) { hb_storvc( cdp->id, -1, ++iCount ); cdp = cdp->next; } }