Files
harbour-core/harbour/src/codepage/cp_utf8.c
Viktor Szakats f71c4e0a25 2012-05-16 18:38 UTC+0200 Viktor Szakats (harbour syenar.net)
* src/rtl/hbgtcore.c
  * src/rtl/rat.c
  * src/codepage/cp_utf8.c
    % deleted unnecessary assignment generating warnings with 
      some compilers
2012-05-16 16:39:24 +00:00

313 lines
7.4 KiB
C

/*
* $Id$
*/
/*
* Harbour Project source code:
* example of Harbour codepage using UTF8 encoding
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://www.harbour-project.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
*
* As a special exception, the Harbour Project gives permission for
* additional uses of the text contained in its release of Harbour.
*
* The exception is that, if you link the Harbour libraries with other
* files to produce an executable, this does not by itself cause the
* resulting executable to be covered by the GNU General Public License.
* Your use of that executable is in no way restricted on account of
* linking the Harbour library code into it.
*
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*
* This exception applies only to the code released by the Harbour
* Project under the name Harbour. If you copy code from other
* Harbour Project or Free Software Foundation releases into a copy of
* Harbour, as the General Public License permits, the exception does
* not apply to the code that you add in this way. To avoid misleading
* anyone as to the status of such modified files, you must delete
* this exception notice from them.
*
* If you write modifications of your own for Harbour, it is your choice
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice.
*
*/
#define HB_UTF8EX_SORT
#include "hbapi.h"
#include "hbapicdp.h"
#include "uc16def.c"
#ifdef HB_UTF8EX_SORT
# include "utf8sort.c"
#endif
static HB_CDP_GET_FUNC( UTF8_get )
{
HB_SIZE nIndex = *pnIndex;
int n = 0;
HB_SYMBOL_UNUSED( cdp );
*wc = 0;
while( nIndex < nLen )
{
if( hb_cdpUTF8ToU16NextChar( ( HB_UCHAR ) pSrc[ nIndex ], &n, wc ) )
++nIndex;
if( n == 0 )
{
*pnIndex = nIndex;
return HB_TRUE;
}
}
if( n != 0 )
{
*pnIndex = nIndex;
return HB_TRUE;
}
return HB_FALSE;
}
/*
 * Write character wc into pDst at offset *pnIndex.
 *
 * The number of bytes needed is obtained from hb_cdpUTF8CharSize().  When
 * that many bytes do not fit before nLen nothing is written and HB_FALSE
 * is returned; otherwise hb_cdpU16CharToUTF8() fills the destination,
 * *pnIndex is advanced by the byte count and HB_TRUE is returned.
 */
static HB_CDP_PUT_FUNC( UTF8_put )
{
   int nBytes = hb_cdpUTF8CharSize( wc );

   HB_SYMBOL_UNUSED( cdp );

   /* not enough room left in the destination buffer */
   if( *pnIndex + nBytes > nLen )
      return HB_FALSE;

   hb_cdpU16CharToUTF8( pDst + *pnIndex, wc );
   *pnIndex += nBytes;

   return HB_TRUE;
}
/*
 * Return the size reported by hb_cdpUTF8CharSize() for character wc.
 * The codepage handle is not consulted.
 */
static HB_CDP_LEN_FUNC( UTF8_len )
{
   int nBytes = hb_cdpUTF8CharSize( wc );

   HB_SYMBOL_UNUSED( cdp );

   return nBytes;
}
static HB_CDP_UPPER_FUNC( UTF8_upper )
{
HB_WCHAR wcUP;
HB_SYMBOL_UNUSED( cdp );
wcUP = s_uc_upper( wc );
return wcUP ? wcUP : wc;
}
static HB_CDP_LOWER_FUNC( UTF8_lower )
{
HB_WCHAR wcLO;
HB_SYMBOL_UNUSED( cdp );
wcLO = s_uc_lower( wc );
return wcLO ? wcLO : wc;
}
/*
 * Return whatever s_uc_flags() yields for character wc.
 */
static HB_CDP_FLAGS_FUNC( UTF8_flags )
{
   /* the codepage handle plays no part in the lookup */
   HB_SYMBOL_UNUSED( cdp );

   return ( s_uc_flags( wc ) );
}
/*
 * Compare szFirst (nLenFirst bytes) with szSecond (nLenSecond bytes).
 *
 * Returns a negative value, 0 or a positive value.  In both variants a
 * first string that runs out before the second compares as smaller,
 * while a second string that runs out first gives 0 unless fExact is
 * set and the first string still has data, in which case the result
 * is 1.
 *
 * With HB_UTF8EX_SORT defined the strings are read character by
 * character with HB_CDPCHAR_GET() and differing characters are ordered
 * by their s_uniSort[] entries; characters whose entries are equal are
 * treated as equal.  Without it the common prefix is compared with
 * memcmp().
 */
static HB_CDP_CMP_FUNC( UTF8_cmp )
{
#ifdef HB_UTF8EX_SORT
   HB_SIZE nFirstPos = 0, nSecondPos = 0;
   HB_WCHAR wcFirst, wcSecond;

   /* the second string drives the loop: it is always read first */
   while( HB_CDPCHAR_GET( cdp, szSecond, nLenSecond, &nSecondPos, &wcSecond ) )
   {
      /* first string exhausted while the second still has characters */
      if( ! HB_CDPCHAR_GET( cdp, szFirst, nLenFirst, &nFirstPos, &wcFirst ) )
         return -1;

      if( wcFirst != wcSecond )
      {
         HB_USHORT usFirst = s_uniSort[ wcFirst ];
         HB_USHORT usSecond = s_uniSort[ wcSecond ];

         if( usFirst < usSecond )
            return -1;
         if( usFirst > usSecond )
            return 1;
      }
   }

   /* second string exhausted: leftovers in the first matter only for exact compare */
   if( fExact && HB_CDPCHAR_GET( cdp, szFirst, nLenFirst, &nFirstPos, &wcFirst ) )
      return 1;

   return 0;
#else
   HB_SIZE nCommon = nLenFirst < nLenSecond ? nLenFirst : nLenSecond;
   int iDiff;

   HB_SYMBOL_UNUSED( cdp );

   iDiff = memcmp( szFirst, szSecond, nCommon );
   if( iDiff != 0 )
      return iDiff;

   /* common prefix is identical, so the lengths decide */
   if( nLenSecond > nLenFirst )
      return -1;
   if( fExact && nLenSecond < nLenFirst )
      return 1;

   return 0;
#endif
}
static HB_CDP_CMP_FUNC( UTF8_cmpi )
{
int iRet = 0;
#ifdef HB_UTF8EX_SORT
HB_SIZE nPos1 = 0, nPos2 = 0;
HB_WCHAR wc1, wc2;
for( ;; )
{
if( !HB_CDPCHAR_GET( cdp, szSecond, nLenSecond, &nPos2, &wc2 ) )
{
if( fExact && HB_CDPCHAR_GET( cdp, szFirst, nLenFirst, &nPos1, &wc1 ) )
iRet = 1;
break;
}
if( !HB_CDPCHAR_GET( cdp, szFirst, nLenFirst, &nPos1, &wc1 ) )
{
iRet = -1;
break;
}
if( wc1 != wc2 )
{
HB_USHORT us1 = s_uniSort[ HB_CDPCHAR_UPPER( cdp, wc1 ) ],
us2 = s_uniSort[ HB_CDPCHAR_UPPER( cdp, wc2 ) ];
if( us1 != us2 )
{
iRet = us1 < us2 ? -1 : 1;
break;
}
}
}
#else
HB_SIZE nLen = nLenFirst < nLenSecond ? nLenFirst : nLenSecond;
while( nLen-- )
{
HB_UCHAR u1 = cdp->upper[ ( HB_UCHAR ) * szFirst++ ],
u2 = cdp->upper[ ( HB_UCHAR ) * szSecond++ ];
if( u1 != u2 )
{
iRet = ( u1 < u2 ) ? -1 : 1;
break;
}
}
if( iRet == 0 )
{
if( nLenSecond > nLenFirst )
iRet = -1;
else if( fExact && nLenSecond < nLenFirst )
iRet = 1;
}
#endif
return iRet;
}
/*
 * Allocate and fill the per-byte tables of the codepage.
 *
 * A single 0x300 byte buffer obtained from hb_xgrab() is split into three
 * 0x100 byte tables: cdp->flags at offset 0, cdp->upper at 0x100 and
 * cdp->lower at 0x200.  For every byte value the flag entry is built
 * from HB_ISDIGIT / HB_ISALPHA / HB_ISUPPER / HB_ISLOWER and the case
 * tables are filled from HB_TOUPPER / HB_TOLOWER.
 */
static void hb_cp_init( PHB_CODEPAGE cdp )
{
   HB_UCHAR * pBuffer = ( HB_UCHAR * ) hb_xgrab( 0x300 );
   HB_UCHAR * pFlags = pBuffer;
   HB_UCHAR * pUpper = pBuffer + 0x100;
   HB_UCHAR * pLower = pBuffer + 0x200;
   int ch;

   cdp->buffer = pBuffer;
   cdp->flags = pFlags;
   cdp->upper = pUpper;
   cdp->lower = pLower;

   for( ch = 0; ch < 0x100; ch++ )
   {
      HB_UCHAR ucFlags = 0;

      if( HB_ISDIGIT( ch ) )
         ucFlags |= HB_CDP_DIGIT;
      if( HB_ISALPHA( ch ) )
         ucFlags |= HB_CDP_ALPHA;
      if( HB_ISUPPER( ch ) )
         ucFlags |= HB_CDP_UPPER;
      if( HB_ISLOWER( ch ) )
         ucFlags |= HB_CDP_LOWER;

      pFlags[ ch ] = ucFlags;
      pUpper[ ch ] = ( HB_UCHAR ) HB_TOUPPER( ch );
      pLower[ ch ] = ( HB_UCHAR ) HB_TOLOWER( ch );
   }
}
/*
 * Codepage description consumed by hbcdpreg.h.  All macros below must be
 * defined before that header is included at the end of this block.
 */
#define HB_CP_RAW
/* codepage identifier and human readable description */
#define HB_CP_ID UTF8EX
#define HB_CP_INFO "UTF-8 extended"
/* NOTE(review): presumably selects the Unicode translation table - confirm in hbcdpreg.h */
#define HB_CP_UNITB HB_UNITB_437
/* use character indexes instead of byte ones */
#define HB_CP_CHARIDX
/* CHR(), ASC() and similar functions operate on Unicode values instead of bytes */
#define HB_CP_CHARUNI
/* UTF-8 string encoding */
#define HB_CP_UTF8
/* character level callbacks implemented earlier in this file */
#define HB_CP_GET_FUNC UTF8_get
#define HB_CP_PUT_FUNC UTF8_put
#define HB_CP_LEN_FUNC UTF8_len
#define HB_CP_FLAGS_FUNC UTF8_flags
#define HB_CP_UPPER_FUNC UTF8_upper
#define HB_CP_LOWER_FUNC UTF8_lower
#define HB_CP_CMP_FUNC UTF8_cmp
#define HB_CP_CMPI_FUNC UTF8_cmpi
/* no static tables are supplied; hb_cp_init() allocates flags/upper/lower itself */
#define s_flags NULL
#define s_upper NULL
#define s_lower NULL
#define s_sort NULL
#define HB_CP_INIT hb_cp_init
/* include CP registration code */
#include "hbcdpreg.h"