2025-09-03 12:21 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)

* src/rtl/cdpapi.c
    + added fallback translation table for variants of Latin characters;
      now, when a translation is made between different encodings and
      a variant of a Latin character does not exist in the destination
      code page, it is translated to its base Latin form, e.g.:
         hb_utf8ToStr( "ĄĆĘŁŃÓŚŹŻąćęłńóśźż", "EN" ) -> "ACELNOSZZacelnószz"

  * tests/uc16_gen.prg
    ; updated comment
This commit is contained in:
Przemysław Czerpak
2025-09-03 12:21:01 +02:00
parent c4b2a030c4
commit 315887a395
3 changed files with 151 additions and 4 deletions

View File

@@ -7,6 +7,17 @@
Entries may not always be in chronological/commit order.
See license at the end of file. */
2025-09-03 12:21 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
* src/rtl/cdpapi.c
+ added fallback translation table for variants of Latin characters;
now, when a translation is made between different encodings and
a variant of a Latin character does not exist in the destination
code page, it is translated to its base Latin form, e.g.:
hb_utf8ToStr( "ĄĆĘŁŃÓŚŹŻąćęłńóśźż", "EN" ) -> "ACELNOSZZacelnószz"
* tests/uc16_gen.prg
; updated comment
2025-08-29 12:49 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
* contrib/hbbmp/core.c
* contrib/hbbmp/hbbmp.ch

View File

@@ -145,6 +145,117 @@ static HB_CODEPAGE s_en_codepage =
HB_CODEPAGE_ANNOUNCE( EN )
/* Inclusive range of Unicode code points covered by the Latin fallback
   lookup; s_uc_fback() returns 0 for anything outside of it. */
#define HB_UCFB_FIRST 0x00C0
#define HB_UCFB_LAST 0x1EF9
/* Number of low bits of ( code point - HB_UCFB_FIRST ) used as the position
   inside one s_fb_val block, so a block holds ( 1 << HB_UCFB_BITS ) entries;
   the remaining high bits select the s_fb_idx entry. */
#define HB_UCFB_BITS 5
/* First level of the Latin fallback lookup. s_uc_fback() indexes this table
   with ( code point - HB_UCFB_FIRST ) >> HB_UCFB_BITS and uses the value,
   shifted left by HB_UCFB_BITS, as the starting offset of a block inside
   s_fb_val. The value 0x10 is shared by every range without fallbacks:
   the s_fb_val block it selects contains only zeros. */
static const HB_BYTE s_fb_idx[ 242 ] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
0x17, 0x18
};
/* Second level of the Latin fallback lookup, laid out as consecutive blocks
   of ( 1 << HB_UCFB_BITS ) entries which are selected through s_fb_idx.
   Each entry is the value s_uc_fback() returns for one code point, e.g.
   the first entry maps U+00C0 to 0x41 ('A'); 0x00 marks a code point
   without a fallback. The last block ends at the entry for HB_UCFB_LAST,
   which is why the table has 794 entries and not a multiple of the
   block size. */
static const HB_BYTE s_fb_val[ 794 ] =
{
0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x00, 0x43, 0x45, 0x45, 0x45, 0x45,
0x49, 0x49, 0x49, 0x49, 0x00, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x4F, 0x00,
0x4F, 0x55, 0x55, 0x55, 0x55, 0x59, 0x00, 0x00, 0x61, 0x61, 0x61, 0x61,
0x61, 0x61, 0x00, 0x63, 0x65, 0x65, 0x65, 0x65, 0x69, 0x69, 0x69, 0x69,
0x00, 0x6E, 0x6F, 0x6F, 0x6F, 0x6F, 0x6F, 0x00, 0x6F, 0x75, 0x75, 0x75,
0x75, 0x79, 0x00, 0x79, 0x41, 0x61, 0x41, 0x61, 0x41, 0x61, 0x43, 0x63,
0x43, 0x63, 0x43, 0x63, 0x43, 0x63, 0x44, 0x64, 0x44, 0x64, 0x45, 0x65,
0x45, 0x65, 0x45, 0x65, 0x45, 0x65, 0x45, 0x65, 0x47, 0x67, 0x47, 0x67,
0x47, 0x67, 0x47, 0x67, 0x48, 0x68, 0x48, 0x68, 0x49, 0x69, 0x49, 0x69,
0x49, 0x69, 0x49, 0x69, 0x49, 0x00, 0x00, 0x00, 0x4A, 0x6A, 0x4B, 0x6B,
0x00, 0x4C, 0x6C, 0x4C, 0x6C, 0x4C, 0x6C, 0x00, 0x00, 0x4C, 0x6C, 0x4E,
0x6E, 0x4E, 0x6E, 0x4E, 0x6E, 0x00, 0x00, 0x00, 0x4F, 0x6F, 0x4F, 0x6F,
0x4F, 0x6F, 0x00, 0x00, 0x52, 0x72, 0x52, 0x72, 0x52, 0x72, 0x53, 0x73,
0x53, 0x73, 0x53, 0x73, 0x53, 0x73, 0x54, 0x74, 0x54, 0x74, 0x54, 0x74,
0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75,
0x57, 0x77, 0x59, 0x79, 0x59, 0x5A, 0x7A, 0x5A, 0x7A, 0x5A, 0x7A, 0x00,
0x62, 0x42, 0x42, 0x62, 0x00, 0x00, 0x00, 0x43, 0x63, 0x00, 0x44, 0x44,
0x64, 0x00, 0x00, 0x00, 0x00, 0x46, 0x66, 0x47, 0x00, 0x00, 0x00, 0x49,
0x4B, 0x6B, 0x6C, 0x00, 0x00, 0x4E, 0x6E, 0x4F, 0x4F, 0x6F, 0x00, 0x00,
0x50, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x54, 0x74, 0x54, 0x55,
0x75, 0x00, 0x56, 0x59, 0x79, 0x5A, 0x7A, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x49, 0x69, 0x4F, 0x6F, 0x55,
0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x47, 0x67, 0x47, 0x67, 0x4B, 0x6B, 0x4F, 0x6F,
0x00, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x47, 0x67, 0x00, 0x00,
0x4E, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x41, 0x61,
0x45, 0x65, 0x45, 0x65, 0x49, 0x69, 0x49, 0x69, 0x4F, 0x6F, 0x4F, 0x6F,
0x52, 0x72, 0x52, 0x72, 0x55, 0x75, 0x55, 0x75, 0x53, 0x73, 0x54, 0x74,
0x00, 0x00, 0x48, 0x68, 0x4E, 0x64, 0x00, 0x00, 0x5A, 0x7A, 0x41, 0x61,
0x45, 0x65, 0x00, 0x00, 0x00, 0x00, 0x4F, 0x6F, 0x00, 0x00, 0x59, 0x79,
0x6C, 0x6E, 0x74, 0x00, 0x00, 0x00, 0x41, 0x43, 0x63, 0x4C, 0x54, 0x73,
0x7A, 0x00, 0x00, 0x42, 0x00, 0x00, 0x45, 0x65, 0x4A, 0x6A, 0x00, 0x71,
0x52, 0x72, 0x59, 0x79, 0x00, 0x00, 0x00, 0x62, 0x00, 0x63, 0x64, 0x64,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
0x00, 0x00, 0x68, 0x00, 0x69, 0x00, 0x00, 0x6C, 0x6C, 0x6C, 0x00, 0x00,
0x00, 0x6D, 0x6E, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x72, 0x72, 0x72, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00,
0x74, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00,
0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x42, 0x62, 0x42, 0x62, 0x42, 0x62,
0x00, 0x00, 0x44, 0x64, 0x44, 0x64, 0x44, 0x64, 0x44, 0x64, 0x44, 0x64,
0x00, 0x00, 0x00, 0x00, 0x45, 0x65, 0x45, 0x65, 0x00, 0x00, 0x46, 0x66,
0x47, 0x67, 0x48, 0x68, 0x48, 0x68, 0x48, 0x68, 0x48, 0x68, 0x48, 0x68,
0x49, 0x69, 0x00, 0x00, 0x4B, 0x6B, 0x4B, 0x6B, 0x4B, 0x6B, 0x4C, 0x6C,
0x00, 0x00, 0x4C, 0x6C, 0x4C, 0x6C, 0x4D, 0x6D, 0x4D, 0x6D, 0x4D, 0x6D,
0x4E, 0x6E, 0x4E, 0x6E, 0x4E, 0x6E, 0x4E, 0x6E, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x50, 0x70, 0x50, 0x70, 0x52, 0x72, 0x52, 0x72,
0x00, 0x00, 0x52, 0x72, 0x53, 0x73, 0x53, 0x73, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x54, 0x74, 0x54, 0x74, 0x54, 0x74, 0x54, 0x74, 0x55, 0x75,
0x55, 0x75, 0x55, 0x75, 0x00, 0x00, 0x00, 0x00, 0x56, 0x76, 0x56, 0x76,
0x57, 0x77, 0x57, 0x77, 0x57, 0x77, 0x57, 0x77, 0x57, 0x77, 0x58, 0x78,
0x58, 0x78, 0x59, 0x79, 0x5A, 0x7A, 0x5A, 0x7A, 0x5A, 0x7A, 0x68, 0x74,
0x77, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x41, 0x61,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x65, 0x45, 0x65,
0x45, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x49, 0x69, 0x49, 0x69, 0x4F, 0x6F, 0x4F, 0x6F, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x55, 0x75, 0x55, 0x75, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x79, 0x59, 0x79, 0x59, 0x79,
0x59, 0x79
};
/* Look up the fallback value for Unicode code point n.
 *
 * The offset of n from HB_UCFB_FIRST is split in two parts: the high bits
 * pick an entry in s_fb_idx, which gives the number of a block in s_fb_val,
 * and the low HB_UCFB_BITS bits pick the entry inside that block.
 *
 * Returns the s_fb_val entry for n, or 0 when n lies outside the
 * HB_UCFB_FIRST..HB_UCFB_LAST range. A table entry may itself be 0.
 */
static HB_USHORT s_uc_fback( int n )
{
   const int iOffset = n - HB_UCFB_FIRST;
   int iBlock, iSlot;

   /* code points outside of the covered range have no fallback */
   if( iOffset < 0 || iOffset > ( HB_UCFB_LAST - HB_UCFB_FIRST ) )
      return 0;

   iBlock = s_fb_idx[ iOffset >> HB_UCFB_BITS ];
   iSlot = iOffset & ( ( 1 << HB_UCFB_BITS ) - 1 );

   return s_fb_val[ ( iBlock << HB_UCFB_BITS ) + iSlot ];
}
static PHB_CODEPAGE s_cdpList = NULL;
@@ -157,9 +268,21 @@ void hb_cdpBuildTransTable( PHB_UNITABLE uniTable )
if( uniTable->uniTrans == NULL )
{
HB_UCHAR * uniTrans;
HB_WCHAR wcMax = 0;
HB_WCHAR wcMax;
HB_BOOL fLatinFallback = HB_TRUE;
int i;
for( i = 'A'; i <= 'Z'; ++i )
{
if( uniTable->uniCodes[ i ] != ( HB_WCHAR ) i )
{
fLatinFallback = HB_FALSE;
break;
}
}
if( fLatinFallback )
wcMax = HB_UCFB_LAST;
for( i = 0; i < 256; ++i )
{
HB_WCHAR wc = uniTable->uniCodes[ i ];
@@ -172,6 +295,18 @@ void hb_cdpBuildTransTable( PHB_UNITABLE uniTable )
if( uniTable->uniCodes[ i ] )
uniTrans[ uniTable->uniCodes[ i ] ] = ( HB_UCHAR ) i;
}
if( fLatinFallback )
{
for( i = HB_UCFB_FIRST; i <= HB_UCFB_LAST; ++i )
{
if( uniTrans[ i ] == 0 )
{
HB_WCHAR wc = s_uc_fback( i );
if( wc > 0 && wc <= wcMax && uniTable->uniCodes[ uniTrans[ wc ] ] == wc )
uniTrans[ i ] = uniTrans[ wc ];
}
}
}
uniTable->wcMax = wcMax;
uniTable->uniTrans = uniTrans;

View File

@@ -1,6 +1,7 @@
/*
* code to generate C source with conversion tables between BIG5 and UCS16
* using data defined by Unicode, Inc. in BIG5.TXT
* code to generate C source with tables of UCS16 character flags using data
* defined by Unicode, Inc. https://unicode.org/Public/UNIDATA/UnicodeData.txt
* harbour/src/codepage/uc16def.c was generated by this code.
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
*
@@ -162,7 +163,7 @@ proc main()
cResult := ;
"/*" + hb_eol() + ;
" * Unicode character tables" + hb_eol() + ;
" * code generated automatically by tests/uc_gen.prg" + hb_eol() + ;
" * code generated automatically by tests/uc16_gen.prg" + hb_eol() + ;
" *" + hb_eol() + ;
" * Copyright 2012 Przemyslaw Czerpak <druzus / at / priv.onet.pl>" + hb_eol() + ;
" *" + hb_eol() + ;