diff --git a/ChangeLog.txt b/ChangeLog.txt
index 5fa0d80bd9..8530fa7da7 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -7,6 +7,17 @@
    Entries may not always be in chronological/commit order.
    See license at the end of file. */
 
+2025-09-03 12:21 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
+  * src/rtl/cdpapi.c
+    + added fallback translation table for different variants of Latin
+      character, now when translation is made between different encoding
+      and the variant of Latin character does not exist in destination
+      code page then it's translated to their base Latin form, i.e.:
+         hb_utf8ToStr( "ĄĆĘŁŃÓŚŹŻąćęłńóśźż", "EN" ) -> "ACELNOSZZacelnószz"
+
+  * tests/uc16_gen.prg
+    ; updated comment
+
 2025-08-29 12:49 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
   * contrib/hbbmp/core.c
   * contrib/hbbmp/hbbmp.ch
diff --git a/src/rtl/cdpapi.c b/src/rtl/cdpapi.c
index c1d646cc74..2ae45f959f 100644
--- a/src/rtl/cdpapi.c
+++ b/src/rtl/cdpapi.c
@@ -145,6 +145,128 @@ static HB_CODEPAGE s_en_codepage =
 
 HB_CODEPAGE_ANNOUNCE( EN )
 
+/* Fallback table for Latin letters with diacritics and other variants:
+ * Unicode values in the range HB_UCFB_FIRST..HB_UCFB_LAST are mapped to
+ * their base ASCII letter. The table has two levels: s_fb_idx[] selects
+ * a block of ( 1 << HB_UCFB_BITS ) entries in s_fb_val[]. Value 0 in
+ * s_fb_val[] means that there is no fallback for given character.
+ */
+#define HB_UCFB_FIRST   0x00C0
+#define HB_UCFB_LAST    0x1EF9
+#define HB_UCFB_BITS    5
+
+/* block number in s_fb_val[] for each group of 32 Unicode values */
+static const HB_BYTE s_fb_idx[ 242 ] =
+{
+   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B,
+   0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+   0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
+   0x17, 0x18
+};
+
+/* fallback characters in blocks of 32 entries, the last block is shorter */
+static const HB_BYTE s_fb_val[ 794 ] =
+{
+   0x41, 0x41, 0x41, 0x41, 0x41, 0x41, 0x00, 0x43, 0x45, 0x45, 0x45, 0x45,
+   0x49, 0x49, 0x49, 0x49, 0x00, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x4F, 0x00,
+   0x4F, 0x55, 0x55, 0x55, 0x55, 0x59, 0x00, 0x00, 0x61, 0x61, 0x61, 0x61,
+   0x61, 0x61, 0x00, 0x63, 0x65, 0x65, 0x65, 0x65, 0x69, 0x69, 0x69, 0x69,
+   0x00, 0x6E, 0x6F, 0x6F, 0x6F, 0x6F, 0x6F, 0x00, 0x6F, 0x75, 0x75, 0x75,
+   0x75, 0x79, 0x00, 0x79, 0x41, 0x61, 0x41, 0x61, 0x41, 0x61, 0x43, 0x63,
+   0x43, 0x63, 0x43, 0x63, 0x43, 0x63, 0x44, 0x64, 0x44, 0x64, 0x45, 0x65,
+   0x45, 0x65, 0x45, 0x65, 0x45, 0x65, 0x45, 0x65, 0x47, 0x67, 0x47, 0x67,
+   0x47, 0x67, 0x47, 0x67, 0x48, 0x68, 0x48, 0x68, 0x49, 0x69, 0x49, 0x69,
+   0x49, 0x69, 0x49, 0x69, 0x49, 0x00, 0x00, 0x00, 0x4A, 0x6A, 0x4B, 0x6B,
+   0x00, 0x4C, 0x6C, 0x4C, 0x6C, 0x4C, 0x6C, 0x00, 0x00, 0x4C, 0x6C, 0x4E,
+   0x6E, 0x4E, 0x6E, 0x4E, 0x6E, 0x00, 0x00, 0x00, 0x4F, 0x6F, 0x4F, 0x6F,
+   0x4F, 0x6F, 0x00, 0x00, 0x52, 0x72, 0x52, 0x72, 0x52, 0x72, 0x53, 0x73,
+   0x53, 0x73, 0x53, 0x73, 0x53, 0x73, 0x54, 0x74, 0x54, 0x74, 0x54, 0x74,
+   0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75, 0x55, 0x75,
+   0x57, 0x77, 0x59, 0x79, 0x59, 0x5A, 0x7A, 0x5A, 0x7A, 0x5A, 0x7A, 0x00,
+   0x62, 0x42, 0x42, 0x62, 0x00, 0x00, 0x00, 0x43, 0x63, 0x00, 0x44, 0x44,
+   0x64, 0x00, 0x00, 0x00, 0x00, 0x46, 0x66, 0x47, 0x00, 0x00, 0x00, 0x49,
+   0x4B, 0x6B, 0x6C, 0x00, 0x00, 0x4E, 0x6E, 0x4F, 0x4F, 0x6F, 0x00, 0x00,
+   0x50, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0x54, 0x74, 0x54, 0x55,
+   0x75, 0x00, 0x56, 0x59, 0x79, 0x5A, 0x7A, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x49, 0x69, 0x4F, 0x6F, 0x55,
+   0x75, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x47, 0x67, 0x47, 0x67, 0x4B, 0x6B, 0x4F, 0x6F,
+   0x00, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00, 0x00, 0x47, 0x67, 0x00, 0x00,
+   0x4E, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x41, 0x61,
+   0x45, 0x65, 0x45, 0x65, 0x49, 0x69, 0x49, 0x69, 0x4F, 0x6F, 0x4F, 0x6F,
+   0x52, 0x72, 0x52, 0x72, 0x55, 0x75, 0x55, 0x75, 0x53, 0x73, 0x54, 0x74,
+   0x00, 0x00, 0x48, 0x68, 0x4E, 0x64, 0x00, 0x00, 0x5A, 0x7A, 0x41, 0x61,
+   0x45, 0x65, 0x00, 0x00, 0x00, 0x00, 0x4F, 0x6F, 0x00, 0x00, 0x59, 0x79,
+   0x6C, 0x6E, 0x74, 0x00, 0x00, 0x00, 0x41, 0x43, 0x63, 0x4C, 0x54, 0x73,
+   0x7A, 0x00, 0x00, 0x42, 0x00, 0x00, 0x45, 0x65, 0x4A, 0x6A, 0x00, 0x71,
+   0x52, 0x72, 0x59, 0x79, 0x00, 0x00, 0x00, 0x62, 0x00, 0x63, 0x64, 0x64,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x68, 0x00, 0x69, 0x00, 0x00, 0x6C, 0x6C, 0x6C, 0x00, 0x00,
+   0x00, 0x6D, 0x6E, 0x6E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x72, 0x72, 0x72, 0x00, 0x00, 0x00, 0x73, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x74, 0x00, 0x00, 0x76, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6A, 0x00, 0x00,
+   0x71, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x42, 0x62, 0x42, 0x62, 0x42, 0x62,
+   0x00, 0x00, 0x44, 0x64, 0x44, 0x64, 0x44, 0x64, 0x44, 0x64, 0x44, 0x64,
+   0x00, 0x00, 0x00, 0x00, 0x45, 0x65, 0x45, 0x65, 0x00, 0x00, 0x46, 0x66,
+   0x47, 0x67, 0x48, 0x68, 0x48, 0x68, 0x48, 0x68, 0x48, 0x68, 0x48, 0x68,
+   0x49, 0x69, 0x00, 0x00, 0x4B, 0x6B, 0x4B, 0x6B, 0x4B, 0x6B, 0x4C, 0x6C,
+   0x00, 0x00, 0x4C, 0x6C, 0x4C, 0x6C, 0x4D, 0x6D, 0x4D, 0x6D, 0x4D, 0x6D,
+   0x4E, 0x6E, 0x4E, 0x6E, 0x4E, 0x6E, 0x4E, 0x6E, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x50, 0x70, 0x50, 0x70, 0x52, 0x72, 0x52, 0x72,
+   0x00, 0x00, 0x52, 0x72, 0x53, 0x73, 0x53, 0x73, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x54, 0x74, 0x54, 0x74, 0x54, 0x74, 0x54, 0x74, 0x55, 0x75,
+   0x55, 0x75, 0x55, 0x75, 0x00, 0x00, 0x00, 0x00, 0x56, 0x76, 0x56, 0x76,
+   0x57, 0x77, 0x57, 0x77, 0x57, 0x77, 0x57, 0x77, 0x57, 0x77, 0x58, 0x78,
+   0x58, 0x78, 0x59, 0x79, 0x5A, 0x7A, 0x5A, 0x7A, 0x5A, 0x7A, 0x68, 0x74,
+   0x77, 0x79, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x41, 0x61, 0x41, 0x61,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x65, 0x45, 0x65,
+   0x45, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x49, 0x69, 0x49, 0x69, 0x4F, 0x6F, 0x4F, 0x6F, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x55, 0x75, 0x55, 0x75, 0x00, 0x00, 0x00, 0x00,
+   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x79, 0x59, 0x79, 0x59, 0x79,
+   0x59, 0x79
+};
+
+/* return base Latin letter for Unicode value n or 0 when n is outside
+ * HB_UCFB_FIRST..HB_UCFB_LAST range or no fallback is defined for it
+ */
+static HB_USHORT s_uc_fback( int n )
+{
+   n -= HB_UCFB_FIRST;
+   if( n >= 0 && n <= ( HB_UCFB_LAST - HB_UCFB_FIRST ) )
+   {
+      return s_fb_val[ ( s_fb_idx[ n >> HB_UCFB_BITS ] << HB_UCFB_BITS ) +
+                       ( n & ( ( 1 << HB_UCFB_BITS ) - 1 ) ) ];
+   }
+   return 0;
+}
+
 static PHB_CODEPAGE s_cdpList = NULL;
 
 
@@ -157,9 +279,28 @@ void hb_cdpBuildTransTable( PHB_UNITABLE uniTable )
    if( uniTable->uniTrans == NULL )
    {
       HB_UCHAR * uniTrans;
       HB_WCHAR wcMax = 0;
+      HB_BOOL fLatinFallback = HB_TRUE;
       int i;
 
+      /* enable Latin fallback only when all ASCII upper case letters
+       * are mapped to themselves in this table
+       */
+      for( i = 'A'; i <= 'Z'; ++i )
+      {
+         if( uniTable->uniCodes[ i ] != ( HB_WCHAR ) i )
+         {
+            fLatinFallback = HB_FALSE;
+            break;
+         }
+      }
+      /* wcMax keeps its 0 initializer so it has defined value in the
+       * loop below also when the fallback is disabled; with the fallback
+       * enabled it has to cover the whole HB_UCFB_FIRST..HB_UCFB_LAST range
+       */
+      if( fLatinFallback )
+         wcMax = HB_UCFB_LAST;
+
       for( i = 0; i < 256; ++i )
       {
          HB_WCHAR wc = uniTable->uniCodes[ i ];
@@ -172,6 +313,21 @@ void hb_cdpBuildTransTable( PHB_UNITABLE uniTable )
          if( uniTable->uniCodes[ i ] )
             uniTrans[ uniTable->uniCodes[ i ] ] = ( HB_UCHAR ) i;
       }
+      if( fLatinFallback )
+      {
+         /* Unicode values without own code in this table get the code
+          * of their base Latin letter if it is defined in the table
+          */
+         for( i = HB_UCFB_FIRST; i <= HB_UCFB_LAST; ++i )
+         {
+            if( uniTrans[ i ] == 0 )
+            {
+               HB_WCHAR wc = s_uc_fback( i );
+               if( wc > 0 && wc <= wcMax && uniTable->uniCodes[ uniTrans[ wc ] ] == wc )
+                  uniTrans[ i ] = uniTrans[ wc ];
+            }
+         }
+      }
 
       uniTable->wcMax = wcMax;
       uniTable->uniTrans = uniTrans;
diff --git a/tests/uc16_gen.prg b/tests/uc16_gen.prg
index 8c63ce8b37..f5cb1ca2d8 100644
--- a/tests/uc16_gen.prg
+++ b/tests/uc16_gen.prg
@@ -1,6 +1,7 @@
 /*
- * code to generate C source with conversion tables between BIG5 and UCS16
- * using data defined by Unicode, Inc. in BIG5.TXT
+ * code to generate C source with tables of UCS16 character flags using data
+ * defined by Unicode, Inc. https://unicode.org/Public/UNIDATA/UnicodeData.txt
+ * harbour/src/codepage/uc16def.c was generated by this code.
  *
  * Copyright 2011 Przemyslaw Czerpak
  *
@@ -162,7 +163,7 @@ proc main()
    cResult := ;
       "/*" + hb_eol() + ;
       " * Unicode character tables" + hb_eol() + ;
-      " * code generated automatically by tests/uc_gen.prg" + hb_eol() + ;
+      " * code generated automatically by tests/uc16_gen.prg" + hb_eol() + ;
       " *" + hb_eol() + ;
       " * Copyright 2012 Przemyslaw Czerpak " + hb_eol() + ;
       " *" + hb_eol() + ;