diff --git a/harbour/ChangeLog.txt b/harbour/ChangeLog.txt index 2567ee97fa..1caf63c909 100644 --- a/harbour/ChangeLog.txt +++ b/harbour/ChangeLog.txt @@ -10,6 +10,56 @@ * Change, ! Fix, % Optimization, + Addition, - Removal, ; Comment */ +2013-01-30 19:48 UTC+0100 Viktor Szakats (harbour syenar.net) + + src/codepage/l_sr_cyr.c + + src/codepage/l_sr_lat.c + * src/codepage/cpsr646.c + * src/codepage/cpsr646c.c + * separated collations from the two correct SR CP modules + ; TOFIX: ? This page suggests that there are Latin digraphs + that should be specially sorted: + https://en.wikipedia.org/wiki/Serbo-Croatian#Writing_systems + + * src/codepage/cpsrwin.c + * changed to utilize the standard Serbian Cyrillic collation, + the same one used by the SR646C CP module. The old one seemed + quite wrong, though I'm not even remotely an expert in Serbian. + [INCOMPATIBLE] + If you use "SRWIN" for indexing, make sure to reindex + ; Verify me + + * src/codepage/cpua866.c + * changed to utilize the standard UK (Ukrainian) collation, + the same one used by all other Ukrainian CP modules. 
The old + one missed the characters: + U+0490 (UPPER) - http://codepoints.net/U+0490 + U+0491 (LOWER) - http://codepoints.net/U+0491 + According to this page, these two chars are part of the + Ukrainian alphabet: + https://en.wikipedia.org/wiki/Ukrainian_language#Alphabet + + ; TOFIX: RUISO: + This has 4 extra character pairs compared to all + other Russian CP modules: + UPPER: + U+0401 - http://codepoints.net/U+0401 (Russian alphabet) + U+0404 - http://codepoints.net/U+0404 (Ukrainian alphabet) + U+0407 - http://codepoints.net/U+0407 (Ukrainian alphabet) + U+040E - http://codepoints.net/U+040E (Belarusian alphabet) + LOWER: + U+0451 - http://codepoints.net/U+0451 (Russian alphabet) + U+0454 - http://codepoints.net/U+0454 (Ukrainian alphabet) + U+0457 - http://codepoints.net/U+0457 (Ukrainian alphabet) + U+045E - http://codepoints.net/U+045E (Belarusian alphabet) + From the above I surmise that it'd be better if the + above chars were part of the std Russian collation, + though I'm neither an expert nor sure that putting + them at the end of the collation does any good, + in which latter case they'd be better removed from RUISO. + What is certain, though, is that U+0401/U+0451 should be added to the std + collation in l_ru.c. + Any comments from Russian-breathing Harbourers? + 2013-01-30 18:24 UTC+0100 Viktor Szakats (harbour syenar.net) * doc/en/lang.txt * include/hbapilng.h @@ -93,9 +143,9 @@ ; TOFIX: Here's the list of "CP" modules, that use irregular, but not 'raw' collations, that can't be explained with compatibility or other obvious reasons: - SRWIN - is this 'sr_cyr' or 'sr_lat', or else? - RUISO - why has this 4 extra chars at the end compared to std ru collation? - UA866 - why is it missing an accented version of a char compared to std ua collation? + SRWIN - is this 'sr_cyr' or 'sr_lat', or else? [CYRILLIC] [PATCHED] + RUISO - why has this 4 extra chars at the end compared to std ru collation? 
[MOVED] + UA866 - why is it missing an accented version of a char compared to std ua collation? [PATCHED] * src/codepage/cpbg866.c * src/codepage/cpbgiso.c diff --git a/harbour/src/codepage/cpsr646.c b/harbour/src/codepage/cpsr646.c index dbe2e87e4f..977a097ca7 100644 --- a/harbour/src/codepage/cpsr646.c +++ b/harbour/src/codepage/cpsr646.c @@ -54,8 +54,7 @@ #define HB_CP_INFO "Serbian ISO-646 (YUSCII)" #define HB_CP_UNITB HB_UNITB_646YU #define HB_CP_ACSORT HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "ABCČĆDĐEFGHIJKLMNOPQRSŠTUVWXYZŽ" -#define HB_CP_LOWER "abcčćdđefghijklmnopqrsštuvwxyzž" +#include "l_sr_lat.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/cpsr646c.c b/harbour/src/codepage/cpsr646c.c index 309199315b..3126a6816e 100644 --- a/harbour/src/codepage/cpsr646c.c +++ b/harbour/src/codepage/cpsr646c.c @@ -54,8 +54,7 @@ #define HB_CP_INFO "Serbian ISO-646C (Cyrillic YUSCII)" #define HB_CP_UNITB HB_UNITB_646YUC #define HB_CP_ACSORT HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "АБЦЧЋДЂЕФГХИЈКЛМНОПЉРСШТУВЊЏЅЗЖ" -#define HB_CP_LOWER "абцчћдђефгхијклмнопљрсштувњџѕзж" +#include "l_sr_cyr.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/cpsrwin.c b/harbour/src/codepage/cpsrwin.c index 96e708940a..8dc07594d7 100644 --- a/harbour/src/codepage/cpsrwin.c +++ b/harbour/src/codepage/cpsrwin.c @@ -55,8 +55,7 @@ #define HB_CP_INFO "Serbian Windows-1251" #define HB_CP_UNITB HB_UNITB_1251 #define HB_CP_ACSORT HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ" -#define HB_CP_LOWER "абвгдђежзијклљмнњопрстћуфхцчџш" +#include "l_sr_cyr.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/cpua866.c b/harbour/src/codepage/cpua866.c index e8ed0bd1f8..a62deb7b3f 100644 --- a/harbour/src/codepage/cpua866.c +++ b/harbour/src/codepage/cpua866.c @@ -54,8 +54,7 @@ #define HB_CP_INFO "Ukrainian CP-866" #define HB_CP_UNITB HB_UNITB_866 #define HB_CP_ACSORT 
HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "АБВГДЕЁЄЖЗИIЇЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ" -#define HB_CP_LOWER "абвгдеёєжзиiїйклмнопрстуфхцчшщъыьэюя" +#include "l_uk.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/l_sr_cyr.c b/harbour/src/codepage/l_sr_cyr.c new file mode 100644 index 0000000000..2a4392f773 --- /dev/null +++ b/harbour/src/codepage/l_sr_cyr.c @@ -0,0 +1,8 @@ +/* + * $Id$ + */ + +/* Przemyslaw Czerpak */ + +#define HB_CP_UPPER "АБЦЧЋДЂЕФГХИЈКЛМНОПЉРСШТУВЊЏЅЗЖ" +#define HB_CP_LOWER "абцчћдђефгхијклмнопљрсштувњџѕзж" diff --git a/harbour/src/codepage/l_sr_lat.c b/harbour/src/codepage/l_sr_lat.c new file mode 100644 index 0000000000..e33854d74c --- /dev/null +++ b/harbour/src/codepage/l_sr_lat.c @@ -0,0 +1,8 @@ +/* + * $Id$ + */ + +/* Przemyslaw Czerpak */ + +#define HB_CP_UPPER "ABCČĆDĐEFGHIJKLMNOPQRSŠTUVWXYZŽ" +#define HB_CP_LOWER "abcčćdđefghijklmnopqrsštuvwxyzž"