From 61f7a12fe2c1ec9298b18e59dfc41ba59624988c Mon Sep 17 00:00:00 2001 From: Viktor Szakats Date: Wed, 30 Jan 2013 19:14:54 +0000 Subject: [PATCH] 2013-01-30 19:48 UTC+0100 Viktor Szakats (harbour syenar.net) + src/codepage/l_sr_cyr.c + src/codepage/l_sr_lat.c * src/codepage/cpsr646.c * src/codepage/cpsr646c.c * separated collations from the two correct SR CP modules ; TOFIX: ? This page suggests that there are Latin digraphs that should be specially sorted: https://en.wikipedia.org/wiki/Serbo-Croatian#Writing_systems * src/codepage/cpsrwin.c * changed to use the standard Serbian Cyrillic collation, the same one used by the SR646C CP module. The old one seemed quite wrong, though I'm not even remotely an expert in Serbian. [INCOMPATIBLE] If you use "SRWIN" for indexing, make sure to reindex ; Verify me * src/codepage/cpua866.c * changed to use the standard UK (Ukrainian) collation, the same one used by all other Ukrainian CP modules. The old one missed the characters: U+0490 (UPPER) - http://codepoints.net/U+0490 U+0491 (LOWER) - http://codepoints.net/U+0491 According to this page, these two chars are part of the Ukrainian alphabet: https://en.wikipedia.org/wiki/Ukrainian_language#Alphabet ; TOFIX: RUISO: This has 4 extra character pairs compared to all other Russian CP modules: UPPER: U+0401 - http://codepoints.net/U+0401 (Russian alphabet) U+0404 - http://codepoints.net/U+0404 (Ukrainian alphabet) U+0407 - http://codepoints.net/U+0407 (Ukrainian alphabet) U+040E - http://codepoints.net/U+040E (Belarusian alphabet) LOWER: U+0451 - http://codepoints.net/U+0451 (Russian alphabet) U+0454 - http://codepoints.net/U+0454 (Ukrainian alphabet) U+0457 - http://codepoints.net/U+0457 (Ukrainian alphabet) U+045E - http://codepoints.net/U+045E (Belarusian alphabet) From the above I surmise that it'd be better if the above chars were part of the std Russian collation, though I'm neither an expert nor sure that putting them at the end of the collation does anything good, in which latter 
case, it'd be better to remove them from RUISO. What is certain, though, is that U+0401/U+0451 should be added to the std collation in l_ru.c. Any comments from Russian-breathing Harbourers? --- harbour/ChangeLog.txt | 56 +++++++++++++++++++++++++++++++-- harbour/src/codepage/cpsr646.c | 3 +- harbour/src/codepage/cpsr646c.c | 3 +- harbour/src/codepage/cpsrwin.c | 3 +- harbour/src/codepage/cpua866.c | 3 +- harbour/src/codepage/l_sr_cyr.c | 8 +++++ harbour/src/codepage/l_sr_lat.c | 8 +++++ 7 files changed, 73 insertions(+), 11 deletions(-) create mode 100644 harbour/src/codepage/l_sr_cyr.c create mode 100644 harbour/src/codepage/l_sr_lat.c diff --git a/harbour/ChangeLog.txt b/harbour/ChangeLog.txt index 2567ee97fa..1caf63c909 100644 --- a/harbour/ChangeLog.txt +++ b/harbour/ChangeLog.txt @@ -10,6 +10,56 @@ * Change, ! Fix, % Optimization, + Addition, - Removal, ; Comment */ +2013-01-30 19:48 UTC+0100 Viktor Szakats (harbour syenar.net) + + src/codepage/l_sr_cyr.c + + src/codepage/l_sr_lat.c + * src/codepage/cpsr646.c + * src/codepage/cpsr646c.c + * separated collations from the two correct SR CP modules + ; TOFIX: ? This page suggests that there are latin digraphs + that should be specially sorted: + https://en.wikipedia.org/wiki/Serbo-Croatian#Writing_systems + + * src/codepage/cpsrwin.c + * changed to utilize the standard Serbian cyrillic collation to + the same used by SR646C CP module. Old one seemed + quite wrong though I'm not even remotely expert in Serbian. + [INCOMPATIBLE] + If you use "SRWIN" for indexing, make sure to reindex + ; Verify me + + * src/codepage/cpua866.c + * changed to utilize the standard UK (Ukrainian) collation to + the same used by all other Ukrainian CP modules. 
The old + one missed the characters: + U+0490 (UPPER) - http://codepoints.net/U+0490 + U+0491 (LOWER) - http://codepoints.net/U+0491 + According to this page, these two chars are part of the + Ukrainian alphabet: + https://en.wikipedia.org/wiki/Ukrainian_language#Alphabet + + ; TOFIX: RUISO: + This has 4 extra character pairs compared to all + other Russian CP modules: + UPPER: + U+0401 - http://codepoints.net/U+0401 (Russian alphabet) + U+0404 - http://codepoints.net/U+0404 (Ukrainian alphabet) + U+0407 - http://codepoints.net/U+0407 (Ukrainian alphabet) + U+040E - http://codepoints.net/U+040E (Belarusian alphabet) + LOWER: + U+0451 - http://codepoints.net/U+0451 (Russian alphabet) + U+0454 - http://codepoints.net/U+0454 (Ukrainian alphabet) + U+0457 - http://codepoints.net/U+0457 (Ukrainian alphabet) + U+045E - http://codepoints.net/U+045E (Belarusian alphabet) + From the above I surmise that it'd be better if + above chars would be part of std russian collation, + though neither I'm an expert nor I'm sure that putting + them to the end of the collation does anything good, + in which latter case, it'd be better be removed from RUISO. + For sure though that U+401/U+0451 should be added to std + collation in l_ru.c. + Any comments from Russian-breathing Harbourers? + 2013-01-30 18:24 UTC+0100 Viktor Szakats (harbour syenar.net) * doc/en/lang.txt * include/hbapilng.h @@ -93,9 +143,9 @@ ; TOFIX: Here's the list of "CP" modules, that use irregular, but not 'raw' collations, that can't be explained with compatibility or other obvious reasons: - SRWIN - is this 'sr_cyr' or 'sr_lat', or else? - RUISO - why has this 4 extra chars at the end compared to std ru collation? - UA866 - why is it missing an accented version of a char compared to std ua collation? + SRWIN - is this 'sr_cyr' or 'sr_lat', or else? [CYRILLIC] [PATCHED] + RUISO - why has this 4 extra chars at the end compared to std ru collation? 
[MOVED] + UA866 - why is it missing an accented version of a char compared to std ua collation? [PATCHED] * src/codepage/cpbg866.c * src/codepage/cpbgiso.c diff --git a/harbour/src/codepage/cpsr646.c b/harbour/src/codepage/cpsr646.c index dbe2e87e4f..977a097ca7 100644 --- a/harbour/src/codepage/cpsr646.c +++ b/harbour/src/codepage/cpsr646.c @@ -54,8 +54,7 @@ #define HB_CP_INFO "Serbian ISO-646 (YUSCII)" #define HB_CP_UNITB HB_UNITB_646YU #define HB_CP_ACSORT HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "ABCČĆDĐEFGHIJKLMNOPQRSŠTUVWXYZŽ" -#define HB_CP_LOWER "abcčćdđefghijklmnopqrsštuvwxyzž" +#include "l_sr_lat.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/cpsr646c.c b/harbour/src/codepage/cpsr646c.c index 309199315b..3126a6816e 100644 --- a/harbour/src/codepage/cpsr646c.c +++ b/harbour/src/codepage/cpsr646c.c @@ -54,8 +54,7 @@ #define HB_CP_INFO "Serbian ISO-646C (Cyrillic YUSCII)" #define HB_CP_UNITB HB_UNITB_646YUC #define HB_CP_ACSORT HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "АБЦЧЋДЂЕФГХИЈКЛМНОПЉРСШТУВЊЏЅЗЖ" -#define HB_CP_LOWER "абцчћдђефгхијклмнопљрсштувњџѕзж" +#include "l_sr_cyr.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/cpsrwin.c b/harbour/src/codepage/cpsrwin.c index 96e708940a..8dc07594d7 100644 --- a/harbour/src/codepage/cpsrwin.c +++ b/harbour/src/codepage/cpsrwin.c @@ -55,8 +55,7 @@ #define HB_CP_INFO "Serbian Windows-1251" #define HB_CP_UNITB HB_UNITB_1251 #define HB_CP_ACSORT HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ" -#define HB_CP_LOWER "абвгдђежзијклљмнњопрстћуфхцчџш" +#include "l_sr_cyr.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/cpua866.c b/harbour/src/codepage/cpua866.c index e8ed0bd1f8..a62deb7b3f 100644 --- a/harbour/src/codepage/cpua866.c +++ b/harbour/src/codepage/cpua866.c @@ -54,8 +54,7 @@ #define HB_CP_INFO "Ukrainian CP-866" #define HB_CP_UNITB HB_UNITB_866 #define HB_CP_ACSORT 
HB_CDP_ACSORT_NONE -#define HB_CP_UPPER "АБВГДЕЁЄЖЗИIЇЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ" -#define HB_CP_LOWER "абвгдеёєжзиiїйклмнопрстуфхцчшщъыьэюя" +#include "l_uk.c" #define HB_CP_UTF8 /* include CP registration code */ diff --git a/harbour/src/codepage/l_sr_cyr.c b/harbour/src/codepage/l_sr_cyr.c new file mode 100644 index 0000000000..2a4392f773 --- /dev/null +++ b/harbour/src/codepage/l_sr_cyr.c @@ -0,0 +1,8 @@ +/* + * $Id$ + */ + +/* Przemyslaw Czerpak */ + +#define HB_CP_UPPER "АБЦЧЋДЂЕФГХИЈКЛМНОПЉРСШТУВЊЏЅЗЖ" +#define HB_CP_LOWER "абцчћдђефгхијклмнопљрсштувњџѕзж" diff --git a/harbour/src/codepage/l_sr_lat.c b/harbour/src/codepage/l_sr_lat.c new file mode 100644 index 0000000000..e33854d74c --- /dev/null +++ b/harbour/src/codepage/l_sr_lat.c @@ -0,0 +1,8 @@ +/* + * $Id$ + */ + +/* Przemyslaw Czerpak */ + +#define HB_CP_UPPER "ABCČĆDĐEFGHIJKLMNOPQRSŠTUVWXYZŽ" +#define HB_CP_LOWER "abcčćdđefghijklmnopqrsštuvwxyzž"