2013-01-30 19:48 UTC+0100 Viktor Szakats (harbour syenar.net)

+ src/codepage/l_sr_cyr.c + src/codepage/l_sr_lat.c * src/codepage/cpsr646.c * src/codepage/cpsr646c.c * separated collations from the two correct SR CP modules ; TOFIX: ? This page suggests that there are Latin digraphs that should be specially sorted: https://en.wikipedia.org/wiki/Serbo-Croatian#Writing_systems * src/codepage/cpsrwin.c * changed to utilize the standard Serbian Cyrillic collation, the same one used by the SR646C CP module. The old one seemed quite wrong, though I'm not even remotely an expert in Serbian. [INCOMPATIBLE] If you use "SRWIN" for indexing, make sure to reindex ; Verify me * src/codepage/cpua866.c * changed to utilize the standard UK (Ukrainian) collation, the same one used by all other Ukrainian CP modules. The old one missed the characters: U+0490 (UPPER) - http://codepoints.net/U+0490 U+0491 (LOWER) - http://codepoints.net/U+0491 According to this page, these two chars are part of the Ukrainian alphabet: https://en.wikipedia.org/wiki/Ukrainian_language#Alphabet ; TOFIX: RUISO: This has 4 extra character pairs compared to all other Russian CP modules: UPPER: U+0401 - http://codepoints.net/U+0401 (Russian alphabet) U+0404 - http://codepoints.net/U+0404 (Ukrainian alphabet) U+0407 - http://codepoints.net/U+0407 (Ukrainian alphabet) U+040E - http://codepoints.net/U+040E (Belarusian alphabet) LOWER: U+0451 - http://codepoints.net/U+0451 (Russian alphabet) U+0454 - http://codepoints.net/U+0454 (Ukrainian alphabet) U+0457 - http://codepoints.net/U+0457 (Ukrainian alphabet) U+045E - http://codepoints.net/U+045E (Belarusian alphabet) From the above I surmise that it'd be better if the above chars were part of the std Russian collation, though I'm neither an expert nor sure that putting them at the end of the collation does any good; if it does not, they'd be better removed from RUISO. What is certain, though, is that U+0401/U+0451 should be added to the std collation in l_ru.c. Any comments from Russian-breathing Harbourers?
2013-01-30 19:14:54 +00:00
parent 4cccb2bd26
commit 61f7a12fe2
7 changed files with 73 additions and 11 deletions
--- a/harbour/ChangeLog.txt
+++ b/harbour/ChangeLog.txt
@@ -10,6 +10,56 @@
     * Change, ! Fix, % Optimization, + Addition, - Removal, ; Comment
 */

+2013-01-30 19:48 UTC+0100 Viktor Szakats (harbour syenar.net)
+  + src/codepage/l_sr_cyr.c
+  + src/codepage/l_sr_lat.c
+  * src/codepage/cpsr646.c
+  * src/codepage/cpsr646c.c
+    * separated collations from the two correct SR CP modules
+    ; TOFIX: ? This page suggests that there are Latin digraphs
+              that should be specially sorted:
+                 https://en.wikipedia.org/wiki/Serbo-Croatian#Writing_systems
+
+  * src/codepage/cpsrwin.c
+    * changed to utilize the standard Serbian Cyrillic collation,
+      the same one used by the SR646C CP module. The old one seemed
+      quite wrong, though I'm not even remotely an expert in Serbian.
+      [INCOMPATIBLE]
+      If you use "SRWIN" for indexing, make sure to reindex
+    ; Verify me
+
+  * src/codepage/cpua866.c
+    * changed to utilize the standard UK (Ukrainian) collation,
+      the same one used by all other Ukrainian CP modules. The old
+      one missed the characters:
+         U+0490 (UPPER) - http://codepoints.net/U+0490
+         U+0491 (LOWER) - http://codepoints.net/U+0491
+      According to this page, these two chars are part of the
+      Ukrainian alphabet:
+         https://en.wikipedia.org/wiki/Ukrainian_language#Alphabet
+
+  ; TOFIX: RUISO:
+           This has 4 extra character pairs compared to all
+           other Russian CP modules:
+              UPPER:
+                 U+0401 - http://codepoints.net/U+0401 (Russian alphabet)
+                 U+0404 - http://codepoints.net/U+0404 (Ukrainian alphabet)
+                 U+0407 - http://codepoints.net/U+0407 (Ukrainian alphabet)
+                 U+040E - http://codepoints.net/U+040E (Belarusian alphabet)
+              LOWER:
+                 U+0451 - http://codepoints.net/U+0451 (Russian alphabet)
+                 U+0454 - http://codepoints.net/U+0454 (Ukrainian alphabet)
+                 U+0457 - http://codepoints.net/U+0457 (Ukrainian alphabet)
+                 U+045E - http://codepoints.net/U+045E (Belarusian alphabet)
+           From the above I surmise that it'd be better if the
+           above chars were part of the std Russian collation,
+           though I'm neither an expert nor sure that putting
+           them at the end of the collation does any good; if it
+           does not, they'd be better removed from RUISO.
+           What is certain, though, is that U+0401/U+0451 should
+           be added to the std collation in l_ru.c.
+           Any comments from Russian-breathing Harbourers?
+
 2013-01-30 18:24 UTC+0100 Viktor Szakats (harbour syenar.net)
  * doc/en/lang.txt
  * include/hbapilng.h
@@ -93,9 +143,9 @@
    ; TOFIX: Here's the list of "CP" modules, that use irregular, but
             not 'raw' collations, that can't be explained with
             compatibility or other obvious reasons:
-               SRWIN - is this 'sr_cyr' or 'sr_lat', or else?
-               RUISO - why has this 4 extra chars at the end compared to std ru collation?
-               UA866 - why is it missing an accented version of a char compared to std ua collation?
+               SRWIN - is this 'sr_cyr' or 'sr_lat', or else? [CYRILLIC] [PATCHED]
+               RUISO - why has this 4 extra chars at the end compared to std ru collation? [MOVED]
+               UA866 - why is it missing an accented version of a char compared to std ua collation? [PATCHED]

  * src/codepage/cpbg866.c
  * src/codepage/cpbgiso.c
--- a/harbour/src/codepage/cpsr646.c
+++ b/harbour/src/codepage/cpsr646.c
@@ -54,8 +54,7 @@
 #define HB_CP_INFO      "Serbian ISO-646 (YUSCII)"
 #define HB_CP_UNITB     HB_UNITB_646YU
 #define HB_CP_ACSORT    HB_CDP_ACSORT_NONE
-#define HB_CP_UPPER     "ABCČĆDĐEFGHIJKLMNOPQRSŠTUVWXYZŽ"
-#define HB_CP_LOWER     "abcčćdđefghijklmnopqrsštuvwxyzž"
+#include "l_sr_lat.c"
 #define HB_CP_UTF8

 /* include CP registration code */
--- a/harbour/src/codepage/cpsr646c.c
+++ b/harbour/src/codepage/cpsr646c.c
@@ -54,8 +54,7 @@
 #define HB_CP_INFO      "Serbian ISO-646C (Cyrillic YUSCII)"
 #define HB_CP_UNITB     HB_UNITB_646YUC
 #define HB_CP_ACSORT    HB_CDP_ACSORT_NONE
-#define HB_CP_UPPER     "АБЦЧЋДЂЕФГХИЈКЛМНОПЉРСШТУВЊЏЅЗЖ"
-#define HB_CP_LOWER     "абцчћдђефгхијклмнопљрсштувњџѕзж"
+#include "l_sr_cyr.c"
 #define HB_CP_UTF8

 /* include CP registration code */
--- a/harbour/src/codepage/cpsrwin.c
+++ b/harbour/src/codepage/cpsrwin.c
@@ -55,8 +55,7 @@
 #define HB_CP_INFO      "Serbian Windows-1251"
 #define HB_CP_UNITB     HB_UNITB_1251
 #define HB_CP_ACSORT    HB_CDP_ACSORT_NONE
-#define HB_CP_UPPER     "АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ"
-#define HB_CP_LOWER     "абвгдђежзијклљмнњопрстћуфхцчџш"
+#include "l_sr_cyr.c"
 #define HB_CP_UTF8

 /* include CP registration code */
--- a/harbour/src/codepage/cpua866.c
+++ b/harbour/src/codepage/cpua866.c
@@ -54,8 +54,7 @@
 #define HB_CP_INFO      "Ukrainian CP-866"
 #define HB_CP_UNITB     HB_UNITB_866
 #define HB_CP_ACSORT    HB_CDP_ACSORT_NONE
-#define HB_CP_UPPER     "АБВГДЕЁЄЖЗИIЇЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ"
-#define HB_CP_LOWER     "абвгдеёєжзиiїйклмнопрстуфхцчшщъыьэюя"
+#include "l_uk.c"
 #define HB_CP_UTF8

 /* include CP registration code */
--- a/harbour/src/codepage/l_sr_cyr.c
+++ b/harbour/src/codepage/l_sr_cyr.c
@@ -0,0 +1,8 @@
+/*
+ * $Id$
+ */
+
+/* Przemyslaw Czerpak <druzus / at / priv.onet.pl> */
+
+#define HB_CP_UPPER     "АБЦЧЋДЂЕФГХИЈКЛМНОПЉРСШТУВЊЏЅЗЖ"
+#define HB_CP_LOWER     "абцчћдђефгхијклмнопљрсштувњџѕзж"
--- a/harbour/src/codepage/l_sr_lat.c
+++ b/harbour/src/codepage/l_sr_lat.c
@@ -0,0 +1,8 @@
+/*
+ * $Id$
+ */
+
+/* Przemyslaw Czerpak <druzus / at / priv.onet.pl> */
+
+#define HB_CP_UPPER     "ABCČĆDĐEFGHIJKLMNOPQRSŠTUVWXYZŽ"
+#define HB_CP_LOWER     "abcčćdđefghijklmnopqrsštuvwxyzž"