Files
harbour-core/tests/cpinfo.prg
Viktor Szakats cef75de547 2016-01-15 15:20 UTC+0100 Viktor Szakats (vszakats users.noreply.github.com)
* *
    % remove brandings and homepage from copyright header. Pass 3 - manual.
    * project homepage and name is described in README, amongst others
    ; this should make the diff between 3.4 and 3.2 easier to manage
2016-01-15 15:21:27 +01:00

478 lines
14 KiB
Plaintext

/*
* Simple program to generate information for Harbour CP module definition.
* Compile it with Clipper and link with given national sorting module
* (usually ntx*.obj) and then execute to generate letters strings for
* given national sorting module. Then use this string to define Harbour
* CP module in src/codepage/ directory.
*
* Copyright 2009 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
*
*/
#include "fileio.ch"
proc main( cdp, info, unicode )
local cUp, cLo, cUp2, cLo2, cOrd, cOrd2, cOrdMix, cMix, c, i, a
local lWarn, lBin, lSort, lEqual, lMixed, lIsUp, lIsLo
set alternate to cpinfo.txt additive
set alternate on
#ifdef __HARBOUR__
/* for test */
set( _SET_CODEPAGE, iif( empty( cdp ), "PLMAZ", upper( cdp ) ) )
hb_setTermCP( set( _SET_CODEPAGE ), set( _SET_CODEPAGE ) )
lEqual := .f.
#else
lEqual := .f.
#endif
a := array( 256 )
for i := 1 to len( a )
a[ i ] := i - 1
next
asort( a,,, {| x, y | chr( x ) + chr( 0 ) < chr( y ) + chr( 0 ) } )
? date(), time(), os(), version()
#ifdef __HARBOUR__
? "Character encoding: " + Set( _SET_CODEPAGE )
#else
? "Character encoding: " + _natSortVersion()
#endif
? replicate( "=", 50 )
lSort := .f.
for i := 1 to len( a ) - 1
if a[ i ] > a[ i + 1 ]
lSort := .t.
exit
endif
next
if ! lSort
? "simple byte sorting !!!"
endif
lBin := lWarn := lMixed := .f.
cUp := cLo := cOrd := ""
for i := 1 to len( a )
if i < len(a) .and. a[i] > a[ i + 1 ] .and. ! isalpha( chr( a[ i ] ) )
? "non alpha character " + charval( chr( a[ i ] ) ) + ;
" sorted in non ASCII order !!!"
lBin := lWarn := .t.
endif
c := chr( a[ i ] )
if i < len(a)
if c + chr( 0 ) > chr( a[ i + 1 ] ) + chr( 0 )
? "character " + charis( c ) + " is wrongly sorted"
lBin := lWarn := .t.
elseif ! lEqual .and. c + chr( 0 ) = chr( a[ i + 1 ] ) + chr( 0 )
? "character " + charis( c ) + " and " + chr( a[ i + 1 ] ) + ;
" have the same weight"
lWarn := .t.
endif
endif
cOrd += c
if isdigit( c )
if asc( c ) < asc( "0" ) .or. asc( c ) > asc( "9" )
? "character " + charis( c ) + " defined as digit"
lBin := lWarn := .t.
endif
elseif asc( c ) >= asc( "0" ) .and. asc( c ) <= asc( "9" )
? "character " + charis( c ) + " is not defined as digit"
lBin := lWarn := .t.
endif
if isalpha( c )
if isupper( c )
cUp += c
if islower( c )
? "character " + charis( c ) + " defined as upper and lower"
lWarn := .t.
endif
if lower( c ) == c
? "character " + charis( c ) + ;
" is the same as upper and lower"
lWarn := .t.
elseif ! islower( lower( c ) )
? "character " + charis( c ) + ;
" has lower character " + charis( lower( c ) ) + ;
" not marked as lower"
lBin := lWarn := .t.
endif
elseif islower( c )
cLo += c
if isupper( c )
? "character " + charis( c ) + " defined as upper and lower"
lWarn := .t.
endif
if upper( c ) == c
? "character " + charis( c ) + ;
" is the same as upper and lower"
lWarn := .t.
elseif ! isupper( upper( c ) )
? "character " + charis( c ) + ;
" has upper character " + charis( upper( c ) ) + ;
" not marked as upper"
lBin := lWarn := .t.
endif
else
? "character " + charis( c ) + " not defined as upper or lower"
lBin := lWarn := .t.
endif
else
if islower( c ) .or. isupper( c )
? "wrongly defined character " + ;
charval( c ) + ":" + charinfo( c )
lBin := lWarn := .t.
endif
if ! c == lower( c )
? "non alpha character " + charis( c ) + " has corresponding " + ;
"lower character " + charis( lower( c ) )
lBin := lWarn := .t.
endif
if ! c == upper( c )
? "non alpha character " + charis( c ) + " has corresponding " + ;
"upper character " + charis( upper( c ) )
lBin := lWarn := .t.
endif
endif
next
for i := 1 to len( cUp ) - 1
c := substr( cUp, i, 1 )
if c + chr( 0 ) > substr( cUp, i + 1, 1 ) + chr( 0 )
? "letter " + charis( c ) + " is wrongly sorted"
lBin := lWarn := .t.
endif
next
for i := 1 to len( cLo ) - 1
c := substr( cLo, i, 1 )
if c + chr( 0 ) > substr( cLo, i + 1, 1 ) + chr( 0 )
? "letter " + charis( c ) + " is wrongly sorted"
lBin := lWarn := .t.
endif
next
cMix := ""
if ! len( cUp ) == len( cLo )
? "number of upper and lower characters is different"
lWarn := .t.
else
for i := 1 to len( cUp )
cMix += substr( cUp, i, 1 )
cMix += substr( cLo, i, 1 )
next
endif
cOrd2 := cOrdMix := ""
for i := 0 to 255
if i == asc( cUp ) .or. i == asc( cLo )
if i == asc( cUp )
cOrd2 += cUp
else
cOrd2 += cLo
endif
cOrdMix += cMix
cMix := ""
endif
c := chr( i )
if ! c $ cUp .and. ! c $ cLo
cOrd2 += c
cOrdMix += c
endif
next
if ! cOrd == cOrd2
if cUp = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" .and. ;
cLo = "abcdefghijklmnopqrstuvwxyz"
cUp2 := substr( cUp, 27 )
cLo2 := substr( cLo, 27 )
cOrd2 := ""
lIsUp := lIsLo := .f.
for i := 0 to 255
c := chr( i )
if c $ cUp2
if ! lIsUp
cOrd2 += cUp2
lIsUp := .t.
if lIsLo
cOrd2 += cLo2
endif
endif
elseif c $ cLo2
if ! lIsLo
if lIsUp
cOrd2 += cLo2
endif
lIsLo := .t.
endif
else
cOrd2 += chr( i )
endif
next
if cOrd == cOrd2
cUp := cUp2
cLo := cLo2
else
cOrd2 := ""
for i := 0 to 255
if i == asc( cUp2 )
cOrd2 += cUp2
elseif i == asc( cLo2 )
cOrd2 += cLo2
endif
c := chr( i )
if ! c $ cUp2 .and. ! c $ cLo2
cOrd2 += chr( i )
endif
next
if cOrd == cOrd2
cUp := cUp2
cLo := cLo2
if lSort
? "letters are not sorted continuously" + ;
" (padded to 1-st non latin character)"
lBin := lWarn := .t.
endif
elseif cUp2 >= chr( 127 ) .and. cLo2 >= chr( 127 )
cUp := cUp2
cLo := cLo2
endif
endif
elseif cOrd == cOrdMix
? "letters case are mixed"
lMixed := .t.
endif
if ! cOrd == cOrd2 .and. lSort .and. ! lMixed
? "letters are not sorted continuously"
lBin := lWarn := .t.
endif
endif
if lWarn
if lBin
? "Warning: irregular CP which needs special definition in Harbour"
? " using binary tables generated by this program"
else
? "Warning: irregular CP which needs verification for human"
? " readable definition in Harbour"
endif
endif
? ' upper: "' + cUp + '"'
? ' lower: "' + cLo + '"'
if pad_letters( @cUp, @cLo, @lBin )
? 'HB_CP_UPPER: "' + cUp + '"'
? 'HB_CP_LOWER: "' + cLo + '"'
endif
? replicate( "=", 50 )
?
if ! empty( cdp )
write_file( "cp" + lower( cdp ) + ".c", ;
genCP( cdp, info, unicode, lBin, lWarn, lMixed, cUp, cLo ) )
endif
return
static function pad_letters( cUp, cLo, lBin )
local lRet, cUp2, cLo2, cU, cL, i, j
cUp2 := cLo2 := ""
i := j := 1
while i <= len( cUp ) .or. j <= len( cLo )
cU := substr( cUp, i, 1 )
cL := substr( cLo, j, 1 )
if upper( cL ) == cU .and. lower( cU ) == cL
++i
++j
elseif cL == "" .or. ( ! cU == "" .and. ! islower( lower( cU ) ) )
cL := " "
++i
elseif cU == "" .or. ! isupper( upper( cL ) )
cU := " "
++j
elseif upper( cL ) $ substr( cUp, i + 1 )
cL := lower( cU )
++i
else
cU := upper( cL )
++j
endif
if ! lBin .and. ( cU == " " .or. cU $ cUp2 ) .and. ;
( cL == " " .or. cL $ cLo2 )
lBin := .t.
endif
cUp2 += cU
cLo2 += cL
enddo
lRet := !( cUp == cUp2 .and. cLo == cLo2 )
cUp := cUp2
cLo := cLo2
return lRet
static function charval( c )
return "'" + c + "' (" + ltrim( str( asc( c ) ) ) + ")"
static function charis( c )
return "'" + c + "' (" + ltrim( str( asc( c ) ) ) + ":" + ;
iif( isalpha( c ), "A", " " ) + ;
iif( isupper( c ), "U", " " ) + ;
iif( islower( c ), "L", " " ) + ;
iif( isdigit( c ), "D", " " ) + ")"
static function charinfo( c )
local cInfo
cInfo := "ISALPHA->" + iif( isalpha( c ), "Y", "N" )
cInfo += ", ISUPPER->" + iif( isupper( c ), "Y", "N" )
cInfo += ", ISLOWER->" + iif( islower( c ), "Y", "N" )
cInfo += ", ISDIGIT->" + iif( isdigit( c ), "Y", "N" )
cInfo += ", UPPER->'" + upper( c ) + "'"
cInfo += ", LOWER->'" + lower( c ) + "'"
return cInfo
#ifdef __HARBOUR__
#include "hbextcdp.ch"
#define EOL hb_eol()
#else
#define EOL chr( 13 ) + chr( 10 )
#endif
#define HB_CDP_DIGIT 1
#define HB_CDP_ALPHA 2
#define HB_CDP_LOWER 4
#define HB_CDP_UPPER 8
static function write_file( cName, cBody )
local lRet := .f.
local hFile
hFile := fcreate( cName )
if hFile != F_ERROR
lRet := fwrite( hFile, cBody, len( cBody ) ) == len( cBody )
fclose( hFile )
endif
return lRet
static function genCP( id, info, unicode, lBin, lWarn, lMixed, cUp, cLo )
local flags[ 256 ], upper[ 256 ], lower[ 256 ], sort[ 256 ], tmp[ 256 ]
local i, c
id := upper( id )
if empty( info )
info := _natSortVer()
endif
if empty( unicode )
#ifdef __HARBOUR__
unicode := hb_cdpUniID()
if unicode = "cp"
unicode := substr( unicode, 3 )
elseif unicode = "iso" .or. unicode = "bg-"
unicode := substr( unicode, 4 )
endif
unicode := upper( strtran( unicode, "-", "_" ) )
if unicode = "KAM" .or. unicode = "MAZ" .or. unicode = "MIC"
unicode := left( unicode, 3 )
endif
#else
unicode := "437"
#endif
else
unicode := upper( unicode )
endif
for i := 1 to 256
c := chr( i - 1 )
flags[ i ] := 0
if isdigit( c )
flags[ i ] += HB_CDP_DIGIT
endif
if isalpha( c )
flags[ i ] += HB_CDP_ALPHA
endif
if isupper( c )
flags[ i ] += HB_CDP_UPPER
endif
if islower( c )
flags[ i ] += HB_CDP_LOWER
endif
upper[ i ] := asc( upper( c ) )
lower[ i ] := asc( lower( c ) )
tmp[ i ] := i - 1
next
asort( tmp,,, {| x, y | chr( x ) + chr( 0 ) < chr( y ) + chr( 0 ) } )
for i := 1 to 256
sort[ tmp[ i ] + 1 ] := i - 1
next
return genCPfile( id, info, unicode, flags, upper, lower, sort, ;
lBin, lWarn, lMixed, cUp, cLo )
static function genCPfile( id, info, unicode, flags, upper, lower, sort, ;
lBin, lWarn, lMixed, cUp, cLo )
local cDef
cDef := ;
'/*' + EOL + ;
' * National Collation Support Module ($1)' + EOL + ;
' *' + EOL + ;
' * Copyright 2009 Przemyslaw Czerpak <druzus / at / priv.onet.pl>' + EOL + ;
' *' + EOL + ;
' * This file is generated automatically by cpinfo.prg' + EOL + ;
' */' + EOL + EOL + ;
'#define HB_CP_ID $1' + EOL + ;
'#define HB_CP_INFO "$2"' + EOL + ;
'#define HB_CP_UNITB HB_UNITB_$3' + EOL
if ! lBin
cDef += ;
'#define HB_CP_ACSORT HB_CDP_ACSORT_NONE' + EOL
if lMixed
cDef += '#define HB_CP_CSSORT HB_CDP_CSSORT_MIXED' + EOL
endif
#ifdef __HARBOUR__
cDef += ;
'#define HB_CP_UPPER "' + hb_StrToUTF8( cUp ) + '"' + EOL + ;
'#define HB_CP_LOWER "' + hb_StrToUTF8( cLo ) + '"' + EOL + ;
'#define HB_CP_UTF8' + EOL + ;
EOL
#else
cDef += ;
'#define HB_CP_UPPER "' + cUp + '"' + EOL + ;
'#define HB_CP_LOWER "' + cLo + '"' + EOL + ;
EOL
#endif
if lWarn
cDef += ;
'#if 0 /* TOVERIFY: binary tables */' + EOL
endif
endif
if lBin .or. lWarn
cDef += ;
EOL + ;
'#define HB_CP_RAW' + EOL + EOL + ;
'static const unsigned char s_flags[ 256 ] = { $f };' + EOL + ;
'static const unsigned char s_upper[ 256 ] = { $u };' + EOL + ;
'static const unsigned char s_lower[ 256 ] = { $l };' + EOL + ;
'static const unsigned char s_sort [ 256 ] = { $s };' + EOL + ;
EOL
if ! lBin
cDef += ;
'#endif' + EOL + EOL
endif
endif
cDef += ;
'/* include CP registration code */' + EOL + ;
'#include "hbcdpreg.h"' + EOL
cDef := strtran( cDef, "$f", a2def( flags ) )
cDef := strtran( cDef, "$u", a2def( upper ) )
cDef := strtran( cDef, "$l", a2def( lower ) )
cDef := strtran( cDef, "$s", a2def( sort ) )
cDef := strtran( cDef, "$1", id )
cDef := strtran( cDef, "$2", info )
cDef := strtran( cDef, "$3", unicode )
return cDef
func a2def( a )
local i, cData := ""
for i := 1 to len( a )
cData += iif( i == 1, "", "," ) + ltrim( str( a[ i ] ) )
next
return cData