From 3a71036691c31effb41f3c00710f6477c42b3d09 Mon Sep 17 00:00:00 2001 From: Przemyslaw Czerpak Date: Sun, 5 Dec 2010 00:04:47 +0000 Subject: [PATCH] 2010-12-05 01:04 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl) * harbour/include/hbapicdp.h * harbour/include/hbcdpreg.h * harbour/src/rtl/cdpapi.c * harbour/tests/cpinfo.prg + added support to define CPs using different letter case sorting in human readable form. Now it's possible to use: // uppers before lowers: ABCDE...abcde... #define HB_CP_CSSORT HB_CDP_CSSORT_UPLO // uppers and lowers are mixed: AaBbCcDdEe.... #define HB_CP_CSSORT HB_CDP_CSSORT_MIXED // ignore case #define HB_CP_CSSORT HB_CDP_CSSORT_IGNORE --- harbour/ChangeLog | 14 ++++++++++ harbour/include/hbapicdp.h | 10 +++++++- harbour/include/hbcdpreg.h | 5 +++- harbour/src/rtl/cdpapi.c | 52 ++++++++++++++++++++++++++++++-------- harbour/tests/cpinfo.prg | 50 ++++++++++++++++++++++++------------ 5 files changed, 103 insertions(+), 28 deletions(-) diff --git a/harbour/ChangeLog b/harbour/ChangeLog index 145b17b710..ac6b5ddef9 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -16,6 +16,20 @@ The license applies to all entries newer than 2009-04-28. */ +2010-12-05 01:04 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl) + * harbour/include/hbapicdp.h + * harbour/include/hbcdpreg.h + * harbour/src/rtl/cdpapi.c + * harbour/tests/cpinfo.prg + + added support to define CPs using different letter case sorting + in human readable form. Now it's possible to use: + // uppers before lowers: ABCDE...abcde... + #define HB_CP_CSSORT HB_CDP_CSSORT_UPLO + // uppers and lowers are mixed: AaBbCcDdEe.... + #define HB_CP_CSSORT HB_CDP_CSSORT_MIXED + // ignore case + #define HB_CP_CSSORT HB_CDP_CSSORT_IGNORE + 2010-12-04 19:32 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl) * harbour/include/dbinfo.ch * harbour/src/rdd/dbfntx/dbfntx1.c diff --git a/harbour/include/hbapicdp.h b/harbour/include/hbapicdp.h index 118f31f7cf..1366603a20 100644 --- a/harbour/include/hbapicdp.h +++ b/harbour/include/hbapicdp.h @@ -324,6 +324,13 @@ extern HB_EXPORT void hb_vmSetCDP( PHB_CODEPAGE pCDP ); characters being compared are the same ( interleaving ) */ +/* letter case sensitive sorting */ +#define HB_CDP_CSSORT_UPLO 0 /* upper letters first then lower + ones */ +#define HB_CDP_CSSORT_MIXED 1 /* upper and lower letters are + mixed */ +#define HB_CDP_CSSORT_IGNORE 2 /* ignore case */ + /* byte order */ #define HB_CDP_ENDIAN_NATIVE 0 #define HB_CDP_ENDIAN_LITTLE 1 @@ -335,7 +342,8 @@ extern HB_EXPORT HB_BOOL hb_cdpRegisterNew( const char * id, PHB_UNITABLE uniTable, const char * pszUpper, const char * pszLower, - unsigned int nACSort ); + unsigned int nACSort, + unsigned int nCaseSort ); extern HB_EXPORT void hb_cdpReleaseAll( void ); extern HB_EXPORT const char * hb_cdpID( void ); extern HB_EXPORT PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp ); diff --git a/harbour/include/hbcdpreg.h b/harbour/include/hbcdpreg.h index e4fe2292d6..df468b567b 100644 --- a/harbour/include/hbcdpreg.h +++ b/harbour/include/hbcdpreg.h @@ -80,8 +80,11 @@ HB_CALL_ON_STARTUP_BEGIN( HB_MACRONAME_JOIN( _hb_codepage_Init_, HB_CP_ID ) ) }; hb_cdpRegisterRaw( &s_codePage ); #else + #ifndef HB_CP_CSSORT + #define HB_CP_CSSORT HB_CDP_CSSORT_UPLO + #endif hb_cdpRegisterNew( HB_MACRO2STRING( HB_CP_ID ), HB_CP_INFO, HB_CP_UNITB, - HB_CP_UPPER, HB_CP_LOWER, HB_CP_ACSORT ); + HB_CP_UPPER, HB_CP_LOWER, HB_CP_ACSORT, HB_CP_CSSORT ); #endif /* HB_CP_RAW */ #if defined( HB_PRAGMA_STARTUP ) diff --git a/harbour/src/rtl/cdpapi.c b/harbour/src/rtl/cdpapi.c index 1de7cf03da..607abd1f65 100644 --- a/harbour/src/rtl/cdpapi.c +++ b/harbour/src/rtl/cdpapi.c @@ -1783,7 +1783,8 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, PHB_UNITABLE uniTable, const char * pszUpper, const char * pszLower, - unsigned int nACSort ) + unsigned int nACSort, + unsigned int nCaseSort ) { HB_BOOL lSort, fError; int iMulti, iAcc, iAccUp, iAccLo, iSortUp, iSortLo, i; @@ -1880,10 +1881,10 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, if( ucUp != ' ' ) { used[ ucUp ] = 1; - ++iSortLo; if( ucUp < ucUp2 ) lSort = HB_TRUE; ucUp2 = ucUp; + ++iSortLo; } if( ucLo != ' ' ) { @@ -1897,7 +1898,8 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, if( iMulti > 64 ) fError = HB_TRUE; - if( fError || nACSort > HB_CDP_ACSORT_INTERLEAVED ) + if( fError || nACSort > HB_CDP_ACSORT_INTERLEAVED || + nCaseSort > HB_CDP_CSSORT_IGNORE ) { #ifdef __HB_IGNORE_CP_ERRORS return NULL; @@ -1911,6 +1913,9 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, else if( nACSort != HB_CDP_ACSORT_NONE ) lSort = HB_TRUE; + if( nCaseSort != HB_CDP_CSSORT_UPLO ) + lSort = HB_TRUE; + nSize = 0x300; if( lSort ) { @@ -1993,7 +1998,14 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, { flags[ ( HB_UCHAR ) multi->cFirst[ 1 ] ] |= HB_CDP_MULTI1; flags[ ( HB_UCHAR ) multi->cLast [ 1 ] ] |= HB_CDP_MULTI2; - multi->sortLo = ++iSortLo - iAccLo; + + if( nCaseSort == HB_CDP_CSSORT_UPLO ) + ++iSortLo; + else if( nCaseSort == HB_CDP_CSSORT_MIXED ) + iSortLo = ++iSortUp; + else + iSortLo = iSortUp; + multi->sortLo = iSortLo - iAccLo; } if( *pup == '=' ) { @@ -2070,9 +2082,24 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, { if( sort[ ucLo ] == 0 ) { - if( iAcc && nACSort != HB_CDP_ACSORT_NONE ) - ++iAccLo; - sort[ ucLo ] = ( HB_UCHAR ) ( ++iSortLo - iAccLo ); + if( nCaseSort == HB_CDP_CSSORT_UPLO ) + { + if( iAcc && nACSort != HB_CDP_ACSORT_NONE ) + ++iAccLo; + ++iSortLo; + } + else + { + if( nCaseSort == HB_CDP_CSSORT_MIXED ) + { + if( iAcc && nACSort != HB_CDP_ACSORT_NONE ) + ++iAccUp; + iSortLo = ++iSortUp; + } + iAccLo = iAccUp; + iSortLo = iSortUp; + } + sort[ ucLo ] = ( HB_UCHAR ) ( iSortLo - iAccLo ); if( acc ) acc[ ucLo ] = ( HB_UCHAR ) iSortLo; if( ucLo2 > ucLo ) @@ -2107,6 +2134,9 @@ static PHB_CODEPAGE hb_buildCodePage( const char * id, const char * info, else iMulti = 1; + if( nCaseSort != HB_CDP_CSSORT_UPLO ) + ucLo2 = 0; + for( iUp = iLo = 0, i = iMulti; i < 256; ++i ) { if( sort[ i ] == 0 ) @@ -2210,16 +2240,18 @@ HB_BOOL hb_cdpRegisterRaw( PHB_CODEPAGE cdp ) HB_BOOL hb_cdpRegisterNew( const char * id, const char * info, PHB_UNITABLE uniTable, const char * pszUpper, const char * pszLower, - unsigned int nACSort ) + unsigned int nACSort, + unsigned int nCaseSort ) { PHB_CODEPAGE * cdp_ptr; - HB_TRACE( HB_TR_DEBUG, ( "hb_cdpRegisterNew(%s,%s,%s,%s,%d)", id, info, pszUpper, pszLower, nACSort ) ); + HB_TRACE( HB_TR_DEBUG, ( "hb_cdpRegisterNew(%s,%s,%s,%s,%u,%u)", id, info, pszUpper, pszLower, nACSort, nCaseSort ) ); cdp_ptr = hb_cdpFindPos( id ); if( *cdp_ptr == NULL ) { - *cdp_ptr = hb_buildCodePage( id, info, uniTable, pszUpper, pszLower, nACSort ); + *cdp_ptr = hb_buildCodePage( id, info, uniTable, pszUpper, pszLower, + nACSort, nCaseSort ); return *cdp_ptr != NULL; } return HB_FALSE; diff --git a/harbour/tests/cpinfo.prg b/harbour/tests/cpinfo.prg index d6dc21425a..cfca60d13d 100644 --- a/harbour/tests/cpinfo.prg +++ b/harbour/tests/cpinfo.prg @@ -18,8 +18,8 @@ #include "fileio.ch" proc main( cdp, info, unicode ) - local cUp, cLo, cUp2, cLo2, cOrd, cOrd2, c, i, a - local lWarn, lBin, lSort, lEqual + local cUp, cLo, cUp2, cLo2, cOrd, cOrd2, cOrdMix, cMix, c, i, a + local lWarn, lBin, lSort, lEqual, lMixed set alternate to cpinfo.txt additive set alternate on @@ -29,7 +29,7 @@ proc main( cdp, info, unicode ) /* for test */ set( _SET_CODEPAGE, iif( empty( cdp ), "PLMAZ", upper( cdp ) ) ) hb_setTermCP( set( _SET_CODEPAGE ), set( _SET_CODEPAGE ) ) - lEqual := .t. + lEqual := .f. #else lEqual := .f. #endif @@ -57,7 +57,7 @@ proc main( cdp, info, unicode ) if !lSort ? "simple byte sorting !!!" endif - lBin := lWarn := .f. + lBin := lWarn := lMixed := .f. cUp := cLo := cOrd := "" for i := 1 to len( a ) if i < len(a) .and. a[i] > a[ i + 1 ] .and. !isalpha( chr( a[ i ] ) ) @@ -155,20 +155,31 @@ proc main( cdp, info, unicode ) lBin := lWarn := .t. endif next + cMix := "" if ! len( cUp ) == len( cLo ) ? "number of upper and lower characters is different" lWarn := .t. + else + for i := 1 to len( cUp ) + cMix += substr( cUp, i, 1 ) + cMix += substr( cLo, i, 1 ) + next endif - cOrd2 := "" + cOrd2 := cOrdMix := "" for i := 0 to 255 - if i == asc( cUp ) - cOrd2 += cUp - elseif i == asc( cLo ) - cOrd2 += cLo + if i == asc( cUp ) .or. i == asc( cLo ) + if i == asc( cUp ) + cOrd2 += cUp + else + cOrd2 += cLo + endif + cOrdMix += cMix + cMix := "" endif c := chr( i ) if ! c $ cUp .and. ! c $ cLo - cOrd2 += chr( i ) + cOrd2 += c + cOrdMix += c endif next if ! cOrd == cOrd2 @@ -220,8 +231,11 @@ proc main( cdp, info, unicode ) cLo := cLo2 endif endif + elseif cOrd == cOrdMix + ? "letters case are mixed" + lMixed := .t. endif - if ! cOrd == cOrd2 .and. lSort + if ! cOrd == cOrd2 .and. lSort .and. !lMixed ? "letters are not sorted continuously" lBin := lWarn := .t. endif @@ -246,7 +260,7 @@ proc main( cdp, info, unicode ) if !empty( cdp ) write_file( "cp" + lower( cdp ) + ".c", ; - genCP( cdp, info, unicode, lBin, lWarn, cUp, cLo ) ) + genCP( cdp, info, unicode, lBin, lWarn, lMixed, cUp, cLo ) ) endif return @@ -334,7 +348,7 @@ static function write_file( cName, cBody ) return lRet -static function genCP( id, info, unicode, lBin, lWarn, cUp, cLo ) +static function genCP( id, info, unicode, lBin, lWarn, lMixed, cUp, cLo ) local flags[ 256 ], upper[ 256 ], lower[ 256 ], sort[ 256 ], tmp[ 256 ] local i, c @@ -386,10 +400,10 @@ static function genCP( id, info, unicode, lBin, lWarn, cUp, cLo ) next return genCPfile( id, info, unicode, flags, upper, lower, sort, ; - lBin, lWarn, cUp, cLo ) + lBin, lWarn, lMixed, cUp, cLo ) static function genCPfile( id, info, unicode, flags, upper, lower, sort, ; - lBin, lWarn, cUp, cLo ) + lBin, lWarn, lMixed, cUp, cLo ) local cDef cDef := ; @@ -408,7 +422,11 @@ static function genCPfile( id, info, unicode, flags, upper, lower, sort, ; '#define HB_CP_UNITB HB_UNITB_$3' + EOL if !lBin cDef += ; - '#define HB_CP_ACSORT HB_CDP_ACSORT_NONE' + EOL + ; + '#define HB_CP_ACSORT HB_CDP_ACSORT_NONE' + EOL + if lMixed + cDef += '#define HB_CP_CSSORT HB_CDP_CSSORT_MIXED' + EOL + endif + cDef += ; '#define HB_CP_UPPER "' + cUp + '"' + EOL + ; '#define HB_CP_LOWER "' + cLo + '"' + EOL + ; EOL