From 6f27c78569942a161a9e021e042c87c4bb32689d Mon Sep 17 00:00:00 2001 From: Przemyslaw Czerpak Date: Tue, 8 May 2012 10:28:46 +0000 Subject: [PATCH] 2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl) * harbour/src/common/expropt2.c * small code simplification * harbour/src/rtl/hbregex.c % eliminated call to pcre_config() on each regex compilation * harbour/utils/hbmk2/hbmk2.prg ! fixed -head=full for code using non UTF8 characters. It was broken because in UNIX builds UTF8 is always enabled as HVM CP in HBMK2 and this setting was inherited by PCRE with the PCRE_UTF8 flag. If this flag is used PCRE validates all strings and refuses to perform any operation if they are not valid UTF8 strings, so it was not working at all for source code using a different encoding. ! fixed regex used in -head=full to respect shortcuts in #include directive, e.g.: #incl "file.ch" ! fixed regex used in -head=full to recognize #include directives separated by ';', e.g.: #include "file1.ch" ; #include "file2.ch" ; #include "file3.ch" ; TOFIX: UTF8 mode on output should not be enabled unconditionally in all UNIX builds. There are many *nix installations where UTF8 is not the system CP, e.g. older Linux distributions. In fact it's a user attribute so each user can use a different encoding local to his connection and/or terminal settings. Probably it's a good idea to use code like: lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. ; "UTF-8" $ GetEnv( "LC_CTYPE" ) --- harbour/ChangeLog | 30 ++++++++++++++++++++++++++++++ harbour/src/common/expropt2.c | 18 ++++++------------ harbour/src/rtl/hbregex.c | 30 ++++++++++++++---------------- harbour/utils/hbmk2/hbmk2.prg | 10 ++++++++-- 4 files changed, 58 insertions(+), 30 deletions(-) diff --git a/harbour/ChangeLog b/harbour/ChangeLog index cfc79e7330..3a9fa203b0 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -16,6 +16,36 @@ The license applies to all entries newer than 2009-04-28. 
*/ +2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl) + * harbour/src/common/expropt2.c + * small code simplification + + * harbour/src/rtl/hbregex.c + % eliminated call to pcre_config() on each regex compilation + + * harbour/utils/hbmk2/hbmk2.prg + ! fixed -head=full for code using non UTF8 characters. + It was broken because in UNIX builds UTF8 is always enabled + as HVM CP in HBMK2 and this setting was inherited by PCRE + with the PCRE_UTF8 flag. If this flag is used PCRE validates + all strings and refuses to perform any operation if they are + not valid UTF8 strings, so it was not working at all for + source code using a different encoding. + ! fixed regex used in -head=full to respect shortcuts in #include + directive, e.g.: + #incl "file.ch" + ! fixed regex used in -head=full to recognize #include directives + separated by ';', e.g.: + #include "file1.ch" ; #include "file2.ch" ; #include "file3.ch" + ; TOFIX: UTF8 mode on output should not be enabled unconditionally in + all UNIX builds. There are many *nix installations where UTF8 + is not the system CP, e.g. older Linux distributions. In fact + it's a user attribute so each user can use a different encoding + local to his connection and/or terminal settings. Probably + it's a good idea to use code like: + lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. 
; + "UTF-8" $ GetEnv( "LC_CTYPE" ) + 2012-05-08 12:26 UTC+0200 Viktor Szakats (harbour syenar.net) * ChangeLog * deleted my EOL spaces diff --git a/harbour/src/common/expropt2.c b/harbour/src/common/expropt2.c index 82f43863a3..5a8c27d960 100644 --- a/harbour/src/common/expropt2.c +++ b/harbour/src/common/expropt2.c @@ -2046,18 +2046,12 @@ HB_BOOL hb_compExprReduceBCHAR( HB_EXPR_PTR pSelf, HB_COMP_DECL ) HB_EXPR_PTR pExpr = HB_COMP_EXPR_NEW( HB_ET_STRING ); pExpr->ValType = HB_EV_STRING; - if( pArg->value.asNum.NumType == HB_ET_LONG ) - { - pExpr->value.asString.string = ( char * ) hb_szAscii[ ( int ) pArg->value.asNum.val.l & 0xff ]; - pExpr->value.asString.dealloc = HB_FALSE; - pExpr->nLength = 1; - } - else - { - pExpr->value.asString.string = ( char * ) hb_szAscii[ ( unsigned int ) pArg->value.asNum.val.d & 0xff ]; - pExpr->value.asString.dealloc = HB_FALSE; - pExpr->nLength = 1; - } + pExpr->value.asString.string = + ( char * ) hb_szAscii[ ( pArg->value.asNum.NumType == HB_ET_LONG ? + ( unsigned int ) pArg->value.asNum.val.l : + ( unsigned int ) pArg->value.asNum.val.d ) & 0xff ]; + pExpr->value.asString.dealloc = HB_FALSE; + pExpr->nLength = 1; HB_COMP_EXPR_FREE( pParms ); HB_COMP_EXPR_FREE( pSelf->value.asFunCall.pFunName ); diff --git a/harbour/src/rtl/hbregex.c b/harbour/src/rtl/hbregex.c index 0f8c176e28..fea09c6cd1 100644 --- a/harbour/src/rtl/hbregex.c +++ b/harbour/src/rtl/hbregex.c @@ -57,6 +57,10 @@ #include "hbapierr.h" #include "hbinit.h" +#if defined( HB_HAS_PCRE ) + static int s_iUTF8Enabled; +#endif + static void hb_regfree( PHB_REGEX pRegEx ) { #if defined( HB_HAS_PCRE ) @@ -81,15 +85,9 @@ static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx ) pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) | ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 ); - /* detect UTF-8 support. 
*/ - { - int iUTF8Enabled; - if( pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ) != 0 ) - iUTF8Enabled = 0; - /* use UTF8 in pcre when available and HVM CP is also UTF8. */ - if( iUTF8Enabled && hb_cdpIsUTF8( NULL ) ) - iCFlags |= PCRE_UTF8; - } + /* use UTF8 in pcre when available and HVM CP is also UTF8. */ + if( s_iUTF8Enabled && hb_cdpIsUTF8( NULL ) ) + iCFlags |= PCRE_UTF8; pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError, &iErrOffset, pCharTable ); @@ -562,13 +560,13 @@ static void hb_pcre_free( void * ptr ) HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ ) #if defined( HB_HAS_PCRE ) - /* Hack to force linking newer PCRE versions not the one included in BCC RTL */ -# if defined( __BORLANDC__ ) - { - int iUTF8Enabled; - pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ); - } -# endif + /* detect UTF-8 support. + * In BCC builds this code also forces linking newer PCRE versions + * than the one included in BCC RTL. + */ + if( pcre_config( PCRE_CONFIG_UTF8, &s_iUTF8Enabled ) != 0 ) + s_iUTF8Enabled = 0; + pcre_malloc = hb_pcre_grab; pcre_free = hb_pcre_free; pcre_stack_malloc = hb_pcre_grab; diff --git a/harbour/utils/hbmk2/hbmk2.prg b/harbour/utils/hbmk2/hbmk2.prg index d6ae9c30a0..c8d3eedf6e 100644 --- a/harbour/utils/hbmk2/hbmk2.prg +++ b/harbour/utils/hbmk2/hbmk2.prg @@ -7559,9 +7559,15 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode ) http://www.pcre.org/pcre.txt */ IF s_pRegexInclude == NIL - s_pRegexInclude := hb_regexComp( '^[[:blank:]]*#[[:blank:]]*(include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|'.+?'|`.+?'"+')',; + /* Switch to non UTF8 CP - otherwise PCRE fails on user files + * containing non UTF8 characters. For this expression we do + * not need UTF8 or any other fixed encoding. + */ + tmp := hb_cdpSelect( "EN" ) + s_pRegexInclude := hb_regexComp( '(^|;)[[:blank:]]*#[[:blank:]]*(incl|inclu|includ|include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|['`].+?'"+')',; .F. /* lCaseSensitive */,; .T. 
/* lNewLine */ ) + hb_cdpSelect( tmp ) IF Empty( s_pRegexInclude ) hbmk_OutErr( hbmk, I_( "Internal Error: Regular expression engine missing or unsupported. Check your Harbour build settings." ) ) s_pRegexInclude := 0 /* To show the error only once by setting to non-NIL empty value */ @@ -7579,7 +7585,7 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode ) NIL /* lNewLine */, NIL, ; NIL /* nGetMatch */, ; .T. /* lOnlyMatch */ ) - cHeader := tmp[ 3 ] /* First match marker */ + cHeader := atail( tmp ) /* Last group in match marker */ lSystemHeader := ( Left( cHeader, 1 ) == "<" ) cHeader := SubStr( cHeader, 2, Len( cHeader ) - 2 )