diff --git a/harbour/ChangeLog b/harbour/ChangeLog index cfc79e7330..3a9fa203b0 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -16,6 +16,36 @@ The license applies to all entries newer than 2009-04-28. */ +2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl) + * harbour/src/common/expropt2.c + * small code simplification + + * harbour/src/rtl/hbregex.c + % eliminated call to pcre_config() on each regex compilation + + * harbour/utils/hbmk2/hbmk2.prg + ! fixed -head=full for code using non UTF8 characters. + It was broken because in UNIX builds UTF8 is always enabled + as HVM CP in HBMK2 and this setting was inherited by PCRE + with PCRE_UTF8 flag. If this flag is used PCRE validates + all strings and refuses to perform any operations if they are + not valid UTF8 strings so it was not working at all for + source code using a different encoding. + ! fixed regex used in -head=full to respect shortcuts in #include + directive, e.g.: + #incl "file.ch" + ! fixed regex used in -head=full to recognize #include directives + separated by ';', e.g.: + #include "file1.ch" ; #include "file2.ch" ; #include "file3.ch" + ; TOFIX: UTF8 mode on output should not be enabled unconditionally in + all UNIX builds. There are many *nix installations where UTF8 + is not system CP, e.g. older Linux distributions. In fact + it's a user attribute so each user can use different encoding + local to his connection and/or terminal settings. Probably + it's a good idea to use code like: + lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. 
; + "UTF-8" $ GetEnv( "LC_CTYPE" ) + 2012-05-08 12:26 UTC+0200 Viktor Szakats (harbour syenar.net) * ChangeLog * deleted my EOL spaces diff --git a/harbour/src/common/expropt2.c b/harbour/src/common/expropt2.c index 82f43863a3..5a8c27d960 100644 --- a/harbour/src/common/expropt2.c +++ b/harbour/src/common/expropt2.c @@ -2046,18 +2046,12 @@ HB_BOOL hb_compExprReduceBCHAR( HB_EXPR_PTR pSelf, HB_COMP_DECL ) HB_EXPR_PTR pExpr = HB_COMP_EXPR_NEW( HB_ET_STRING ); pExpr->ValType = HB_EV_STRING; - if( pArg->value.asNum.NumType == HB_ET_LONG ) - { - pExpr->value.asString.string = ( char * ) hb_szAscii[ ( int ) pArg->value.asNum.val.l & 0xff ]; - pExpr->value.asString.dealloc = HB_FALSE; - pExpr->nLength = 1; - } - else - { - pExpr->value.asString.string = ( char * ) hb_szAscii[ ( unsigned int ) pArg->value.asNum.val.d & 0xff ]; - pExpr->value.asString.dealloc = HB_FALSE; - pExpr->nLength = 1; - } + pExpr->value.asString.string = + ( char * ) hb_szAscii[ ( pArg->value.asNum.NumType == HB_ET_LONG ? + ( unsigned int ) pArg->value.asNum.val.l : + ( unsigned int ) pArg->value.asNum.val.d ) & 0xff ]; + pExpr->value.asString.dealloc = HB_FALSE; + pExpr->nLength = 1; HB_COMP_EXPR_FREE( pParms ); HB_COMP_EXPR_FREE( pSelf->value.asFunCall.pFunName ); diff --git a/harbour/src/rtl/hbregex.c b/harbour/src/rtl/hbregex.c index 0f8c176e28..fea09c6cd1 100644 --- a/harbour/src/rtl/hbregex.c +++ b/harbour/src/rtl/hbregex.c @@ -57,6 +57,10 @@ #include "hbapierr.h" #include "hbinit.h" +#if defined( HB_HAS_PCRE ) + static int s_iUTF8Enabled; +#endif + static void hb_regfree( PHB_REGEX pRegEx ) { #if defined( HB_HAS_PCRE ) @@ -81,15 +85,9 @@ static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx ) pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) | ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 ); - /* detect UTF-8 support. 
*/ - { - int iUTF8Enabled; - if( pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ) != 0 ) - iUTF8Enabled = 0; - /* use UTF8 in pcre when available and HVM CP is also UTF8. */ - if( iUTF8Enabled && hb_cdpIsUTF8( NULL ) ) - iCFlags |= PCRE_UTF8; - } + /* use UTF8 in pcre when available and HVM CP is also UTF8. */ + if( s_iUTF8Enabled && hb_cdpIsUTF8( NULL ) ) + iCFlags |= PCRE_UTF8; pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError, &iErrOffset, pCharTable ); @@ -562,13 +560,13 @@ static void hb_pcre_free( void * ptr ) HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ ) #if defined( HB_HAS_PCRE ) - /* Hack to force linking newer PCRE versions not the one included in BCC RTL */ -# if defined( __BORLANDC__ ) - { - int iUTF8Enabled; - pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ); - } -# endif + /* detect UTF-8 support. + * In BCC builds this code also forces linking newer PCRE versions + * than the one included in BCC RTL. + */ + if( pcre_config( PCRE_CONFIG_UTF8, &s_iUTF8Enabled ) != 0 ) + s_iUTF8Enabled = 0; + pcre_malloc = hb_pcre_grab; pcre_free = hb_pcre_free; pcre_stack_malloc = hb_pcre_grab; diff --git a/harbour/utils/hbmk2/hbmk2.prg b/harbour/utils/hbmk2/hbmk2.prg index d6ae9c30a0..c8d3eedf6e 100644 --- a/harbour/utils/hbmk2/hbmk2.prg +++ b/harbour/utils/hbmk2/hbmk2.prg @@ -7559,9 +7559,15 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode ) http://www.pcre.org/pcre.txt */ IF s_pRegexInclude == NIL - s_pRegexInclude := hb_regexComp( '^[[:blank:]]*#[[:blank:]]*(include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|'.+?'|`.+?'"+')',; + /* Switch to non UTF8 CP - otherwise PCRE fails on user files + * containing non UTF8 characters. For this expression we do + * not need UTF8 or any other fixed encoding. + */ + tmp := hb_cdpSelect( "EN" ) + s_pRegexInclude := hb_regexComp( '(^|;)[[:blank:]]*#[[:blank:]]*(incl|inclu|includ|include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|['`].+?'"+')',; .F. /* lCaseSensitive */,; .T. 
/* lNewLine */ ) + hb_cdpSelect( tmp ) IF Empty( s_pRegexInclude ) hbmk_OutErr( hbmk, I_( "Internal Error: Regular expression engine missing or unsupported. Check your Harbour build settings." ) ) s_pRegexInclude := 0 /* To show the error only once by setting to non-NIL empty value */ @@ -7579,7 +7585,7 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode ) NIL /* lNewLine */, NIL, ; NIL /* nGetMatch */, ; .T. /* lOnlyMatch */ ) - cHeader := tmp[ 3 ] /* First match marker */ + cHeader := atail( tmp ) /* Last group in match marker */ lSystemHeader := ( Left( cHeader, 1 ) == "<" ) cHeader := SubStr( cHeader, 2, Len( cHeader ) - 2 )