2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)

* harbour/src/common/expropt2.c
    * small code simplification

  * harbour/src/rtl/hbregex.c
    % eliminated call to pcre_config() on each regex compilation

  * harbour/utils/hbmk2/hbmk2.prg
    ! fixed -head=full for code using non-UTF8 characters.
      It was broken because in UNIX builds UTF8 is always enabled
      as the HVM CP in HBMK2 and this setting was inherited by PCRE
      via the PCRE_UTF8 flag. If this flag is used, PCRE validates
      all strings and refuses to perform any operation if they are
      not valid UTF8 strings, so it was not working at all for
      source code using a different encoding.
    ! fixed regex used in -head=full to respect shortcuts in #include
      directive, e.g.:
         #incl "file.ch"
    ! fixed regex used in -head=full to recognize #include directives
      separated by ';', e.g.:
         #include "file1.ch" ; #include "file2.ch" ; #include "file3.ch"
    ; TOFIX: UTF8 mode on output should not be enabled unconditionally in
             all UNIX builds. There are many *nix installations where UTF8
             is not the system CP, e.g. older Linux distributions. In fact
             it's a per-user attribute, so each user can use a different
             encoding local to his connection and/or terminal settings.
             It's probably a good idea to use code like:
                  lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. ;
                           "UTF-8" $ GetEnv( "LC_CTYPE" )
This commit is contained in:
Przemyslaw Czerpak
2012-05-08 10:28:46 +00:00
parent d7d875b9f2
commit 6f27c78569
4 changed files with 58 additions and 30 deletions

View File

@@ -16,6 +16,36 @@
The license applies to all entries newer than 2009-04-28.
*/
2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
* harbour/src/common/expropt2.c
* small code simplification
* harbour/src/rtl/hbregex.c
% eliminated call to pcre_config() on each regex compilation
* harbour/utils/hbmk2/hbmk2.prg
! fixed -head=full for code using non-UTF8 characters.
It was broken because in UNIX builds UTF8 is always enabled
as the HVM CP in HBMK2 and this setting was inherited by PCRE
via the PCRE_UTF8 flag. If this flag is used, PCRE validates
all strings and refuses to perform any operation if they are
not valid UTF8 strings, so it was not working at all for
source code using a different encoding.
! fixed regex used in -head=full to respect shortcuts in #include
directive, e.g.:
#incl "file.ch"
! fixed regex used in -head=full to recognize #include directives
separated by ';', e.g.:
#include "file1.ch" ; #include "file2.ch" ; #include "file3.ch"
; TOFIX: UTF8 mode on output should not be enabled unconditionally in
all UNIX builds. There are many *nix installations where UTF8
is not the system CP, e.g. older Linux distributions. In fact
it's a per-user attribute, so each user can use a different
encoding local to his connection and/or terminal settings.
It's probably a good idea to use code like:
lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. ;
"UTF-8" $ GetEnv( "LC_CTYPE" )
2012-05-08 12:26 UTC+0200 Viktor Szakats (harbour syenar.net)
* ChangeLog
* deleted my EOL spaces

View File

@@ -2046,18 +2046,12 @@ HB_BOOL hb_compExprReduceBCHAR( HB_EXPR_PTR pSelf, HB_COMP_DECL )
HB_EXPR_PTR pExpr = HB_COMP_EXPR_NEW( HB_ET_STRING );
pExpr->ValType = HB_EV_STRING;
if( pArg->value.asNum.NumType == HB_ET_LONG )
{
pExpr->value.asString.string = ( char * ) hb_szAscii[ ( int ) pArg->value.asNum.val.l & 0xff ];
pExpr->value.asString.dealloc = HB_FALSE;
pExpr->nLength = 1;
}
else
{
pExpr->value.asString.string = ( char * ) hb_szAscii[ ( unsigned int ) pArg->value.asNum.val.d & 0xff ];
pExpr->value.asString.dealloc = HB_FALSE;
pExpr->nLength = 1;
}
pExpr->value.asString.string =
( char * ) hb_szAscii[ ( pArg->value.asNum.NumType == HB_ET_LONG ?
( unsigned int ) pArg->value.asNum.val.l :
( unsigned int ) pArg->value.asNum.val.d ) & 0xff ];
pExpr->value.asString.dealloc = HB_FALSE;
pExpr->nLength = 1;
HB_COMP_EXPR_FREE( pParms );
HB_COMP_EXPR_FREE( pSelf->value.asFunCall.pFunName );

View File

@@ -57,6 +57,10 @@
#include "hbapierr.h"
#include "hbinit.h"
#if defined( HB_HAS_PCRE )
static int s_iUTF8Enabled;
#endif
static void hb_regfree( PHB_REGEX pRegEx )
{
#if defined( HB_HAS_PCRE )
@@ -81,15 +85,9 @@ static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) |
( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 );
/* detect UTF-8 support. */
{
int iUTF8Enabled;
if( pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ) != 0 )
iUTF8Enabled = 0;
/* use UTF8 in pcre when available and HVM CP is also UTF8. */
if( iUTF8Enabled && hb_cdpIsUTF8( NULL ) )
iCFlags |= PCRE_UTF8;
}
/* use UTF8 in pcre when available and HVM CP is also UTF8. */
if( s_iUTF8Enabled && hb_cdpIsUTF8( NULL ) )
iCFlags |= PCRE_UTF8;
pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError,
&iErrOffset, pCharTable );
@@ -562,13 +560,13 @@ static void hb_pcre_free( void * ptr )
HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ )
#if defined( HB_HAS_PCRE )
/* Hack to force linking newer PCRE versions not the one included in BCC RTL */
# if defined( __BORLANDC__ )
{
int iUTF8Enabled;
pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled );
}
# endif
/* detect UTF-8 support.
* In BCC builds this code also forces linking newer PCRE versions
* than the one included in BCC RTL.
*/
if( pcre_config( PCRE_CONFIG_UTF8, &s_iUTF8Enabled ) != 0 )
s_iUTF8Enabled = 0;
pcre_malloc = hb_pcre_grab;
pcre_free = hb_pcre_free;
pcre_stack_malloc = hb_pcre_grab;

View File

@@ -7559,9 +7559,15 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode )
http://www.pcre.org/pcre.txt */
IF s_pRegexInclude == NIL
s_pRegexInclude := hb_regexComp( '^[[:blank:]]*#[[:blank:]]*(include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|'.+?'|`.+?'"+')',;
/* Switch to non UTF8 CP - otherwise PCRE fails on user files
* containing non UTF8 characters. For this expression we do
* not need UTF8 or any other fixed encoding.
*/
tmp := hb_cdpSelect( "EN" )
s_pRegexInclude := hb_regexComp( '(^|;)[[:blank:]]*#[[:blank:]]*(incl|inclu|includ|include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|['`].+?'"+')',;
.F. /* lCaseSensitive */,;
.T. /* lNewLine */ )
hb_cdpSelect( tmp )
IF Empty( s_pRegexInclude )
hbmk_OutErr( hbmk, I_( "Internal Error: Regular expression engine missing or unsupported. Check your Harbour build settings." ) )
s_pRegexInclude := 0 /* To show the error only once by setting to non-NIL empty value */
@@ -7579,7 +7585,7 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode )
NIL /* lNewLine */, NIL, ;
NIL /* nGetMatch */, ;
.T. /* lOnlyMatch */ )
cHeader := tmp[ 3 ] /* First match marker */
cHeader := atail( tmp ) /* Last group in match marker */
lSystemHeader := ( Left( cHeader, 1 ) == "<" )
cHeader := SubStr( cHeader, 2, Len( cHeader ) - 2 )