2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)

* harbour/src/common/expropt2.c * small code simplification * harbour/src/rtl/hbregex.c % eliminated call to pcre_config() on each regex compilation * harbour/utils/hbmk2/hbmk2.prg ! fixed -head=full for code using non UTF8 characters. It was broken because in UNIX builds UTF8 is always enabled as HVM CP in HBMK2 and this setting was inherited by PCRE with the PCRE_UTF8 flag. If this flag is used PCRE validates all strings and refuses to perform any operation if they are not valid UTF8 strings, so it was not working at all for source code using a different encoding. ! fixed regex used in -head=full to respect shortcuts in #include directive, e.g.: #incl "file.ch" ! fixed regex used in -head=full to recognize #include directives separated by ';', e.g.: #include "file1.ch" ; #include "file2.ch" ; #include "file3.ch" ; TOFIX: UTF8 mode on output should not be enabled unconditionally in all UNIX builds. There are many *nix installations where UTF8 is not the system CP, e.g. older Linux distributions. In fact it's a user attribute so each user can use a different encoding local to their connection and/or terminal settings. Probably it's a good idea to use code like: lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. ; "UTF-8" $ GetEnv( "LC_CTYPE" )
2012-05-08 10:28:46 +00:00
parent d7d875b9f2
commit 6f27c78569
4 changed files with 58 additions and 30 deletions
--- a/harbour/ChangeLog
+++ b/harbour/ChangeLog
@@ -16,6 +16,36 @@
   The license applies to all entries newer than 2009-04-28.
 */

+2012-05-08 12:28 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
+  * harbour/src/common/expropt2.c
+    * small code simplification
+
+  * harbour/src/rtl/hbregex.c
+    % eliminated call to pcre_config() on each regex compilation
+
+  * harbour/utils/hbmk2/hbmk2.prg
+    ! fixed -head=full for code using non UTF8 characters.
+      It was broken because in UNIX builds UTF8 is always enabled
+      as HVM CP in HBMK2 and this setting was inherited by PCRE
+      with the PCRE_UTF8 flag. If this flag is used PCRE validates
+      all strings and refuses to perform any operation if they are
+      not valid UTF8 strings, so it was not working at all for
+      source code using a different encoding.
+    ! fixed regex used in -head=full to respect shortcuts in #include
+      directive, e.g.:
+         #incl "file.ch"
+    ! fixed regex used in -head=full to recognize #include directives
+      separated by ';', e.g.:
+         #include "file1.ch" ; #include "file2.ch" ; #include "file3.ch"
+    ; TOFIX: UTF8 mode on output should not be enabled unconditionally in
+             all UNIX builds. There are many *nix installations where UTF8
+             is not the system CP, e.g. older Linux distributions. In fact
+             it's a user attribute so each user can use a different encoding
+             local to their connection and/or terminal settings. Probably
+             it's a good idea to use code like:
+                  lUTF8 := "UTF-8" $ GetEnv( "LANG" ) .OR. ;
+                           "UTF-8" $ GetEnv( "LC_CTYPE" )
+
 2012-05-08 12:26 UTC+0200 Viktor Szakats (harbour syenar.net)
  * ChangeLog
    * deleted my EOL spaces
--- a/harbour/src/common/expropt2.c
+++ b/harbour/src/common/expropt2.c
@@ -2046,18 +2046,12 @@ HB_BOOL hb_compExprReduceBCHAR( HB_EXPR_PTR pSelf, HB_COMP_DECL )
      HB_EXPR_PTR pExpr = HB_COMP_EXPR_NEW( HB_ET_STRING );

      pExpr->ValType = HB_EV_STRING;
-      if( pArg->value.asNum.NumType == HB_ET_LONG )
-      {
-         pExpr->value.asString.string = ( char * ) hb_szAscii[ ( int ) pArg->value.asNum.val.l & 0xff ];
-         pExpr->value.asString.dealloc = HB_FALSE;
-         pExpr->nLength = 1;
-      }
-      else
-      {
-         pExpr->value.asString.string = ( char * ) hb_szAscii[ ( unsigned int ) pArg->value.asNum.val.d & 0xff ];
-         pExpr->value.asString.dealloc = HB_FALSE;
-         pExpr->nLength = 1;
-      }
+      pExpr->value.asString.string =
+         ( char * ) hb_szAscii[ ( pArg->value.asNum.NumType == HB_ET_LONG ?
+                           ( unsigned int ) pArg->value.asNum.val.l :
+                           ( unsigned int ) pArg->value.asNum.val.d ) & 0xff ];
+      pExpr->value.asString.dealloc = HB_FALSE;
+      pExpr->nLength = 1;

      HB_COMP_EXPR_FREE( pParms );
      HB_COMP_EXPR_FREE( pSelf->value.asFunCall.pFunName );
--- a/harbour/src/rtl/hbregex.c
+++ b/harbour/src/rtl/hbregex.c
@@ -57,6 +57,10 @@
 #include "hbapierr.h"
 #include "hbinit.h"

+#if defined( HB_HAS_PCRE )
+   static int s_iUTF8Enabled;
+#endif
+
 static void hb_regfree( PHB_REGEX pRegEx )
 {
 #if defined( HB_HAS_PCRE )
@@ -81,15 +85,9 @@ static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
   pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) |
                     ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 );

-   /* detect UTF-8 support. */
-   {
-      int iUTF8Enabled;
-      if( pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ) != 0 )
-         iUTF8Enabled = 0;
-      /* use UTF8 in pcre when available and HVM CP is also UTF8. */
-      if( iUTF8Enabled && hb_cdpIsUTF8( NULL ) )
-         iCFlags |= PCRE_UTF8;
-   }
+   /* use UTF8 in pcre when available and HVM CP is also UTF8. */
+   if( s_iUTF8Enabled && hb_cdpIsUTF8( NULL ) )
+      iCFlags |= PCRE_UTF8;

   pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError,
                                   &iErrOffset, pCharTable );
@@ -562,13 +560,13 @@ static void hb_pcre_free( void * ptr )

 HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ )
 #if defined( HB_HAS_PCRE )
-   /* Hack to force linking newer PCRE versions not the one included in BCC RTL */
-#  if defined( __BORLANDC__ )
-   {
-      int iUTF8Enabled;
-      pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled );
-   }
-#  endif
+   /* detect UTF-8 support.
+    * In BCC builds this code also forces linking newer PCRE versions
+    * than the one included in BCC RTL.
+    */
+   if( pcre_config( PCRE_CONFIG_UTF8, &s_iUTF8Enabled ) != 0 )
+      s_iUTF8Enabled = 0;
+
   pcre_malloc = hb_pcre_grab;
   pcre_free = hb_pcre_free;
   pcre_stack_malloc = hb_pcre_grab;
--- a/harbour/utils/hbmk2/hbmk2.prg
+++ b/harbour/utils/hbmk2/hbmk2.prg
@@ -7559,9 +7559,15 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode )
         http://www.pcre.org/pcre.txt */

   IF s_pRegexInclude == NIL
-      s_pRegexInclude := hb_regexComp( '^[[:blank:]]*#[[:blank:]]*(include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|'.+?'|`.+?'"+')',;
+      /* Switch to non UTF8 CP - otherwise PCRE fails on user files
+       * containing non UTF8 characters. For this expression we do
+       * not need UTF8 or any other fixed encoding.
+       */
+      tmp := hb_cdpSelect( "EN" )
+      s_pRegexInclude := hb_regexComp( '(^|;)[[:blank:]]*#[[:blank:]]*(incl|inclu|includ|include|import)[[:blank:]]*(\".+?\"|<.+?>'+"|['`].+?'"+')',;
         .F. /* lCaseSensitive */,;
         .T. /* lNewLine */ )
+      hb_cdpSelect( tmp )
      IF Empty( s_pRegexInclude )
         hbmk_OutErr( hbmk, I_( "Internal Error: Regular expression engine missing or unsupported. Check your Harbour build settings." ) )
         s_pRegexInclude := 0 /* To show the error only once by setting to non-NIL empty value */
@@ -7579,7 +7585,7 @@ STATIC FUNCTION s_getIncludedFiles( hbmk, cFile, cParentDir, lCMode )
                                      NIL /* lNewLine */, NIL, ;
                                      NIL /* nGetMatch */, ;
                                      .T. /* lOnlyMatch */ )
-            cHeader := tmp[ 3 ] /* First match marker */
+            cHeader := atail( tmp ) /* Last group in match marker */
            lSystemHeader := ( Left( cHeader, 1 ) == "<" )
            cHeader := SubStr( cHeader, 2, Len( cHeader ) - 2 )