From 1e4b18564fd926050ff0fd1ccc016fad0d986649 Mon Sep 17 00:00:00 2001 From: Viktor Szakats Date: Thu, 19 Apr 2012 10:15:59 +0000 Subject: [PATCH] 2012-04-19 12:15 UTC+0200 Viktor Szakats (harbour syenar.net) * include/hbapicdp.h * src/rtl/hbregex.c * src/rtl/cdpapi.c + will now automatically enable UTF8 in PCRE searches, if the HVM CP is set to UTF8. --- harbour/ChangeLog | 7 +++++++ harbour/include/hbapicdp.h | 1 + harbour/src/rtl/cdpapi.c | 14 ++++++++++++-- harbour/src/rtl/hbregex.c | 15 +++++++-------- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/harbour/ChangeLog b/harbour/ChangeLog index 894faad9f2..73078cc377 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -16,6 +16,13 @@ The license applies to all entries newer than 2009-04-28. */ +2012-04-19 12:15 UTC+0200 Viktor Szakats (harbour syenar.net) + * include/hbapicdp.h + * src/rtl/hbregex.c + * src/rtl/cdpapi.c + + will now automatically enable UTF8 in PCRE searches, if + the HVM CP is set to UTF8. + 2012-04-19 02:30 UTC+0200 Viktor Szakats (harbour syenar.net) * include/harbour.hbx * contrib/hbnetio/utils/hbnetio/netiosrv.prg diff --git a/harbour/include/hbapicdp.h b/harbour/include/hbapicdp.h index 38be2039c7..ed4e53e41a 100644 --- a/harbour/include/hbapicdp.h +++ b/harbour/include/hbapicdp.h @@ -389,6 +389,7 @@ extern HB_EXPORT HB_BOOL hb_cdpRegisterNew( const char * id, extern HB_EXPORT void hb_cdpBuildTransTable( PHB_UNITABLE uniTable ); extern HB_EXPORT void hb_cdpReleaseAll( void ); extern HB_EXPORT const char * hb_cdpID( void ); +extern HB_EXPORT HB_BOOL hb_cdpIsUTF8( PHB_CODEPAGE cdp ); extern HB_EXPORT PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp ); extern HB_EXPORT const char * hb_cdpSelectID( const char * id ); extern HB_EXPORT PHB_CODEPAGE hb_cdpFind( const char * id ); diff --git a/harbour/src/rtl/cdpapi.c b/harbour/src/rtl/cdpapi.c index c13fa68ccf..20a22b37fb 100644 --- a/harbour/src/rtl/cdpapi.c +++ b/harbour/src/rtl/cdpapi.c @@ -132,7 +132,7 @@ HB_CODEPAGE_ANNOUNCE( UTF8 ) static HB_CODEPAGE s_en_codepage = { "EN", "English CP-437", HB_UNITB_437, - NULL, NULL, NULL, NULL, NULL, 0, + NULL, NULL, NULL, NULL, NULL, 0, HB_FALSE, hb_cdpStd_get, hb_cdpStd_put, hb_cdpStd_len, 0, 0, NULL, NULL, &s_utf8_codepage }; @@ -1350,7 +1350,7 @@ HB_SIZE hb_cdpUTF8AsStrLen( PHB_CODEPAGE cdp, const char * pSrc, HB_SIZE nSrc, return ulD; } -HB_SIZE hb_cdpUTF8ToStr( PHB_CODEPAGE cdp, +HB_SIZE hb_cdpUTF8ToStr( PHB_CODEPAGE cdp, const char * pSrc, HB_SIZE nSrc, char * pDst, HB_SIZE nDst ) { @@ -2632,6 +2632,16 @@ PHB_CODEPAGE hb_cdpFindExt( const char * id ) return id ? * hb_cdpFindPos( id ) : NULL; } +HB_BOOL hb_cdpIsUTF8( PHB_CODEPAGE cdp ) +{ + HB_TRACE( HB_TR_DEBUG, ( "hb_cdpIsUTF8(%p)", cdp ) ); + + if( cdp == NULL ) + cdp = hb_vmCDP(); + + return ( cdp == &s_utf8_codepage ); +} + PHB_CODEPAGE hb_cdpSelect( PHB_CODEPAGE cdp ) { PHB_CODEPAGE cdpOld; diff --git a/harbour/src/rtl/hbregex.c b/harbour/src/rtl/hbregex.c index bed8484039..b9565f289a 100644 --- a/harbour/src/rtl/hbregex.c +++ b/harbour/src/rtl/hbregex.c @@ -52,6 +52,7 @@ #define _HB_REGEX_INTERNAL_ #include "hbregex.h" +#include "hbapicdp.h" #include "hbapiitm.h" #include "hbapierr.h" #include "hbinit.h" @@ -71,7 +72,7 @@ static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx ) { #if defined( HB_HAS_PCRE ) const unsigned char * pCharTable = NULL; - const char *szError = NULL; + const char * szError = NULL; int iErrOffset = 0; int iCFlags = ( ( pRegEx->iFlags & HBREG_ICASE ) ? PCRE_CASELESS : 0 ) | ( ( pRegEx->iFlags & HBREG_NEWLINE ) ? PCRE_MULTILINE : 0 ) | @@ -80,21 +81,19 @@ static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx ) pRegEx->iEFlags = ( ( pRegEx->iFlags & HBREG_NOTBOL ) ? PCRE_NOTBOL : 0 ) | ( ( pRegEx->iFlags & HBREG_NOTEOL ) ? PCRE_NOTEOL : 0 ); -#if 0 /* detect UTF-8 support. */ { - int fUTF8Support; + int iUTF8Enabled; # if defined( PCRE_CONFIG_UTF8 ) - if( pcre_config( PCRE_CONFIG_UTF8, &fUTF8Support ) != 0 ) - fUTF8Support = 0; + if( pcre_config( PCRE_CONFIG_UTF8, &iUTF8Enabled ) != 0 ) + iUTF8Enabled = 0; # else - fUTF8Support = 0; + iUTF8Enabled = 0; # endif /* use UTF8 in pcre when available and HVM CP is also UTF8. */ - if( fUTF8Support && hb_cdpIsUTF8() /* TODO */ ) + if( iUTF8Enabled && hb_cdpIsUTF8( NULL ) ) iCFlags |= PCRE_UTF8; } -#endif pRegEx->re_pcre = pcre_compile( szRegEx, iCFlags, &szError, &iErrOffset, pCharTable );