Files
harbour-core/harbour/source/rtl/hbregex.c

561 lines
18 KiB
C

/*
* $Id$
*/
/*
* Harbour Project source code:
*
*
* Copyright 2007 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://www.harbour-project.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
*
* As a special exception, the Harbour Project gives permission for
* additional uses of the text contained in its release of Harbour.
*
* The exception is that, if you link the Harbour libraries with other
* files to produce an executable, this does not by itself cause the
* resulting executable to be covered by the GNU General Public License.
* Your use of that executable is in no way restricted on account of
* linking the Harbour library code into it.
*
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*
* This exception applies only to the code released by the Harbour
* Project under the name Harbour. If you copy code from other
* Harbour Project or Free Software Foundation releases into a copy of
* Harbour, as the General Public License permits, the exception does
* not apply to the code that you add in this way. To avoid misleading
* anyone as to the status of such modified files, you must delete
* this exception notice from them.
*
* If you write modifications of your own for Harbour, it is your choice
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice.
*
*/
/* #define HB_PCRE_REGEX */
#define _HB_REGEX_INTERNAL_
#include "hbregex.h"
#include "hbapiitm.h"
#include "hbapierr.h"
#include "hbinit.h"
/*
 * Release the engine specific data stored inside a compiled expression.
 *
 * pRegEx - compiled expression whose backend data should be released.
 *
 * When neither HB_PCRE_REGEX nor HB_POSIX_REGEX is defined no backend
 * is compiled in and the function does nothing.
 */
static void hb_regfree( PHB_REGEX pRegEx )
{
#if !defined( HB_PCRE_REGEX ) && !defined( HB_POSIX_REGEX )
   /* no regex backend: nothing to release */
   HB_SYMBOL_UNUSED( pRegEx );
#elif defined( HB_PCRE_REGEX )
   /* PCRE takes precedence when both backends are enabled */
   ( pcre_free )( pRegEx->re_pcre );
#else
   regfree( &pRegEx->reg );
#endif
}
/*
 * Compile the pattern szRegEx into pRegEx using the configured backend.
 *
 * pRegEx  - destination; pRegEx->iFlags holds the HBREG_* options chosen
 *           by the caller.  The execution-time flags derived from them
 *           are stored in pRegEx->iEFlags.
 * szRegEx - pattern text handed to the backend compiler.
 *
 * Returns 0 on success and a non-zero value on failure (-1 for PCRE and
 * for builds without any backend, the regcomp() result for POSIX).
 */
static int hb_regcomp( PHB_REGEX pRegEx, const char * szRegEx )
{
#if defined( HB_PCRE_REGEX )
   const unsigned char * pTables = NULL;
   const char * szErrText = NULL;
   int iErrPos = 0;
   int iCompile = 0;
   int iExec = 0;

   /* translate compile-time options */
   if( pRegEx->iFlags & HBREG_ICASE )
      iCompile |= PCRE_CASELESS;
   if( pRegEx->iFlags & HBREG_NEWLINE )
      iCompile |= PCRE_MULTILINE;
   if( pRegEx->iFlags & HBREG_DOTALL )
      iCompile |= PCRE_DOTALL;

   /* translate options which are applied when the expression is executed */
   if( pRegEx->iFlags & HBREG_NOTBOL )
      iExec |= PCRE_NOTBOL;
   if( pRegEx->iFlags & HBREG_NOTEOL )
      iExec |= PCRE_NOTEOL;
   pRegEx->iEFlags = iExec;

   /* the error text and offset reported by PCRE are not propagated */
   pRegEx->re_pcre = pcre_compile( szRegEx, iCompile, &szErrText,
                                   &iErrPos, pTables );
   if( pRegEx->re_pcre )
      return 0;
   return -1;
#elif defined( HB_POSIX_REGEX )
   /* POSIX patterns are always compiled as extended expressions */
   int iCompile = REG_EXTENDED;
   int iExec = 0;

   if( pRegEx->iFlags & HBREG_ICASE )
      iCompile |= REG_ICASE;
   if( pRegEx->iFlags & HBREG_NEWLINE )
      iCompile |= REG_NEWLINE;
   if( pRegEx->iFlags & HBREG_NOSUB )
      iCompile |= REG_NOSUB;

   if( pRegEx->iFlags & HBREG_NOTBOL )
      iExec |= REG_NOTBOL;
   if( pRegEx->iFlags & HBREG_NOTEOL )
      iExec |= REG_NOTEOL;
   pRegEx->iEFlags = iExec;

   return regcomp( &pRegEx->reg, szRegEx, iCompile );
#else
   /* no regex backend: compilation always fails */
   HB_SYMBOL_UNUSED( pRegEx );
   HB_SYMBOL_UNUSED( szRegEx );
   return -1;
#endif
}
/*
 * Execute a compiled expression against the first ulLen bytes of szString.
 *
 * pRegEx   - compiled expression (backend data and pRegEx->iEFlags are used)
 * szString - subject text
 * ulLen    - number of bytes of szString to examine
 * iMatches - number of match slots available in aMatches
 * aMatches - receives the start/end offsets of the match and sub-matches
 *
 * Returns a value greater than 0 when a match was found (for POSIX it is
 * the index + 1 of the highest slot whose end offset was filled), 0 when
 * the backend reported success but no slot was filled, and a negative
 * value when nothing matched, an error occurred or no backend is
 * compiled in.
 */
static int hb_regexec( PHB_REGEX pRegEx, const char * szString, ULONG ulLen,
                       int iMatches, HB_REGMATCH * aMatches )
{
#if defined( HB_PCRE_REGEX )
   int iFound, iSlot;

   iFound = pcre_exec( pRegEx->re_pcre, NULL /* pcre_extra */,
                       szString, ulLen, 0 /* startoffset */,
                       pRegEx->iEFlags, aMatches, HB_REGMATCH_SIZE( iMatches ) );
   if( iFound != 0 )
      return iFound;

   /* Zero result (per the PCRE API: offsets vector too small for all
      sub-matches): report the highest slot which was filled */
   for( iSlot = iMatches; iSlot > 0; --iSlot )
   {
      if( HB_REGMATCH_EO( aMatches, iSlot - 1 ) != -1 )
         return iSlot;
   }
   return 0;
#elif defined( HB_POSIX_REGEX )
   char * pszCopy = NULL;
   int iFound = -1, iSlot;

   /* regexec() takes no length argument, so a subject which is not
      terminated exactly at ulLen is copied into a terminated buffer */
   if( szString[ ulLen ] != 0 )
   {
      pszCopy = hb_strndup( szString, ulLen );
      szString = pszCopy;
   }

   /* mark every slot as unused so filled ones can be recognized later */
   for( iSlot = 0; iSlot < iMatches; ++iSlot )
      HB_REGMATCH_EO( aMatches, iSlot ) = -1;

   if( regexec( &pRegEx->reg, szString, iMatches, aMatches, pRegEx->iEFlags ) == 0 )
   {
      /* result is the index + 1 of the highest filled slot, 0 if none */
      iFound = 0;
      for( iSlot = iMatches; iSlot > 0; --iSlot )
      {
         if( HB_REGMATCH_EO( aMatches, iSlot - 1 ) != -1 )
         {
            iFound = iSlot;
            break;
         }
      }
   }

   if( pszCopy )
      hb_xfree( pszCopy );

   return iFound;
#else
   /* no regex backend: nothing can ever match */
   HB_SYMBOL_UNUSED( pRegEx );
   HB_SYMBOL_UNUSED( szString );
   HB_SYMBOL_UNUSED( ulLen );
   HB_SYMBOL_UNUSED( iMatches );
   HB_SYMBOL_UNUSED( aMatches );
   return -1;
#endif
}
/*
 * HB_REGEXCOMP( cPattern, [lCaseSensitive], [lNewLine] ) -> pRegEx
 *
 * Compiles cPattern and returns it as a collectable pointer item.
 * An empty or missing pattern raises argument error 3012.  Matching is
 * case insensitive only when the 2-nd parameter is an explicit .F.;
 * a true 3-rd parameter enables HBREG_NEWLINE.  Nothing is returned
 * when hb_regexCompile() gives back NULL.
 */
HB_FUNC( HB_REGEXCOMP )
{
   ULONG ulPatternLen = hb_parclen( 1 );
   int iCompFlags = HBREG_EXTENDED;
   PHB_REGEX pCompiled;

   if( ulPatternLen == 0 )
   {
      hb_errRT_BASE_SubstR( EG_ARG, 3012, "Wrong parameter count/type",
                            HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
      return;
   }

   if( ISLOG( 2 ) && !hb_parl( 2 ) )
      iCompFlags |= HBREG_ICASE;
   if( hb_parl( 3 ) )
      iCompFlags |= HBREG_NEWLINE;

   pCompiled = hb_regexCompile( hb_parc( 1 ), ulPatternLen, iCompFlags );
   if( !pCompiled )
      return;

   /* cleared before the pointer is handed over as the return value;
      presumably stops hb_regexFree() from releasing it - TODO confirm */
   pCompiled->fFree = FALSE;
   hb_retptrGC( pCompiled );
   hb_gcUnlock( pCompiled );
}
/*
 * HB_ISREGEX( xValue ) -> lIsRegEx
 *
 * Returns .T. when hb_parptrGC() finds a collectable pointer registered
 * with hb_regexRelease in the 1-st parameter (presumably one created by
 * HB_REGEXCOMP() - confirm against hb_regexCompile()).
 */
HB_FUNC( HB_ISREGEX )
{
   void * pCompiled = hb_parptrGC( hb_regexRelease, 1 );

   hb_retl( pCompiled != NULL );
}
HB_FUNC( HB_ATX )
{
char * pszString;
ULONG ulLen, ulStart, ulEnd;
PHB_REGEX pRegEx;
PHB_ITEM pString;
int iPCount = hb_pcount();
pString = hb_param( 2, HB_IT_STRING );
if( !pString )
{
hb_errRT_BASE_SubstR( EG_ARG, 3012, "Wrong parameters",
HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
return;
}
pszString = hb_itemGetCPtr( pString );
ulLen = hb_itemGetCLen( pString );
pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
ISLOG( 3 ) && !hb_parl( 3 ) ? HBREG_ICASE : 0 );
if( !pRegEx )
return;
ulStart = hb_parnl( 4 );
ulEnd = hb_parnl( 5 );
if( ulLen && ulStart <= ulLen && ulStart <= ulEnd )
{
HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( 1 ) ];
if( ulEnd < ulLen )
ulLen = ulEnd;
if( ulStart )
{
--ulStart;
ulLen -= ulStart;
}
if( hb_regexec( pRegEx, pszString + ulStart, ulLen, 1, aMatches ) > 0 )
{
ulStart += HB_REGMATCH_SO( aMatches, 0 ) + 1;
ulLen = HB_REGMATCH_EO( aMatches, 0 ) - HB_REGMATCH_SO( aMatches, 0 );
hb_retclen( pszString + ulStart - 1, ulLen );
}
else
ulStart = ulLen = 0;
}
else
ulStart = ulLen = 0;
hb_regexFree( pRegEx );
if( iPCount > 3 )
{
hb_stornl( ulStart, 4 );
if( iPCount > 4 )
hb_stornl( ulLen, 5 );
}
}
/*
* Common worker for the HB_REGEX*() functions defined below in this file.
*
* iRequest selects the operation:
*    0 - return array with the match and its sub-matches
*    1 - LIKE: TRUE when the match covers the whole string
*    2 - HAS: TRUE when any match exists
*    3 - SPLIT: return array with the text between matches
*    4 - return array of { cMatch, nStart, nEnd } for match and sub-matches
*    5 - return all matches, optionally with their positions
*
* Parameters are taken from the caller's HVM frame: 1 - regular expression,
* 2 - string, 3 - case sensitivity (case insensitive only for explicit .F.),
* 4 - newline flag; requests 3 and 5 also read parameter 5 and request 5
* reads parameters 6 and 7.
*
* Returns FALSE when the string parameter is wrong (error 3012 is raised),
* when hb_regexGet() fails, when nothing matches (except SPLIT) or when
* a LIKE test fails; TRUE otherwise.
*/
static BOOL hb_regex( int iRequest )
{
HB_REGMATCH aMatches[ HB_REGMATCH_SIZE( REGEX_MAX_GROUPS ) ];
PHB_ITEM pRetArray, pMatch, pString;
int i, iMatches, iMaxMatch;
BOOL fResult = FALSE;
PHB_REGEX pRegEx;
char * pszString;
ULONG ulLen;
pString = hb_param( 2, HB_IT_STRING );
if( !pString )
{
hb_errRT_BASE_SubstR( EG_ARG, 3012, "Wrong parameters",
HB_ERR_FUNCNAME, HB_ERR_ARGS_BASEPARAMS );
return FALSE;
}
/* 3-rd parameter: explicit .F. requests case insensitive matching,
   4-th parameter: .T. enables HBREG_NEWLINE */
pRegEx = hb_regexGet( hb_param( 1, HB_IT_ANY ),
( ISLOG( 3 ) && !hb_parl( 3 ) ? HBREG_ICASE : 0 ) |
( hb_parl( 4 ) ? HBREG_NEWLINE : 0 ) );
if( !pRegEx )
return FALSE;
pszString = hb_itemGetCPtr( pString );
ulLen = hb_itemGetCLen( pString );
/* only requests which report sub-matches need more than one match slot */
iMaxMatch = iRequest == 0 || iRequest == 4 || iRequest == 5 ?
REGEX_MAX_GROUPS : 1;
iMatches = hb_regexec( pRegEx, pszString, ulLen, iMaxMatch, aMatches );
if( iMatches > 0 )
{
switch( iRequest )
{
case 0:
/* array of strings: whole match followed by the sub-matches;
   slots which were not filled become empty strings */
pRetArray = hb_itemArrayNew( iMatches );
for( i = 0; i < iMatches; i++ )
{
if( HB_REGMATCH_EO( aMatches, i ) > -1 )
hb_arraySetCL( pRetArray, i + 1,
pszString + HB_REGMATCH_SO( aMatches, i ),
HB_REGMATCH_EO( aMatches, i ) -
HB_REGMATCH_SO( aMatches, i ) );
else
hb_arraySetCL( pRetArray, i + 1, "", 0 );
}
hb_itemReturnRelease( pRetArray );
fResult = TRUE;
break;
case 1: /* LIKE */
/* the match has to start at offset 0 and end at the end of string */
fResult = HB_REGMATCH_SO( aMatches, 0 ) == 0 &&
( ULONG ) HB_REGMATCH_EO( aMatches, 0 ) == ulLen;
break;
case 2: /* MATCH ( HAS ) */
fResult = TRUE;
break;
case 3: /* SPLIT */
/* iMaxMatch is reused here as the limit of processed separators
   taken from the 5-th parameter, 0 means no limit */
iMaxMatch = hb_parni( 5 );
pRetArray = hb_itemArrayNew( 0 );
pMatch = hb_itemNew( NULL );
iMatches = 0;
do
{
/* add the text which precedes the separator, then continue
   the search just behind the separator */
hb_itemPutCL( pMatch, pszString, HB_REGMATCH_SO( aMatches, 0 ) );
hb_arrayAddForward( pRetArray, pMatch );
ulLen -= HB_REGMATCH_EO( aMatches, 0 );
pszString += HB_REGMATCH_EO( aMatches, 0 );
iMatches++;
}
/* stop when the match ended at offset 0 (no progress is possible),
   when the string is exhausted, when the limit is reached or when
   no further separator is found */
while( HB_REGMATCH_EO( aMatches, 0 ) > 0 && ulLen &&
( iMaxMatch == 0 || iMatches < iMaxMatch ) &&
hb_regexec( pRegEx, pszString, ulLen, 1, aMatches ) > 0 );
/* last match must be done also in case that pszString is empty;
this would mean an empty split field at the end of the string */
/* if( ulLen ) */
{
hb_itemPutCL( pMatch, pszString, ulLen );
hb_arrayAddForward( pRetArray, pMatch );
}
hb_itemRelease( pMatch );
hb_itemReturnRelease( pRetArray );
fResult = TRUE;
break;
case 4: /* results AND positions */
/* array of { cText, nStart, nEnd } with 1-based start position;
   slots which were not filled are stored as { "", 0, 0 } */
pRetArray = hb_itemArrayNew( iMatches );
for( i = 0; i < iMatches; i++ )
{
int iSO = HB_REGMATCH_SO( aMatches, i ),
iEO = HB_REGMATCH_EO( aMatches, i );
pMatch = hb_arrayGetItemPtr( pRetArray, i + 1 );
hb_arrayNew( pMatch, 3 );
if( iEO != -1 )
{
/* matched string */
hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
/* begin of match */
hb_arraySetNI( pMatch, 2, iSO + 1 );
/* End of match */
hb_arraySetNI( pMatch, 3, iEO );
}
else
{
hb_arraySetCL( pMatch, 1, "", 0 );
hb_arraySetNI( pMatch, 2, 0 );
hb_arraySetNI( pMatch, 3, 0 );
}
}
hb_itemReturnRelease( pRetArray );
fResult = TRUE;
break;
case 5: /* _ALL_ results AND positions */
{
PHB_ITEM pAtxArray;
int iMax = hb_parni( 5 ); /* max number of matches I want, 0 = unlimited */
int iGetMatch = hb_parni( 6 ); /* Gets if want only one single match or a sub-match */
BOOL fOnlyMatch = !ISLOG( 7 ) || hb_parl( 7 ); /* if TRUE returns only matches and sub-matches, not positions */
/* number of bytes of the original string already consumed; added to
   the reported positions so they refer to the whole string */
ULONG ulOffSet = 0;
int iCount = 0;
int iSO, iEO;
/* Set new array */
pRetArray = hb_itemArrayNew( 0 );
do
{
/* If I want all matches */
/* (an out of range iGetMatch is handled like 0) */
if( iGetMatch == 0 || /* Check boundaries */
( iGetMatch < 0 || iGetMatch > iMatches ) )
{
/* one sub-array per match holding the match and its sub-matches */
pAtxArray = hb_itemArrayNew( iMatches );
for( i = 0; i < iMatches; i++ )
{
iSO = HB_REGMATCH_SO( aMatches, i );
iEO = HB_REGMATCH_EO( aMatches, i );
pMatch = hb_arrayGetItemPtr( pAtxArray, i + 1 );
if( !fOnlyMatch )
{
hb_arrayNew( pMatch, 3 );
if( iEO != -1 )
{
/* matched string */
hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
/* begin of match */
hb_arraySetNI( pMatch, 2, ulOffSet + iSO + 1 );
/* End of match */
hb_arraySetNI( pMatch, 3, ulOffSet + iEO );
}
else
{
hb_arraySetCL( pMatch, 1, "", 0 );
hb_arraySetNI( pMatch, 2, 0 );
hb_arraySetNI( pMatch, 3, 0 );
}
}
else
{
if( iEO != -1 )
/* matched string */
hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
else
hb_itemPutC( pMatch, NULL );
}
}
hb_arrayAddForward( pRetArray, pAtxArray );
hb_itemRelease( pAtxArray );
}
else /* Here I get only single matches */
{
/* iGetMatch is 1-based: 1 is the whole match, 2 the first sub-match */
i = iGetMatch - 1;
iSO = HB_REGMATCH_SO( aMatches, i );
iEO = HB_REGMATCH_EO( aMatches, i );
pMatch = hb_itemNew( NULL );
if( !fOnlyMatch )
{
hb_arrayNew( pMatch, 3 );
if( iEO != -1 )
{
/* matched string */
hb_arraySetCL( pMatch, 1, pszString + iSO, iEO - iSO );
/* begin of match */
hb_arraySetNI( pMatch, 2, ulOffSet + iSO + 1 );
/* End of match */
hb_arraySetNI( pMatch, 3, ulOffSet + iEO );
}
else
{
hb_arraySetCL( pMatch, 1, "", 0 );
hb_arraySetNI( pMatch, 2, 0 );
hb_arraySetNI( pMatch, 3, 0 );
}
}
else
{
if( iEO != -1 )
/* matched string */
hb_itemPutCL( pMatch, pszString + iSO, iEO - iSO );
else
hb_itemPutC( pMatch, NULL );
}
hb_arrayAddForward( pRetArray, pMatch );
hb_itemRelease( pMatch );
}
/* continue the search just behind the end of the whole match */
iEO = HB_REGMATCH_EO( aMatches, 0 );
if( iEO == -1 )
break;
ulLen -= iEO;
pszString += iEO;
ulOffSet += iEO;
iCount++;
}
/* stop when the match ended at offset 0 (no progress is possible),
   when the string is exhausted, when the requested number of matches
   is collected or when nothing more matches */
while( iEO && ulLen && ( iMax == 0 || iCount < iMax ) &&
( iMatches = hb_regexec( pRegEx, pszString, ulLen, iMaxMatch, aMatches ) ) > 0 );
hb_itemReturnRelease( pRetArray );
fResult = TRUE;
break;
}
}
}
else if( iRequest == 3 )
{
/* SPLIT without any separator found: the result is a one element
   array holding the original string */
pRetArray = hb_itemArrayNew( 1 );
hb_arraySet( pRetArray, 1, pString );
hb_itemReturnRelease( pRetArray );
fResult = TRUE;
}
hb_regexFree( pRegEx );
return fResult;
}
/* HB_REGEX(): returns an array holding the match followed by its
   sub-matches (request 0 of hb_regex()). The BOOL result of the worker
   is not used. */
HB_FUNC( HB_REGEX )
{
   const int iRequest = 0;

   hb_regex( iRequest );
}
/* HB_REGEXMATCH(): returns .T. when a match is found and .F. otherwise.
   NOTE: the 3-rd parameter selects the kind of test here (true -> LIKE,
   otherwise HAS) and the same parameter is also read by hb_regex() as
   the case sensitivity switch. */
HB_FUNC( HB_REGEXMATCH )
{
   int iRequest;

   if( hb_parl( 3 ) )
      iRequest = 1; /* LIKE */
   else
      iRequest = 2; /* HAS */

   hb_retl( hb_regex( iRequest ) );
}
HB_FUNC( HB_REGEXLIKE )
{
hb_retl( hb_regex( 1 ) );
}
HB_FUNC( HB_REGEXHAS )
{
hb_retl( hb_regex( 2 ) );
}
/* HB_REGEXSPLIT(): splits the string at every match of the expression and
   returns an array with the pieces found between the matches (request 3
   of hb_regex()). The BOOL result of the worker is not used. */
HB_FUNC( HB_REGEXSPLIT )
{
   const int iRequest = 3; /* SPLIT */

   hb_regex( iRequest );
}
/* HB_REGEXATX(): returns an array of { cText, nStart, nEnd } entries, the
   first one for the whole match and the next ones for its sub-matches
   (request 4 of hb_regex()). The BOOL result of the worker is not used. */
HB_FUNC( HB_REGEXATX )
{
   const int iRequest = 4; /* results AND positions */

   hb_regex( iRequest );
}
/* 2005-12-16 - Francesco Saverio Giudice
   HB_RegExAll( cRegex, cString, lCaseSensitive, lNewLine, nMaxMatches, nGetMatch, lOnlyMatch ) -> aAllRegexMatches
   This function returns all matches found by a regex search.
   It is a mix of hb_RegEx() and hb_RegExAtX().
   PARAMETERS:
   cRegex         - regex pattern string or precompiled regex
   cString        - the string to search
   lCaseSensitive - matching is case sensitive unless .F. is passed
   lNewLine       - default = FALSE
   nMaxMatches    - default = unlimited; limits the number of returned matches
   nGetMatch      - default = all; when set, only this single element of
                    Match + Sub-Matches is returned for each match
   lOnlyMatch     - default = TRUE; if TRUE only the matched text is returned,
                    otherwise start and end positions are returned as well
*/
HB_FUNC( HB_REGEXALL )
{
   const int iRequest = 5; /* _ALL_ results AND positions */

   hb_regex( iRequest );
}
#if defined( HB_PCRE_REGEX )
/* Allocation callback installed into the PCRE memory hooks by the
   startup code in this file; it forwards the request to hb_xgrab(). */
static void * hb_pcre_grab( size_t size )
{
   void * pBlock = hb_xgrab( size );

   return pBlock;
}
#endif
/* Startup code: redirects the PCRE memory hooks (when PCRE is used) to the
   Harbour allocator and registers the backend functions defined in this
   file with the regex API through hb_regexInit(). */
HB_CALL_ON_STARTUP_BEGIN( _hb_regex_init_ )
#if defined( HB_PCRE_REGEX )
/* route PCRE heap and stack allocations through hb_xgrab()/hb_xfree() */
pcre_malloc = hb_pcre_grab;
pcre_free = hb_xfree;
pcre_stack_malloc = hb_pcre_grab;
pcre_stack_free = hb_xfree;
#endif
/* hand over the free, compile and execute callbacks */
hb_regexInit( hb_regfree, hb_regcomp, hb_regexec );
HB_CALL_ON_STARTUP_END( _hb_regex_init_ )
/* compiler specific ways of getting _hb_regex_init_ referenced at startup */
#if defined(HB_PRAGMA_STARTUP)
#pragma startup _hb_regex_init_
#elif defined(HB_MSC_STARTUP)
#pragma data_seg( HB_MSC_START_SEGMENT )
static HB_$INITSYM hb_vm_auto_regex_init_ = _hb_regex_init_;
#pragma data_seg()
#endif