/* * Harbour Project source code: * CT3 string functions * - Token() * - NumToken() * - AtToken() * - TokenLower() * - TokenUpper() * - TokenSep() * * Copyright 2001 IntTec GmbH, Neunlindenstr 32, 79106 Freiburg, Germany * Author: Martin Vogel * * www - http://harbour-project.org * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this software; see the file COPYING.txt. If not, write to * the Free Software Foundation, Inc., 59 Temple Place, Suite 330, * Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/). * * As a special exception, the Harbour Project gives permission for * additional uses of the text contained in its release of Harbour. * * The exception is that, if you link the Harbour libraries with other * files to produce an executable, this does not by itself cause the * resulting executable to be covered by the GNU General Public License. * Your use of that executable is in no way restricted on account of * linking the Harbour library code into it. * * This exception does not however invalidate any other reasons why * the executable file might be covered by the GNU General Public License. * * This exception applies only to the code released by the Harbour * Project under the name Harbour. If you copy code from other * Harbour Project or Free Software Foundation releases into a copy of * Harbour, as the General Public License permits, the exception does * not apply to the code that you add in this way. To avoid misleading * anyone as to the status of such modified files, you must delete * this exception notice from them. * * If you write modifications of your own for Harbour, it is your choice * whether to permit this exception to apply to your modifications. * If you do not wish that, delete this exception notice. * */ #include "ct.h" #include "hbstack.h" /* static const data */ static const char * sc_pcSeparatorStr = "\x00" "\x09" "\x0A" "\x0C" "\x1A" "\x20" "\x8A" "\x8C" ",.;:!\?/\\<>()#&%+-*"; static const HB_SIZE sc_sSeparatorStrLen = 26; /* static data */ typedef struct { /* even if these are chars, variable must be int, since we need an extra -1 */ int iPreSeparator; int iPostSeparator; } CT_TOKEN, * PCT_TOKEN; static void s_ct_token_init( void * cargo ) { PCT_TOKEN ct_token = ( PCT_TOKEN ) cargo; ct_token->iPreSeparator = -1; ct_token->iPostSeparator = -1; } static HB_TSD_NEW( s_ct_token, sizeof( CT_TOKEN ), s_ct_token_init, NULL ); /* defines */ #define DO_TOKEN1_TOKEN 0 #define DO_TOKEN1_NUMTOKEN 1 #define DO_TOKEN1_ATTOKEN 2 #define DO_TOKEN1_TOKENLOWER 3 #define DO_TOKEN1_TOKENUPPER 4 /* helper function for the token function group I */ static void do_token1( int iSwitch ) { PCT_TOKEN ct_token = ( PCT_TOKEN ) hb_stackGetTSD( &s_ct_token ); int iParamCheck = 0; int iNoRef = ct_getref() && HB_ISBYREF( 1 ); switch( iSwitch ) { case DO_TOKEN1_TOKEN: ct_token->iPreSeparator = ct_token->iPostSeparator = -1; /* no "break" here !! */ case DO_TOKEN1_ATTOKEN: case DO_TOKEN1_NUMTOKEN: case DO_TOKEN1_TOKENLOWER: case DO_TOKEN1_TOKENUPPER: iParamCheck = HB_ISCHAR( 1 ); break; } if( iParamCheck ) { const char * pcString = hb_parc( 1 ); HB_SIZE sStrLen = hb_parclen( 1 ); const char * pcSeparatorStr; HB_SIZE sSeparatorStrLen; HB_SIZE nTokenCounter = 0; HB_SIZE nSkip; const char * pcSubStr; char * pcRet = NULL; HB_SIZE sSubStrLen; HB_SIZE sRetStrLen = 0; HB_SIZE nToken = 0; HB_SIZE nSkipCnt; const char * pc; /* separator string */ sSeparatorStrLen = hb_parclen( 2 ); if( sSeparatorStrLen != 0 ) pcSeparatorStr = hb_parc( 2 ); else { pcSeparatorStr = sc_pcSeparatorStr; sSeparatorStrLen = sc_sSeparatorStrLen; } if( iSwitch == DO_TOKEN1_NUMTOKEN ) { /* token counter */ nTokenCounter = HB_SIZE_MAX; /* skip width */ nSkip = hb_parns( 3 ); } else { /* token counter */ nTokenCounter = hb_parns( 3 ); /* skip width */ nSkip = hb_parns( 4 ); /* HB_EXTENSION for AtToken()/TokenLower()/TokenUpper() */ } if( nTokenCounter == 0 ) nTokenCounter = HB_SIZE_MAX; if( nSkip == 0 ) nSkip = HB_SIZE_MAX; /* prepare return value for TokenUpper()/TokenLower() */ if( iSwitch == DO_TOKEN1_TOKENLOWER || iSwitch == DO_TOKEN1_TOKENUPPER ) { if( sStrLen == 0 ) { if( iNoRef ) hb_retl( HB_FALSE ); else hb_retc_null(); return; } sRetStrLen = sStrLen; pcRet = ( char * ) hb_xgrab( sRetStrLen + 1 ); hb_xmemcpy( pcRet, pcString, sRetStrLen ); } /* find the th token */ pcSubStr = pcString; sSubStrLen = sStrLen; /* scan start condition */ pc = pcSubStr - 1; while( nToken < nTokenCounter ) { HB_SIZE sMatchedPos = sSeparatorStrLen; /* Skip the left nSkip successive separators */ nSkipCnt = 0; do { sSubStrLen -= ( pc - pcSubStr ) + 1; pcSubStr = pc + 1; pc = ct_at_charset_forward( pcSubStr, sSubStrLen, pcSeparatorStr, sSeparatorStrLen, &sMatchedPos ); if( iSwitch == DO_TOKEN1_TOKEN ) { ct_token->iPreSeparator = ct_token->iPostSeparator; if( sMatchedPos < sSeparatorStrLen ) ct_token->iPostSeparator = pcSeparatorStr[ sMatchedPos ]; else ct_token->iPostSeparator = -1; } nSkipCnt++; } while( nSkipCnt < nSkip && pc == pcSubStr ); if( sSubStrLen == 0 ) { /* string ends with tokenizer (null string after tokenizer at end of string is not a token) */ switch( iSwitch ) { case DO_TOKEN1_TOKEN: { char cRet; hb_retc_null(); if( HB_ISBYREF( 5 ) ) /* HB_EXTENSION */ { cRet = ( char ) ct_token->iPreSeparator; hb_storclen( &cRet, ( ct_token->iPreSeparator != -1 ? 1 : 0 ), 5 ); } if( HB_ISBYREF( 6 ) ) /* HB_EXTENSION */ { cRet = ( char ) ct_token->iPostSeparator; hb_storclen( &cRet, ( ct_token->iPostSeparator != -1 ? 1 : 0 ), 6 ); } break; } case DO_TOKEN1_NUMTOKEN: hb_retns( nToken ); break; case DO_TOKEN1_ATTOKEN: hb_retns( 0 ); break; case DO_TOKEN1_TOKENLOWER: case DO_TOKEN1_TOKENUPPER: hb_storclen( pcRet, sRetStrLen, 1 ); if( iNoRef ) { hb_xfree( pcRet ); hb_retl( HB_FALSE ); } else hb_retclen_buffer( pcRet, sRetStrLen ); break; } return; } switch( iSwitch ) { case DO_TOKEN1_TOKEN: case DO_TOKEN1_NUMTOKEN: case DO_TOKEN1_ATTOKEN: break; case DO_TOKEN1_TOKENLOWER: if( pcSubStr != pc ) /* letters can be tokenizers, too, but they should not be lowercase'd */ *( pcRet + ( pcSubStr - pcString ) ) = ( char ) hb_charLower( ( HB_UCHAR ) *pcSubStr ); break; case DO_TOKEN1_TOKENUPPER: if( pcSubStr != pc ) /* letters can be tokenizers, too, but they should not be uppercase'd */ *( pcRet + ( pcSubStr - pcString ) ) = ( char ) hb_charUpper( ( HB_UCHAR ) *pcSubStr ); break; default: break; } nToken++; if( pc == NULL ) { /* little trick for return values */ pc = pcSubStr + sSubStrLen; /* we must leave the while loop even if we have not yet found the th token */ break; } /* should we find the last token, but string ends with tokenizer, i.e. pc points to the last character at the moment ? -> break here ! */ if( nTokenCounter == HB_SIZE_MAX ) { if( nSkip == HB_SIZE_MAX ) { const char * t; HB_BOOL bLast = HB_TRUE; for( t = pc + 1; t < pcString + sStrLen; t++ ) { if( ! memchr( pcSeparatorStr, *t, sSeparatorStrLen ) ) { bLast = HB_FALSE; break; } } if( bLast ) break; } else if( pc + 1 == pcString + sStrLen ) break; } } switch( iSwitch ) { case DO_TOKEN1_TOKEN: { char cRet; if( nTokenCounter == HB_SIZE_MAX || nToken == nTokenCounter ) hb_retclen( pcSubStr, pc - pcSubStr ); else hb_retc_null(); if( HB_ISBYREF( 5 ) ) /* HB_EXTENSION */ { cRet = ( char ) ct_token->iPreSeparator; hb_storclen( &cRet, ( ct_token->iPreSeparator != -1 ? 1 : 0 ), 5 ); } if( HB_ISBYREF( 6 ) ) /* HB_EXTENSION */ { cRet = ( char ) ct_token->iPostSeparator; hb_storclen( &cRet, ( ct_token->iPostSeparator != -1 ? 1 : 0 ), 6 ); } break; } case DO_TOKEN1_NUMTOKEN: hb_retns( nToken ); break; case DO_TOKEN1_ATTOKEN: if( nTokenCounter == HB_SIZE_MAX || nToken == nTokenCounter ) hb_retns( pcSubStr - pcString + 1 ); else hb_retns( 0 ); break; case DO_TOKEN1_TOKENLOWER: case DO_TOKEN1_TOKENUPPER: hb_storclen( pcRet, sRetStrLen, 1 ); if( iNoRef ) { hb_xfree( pcRet ); hb_retl( HB_FALSE ); } else hb_retclen_buffer( pcRet, sRetStrLen ); break; } } else { switch( iSwitch ) { case DO_TOKEN1_TOKEN: { PHB_ITEM pSubst = NULL; int iArgErrorMode = ct_getargerrormode(); char cRet; if( HB_ISBYREF( 5 ) ) /* HB_EXTENSION */ { cRet = ( char ) ct_token->iPreSeparator; hb_storclen( &cRet, ( ct_token->iPreSeparator != -1 ? 1 : 0 ), 5 ); } if( HB_ISBYREF( 6 ) ) /* HB_EXTENSION */ { cRet = ( char ) ct_token->iPostSeparator; hb_storclen( &cRet, ( ct_token->iPostSeparator != -1 ? 1 : 0 ), 6 ); } if( iArgErrorMode != CT_ARGERR_IGNORE ) pSubst = ct_error_subst( ( HB_USHORT ) iArgErrorMode, EG_ARG, CT_ERROR_TOKEN, NULL, HB_ERR_FUNCNAME, 0, EF_CANSUBSTITUTE, HB_ERR_ARGS_BASEPARAMS ); if( pSubst != NULL ) hb_itemReturnRelease( pSubst ); else if( ! iNoRef ) hb_retc_null(); else hb_retl( HB_FALSE ); break; } case DO_TOKEN1_TOKENLOWER: case DO_TOKEN1_TOKENUPPER: { PHB_ITEM pSubst = NULL; int iArgErrorMode = ct_getargerrormode(); if( iArgErrorMode != CT_ARGERR_IGNORE ) pSubst = ct_error_subst( ( HB_USHORT ) iArgErrorMode, EG_ARG, iSwitch == DO_TOKEN1_TOKENLOWER ? CT_ERROR_TOKENLOWER : CT_ERROR_TOKENUPPER, NULL, HB_ERR_FUNCNAME, 0, EF_CANSUBSTITUTE, HB_ERR_ARGS_BASEPARAMS ); if( pSubst != NULL ) hb_itemReturnRelease( pSubst ); else if( ! iNoRef ) hb_retc_null(); else hb_retl( HB_FALSE ); break; } case DO_TOKEN1_NUMTOKEN: case DO_TOKEN1_ATTOKEN: { PHB_ITEM pSubst = NULL; int iArgErrorMode = ct_getargerrormode(); if( iArgErrorMode != CT_ARGERR_IGNORE ) pSubst = ct_error_subst( ( HB_USHORT ) iArgErrorMode, EG_ARG, iSwitch == DO_TOKEN1_NUMTOKEN ? CT_ERROR_NUMTOKEN : CT_ERROR_ATTOKEN, NULL, HB_ERR_FUNCNAME, 0, EF_CANSUBSTITUTE, HB_ERR_ARGS_BASEPARAMS ); if( pSubst != NULL ) hb_itemReturnRelease( pSubst ); else hb_retns( 0 ); break; } } } } HB_FUNC( ATTOKEN ) { do_token1( DO_TOKEN1_ATTOKEN ); } HB_FUNC( TOKEN ) { do_token1( DO_TOKEN1_TOKEN ); } HB_FUNC( NUMTOKEN ) { do_token1( DO_TOKEN1_NUMTOKEN ); } HB_FUNC( TOKENLOWER ) { do_token1( DO_TOKEN1_TOKENLOWER ); } HB_FUNC( TOKENUPPER ) { do_token1( DO_TOKEN1_TOKENUPPER ); } HB_FUNC( TOKENSEP ) { PCT_TOKEN ct_token = ( PCT_TOKEN ) hb_stackGetTSD( &s_ct_token ); char cRet; if( hb_parl( 1 ) ) { /* return the separator char BEHIND the last token */ if( ct_token->iPostSeparator != -1 ) { cRet = ( char ) ct_token->iPostSeparator; hb_retclen( &cRet, 1 ); } else hb_retc_null(); } else { /* return the separator char BEFORE the last token */ if( ct_token->iPreSeparator != -1 ) { cRet = ( char ) ct_token->iPreSeparator; hb_retclen( &cRet, 1 ); } else hb_retc_null(); } }