Files
harbour-core/harbour/source/macro/macrolex.c
Przemyslaw Czerpak 0cada37011 2006-11-21 03:30 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl)
* harbour/common.mak
  * harbour/source/compiler/Makefile
  * harbour/include/hbcomp.h
  * harbour/include/hbexprb.c
  * harbour/include/hbexprc.c
  * harbour/source/compiler/cmdcheck.c
  * harbour/source/compiler/genc.c
  * harbour/source/compiler/harbour.c
  * harbour/source/compiler/harbour.l
  * harbour/source/compiler/harbour.slx
  * harbour/source/compiler/harbour.y
  * harbour/source/compiler/hbgenerr.c
  * harbour/source/compiler/hbident.c
  * harbour/source/compiler/ppcomp.c
  + harbour/source/compiler/complex.c
    + added new PP based compiler lexer - it's smaller, MT safe and a
      little bit faster than the FLEX version.
    + added HB_COMP structure to hold compiler data in future MT version
    + added global variable HB_COMP_PTR hb_comp_data to make conversion
      to MT easier - now it holds only PP and lexer data.
    * update PP related code in compiler to be MT safe
    + added %pure-parser, %parse-param and %lex-param for bison to generate
      MT safe grammar parser.
    * updated FLEX to work with recent compiler modifications and pure-parser
      bison API

  * harbour/makefile.bc
  * harbour/makefile.vc
  * harbour/source/macro/Makefile
  * harbour/source/macro/macro.l
  * harbour/source/macro/macro.y
  * harbour/source/macro/macrolex.c
    * use hb_macro prefix instead of hb_comp in bison/flex parser/lexer
      used in macro compiler to avoid possible conflicts in the future
    * separated lexer data

  * harbour/include/hbapi.h
  * harbour/include/hbpp.h
  * harbour/source/pp/ppcore.c
  * harbour/source/pp/ppgen.c
  * harbour/source/pp/pplib.c
  * harbour/source/vm/macro.c
    * removed not used members from HB_MACRO structure to make it
      cleaner before creating common to compiler and macro compiler
      structure
    + added new token HB_PP_TOKEN_EPSILON
    + added void * cargo parameters passed to executed user functions
    + hb_pp_tokenGet(), hb_pp_tokenToString(), hb_pp_tokenBlockString()
      functions for new PP based compiler lexer

  * harbour/utils/hbpp/hbpp.c
  * harbour/utils/hbpp/hbpp.h
  * harbour/utils/hbpp/hbppcomp.c
  * harbour/utils/hbpp/hbppcore.c
  * harbour/utils/hbpp/hbpplib.c
  * harbour/utils/hbpp/pragma.c
    * updated to compile with recent compiler header file modifications


    PP, new lexer and most of grammar parser should be MT safe. Now we should
    update all compiler functions to pass a pointer to the HB_COMP data
    structure, where we should move all current global variables. This
    structure should have as its first member the HB_CMPCOMMON structure,
    which will hold data common to the compiler and the macro compiler.
    Ryszard, I think you are the best person to define this structure.

    We have new lexer which is MT safe but please note that it has to be
    extensively tested so I would like to ask everybody to compile as much
    as possible different code and check if the final programs work as
    expected. While working on the new code I removed some, though not all,
    of the limitations existing in FLEX. At the beginning I tried to
    replicate the exact FLEX behavior, but I found that in a few places it
    does not work as it should, so I began to encode rules in a way which
    removes some limitations. In fact it's now much easier to control some things.
    I kept the FLEX code working and made all necessary modifications
    so it still can be used but keeping FLEX working cost us IMHO too
    much. It's not possible to introduce some improvements to grammar
    parser. All identifiers, keywords and macros returned by the new lexer
    are converted to upper case, do not have to be freed by hb_xfree(),
    and are guaranteed to always be accessible. So from the grammar file
    we can remove all hb_compIdentifierNew( hb_strupr($1), TRUE ), which
    should give a noticeable speed improvement but will break the FLEX code.
    Ryszard and others, you will have to decide whether we will support FLEX
    in the future. We can also clean the code and remove most of the other
    redundant hb_strupr() and hb_strdup() calls used in many places. BTW only
    one terminal symbol can be returned in lower case: DOIDENT,
    and I made it so intentionally, so that it's possible to use:
         DO prog1 WITH "sth"
    on case-sensitive file systems, so this symbol should be cloned in
    upper case as the function symbol but used without modification as the
    file name. This is the current behavior but I'm not sure you will want
    to keep it. Maybe a compiler switch to always convert file names
    created from
         DO <id> [WITH <params,...>]
    to lower case would be better. Please think about it.
2006-11-21 02:29:33 +00:00

619 lines
21 KiB
C

/*
* $Id$
*/
/*
* Harbour Project source code:
* small and MT safe lexer for macro compiler
*
* Copyright 2006 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://www.harbour-project.org
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; see the file COPYING. If not, write to
* the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307 USA (or visit the web site http://www.gnu.org/).
*
* As a special exception, the Harbour Project gives permission for
* additional uses of the text contained in its release of Harbour.
*
* The exception is that, if you link the Harbour libraries with other
* files to produce an executable, this does not by itself cause the
* resulting executable to be covered by the GNU General Public License.
* Your use of that executable is in no way restricted on account of
* linking the Harbour library code into it.
*
* This exception does not however invalidate any other reasons why
* the executable file might be covered by the GNU General Public License.
*
* This exception applies only to the code released by the Harbour
* Project under the name Harbour. If you copy code from other
* Harbour Project or Free Software Foundation releases into a copy of
* Harbour, as the General Public License permits, the exception does
* not apply to the code that you add in this way. To avoid misleading
* anyone as to the status of such modified files, you must delete
* this exception notice from them.
*
* If you write modifications of your own for Harbour, it is your choice
* whether to permit this exception to apply to your modifications.
* If you do not wish that, delete this exception notice.
*
*/
#define HB_MACRO_SUPPORT
#include "hbmacro.h"
#include "hbcomp.h"
#include "hbdate.h"
#include "macroy.h"
/*
 * ASCII-only character classification helpers used by the lexer.
 * Each macro evaluates its argument more than once, so the argument
 * must not have side effects.
 */

/* Non-zero when c may start an identifier: an ASCII letter or '_'. */
#define HB_LEX_ISFIRSTIDCHAR(c) ( ( (c) >= 'A' && (c) <= 'Z' ) || \
( (c) >= 'a' && (c) <= 'z' ) || \
(c) == '_' )
/* Non-zero when c is a decimal digit '0'..'9'. */
#define HB_LEX_ISDIGIT(c) ( (c) >= '0' && (c) <= '9' )
/* Non-zero when c is a hexadecimal digit (0-9, A-F, a-f). */
#define HB_LEX_ISHEXDIGIT(c) ( ( (c) >= '0' && (c) <= '9' ) || \
( (c) >= 'A' && (c) <= 'F' ) || \
( (c) >= 'a' && (c) <= 'f' ) )
/* Non-zero when c may continue an identifier: letter, '_' or digit. */
#define HB_LEX_ISNEXTIDCHAR(c) ( HB_LEX_ISFIRSTIDCHAR(c) || \
HB_LEX_ISDIGIT(c) )
/*
 * Per-macro lexer state. hb_macroLexNew() obtains one of these with
 * pMacro->length extra bytes after the structure, so pBuffer acts as a
 * variable-sized trailing buffer and must stay the last member.
 */
typedef struct _HB_MACRO_LEX
{
char * pString; /* macro source text being scanned (not owned here) */
char * pDst; /* next free position inside pBuffer for token text */
ULONG ulLen; /* number of characters in pString */
ULONG ulSrc; /* index of the next unread character in pString */
BOOL quote; /* TRUE when a following '[' opens a string literal instead of an index */
char pBuffer[ 2 ]; /* start of the storage for NUL-terminated token strings */
}
HB_MACRO_LEX, * PHB_MACRO_LEX;
/*
 * Create the lexer state for pMacro and store it in pMacro->pLex.
 *
 * Returns TRUE when the state was created, FALSE when the macro string
 * is empty (pMacro->length == 0); in the latter case pMacro->pLex is
 * left untouched.
 */
BOOL hb_macroLexNew( HB_MACRO_PTR pMacro )
{
   PHB_MACRO_LEX pLex;

   if( ! pMacro->length )
      return FALSE;

   /*
    * Token text (identifiers, strings, macrovars and macrotexts) is stored
    * NUL-delimited in pBuffer. Its total size cannot be bigger than the
    * size of the macro string, because such tokens have to be separated
    * by some non-value token, or are strings whose delimiters are not
    * copied. Hence length extra bytes after the structure are enough.
    */
   pMacro->pLex = hb_xgrab( sizeof( HB_MACRO_LEX ) + pMacro->length );
   pLex = ( PHB_MACRO_LEX ) pMacro->pLex;

   pLex->pString = pMacro->string;
   pLex->ulLen = pMacro->length;
   pLex->ulSrc = 0;
   /* at the very start of the expression '[' opens a string literal */
   pLex->quote = TRUE;
   pLex->pDst = pLex->pBuffer;

   return TRUE;
}
/*
 * Release the lexer state attached to pMacro, if any, and clear
 * pMacro->pLex so that a repeated call does nothing.
 */
void hb_macroLexDelete( HB_MACRO_PTR pMacro )
{
   if( pMacro->pLex == NULL )
      return;

   hb_xfree( pMacro->pLex );
   pMacro->pLex = NULL;
}
/*
 * Copy the run of identifier characters (ASCII letters, digits, '_')
 * starting at pLex->ulSrc into the token buffer, converting lower case
 * letters to upper case. Stops at the first other character or at the
 * end of the source; no terminating NUL is written.
 */
static void hb_lexIdentCopy( PHB_MACRO_LEX pLex )
{
   for( ; pLex->ulSrc < pLex->ulLen; pLex->ulSrc++ )
   {
      char c = pLex->pString[ pLex->ulSrc ];

      if( c >= 'a' && c <= 'z' )
         c -= 'a' - 'A';
      else if( ! ( ( c >= 'A' && c <= 'Z' ) ||
                   ( c >= '0' && c <= '9' ) || c == '_' ) )
         break;   /* not an identifier character - leave it unread */

      *pLex->pDst++ = c;
   }
}
/*
 * Copy a string literal body into the token buffer.
 *
 * On entry pLex->ulSrc is just past the opening delimiter. Characters are
 * copied up to (not including) cDelim, which is consumed, and the copy is
 * NUL terminated. yylval_ptr->string is set to the start of the copy.
 * When the source ends before cDelim is found, a syntax error is reported
 * through hb_macroError(). LITERAL is returned in both cases.
 */
static int hb_lexStringCopy( YYSTYPE *yylval_ptr, HB_MACRO_PTR pMacro,
                             PHB_MACRO_LEX pLex, char cDelim )
{
   int fClosed = 0;

   pLex->quote = FALSE;
   yylval_ptr->string = pLex->pDst;

   while( ! fClosed && pLex->ulSrc < pLex->ulLen )
   {
      char c = pLex->pString[ pLex->ulSrc++ ];

      if( c == cDelim )
         fClosed = 1;
      else
         *pLex->pDst++ = c;
   }
   *pLex->pDst++ = '\0';

   if( ! fClosed )
      hb_macroError( EG_SYNTAX, pMacro );   /* unterminated string */

   return LITERAL;
}
/*
 * Convert the ulLen characters starting at pLex->ulSrc into a number
 * with hb_compStrToNum() and advance pLex->ulSrc past them.
 *
 * When hb_compStrToNum() returns non-zero the value is stored in
 * yylval_ptr->valDouble and NUM_DOUBLE is returned; otherwise it is
 * stored in yylval_ptr->valLong and NUM_LONG is returned.
 */
static int hb_lexNumConv( YYSTYPE *yylval_ptr, PHB_MACRO_LEX pLex, ULONG ulLen )
{
   HB_LONG lValue;
   double dValue;
   int iDecimals, iDigits;
   int iToken;

   if( hb_compStrToNum( pLex->pString + pLex->ulSrc, ulLen,
                        &lValue, &dValue, &iDecimals, &iDigits ) )
   {
      yylval_ptr->valDouble.dNumber = dValue;
      yylval_ptr->valDouble.bDec = iDecimals;
      yylval_ptr->valDouble.bWidth = iDigits;
      iToken = NUM_DOUBLE;
   }
   else
   {
      yylval_ptr->valLong.lNumber = lValue;
      yylval_ptr->valLong.bWidth = iDigits;
      iToken = NUM_LONG;
   }

   pLex->ulSrc += ulLen;
   return iToken;
}
/*
 * Return the next token of the macro string attached to pMacro.
 *
 * yylval_ptr receives the semantic value of the token:
 *   - ->string for LITERAL, IDENTIFIER, MACROVAR and MACROTEXT (the text
 *     lives in the lexer buffer, or is a string constant for "::"),
 *   - ->valLong / ->valDouble for NUM_LONG, NUM_DOUBLE and NUM_DATE.
 * pMacro is used to reach the lexer state (pMacro->pLex) and to report
 * syntax errors through hb_macroError().
 *
 * The return value is a token code: either one of the named tokens or the
 * character itself for single character tokens; 0 is returned when the
 * whole source has been consumed.
 *
 * pLex->quote tracks whether a following '[' opens a string literal
 * (TRUE, e.g. after an operator) or is an array index operator (FALSE,
 * e.g. after an identifier, a literal or a closing bracket).
 *
 * NOTE(review): the one character look-aheads read
 * pString[ ulSrc ] without comparing ulSrc to ulLen, so they presumably
 * rely on the macro string being NUL terminated at index ulLen - confirm
 * against the callers.
 */
int hb_macrolex( YYSTYPE *yylval_ptr, HB_MACRO_PTR pMacro )
{
   PHB_MACRO_LEX pLex = ( PHB_MACRO_LEX ) pMacro->pLex;

   while( pLex->ulSrc < pLex->ulLen )
   {
      char ch = pLex->pString[ pLex->ulSrc++ ];

      switch( ch )
      {
         /* white space is skipped */
         case ' ':
         case '\t':
         case '\r':
            break;

         /* single character tokens after which '[' starts a string */
         case '$':
         case ',':
         case '|':
         case '@':
         case '(':
         case '{':
         case ';':
         case '\n':
            pLex->quote = TRUE;
            return ch;

         /* closing brackets: a following '[' is an index operator */
         case ')':
         case '}':
         case ']':
            pLex->quote = FALSE;
            return ch;

         case '#':
            pLex->quote = TRUE;
            return NE1;

         case '!':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return NE2;
            }
            return NOT;

         case '<':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '>' )
            {
               pLex->ulSrc++;
               return NE2;
            }
            else if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return LE;
            }
            return '<';

         case '>':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return GE;
            }
            return '>';

         case '=':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return EQ;
            }
            return '=';

         case '+':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '+' )
            {
               pLex->ulSrc++;
               return INC;
            }
            else if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return PLUSEQ;
            }
            return '+';

         case '-':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '-' )
            {
               pLex->ulSrc++;
               return DEC;
            }
            else if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return MINUSEQ;
            }
            else if( pLex->pString[ pLex->ulSrc ] == '>' )
            {
               pLex->ulSrc++;
               return ALIASOP;
            }
            return '-';

         case '*':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '*' )
            {
               pLex->ulSrc++;
               if( pLex->pString[ pLex->ulSrc ] == '=' )
               {
                  pLex->ulSrc++;
                  return EXPEQ;
               }
               return POWER;
            }
            else if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return MULTEQ;
            }
            return '*';

         case '/':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return DIVEQ;
            }
            return '/';

         case '%':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return MODEQ;
            }
            return '%';

         case '^':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return EXPEQ;
            }
            return POWER;

         case ':':
            pLex->quote = TRUE;
            if( pLex->pString[ pLex->ulSrc ] == '=' )
            {
               pLex->ulSrc++;
               return INASSIGN;
            }
            else if( pLex->pString[ pLex->ulSrc ] == ':' )
            {
               /* "::" - return SELF as an identifier; the second ':' is
                  intentionally left unread and is returned by the next call */
               yylval_ptr->string = "SELF";
               return IDENTIFIER;
            }
            return ':';

         case '.':
            pLex->quote = TRUE;
            /* number with a leading decimal point, e.g. ".5" */
            if( pLex->ulSrc < pLex->ulLen &&
                HB_LEX_ISDIGIT( pLex->pString[ pLex->ulSrc ] ) )
            {
               ULONG ul = pLex->ulSrc;
               while( ++ul < pLex->ulLen &&
                      HB_LEX_ISDIGIT( pLex->pString[ ul ] ) );
               /* step back to the '.' and compute the literal length */
               ul -= --pLex->ulSrc;
               return hb_lexNumConv( yylval_ptr, pLex, ul );
            }
            /* .AND. / .NOT. - OR-ing with ('a' - 'A') folds ASCII letters
               to lower case */
            if( pLex->ulLen - pLex->ulSrc >= 4 &&
                pLex->pString[ pLex->ulSrc + 3 ] == '.' )
            {
               if( ( pLex->pString[ pLex->ulSrc + 0 ] | ('a' - 'A') ) == 'a' &&
                   ( pLex->pString[ pLex->ulSrc + 1 ] | ('a' - 'A') ) == 'n' &&
                   ( pLex->pString[ pLex->ulSrc + 2 ] | ('a' - 'A') ) == 'd' )
               {
                  pLex->ulSrc += 4;
                  return AND;
               }
               if( ( pLex->pString[ pLex->ulSrc + 0 ] | ('a' - 'A') ) == 'n' &&
                   ( pLex->pString[ pLex->ulSrc + 1 ] | ('a' - 'A') ) == 'o' &&
                   ( pLex->pString[ pLex->ulSrc + 2 ] | ('a' - 'A') ) == 't' )
               {
                  pLex->ulSrc += 4;
                  return NOT;
               }
            }
            /* .OR. */
            if( pLex->ulLen - pLex->ulSrc >= 3 &&
                pLex->pString[ pLex->ulSrc + 2 ] == '.' )
            {
               if( ( pLex->pString[ pLex->ulSrc + 0 ] | ('a' - 'A') ) == 'o' &&
                   ( pLex->pString[ pLex->ulSrc + 1 ] | ('a' - 'A') ) == 'r' )
               {
                  pLex->ulSrc += 3;
                  return OR;
               }
            }
            /* logical constants: .T. .Y. .F. .N. */
            if( pLex->ulLen - pLex->ulSrc >= 2 &&
                pLex->pString[ pLex->ulSrc + 1 ] == '.' )
            {
               if( ( pLex->pString[ pLex->ulSrc ] | ('a' - 'A') ) == 't' ||
                   ( pLex->pString[ pLex->ulSrc ] | ('a' - 'A') ) == 'y' )
               {
                  pLex->quote = FALSE;
                  pLex->ulSrc += 2;
                  return TRUEVALUE;
               }
               if( ( pLex->pString[ pLex->ulSrc ] | ('a' - 'A') ) == 'f' ||
                   ( pLex->pString[ pLex->ulSrc ] | ('a' - 'A') ) == 'n' )
               {
                  pLex->quote = FALSE;
                  pLex->ulSrc += 2;
                  return FALSEVALUE;
               }
            }
            return '.';

         case '[':
            /* string literal or index operator, depending on context */
            if( pLex->quote )
               return hb_lexStringCopy( yylval_ptr, pMacro, pLex, ']' );
            pLex->quote = TRUE;
            return '[';

         case '`':
         case '\'':
            /* both opening characters are closed by a single quote */
            return hb_lexStringCopy( yylval_ptr, pMacro, pLex, '\'' );

         case '"':
            return hb_lexStringCopy( yylval_ptr, pMacro, pLex, '"' );

         case '&':
            if( pLex->ulSrc < pLex->ulLen )
            {
               if( HB_LEX_ISFIRSTIDCHAR( pLex->pString[ pLex->ulSrc ] ) )
               {
                  /* [&<keyword>[.[<nextidchars>]]]+ */
                  int iParts = 0;
                  pLex->quote = FALSE;
                  yylval_ptr->string = pLex->pDst;
                  /* step back to the '&' so the loop handles every part
                     in the same way */
                  pLex->ulSrc--;
                  do
                  {
                     ++iParts;
                     *pLex->pDst++ = '&';
                     pLex->ulSrc++;
                     hb_lexIdentCopy( pLex );
                     if( pLex->pString[ pLex->ulSrc ] == '.' )
                     {
                        ++iParts;
                        *pLex->pDst++ = '.';
                        pLex->ulSrc++;
                        hb_lexIdentCopy( pLex );
                     }
                  }
                  while( pLex->ulLen - pLex->ulSrc > 1 &&
                         pLex->pString[ pLex->ulSrc ] == '&' &&
                         HB_LEX_ISFIRSTIDCHAR( pLex->pString[ pLex->ulSrc + 1 ] ) );
                  /* "&name." with nothing after the dot is a plain macro
                     variable: drop the trailing '.' */
                  if( iParts == 2 && *( pLex->pDst - 1 ) == '.' )
                  {
                     pLex->pDst--;
                     iParts = 1;
                  }
                  *pLex->pDst++ = '\0';
                  if( iParts == 1 )
                  {
                     /* skip the leading '&' - only the name is returned */
                     yylval_ptr->string++;
                     return MACROVAR;
                  }
                  return MACROTEXT;
               }
               else if( pLex->pString[ pLex->ulSrc ] == '\'' ||
                        pLex->pString[ pLex->ulSrc ] == '"' ||
                        pLex->pString[ pLex->ulSrc ] == '[' )
                  hb_macroError( EG_SYNTAX, pMacro );
            }
            pLex->quote = TRUE;
            return '&';

         default:
            if( HB_LEX_ISDIGIT( ch ) )
            {
               /* ul scans forward from the character after the first digit */
               ULONG ul = pLex->ulSrc;
               pLex->quote = FALSE;
               if( ch == '0' && ul < pLex->ulLen )
               {
                  if( pLex->pString[ ul ] == 'd' || pLex->pString[ ul ] == 'D' )
                  {
                     /* date constant: 0d followed by exactly 8 digits */
                     while( ++ul < pLex->ulLen &&
                            HB_LEX_ISDIGIT( pLex->pString[ ul ] ) );
                     if( ul - pLex->ulSrc == 9 )
                     {
                        int year, month, day;
                        hb_dateStrGet( pLex->pString + pLex->ulSrc + 1,
                                       &year, &month, &day );
                        yylval_ptr->valLong.lNumber =
                                       hb_dateEncode( year, month, day );
                        pLex->ulSrc = ul;
                        return NUM_DATE;
                     }
                     /* not a date - only the "0" is a number */
                     ul = pLex->ulSrc;
                  }
                  else if( pLex->pString[ ul ] == 'x' ||
                           pLex->pString[ ul ] == 'X' )
                  {
                     /* hexadecimal constant: 0x<hexdigits> */
                     while( ++ul < pLex->ulLen &&
                            HB_LEX_ISHEXDIGIT( pLex->pString[ ul ] ) );
                     /* "0x" without digits - only the "0" is a number */
                     if( ul == pLex->ulSrc + 1 )
                        --ul;
                  }
                  else
                  {
                     while( ul < pLex->ulLen &&
                            HB_LEX_ISDIGIT( pLex->pString[ ul ] ) )
                        ++ul;
                     /* the fraction is taken only when a digit follows '.' */
                     if( pLex->ulLen - ul > 1 && pLex->pString[ ul ] == '.' &&
                         HB_LEX_ISDIGIT( pLex->pString[ ul + 1 ] ) )
                     {
                        while( ++ul < pLex->ulLen &&
                               HB_LEX_ISDIGIT( pLex->pString[ ul ] ) );
                     }
                  }
               }
               else
               {
                  while( ul < pLex->ulLen &&
                         HB_LEX_ISDIGIT( pLex->pString[ ul ] ) )
                     ++ul;
                  /* the fraction is taken only when a digit follows '.' */
                  if( pLex->ulLen - ul > 1 && pLex->pString[ ul ] == '.' &&
                      HB_LEX_ISDIGIT( pLex->pString[ ul + 1 ] ) )
                  {
                     while( ++ul < pLex->ulLen &&
                            HB_LEX_ISDIGIT( pLex->pString[ ul ] ) );
                  }
               }
               /* step back to the first digit and compute the length */
               ul -= --pLex->ulSrc;
               return hb_lexNumConv( yylval_ptr, pLex, ul );
            }
            else if( HB_LEX_ISFIRSTIDCHAR( ch ) )
            {
               pLex->quote = FALSE;
               yylval_ptr->string = pLex->pDst;
               /* identifiers are stored in upper case */
               *pLex->pDst++ = ch - ( ( ch >= 'a' && ch <= 'z' ) ? 'a' - 'A' : 0 );
               hb_lexIdentCopy( pLex );
               if( pLex->ulLen - pLex->ulSrc > 1 &&
                   pLex->pString[ pLex->ulSrc ] == '&' &&
                   HB_LEX_ISFIRSTIDCHAR( pLex->pString[ pLex->ulSrc + 1 ] ) )
               {
                  /* [<keyword>][&<keyword>[.[<nextidchars>]]]+ */
                  do
                  {
                     *pLex->pDst++ = '&';
                     pLex->ulSrc++;
                     hb_lexIdentCopy( pLex );
                     if( pLex->pString[ pLex->ulSrc ] == '.' )
                     {
                        *pLex->pDst++ = '.';
                        pLex->ulSrc++;
                        hb_lexIdentCopy( pLex );
                     }
                  }
                  while( pLex->ulLen - pLex->ulSrc > 1 &&
                         pLex->pString[ pLex->ulSrc ] == '&' &&
                         HB_LEX_ISFIRSTIDCHAR( pLex->pString[ pLex->ulSrc + 1 ] ) );
                  *pLex->pDst++ = '\0';
                  return MACROTEXT;
               }
               *pLex->pDst++ = '\0';

               /*
                * Keyword recognition. pDst - string is the identifier
                * length plus one for the terminating NUL.
                */
               if( pLex->pDst - yylval_ptr->string == 3 )
               {
                  if( yylval_ptr->string[ 0 ] == 'I' &&
                      yylval_ptr->string[ 1 ] == 'F' )
                     return IF;
               }
               else if( pLex->pDst - yylval_ptr->string == 4 )
               {
                  if( yylval_ptr->string[ 0 ] == 'I' &&
                      yylval_ptr->string[ 1 ] == 'I' &&
                      yylval_ptr->string[ 2 ] == 'F' )
                     return IIF;
                  else if( yylval_ptr->string[ 0 ] == 'N' &&
                           yylval_ptr->string[ 1 ] == 'I' &&
                           yylval_ptr->string[ 2 ] == 'L' )
                     return NIL;
               }
               else if( pLex->pDst - yylval_ptr->string > 4 )
               {
                  /*
                   * FIELD and _FIELD may be abbreviated down to 4
                   * characters. The identifier length is bounded before
                   * memcmp() is called so that the comparison never reads
                   * past the end of the keyword literal.
                   */
                  if( yylval_ptr->string[ 0 ] == '_' )
                  {
                     /* at most "_FIELD" + NUL == 7 bytes */
                     if( pLex->pDst - yylval_ptr->string <= 7 &&
                         memcmp( "FIELD", yylval_ptr->string + 1,
                                 pLex->pDst - yylval_ptr->string - 2 ) == 0 )
                        return FIELD;
                  }
                  else if( yylval_ptr->string[ 0 ] == 'F' )
                  {
                     /* at most "FIELD" + NUL == 6 bytes */
                     if( pLex->pDst - yylval_ptr->string <= 6 &&
                         memcmp( "IELD", yylval_ptr->string + 1,
                                 pLex->pDst - yylval_ptr->string - 2 ) == 0 )
                        return FIELD;
                  }
                  else if( pLex->pDst - yylval_ptr->string == 6 &&
                           yylval_ptr->string[ 0 ] == 'Q' &&
                           memcmp( "SELF", yylval_ptr->string + 1,
                                   pLex->pDst - yylval_ptr->string - 2 ) == 0 )
                  {
                     /* QSELF followed by "()" (blanks allowed) is the SELF
                        token; otherwise it is an ordinary identifier */
                     while( pLex->ulSrc < pLex->ulLen &&
                            ( pLex->pString[ pLex->ulSrc ] == ' ' ||
                              pLex->pString[ pLex->ulSrc ] == '\t' ) )
                        pLex->ulSrc++;
                     if( pLex->ulSrc < pLex->ulLen &&
                         pLex->pString[ pLex->ulSrc ] == '(' )
                     {
                        ULONG ul = pLex->ulSrc;
                        while( ++ul < pLex->ulLen )
                        {
                           if( pLex->pString[ ul ] == ')' )
                           {
                              pLex->ulSrc = ul + 1;
                              return SELF;
                           }
                           else if( pLex->pString[ ul ] != ' ' &&
                                    pLex->pString[ ul ] != '\t' )
                              break;
                        }
                     }
                  }
               }
               return IDENTIFIER;
            }
            /* any other character is returned as its own token */
            return ch;
      }
   }
   /* end of the macro string */
   return 0;
}