diff --git a/harbour/ChangeLog b/harbour/ChangeLog index 151e2485c9..96129f75c8 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -1,8 +1,15 @@ +2000-08-21 15:45 UTC+0800 Ron Pinkas + * source/compiler/simplex.c + ! Optimized Keywords and Words search, Lexer is 10-15% faster. + + * source/compiler/harbour.slx + * source/macro/macro.slx + ! Sorted Keywords and Words, to support optimized search, by simplex. + 2000-08-21 15:18 GMT+3 Alexander Kresin *source/rdd/dbcmd.c * fixed bugs, reported by Juergen Baez - 2000-08-17 12:18 UTC+0100 Victor Szakats * include/hbver.h @@ -20,7 +27,7 @@ * Minor correction (alphabetical order). * source/compiler/harbour.l - + AS USUAL added. I could not make this HB_COMPAT_VO dependent, so this + + AS USUAL added. I could not make this HB_COMPAT_VO dependent, so this is left as a TODO. 2000-08-16 18:40 UTC+0800 Brian Hays diff --git a/harbour/source/compiler/harbour.slx b/harbour/source/compiler/harbour.slx index af72d671cc..881e4588ae 100644 --- a/harbour/source/compiler/harbour.slx +++ b/harbour/source/compiler/harbour.slx @@ -199,39 +199,39 @@ SELF_CONTAINED_WORDS_ARE { /* Key Words. 
*/ LANGUAGE_KEY_WORDS_ARE { - LEX_WORD( "FUNCTION" ) AS_TOKEN( HB_FUNCTION ), - LEX_WORD( "PROCEDURE" ) AS_TOKEN( HB_PROCEDURE), - LEX_WORD( "RETURN" ) AS_TOKEN( RETURN ), - LEX_WORD( "LOCAL" ) AS_TOKEN( HB_LOCAL ), - LEX_WORD( "STATIC" ) AS_TOKEN( STATIC ), - LEX_WORD( "IF" ) AS_TOKEN( IF ), + LEX_WORD( "ANNOUNCE" ) AS_TOKEN( ANNOUNCE ), + LEX_WORD( "BEGIN" ) AS_TOKEN( BEGIN_ ), + LEX_WORD( "BREAK" ) AS_TOKEN( BREAK ), + LEX_WORD( "CASE" ) AS_TOKEN( CASE ), + LEX_WORD( "DECLARE" ) AS_TOKEN( HB_DECLARE ), + LEX_WORD( "DO" ) AS_TOKEN( DO ), LEX_WORD( "ELSE" ) AS_TOKEN( ELSE ), LEX_WORD( "ELSEIF" ) AS_TOKEN( ELSEIF ), LEX_WORD( "END" ) AS_TOKEN( END ), - LEX_WORD( "ENDIF" ) AS_TOKEN( ENDIF ), - LEX_WORD( "ANNOUNCE" ) AS_TOKEN( ANNOUNCE ), - LEX_WORD( "EXTERNAL" ) AS_TOKEN( HB_EXTERN ), - LEX_WORD( "INIT" ) AS_TOKEN( INIT ), - LEX_WORD( "EXIT" ) AS_TOKEN( EXIT_ ), - LEX_WORD( "PUBLIC" ) AS_TOKEN( HB_PUBLIC ), - LEX_WORD( "CASE" ) AS_TOKEN( CASE ), - LEX_WORD( "OTHERWISE" ) AS_TOKEN( OTHERWISE ), LEX_WORD( "ENDCASE" ) AS_TOKEN( ENDCASE ), LEX_WORD( "ENDDO" ) AS_TOKEN( ENDDO ), - LEX_WORD( "MEMVAR" ) AS_TOKEN( HB_MEMVAR ), - LEX_WORD( "LOOP" ) AS_TOKEN( LOOP ), + LEX_WORD( "ENDIF" ) AS_TOKEN( ENDIF ), + LEX_WORD( "EXIT" ) AS_TOKEN( EXIT_ ), + LEX_WORD( "EXTERNAL" ) AS_TOKEN( HB_EXTERN ), + LEX_WORD( "FIELD" ) AS_TOKEN( HB_FIELD ), LEX_WORD( "FOR" ) AS_TOKEN( HB_FOR ), + LEX_WORD( "FUNCTION" ) AS_TOKEN( HB_FUNCTION ), + LEX_WORD( "IF" ) AS_TOKEN( IF ), + LEX_WORD( "INIT" ) AS_TOKEN( INIT ), + LEX_WORD( "LOCAL" ) AS_TOKEN( HB_LOCAL ), + LEX_WORD( "LOOP" ) AS_TOKEN( LOOP ), + LEX_WORD( "MEMVAR" ) AS_TOKEN( HB_MEMVAR ), LEX_WORD( "NEXT" ) AS_TOKEN( NEXT ), + LEX_WORD( "OTHERWISE" ) AS_TOKEN( OTHERWISE ), LEX_WORD( "PARAMETERS" ) AS_TOKEN( HB_PARAM ), LEX_WORD( "PRIVATE" ) AS_TOKEN( HB_PRIVATE ), - LEX_WORD( "BEGIN" ) AS_TOKEN( BEGIN_ ), - LEX_WORD( "BREAK" ) AS_TOKEN( BREAK ), + LEX_WORD( "PROCEDURE" ) AS_TOKEN( HB_PROCEDURE), + LEX_WORD( "PUBLIC" ) AS_TOKEN( HB_PUBLIC ), 
LEX_WORD( "RECOVER" ) AS_TOKEN( RECOVER ), - LEX_WORD( "DO" ) AS_TOKEN( DO ), + LEX_WORD( "RETURN" ) AS_TOKEN( RETURN ), + LEX_WORD( "STATIC" ) AS_TOKEN( STATIC ), LEX_WORD( "WHILE" ) AS_TOKEN( WHILE ), - LEX_WORD( "DECLARE" ) AS_TOKEN( HB_DECLARE ), - LEX_WORD( "_PROCREQ_" ) AS_TOKEN( PROCREQ_ ), - LEX_WORD( "FIELD" ) AS_TOKEN( HB_FIELD ) + LEX_WORD( "_PROCREQ_" ) AS_TOKEN( PROCREQ_ ) }; /* Intermediate Words when ambigious. */ @@ -262,35 +262,35 @@ LANGUAGE_KEY_WORDS_ARE { /* Words. */ LANGUAGE_WORDS_ARE { - LEX_WORD( "FUNCTION" ) AS_TOKEN( _FUNC_ ), - LEX_WORD( "PROCEDURE" ) AS_TOKEN( _PROC_ ), - LEX_WORD( "IF" ) AS_TOKEN( _IF_ ), - LEX_WORD( "CASE" ) AS_TOKEN( _CASE_ ), - LEX_WORD( "WHILE" ) AS_TOKEN( _WHILE_ ), - LEX_WORD( "SEQUENCE" ) AS_TOKEN( _SEQUENCE_ ), - LEX_WORD( "USING" ) AS_TOKEN( _USING_ ), - LEX_WORD( "OPTIONAL" ) AS_TOKEN( HB_OPTIONAL ), - LEX_WORD( "NIL" ) AS_TOKEN( NIL ), - LEX_WORD( "IIF" ) AS_TOKEN( IIF ), - LEX_WORD( "TO" ) AS_TOKEN( TO ), - LEX_WORD( "STEP" ) AS_TOKEN( STEP ), - LEX_WORD( "IN" ) AS_TOKEN( HB_CHK_IN ), - LEX_WORD( "WITH" ) AS_TOKEN( _WITH_ ), - LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ), - LEX_WORD( "QSELF" ) AS_TOKEN( QSELF ), - LEX_WORD( "LINE" ) AS_TOKEN( _LINE_ ), - LEX_WORD( "AS" ) AS_TOKEN( _AS_ ), - LEX_WORD( "OF" ) AS_TOKEN( _OF_ ), + LEX_WORD( "ANYTYPE" ) AS_TOKEN( _VARIANT_ ), LEX_WORD( "ARRAY" ) AS_TOKEN( _ARRAY_ ), - LEX_WORD( "CODEBLOCK" ) AS_TOKEN( _BLOCK_ ), - LEX_WORD( "STRING" ) AS_TOKEN( _STRING_ ), + LEX_WORD( "AS" ) AS_TOKEN( _AS_ ), + LEX_WORD( "CASE" ) AS_TOKEN( _CASE_ ), LEX_WORD( "CLASS" ) AS_TOKEN( _CLASS_ ), + LEX_WORD( "CODEBLOCK" ) AS_TOKEN( _BLOCK_ ), LEX_WORD( "DATE" ) AS_TOKEN( _DATE_ ), + LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ), + LEX_WORD( "FUNCTION" ) AS_TOKEN( _FUNC_ ), + LEX_WORD( "IF" ) AS_TOKEN( _IF_ ), + LEX_WORD( "IIF" ) AS_TOKEN( IIF ), + LEX_WORD( "IN" ) AS_TOKEN( HB_CHK_IN ), + LEX_WORD( "LINE" ) AS_TOKEN( _LINE_ ), LEX_WORD( "LOGICAL" ) AS_TOKEN( _LOGICAL_ ), + LEX_WORD( "NIL" ) 
AS_TOKEN( NIL ), LEX_WORD( "NUMERIC" ) AS_TOKEN( _NUMERIC_ ), LEX_WORD( "OBJECT" ) AS_TOKEN( _OBJECT_ ), - LEX_WORD( "ANYTYPE" ) AS_TOKEN( _VARIANT_ ), - LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ), + LEX_WORD( "OF" ) AS_TOKEN( _OF_ ), + LEX_WORD( "OPTIONAL" ) AS_TOKEN( HB_OPTIONAL ), + LEX_WORD( "PROCEDURE" ) AS_TOKEN( _PROC_ ), + LEX_WORD( "QSELF" ) AS_TOKEN( QSELF ), + LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ), + LEX_WORD( "SEQUENCE" ) AS_TOKEN( _SEQUENCE_ ), + LEX_WORD( "STEP" ) AS_TOKEN( STEP ), + LEX_WORD( "STRING" ) AS_TOKEN( _STRING_ ), + LEX_WORD( "TO" ) AS_TOKEN( TO ), + LEX_WORD( "USING" ) AS_TOKEN( _USING_ ), + LEX_WORD( "WHILE" ) AS_TOKEN( _WHILE_ ), + LEX_WORD( "WITH" ) AS_TOKEN( _WITH_ ), LEX_WORD( "_FIELD" ) AS_TOKEN( _FIELD ) }; diff --git a/harbour/source/compiler/simplex.c b/harbour/source/compiler/simplex.c index e8ea57e974..8993aa018c 100644 --- a/harbour/source/compiler/simplex.c +++ b/harbour/source/compiler/simplex.c @@ -176,12 +176,12 @@ static int aiReturn[4]; /* Rules Support */ static int aiMatched[ MAX_MATCH ]; -static int iMatched = 0; +static unsigned int iMatched = 0; static int aiTentative[2] = { 0, 0 }; -static int iTentative = 0; +static unsigned int iTentative = 0; static int aiProspects[ 256 ]; -static int iProspects = 0; -static int iFound = 0; +static unsigned int iProspects = 0; +static unsigned int iFound = 0; static int iReduce = 0; /* yylex */ @@ -189,7 +189,7 @@ static char * tmpPtr; static char sToken[TOKEN_SIZE]; static int iLen = 0; static char chr, cPrev = 0; -static int iKey, iWord, iMatch, iRemove, iWordLen, iPush, iLastToken = 0; +static unsigned int iMatch, iRemove, iWordLen, iPush, iLastToken = 0; static char szLexBuffer[ YY_BUF_SIZE ]; static char * s_szBuffer; static int iSize = 0; @@ -204,14 +204,24 @@ int yyleng; /* NewLine Support. 
*/
 static BOOL bNewLine = TRUE, bStart = TRUE;
 
-static int iSelfs = (int) ( sizeof( aSelfs ) / LEX_WORD_SIZE );
-static int iKeys = (int) ( sizeof( aKeys ) / LEX_WORD_SIZE );
-static int iWords = (int) ( sizeof( aWords ) / LEX_WORD_SIZE );
-static int iRules = (int) ( sizeof( aiRules ) / LEX_RULE_SIZE );
-static int iPairs = (int) ( sizeof( aPairs ) / LEX_PAIR_SIZE );
+static unsigned int iSelfs = (unsigned int) ( sizeof( aSelfs ) / LEX_WORD_SIZE );
+static unsigned int iKeys = (unsigned int) ( sizeof( aKeys ) / LEX_WORD_SIZE );
+static unsigned int iWords = (unsigned int) ( sizeof( aWords ) / LEX_WORD_SIZE );
+static unsigned int iRules = (unsigned int) ( sizeof( aiRules ) / LEX_RULE_SIZE );
+static unsigned int iPairs = (unsigned int) ( sizeof( aPairs ) / LEX_PAIR_SIZE );
+
+typedef struct _TREE_NODE
+{
+   unsigned int iMin; /* lowest aKeys[] / aWords[] index filed under a first byte */
+   unsigned int iMax; /* highest such index */
+} TREE_NODE;
 
 /* support structure for Streams (Pairs). */
+
+TREE_NODE aKeyNodes[256], aWordNodes[256]; /* subscripted by the first byte of the token */
 int Reduce( int iToken, BOOL bReal );
 
+static void GenTrees( void );
+
 /* --------------------------------------------------------------------------------- */
 /* MACROS. */
@@ -256,9 +266,9 @@ int Reduce( int iToken, BOOL bReal );
 
 #define IF_BEGIN_PAIR(chr) \
    {\
-      register int iPair = 0, iStartLen; \
-      register char chrStart; \
-      int iLastPair = 0, iLastLen = 0; \
+      register unsigned int iPair = 0, iStartLen; \
+      register unsigned char chrStart; \
+      unsigned int iLastPair = 0, iLastLen = 0; \
       \
       DEBUG_INFO( printf( "Checking %i Streams for %c At: >%s<\n", iPairs, chr, szBuffer - 1 ) ); \
       \
@@ -335,8 +345,8 @@ int Reduce( int iToken, BOOL bReal );
    DEBUG_INFO( printf( "Checking %i Selfs for %c At: >%s<\n", iSelfs, chr, szBuffer - 1 ) ); \
    \
    {\
-      register int iSelf = 0, iSelfLen; \
-      register char chrSelf; \
+      register unsigned int iSelf = 0, iSelfLen; \
+      register unsigned char chrSelf; \
       \
       while( iSelf < iSelfs ) \
       { \
@@ -587,7 +597,7 @@ int Reduce( int iToken, BOOL bReal );
 
 #define SCAN_PROSPECTS()\
    {\
-      register int iScan = 0;\
+      register unsigned int iScan = 0;\
       \
       DEBUG_INFO( printf( "Scaning %i Prospects for %i at Pos: %i\n", iProspects, iToken, iMatched ) ); \
       \
@@ -638,7 +648,7 @@ int Reduce( int iToken, BOOL bReal );
 
 #define SCAN_RULES()\
    {\
-      register int iScan = 0;\
+      register unsigned int iScan = 0;\
       \
       DEBUG_INFO( printf( "Scaning %i Rules for %i at Pos: %i\n", iRules, iToken, iMatched ) ); \
       \
@@ -905,6 +915,7 @@ YY_DECL
    if( bStart )
    {
       bStart = FALSE;
+      GenTrees();
       INIT_ACTION();
    }
 
@@ -1209,6 +1220,8 @@ YY_DECL
 
    CheckToken:
    {
+      unsigned int i, iMax;
+
       if( bNewLine )
       {
          bIgnoreWords = FALSE;
@@ -1222,13 +1235,16 @@ YY_DECL
          }
          #endif
 
-         iKey = 0;
-         while ( iKey < iKeys )
+         i = aKeyNodes[ (unsigned char) sToken[0] ].iMin;
+         iMax = aKeyNodes[ (unsigned char) sToken[0] ].iMax + 1;
+         DEBUG_INFO( printf( "Scanning %i Keys for Token: %s\n", iMax - i, (char*) sToken ) );
+
+         while ( i < iMax )
          {
             #ifdef LEX_ABBREVIATE_KEYS
-               if( strncmp( (char*) sToken, (char*)( aKeys[ iKey++ ].sWord ), iWordLen ) == 0 )
+               if( strncmp( (char*) sToken, (char*)( aKeys[ i++ ].sWord ), iWordLen ) == 0 )
             #else
-               if( strcmp( (char*) sToken, (char*) ( aKeys[ iKey++ ].sWord ) ) == 0 )
+               if( strcmp( (char*) sToken, (char*) ( aKeys[ i++ ].sWord ) ) == 0 )
             #endif
             {
                DEBUG_INFO( printf( "Reducing Key Word: %s\n", (char*) sToken ) );
@@ -1236,9 +1252,9 @@ YY_DECL
                bNewLine = FALSE;
                NEW_LINE_ACTION();
 
-               if( aKeys[ iKey - 1 ].iToken < LEX_CUSTOM_ACTION )
+               if( aKeys[ i - 1 ].iToken < LEX_CUSTOM_ACTION )
                {
-                  iRet = aKeys[ iKey - 1 ].iToken;
+                  iRet = aKeys[ i - 1 ].iToken;
                   iRet = CUSTOM_ACTION( iRet );
                   if( iRet )
                   {
@@ -1251,7 +1267,7 @@ YY_DECL
                }
                else
                {
-                  RETURN_TOKEN( REDUCE( aKeys[ iKey - 1 ].iToken ), (char*) sToken );
+                  RETURN_TOKEN( REDUCE( aKeys[ i - 1 ].iToken ), (char*) sToken );
                }
             }
          }
@@ -1263,7 +1279,6 @@ YY_DECL
          }
       }
 
-
       if( bIgnoreWords )
       {
          DEBUG_INFO( printf( "Skiped Words for Word: %s\n", (char*) sToken ) );
@@ -1280,20 +1295,23 @@ YY_DECL
          }
          #endif
 
-         iWord = 0;
-         while ( iWord < iWords )
+         i = aWordNodes[ (unsigned char) sToken[0] ].iMin;
+         iMax = aWordNodes[ (unsigned char) sToken[0] ].iMax + 1;
+         DEBUG_INFO( printf( "Scanning %i Words for Token: %s\n", iMax - i, (char*) sToken ) );
+
+         while ( i < iMax )
          {
             #ifdef LEX_ABBREVIATE_WORDS
-               if( strncmp( (char*) sToken, (char*) ( aWords[ iWord++ ].sWord ), iWordLen ) == 0 )
+               if( strncmp( (char*) sToken, (char*) ( aWords[ i++ ].sWord ), iWordLen ) == 0 )
             #else
-               if( strcmp( (char*) sToken, (char*) ( aWords[ iWord++ ].sWord ) ) == 0 )
+               if( strcmp( (char*) sToken, (char*) ( aWords[ i++ ].sWord ) ) == 0 )
             #endif
             {
                DEBUG_INFO( printf( "Reducing Word: %s\n", (char*) sToken ) );
 
-               if( aWords[ iWord - 1 ].iToken < LEX_CUSTOM_ACTION )
+               if( aWords[ i - 1 ].iToken < LEX_CUSTOM_ACTION )
                {
-                  iRet = aWords[ iWord - 1 ].iToken;
+                  iRet = aWords[ i - 1 ].iToken;
                   iRet = CUSTOM_ACTION( iRet );
                   if( iRet )
                   {
@@ -1306,7 +1324,7 @@ YY_DECL
                }
                else
                {
-                  RETURN_TOKEN( REDUCE( aWords[ iWord - 1 ].iToken ), (char*) sToken );
+                  RETURN_TOKEN( REDUCE( aWords[ i - 1 ].iToken ), (char*) sToken );
                }
             }
          }
@@ -1457,3 +1475,57 @@ void * yy_bytes_buffer( char * pBuffer, int iBufSize )
    iSize = iBufSize;
    return s_szBuffer;
 }
+
+/*
+ * GenTrees() - index aKeys[] and aWords[] by the first byte of each entry.
+ *
+ * For every byte value c, aKeyNodes[c] and aWordNodes[c] receive the lowest
+ * (iMin) and highest (iMax) table index whose sWord begins with c, so the
+ * lookup loops under CheckToken only compare the token against that slice.
+ * Keeping the tables sorted (see the .slx files) keeps each slice tight.
+ *
+ * A byte that begins no entry keeps the empty range iMin = 1, iMax = 0; the
+ * lookup loops start at iMin and run while i < iMax + 1, so they are skipped.
+ * Index 0 is a valid entry and cannot double as the "unset" marker: with
+ * that marker, a second entry sharing the first byte of entry 0 moved iMin
+ * to 1 and entry 0 could never be matched.
+ */
+static void GenTrees( void )
+{
+   register unsigned int i;
+   register unsigned char cIndex;
+
+   /* Start with every range empty (iMin > iMax). */
+   for( i = 0; i < 256; i++ )
+   {
+      aKeyNodes[i].iMin = 1;
+      aKeyNodes[i].iMax = 0;
+      aWordNodes[i].iMin = 1;
+      aWordNodes[i].iMax = 0;
+   }
+
+   for( i = 0; i < iKeys; i++ )
+   {
+      /* cIndex is unsigned, so a byte >= 0x80 still gives a subscript in 0..255. */
+      cIndex = aKeys[i].sWord[0];
+
+      if( aKeyNodes[ cIndex ].iMin > aKeyNodes[ cIndex ].iMax )
+      {
+         aKeyNodes[ cIndex ].iMin = i; /* first entry seen for this byte */
+      }
+
+      aKeyNodes[ cIndex ].iMax = i;
+   }
+
+   for( i = 0; i < iWords; i++ )
+   {
+      cIndex = aWords[i].sWord[0];
+
+      if( aWordNodes[ cIndex ].iMin > aWordNodes[ cIndex ].iMax )
+      {
+         aWordNodes[ cIndex ].iMin = i; /* first entry seen for this byte */
+      }
+
+      aWordNodes[ cIndex ].iMax = i;
+   }
+}
diff --git a/harbour/source/macro/macro.slx b/harbour/source/macro/macro.slx
index b402f5a58c..f99f9be7f0 100644
--- a/harbour/source/macro/macro.slx
+++ b/harbour/source/macro/macro.slx
@@ -129,12 +129,12 @@ LANGUAGE_KEY_WORDS_ARE {
 
 /* Words. */
 LANGUAGE_WORDS_ARE {
+   LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ),
    LEX_WORD( "IF" ) AS_TOKEN( _IF_ ),
    LEX_WORD( "IIF" ) AS_TOKEN( IIF ),
    LEX_WORD( "NIL" ) AS_TOKEN( NIL ),
-   LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ),
    LEX_WORD( "QSELF" ) AS_TOKEN( QSELF ),
-   LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ),
+   LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ),
    LEX_WORD( "_FIELD" ) AS_TOKEN( _FIELD )
 };
 