2000-08-21 15:45 UTC+0800 Ron Pinkas <ron@profit-master.com>

* source/compiler/simplex.c
     ! Optimized Keywords and Words search, Lexer is 10-15% faster.

   * source/compiler/harbour.slx
   * source/macro/macro.slx
     ! Sorted Keywords and Words, to support optimized search, by simplex.
This commit is contained in:
Ron Pinkas
2000-08-21 22:47:22 +00:00
parent 4307f615c8
commit 93f92c364e
4 changed files with 152 additions and 81 deletions

View File

@@ -1,8 +1,15 @@
2000-08-21 15:45 UTC+0800 Ron Pinkas <ron@profit-master.com>
* source/compiler/simplex.c
! Optimized Keywords and Words search, Lexer is 10-15% faster.
* source/compiler/harbour.slx
* source/macro/macro.slx
! Sorted Keywords and Words, to support optimized search, by simplex.
2000-08-21 15:18 GMT+3 Alexander Kresin <alex@belacy.belgorod.su>
*source/rdd/dbcmd.c
* fixed bugs, reported by Juergen Baez
2000-08-17 12:18 UTC+0100 Victor Szakats <info@szelvesz.hu>
* include/hbver.h
@@ -20,7 +27,7 @@
* Minor correction (alphabetical order).
* source/compiler/harbour.l
+ AS USUAL added. I could not make this HB_COMPAT_VO dependent, so this
+ AS USUAL added. I could not make this HB_COMPAT_VO dependent, so this
is left as a TODO.
2000-08-16 18:40 UTC+0800 Brian Hays <bhays@abacuslaw.com>

View File

@@ -199,39 +199,39 @@ SELF_CONTAINED_WORDS_ARE {
/* Key Words. */
LANGUAGE_KEY_WORDS_ARE {
LEX_WORD( "FUNCTION" ) AS_TOKEN( HB_FUNCTION ),
LEX_WORD( "PROCEDURE" ) AS_TOKEN( HB_PROCEDURE),
LEX_WORD( "RETURN" ) AS_TOKEN( RETURN ),
LEX_WORD( "LOCAL" ) AS_TOKEN( HB_LOCAL ),
LEX_WORD( "STATIC" ) AS_TOKEN( STATIC ),
LEX_WORD( "IF" ) AS_TOKEN( IF ),
LEX_WORD( "ANNOUNCE" ) AS_TOKEN( ANNOUNCE ),
LEX_WORD( "BEGIN" ) AS_TOKEN( BEGIN_ ),
LEX_WORD( "BREAK" ) AS_TOKEN( BREAK ),
LEX_WORD( "CASE" ) AS_TOKEN( CASE ),
LEX_WORD( "DECLARE" ) AS_TOKEN( HB_DECLARE ),
LEX_WORD( "DO" ) AS_TOKEN( DO ),
LEX_WORD( "ELSE" ) AS_TOKEN( ELSE ),
LEX_WORD( "ELSEIF" ) AS_TOKEN( ELSEIF ),
LEX_WORD( "END" ) AS_TOKEN( END ),
LEX_WORD( "ENDIF" ) AS_TOKEN( ENDIF ),
LEX_WORD( "ANNOUNCE" ) AS_TOKEN( ANNOUNCE ),
LEX_WORD( "EXTERNAL" ) AS_TOKEN( HB_EXTERN ),
LEX_WORD( "INIT" ) AS_TOKEN( INIT ),
LEX_WORD( "EXIT" ) AS_TOKEN( EXIT_ ),
LEX_WORD( "PUBLIC" ) AS_TOKEN( HB_PUBLIC ),
LEX_WORD( "CASE" ) AS_TOKEN( CASE ),
LEX_WORD( "OTHERWISE" ) AS_TOKEN( OTHERWISE ),
LEX_WORD( "ENDCASE" ) AS_TOKEN( ENDCASE ),
LEX_WORD( "ENDDO" ) AS_TOKEN( ENDDO ),
LEX_WORD( "MEMVAR" ) AS_TOKEN( HB_MEMVAR ),
LEX_WORD( "LOOP" ) AS_TOKEN( LOOP ),
LEX_WORD( "ENDIF" ) AS_TOKEN( ENDIF ),
LEX_WORD( "EXIT" ) AS_TOKEN( EXIT_ ),
LEX_WORD( "EXTERNAL" ) AS_TOKEN( HB_EXTERN ),
LEX_WORD( "FIELD" ) AS_TOKEN( HB_FIELD ),
LEX_WORD( "FOR" ) AS_TOKEN( HB_FOR ),
LEX_WORD( "FUNCTION" ) AS_TOKEN( HB_FUNCTION ),
LEX_WORD( "IF" ) AS_TOKEN( IF ),
LEX_WORD( "INIT" ) AS_TOKEN( INIT ),
LEX_WORD( "LOCAL" ) AS_TOKEN( HB_LOCAL ),
LEX_WORD( "LOOP" ) AS_TOKEN( LOOP ),
LEX_WORD( "MEMVAR" ) AS_TOKEN( HB_MEMVAR ),
LEX_WORD( "NEXT" ) AS_TOKEN( NEXT ),
LEX_WORD( "OTHERWISE" ) AS_TOKEN( OTHERWISE ),
LEX_WORD( "PARAMETERS" ) AS_TOKEN( HB_PARAM ),
LEX_WORD( "PRIVATE" ) AS_TOKEN( HB_PRIVATE ),
LEX_WORD( "BEGIN" ) AS_TOKEN( BEGIN_ ),
LEX_WORD( "BREAK" ) AS_TOKEN( BREAK ),
LEX_WORD( "PROCEDURE" ) AS_TOKEN( HB_PROCEDURE),
LEX_WORD( "PUBLIC" ) AS_TOKEN( HB_PUBLIC ),
LEX_WORD( "RECOVER" ) AS_TOKEN( RECOVER ),
LEX_WORD( "DO" ) AS_TOKEN( DO ),
LEX_WORD( "RETURN" ) AS_TOKEN( RETURN ),
LEX_WORD( "STATIC" ) AS_TOKEN( STATIC ),
LEX_WORD( "WHILE" ) AS_TOKEN( WHILE ),
LEX_WORD( "DECLARE" ) AS_TOKEN( HB_DECLARE ),
LEX_WORD( "_PROCREQ_" ) AS_TOKEN( PROCREQ_ ),
LEX_WORD( "FIELD" ) AS_TOKEN( HB_FIELD )
LEX_WORD( "_PROCREQ_" ) AS_TOKEN( PROCREQ_ )
};
/* Intermediate Words when ambiguous. */
@@ -262,35 +262,35 @@ LANGUAGE_KEY_WORDS_ARE {
/* Words. */
LANGUAGE_WORDS_ARE {
LEX_WORD( "FUNCTION" ) AS_TOKEN( _FUNC_ ),
LEX_WORD( "PROCEDURE" ) AS_TOKEN( _PROC_ ),
LEX_WORD( "IF" ) AS_TOKEN( _IF_ ),
LEX_WORD( "CASE" ) AS_TOKEN( _CASE_ ),
LEX_WORD( "WHILE" ) AS_TOKEN( _WHILE_ ),
LEX_WORD( "SEQUENCE" ) AS_TOKEN( _SEQUENCE_ ),
LEX_WORD( "USING" ) AS_TOKEN( _USING_ ),
LEX_WORD( "OPTIONAL" ) AS_TOKEN( HB_OPTIONAL ),
LEX_WORD( "NIL" ) AS_TOKEN( NIL ),
LEX_WORD( "IIF" ) AS_TOKEN( IIF ),
LEX_WORD( "TO" ) AS_TOKEN( TO ),
LEX_WORD( "STEP" ) AS_TOKEN( STEP ),
LEX_WORD( "IN" ) AS_TOKEN( HB_CHK_IN ),
LEX_WORD( "WITH" ) AS_TOKEN( _WITH_ ),
LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ),
LEX_WORD( "QSELF" ) AS_TOKEN( QSELF ),
LEX_WORD( "LINE" ) AS_TOKEN( _LINE_ ),
LEX_WORD( "AS" ) AS_TOKEN( _AS_ ),
LEX_WORD( "OF" ) AS_TOKEN( _OF_ ),
LEX_WORD( "ANYTYPE" ) AS_TOKEN( _VARIANT_ ),
LEX_WORD( "ARRAY" ) AS_TOKEN( _ARRAY_ ),
LEX_WORD( "CODEBLOCK" ) AS_TOKEN( _BLOCK_ ),
LEX_WORD( "STRING" ) AS_TOKEN( _STRING_ ),
LEX_WORD( "AS" ) AS_TOKEN( _AS_ ),
LEX_WORD( "CASE" ) AS_TOKEN( _CASE_ ),
LEX_WORD( "CLASS" ) AS_TOKEN( _CLASS_ ),
LEX_WORD( "CODEBLOCK" ) AS_TOKEN( _BLOCK_ ),
LEX_WORD( "DATE" ) AS_TOKEN( _DATE_ ),
LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ),
LEX_WORD( "FUNCTION" ) AS_TOKEN( _FUNC_ ),
LEX_WORD( "IF" ) AS_TOKEN( _IF_ ),
LEX_WORD( "IIF" ) AS_TOKEN( IIF ),
LEX_WORD( "IN" ) AS_TOKEN( HB_CHK_IN ),
LEX_WORD( "LINE" ) AS_TOKEN( _LINE_ ),
LEX_WORD( "LOGICAL" ) AS_TOKEN( _LOGICAL_ ),
LEX_WORD( "NIL" ) AS_TOKEN( NIL ),
LEX_WORD( "NUMERIC" ) AS_TOKEN( _NUMERIC_ ),
LEX_WORD( "OBJECT" ) AS_TOKEN( _OBJECT_ ),
LEX_WORD( "ANYTYPE" ) AS_TOKEN( _VARIANT_ ),
LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ),
LEX_WORD( "OF" ) AS_TOKEN( _OF_ ),
LEX_WORD( "OPTIONAL" ) AS_TOKEN( HB_OPTIONAL ),
LEX_WORD( "PROCEDURE" ) AS_TOKEN( _PROC_ ),
LEX_WORD( "QSELF" ) AS_TOKEN( QSELF ),
LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ),
LEX_WORD( "SEQUENCE" ) AS_TOKEN( _SEQUENCE_ ),
LEX_WORD( "STEP" ) AS_TOKEN( STEP ),
LEX_WORD( "STRING" ) AS_TOKEN( _STRING_ ),
LEX_WORD( "TO" ) AS_TOKEN( TO ),
LEX_WORD( "USING" ) AS_TOKEN( _USING_ ),
LEX_WORD( "WHILE" ) AS_TOKEN( _WHILE_ ),
LEX_WORD( "WITH" ) AS_TOKEN( _WITH_ ),
LEX_WORD( "_FIELD" ) AS_TOKEN( _FIELD )
};

View File

@@ -176,12 +176,12 @@ static int aiReturn[4];
/* Rules Support */
static int aiMatched[ MAX_MATCH ];
static int iMatched = 0;
static unsigned int iMatched = 0;
static int aiTentative[2] = { 0, 0 };
static int iTentative = 0;
static unsigned int iTentative = 0;
static int aiProspects[ 256 ];
static int iProspects = 0;
static int iFound = 0;
static unsigned int iProspects = 0;
static unsigned int iFound = 0;
static int iReduce = 0;
/* yylex */
@@ -189,7 +189,7 @@ static char * tmpPtr;
static char sToken[TOKEN_SIZE];
static int iLen = 0;
static char chr, cPrev = 0;
static int iKey, iWord, iMatch, iRemove, iWordLen, iPush, iLastToken = 0;
static unsigned int iMatch, iRemove, iWordLen, iPush, iLastToken = 0;
static char szLexBuffer[ YY_BUF_SIZE ];
static char * s_szBuffer;
static int iSize = 0;
@@ -204,14 +204,24 @@ int yyleng;
/* NewLine Support. */
static BOOL bNewLine = TRUE, bStart = TRUE;
static int iSelfs = (int) ( sizeof( aSelfs ) / LEX_WORD_SIZE );
static int iKeys = (int) ( sizeof( aKeys ) / LEX_WORD_SIZE );
static int iWords = (int) ( sizeof( aWords ) / LEX_WORD_SIZE );
static int iRules = (int) ( sizeof( aiRules ) / LEX_RULE_SIZE );
static int iPairs = (int) ( sizeof( aPairs ) / LEX_PAIR_SIZE );
static unsigned int iSelfs = (int) ( sizeof( aSelfs ) / LEX_WORD_SIZE );
static unsigned int iKeys = (int) ( sizeof( aKeys ) / LEX_WORD_SIZE );
static unsigned int iWords = (int) ( sizeof( aWords ) / LEX_WORD_SIZE );
static unsigned int iRules = (int) ( sizeof( aiRules ) / LEX_RULE_SIZE );
static unsigned int iPairs = (int) ( sizeof( aPairs ) / LEX_PAIR_SIZE );
/* Index range into a lexer word table (aKeys or aWords) for one leading
   character. GenTrees() fills these in; the token lookup reads iMin as the
   first index to compare and iMax + 1 as the loop bound. */
typedef struct _TREE_NODE
{
unsigned int iMin; /* index of the first table entry recorded for this character */
unsigned int iMax; /* index of the last table entry recorded for this character */
} TREE_NODE; /* support structure for the first-character search of Keys and Words. */
/* One node per possible first byte of a token; subscripted with the token's
   first character (sToken[0]) by the Key and Word scans. */
TREE_NODE aKeyNodes[256], aWordNodes[256];
int Reduce( int iToken, BOOL bReal );
static void GenTrees( void );
/* --------------------------------------------------------------------------------- */
/* MACROS. */
@@ -256,9 +266,9 @@ int Reduce( int iToken, BOOL bReal );
#define IF_BEGIN_PAIR(chr) \
{\
register int iPair = 0, iStartLen; \
register char chrStart; \
int iLastPair = 0, iLastLen = 0; \
register unsigned int iPair = 0, iStartLen; \
register unsigned char chrStart; \
unsigned int iLastPair = 0, iLastLen = 0; \
\
DEBUG_INFO( printf( "Checking %i Streams for %c At: >%s<\n", iPairs, chr, szBuffer - 1 ) ); \
\
@@ -335,8 +345,8 @@ int Reduce( int iToken, BOOL bReal );
DEBUG_INFO( printf( "Checking %i Selfs for %c At: >%s<\n", iSelfs, chr, szBuffer - 1 ) ); \
\
{\
register int iSelf = 0, iSelfLen; \
register char chrSelf; \
register unsigned int iSelf = 0, iSelfLen; \
register unsigned char chrSelf; \
\
while( iSelf < iSelfs ) \
{ \
@@ -587,7 +597,7 @@ int Reduce( int iToken, BOOL bReal );
#define SCAN_PROSPECTS()\
{\
register int iScan = 0;\
register unsigned int iScan = 0;\
\
DEBUG_INFO( printf( "Scaning %i Prospects for %i at Pos: %i\n", iProspects, iToken, iMatched ) ); \
\
@@ -638,7 +648,7 @@ int Reduce( int iToken, BOOL bReal );
#define SCAN_RULES()\
{\
register int iScan = 0;\
register unsigned int iScan = 0;\
\
DEBUG_INFO( printf( "Scaning %i Rules for %i at Pos: %i\n", iRules, iToken, iMatched ) ); \
\
@@ -905,6 +915,7 @@ YY_DECL
/* One-time lexer start-up: runs on the first pass only, then clears bStart. */
if( bStart )
{
bStart = FALSE;
/* Build the first-character lookup tables before any token is scanned.
   The call needs its own terminating ';' - without it the statement only
   compiles when INIT_ACTION() happens to expand to nothing. */
GenTrees();
INIT_ACTION();
}
@@ -1209,6 +1220,8 @@ YY_DECL
CheckToken:
{
unsigned int i, iMax;
if( bNewLine )
{
bIgnoreWords = FALSE;
@@ -1222,13 +1235,16 @@ YY_DECL
}
#endif
iKey = 0;
while ( iKey < iKeys )
i = aKeyNodes[ sToken[0] ].iMin;
iMax = aKeyNodes[ sToken[0] ].iMax + 1;
DEBUG_INFO( printf( "Scanning %i Keys for Token: %s\n", iMax - i, (char*) sToken ) );
while ( i < iMax )
{
#ifdef LEX_ABBREVIATE_KEYS
if( strncmp( (char*) sToken, (char*)( aKeys[ iKey++ ].sWord ), iWordLen ) == 0 )
if( strncmp( (char*) sToken, (char*)( aKeys[ i++ ].sWord ), iWordLen ) == 0 )
#else
if( strcmp( (char*) sToken, (char*) ( aKeys[ iKey++ ].sWord ) ) == 0 )
if( strcmp( (char*) sToken, (char*) ( aKeys[ i++ ].sWord ) ) == 0 )
#endif
{
DEBUG_INFO( printf( "Reducing Key Word: %s\n", (char*) sToken ) );
@@ -1236,9 +1252,9 @@ YY_DECL
bNewLine = FALSE;
NEW_LINE_ACTION();
if( aKeys[ iKey - 1 ].iToken < LEX_CUSTOM_ACTION )
if( aKeys[ i - 1 ].iToken < LEX_CUSTOM_ACTION )
{
iRet = aKeys[ iKey - 1 ].iToken;
iRet = aKeys[ i - 1 ].iToken;
iRet = CUSTOM_ACTION( iRet );
if( iRet )
{
@@ -1251,7 +1267,7 @@ YY_DECL
}
else
{
RETURN_TOKEN( REDUCE( aKeys[ iKey - 1 ].iToken ), (char*) sToken );
RETURN_TOKEN( REDUCE( aKeys[ i - 1 ].iToken ), (char*) sToken );
}
}
}
@@ -1263,7 +1279,6 @@ YY_DECL
}
}
if( bIgnoreWords )
{
DEBUG_INFO( printf( "Skiped Words for Word: %s\n", (char*) sToken ) );
@@ -1280,20 +1295,23 @@ YY_DECL
}
#endif
iWord = 0;
while ( iWord < iWords )
i = aWordNodes[ sToken[0] ].iMin;
iMax = aWordNodes[ sToken[0] ].iMax + 1;
DEBUG_INFO( printf( "Scanning %i Words for Token: %s\n", iMax - i, (char*) sToken ) );
while ( i < iMax )
{
#ifdef LEX_ABBREVIATE_WORDS
if( strncmp( (char*) sToken, (char*) ( aWords[ iWord++ ].sWord ), iWordLen ) == 0 )
if( strncmp( (char*) sToken, (char*) ( aWords[ i++ ].sWord ), iWordLen ) == 0 )
#else
if( strcmp( (char*) sToken, (char*) ( aWords[ iWord++ ].sWord ) ) == 0 )
if( strcmp( (char*) sToken, (char*) ( aWords[ i++ ].sWord ) ) == 0 )
#endif
{
DEBUG_INFO( printf( "Reducing Word: %s\n", (char*) sToken ) );
if( aWords[ iWord - 1 ].iToken < LEX_CUSTOM_ACTION )
if( aWords[ i - 1 ].iToken < LEX_CUSTOM_ACTION )
{
iRet = aWords[ iWord - 1 ].iToken;
iRet = aWords[ i - 1 ].iToken;
iRet = CUSTOM_ACTION( iRet );
if( iRet )
{
@@ -1306,7 +1324,7 @@ YY_DECL
}
else
{
RETURN_TOKEN( REDUCE( aWords[ iWord - 1 ].iToken ), (char*) sToken );
RETURN_TOKEN( REDUCE( aWords[ i - 1 ].iToken ), (char*) sToken );
}
}
}
@@ -1457,3 +1475,49 @@ void * yy_bytes_buffer( char * pBuffer, int iBufSize )
iSize = iBufSize;
return s_szBuffer;
}
/*
 * GenTrees() - build the first-character lookup tables for aKeys and aWords.
 *
 * For each possible first byte c, aKeyNodes[c] and aWordNodes[c] receive the
 * inclusive index range [iMin, iMax] spanning the entries of aKeys / aWords
 * whose sWord starts with c: iMin is the first such index, iMax the last.
 *
 * A character with no entries is left as an empty range (iMin > iMax), so a
 * scan of the form "for i = iMin; i < iMax + 1" visits nothing.
 *
 * Takes no arguments and returns nothing; it reads aKeys, aWords, iKeys and
 * iWords, and overwrites all 256 elements of aKeyNodes and aWordNodes.
 */
static void GenTrees( void )
{
   register unsigned int i;
   register unsigned char cIndex;

   /* Start with every bucket empty. iMin == 0 can not serve as the "unset"
      marker because 0 is a valid table index: the bucket owning entry 0
      would have its iMin overwritten by the next entry with the same first
      character, dropping entry 0 from the search range. */
   for( i = 0; i < 256; i++ )
   {
      aKeyNodes[i].iMin = 1;
      aKeyNodes[i].iMax = 0;
      aWordNodes[i].iMin = 1;
      aWordNodes[i].iMax = 0;
   }

   for( i = 0; i < iKeys; i++ )
   {
      /* Index by the byte value so characters above 127 stay in 0..255. */
      cIndex = (unsigned char) aKeys[i].sWord[0];

      /* Bucket still empty: this is the first Key for the character. */
      if( aKeyNodes[ cIndex ].iMin > aKeyNodes[ cIndex ].iMax )
      {
         aKeyNodes[ cIndex ].iMin = i;
      }

      /* Always advance iMax, so it ends on the last Key for the character. */
      aKeyNodes[ cIndex ].iMax = i;
   }

   for( i = 0; i < iWords; i++ )
   {
      cIndex = (unsigned char) aWords[i].sWord[0];

      /* Bucket still empty: this is the first Word for the character. */
      if( aWordNodes[ cIndex ].iMin > aWordNodes[ cIndex ].iMax )
      {
         aWordNodes[ cIndex ].iMin = i;
      }

      aWordNodes[ cIndex ].iMax = i;
   }
}

View File

@@ -129,12 +129,12 @@ LANGUAGE_KEY_WORDS_ARE {
/* Words. */
LANGUAGE_WORDS_ARE {
LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ),
LEX_WORD( "IF" ) AS_TOKEN( _IF_ ),
LEX_WORD( "IIF" ) AS_TOKEN( IIF ),
LEX_WORD( "NIL" ) AS_TOKEN( NIL ),
LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ),
LEX_WORD( "QSELF" ) AS_TOKEN( QSELF ),
LEX_WORD( "FIELD" ) AS_TOKEN( _FIELD_ ),
LEX_WORD( "SELF" ) AS_TOKEN( _SELF_ ),
LEX_WORD( "_FIELD" ) AS_TOKEN( _FIELD )
};