Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2 SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved as a single checkpoint before refactoring the parser to delegate xBase command translation to the preprocessor. Highlights: FiveSql2 engine (_FiveSql2/src/) - prefix-glob index attach -> explicit convention (<table>_pk.ntx, <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop - DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt) - COUNT(DISTINCT col) parsed + aggregated via hSeen hash - UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent) - DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT) - Derived table FROM (SELECT...) + JOIN right-side derived - Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect - LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs) - DATE literal round-trip validation (Feb 29 non-leap rejected) - CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists - AlterTable type dispatcher comma-wrapped (1-char type "A" no longer matches CHARACTER) Compiler / runtime - gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity) - gengo split: emit_block.go, emit_stmt.go, folding.go extracted - parser/stmtreg.go nudges - hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*), windows debug stubs collapsed - thread/vm/value/class/pcinterp tightening from panic traces RDD layer (hbrdd/) - dbf: null bitmap support (null.go + null_test.go), mmap split (mmap_posix.go / mmap_windows.go), byte-level numeric parse - ntx/cdx: windows mmap parity - workarea + mem RDD: cross-area state-bleed fixes RTL (hbrtl/) - errorlog rewrite with platform-specific FD (errorlog_fd_unix / errorlog_fd_other) - sqlscan, sqlhelpers, indexrtl, datetime extensions Gates green at checkpoint: - go test ./... : PASS - FiveSql2 SQL:1999 : 43/43 - Harbour compat : 56/56 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
229 lines
6.3 KiB
Plaintext
229 lines
6.3 KiB
Plaintext
/*
 * TSqlLexer.prg — SQL lexical analyzer (tokenizer)
 *
 * FiveSql — SQL Engine for Harbour DBF/NTX
 *
 * Copyright (c) 2025 Charles KWON (Charles KWON OhJun)
 * Email: charleskwonohjun@gmail.com
 *
 * All rights reserved.
 */
|
|
|
|
#include "hbclass.ch"
|
|
#include "FiveSqlDef.ch"
|
|
|
|
/*
 * TSqlLexer — turns a SQL string into a flat array of tokens.
 *
 * Usage pattern shown by the methods below: New( cSQL ) stores the text,
 * Tokenize() fills the token array, GetTokens() hands it back.
 */
CLASS TSqlLexer

   /* --- state --- */
   DATA cInput                        // SQL text handed to New()
   DATA aTokens                       // token array, rebuilt by Tokenize()
   DATA nLen                          // Len() of the SQL text, cached by New()

   /* --- interface --- */
   METHOD New( cSQL ) CONSTRUCTOR     // store the text, reset the token array
   METHOD Tokenize()                  // scan cInput and populate aTokens
   METHOD GetTokens()                 // return aTokens

ENDCLASS
|
|
|
|
/*
 * New( cSQL ) — constructor.
 *
 *   cSQL : the SQL text to be tokenized.
 *
 * Stores the text, caches its length and starts with an empty token
 * array. No scanning happens here; that is done by Tokenize().
 * Returns the new object.
 */
METHOD New( cSQL ) CLASS TSqlLexer

   // Raw statement text, kept exactly as received.
   ::cInput := cSQL

   // Token array starts empty.
   ::aTokens := {}

   // Cached length of the text; Tokenize() uses it as its scan limit.
   ::nLen := Len( cSQL )

   RETURN Self
|
|
|
|
/*
 * GetTokens() — accessor for the token array.
 *
 * Returns ::aTokens itself (no AClone() is made), i.e. whatever New()
 * or the most recent Tokenize() call left there.
 */
METHOD GetTokens() CLASS TSqlLexer

   LOCAL aResult := ::aTokens

   RETURN aResult
|
|
|
|
/*
 * Tokenize() — scan ::cInput from left to right and rebuild ::aTokens.
 *
 * Each token appended to ::aTokens is a two-element array
 * { nKind, cText }; nKind is one of the TK_ constants (expected to come
 * from FiveSqlDef.ch — that header is not visible here).
 *
 * Recognised, in this order:
 *   - whitespace (space, TAB, LF, CR)            -> skipped
 *   - line comment starting with two dashes      -> skipped up to the LF
 *   - block comment (slash-star form)            -> skipped; an
 *     unterminated one swallows the rest of the input
 *   - 'single quoted' string, '' = one quote     -> TK_TEXT
 *     (an unterminated string yields the text up to end of input)
 *   - digits with at most ONE decimal point      -> TK_NUM (text kept as-is)
 *   - identifier / keyword                       -> TK_NAME, upper-cased
 *   - [bracketed identifier]                     -> TK_NAME, upper-cased,
 *     brackets removed
 *   - ?                                          -> TK_QMARK
 *   - .T. .Y. / .F. .N. (any letter case)        -> TK_NAME "TRUE" / "FALSE",
 *     unless the leading dot is glued to an identifier character or "]"
 *     (then it is an ordinary qualifier dot, e.g. db.t.col)
 *   - , . * ( ) + - / || = < <= <> > >= !=       -> their TK_ kinds
 *
 * A lone "|", a lone "!", ";" and every other unrecognised character are
 * dropped without emitting a token (unchanged legacy behaviour).
 *
 * A { TK_END, "" } sentinel is always appended last.
 *
 * Returns Self.
 */
METHOD Tokenize() CLASS TSqlLexer

   LOCAL cSrc := ::cInput
   LOCAL nMax := ::nLen
   LOCAL nPos := 1
   LOCAL ch, cNext, cPrev, cToken, cLit
   LOCAL lClosed, lDot

   ::aTokens := {}

   WHILE nPos <= nMax

      ch    := SubStr( cSrc, nPos, 1 )
      // One-character lookahead; empty at the last position.
      cNext := iif( nPos < nMax, SubStr( cSrc, nPos + 1, 1 ), "" )

      // ---- whitespace ------------------------------------------------
      IF ch == " " .OR. ch == Chr( 9 ) .OR. ch == Chr( 10 ) .OR. ch == Chr( 13 )
         nPos++
         LOOP
      ENDIF

      // ---- line comment: two dashes up to (not including) the LF -----
      IF ch == "-" .AND. cNext == "-"
         WHILE nPos <= nMax .AND. SubStr( cSrc, nPos, 1 ) != Chr( 10 )
            nPos++
         ENDDO
         LOOP
      ENDIF

      // ---- block comment ---------------------------------------------
      IF ch == "/" .AND. cNext == "*"
         nPos    += 2
         lClosed := .F.
         WHILE nPos < nMax
            IF SubStr( cSrc, nPos, 1 ) == "*" .AND. SubStr( cSrc, nPos + 1, 1 ) == "/"
               nPos    += 2
               lClosed := .T.
               EXIT
            ENDIF
            nPos++
         ENDDO
         IF ! lClosed
            // No closing delimiter: everything up to end of input is
            // comment text. Without this the scan loop above stops ON
            // the last character, which then leaked out as a token.
            nPos := nMax + 1
         ENDIF
         LOOP
      ENDIF

      // ---- string literal (single-quoted) ----------------------------
      IF ch == "'"
         nPos++
         cToken := ""
         WHILE nPos <= nMax
            ch := SubStr( cSrc, nPos, 1 )
            IF ch == "'"
               IF nPos < nMax .AND. SubStr( cSrc, nPos + 1, 1 ) == "'"
                  // Doubled quote inside the literal = one quote char.
                  cToken += "'"
                  nPos += 2
               ELSE
                  // Closing quote.
                  nPos++
                  EXIT
               ENDIF
            ELSE
               cToken += ch
               nPos++
            ENDIF
         ENDDO
         AAdd( ::aTokens, { TK_TEXT, cToken } )
         LOOP
      ENDIF

      // ---- numeric literal -------------------------------------------
      IF ch >= "0" .AND. ch <= "9"
         cToken := ""
         lDot   := .F.
         WHILE nPos <= nMax
            ch := SubStr( cSrc, nPos, 1 )
            IF ch >= "0" .AND. ch <= "9"
               cToken += ch
               nPos++
            ELSEIF ch == "." .AND. ! lDot
               // Only the first "." belongs to the number; a second one
               // ends the literal so "1.2.3" is no longer one TK_NUM.
               lDot   := .T.
               cToken += ch
               nPos++
            ELSE
               EXIT
            ENDIF
         ENDDO
         AAdd( ::aTokens, { TK_NUM, cToken } )
         LOOP
      ENDIF

      // ---- identifier or keyword -------------------------------------
      IF IsAlpha( ch ) .OR. ch == "_"
         cToken := ""
         WHILE nPos <= nMax
            ch := SubStr( cSrc, nPos, 1 )
            IF IsAlpha( ch ) .OR. IsDigit( ch ) .OR. ch == "_"
               cToken += ch
               nPos++
            ELSE
               EXIT
            ENDIF
         ENDDO
         AAdd( ::aTokens, { TK_NAME, Upper( cToken ) } )
         LOOP
      ENDIF

      // ---- bracketed identifier: [column_name] -----------------------
      IF ch == "["
         nPos++
         cToken := ""
         WHILE nPos <= nMax .AND. SubStr( cSrc, nPos, 1 ) != "]"
            cToken += SubStr( cSrc, nPos, 1 )
            nPos++
         ENDDO
         IF nPos <= nMax
            nPos++                 // step over the closing "]"
         ENDIF
         AAdd( ::aTokens, { TK_NAME, Upper( cToken ) } )
         LOOP
      ENDIF

      // ---- positional parameter placeholder --------------------------
      IF ch == "?"
         AAdd( ::aTokens, { TK_QMARK, "?" } )
         nPos++
         LOOP
      ENDIF

      // ---- Harbour logical literals: .T. .Y. .F. .N. -----------------
      // Emitted as TK_NAME "TRUE" / "FALSE" so no extra token kind is
      // needed. Must run before the bare "." -> TK_DOT case below.
      IF ch == "." .AND. nPos + 2 <= nMax .AND. ;
         SubStr( cSrc, nPos + 2, 1 ) == "."

         // A dot glued to an identifier (or to a closing bracket) is a
         // qualifier separator, not the start of a literal: in
         // "db.t.col" the middle part is a name, not TRUE.
         cPrev := iif( nPos > 1, SubStr( cSrc, nPos - 1, 1 ), "" )

         IF ! ( IsAlpha( cPrev ) .OR. IsDigit( cPrev ) .OR. ;
                cPrev == "_" .OR. cPrev == "]" )

            cLit := Upper( SubStr( cSrc, nPos + 1, 1 ) )

            IF cLit == "T" .OR. cLit == "Y"
               AAdd( ::aTokens, { TK_NAME, "TRUE" } )
               nPos += 3
               LOOP
            ELSEIF cLit == "F" .OR. cLit == "N"
               AAdd( ::aTokens, { TK_NAME, "FALSE" } )
               nPos += 3
               LOOP
            ENDIF
         ENDIF
      ENDIF

      // ---- punctuation and operators ---------------------------------
      DO CASE
      CASE ch == ","
         AAdd( ::aTokens, { TK_COMMA, "," } )
         nPos++

      CASE ch == "."
         AAdd( ::aTokens, { TK_DOT, "." } )
         nPos++

      CASE ch == "*"
         AAdd( ::aTokens, { TK_STAR, "*" } )
         nPos++

      CASE ch == "("
         AAdd( ::aTokens, { TK_LPAR, "(" } )
         nPos++

      CASE ch == ")"
         AAdd( ::aTokens, { TK_RPAR, ")" } )
         nPos++

      CASE ch == "+"
         AAdd( ::aTokens, { TK_PLUS, "+" } )
         nPos++

      CASE ch == "-"
         AAdd( ::aTokens, { TK_MINUS, "-" } )
         nPos++

      CASE ch == "/"
         AAdd( ::aTokens, { TK_SLASH, "/" } )
         nPos++

      CASE ch == "|"
         IF cNext == "|"
            AAdd( ::aTokens, { TK_PIPES, "||" } )
            nPos += 2
         ELSE
            nPos++                 // lone "|": dropped, no token
         ENDIF

      CASE ch == "="
         AAdd( ::aTokens, { TK_EQ, "=" } )
         nPos++

      CASE ch == "<"
         IF cNext == "="
            AAdd( ::aTokens, { TK_LTE, "<=" } )
            nPos += 2
         ELSEIF cNext == ">"
            AAdd( ::aTokens, { TK_NEQ, "<>" } )
            nPos += 2
         ELSE
            AAdd( ::aTokens, { TK_LT, "<" } )
            nPos++
         ENDIF

      CASE ch == ">"
         IF cNext == "="
            AAdd( ::aTokens, { TK_GTE, ">=" } )
            nPos += 2
         ELSE
            AAdd( ::aTokens, { TK_GT, ">" } )
            nPos++
         ENDIF

      CASE ch == "!"
         IF cNext == "="
            AAdd( ::aTokens, { TK_NEQ, "!=" } )
            nPos += 2
         ELSE
            nPos++                 // lone "!": dropped, no token
         ENDIF

      CASE ch == ";"
         nPos++                    // statement terminator: no token

      OTHERWISE
         nPos++                    // unrecognised character: dropped

      ENDCASE

   ENDDO

   // End-of-input sentinel.
   AAdd( ::aTokens, { TK_END, "" } )

   RETURN Self
|