Files
five/_FiveSql2/src/TSqlLexer.prg
CharlesKWON f4ed42556b checkpoint: season-wide bug fix campaign + infra
Cumulative result of this season's silent-bug hunt (~62 fixes) across the
FiveSql2 SQL engine, the Five compiler/runtime, and the hbrdd RDD layer.
Saved as a single checkpoint before refactoring the parser to delegate xBase
command translation to the preprocessor.

Highlights:

FiveSql2 engine (_FiveSql2/src/)
- prefix-glob index attach -> explicit convention (<table>_pk.ntx,
  <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop
- DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt)
- COUNT(DISTINCT col) parsed + aggregated via hSeen hash
- UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent)
- DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT)
- Derived table FROM (SELECT...) + JOIN right-side derived
- Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect
- LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs)
- DATE literal round-trip validation (Feb 29 non-leap rejected)
- CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists
- AlterTable type dispatcher comma-wrapped (1-char type "A" no longer
  matches CHARACTER)

Compiler / runtime
- gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity)
- gengo split: emit_block.go, emit_stmt.go, folding.go extracted
- parser/stmtreg.go nudges
- hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*),
  windows debug stubs collapsed
- thread/vm/value/class/pcinterp tightening from panic traces

RDD layer (hbrdd/)
- dbf: null bitmap support (null.go + null_test.go), mmap split
  (mmap_posix.go / mmap_windows.go), byte-level numeric parse
- ntx/cdx: windows mmap parity
- workarea + mem RDD: cross-area state-bleed fixes

RTL (hbrtl/)
- errorlog rewrite with platform-specific FD (errorlog_fd_unix /
  errorlog_fd_other)
- sqlscan, sqlhelpers, indexrtl, datetime extensions

Gates green at checkpoint:
- go test ./...        : PASS
- FiveSql2 SQL:1999    : 43/43
- Harbour compat       : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 09:26:25 +09:00

229 lines
6.3 KiB
Plaintext

/*
* TSqlLexer.prg — SQL lexical analyzer (tokenizer)
*
* FiveSql — SQL Engine for Harbour DBF/NTX
*
* Copyright (c) 2025 Charles KWON (Charles KWON OhJun)
* Email: charleskwonohjun@gmail.com
*
* All rights reserved.
*/
#include "hbclass.ch"
#include "FiveSqlDef.ch"
/* TSqlLexer: converts a SQL text into a flat array of tokens.
 * Usage, as far as this file shows: New( cSQL ), then Tokenize(),
 * then GetTokens() to read the result. */
CLASS TSqlLexer
/* SQL text being scanned; assigned from the constructor argument. */
DATA cInput
/* Token list; each entry is a two-element array { kind, text }. */
DATA aTokens
/* Length of the SQL text, computed once in New(). */
DATA nLen
/* Stores the SQL text and resets the token list. */
METHOD New( cSQL ) CONSTRUCTOR
/* Scans cInput and fills aTokens; returns the lexer object itself. */
METHOD Tokenize()
/* Returns the aTokens array. */
METHOD GetTokens()
ENDCLASS
/* Constructor.
 *   cSQL : SQL text to be tokenized.
 * Remembers the text and its length and starts with an empty token
 * list. Returns the new lexer object. */
METHOD New( cSQL ) CLASS TSqlLexer
   ::aTokens := {}
   ::cInput  := cSQL
   ::nLen    := Len( cSQL )
   RETURN Self
/* Accessor for the token list.
 * Returns the aTokens array itself (not a copy), i.e. whatever the
 * last call to New() or Tokenize() stored there. */
METHOD GetTokens() CLASS TSqlLexer
   LOCAL aResult := ::aTokens
   RETURN aResult
/* Tokenize: scan ::cInput from left to right and rebuild ::aTokens.
 *
 * Every token is a two-element array { kind, text }. The kinds are the
 * TK_* constants (presumably supplied by FiveSqlDef.ch -- confirm).
 * The list always ends with a { TK_END, "" } sentinel.
 *
 * Recognized, in this order: whitespace, "--" line comments, block
 * comments, single-quoted strings (a doubled quote stands for one
 * quote), numeric literals, identifiers/keywords (upper-cased),
 * [bracketed] identifiers (upper-cased), the "?" placeholder, Harbour
 * logical literals, then punctuation and operators.
 *
 * Characters that match none of the rules (including a lone "|", a
 * lone "!" and ";") are skipped without producing a token.
 *
 * Returns: Self.
 */
METHOD Tokenize() CLASS TSqlLexer
   LOCAL nPos, ch, cToken, cLit
   LOCAL lClosed

   nPos := 1
   ::aTokens := {}

   WHILE nPos <= ::nLen
      ch := SubStr( ::cInput, nPos, 1 )

      /* Skip whitespace: space, tab, LF, CR */
      IF ch == " " .OR. ch == Chr(9) .OR. ch == Chr(10) .OR. ch == Chr(13)
         nPos++
         LOOP
      ENDIF

      /* Skip single-line comment: -- ... up to (not including) the LF;
       * the LF itself is then consumed by the whitespace rule. */
      IF ch == "-" .AND. nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "-"
         WHILE nPos <= ::nLen .AND. SubStr( ::cInput, nPos, 1 ) != Chr(10)
            nPos++
         ENDDO
         LOOP
      ENDIF

      /* Skip block comment (slash-star ... star-slash) */
      IF ch == "/" .AND. nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "*"
         nPos += 2
         lClosed := .F.
         WHILE nPos < ::nLen
            IF SubStr( ::cInput, nPos, 1 ) == "*" .AND. SubStr( ::cInput, nPos + 1, 1 ) == "/"
               nPos += 2
               lClosed := .T.
               EXIT
            ENDIF
            nPos++
         ENDDO
         /* Unterminated comment: the scan above stops ON the last
          * character of the input, which would then be tokenized as
          * if it were SQL. Treat the rest of the input as comment. */
         IF ! lClosed
            nPos := ::nLen + 1
         ENDIF
         LOOP
      ENDIF

      /* String literal (single-quoted). Two consecutive quotes inside
       * the literal yield one quote character.
       * NOTE(review): an unterminated literal is not reported; the
       * text collected up to end of input is emitted as TK_TEXT. */
      IF ch == "'"
         nPos++
         cToken := ""
         WHILE nPos <= ::nLen
            ch := SubStr( ::cInput, nPos, 1 )
            IF ch == "'"
               IF nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "'"
                  cToken += "'"
                  nPos += 2
               ELSE
                  /* closing quote */
                  nPos++
                  EXIT
               ENDIF
            ELSE
               cToken += ch
               nPos++
            ENDIF
         ENDDO
         AAdd( ::aTokens, { TK_TEXT, cToken } )
         LOOP
      ENDIF

      /* Numeric literal: starts with a digit, continues over digits
       * and dots.
       * NOTE(review): any number of dots is accepted, so "1.2.3" is
       * emitted as a single TK_NUM token. */
      IF ch >= "0" .AND. ch <= "9"
         cToken := ""
         WHILE nPos <= ::nLen
            ch := SubStr( ::cInput, nPos, 1 )
            IF ( ch >= "0" .AND. ch <= "9" ) .OR. ch == "."
               cToken += ch
               nPos++
            ELSE
               EXIT
            ENDIF
         ENDDO
         AAdd( ::aTokens, { TK_NUM, cToken } )
         LOOP
      ENDIF

      /* Identifier or keyword: letter or underscore, then letters,
       * digits and underscores. The text is stored upper-cased. */
      IF IsAlpha( ch ) .OR. ch == "_"
         cToken := ""
         WHILE nPos <= ::nLen
            ch := SubStr( ::cInput, nPos, 1 )
            IF IsAlpha( ch ) .OR. IsDigit( ch ) .OR. ch == "_"
               cToken += ch
               nPos++
            ELSE
               EXIT
            ENDIF
         ENDDO
         AAdd( ::aTokens, { TK_NAME, Upper( cToken ) } )
         LOOP
      ENDIF

      /* Bracketed identifier: [column_name]. Everything up to the
       * closing bracket (or end of input) becomes the name. */
      IF ch == "["
         nPos++
         cToken := ""
         WHILE nPos <= ::nLen .AND. SubStr( ::cInput, nPos, 1 ) != "]"
            cToken += SubStr( ::cInput, nPos, 1 )
            nPos++
         ENDDO
         /* step over the closing bracket when there is one */
         IF nPos <= ::nLen
            nPos++
         ENDIF
         AAdd( ::aTokens, { TK_NAME, Upper( cToken ) } )
         LOOP
      ENDIF

      /* Positional parameter placeholder */
      IF ch == "?"
         AAdd( ::aTokens, { TK_QMARK, "?" } )
         nPos++
         LOOP
      ENDIF

      /* Harbour logical literals inside SQL text: `.T.` / `.F.` /
       * `.Y.` / `.N.`. INSERT statements in Harbour hosts frequently
       * use these rather than the SQL `TRUE` / `FALSE` keywords,
       * especially when the source value is inlined from a
       * build-time constant. Converted to TK_NAME("TRUE"/"FALSE")
       * so the parser's primary handles them alongside SQL
       * keywords without a new token kind. Must be tested *before*
       * the bare `.` -> TK_DOT punctuation case.
       * NOTE(review): the check looks only at the three characters
       * dot-letter-dot, so a three-part name such as a.t.b also
       * matches here -- confirm such names are never used. */
      IF ch == "." .AND. nPos + 2 <= ::nLen .AND. ;
            SubStr( ::cInput, nPos + 2, 1 ) == "."
         cLit := Upper( SubStr( ::cInput, nPos + 1, 1 ) )
         IF cLit == "T" .OR. cLit == "Y"
            AAdd( ::aTokens, { TK_NAME, "TRUE" } ) ; nPos += 3
            LOOP
         ELSEIF cLit == "F" .OR. cLit == "N"
            AAdd( ::aTokens, { TK_NAME, "FALSE" } ) ; nPos += 3
            LOOP
         ENDIF
         /* any other middle letter: fall through to the TK_DOT case */
      ENDIF

      /* Punctuation and operators */
      DO CASE
      CASE ch == ","
         AAdd( ::aTokens, { TK_COMMA, "," } ) ; nPos++
      CASE ch == "."
         AAdd( ::aTokens, { TK_DOT, "." } ) ; nPos++
      CASE ch == "*"
         AAdd( ::aTokens, { TK_STAR, "*" } ) ; nPos++
      CASE ch == "("
         AAdd( ::aTokens, { TK_LPAR, "(" } ) ; nPos++
      CASE ch == ")"
         AAdd( ::aTokens, { TK_RPAR, ")" } ) ; nPos++
      CASE ch == "+"
         AAdd( ::aTokens, { TK_PLUS, "+" } ) ; nPos++
      CASE ch == "-"
         AAdd( ::aTokens, { TK_MINUS, "-" } ) ; nPos++
      CASE ch == "/"
         AAdd( ::aTokens, { TK_SLASH, "/" } ) ; nPos++
      CASE ch == "|"
         /* "||" is the concatenation operator; a lone "|" is dropped */
         IF nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "|"
            AAdd( ::aTokens, { TK_PIPES, "||" } ) ; nPos += 2
         ELSE
            nPos++
         ENDIF
      CASE ch == "="
         AAdd( ::aTokens, { TK_EQ, "=" } ) ; nPos++
      CASE ch == "<"
         IF nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "="
            AAdd( ::aTokens, { TK_LTE, "<=" } ) ; nPos += 2
         ELSEIF nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == ">"
            AAdd( ::aTokens, { TK_NEQ, "<>" } ) ; nPos += 2
         ELSE
            AAdd( ::aTokens, { TK_LT, "<" } ) ; nPos++
         ENDIF
      CASE ch == ">"
         IF nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "="
            AAdd( ::aTokens, { TK_GTE, ">=" } ) ; nPos += 2
         ELSE
            AAdd( ::aTokens, { TK_GT, ">" } ) ; nPos++
         ENDIF
      CASE ch == "!"
         /* "!=" is emitted as TK_NEQ; a lone "!" is dropped */
         IF nPos < ::nLen .AND. SubStr( ::cInput, nPos + 1, 1 ) == "="
            AAdd( ::aTokens, { TK_NEQ, "!=" } ) ; nPos += 2
         ELSE
            nPos++
         ENDIF
      CASE ch == ";"
         /* statement terminator produces no token */
         nPos++
      OTHERWISE
         /* unrecognized character: skipped without a token */
         nPos++
      ENDCASE
   ENDDO

   /* End-of-input sentinel */
   AAdd( ::aTokens, { TK_END, "" } )
   RETURN Self