Files
five/hbrtl/sqlhelpers.go
CharlesKWON f4ed42556b checkpoint: season-wide bug fix campaign + infra
Cumulative result of this season's silent-bug hunt (~62 fixes) across
the FiveSql2 SQL engine, the Five compiler/runtime, and the hbrdd RDD
layer. Saved as a single checkpoint before refactoring the parser to
delegate xBase command translation to the preprocessor.

Highlights:

FiveSql2 engine (_FiveSql2/src/)
- prefix-glob index attach -> explicit convention (<table>_pk.ntx,
  <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop
- DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt)
- COUNT(DISTINCT col) parsed + aggregated via hSeen hash
- UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent)
- DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT)
- Derived table FROM (SELECT...) + JOIN right-side derived
- Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect
- LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs)
- DATE literal round-trip validation (Feb 29 non-leap rejected)
- CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists
- AlterTable type dispatcher comma-wrapped (1-char type "A" no longer
  matches CHARACTER)

Compiler / runtime
- gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity)
- gengo split: emit_block.go, emit_stmt.go, folding.go extracted
- parser/stmtreg.go nudges
- hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*),
  windows debug stubs collapsed
- thread/vm/value/class/pcinterp tightening from panic traces

RDD layer (hbrdd/)
- dbf: null bitmap support (null.go + null_test.go), mmap split
  (mmap_posix.go / mmap_windows.go), byte-level numeric parse
- ntx/cdx: windows mmap parity
- workarea + mem RDD: cross-area state-bleed fixes

RTL (hbrtl/)
- errorlog rewrite with platform-specific FD (errorlog_fd_unix /
  errorlog_fd_other)
- sqlscan, sqlhelpers, indexrtl, datetime extensions

Gates green at checkpoint:
- go test ./...        : PASS
- FiveSql2 SQL:1999    : 43/43
- Harbour compat       : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 09:26:25 +09:00

711 lines
18 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// FiveSql2 scalar helpers — Go replacements for the PRG functions in
// _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during
// expression evaluation (WHERE / HAVING / CASE); porting removes PRG
// VM frame overhead on the hot interpreter path. Semantics match the
// PRG source byte-for-byte.
package hbrtl
import (
"fmt"
"math"
"strconv"
"strings"
"five/hbrt"
)
// FiveSql2 lexer token type codes — must match FiveSqlDef.ch.
// lexSQL stores one of these as the first element of every
// { nType, cText } token pair it emits.
const (
tkEnd = 0 // end-of-input sentinel; lexSQL always appends it last, with empty text
tkName = 1 // identifier or keyword (upper-cased), bracketed name, or TRUE/FALSE from .T./.F.
tkText = 2 // single-quoted string literal, quotes removed
tkNum = 3 // numeric literal, kept as its source text
tkComma = 4 // ,
tkDot = 5 // .
tkStar = 6 // *
tkLPar = 7 // (
tkRPar = 8 // )
tkEq = 9 // =
tkNEq = 10 // <> or !=
tkLT = 11 // <
tkGT = 12 // >
tkLTE = 13 // <=
tkGTE = 14 // >=
tkQMark = 15 // ? parameter placeholder
tkPlus = 16 // +
tkMinus = 17 // -
tkSlash = 18 // /
tkPipes = 19 // || (string concatenation operator)
)
// makeTokValue builds the { nTokenType, cTokenValue } pair that
// TSqlParser2 consumes for every token: a 2-element PRG array holding
// the numeric type code followed by the token text.
func makeTokValue(ttype int, text string) hbrt.Value {
	pair := make([]hbrt.Value, 2)
	pair[0] = hbrt.MakeNumInt(int64(ttype))
	pair[1] = hbrt.MakeString(text)
	return hbrt.MakeArrayFrom(pair)
}
// lexSQL is the Go port of TSqlLexer:Tokenize — a byte-level scanner over
// the SQL text that yields one { nType, cText } pair (see makeTokValue)
// per token, terminated by a tkEnd sentinel.
//
// Recognized input:
//   - whitespace, `-- line` comments and `/* block */` comments are
//     skipped; an unterminated block comment swallows the rest of the input
//   - 'single-quoted' strings → tkText, with ” decoding to one quote; a
//     missing closing quote ends the literal at end of input
//   - runs of digits and dots → tkNum (text kept verbatim; callers parse it)
//   - [A-Za-z_][A-Za-z0-9_]* and [bracketed names] → tkName, upper-cased
//   - ? → tkQMark; operators and punctuation map to their tk* codes
//   - .T. / .Y. and .F. / .N. (either case) → tkName "TRUE" / "FALSE"
//
// A lone `|`, a lone `!`, `;` and any other unrecognized byte are dropped
// without emitting a token.
func lexSQL(s string) []hbrt.Value {
	toks := make([]hbrt.Value, 0, 32)
	n := len(s)
	i := 0
	for i < n {
		c := s[i]

		// Whitespace
		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
			i++
			continue
		}

		// Line comment `-- ...` runs to the end of the line.
		if c == '-' && i+1 < n && s[i+1] == '-' {
			i += 2
			for i < n && s[i] != '\n' {
				i++
			}
			continue
		}

		// Block comment `/* ... */`. The search for the terminator starts
		// after the opener so `/*/` is not treated as a complete comment.
		// When no terminator exists the comment consumes everything that
		// is left; previously the final byte of an unterminated comment
		// leaked out and was lexed as a token.
		if c == '/' && i+1 < n && s[i+1] == '*' {
			i += 2
			if end := strings.Index(s[i:], "*/"); end >= 0 {
				i += end + 2
			} else {
				i = n
			}
			continue
		}

		// String literal (single-quoted, '' escapes a quote).
		if c == '\'' {
			i++
			start := i
			var sb strings.Builder
			escaped := false
			for i < n {
				if s[i] != '\'' {
					i++
					continue
				}
				if i+1 >= n || s[i+1] != '\'' {
					break // closing quote (or quote at end of input)
				}
				// Doubled quote: flush the pending segment together with
				// exactly ONE of the two quotes. The earlier code emitted
				// an extra quote for the second and later escapes, so
				// 'a''b''c' decoded to a''b'c instead of a'b'c.
				sb.WriteString(s[start : i+1])
				escaped = true
				i += 2
				start = i
			}
			// Without escapes the literal is a plain substring of the
			// input and needs no copy.
			val := s[start:i]
			if escaped {
				sb.WriteString(val)
				val = sb.String()
			}
			if i < n {
				i++ // skip closing quote
			}
			toks = append(toks, makeTokValue(tkText, val))
			continue
		}

		// Numeric literal: digits and dots, text kept verbatim.
		if c >= '0' && c <= '9' {
			start := i
			for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') {
				i++
			}
			toks = append(toks, makeTokValue(tkNum, s[start:i]))
			continue
		}

		// Identifier / keyword
		if isAlphaSQL(c) || c == '_' {
			start := i
			for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') {
				i++
			}
			toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i])))
			continue
		}

		// Bracketed identifier `[col name]`
		if c == '[' {
			i++
			start := i
			for i < n && s[i] != ']' {
				i++
			}
			name := strings.ToUpper(s[start:i])
			if i < n {
				i++ // skip ']'
			}
			toks = append(toks, makeTokValue(tkName, name))
			continue
		}

		// Parameter placeholder
		if c == '?' {
			toks = append(toks, makeTokValue(tkQMark, "?"))
			i++
			continue
		}

		// Multi-char + single-char operators / punctuation
		switch c {
		case ',':
			toks = append(toks, makeTokValue(tkComma, ","))
			i++
		case '.':
			// Harbour logical literals inside SQL text: `.T.` / `.F.` /
			// `.Y.` / `.N.`. Emit TK_NAME("TRUE"/"FALSE") so the
			// parser's primary handles them alongside SQL TRUE/FALSE
			// keywords without a dedicated token kind. Must precede
			// the bare `.` → TK_DOT emission below, otherwise the
			// three chars tokenize as DOT + NAME("T") + DOT and the
			// INSERT column alignment drifts by two.
			if i+2 < n && s[i+2] == '.' {
				lit := s[i+1]
				if lit == 't' || lit == 'T' || lit == 'y' || lit == 'Y' {
					toks = append(toks, makeTokValue(tkName, "TRUE"))
					i += 3
					continue
				}
				if lit == 'f' || lit == 'F' || lit == 'n' || lit == 'N' {
					toks = append(toks, makeTokValue(tkName, "FALSE"))
					i += 3
					continue
				}
			}
			toks = append(toks, makeTokValue(tkDot, "."))
			i++
		case '*':
			toks = append(toks, makeTokValue(tkStar, "*"))
			i++
		case '(':
			toks = append(toks, makeTokValue(tkLPar, "("))
			i++
		case ')':
			toks = append(toks, makeTokValue(tkRPar, ")"))
			i++
		case '+':
			toks = append(toks, makeTokValue(tkPlus, "+"))
			i++
		case '-':
			toks = append(toks, makeTokValue(tkMinus, "-"))
			i++
		case '/':
			toks = append(toks, makeTokValue(tkSlash, "/"))
			i++
		case '|':
			if i+1 < n && s[i+1] == '|' {
				toks = append(toks, makeTokValue(tkPipes, "||"))
				i += 2
			} else {
				i++ // lone '|' has no token kind; dropped
			}
		case '=':
			toks = append(toks, makeTokValue(tkEq, "="))
			i++
		case '<':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkLTE, "<="))
				i += 2
			} else if i+1 < n && s[i+1] == '>' {
				toks = append(toks, makeTokValue(tkNEq, "<>"))
				i += 2
			} else {
				toks = append(toks, makeTokValue(tkLT, "<"))
				i++
			}
		case '>':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkGTE, ">="))
				i += 2
			} else {
				toks = append(toks, makeTokValue(tkGT, ">"))
				i++
			}
		case '!':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkNEq, "!="))
				i += 2
			} else {
				i++ // lone '!' has no token kind; dropped
			}
		case ';':
			i++ // statement terminator carries no token
		default:
			i++ // unrecognized byte; skipped
		}
	}
	toks = append(toks, makeTokValue(tkEnd, ""))
	return toks
}
// isAlphaSQL reports whether c is an ASCII letter (A-Z or a-z).
// Bytes outside the ASCII letter ranges, including every byte of a
// multi-byte UTF-8 sequence, are not letters for the lexer.
func isAlphaSQL(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
// SqlLexerTokenize(cSQL) → aTokens
//
// RTL entry point for the Go lexer: reads the SQL text from the first
// parameter, tokenizes it with lexSQL, and returns the tokens as a PRG
// array of { nType, cText } pairs — the same structure the PRG
// TSqlLexer:Tokenize produced.
func SqlLexerTokenize(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sql := t.Local(1).AsString()
	tokens := hbrt.MakeArrayFrom(lexSQL(sql))
	t.PushValue(tokens)
	t.RetValue()
}
// SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams }
//
// Lexes cSQL and extracts its query template in a single Go call, in
// place of the three separate PRG-to-Go crossings (lex, fetch tokens,
// extract).
//
//   - aTokens: the token stream with every string and numeric literal
//     rewritten to a TK_QMARK "?" token.
//   - cKey:    plan cache key. '?' marks a collapsed string literal, '#'
//     a collapsed numeric literal; every other token contributes its
//     type code offset by 0x20, and names also contribute their text
//     followed by a space.
//   - aParams: the literal values that were removed, in source order.
//     Numerics are stored as integers when they parse as int64, as
//     doubles when they parse as float64, and as raw text otherwise.
func SqlLexAndExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	src := t.Local(1).AsString()
	toks := lexSQL(src)
	params := make([]hbrt.Value, 0, 8)

	var key strings.Builder
	key.Grow(len(src))

	for idx := range toks {
		pair := toks[idx].AsArray()
		if pair == nil || len(pair.Items) < 2 {
			continue
		}
		kind := int(pair.Items[0].AsNumInt())

		// Non-literal tokens only feed the cache key.
		if kind != tkText && kind != tkNum {
			key.WriteByte(byte(kind) + 0x20)
			if kind == tkName {
				key.WriteString(pair.Items[1].AsString())
				key.WriteByte(' ')
			}
			continue
		}

		// Literal: capture its value, then turn the token into a placeholder.
		var literal hbrt.Value
		marker := byte('?')
		if kind == tkText {
			literal = pair.Items[1]
		} else {
			marker = '#'
			text := pair.Items[1].AsString()
			if iv, err := strconv.ParseInt(text, 10, 64); err == nil {
				literal = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(text, 64); err == nil {
				literal = hbrt.MakeDoubleAuto(fv)
			} else {
				literal = hbrt.MakeString(text)
			}
		}
		params = append(params, literal)
		pair.Items[0] = hbrt.MakeNumInt(tkQMark)
		pair.Items[1] = hbrt.MakeString("?")
		key.WriteByte(marker)
	}

	t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeArrayFrom(toks),
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(params),
	}))
	t.RetValue()
}
// SqlExtractTemplate(aTokens) → { cKey, aParams }
//
// Takes an already-lexed FiveSql2 token stream (an array of
// { nTokenType, cTokenValue } pairs) and collapses its literals:
// every string (TK_TEXT=2) and numeric (TK_NUM=3) token is rewritten
// IN PLACE to the placeholder token (TK_QMARK=15, text "?"), and the
// literal's value is appended to aParams in left-to-right order. The
// caller can hand the mutated aTokens straight to TSqlParser2, which
// emits ND_PAR nodes that resolve against aParams at execution time.
//
// cKey is a compact digest of what remains, used as the plan cache
// key: '?' for a collapsed string, '#' for a collapsed number, and for
// every other token its type code offset into the printable range,
// plus the upper-cased text for names. Statements that differ only in
// literal values therefore share a key:
//
//	INSERT INTO t VALUES (1,'a')
//	INSERT INTO t VALUES (2,'b')
//
// so the plan cache hits from the second call onward, while
// `SELECT id` and `SELECT name` stay distinct.
//
// A non-array argument yields { "", {} }. Elements that are not
// arrays of at least two items are skipped.
func SqlExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	// Token type codes used here (values per FiveSqlDef.ch).
	const (
		nameTok        = 1
		textTok        = 2
		numTok         = 3
		placeholderTok = 15
	)

	arg := t.Local(1)
	if !arg.IsArray() {
		t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
			hbrt.MakeString(""),
			hbrt.MakeArrayFrom(nil),
		}))
		t.RetValue()
		return
	}

	items := arg.AsArray().Items
	params := make([]hbrt.Value, 0, 8)

	// Template key — cheap digest of the token-type sequence.
	var key strings.Builder
	key.Grow(len(items) * 2)

	for idx := range items {
		pair := items[idx].AsArray()
		if pair == nil || len(pair.Items) < 2 {
			continue
		}
		kind := int(pair.Items[0].AsNumInt())

		if kind == textTok {
			// String literal: keep the raw string as the parameter.
			params = append(params, pair.Items[1])
			pair.Items[0] = hbrt.MakeInt(placeholderTok)
			pair.Items[1] = hbrt.MakeString("?")
			key.WriteByte('?')
			continue
		}

		if kind == numTok {
			// Numeric literal: integer when it parses as one (the
			// common case for id columns), double otherwise, and the
			// raw text as a last resort.
			text := pair.Items[1].AsString()
			var num hbrt.Value
			if iv, err := strconv.ParseInt(text, 10, 64); err == nil {
				num = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(text, 64); err == nil {
				num = hbrt.MakeDoubleAuto(fv)
			} else {
				num = hbrt.MakeString(text)
			}
			params = append(params, num)
			pair.Items[0] = hbrt.MakeInt(placeholderTok)
			pair.Items[1] = hbrt.MakeString("?")
			key.WriteByte('#')
			continue
		}

		// Non-literal token: type code shifted to a printable byte,
		// plus the name text so same-shape queries over different
		// identifiers do not collide.
		key.WriteByte(byte(kind) + 0x20)
		if kind == nameTok {
			key.WriteString(strings.ToUpper(pair.Items[1].AsString()))
			key.WriteByte(' ')
		}
	}

	t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(params),
	}))
	t.RetValue()
}
// SqlCoerceStr(x) → cString
//
// RTL entry point around sqlCoerceStr: returns the canonical string
// form of the first parameter. NIL is handled (it becomes "").
func SqlCoerceStr(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	t.RetString(sqlCoerceStr(t.Local(1)))
}
// sqlCoerceStr returns the canonical string form of a scalar:
//
//   - NIL       → ""
//   - string    → unchanged
//   - integer   → base-10 digits
//   - double    → shortest plain decimal that round-trips (never
//     exponent notation)
//   - logical   → "T" / "F"
//   - date      → "YYYYMMDD"
//   - timestamp → "YYYYMMDDHHMMSS"
//
// Any other value type yields "".
func sqlCoerceStr(v hbrt.Value) string {
	switch {
	case v.IsNil():
		return ""
	case v.IsString():
		return v.AsString()
	case v.IsNumeric():
		if v.IsNumInt() {
			return strconv.FormatInt(v.AsNumInt(), 10)
		}
		// 'f' rather than 'g': with shortest precision 'g' switches to
		// exponent notation once the value reaches 1e6 (or drops below
		// 1e-4), so 1234567.5 rendered as "1.2345675e+06" — not a form
		// SQL text comparison or concatenation expects.
		return strconv.FormatFloat(v.AsNumDouble(), 'f', -1, 64)
	case v.IsLogical():
		if v.AsBool() {
			return "T"
		}
		return "F"
	case v.IsDate():
		// Date → "YYYYMMDD" (the DToS canonical form). Previously
		// dates fell through to the empty-string default, so any
		// `WHERE date_col = '20240115'` comparison silently
		// compared "" to the literal and returned 0 rows. YYYYMMDD
		// is format-independent and matches how Harbour's DToS /
		// HbSToD pair encodes dates for byte-stable round-trip.
		y, m, d := julianToDate(v.AsJulian())
		return fmt.Sprintf("%04d%02d%02d", y, m, d)
	case v.IsTimestamp():
		// NOTE(review): this arm is only reached when IsDate() is
		// false for timestamp values — confirm against hbrt.Value.
		y, m, d := julianToDate(v.AsJulian())
		ms := v.AsTimeMs() // milliseconds since midnight
		hh := ms / 3600000
		mm := (ms % 3600000) / 60000
		ss := (ms % 60000) / 1000
		return fmt.Sprintf("%04d%02d%02d%02d%02d%02d", y, m, d, hh, mm, ss)
	}
	return ""
}
// SqlCoerceNum(x) → nNumber
//
// Converts a scalar to a number: NIL → 0, numerics are returned
// unchanged, strings are parsed with parseLeadingNumeric, logical
// true → 1 and false → 0. Every other type yields 0.
func SqlCoerceNum(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if arg.IsNil() {
		t.RetInt(0)
		return
	}
	if arg.IsNumeric() {
		t.RetVal(arg)
		return
	}
	if arg.IsString() {
		parsed := parseLeadingNumeric(arg.AsString())
		t.RetVal(hbrt.MakeDoubleAuto(parsed))
		return
	}
	if arg.IsLogical() && arg.AsBool() {
		t.RetInt(1)
		return
	}
	// Logical false and all remaining types.
	t.RetInt(0)
}
// SqlCoerceForCmp(x) → xNormalized
//
// Normalizes a value for comparison: strings are whitespace-trimmed
// and upper-cased so equality and ordering on CHAR values ignore case
// and padding; every other type is returned as-is.
func SqlCoerceForCmp(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if !arg.IsString() {
		t.RetVal(arg)
		return
	}
	trimmed := strings.TrimSpace(arg.AsString())
	t.RetString(strings.ToUpper(trimmed))
}
// SqlIsTrue(x) → lBool
//
// RTL entry point around sqlIsTrue, which implements SQL truthiness:
// NIL, logical false, zero, NaN and blank strings are false.
func SqlIsTrue(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	t.RetBool(sqlIsTrue(arg))
}
// sqlIsTrue evaluates v as an SQL condition result. Logical values
// map to themselves; numbers are true when non-zero (NaN counts as
// false); strings are true when they contain anything other than
// whitespace. NIL and every other type are false.
func sqlIsTrue(v hbrt.Value) bool {
	if v.IsNil() {
		return false
	}
	if v.IsLogical() {
		return v.AsBool()
	}
	if v.IsNumeric() {
		if v.IsNumInt() {
			return v.AsNumInt() != 0
		}
		f := v.AsNumDouble()
		return !(f == 0 || math.IsNaN(f))
	}
	if v.IsString() {
		return len(strings.TrimSpace(v.AsString())) > 0
	}
	return false
}
// SqlCmpEq(a, b) → lBool
//
// RTL entry point around sqlCmpEq: equality that ignores case and
// surrounding whitespace on strings and coerces across numeric /
// character operands.
func SqlCmpEq(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpEq(lhs, rhs))
}
// sqlCmpEq reports whether a and b are equal under FiveSql2 rules:
//
//   - NIL equals only NIL.
//   - Two numerics compare by value; two integers are compared exactly
//     as int64, any other mix as float64.
//   - Two strings compare case-insensitively after trimming whitespace.
//   - Two logicals / two dates compare directly.
//   - Numeric vs string: the string is parsed with parseLeadingNumeric.
//   - Date vs string: see sqlCmpDateStr.
//
// Any other type combination is unequal.
func sqlCmpEq(a, b hbrt.Value) bool {
	aNil, bNil := a.IsNil(), b.IsNil()
	if aNil || bNil {
		return aNil && bNil
	}
	switch {
	case a.IsNumeric() && b.IsNumeric():
		// Integers are compared as integers: converting through float64
		// makes distinct int64 values above 2^53 collapse to the same
		// double and compare equal.
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() == b.AsNumInt()
		}
		// Mixed Int/Double: compare regardless of the distinction.
		return a.AsNumDouble() == b.AsNumDouble()
	case a.IsString() && b.IsString():
		return strings.EqualFold(
			strings.TrimSpace(a.AsString()),
			strings.TrimSpace(b.AsString()),
		)
	case a.IsLogical() && b.IsLogical():
		return a.AsBool() == b.AsBool()
	case a.IsDate() && b.IsDate():
		return a.AsJulian() == b.AsJulian()

	// Cross-type N / C coercion.
	case a.IsNumeric() && b.IsString():
		return a.AsNumDouble() == parseLeadingNumeric(b.AsString())
	case a.IsString() && b.IsNumeric():
		return parseLeadingNumeric(a.AsString()) == b.AsNumDouble()

	// Cross-type D / C coercion. SQL tests often write the right-hand
	// side as a literal "YYYYMMDD" string (the DToS canonical form);
	// without these arms the comparison fell through to false and
	// `WHERE hired = '20240115'` silently returned no rows.
	case a.IsDate() && b.IsString():
		return sqlCmpDateStr(a, b)
	case a.IsString() && b.IsDate():
		return sqlCmpDateStr(b, a)
	}
	return false
}
// sqlCmpDateStr reports whether date d equals the date written in
// string s. The date is rendered as YYYYMMDD and the string is passed
// through normalizeDateStr (trim + separator strip), so the DToS form
// (20260425) and the common ISO/SQL spellings (2026-04-25, 2026/04/25,
// 2026.04.25) all match. Without the normalization,
// `WHERE d = '2026-04-25'` silently returned no rows.
func sqlCmpDateStr(d, s hbrt.Value) bool {
	year, month, day := julianToDate(d.AsJulian())
	canonical := fmt.Sprintf("%04d%02d%02d", year, month, day)
	return normalizeDateStr(s.AsString()) == canonical
}
// normalizeDateStr trims surrounding whitespace and removes the common
// date separators '-', '/' and '.', so '2026-04-25', '2026/04/25',
// '2026.04.25' and '20260425' all become '20260425'. It does not check
// that the input looks like a date: any string comes back with its
// separators removed, which is harmless because such a value will
// simply fail to match a real date.
func normalizeDateStr(s string) string {
	trimmed := strings.TrimSpace(s)
	first := strings.IndexAny(trimmed, "-/.")
	if first < 0 {
		// Nothing to strip; hand back the trimmed input without copying.
		return trimmed
	}
	out := make([]byte, 0, len(trimmed))
	out = append(out, trimmed[:first]...)
	for _, ch := range []byte(trimmed[first+1:]) {
		switch ch {
		case '-', '/', '.':
			// separator: dropped
		default:
			out = append(out, ch)
		}
	}
	return string(out)
}
// SqlCmpLt(a, b) → lBool
//
// RTL entry point around sqlCmpLt: less-than that ignores case and
// surrounding whitespace on strings and coerces across numeric /
// character operands.
func SqlCmpLt(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpLt(lhs, rhs))
}
// sqlCmpLt reports whether a sorts strictly before b under FiveSql2
// rules:
//
//   - Any NIL operand makes the result false.
//   - Two numerics compare by value; two integers are compared exactly
//     as int64, any other mix as float64.
//   - Two strings compare byte-wise after trim + upper-case.
//   - Two dates compare by Julian day; for logicals, false < true.
//   - Numeric vs string: the string is parsed with parseLeadingNumeric.
//   - Date vs string: the date's YYYYMMDD form is compared against the
//     separator-stripped string.
//
// Any other type combination is false.
func sqlCmpLt(a, b hbrt.Value) bool {
	if a.IsNil() || b.IsNil() {
		return false
	}
	switch {
	case a.IsNumeric() && b.IsNumeric():
		// Integers are ordered as integers: converting through float64
		// rounds int64 values above 2^53, so neighbouring values could
		// compare as equal instead of less-than.
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() < b.AsNumInt()
		}
		return a.AsNumDouble() < b.AsNumDouble()
	case a.IsString() && b.IsString():
		return strings.ToUpper(strings.TrimSpace(a.AsString())) <
			strings.ToUpper(strings.TrimSpace(b.AsString()))
	case a.IsDate() && b.IsDate():
		return a.AsJulian() < b.AsJulian()
	case a.IsLogical() && b.IsLogical():
		return !a.AsBool() && b.AsBool()

	// Cross-type N / C coercion.
	case a.IsNumeric() && b.IsString():
		return a.AsNumDouble() < parseLeadingNumeric(b.AsString())
	case a.IsString() && b.IsNumeric():
		return parseLeadingNumeric(a.AsString()) < b.AsNumDouble()

	// Cross-type D / C: compare the date's DToS form (YYYYMMDD) with the
	// string lexicographically, which matches chronological order for
	// well-formed input. The string operand is normalized first so that
	// 'YYYY-MM-DD', 'YYYY/MM/DD' and 'YYYY.MM.DD' work, not just bare
	// 'YYYYMMDD'. Without that step a comparison such as
	// '20260425' < '2026-06-01' is decided at the fifth byte ('0' = 0x30
	// against '-' = 0x2D) and is false for every date, so range filters
	// against ISO-formatted literals returned wrong row sets.
	case a.IsDate() && b.IsString():
		y, m, d := julianToDate(a.AsJulian())
		return fmt.Sprintf("%04d%02d%02d", y, m, d) < normalizeDateStr(b.AsString())
	case a.IsString() && b.IsDate():
		y, m, d := julianToDate(b.AsJulian())
		return normalizeDateStr(a.AsString()) < fmt.Sprintf("%04d%02d%02d", y, m, d)
	}
	return false
}
// SqlIsAggName(cName) → lBool
//
// Go-native replacement for SqlIsAggName in TSqlExpr.prg. The PRG
// version evaluated `("," + c + ",") $ ("," + AGG_FUNCTIONS + ",")`,
// costing two string allocations and a substring scan per call;
// profiling put it at 8.7% of B4 GROUP+HAVING CPU. Here the name is
// looked up in aggFuncSet (declared in sqlexpr.go for SqlExprHasAgg).
// The lookup is case-insensitive for ASCII names; an empty name is
// never an aggregate.
func SqlIsAggName(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	name := t.Local(1).AsString()
	if len(name) == 0 {
		t.RetBool(false)
		return
	}

	// Only pay for strings.ToUpper when the name actually contains an
	// ASCII lower-case letter; already-upper names are used as-is.
	key := name
	if strings.ContainsAny(name, "abcdefghijklmnopqrstuvwxyz") {
		key = strings.ToUpper(name)
	}

	_, found := aggFuncSet[key]
	t.RetBool(found)
}