// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // FiveSql2 scalar helpers — Go replacements for the PRG functions in // _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during // expression evaluation (WHERE / HAVING / CASE); porting removes PRG // VM frame overhead on the hot interpreter path. Semantics match the // PRG source byte-for-byte. package hbrtl import ( "fmt" "math" "strconv" "strings" "five/hbrt" ) // FiveSql2 lexer token type codes — must match FiveSqlDef.ch. const ( tkEnd = 0 tkName = 1 tkText = 2 tkNum = 3 tkComma = 4 tkDot = 5 tkStar = 6 tkLPar = 7 tkRPar = 8 tkEq = 9 tkNEq = 10 tkLT = 11 tkGT = 12 tkLTE = 13 tkGTE = 14 tkQMark = 15 tkPlus = 16 tkMinus = 17 tkSlash = 18 tkPipes = 19 ) // makeTokValue wraps a (type, text) pair into the 2-element PRG array // that TSqlParser2 consumes: { nTokenType, cTokenValue }. func makeTokValue(ttype int, text string) hbrt.Value { return hbrt.MakeArrayFrom([]hbrt.Value{ hbrt.MakeNumInt(int64(ttype)), hbrt.MakeString(text), }) } // lexSQL is the Go port of TSqlLexer:Tokenize — byte-level FSM over the // ASCII input string. Produces the same aTokens shape the PRG lexer did. func lexSQL(s string) []hbrt.Value { toks := make([]hbrt.Value, 0, 32) n := len(s) i := 0 for i < n { c := s[i] // Whitespace if c == ' ' || c == '\t' || c == '\n' || c == '\r' { i++ continue } // Line comment `-- ...` if c == '-' && i+1 < n && s[i+1] == '-' { i += 2 for i < n && s[i] != '\n' { i++ } continue } // Block comment `/* ... */` if c == '/' && i+1 < n && s[i+1] == '*' { i += 2 for i < n-1 { if s[i] == '*' && s[i+1] == '/' { i += 2 break } i++ } continue } // String literal (single-quoted, '' escapes a quote) if c == '\'' { i++ start := i var sb strings.Builder inEscape := false for i < n { cc := s[i] if cc == '\'' { if i+1 < n && s[i+1] == '\'' { if !inEscape { sb.WriteString(s[start:i]) inEscape = true } else { sb.WriteByte('\'') sb.WriteString(s[start:i]) } sb.WriteByte('\'') i += 2 start = i } else { break } } else { i++ } } var val string if inEscape { sb.WriteString(s[start:i]) val = sb.String() } else { val = s[start:i] } if i < n { i++ // skip closing quote } toks = append(toks, makeTokValue(tkText, val)) continue } // Numeric literal if c >= '0' && c <= '9' { start := i for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') { i++ } toks = append(toks, makeTokValue(tkNum, s[start:i])) continue } // Identifier / keyword if isAlphaSQL(c) || c == '_' { start := i for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') { i++ } toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i]))) continue } // Bracketed identifier `[col name]` if c == '[' { i++ start := i for i < n && s[i] != ']' { i++ } name := strings.ToUpper(s[start:i]) if i < n { i++ // skip ']' } toks = append(toks, makeTokValue(tkName, name)) continue } // Parameter placeholder if c == '?' { toks = append(toks, makeTokValue(tkQMark, "?")) i++ continue } // Multi-char + single-char operators / punctuation switch c { case ',': toks = append(toks, makeTokValue(tkComma, ",")) i++ case '.': // Harbour logical literals inside SQL text: `.T.` / `.F.` / // `.Y.` / `.N.`. Emit TK_NAME("TRUE"/"FALSE") so the // parser's primary handles them alongside SQL TRUE/FALSE // keywords without a dedicated token kind. Must precede // the bare `.` → TK_DOT emission below, otherwise the // three chars tokenize as DOT + NAME("T") + DOT and the // INSERT column alignment drifts by two. if i+2 < n && s[i+2] == '.' { lit := s[i+1] if lit == 't' || lit == 'T' || lit == 'y' || lit == 'Y' { toks = append(toks, makeTokValue(tkName, "TRUE")) i += 3 continue } if lit == 'f' || lit == 'F' || lit == 'n' || lit == 'N' { toks = append(toks, makeTokValue(tkName, "FALSE")) i += 3 continue } } toks = append(toks, makeTokValue(tkDot, ".")) i++ case '*': toks = append(toks, makeTokValue(tkStar, "*")) i++ case '(': toks = append(toks, makeTokValue(tkLPar, "(")) i++ case ')': toks = append(toks, makeTokValue(tkRPar, ")")) i++ case '+': toks = append(toks, makeTokValue(tkPlus, "+")) i++ case '-': toks = append(toks, makeTokValue(tkMinus, "-")) i++ case '/': toks = append(toks, makeTokValue(tkSlash, "/")) i++ case '|': if i+1 < n && s[i+1] == '|' { toks = append(toks, makeTokValue(tkPipes, "||")) i += 2 } else { i++ } case '=': toks = append(toks, makeTokValue(tkEq, "=")) i++ case '<': if i+1 < n && s[i+1] == '=' { toks = append(toks, makeTokValue(tkLTE, "<=")) i += 2 } else if i+1 < n && s[i+1] == '>' { toks = append(toks, makeTokValue(tkNEq, "<>")) i += 2 } else { toks = append(toks, makeTokValue(tkLT, "<")) i++ } case '>': if i+1 < n && s[i+1] == '=' { toks = append(toks, makeTokValue(tkGTE, ">=")) i += 2 } else { toks = append(toks, makeTokValue(tkGT, ">")) i++ } case '!': if i+1 < n && s[i+1] == '=' { toks = append(toks, makeTokValue(tkNEq, "!=")) i += 2 } else { i++ } case ';': i++ default: i++ } } toks = append(toks, makeTokValue(tkEnd, "")) return toks } func isAlphaSQL(c byte) bool { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') } // SqlLexerTokenize(cSQL) → aTokens // Direct Go port of TSqlLexer:Tokenize. Returns the same // { { nType, cText }, ... } structure the PRG version produced. func SqlLexerTokenize(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() toks := lexSQL(t.Local(1).AsString()) t.PushValue(hbrt.MakeArrayFrom(toks)) t.RetValue() } // SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams } // // Combined lex + template extraction — one Go call replaces three // PRG-to-Go boundary crossings (lex, get tokens, extract). aTokens // already has literal tokens replaced with TK_QMARK; aParams holds // the extracted literal values in positional order; cKey is the // plan cache key (digest of the normalized token-type sequence). func SqlLexAndExtractTemplate(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() src := t.Local(1).AsString() toks := lexSQL(src) params := make([]hbrt.Value, 0, 8) var keyBuf strings.Builder keyBuf.Grow(len(src)) for _, tv := range toks { tok := tv.AsArray() if tok == nil || len(tok.Items) < 2 { continue } ttype := int(tok.Items[0].AsNumInt()) switch ttype { case tkText: params = append(params, tok.Items[1]) tok.Items[0] = hbrt.MakeNumInt(tkQMark) tok.Items[1] = hbrt.MakeString("?") keyBuf.WriteByte('?') case tkNum: s := tok.Items[1].AsString() var val hbrt.Value if i, err := strconv.ParseInt(s, 10, 64); err == nil { val = hbrt.MakeNumInt(i) } else if f, err := strconv.ParseFloat(s, 64); err == nil { val = hbrt.MakeDoubleAuto(f) } else { val = hbrt.MakeString(s) } params = append(params, val) tok.Items[0] = hbrt.MakeNumInt(tkQMark) tok.Items[1] = hbrt.MakeString("?") keyBuf.WriteByte('#') default: keyBuf.WriteByte(byte(ttype) + 0x20) if ttype == tkName { keyBuf.WriteString(tok.Items[1].AsString()) keyBuf.WriteByte(' ') } } } result := hbrt.MakeArrayFrom([]hbrt.Value{ hbrt.MakeArrayFrom(toks), hbrt.MakeString(keyBuf.String()), hbrt.MakeArrayFrom(params), }) t.PushValue(result) t.RetValue() } // SqlExtractTemplate(aTokens) → { cKey, aParams } // // Walks a FiveSql2 lexer token stream, replacing string (TK_TEXT=2) // and numeric (TK_NUM=3) literals with the parameter placeholder // token (TK_QMARK=15). Collected literal values are returned as // aParams in their natural left-to-right order. // // Each token is a 2-element array {nTokenType, cTokenValue}. The // mutation is in place so the caller can pass the resulting aTokens // straight into TSqlParser2 — the parser then emits ND_PAR nodes // that resolve against aParams at execution time. // // The template key is a compact digest of the non-literal token // type sequence, used as the plan cache key for queries that share // the same shape but differ only in literal values. Queries like: // // INSERT INTO t VALUES (1,'a') // INSERT INTO t VALUES (2,'b') // // produce the SAME key once literals are collapsed to '?', letting // the plan cache hit from the 2nd call onward. // // Returns a 2-element array: { cKey, aParams }. func SqlExtractTemplate(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() tokensVal := t.Local(1) if !tokensVal.IsArray() { empty := hbrt.MakeArrayFrom([]hbrt.Value{ hbrt.MakeString(""), hbrt.MakeArrayFrom(nil), }) t.PushValue(empty) t.RetValue() return } toks := tokensVal.AsArray().Items params := make([]hbrt.Value, 0, 8) // Template key — cheap digest of the token-type sequence. var keyBuf strings.Builder keyBuf.Grow(len(toks) * 2) const ( tkText = 2 tkNum = 3 tkQmark = 15 ) for _, tokVal := range toks { tok := tokVal.AsArray() if tok == nil || len(tok.Items) < 2 { continue } ttype := int(tok.Items[0].AsNumInt()) switch ttype { case tkText: // String literal → TK_QMARK + save raw string value. params = append(params, tok.Items[1]) tok.Items[0] = hbrt.MakeInt(tkQmark) tok.Items[1] = hbrt.MakeString("?") keyBuf.WriteByte('?') case tkNum: // Numeric literal → TK_QMARK + parse value. Integer form // when possible (common for id columns), double otherwise. s := tok.Items[1].AsString() var val hbrt.Value if i, err := strconv.ParseInt(s, 10, 64); err == nil { val = hbrt.MakeNumInt(i) } else if f, err := strconv.ParseFloat(s, 64); err == nil { val = hbrt.MakeDoubleAuto(f) } else { val = hbrt.MakeString(s) } params = append(params, val) tok.Items[0] = hbrt.MakeInt(tkQmark) tok.Items[1] = hbrt.MakeString("?") keyBuf.WriteByte('#') default: // Non-literal token — include type code + text so two // different-but-same-shape queries distinguish properly // (e.g., SELECT id vs SELECT name). keyBuf.WriteByte(byte(ttype) + 0x20) // offset to printable if ttype == 1 { // TK_NAME — include name text keyBuf.WriteString(strings.ToUpper(tok.Items[1].AsString())) keyBuf.WriteByte(' ') } } } result := hbrt.MakeArrayFrom([]hbrt.Value{ hbrt.MakeString(keyBuf.String()), hbrt.MakeArrayFrom(params), }) t.PushValue(result) t.RetValue() } // SqlCoerceStr(x) → cString // Converts any scalar to its canonical string form (NULL-safe). func SqlCoerceStr(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() v := t.Local(1) t.RetString(sqlCoerceStr(v)) } func sqlCoerceStr(v hbrt.Value) string { switch { case v.IsNil(): return "" case v.IsString(): return v.AsString() case v.IsNumeric(): if v.IsNumInt() { return strconv.FormatInt(v.AsNumInt(), 10) } return strconv.FormatFloat(v.AsNumDouble(), 'g', -1, 64) case v.IsLogical(): if v.AsBool() { return "T" } return "F" case v.IsDate(): // Date → "YYYYMMDD" (the DToS canonical form). Previously // dates fell through to the empty-string default, so any // `WHERE date_col = '20240115'` comparison silently // compared "" to the literal and returned 0 rows. YYYYMMDD // is format-independent and matches how Harbour's DToS / // HbSToD pair encodes dates for byte-stable round-trip. y, m, d := julianToDate(v.AsJulian()) return fmt.Sprintf("%04d%02d%02d", y, m, d) case v.IsTimestamp(): y, m, d := julianToDate(v.AsJulian()) ms := v.AsTimeMs() hh := ms / 3600000 mm := (ms % 3600000) / 60000 ss := (ms % 60000) / 1000 return fmt.Sprintf("%04d%02d%02d%02d%02d%02d", y, m, d, hh, mm, ss) } return "" } // SqlCoerceNum(x) → nNumber // Converts any scalar to numeric (NULL → 0, bool → 1/0, string → Val). func SqlCoerceNum(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() v := t.Local(1) switch { case v.IsNil(): t.RetInt(0) case v.IsNumeric(): t.RetVal(v) case v.IsString(): t.RetVal(hbrt.MakeDoubleAuto(parseLeadingNumeric(v.AsString()))) case v.IsLogical(): if v.AsBool() { t.RetInt(1) } else { t.RetInt(0) } default: t.RetInt(0) } } // SqlCoerceForCmp(x) → xNormalized // Trim + upper-case strings; pass-through for other types. Used to // make SQL equality/ordering case-insensitive on CHAR values. func SqlCoerceForCmp(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() v := t.Local(1) if v.IsString() { t.RetString(strings.ToUpper(strings.TrimSpace(v.AsString()))) return } t.RetVal(v) } // SqlIsTrue(x) → lBool // SQL truthiness: NIL → false, empty string → false, 0 → false. func SqlIsTrue(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() t.RetBool(sqlIsTrue(t.Local(1))) } func sqlIsTrue(v hbrt.Value) bool { switch { case v.IsNil(): return false case v.IsLogical(): return v.AsBool() case v.IsNumeric(): if v.IsNumInt() { return v.AsNumInt() != 0 } return v.AsNumDouble() != 0 && !math.IsNaN(v.AsNumDouble()) case v.IsString(): return strings.TrimSpace(v.AsString()) != "" } return false } // SqlCmpEq(a, b) → lBool // Case-insensitive equality with cross-type N↔C coercion. func SqlCmpEq(t *hbrt.Thread) { t.Frame(2, 0) defer t.EndProc() t.RetBool(sqlCmpEq(t.Local(1), t.Local(2))) } func sqlCmpEq(a, b hbrt.Value) bool { aNil, bNil := a.IsNil(), b.IsNil() if aNil || bNil { return aNil && bNil } // Numeric: compare regardless of Int/Double distinction. if a.IsNumeric() && b.IsNumeric() { return a.AsNumDouble() == b.AsNumDouble() } if a.IsString() && b.IsString() { return strings.EqualFold( strings.TrimSpace(a.AsString()), strings.TrimSpace(b.AsString()), ) } if a.IsLogical() && b.IsLogical() { return a.AsBool() == b.AsBool() } if a.IsDate() && b.IsDate() { return a.AsJulian() == b.AsJulian() } // Cross-type N / C coercion. if a.IsNumeric() && b.IsString() { return a.AsNumDouble() == parseLeadingNumeric(b.AsString()) } if a.IsString() && b.IsNumeric() { return parseLeadingNumeric(a.AsString()) == b.AsNumDouble() } // Cross-type D / C coercion. SQL tests often write the right-hand // side as a literal "YYYYMMDD" string (the DToS canonical form); // without this arm the comparison fell through to false and // `WHERE hired = '20240115'` silently returned no rows. if a.IsDate() && b.IsString() { return sqlCmpDateStr(a, b) } if a.IsString() && b.IsDate() { return sqlCmpDateStr(b, a) } return false } // sqlCmpDateStr returns true when the date's YYYYMMDD form equals the // string operand after trim + separator strip. Accepts both DToS form // (20260425) and the more common ISO/SQL forms (2026-04-25, 2026/04/25, // 2026.04.25). Without normalization, `WHERE d = '2026-04-25'` silently // returned no rows because the literal didn't match the YYYYMMDD form. func sqlCmpDateStr(d, s hbrt.Value) bool { y, m, day := julianToDate(d.AsJulian()) return fmt.Sprintf("%04d%02d%02d", y, m, day) == normalizeDateStr(s.AsString()) } // normalizeDateStr strips common date separators ('-', '/', '.') so // '2026-04-25', '2026/04/25', '2026.04.25', '20260425' all collapse // to '20260425'. Caller is responsible for ensuring the input is // date-shaped; non-date strings are passed through with separators // removed (harmless — a comparison against a date will still fail). func normalizeDateStr(s string) string { s = strings.TrimSpace(s) if !strings.ContainsAny(s, "-/.") { return s } var b strings.Builder b.Grow(len(s)) for i := 0; i < len(s); i++ { c := s[i] if c != '-' && c != '/' && c != '.' { b.WriteByte(c) } } return b.String() } // SqlCmpLt(a, b) → lBool // Case-insensitive less-than with cross-type N↔C coercion. func SqlCmpLt(t *hbrt.Thread) { t.Frame(2, 0) defer t.EndProc() t.RetBool(sqlCmpLt(t.Local(1), t.Local(2))) } func sqlCmpLt(a, b hbrt.Value) bool { if a.IsNil() || b.IsNil() { return false } if a.IsNumeric() && b.IsNumeric() { return a.AsNumDouble() < b.AsNumDouble() } if a.IsString() && b.IsString() { return strings.ToUpper(strings.TrimSpace(a.AsString())) < strings.ToUpper(strings.TrimSpace(b.AsString())) } if a.IsDate() && b.IsDate() { return a.AsJulian() < b.AsJulian() } if a.IsLogical() && b.IsLogical() { return !a.AsBool() && b.AsBool() } if a.IsNumeric() && b.IsString() { return a.AsNumDouble() < parseLeadingNumeric(b.AsString()) } if a.IsString() && b.IsNumeric() { return parseLeadingNumeric(a.AsString()) < b.AsNumDouble() } // Cross-type D / C: compare DToS form lexicographically (YYYYMMDD // sorts identically to chronological order for well-formed strings). // Normalize the string operand so 'YYYY-MM-DD' / 'YYYY/MM/DD' / // 'YYYY.MM.DD' compare correctly, not just bare 'YYYYMMDD'. Without // this, `WHERE d > '2026-06-01'` collapsed to a string compare of // '20260425' < '2026-06-01' which is false because '2' < '2', '0' < '0' // proceeds until '4' vs '-' (45 vs 45 — actually '4' = 0x34, '-' = 0x2d) // → '4' > '-' so `'20260425' < '2026-06-01'` is false → all dates // returned as "less than" → all rows match. Confusing but the symptom // was every WHERE date > ISO-string returning the full table. if a.IsDate() && b.IsString() { y, m, d := julianToDate(a.AsJulian()) return fmt.Sprintf("%04d%02d%02d", y, m, d) < normalizeDateStr(b.AsString()) } if a.IsString() && b.IsDate() { y, m, d := julianToDate(b.AsJulian()) return normalizeDateStr(a.AsString()) < fmt.Sprintf("%04d%02d%02d", y, m, d) } return false } // SqlIsAggName(cName) → lBool // Go-native replacement for TSqlExpr.prg SqlIsAggName. The PRG version // was `("," + c + ",") $ ("," + AGG_FUNCTIONS + ",")` — two string // allocations + a substring scan per call. Profile showed this at // 8.7% of B4 GROUP+HAVING CPU. Uses the aggFuncSet already declared // in sqlexpr.go for SqlExprHasAgg. func SqlIsAggName(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() name := t.Local(1).AsString() if name == "" { t.RetBool(false) return } // Upper-case without allocating unless needed. upper := name for i := 0; i < len(name); i++ { c := name[i] if c >= 'a' && c <= 'z' { upper = strings.ToUpper(name) break } } _, ok := aggFuncSet[upper] t.RetBool(ok) }