B4 GROUP+HAVING profile showed SqlIsAggName at ~9% of CPU —
SqlEvalFunc checks it for every function in every row, and the
PRG body was two string allocations + a substring scan:
RETURN ("," + c + ",") $ ("," + AGG_FUNCTIONS + ",")
Replace with a hash lookup against the existing aggFuncSet map
in hbrtl/sqlexpr.go (already populated for SqlExprHasAgg, same
AGG_FUNCTIONS list). Upper-casing skips the allocation when the
input is already upper, which it almost always is in practice.
Bench deltas (median of 3 steady runs, 1000 iters):
B4_GROUP_HAVING 447 → 418 us -6.5%
B14_COUNT 252 → 235 us -7%
B15_CTE_WIN_JOIN 1595 → 1577 us -1%
Other benches unchanged (no aggregate calls per row).
FiveSql2 43/43, Harbour compat 56/56.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
615 lines
14 KiB
Go
615 lines
14 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.

// FiveSql2 scalar helpers — Go replacements for the PRG functions in
// _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during
// expression evaluation (WHERE / HAVING / CASE); porting removes PRG
// VM frame overhead on the hot interpreter path. Semantics match the
// PRG source byte-for-byte.
|
|
|
|
package hbrtl
|
|
|
|
import (
|
|
"math"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"five/hbrt"
|
|
)
|
|
|
|
// FiveSql2 lexer token type codes — must match FiveSqlDef.ch.
// The values are spelled out (rather than generated) so they can be checked
// against the header at a glance. Each trailing comment shows what lexSQL
// emits for that code.
const (
	tkEnd   = 0  // end-of-input sentinel, text ""
	tkName  = 1  // identifier, keyword or [bracketed name], upper-cased
	tkText  = 2  // 'string literal'
	tkNum   = 3  // numeric literal, text kept as written
	tkComma = 4  // ,
	tkDot   = 5  // .
	tkStar  = 6  // *
	tkLPar  = 7  // (
	tkRPar  = 8  // )
	tkEq    = 9  // =
	tkNEq   = 10 // <> or !=
	tkLT    = 11 // <
	tkGT    = 12 // >
	tkLTE   = 13 // <=
	tkGTE   = 14 // >=
	tkQMark = 15 // ? parameter placeholder
	tkPlus  = 16 // +
	tkMinus = 17 // -
	tkSlash = 18 // /
	tkPipes = 19 // ||
)
|
|
|
|
// makeTokValue wraps a (type, text) pair into the 2-element PRG array
|
|
// that TSqlParser2 consumes: { nTokenType, cTokenValue }.
|
|
func makeTokValue(ttype int, text string) hbrt.Value {
|
|
return hbrt.MakeArrayFrom([]hbrt.Value{
|
|
hbrt.MakeNumInt(int64(ttype)),
|
|
hbrt.MakeString(text),
|
|
})
|
|
}
|
|
|
|
// lexSQL is the Go port of TSqlLexer:Tokenize — byte-level FSM over the
|
|
// ASCII input string. Produces the same aTokens shape the PRG lexer did.
|
|
func lexSQL(s string) []hbrt.Value {
|
|
toks := make([]hbrt.Value, 0, 32)
|
|
n := len(s)
|
|
i := 0
|
|
for i < n {
|
|
c := s[i]
|
|
|
|
// Whitespace
|
|
if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
|
|
i++
|
|
continue
|
|
}
|
|
|
|
// Line comment `-- ...`
|
|
if c == '-' && i+1 < n && s[i+1] == '-' {
|
|
i += 2
|
|
for i < n && s[i] != '\n' {
|
|
i++
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Block comment `/* ... */`
|
|
if c == '/' && i+1 < n && s[i+1] == '*' {
|
|
i += 2
|
|
for i < n-1 {
|
|
if s[i] == '*' && s[i+1] == '/' {
|
|
i += 2
|
|
break
|
|
}
|
|
i++
|
|
}
|
|
continue
|
|
}
|
|
|
|
// String literal (single-quoted, '' escapes a quote)
|
|
if c == '\'' {
|
|
i++
|
|
start := i
|
|
var sb strings.Builder
|
|
inEscape := false
|
|
for i < n {
|
|
cc := s[i]
|
|
if cc == '\'' {
|
|
if i+1 < n && s[i+1] == '\'' {
|
|
if !inEscape {
|
|
sb.WriteString(s[start:i])
|
|
inEscape = true
|
|
} else {
|
|
sb.WriteByte('\'')
|
|
sb.WriteString(s[start:i])
|
|
}
|
|
sb.WriteByte('\'')
|
|
i += 2
|
|
start = i
|
|
} else {
|
|
break
|
|
}
|
|
} else {
|
|
i++
|
|
}
|
|
}
|
|
var val string
|
|
if inEscape {
|
|
sb.WriteString(s[start:i])
|
|
val = sb.String()
|
|
} else {
|
|
val = s[start:i]
|
|
}
|
|
if i < n {
|
|
i++ // skip closing quote
|
|
}
|
|
toks = append(toks, makeTokValue(tkText, val))
|
|
continue
|
|
}
|
|
|
|
// Numeric literal
|
|
if c >= '0' && c <= '9' {
|
|
start := i
|
|
for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') {
|
|
i++
|
|
}
|
|
toks = append(toks, makeTokValue(tkNum, s[start:i]))
|
|
continue
|
|
}
|
|
|
|
// Identifier / keyword
|
|
if isAlphaSQL(c) || c == '_' {
|
|
start := i
|
|
for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') {
|
|
i++
|
|
}
|
|
toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i])))
|
|
continue
|
|
}
|
|
|
|
// Bracketed identifier `[col name]`
|
|
if c == '[' {
|
|
i++
|
|
start := i
|
|
for i < n && s[i] != ']' {
|
|
i++
|
|
}
|
|
name := strings.ToUpper(s[start:i])
|
|
if i < n {
|
|
i++ // skip ']'
|
|
}
|
|
toks = append(toks, makeTokValue(tkName, name))
|
|
continue
|
|
}
|
|
|
|
// Parameter placeholder
|
|
if c == '?' {
|
|
toks = append(toks, makeTokValue(tkQMark, "?"))
|
|
i++
|
|
continue
|
|
}
|
|
|
|
// Multi-char + single-char operators / punctuation
|
|
switch c {
|
|
case ',':
|
|
toks = append(toks, makeTokValue(tkComma, ","))
|
|
i++
|
|
case '.':
|
|
toks = append(toks, makeTokValue(tkDot, "."))
|
|
i++
|
|
case '*':
|
|
toks = append(toks, makeTokValue(tkStar, "*"))
|
|
i++
|
|
case '(':
|
|
toks = append(toks, makeTokValue(tkLPar, "("))
|
|
i++
|
|
case ')':
|
|
toks = append(toks, makeTokValue(tkRPar, ")"))
|
|
i++
|
|
case '+':
|
|
toks = append(toks, makeTokValue(tkPlus, "+"))
|
|
i++
|
|
case '-':
|
|
toks = append(toks, makeTokValue(tkMinus, "-"))
|
|
i++
|
|
case '/':
|
|
toks = append(toks, makeTokValue(tkSlash, "/"))
|
|
i++
|
|
case '|':
|
|
if i+1 < n && s[i+1] == '|' {
|
|
toks = append(toks, makeTokValue(tkPipes, "||"))
|
|
i += 2
|
|
} else {
|
|
i++
|
|
}
|
|
case '=':
|
|
toks = append(toks, makeTokValue(tkEq, "="))
|
|
i++
|
|
case '<':
|
|
if i+1 < n && s[i+1] == '=' {
|
|
toks = append(toks, makeTokValue(tkLTE, "<="))
|
|
i += 2
|
|
} else if i+1 < n && s[i+1] == '>' {
|
|
toks = append(toks, makeTokValue(tkNEq, "<>"))
|
|
i += 2
|
|
} else {
|
|
toks = append(toks, makeTokValue(tkLT, "<"))
|
|
i++
|
|
}
|
|
case '>':
|
|
if i+1 < n && s[i+1] == '=' {
|
|
toks = append(toks, makeTokValue(tkGTE, ">="))
|
|
i += 2
|
|
} else {
|
|
toks = append(toks, makeTokValue(tkGT, ">"))
|
|
i++
|
|
}
|
|
case '!':
|
|
if i+1 < n && s[i+1] == '=' {
|
|
toks = append(toks, makeTokValue(tkNEq, "!="))
|
|
i += 2
|
|
} else {
|
|
i++
|
|
}
|
|
case ';':
|
|
i++
|
|
default:
|
|
i++
|
|
}
|
|
}
|
|
|
|
toks = append(toks, makeTokValue(tkEnd, ""))
|
|
return toks
|
|
}
|
|
|
|
// isAlphaSQL reports whether c is an ASCII letter (A-Z or a-z). Digits,
// underscore and bytes outside the ASCII letter ranges are not letters.
func isAlphaSQL(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
|
|
|
|
// SqlLexerTokenize(cSQL) → aTokens
|
|
// Direct Go port of TSqlLexer:Tokenize. Returns the same
|
|
// { { nType, cText }, ... } structure the PRG version produced.
|
|
func SqlLexerTokenize(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
toks := lexSQL(t.Local(1).AsString())
|
|
t.PushValue(hbrt.MakeArrayFrom(toks))
|
|
t.RetValue()
|
|
}
|
|
|
|
// SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams }
|
|
//
|
|
// Combined lex + template extraction — one Go call replaces three
|
|
// PRG-to-Go boundary crossings (lex, get tokens, extract). aTokens
|
|
// already has literal tokens replaced with TK_QMARK; aParams holds
|
|
// the extracted literal values in positional order; cKey is the
|
|
// plan cache key (digest of the normalized token-type sequence).
|
|
func SqlLexAndExtractTemplate(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
|
|
src := t.Local(1).AsString()
|
|
toks := lexSQL(src)
|
|
|
|
params := make([]hbrt.Value, 0, 8)
|
|
var keyBuf strings.Builder
|
|
keyBuf.Grow(len(src))
|
|
|
|
for _, tv := range toks {
|
|
tok := tv.AsArray()
|
|
if tok == nil || len(tok.Items) < 2 {
|
|
continue
|
|
}
|
|
ttype := int(tok.Items[0].AsNumInt())
|
|
switch ttype {
|
|
case tkText:
|
|
params = append(params, tok.Items[1])
|
|
tok.Items[0] = hbrt.MakeNumInt(tkQMark)
|
|
tok.Items[1] = hbrt.MakeString("?")
|
|
keyBuf.WriteByte('?')
|
|
case tkNum:
|
|
s := tok.Items[1].AsString()
|
|
var val hbrt.Value
|
|
if i, err := strconv.ParseInt(s, 10, 64); err == nil {
|
|
val = hbrt.MakeNumInt(i)
|
|
} else if f, err := strconv.ParseFloat(s, 64); err == nil {
|
|
val = hbrt.MakeDoubleAuto(f)
|
|
} else {
|
|
val = hbrt.MakeString(s)
|
|
}
|
|
params = append(params, val)
|
|
tok.Items[0] = hbrt.MakeNumInt(tkQMark)
|
|
tok.Items[1] = hbrt.MakeString("?")
|
|
keyBuf.WriteByte('#')
|
|
default:
|
|
keyBuf.WriteByte(byte(ttype) + 0x20)
|
|
if ttype == tkName {
|
|
keyBuf.WriteString(tok.Items[1].AsString())
|
|
keyBuf.WriteByte(' ')
|
|
}
|
|
}
|
|
}
|
|
|
|
result := hbrt.MakeArrayFrom([]hbrt.Value{
|
|
hbrt.MakeArrayFrom(toks),
|
|
hbrt.MakeString(keyBuf.String()),
|
|
hbrt.MakeArrayFrom(params),
|
|
})
|
|
t.PushValue(result)
|
|
t.RetValue()
|
|
}
|
|
|
|
// SqlExtractTemplate(aTokens) → { cKey, aParams }
|
|
//
|
|
// Walks a FiveSql2 lexer token stream, replacing string (TK_TEXT=2)
|
|
// and numeric (TK_NUM=3) literals with the parameter placeholder
|
|
// token (TK_QMARK=15). Collected literal values are returned as
|
|
// aParams in their natural left-to-right order.
|
|
//
|
|
// Each token is a 2-element array {nTokenType, cTokenValue}. The
|
|
// mutation is in place so the caller can pass the resulting aTokens
|
|
// straight into TSqlParser2 — the parser then emits ND_PAR nodes
|
|
// that resolve against aParams at execution time.
|
|
//
|
|
// The template key is a compact digest of the non-literal token
|
|
// type sequence, used as the plan cache key for queries that share
|
|
// the same shape but differ only in literal values. Queries like:
|
|
//
|
|
// INSERT INTO t VALUES (1,'a')
|
|
// INSERT INTO t VALUES (2,'b')
|
|
//
|
|
// produce the SAME key once literals are collapsed to '?', letting
|
|
// the plan cache hit from the 2nd call onward.
|
|
//
|
|
// Returns a 2-element array: { cKey, aParams }.
|
|
func SqlExtractTemplate(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
|
|
tokensVal := t.Local(1)
|
|
if !tokensVal.IsArray() {
|
|
empty := hbrt.MakeArrayFrom([]hbrt.Value{
|
|
hbrt.MakeString(""),
|
|
hbrt.MakeArrayFrom(nil),
|
|
})
|
|
t.PushValue(empty)
|
|
t.RetValue()
|
|
return
|
|
}
|
|
|
|
toks := tokensVal.AsArray().Items
|
|
params := make([]hbrt.Value, 0, 8)
|
|
|
|
// Template key — cheap digest of the token-type sequence.
|
|
var keyBuf strings.Builder
|
|
keyBuf.Grow(len(toks) * 2)
|
|
|
|
const (
|
|
tkText = 2
|
|
tkNum = 3
|
|
tkQmark = 15
|
|
)
|
|
|
|
for _, tokVal := range toks {
|
|
tok := tokVal.AsArray()
|
|
if tok == nil || len(tok.Items) < 2 {
|
|
continue
|
|
}
|
|
ttype := int(tok.Items[0].AsNumInt())
|
|
|
|
switch ttype {
|
|
case tkText:
|
|
// String literal → TK_QMARK + save raw string value.
|
|
params = append(params, tok.Items[1])
|
|
tok.Items[0] = hbrt.MakeInt(tkQmark)
|
|
tok.Items[1] = hbrt.MakeString("?")
|
|
keyBuf.WriteByte('?')
|
|
case tkNum:
|
|
// Numeric literal → TK_QMARK + parse value. Integer form
|
|
// when possible (common for id columns), double otherwise.
|
|
s := tok.Items[1].AsString()
|
|
var val hbrt.Value
|
|
if i, err := strconv.ParseInt(s, 10, 64); err == nil {
|
|
val = hbrt.MakeNumInt(i)
|
|
} else if f, err := strconv.ParseFloat(s, 64); err == nil {
|
|
val = hbrt.MakeDoubleAuto(f)
|
|
} else {
|
|
val = hbrt.MakeString(s)
|
|
}
|
|
params = append(params, val)
|
|
tok.Items[0] = hbrt.MakeInt(tkQmark)
|
|
tok.Items[1] = hbrt.MakeString("?")
|
|
keyBuf.WriteByte('#')
|
|
default:
|
|
// Non-literal token — include type code + text so two
|
|
// different-but-same-shape queries distinguish properly
|
|
// (e.g., SELECT id vs SELECT name).
|
|
keyBuf.WriteByte(byte(ttype) + 0x20) // offset to printable
|
|
if ttype == 1 { // TK_NAME — include name text
|
|
keyBuf.WriteString(strings.ToUpper(tok.Items[1].AsString()))
|
|
keyBuf.WriteByte(' ')
|
|
}
|
|
}
|
|
}
|
|
|
|
result := hbrt.MakeArrayFrom([]hbrt.Value{
|
|
hbrt.MakeString(keyBuf.String()),
|
|
hbrt.MakeArrayFrom(params),
|
|
})
|
|
t.PushValue(result)
|
|
t.RetValue()
|
|
}
|
|
|
|
// SqlCoerceStr(x) → cString
|
|
// Converts any scalar to its canonical string form (NULL-safe).
|
|
func SqlCoerceStr(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
v := t.Local(1)
|
|
t.RetString(sqlCoerceStr(v))
|
|
}
|
|
|
|
func sqlCoerceStr(v hbrt.Value) string {
|
|
switch {
|
|
case v.IsNil():
|
|
return ""
|
|
case v.IsString():
|
|
return v.AsString()
|
|
case v.IsNumeric():
|
|
if v.IsNumInt() {
|
|
return strconv.FormatInt(v.AsNumInt(), 10)
|
|
}
|
|
return strconv.FormatFloat(v.AsNumDouble(), 'g', -1, 64)
|
|
case v.IsLogical():
|
|
if v.AsBool() {
|
|
return "T"
|
|
}
|
|
return "F"
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// SqlCoerceNum(x) → nNumber
|
|
// Converts any scalar to numeric (NULL → 0, bool → 1/0, string → Val).
|
|
func SqlCoerceNum(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
v := t.Local(1)
|
|
switch {
|
|
case v.IsNil():
|
|
t.RetInt(0)
|
|
case v.IsNumeric():
|
|
t.RetVal(v)
|
|
case v.IsString():
|
|
t.RetVal(hbrt.MakeDoubleAuto(parseLeadingNumeric(v.AsString())))
|
|
case v.IsLogical():
|
|
if v.AsBool() {
|
|
t.RetInt(1)
|
|
} else {
|
|
t.RetInt(0)
|
|
}
|
|
default:
|
|
t.RetInt(0)
|
|
}
|
|
}
|
|
|
|
// SqlCoerceForCmp(x) → xNormalized
|
|
// Trim + upper-case strings; pass-through for other types. Used to
|
|
// make SQL equality/ordering case-insensitive on CHAR values.
|
|
func SqlCoerceForCmp(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
v := t.Local(1)
|
|
if v.IsString() {
|
|
t.RetString(strings.ToUpper(strings.TrimSpace(v.AsString())))
|
|
return
|
|
}
|
|
t.RetVal(v)
|
|
}
|
|
|
|
// SqlIsTrue(x) → lBool
|
|
// SQL truthiness: NIL → false, empty string → false, 0 → false.
|
|
func SqlIsTrue(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
t.RetBool(sqlIsTrue(t.Local(1)))
|
|
}
|
|
|
|
func sqlIsTrue(v hbrt.Value) bool {
|
|
switch {
|
|
case v.IsNil():
|
|
return false
|
|
case v.IsLogical():
|
|
return v.AsBool()
|
|
case v.IsNumeric():
|
|
if v.IsNumInt() {
|
|
return v.AsNumInt() != 0
|
|
}
|
|
return v.AsNumDouble() != 0 && !math.IsNaN(v.AsNumDouble())
|
|
case v.IsString():
|
|
return strings.TrimSpace(v.AsString()) != ""
|
|
}
|
|
return false
|
|
}
|
|
|
|
// SqlCmpEq(a, b) → lBool
|
|
// Case-insensitive equality with cross-type N↔C coercion.
|
|
func SqlCmpEq(t *hbrt.Thread) {
|
|
t.Frame(2, 0)
|
|
defer t.EndProc()
|
|
t.RetBool(sqlCmpEq(t.Local(1), t.Local(2)))
|
|
}
|
|
|
|
func sqlCmpEq(a, b hbrt.Value) bool {
|
|
aNil, bNil := a.IsNil(), b.IsNil()
|
|
if aNil || bNil {
|
|
return aNil && bNil
|
|
}
|
|
// Numeric: compare regardless of Int/Double distinction.
|
|
if a.IsNumeric() && b.IsNumeric() {
|
|
return a.AsNumDouble() == b.AsNumDouble()
|
|
}
|
|
if a.IsString() && b.IsString() {
|
|
return strings.EqualFold(
|
|
strings.TrimSpace(a.AsString()),
|
|
strings.TrimSpace(b.AsString()),
|
|
)
|
|
}
|
|
if a.IsLogical() && b.IsLogical() {
|
|
return a.AsBool() == b.AsBool()
|
|
}
|
|
if a.IsDate() && b.IsDate() {
|
|
return a.AsJulian() == b.AsJulian()
|
|
}
|
|
// Cross-type N / C coercion.
|
|
if a.IsNumeric() && b.IsString() {
|
|
return a.AsNumDouble() == parseLeadingNumeric(b.AsString())
|
|
}
|
|
if a.IsString() && b.IsNumeric() {
|
|
return parseLeadingNumeric(a.AsString()) == b.AsNumDouble()
|
|
}
|
|
return false
|
|
}
|
|
|
|
// SqlCmpLt(a, b) → lBool
|
|
// Case-insensitive less-than with cross-type N↔C coercion.
|
|
func SqlCmpLt(t *hbrt.Thread) {
|
|
t.Frame(2, 0)
|
|
defer t.EndProc()
|
|
t.RetBool(sqlCmpLt(t.Local(1), t.Local(2)))
|
|
}
|
|
|
|
func sqlCmpLt(a, b hbrt.Value) bool {
|
|
if a.IsNil() || b.IsNil() {
|
|
return false
|
|
}
|
|
if a.IsNumeric() && b.IsNumeric() {
|
|
return a.AsNumDouble() < b.AsNumDouble()
|
|
}
|
|
if a.IsString() && b.IsString() {
|
|
return strings.ToUpper(strings.TrimSpace(a.AsString())) <
|
|
strings.ToUpper(strings.TrimSpace(b.AsString()))
|
|
}
|
|
if a.IsDate() && b.IsDate() {
|
|
return a.AsJulian() < b.AsJulian()
|
|
}
|
|
if a.IsLogical() && b.IsLogical() {
|
|
return !a.AsBool() && b.AsBool()
|
|
}
|
|
if a.IsNumeric() && b.IsString() {
|
|
return a.AsNumDouble() < parseLeadingNumeric(b.AsString())
|
|
}
|
|
if a.IsString() && b.IsNumeric() {
|
|
return parseLeadingNumeric(a.AsString()) < b.AsNumDouble()
|
|
}
|
|
return false
|
|
}
|
|
|
|
// SqlIsAggName(cName) → lBool
|
|
// Go-native replacement for TSqlExpr.prg SqlIsAggName. The PRG version
|
|
// was `("," + c + ",") $ ("," + AGG_FUNCTIONS + ",")` — two string
|
|
// allocations + a substring scan per call. Profile showed this at
|
|
// 8.7% of B4 GROUP+HAVING CPU. Uses the aggFuncSet already declared
|
|
// in sqlexpr.go for SqlExprHasAgg.
|
|
func SqlIsAggName(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
name := t.Local(1).AsString()
|
|
if name == "" {
|
|
t.RetBool(false)
|
|
return
|
|
}
|
|
// Upper-case without allocating unless needed.
|
|
upper := name
|
|
for i := 0; i < len(name); i++ {
|
|
c := name[i]
|
|
if c >= 'a' && c <= 'z' {
|
|
upper = strings.ToUpper(name)
|
|
break
|
|
}
|
|
}
|
|
_, ok := aggFuncSet[upper]
|
|
t.RetBool(ok)
|
|
}
|