Files
five/hbrtl/sqlhelpers.go
CharlesKWON c84cde6175 perf(fivesql2): Go-native SqlIsAggName — drop per-row substring scan
B4 GROUP+HAVING profile showed SqlIsAggName at ~9% of CPU —
SqlEvalFunc checks it for every function in every row, and the
PRG body was two string allocations + a substring scan:
  RETURN ("," + c + ",") $ ("," + AGG_FUNCTIONS + ",")

Replace with a hash lookup against the existing aggFuncSet map
in hbrtl/sqlexpr.go (already populated for SqlExprHasAgg, same
AGG_FUNCTIONS list). Upper-casing skips the allocation when the
input is already upper, which it almost always is in practice.

Bench deltas (median of 3 steady runs, 1000 iters):
  B4_GROUP_HAVING 447 → 418 us  -6.5%
  B14_COUNT       252 → 235 us  -7%
  B15_CTE_WIN_JOIN 1595 → 1577 us  -1%
Other benches unchanged (no aggregate calls per row).

FiveSql2 43/43, Harbour compat 56/56.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 13:40:19 +09:00

615 lines
14 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// FiveSql2 scalar helpers — Go replacements for the PRG functions in
// _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during
// expression evaluation (WHERE / HAVING / CASE); porting removes PRG
// VM frame overhead on the hot interpreter path. Semantics match the
// PRG source byte-for-byte.
package hbrtl
import (
"math"
"strconv"
"strings"
"five/hbrt"
)
// FiveSql2 lexer token type codes — must match FiveSqlDef.ch.
// Each code is stored as the first element of a { nTokenType, cTokenValue }
// token array (see makeTokValue). The values are written out explicitly
// rather than generated so they can be compared line-by-line with the
// PRG header.
const (
// Terminator appended once after the last token of the input.
tkEnd = 0
// Identifiers/keywords, string literals and numeric literals.
tkName = 1
tkText = 2
tkNum = 3
// Punctuation: , . * ( )
tkComma = 4
tkDot = 5
tkStar = 6
tkLPar = 7
tkRPar = 8
// Comparison operators: =, <> or !=, <, >, <=, >=
tkEq = 9
tkNEq = 10
tkLT = 11
tkGT = 12
tkLTE = 13
tkGTE = 14
// Parameter placeholder `?`; also substituted for literals during
// template extraction.
tkQMark = 15
// Arithmetic / concatenation operators: + - / ||
tkPlus = 16
tkMinus = 17
tkSlash = 18
tkPipes = 19
)
// makeTokValue builds the two-slot token array { nTokenType, cTokenValue }
// that TSqlParser2 consumes: the first slot carries the numeric token type,
// the second the token text.
func makeTokValue(ttype int, text string) hbrt.Value {
	pair := make([]hbrt.Value, 2)
	pair[0] = hbrt.MakeNumInt(int64(ttype))
	pair[1] = hbrt.MakeString(text)
	return hbrt.MakeArrayFrom(pair)
}
// lexSQL is the Go port of TSqlLexer:Tokenize — a byte-level scanner over
// the SQL text. It returns one { nType, cText } token array per lexeme
// (built by makeTokValue) and always terminates the slice with a tkEnd
// token.
//
// Recognised input:
//   - whitespace, `-- line` comments and `/* block */` comments are skipped;
//     an unterminated block comment swallows the rest of the input
//   - 'single-quoted' strings, where ” inside the literal stands for one
//     quote character; an unterminated string runs to end of input
//   - numeric literals: a digit followed by any run of digits and dots
//   - identifiers/keywords (ASCII letters, digits, underscore) and
//     [bracketed identifiers]; both are upper-cased
//   - the `?` placeholder and the operators/punctuation in the switch below
//
// Bytes that match no rule — including `;`, a lone `|` and a lone `!` —
// are dropped without emitting a token.
func lexSQL(s string) []hbrt.Value {
	toks := make([]hbrt.Value, 0, 32)
	n := len(s)
	i := 0
	for i < n {
		c := s[i]

		// Whitespace
		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
			i++
			continue
		}

		// Line comment `-- ...` (the newline itself is consumed as
		// whitespace on the next iteration).
		if c == '-' && i+1 < n && s[i+1] == '-' {
			i += 2
			for i < n && s[i] != '\n' {
				i++
			}
			continue
		}

		// Block comment `/* ... */`
		if c == '/' && i+1 < n && s[i+1] == '*' {
			i += 2
			if end := strings.Index(s[i:], "*/"); end >= 0 {
				i += end + 2
			} else {
				// Unterminated comment: consume everything that is left so
				// the tail of the comment is never lexed as SQL.
				i = n
			}
			continue
		}

		// String literal (single-quoted, '' escapes a quote)
		if c == '\'' {
			i++
			start := i
			// sb is only used once an escape is seen; a literal without
			// escapes is sliced straight out of s with no copy.
			var sb strings.Builder
			escaped := false
			for i < n {
				if s[i] != '\'' {
					i++
					continue
				}
				if i+1 >= n || s[i+1] != '\'' {
					break // closing quote
				}
				// Doubled quote: flush the pending segment followed by
				// exactly one literal quote, then resume after the pair.
				sb.WriteString(s[start:i])
				sb.WriteByte('\'')
				escaped = true
				i += 2
				start = i
			}
			val := s[start:i]
			if escaped {
				sb.WriteString(val)
				val = sb.String()
			}
			if i < n {
				i++ // skip closing quote
			}
			toks = append(toks, makeTokValue(tkText, val))
			continue
		}

		// Numeric literal. Dots are accepted anywhere after the first
		// digit; validation of the text is left to the consumer.
		if c >= '0' && c <= '9' {
			start := i
			for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') {
				i++
			}
			toks = append(toks, makeTokValue(tkNum, s[start:i]))
			continue
		}

		// Identifier / keyword
		if isAlphaSQL(c) || c == '_' {
			start := i
			for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') {
				i++
			}
			toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i])))
			continue
		}

		// Bracketed identifier `[col name]`
		if c == '[' {
			i++
			start := i
			for i < n && s[i] != ']' {
				i++
			}
			name := strings.ToUpper(s[start:i])
			if i < n {
				i++ // skip ']'
			}
			toks = append(toks, makeTokValue(tkName, name))
			continue
		}

		// Parameter placeholder
		if c == '?' {
			toks = append(toks, makeTokValue(tkQMark, "?"))
			i++
			continue
		}

		// Multi-char + single-char operators / punctuation
		switch c {
		case ',':
			toks = append(toks, makeTokValue(tkComma, ","))
			i++
		case '.':
			toks = append(toks, makeTokValue(tkDot, "."))
			i++
		case '*':
			toks = append(toks, makeTokValue(tkStar, "*"))
			i++
		case '(':
			toks = append(toks, makeTokValue(tkLPar, "("))
			i++
		case ')':
			toks = append(toks, makeTokValue(tkRPar, ")"))
			i++
		case '+':
			toks = append(toks, makeTokValue(tkPlus, "+"))
			i++
		case '-':
			toks = append(toks, makeTokValue(tkMinus, "-"))
			i++
		case '/':
			toks = append(toks, makeTokValue(tkSlash, "/"))
			i++
		case '|':
			if i+1 < n && s[i+1] == '|' {
				toks = append(toks, makeTokValue(tkPipes, "||"))
				i += 2
			} else {
				i++ // lone '|' is not an operator; drop it
			}
		case '=':
			toks = append(toks, makeTokValue(tkEq, "="))
			i++
		case '<':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkLTE, "<="))
				i += 2
			} else if i+1 < n && s[i+1] == '>' {
				toks = append(toks, makeTokValue(tkNEq, "<>"))
				i += 2
			} else {
				toks = append(toks, makeTokValue(tkLT, "<"))
				i++
			}
		case '>':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkGTE, ">="))
				i += 2
			} else {
				toks = append(toks, makeTokValue(tkGT, ">"))
				i++
			}
		case '!':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkNEq, "!="))
				i += 2
			} else {
				i++ // lone '!' is not an operator; drop it
			}
		case ';':
			i++ // statement terminator produces no token
		default:
			i++ // unrecognised byte: skip
		}
	}
	toks = append(toks, makeTokValue(tkEnd, ""))
	return toks
}
// isAlphaSQL reports whether c is an ASCII letter (A–Z or a–z). Bytes
// outside the ASCII range are never treated as letters.
func isAlphaSQL(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
// SqlLexerTokenize(cSQL) → aTokens
// Runtime entry point for the Go lexer (port of TSqlLexer:Tokenize).
// Reads the SQL text from the first argument, runs lexSQL over it and
// returns the tokens as one array of { nType, cText } pairs.
func SqlLexerTokenize(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sqlText := t.Local(1).AsString()
	tokenArr := hbrt.MakeArrayFrom(lexSQL(sqlText))
	t.PushValue(tokenArr)
	t.RetValue()
}
// SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams }
//
// Lexes cSQL and collapses its literals in a single Go call, replacing
// three PRG-to-Go boundary crossings (lex, get tokens, extract).
//
//   - aTokens: the token stream, with every string and numeric literal
//     rewritten in place to a TK_QMARK token whose text is "?".
//   - aParams: the literal values that were removed, in source order.
//     Strings are kept as lexed; numbers become an integer when the text
//     parses as int64, otherwise a double, otherwise the raw text.
//   - cKey: the plan cache key. Each string literal contributes '?', each
//     numeric literal '#', and every other token its type code offset by
//     0x20; name tokens additionally append their text and a space.
func SqlLexAndExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sqlText := t.Local(1).AsString()
	tokens := lexSQL(sqlText)

	var key strings.Builder
	key.Grow(len(sqlText))
	literals := make([]hbrt.Value, 0, 8)

	for _, tokVal := range tokens {
		entry := tokVal.AsArray()
		if entry == nil || len(entry.Items) < 2 {
			continue
		}
		kind := int(entry.Items[0].AsNumInt())

		// Non-literal tokens only feed the key.
		if kind != tkText && kind != tkNum {
			key.WriteByte(byte(kind) + 0x20)
			if kind == tkName {
				key.WriteString(entry.Items[1].AsString())
				key.WriteByte(' ')
			}
			continue
		}

		// Literal: work out the parameter value and its key marker.
		marker := byte('?')
		literal := entry.Items[1]
		if kind == tkNum {
			marker = '#'
			raw := literal.AsString()
			if iv, err := strconv.ParseInt(raw, 10, 64); err == nil {
				literal = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(raw, 64); err == nil {
				literal = hbrt.MakeDoubleAuto(fv)
			} else {
				literal = hbrt.MakeString(raw)
			}
		}

		literals = append(literals, literal)
		entry.Items[0] = hbrt.MakeNumInt(tkQMark)
		entry.Items[1] = hbrt.MakeString("?")
		key.WriteByte(marker)
	}

	t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeArrayFrom(tokens),
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(literals),
	}))
	t.RetValue()
}
// SqlExtractTemplate(aTokens) → { cKey, aParams }
//
// Walks a FiveSql2 lexer token stream and replaces every string
// (TK_TEXT=2) and numeric (TK_NUM=3) literal with the parameter
// placeholder token (TK_QMARK=15). The removed literal values are
// returned as aParams in left-to-right order.
//
// Each token is a 2-element array {nTokenType, cTokenValue}. Tokens are
// rewritten in place, so the caller can hand the same aTokens straight
// to TSqlParser2 — the parser then emits ND_PAR nodes that resolve
// against aParams at execution time.
//
// cKey is a compact digest of the token sequence with literals collapsed,
// used as the plan cache key for queries that share a shape and differ
// only in literal values. For example
//
//	INSERT INTO t VALUES (1,'a')
//	INSERT INTO t VALUES (2,'b')
//
// yield the SAME key, so the plan cache hits from the second call on.
//
// A non-array argument yields { "", {} }.
func SqlExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if !arg.IsArray() {
		t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
			hbrt.MakeString(""),
			hbrt.MakeArrayFrom(nil),
		}))
		t.RetValue()
		return
	}

	// Token type codes this function cares about (see FiveSqlDef.ch).
	const (
		tkName  = 1
		tkText  = 2
		tkNum   = 3
		tkQmark = 15
	)

	tokens := arg.AsArray().Items
	literals := make([]hbrt.Value, 0, 8)

	var key strings.Builder
	key.Grow(len(tokens) * 2)

	for _, tokVal := range tokens {
		entry := tokVal.AsArray()
		if entry == nil || len(entry.Items) < 2 {
			continue
		}
		kind := int(entry.Items[0].AsNumInt())

		// Non-literal token: the key gets the type code (offset into the
		// printable range) and, for names, the upper-cased text, so that
		// e.g. SELECT id and SELECT name do not share a key.
		if kind != tkText && kind != tkNum {
			key.WriteByte(byte(kind) + 0x20)
			if kind == tkName {
				key.WriteString(strings.ToUpper(entry.Items[1].AsString()))
				key.WriteByte(' ')
			}
			continue
		}

		// Literal token: strings are saved as-is; numbers are parsed,
		// preferring an integer (common for id columns), then a double,
		// and falling back to the raw text.
		marker := byte('?')
		literal := entry.Items[1]
		if kind == tkNum {
			marker = '#'
			raw := literal.AsString()
			if iv, err := strconv.ParseInt(raw, 10, 64); err == nil {
				literal = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(raw, 64); err == nil {
				literal = hbrt.MakeDoubleAuto(fv)
			} else {
				literal = hbrt.MakeString(raw)
			}
		}

		literals = append(literals, literal)
		entry.Items[0] = hbrt.MakeInt(tkQmark)
		entry.Items[1] = hbrt.MakeString("?")
		key.WriteByte(marker)
	}

	t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(literals),
	}))
	t.RetValue()
}
// SqlCoerceStr(x) → cString
// Runtime entry point: returns the string form of its single argument as
// computed by sqlCoerceStr (NIL becomes the empty string).
func SqlCoerceStr(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	t.RetString(sqlCoerceStr(t.Local(1)))
}
// sqlCoerceStr converts a scalar to its string form:
//
//   - NIL      → ""
//   - string   → unchanged
//   - integer  → base-10 digits
//   - double   → shortest round-tripping 'g' representation
//   - logical  → "T" or "F"
//   - anything else → ""
//
// NOTE(review): strconv's 'g' format switches to exponent notation for
// large and very small magnitudes (e.g. 1e+06) — confirm that matches
// what the PRG implementation produced for such doubles.
func sqlCoerceStr(v hbrt.Value) string {
	if v.IsNil() {
		return ""
	}
	if v.IsString() {
		return v.AsString()
	}
	if v.IsNumeric() {
		if !v.IsNumInt() {
			return strconv.FormatFloat(v.AsNumDouble(), 'g', -1, 64)
		}
		return strconv.FormatInt(v.AsNumInt(), 10)
	}
	if v.IsLogical() {
		if v.AsBool() {
			return "T"
		}
		return "F"
	}
	return ""
}
// SqlCoerceNum(x) → nNumber
// Converts a scalar to a number:
//
//   - NIL      → 0
//   - numeric  → returned unchanged
//   - string   → parseLeadingNumeric of the text, returned as a double
//   - logical  → 1 for true, 0 for false
//   - anything else → 0
func SqlCoerceNum(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if arg.IsNil() {
		t.RetInt(0)
		return
	}
	if arg.IsNumeric() {
		t.RetVal(arg)
		return
	}
	if arg.IsString() {
		parsed := parseLeadingNumeric(arg.AsString())
		t.RetVal(hbrt.MakeDoubleAuto(parsed))
		return
	}
	if arg.IsLogical() && arg.AsBool() {
		t.RetInt(1)
		return
	}
	// False logicals and every unsupported type.
	t.RetInt(0)
}
// SqlCoerceForCmp(x) → xNormalized
// Normalises a value for comparison: strings have surrounding whitespace
// trimmed and are upper-cased; every other type is returned unchanged.
// This is what makes SQL equality/ordering on CHAR values
// case-insensitive.
func SqlCoerceForCmp(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if !arg.IsString() {
		t.RetVal(arg)
		return
	}
	trimmed := strings.TrimSpace(arg.AsString())
	t.RetString(strings.ToUpper(trimmed))
}
// SqlIsTrue(x) → lBool
// Runtime entry point for SQL truthiness of a single argument; the rules
// are implemented by sqlIsTrue (NIL, 0 and empty strings are false).
func SqlIsTrue(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	t.RetBool(sqlIsTrue(arg))
}
// sqlIsTrue reports the SQL truthiness of v:
//
//   - NIL      → false
//   - logical  → its own value
//   - integer  → true when non-zero
//   - double   → true when non-zero and not NaN
//   - string   → true when something remains after trimming whitespace
//   - anything else → false
func sqlIsTrue(v hbrt.Value) bool {
	if v.IsNil() {
		return false
	}
	if v.IsLogical() {
		return v.AsBool()
	}
	if v.IsNumeric() {
		if v.IsNumInt() {
			return v.AsNumInt() != 0
		}
		f := v.AsNumDouble()
		return !(f == 0 || math.IsNaN(f))
	}
	if v.IsString() {
		return len(strings.TrimSpace(v.AsString())) > 0
	}
	return false
}
// SqlCmpEq(a, b) → lBool
// Runtime entry point for SQL equality of two arguments. Strings compare
// case-insensitively and numbers/strings are coerced across types; the
// rules live in sqlCmpEq.
func SqlCmpEq(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpEq(lhs, rhs))
}
// sqlCmpEq reports whether a and b are equal under FiveSql2 rules:
//
//   - NIL equals only NIL.
//   - Two integers are compared exactly; any other numeric pair is
//     compared as doubles.
//   - Two strings are compared after trimming whitespace, ignoring case.
//   - Two logicals or two dates are compared by value.
//   - A number against a string compares the number with the string's
//     leading numeric value (parseLeadingNumeric).
//   - Every other type combination is unequal.
func sqlCmpEq(a, b hbrt.Value) bool {
	aNil, bNil := a.IsNil(), b.IsNil()
	if aNil || bNil {
		return aNil && bNil
	}
	if a.IsNumeric() && b.IsNumeric() {
		// Integer pairs must not go through float64: distinct int64
		// values beyond 2^53 can round to the same double.
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() == b.AsNumInt()
		}
		// Mixed Int/Double or Double/Double.
		return a.AsNumDouble() == b.AsNumDouble()
	}
	if a.IsString() && b.IsString() {
		return strings.EqualFold(
			strings.TrimSpace(a.AsString()),
			strings.TrimSpace(b.AsString()),
		)
	}
	if a.IsLogical() && b.IsLogical() {
		return a.AsBool() == b.AsBool()
	}
	if a.IsDate() && b.IsDate() {
		return a.AsJulian() == b.AsJulian()
	}
	// Cross-type N / C coercion.
	if a.IsNumeric() && b.IsString() {
		return a.AsNumDouble() == parseLeadingNumeric(b.AsString())
	}
	if a.IsString() && b.IsNumeric() {
		return parseLeadingNumeric(a.AsString()) == b.AsNumDouble()
	}
	return false
}
// SqlCmpLt(a, b) → lBool
// Runtime entry point for SQL less-than on two arguments. Strings order
// case-insensitively and numbers/strings are coerced across types; the
// rules live in sqlCmpLt.
func SqlCmpLt(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpLt(lhs, rhs))
}
// sqlCmpLt reports whether a sorts strictly before b under FiveSql2 rules:
//
//   - Any comparison involving NIL is false.
//   - Two integers are compared exactly; any other numeric pair is
//     compared as doubles.
//   - Two strings are compared after trimming whitespace and upper-casing.
//   - Two dates are compared by Julian day number.
//   - For logicals, false sorts before true.
//   - A number against a string compares the number with the string's
//     leading numeric value (parseLeadingNumeric).
//   - Every other type combination is false.
func sqlCmpLt(a, b hbrt.Value) bool {
	if a.IsNil() || b.IsNil() {
		return false
	}
	if a.IsNumeric() && b.IsNumeric() {
		// Integer pairs must not go through float64: adjacent int64
		// values beyond 2^53 can round to the same double and would then
		// compare as not-less in both directions.
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() < b.AsNumInt()
		}
		return a.AsNumDouble() < b.AsNumDouble()
	}
	if a.IsString() && b.IsString() {
		return strings.ToUpper(strings.TrimSpace(a.AsString())) <
			strings.ToUpper(strings.TrimSpace(b.AsString()))
	}
	if a.IsDate() && b.IsDate() {
		return a.AsJulian() < b.AsJulian()
	}
	if a.IsLogical() && b.IsLogical() {
		return !a.AsBool() && b.AsBool()
	}
	// Cross-type N / C coercion.
	if a.IsNumeric() && b.IsString() {
		return a.AsNumDouble() < parseLeadingNumeric(b.AsString())
	}
	if a.IsString() && b.IsNumeric() {
		return parseLeadingNumeric(a.AsString()) < b.AsNumDouble()
	}
	return false
}
// SqlIsAggName(cName) → lBool
// Go-native replacement for TSqlExpr.prg SqlIsAggName: reports whether
// cName, compared case-insensitively for ASCII letters, is a key of
// aggFuncSet (the set declared in sqlexpr.go for SqlExprHasAgg). An empty
// name is never an aggregate.
//
// The PRG version was `("," + c + ",") $ ("," + AGG_FUNCTIONS + ",")` —
// two string allocations plus a substring scan per call, which profiled
// at 8.7% of B4 GROUP+HAVING CPU. Here the name is upper-cased only when
// it actually contains an ASCII lowercase byte, so input that is already
// upper case is looked up as-is.
func SqlIsAggName(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	name := t.Local(1).AsString()
	if len(name) == 0 {
		t.RetBool(false)
		return
	}

	key := name
	for _, b := range []byte(name) {
		if 'a' <= b && b <= 'z' {
			key = strings.ToUpper(name)
			break
		}
	}

	_, found := aggFuncSet[key]
	t.RetBool(found)
}