perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x

Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.

Bench (bench_sql) vs 2026-04-08 baseline
 - B1  SELECT *             2,192 → 114   µs   (19x)
 - B6  INNER JOIN           9,291 → 233   µs   (40x)
 - B7  CTE simple           8,037 → 129   µs   (62x)
 - B9  ROW_NUMBER           3,705 → 265   µs   (14x)
 - B10 RANK PARTITION       4,748 → 309   µs   (15x)
 - B12 INSERT (WA cache)    4,319 →  63   µs   (69x)
 - B13 UPDATE (WA cache)    6,144 →  68   µs   (90x)
 - B15 CTE+WIN+JOIN        18,395 → 1,873 µs   (10x)

Infrastructure
 - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
 - HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
 - MEMRDD auto-imported via gengo; all Five programs get mem:name driver
 - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
 - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML

SQL engine
 - FiveSql2 lexer ported to Go (byte FSM) with combined automatic
   template parameterization (literals → ?, concat queries share plan)
 - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
   SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
   SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
 - CTE / subquery / driving-table materialize paths use MEMRDD
 - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
 - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
   dominant B13 cost — 1.6ms/call → gone)

Correctness fixes uncovered during migration
 - ASort default path now sorts dates/logicals/timestamps (was no-op)
 - ORDER BY default NULL placement matches PRG SqlRowCompare across
   Go fast path; explicit NULLS FIRST/LAST honored by both paths
 - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
 - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)

Verification
 - go test ./...              ALL PASS
 - FiveSql2 test_sql1999      43/43
 - tests/compat_harbour       56/56 (+5 new: ASort dates/logicals,
                              AScan int cross-type)
 - Regression test test_null_order.prg for ORDER BY NULL ordering

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 20:20:14 +09:00
parent 3caadb23b9
commit dd270d5d9d
31 changed files with 4501 additions and 495 deletions

View File

@@ -98,6 +98,69 @@ func AClone(t *hbrt.Thread) {
t.RetValue()
}
// HbDeepClone is the RTL entry point that returns a deep copy of its
// single argument. Nested arrays and hashes are duplicated by
// deepCloneValue; every other value kind is handed back unchanged.
//
// FiveSql2's plan cache calls this so each cache hit receives its own
// copy of the parsed query tree, because Run() mutates some nodes
// (SqlFoldConst in particular).
//
// Harbour: hb_DeepCopy(xVal) → xNewVal
func HbDeepClone(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	cloned := deepCloneValue(t.Local(1))
	t.PushValue(cloned)
	t.RetValue()
}
// deepCloneValue returns a structural copy of v.
//
//   - Array: a new array is built; elements that are themselves arrays
//     or hashes are cloned recursively, all other elements are copied
//     slot-for-slot without a function call.
//   - Hash: a new hash is built with the same keys in the same order.
//     Keys are shared, never cloned; values that are arrays or hashes
//     are cloned recursively.
//   - Anything else (and an Array/Hash whose backing pointer is nil):
//     v is returned as-is.
//
// There is no cycle detection: a self-referencing structure recurses
// without bound.
func deepCloneValue(v hbrt.Value) hbrt.Value {
	switch {
	case v.IsArray():
		arr := v.AsArray()
		if arr == nil {
			return v
		}
		cloned := make([]hbrt.Value, len(arr.Items))
		for idx, elem := range arr.Items {
			// Only containers need the recursive call.
			if elem.IsArray() || elem.IsHash() {
				elem = deepCloneValue(elem)
			}
			cloned[idx] = elem
		}
		return hbrt.MakeArrayFrom(cloned)

	case v.IsHash():
		h := v.AsHash()
		if h == nil {
			return v
		}
		out := hbrt.MakeHash()
		target := out.AsHash()
		for idx, key := range h.Keys {
			entry := h.Values[idx]
			if entry.IsArray() || entry.IsHash() {
				entry = deepCloneValue(entry)
			}
			// The key is reused; only the value side is duplicated.
			target.Append(key, entry)
		}
		return out
	}
	return v
}
// ACopy copies elements from one array to another.
// Harbour: ACopy(aSource, aDest [, nStart [, nCount [, nTargetPos]]]) → aDest
func ACopy(t *hbrt.Thread) {
@@ -133,6 +196,12 @@ func AFill(t *hbrt.Thread) {
// ASort sorts an array using an optional comparison block.
// Harbour: ASort(aArray [, nStart [, nCount [, bBlock]]]) → aArray
//
// Block path: invokes bBlock per compare (side-effect safe).
// Default path (no block): one pre-scan picks a specialized comparator
// for homogeneous arrays (string / numeric / date / timestamp /
// logical); mixed or unknown element types fall back to a generic
// less-than that matches Harbour's default `<` semantics across types.
func ASort(t *hbrt.Thread) {
nParams := t.ParamCount()
t.Frame(nParams, 0)
@@ -140,9 +209,13 @@ func ASort(t *hbrt.Thread) {
arrVal := t.Local(1)
arr := arrVal.AsArray()
if arr == nil || len(arr.Items) < 2 {
t.PushValue(arrVal)
t.RetValue()
return
}
if nParams >= 4 && t.Local(4).IsBlock() {
// Sort with code block comparator
blk := t.Local(4).AsBlock()
sort.SliceStable(arr.Items, func(i, j int) bool {
t.PushValue(arr.Items[i])
@@ -151,17 +224,47 @@ func ASort(t *hbrt.Thread) {
blk.Fn(t)
return t.GetRetValue().AsBool()
})
} else {
// Default sort: by value comparison
sort.SliceStable(arr.Items, func(i, j int) bool {
a, b := arr.Items[i], arr.Items[j]
if a.IsString() && b.IsString() {
return a.AsString() < b.AsString()
t.PushValue(arrVal)
t.RetValue()
return
}
// Default sort — pick a type-specialized comparator when every
// element shares a shape. Falls back to a generic less-than for
// mixed or uncategorized types.
items := arr.Items
switch detectArrayKind(items) {
case arrKindString:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsString() < items[j].AsString()
})
case arrKindInt:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsNumInt() < items[j].AsNumInt()
})
case arrKindNumeric:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsNumDouble() < items[j].AsNumDouble()
})
case arrKindDate:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsJulian() < items[j].AsJulian()
})
case arrKindTimestamp:
sort.SliceStable(items, func(i, j int) bool {
ja, jb := items[i].AsJulian(), items[j].AsJulian()
if ja != jb {
return ja < jb
}
if a.IsNumeric() && b.IsNumeric() {
return a.AsNumDouble() < b.AsNumDouble()
}
return false
return items[i].AsTimeMs() < items[j].AsTimeMs()
})
case arrKindLogical:
sort.SliceStable(items, func(i, j int) bool {
return !items[i].AsBool() && items[j].AsBool()
})
default:
sort.SliceStable(items, func(i, j int) bool {
return valueLess(items[i], items[j])
})
}
@@ -169,6 +272,98 @@ func ASort(t *hbrt.Thread) {
t.RetValue()
}
// arrKind is the element-type classification returned by
// detectArrayKind; ASort switches on it to choose a comparator.
type arrKind int
const (
// arrKindMixed is the zero value; detectArrayKind returns it for an
// empty slice or when no single category matches every element.
arrKindMixed arrKind = iota
// arrKindString: every element satisfies IsString.
arrKindString
// arrKindInt: every element satisfies IsNumInt.
arrKindInt
// arrKindNumeric: every element satisfies IsNumeric, at least one
// of them not IsNumInt.
arrKindNumeric
// arrKindDate: every element satisfies IsDate.
arrKindDate
// arrKindTimestamp: every element satisfies IsTimestamp.
arrKindTimestamp
// arrKindLogical: every element satisfies IsLogical.
arrKindLogical
)
// detectArrayKind classifies items by element type. It returns the
// first category, in the priority order below, that every element
// satisfies, or arrKindMixed when none does (including for an empty
// slice):
//
//	int → numeric → string → date → timestamp → logical
//
// Int is tested before numeric so integer-only arrays get arrKindInt;
// any array that is all-numeric but not all-int falls through to
// arrKindNumeric.
func detectArrayKind(items []hbrt.Value) arrKind {
	if len(items) == 0 {
		return arrKindMixed
	}

	// every reports whether is holds for all elements, stopping at
	// the first mismatch.
	every := func(is func(hbrt.Value) bool) bool {
		for idx := range items {
			if !is(items[idx]) {
				return false
			}
		}
		return true
	}

	candidates := [...]struct {
		kind arrKind
		is   func(hbrt.Value) bool
	}{
		{arrKindInt, func(v hbrt.Value) bool { return v.IsNumInt() }},
		{arrKindNumeric, func(v hbrt.Value) bool { return v.IsNumeric() }},
		{arrKindString, func(v hbrt.Value) bool { return v.IsString() }},
		{arrKindDate, func(v hbrt.Value) bool { return v.IsDate() }},
		{arrKindTimestamp, func(v hbrt.Value) bool { return v.IsTimestamp() }},
		{arrKindLogical, func(v hbrt.Value) bool { return v.IsLogical() }},
	}
	for _, cand := range candidates {
		if every(cand.is) {
			return cand.kind
		}
	}
	return arrKindMixed
}
// valueLess is the generic "a < b" used when ASort cannot pick a
// specialized comparator.
//
// Ordering rules:
//   - NIL sorts before any non-NIL value; two NILs are not less than
//     each other.
//   - numeric vs numeric: compared as doubles.
//   - string vs string: Go byte-wise string comparison.
//   - date vs date: compared by Julian day.
//   - timestamp vs timestamp: Julian day first, then milliseconds.
//   - logical vs logical: false sorts before true.
//   - any other pairing (mismatched or unhandled types): false.
func valueLess(a, b hbrt.Value) bool {
	aNil, bNil := a.IsNil(), b.IsNil()
	if aNil || bNil {
		return aNil && !bNil
	}

	switch {
	case a.IsNumeric() && b.IsNumeric():
		return a.AsNumDouble() < b.AsNumDouble()
	case a.IsString() && b.IsString():
		return a.AsString() < b.AsString()
	case a.IsDate() && b.IsDate():
		return a.AsJulian() < b.AsJulian()
	case a.IsTimestamp() && b.IsTimestamp():
		if dayA, dayB := a.AsJulian(), b.AsJulian(); dayA != dayB {
			return dayA < dayB
		}
		return a.AsTimeMs() < b.AsTimeMs()
	case a.IsLogical() && b.IsLogical():
		return !a.AsBool() && b.AsBool()
	}
	return false
}
// AEval evaluates a block for each element in array.
// Harbour: AEval(aArray, bBlock [, nStart [, nCount]]) → aArray
func AEval(t *hbrt.Thread) {
@@ -201,6 +396,12 @@ func AEval(t *hbrt.Thread) {
// AScan searches for a value in array, returns position (0 if not found).
// Harbour: AScan(aArray, xValue|bBlock [, nStart [, nCount]]) → nPos
//
// Block path: per-element block invoke (side-effect safe).
// Value path: specialized fast-paths for string / int / double search
// values — the loop stays inside Go without running through the
// generic valuesEqual type-dispatch each iteration. Mixed or rare
// types (date, timestamp, logical, nil) fall back to valuesEqual.
func AScan(t *hbrt.Thread) {
nParams := t.ParamCount()
t.Frame(nParams, 0)
@@ -208,11 +409,16 @@ func AScan(t *hbrt.Thread) {
arrVal := t.Local(1)
arr := arrVal.AsArray()
if arr == nil {
t.RetInt(0)
return
}
items := arr.Items
search := t.Local(2)
if search.IsBlock() {
blk := search.AsBlock()
for i, item := range arr.Items {
for i, item := range items {
t.PushValue(item)
t.PendingParams2(1)
blk.Fn(t)
@@ -221,8 +427,45 @@ func AScan(t *hbrt.Thread) {
return
}
}
} else {
for i, item := range arr.Items {
t.RetInt(0)
return
}
switch {
case search.IsString():
s := search.AsString()
for i, item := range items {
if item.IsString() && item.AsString() == s {
t.RetInt(int64(i + 1))
return
}
}
case search.IsNumInt():
n := search.AsNumInt()
for i, item := range items {
if !item.IsNumeric() {
continue
}
if item.IsNumInt() {
if item.AsNumInt() == n {
t.RetInt(int64(i + 1))
return
}
} else if item.AsNumDouble() == float64(n) {
t.RetInt(int64(i + 1))
return
}
}
case search.IsNumeric():
f := search.AsNumDouble()
for i, item := range items {
if item.IsNumeric() && item.AsNumDouble() == f {
t.RetInt(int64(i + 1))
return
}
}
default:
for i, item := range items {
if valuesEqual(item, search) {
t.RetInt(int64(i + 1))
return

View File

@@ -17,8 +17,7 @@ func HbHash(t *hbrt.Thread) {
h := hbrt.MakeHash()
hh := h.AsHash()
for i := 1; i <= nParams-1; i += 2 {
hh.Keys = append(hh.Keys, t.Local(i))
hh.Values = append(hh.Values, t.Local(i+1))
hh.Set(t.Local(i), t.Local(i+1))
}
t.PushValue(h)
t.RetValue()
@@ -29,16 +28,12 @@ func HbHash(t *hbrt.Thread) {
func HbHGet(t *hbrt.Thread) {
t.Frame(2, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
hh := hVal.AsHash()
hh := t.Local(1).AsHash()
if hh != nil {
for i, k := range hh.Keys {
if valuesEqual(k, key) {
t.PushValue(hh.Values[i])
t.RetValue()
return
}
if i := hh.Lookup(t.Local(2)); i >= 0 {
t.PushValue(hh.Values[i])
t.RetValue()
return
}
}
t.PushNil()
@@ -51,20 +46,8 @@ func HbHSet(t *hbrt.Thread) {
t.Frame(3, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
val := t.Local(3)
hh := hVal.AsHash()
if hh != nil {
for i, k := range hh.Keys {
if valuesEqual(k, key) {
hh.Values[i] = val
t.PushValue(hVal)
t.RetValue()
return
}
}
hh.Keys = append(hh.Keys, key)
hh.Values = append(hh.Values, val)
if hh := hVal.AsHash(); hh != nil {
hh.Set(t.Local(2), t.Local(3))
}
t.PushValue(hVal)
t.RetValue()
@@ -76,16 +59,8 @@ func HbHDel(t *hbrt.Thread) {
t.Frame(2, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
hh := hVal.AsHash()
if hh != nil {
for i, k := range hh.Keys {
if valuesEqual(k, key) {
hh.Keys = append(hh.Keys[:i], hh.Keys[i+1:]...)
hh.Values = append(hh.Values[:i], hh.Values[i+1:]...)
break
}
}
if hh := hVal.AsHash(); hh != nil {
hh.Delete(t.Local(2))
}
t.PushValue(hVal)
t.RetValue()
@@ -96,19 +71,8 @@ func HbHDel(t *hbrt.Thread) {
func HbHHasKey(t *hbrt.Thread) {
t.Frame(2, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
hh := hVal.AsHash()
if hh != nil {
for _, k := range hh.Keys {
if valuesEqual(k, key) {
t.PushBool(true)
t.RetValue()
return
}
}
}
t.PushBool(false)
hh := t.Local(1).AsHash()
t.PushBool(hh != nil && hh.Has(t.Local(2)))
t.RetValue()
}

View File

@@ -147,15 +147,9 @@ func navigatePath(v hbrt.Value, path string) hbrt.Value {
}
if v.IsHash() {
h := v.AsHash()
found := false
for i, k := range h.Keys {
if k.AsString() == part {
v = h.Values[i]
found = true
break
}
}
if !found {
if i := h.Lookup(hbrt.MakeString(part)); i >= 0 {
v = h.Values[i]
} else {
return hbrt.MakeNil()
}
} else {
@@ -212,18 +206,7 @@ func JsonMerge(t *hbrt.Thread) {
copy(result.Keys, dh.Keys)
copy(result.Values, dh.Values)
for i, sk := range sh.Keys {
found := false
for j, rk := range result.Keys {
if rk.AsString() == sk.AsString() {
result.Values[j] = sh.Values[i]
found = true
break
}
}
if !found {
result.Keys = append(result.Keys, sk)
result.Values = append(result.Values, sh.Values[i])
}
result.Set(sk, sh.Values[i])
}
t.RetVal(hbrt.MakeHashFrom(result))
}

View File

@@ -14,8 +14,24 @@ import (
"five/compiler/pp"
"five/hbrt"
"os"
"sync"
)
// pcCompileCache stores compiled PcodeFunc keyed by the original PRG
// expression string. Compilation does parser + preprocessor + pcode
// generation per call (~50-200µs for small expressions); for repeated
// queries (same SQL template) every call after the first is a
// sync.Map hit and returns the cached pointer directly.
//
// Thread safety: PcodeFunc is immutable after compilation (no
// per-call mutable state — execution state lives on hbrt.Thread),
// so sharing the pointer across goroutines is safe.
//
// Unbounded: distinct SQL / expression text count is bounded by the
// caller's query set; for FiveSql2 workloads this is a small constant.
// Switch to LRU if a pathological caller emerges.
var pcCompileCache sync.Map // map[string]*hbrt.PcodeFunc
// PcCompile(cPrgExpr) → pFunc
//
// Compile a PRG expression to pcode. Returns an opaque pointer that can
@@ -44,6 +60,14 @@ func PcCompile(t *hbrt.Thread) {
return
}
// Cache hit — skip parser/genpc entirely.
if cached, ok := pcCompileCache.Load(source); ok {
if fn, ok := cached.(*hbrt.PcodeFunc); ok && fn != nil {
t.RetPointer(fn)
return
}
}
// Wrap expression in a function stub so the parser can handle it.
wrapped := "FUNCTION _EXPR()\nRETURN " + source + "\n"
@@ -89,6 +113,11 @@ func PcCompile(t *hbrt.Thread) {
return
}
// Populate the cache. sync.Map.Store handles concurrent writers —
// duplicate compilations of the same source waste a few µs but
// don't corrupt the map; whichever compilation finishes second
// overwrites with an identical value.
pcCompileCache.Store(source, fn)
t.RetPointer(fn)
}

View File

@@ -55,6 +55,8 @@ func RegisterRTL(vm *hbrt.VM) {
hbrt.Sym("AINS", hbrt.FsPublic, AIns),
hbrt.Sym("ASIZE", hbrt.FsPublic, ASize),
hbrt.Sym("ACLONE", hbrt.FsPublic, AClone),
hbrt.Sym("HBDEEPCLONE", hbrt.FsPublic, HbDeepClone),
hbrt.Sym("HB_DEEPCOPY", hbrt.FsPublic, HbDeepClone),
hbrt.Sym("ACOPY", hbrt.FsPublic, ACopy),
hbrt.Sym("AFILL", hbrt.FsPublic, AFill),
hbrt.Sym("ASORT", hbrt.FsPublic, ASort),
@@ -623,6 +625,32 @@ func RegisterRTL(vm *hbrt.VM) {
hbrt.Sym("SQLHASHJOIN", hbrt.FsPublic, SqlHashJoin),
hbrt.Sym("SQLORDERBY", hbrt.FsPublic, SqlOrderBy),
hbrt.Sym("SQLGROUPBY", hbrt.FsPublic, SqlGroupBy),
hbrt.Sym("SQLDISTINCT", hbrt.FsPublic, SqlDistinct),
hbrt.Sym("SQLEXPRHASAGG", hbrt.FsPublic, SqlExprHasAgg),
hbrt.Sym("SQLBULKINSERT", hbrt.FsPublic, SqlBulkInsert),
hbrt.Sym("SQLBULKUPDATE", hbrt.FsPublic, SqlBulkUpdate),
hbrt.Sym("SQLWINDOWPARTITIONS", hbrt.FsPublic, SqlWindowPartitions),
hbrt.Sym("SQLGROUPROWS", hbrt.FsPublic, SqlGroupRows),
hbrt.Sym("SQLCOMPUTEAGGSIMPLE", hbrt.FsPublic, SqlComputeAggSimple),
hbrt.Sym("SQLEVALHAVING", hbrt.FsPublic, SqlEvalHaving),
hbrt.Sym("SQLCOERCESTR", hbrt.FsPublic, SqlCoerceStr),
hbrt.Sym("SQLCOERCENUM", hbrt.FsPublic, SqlCoerceNum),
hbrt.Sym("SQLCOERCEFORCMP", hbrt.FsPublic, SqlCoerceForCmp),
hbrt.Sym("SQLISTRUE", hbrt.FsPublic, SqlIsTrue),
hbrt.Sym("SQLCMPEQ", hbrt.FsPublic, SqlCmpEq),
hbrt.Sym("SQLCMPLT", hbrt.FsPublic, SqlCmpLt),
hbrt.Sym("SQLEXTRACTTEMPLATE", hbrt.FsPublic, SqlExtractTemplate),
hbrt.Sym("SQLLEXERTOKENIZE", hbrt.FsPublic, SqlLexerTokenize),
hbrt.Sym("SQLLEXANDEXTRACTTEMPLATE", hbrt.FsPublic, SqlLexAndExtractTemplate),
hbrt.Sym("SQLWACACHEENABLE", hbrt.FsPublic, SqlWACacheEnable),
hbrt.Sym("SQLWACACHEDISABLE", hbrt.FsPublic, SqlWACacheDisable),
hbrt.Sym("SQLWACACHEISENABLED", hbrt.FsPublic, SqlWACacheIsEnabled),
hbrt.Sym("SQLWACACHEGET", hbrt.FsPublic, SqlWACacheGet),
hbrt.Sym("SQLWACACHEPUT", hbrt.FsPublic, SqlWACachePut),
hbrt.Sym("SQLWACACHEINVALIDATE", hbrt.FsPublic, SqlWACacheInvalidate),
hbrt.Sym("SQLWACACHECLOSEALL", hbrt.FsPublic, SqlWACacheCloseAll),
hbrt.Sym("SQLWINDOWSORTPARTITION", hbrt.FsPublic, SqlWindowSortPartition),
hbrt.Sym("SQLWINDOWASSIGNRANK", hbrt.FsPublic, SqlWindowAssignRank),
// Goroutine / Concurrency
hbrt.Sym("GO", hbrt.FsPublic, GoFunc),

137
hbrtl/sqlexpr.go Normal file
View File

@@ -0,0 +1,137 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Go-native FiveSql2 expression helpers.
// Port of the tight, interpreter-heavy recursive walkers from
// _FiveSql2/src/TSqlExpr.prg into straight Go — the PRG versions
// are bottleneck-prone because every recursion pays the full VM
// frame setup cost, and SqlExprHasAgg is invoked per result
// column per query.
package hbrtl
import (
"five/hbrt"
)
// FiveSql2 AST node kinds — must mirror _FiveSql2/src/FiveSqlDef.ch.
// Nodes are stored as Five arrays { nKind, xVal, xLeft, xRight, xExtra }
// (1-based in PRG, 0-based here).
//
// sqlExprHasAggWalk only branches on ndFn, ndBin, ndUni and ndCase;
// every other kind is treated as "no aggregate". The numbering has
// gaps (8 and 11 are not declared here).
const (
ndLit = 1
ndCol = 2
ndFn = 3 // walker reads Items[1] as the function name, Items[2] as the argument array
ndBin = 4 // walker recurses into Items[2] and Items[3]
ndUni = 5 // walker recurses into Items[2]
ndCase = 6 // walker reads Items[1] as the WHEN/THEN pair array, Items[2] as ELSE
ndSub = 7 // not descended into by the walker
ndPar = 9
ndNil = 10
ndWindow = 12 // not descended into by the walker
)
// aggFuncSet mirrors the AGG_FUNCTIONS macro in FiveSqlDef.ch. Names
// are stored in canonical upper case; the PRG parser upper-cases
// function identifiers at parse time so no ToUpper is needed on the
// hot path. If that invariant ever changes, upper-case here.
var aggFuncSet = map[string]struct{}{
"COUNT": {},
"SUM": {},
"AVG": {},
"MIN": {},
"MAX": {},
"GROUP_CONCAT": {},
"STRING_AGG": {},
"LISTAGG": {},
"JSON_ARRAYAGG": {},
"JSON_OBJECTAGG": {},
"XMLAGG": {},
"ANY_VALUE": {},
"BOOL_AND": {},
"BOOL_OR": {},
}
// sqlExprHasAggWalk reports whether the expression tree rooted at v
// contains a call to a function listed in aggFuncSet.
//
// Traversal rules (mirroring TSqlExpr.prg:SqlExprHasAgg):
//   - ndFn: true when the function name is an aggregate; otherwise
//     each argument is searched.
//   - ndBin: both operands are searched.
//   - ndUni: the single operand is searched.
//   - ndCase: every WHEN/THEN pair is searched, then the ELSE branch
//     when present.
//   - everything else — including ndWindow and ndSub, which carry
//     their own aggregation scope — yields false without descending.
//
// NIL values, non-array values and nodes with fewer than two slots
// yield false.
func sqlExprHasAggWalk(v hbrt.Value) bool {
	if v.IsNil() {
		return false
	}
	node := v.AsArray()
	if node == nil {
		return false
	}
	fields := node.Items
	if len(fields) < 2 {
		return false
	}

	switch int(fields[0].AsNumInt()) {
	case ndFn:
		if _, isAgg := aggFuncSet[fields[1].AsString()]; isAgg {
			return true
		}
		// Scalar function: an aggregate may still hide in its arguments.
		if len(fields) < 3 || !fields[2].IsArray() {
			return false
		}
		for _, arg := range fields[2].AsArray().Items {
			if sqlExprHasAggWalk(arg) {
				return true
			}
		}
		return false

	case ndBin:
		return len(fields) >= 4 &&
			(sqlExprHasAggWalk(fields[2]) || sqlExprHasAggWalk(fields[3]))

	case ndUni:
		return len(fields) >= 3 && sqlExprHasAggWalk(fields[2])

	case ndCase:
		// fields[1] holds the WHEN/THEN pairs, fields[2] the optional ELSE.
		if fields[1].IsArray() {
			for _, branch := range fields[1].AsArray().Items {
				pair := branch.AsArray()
				if pair == nil || len(pair.Items) < 2 {
					continue // malformed pair: ignore
				}
				if sqlExprHasAggWalk(pair.Items[0]) || sqlExprHasAggWalk(pair.Items[1]) {
					return true
				}
			}
		}
		if len(fields) < 3 || fields[2].IsNil() {
			return false
		}
		return sqlExprHasAggWalk(fields[2])

	default:
		return false
	}
}
// SqlExprHasAgg(xExpr) → lHasAgg
//
// RTL entry point: returns .T. when the AST passed as the first
// argument contains an aggregate function call, as decided by
// sqlExprHasAggWalk. Intended as a drop-in replacement for the PRG
// SqlExprHasAgg function.
func SqlExprHasAgg(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	hasAgg := sqlExprHasAggWalk(t.Local(1))
	t.RetBool(hasAgg)
}
// Silence "declared and not used" for constants that exist solely to
// document FiveSqlDef.ch layout — keeping them in source form helps
// future walker additions (ND_SUB for subquery flattening, ND_WINDOW
// for window-over-aggregate detection).
var _ = [...]int{ndLit, ndCol, ndSub, ndPar, ndNil, ndWindow}

587
hbrtl/sqlhelpers.go Normal file
View File

@@ -0,0 +1,587 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// FiveSql2 scalar helpers — Go replacements for the PRG functions in
// _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during
// expression evaluation (WHERE / HAVING / CASE); porting removes PRG
// VM frame overhead on the hot interpreter path. Semantics match the
// PRG source byte-for-byte.
package hbrtl
import (
"math"
"strconv"
"strings"
"five/hbrt"
)
// FiveSql2 lexer token type codes — must match FiveSqlDef.ch.
const (
tkEnd = 0
tkName = 1
tkText = 2
tkNum = 3
tkComma = 4
tkDot = 5
tkStar = 6
tkLPar = 7
tkRPar = 8
tkEq = 9
tkNEq = 10
tkLT = 11
tkGT = 12
tkLTE = 13
tkGTE = 14
tkQMark = 15
tkPlus = 16
tkMinus = 17
tkSlash = 18
tkPipes = 19
)
// makeTokValue builds one lexer token as the 2-element Five array
// { nTokenType, cTokenValue } that TSqlParser2 consumes.
func makeTokValue(ttype int, text string) hbrt.Value {
	pair := make([]hbrt.Value, 2)
	pair[0] = hbrt.MakeNumInt(int64(ttype))
	pair[1] = hbrt.MakeString(text)
	return hbrt.MakeArrayFrom(pair)
}
// lexSQL is the Go port of TSqlLexer:Tokenize — a byte-level scanner
// over the input string. It returns the token list as Five values
// (each built by makeTokValue) and always terminates it with a tkEnd
// token.
//
// Recognized input:
//   - whitespace (space, tab, CR, LF), `-- line` comments and
//     `/* block */` comments are skipped;
//   - single-quoted string literals, where `''` stands for one quote
//     character, become tkText with the unescaped content;
//   - runs of digits and dots starting with a digit become tkNum (the
//     text is not validated here);
//   - identifiers/keywords ([A-Za-z_][A-Za-z0-9_]*) and bracketed
//     identifiers `[...]` become tkName, upper-cased;
//   - `?` becomes tkQMark;
//   - punctuation and operators `, . * ( ) + - / || = < <= <> > >= !=`
//     become their respective tokens.
//
// A lone `|`, a lone `!`, `;` and any other unrecognized byte are
// skipped silently. Unterminated string literals and bracketed
// identifiers run to end of input.
//
// Fixes relative to the previous version:
//   - A literal with two or more `''` escapes no longer gains an extra
//     quote per additional escape ('a''b''c' now yields a'b'c).
//   - An unterminated block comment now consumes the rest of the input
//     instead of leaking its final byte as a token.
func lexSQL(s string) []hbrt.Value {
	toks := make([]hbrt.Value, 0, 32)
	n := len(s)
	i := 0
	for i < n {
		c := s[i]

		// Whitespace
		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
			i++
			continue
		}

		// Line comment `-- ...`
		if c == '-' && i+1 < n && s[i+1] == '-' {
			i += 2
			for i < n && s[i] != '\n' {
				i++
			}
			continue
		}

		// Block comment `/* ... */`
		if c == '/' && i+1 < n && s[i+1] == '*' {
			i += 2
			closed := false
			for i+1 < n {
				if s[i] == '*' && s[i+1] == '/' {
					i += 2
					closed = true
					break
				}
				i++
			}
			if !closed {
				// No terminator: the comment swallows everything left,
				// including the last byte the scan loop cannot pair up.
				i = n
			}
			continue
		}

		// String literal (single-quoted, '' escapes a quote)
		if c == '\'' {
			i++
			start := i
			var sb strings.Builder
			escaped := false
			for i < n {
				if s[i] != '\'' {
					i++
					continue
				}
				if i+1 < n && s[i+1] == '\'' {
					// Flush the segment before the escape, then emit
					// exactly one quote for the doubled pair.
					sb.WriteString(s[start:i])
					sb.WriteByte('\'')
					escaped = true
					i += 2
					start = i
					continue
				}
				break // closing quote
			}
			// Fast path: without escapes the literal is a plain
			// substring and the builder is never used.
			val := s[start:i]
			if escaped {
				sb.WriteString(val)
				val = sb.String()
			}
			if i < n {
				i++ // skip closing quote
			}
			toks = append(toks, makeTokValue(tkText, val))
			continue
		}

		// Numeric literal
		if c >= '0' && c <= '9' {
			start := i
			for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') {
				i++
			}
			toks = append(toks, makeTokValue(tkNum, s[start:i]))
			continue
		}

		// Identifier / keyword
		if isAlphaSQL(c) || c == '_' {
			start := i
			for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') {
				i++
			}
			toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i])))
			continue
		}

		// Bracketed identifier `[col name]`
		if c == '[' {
			i++
			start := i
			for i < n && s[i] != ']' {
				i++
			}
			name := strings.ToUpper(s[start:i])
			if i < n {
				i++ // skip ']'
			}
			toks = append(toks, makeTokValue(tkName, name))
			continue
		}

		// Parameter placeholder
		if c == '?' {
			toks = append(toks, makeTokValue(tkQMark, "?"))
			i++
			continue
		}

		// Multi-char + single-char operators / punctuation
		switch c {
		case ',':
			toks = append(toks, makeTokValue(tkComma, ","))
			i++
		case '.':
			toks = append(toks, makeTokValue(tkDot, "."))
			i++
		case '*':
			toks = append(toks, makeTokValue(tkStar, "*"))
			i++
		case '(':
			toks = append(toks, makeTokValue(tkLPar, "("))
			i++
		case ')':
			toks = append(toks, makeTokValue(tkRPar, ")"))
			i++
		case '+':
			toks = append(toks, makeTokValue(tkPlus, "+"))
			i++
		case '-':
			toks = append(toks, makeTokValue(tkMinus, "-"))
			i++
		case '/':
			toks = append(toks, makeTokValue(tkSlash, "/"))
			i++
		case '|':
			if i+1 < n && s[i+1] == '|' {
				toks = append(toks, makeTokValue(tkPipes, "||"))
				i += 2
			} else {
				i++ // lone '|' is dropped
			}
		case '=':
			toks = append(toks, makeTokValue(tkEq, "="))
			i++
		case '<':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkLTE, "<="))
				i += 2
			} else if i+1 < n && s[i+1] == '>' {
				toks = append(toks, makeTokValue(tkNEq, "<>"))
				i += 2
			} else {
				toks = append(toks, makeTokValue(tkLT, "<"))
				i++
			}
		case '>':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkGTE, ">="))
				i += 2
			} else {
				toks = append(toks, makeTokValue(tkGT, ">"))
				i++
			}
		case '!':
			if i+1 < n && s[i+1] == '=' {
				toks = append(toks, makeTokValue(tkNEq, "!="))
				i += 2
			} else {
				i++ // lone '!' is dropped
			}
		case ';':
			i++
		default:
			i++ // unrecognized byte: skip
		}
	}
	toks = append(toks, makeTokValue(tkEnd, ""))
	return toks
}
// isAlphaSQL reports whether c is an ASCII letter (A-Z or a-z).
func isAlphaSQL(c byte) bool {
	// Setting bit 0x20 folds ASCII upper case onto lower case; no byte
	// outside A-Z / a-z lands in the a-z range after the fold.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
// SqlLexerTokenize(cSQL) → aTokens
//
// RTL entry point around lexSQL: tokenizes the string argument and
// returns the tokens as a Five array of { nType, cText } pairs, the
// same structure the PRG TSqlLexer:Tokenize produced.
func SqlLexerTokenize(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sql := t.Local(1).AsString()
	tokens := hbrt.MakeArrayFrom(lexSQL(sql))
	t.PushValue(tokens)
	t.RetValue()
}
// SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams }
//
// Lexes cSQL and extracts its parameter template in a single call.
//
//   - aTokens: the token list with every string (tkText) and numeric
//     (tkNum) literal rewritten in place to a tkQMark "?" token.
//   - aParams: the extracted literal values in left-to-right order.
//     Numeric text is parsed as an integer when possible, otherwise as
//     a double, otherwise kept as the raw string.
//   - cKey: the plan cache key. Each string literal contributes '?',
//     each numeric literal '#', and every other token contributes its
//     type code offset by 0x20; tkName tokens additionally contribute
//     their text followed by a space.
func SqlLexAndExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sql := t.Local(1).AsString()
	tokens := lexSQL(sql)

	literals := make([]hbrt.Value, 0, 8)
	var key strings.Builder
	key.Grow(len(sql))

	for idx := range tokens {
		pair := tokens[idx].AsArray()
		if pair == nil || len(pair.Items) < 2 {
			continue
		}
		kind := int(pair.Items[0].AsNumInt())

		// Non-literal tokens only feed the key.
		if kind != tkText && kind != tkNum {
			key.WriteByte(byte(kind) + 0x20)
			if kind == tkName {
				key.WriteString(pair.Items[1].AsString())
				key.WriteByte(' ')
			}
			continue
		}

		// Literal: capture the value, then rewrite the token to "?".
		marker := byte('?')
		lit := pair.Items[1]
		if kind == tkNum {
			marker = '#'
			raw := lit.AsString()
			if iv, err := strconv.ParseInt(raw, 10, 64); err == nil {
				lit = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(raw, 64); err == nil {
				lit = hbrt.MakeDoubleAuto(fv)
			} else {
				lit = hbrt.MakeString(raw)
			}
		}
		literals = append(literals, lit)
		pair.Items[0] = hbrt.MakeNumInt(tkQMark)
		pair.Items[1] = hbrt.MakeString("?")
		key.WriteByte(marker)
	}

	result := hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeArrayFrom(tokens),
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(literals),
	})
	t.PushValue(result)
	t.RetValue()
}
// SqlExtractTemplate(aTokens) → { cKey, aParams }
//
// Turns an already-lexed FiveSql2 token stream into a parameterized
// template. Each token is a 2-element array {nTokenType, cTokenValue}.
//
// String (TK_TEXT=2) and numeric (TK_NUM=3) literal tokens are
// rewritten IN PLACE to the placeholder token (TK_QMARK=15, text "?"),
// so the caller can pass the same aTokens straight into TSqlParser2.
// The literal values are collected into aParams in left-to-right
// order; numeric text becomes an integer when it parses as one, a
// double otherwise, and stays a string if neither parse succeeds.
//
// cKey is the plan cache key: '?' per string literal, '#' per numeric
// literal, and for every other token its type code offset by 0x20
// (TK_NAME tokens also append their upper-cased text and a space, so
// SELECT id and SELECT name get different keys). Queries such as
//
//	INSERT INTO t VALUES (1,'a')
//	INSERT INTO t VALUES (2,'b')
//
// therefore share one key.
//
// A non-array argument yields { "", {} }. Entries that are not arrays
// of at least two elements are skipped.
func SqlExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	const (
		tkName  = 1
		tkText  = 2
		tkNum   = 3
		tkQmark = 15
	)

	arg := t.Local(1)
	if !arg.IsArray() {
		t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
			hbrt.MakeString(""),
			hbrt.MakeArrayFrom(nil),
		}))
		t.RetValue()
		return
	}

	tokens := arg.AsArray().Items
	literals := make([]hbrt.Value, 0, 8)

	var key strings.Builder
	key.Grow(len(tokens) * 2)

	for idx := range tokens {
		pair := tokens[idx].AsArray()
		if pair == nil || len(pair.Items) < 2 {
			continue
		}
		kind := int(pair.Items[0].AsNumInt())

		// Non-literal token: contributes to the key only.
		if kind != tkText && kind != tkNum {
			key.WriteByte(byte(kind) + 0x20) // offset to printable
			if kind == tkName {
				key.WriteString(strings.ToUpper(pair.Items[1].AsString()))
				key.WriteByte(' ')
			}
			continue
		}

		// Literal token: save the value, then replace it with "?".
		marker := byte('?')
		lit := pair.Items[1]
		if kind == tkNum {
			marker = '#'
			raw := lit.AsString()
			// Integer form when possible, double otherwise.
			if iv, err := strconv.ParseInt(raw, 10, 64); err == nil {
				lit = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(raw, 64); err == nil {
				lit = hbrt.MakeDoubleAuto(fv)
			} else {
				lit = hbrt.MakeString(raw)
			}
		}
		literals = append(literals, lit)
		pair.Items[0] = hbrt.MakeInt(tkQmark)
		pair.Items[1] = hbrt.MakeString("?")
		key.WriteByte(marker)
	}

	result := hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(literals),
	})
	t.PushValue(result)
	t.RetValue()
}
// SqlCoerceStr(x) → cString
//
// RTL entry point: returns the string form of its argument as
// computed by sqlCoerceStr (NIL becomes the empty string).
func SqlCoerceStr(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	text := sqlCoerceStr(t.Local(1))
	t.RetString(text)
}
// sqlCoerceStr converts a scalar to its string form:
//
//   - NIL → ""
//   - string → unchanged
//   - integer → base-10 digits
//   - other numeric → shortest round-trip 'g' formatting
//   - logical → "T" or "F"
//   - anything else → ""
//
// NOTE(review): 'g' formatting switches to exponent notation for
// large or very small magnitudes (e.g. 1e+06) — confirm this matches
// the PRG helper it replaces.
func sqlCoerceStr(v hbrt.Value) string {
	if v.IsNil() {
		return ""
	}
	if v.IsString() {
		return v.AsString()
	}
	if v.IsNumeric() {
		if v.IsNumInt() {
			return strconv.FormatInt(v.AsNumInt(), 10)
		}
		return strconv.FormatFloat(v.AsNumDouble(), 'g', -1, 64)
	}
	if v.IsLogical() {
		if v.AsBool() {
			return "T"
		}
		return "F"
	}
	return ""
}
// SqlCoerceNum(x) → nNumber
//
// Converts a scalar to a number:
//
//   - NIL → 0
//   - numeric → returned unchanged
//   - string → parseLeadingNumeric of the text, as a double
//   - logical → 1 for true, 0 for false
//   - anything else → 0
func SqlCoerceNum(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if arg.IsNil() {
		t.RetInt(0)
		return
	}
	if arg.IsNumeric() {
		t.RetVal(arg)
		return
	}
	if arg.IsString() {
		parsed := parseLeadingNumeric(arg.AsString())
		t.RetVal(hbrt.MakeDoubleAuto(parsed))
		return
	}
	if arg.IsLogical() && arg.AsBool() {
		t.RetInt(1)
		return
	}
	// Logical false and every unhandled type collapse to zero.
	t.RetInt(0)
}
// SqlCoerceForCmp(x) → xNormalized
//
// Normalizes a value for comparison: strings are whitespace-trimmed
// and upper-cased; every other type is returned untouched. This makes
// SQL equality/ordering on CHAR values case-insensitive.
func SqlCoerceForCmp(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if !arg.IsString() {
		t.RetVal(arg)
		return
	}
	normalized := strings.ToUpper(strings.TrimSpace(arg.AsString()))
	t.RetString(normalized)
}
// SqlIsTrue(x) → lBool
//
// RTL entry point for SQL truthiness; see sqlIsTrue for the rules.
func SqlIsTrue(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	truthy := sqlIsTrue(t.Local(1))
	t.RetBool(truthy)
}
// sqlIsTrue reports whether v counts as true.
//
//   - NIL      → false
//   - logical  → its own value
//   - integer  → true when non-zero
//   - double   → true when neither zero nor NaN
//   - string   → true when something remains after trimming whitespace
//   - others   → false
func sqlIsTrue(v hbrt.Value) bool {
	if v.IsNil() {
		return false
	}
	if v.IsLogical() {
		return v.AsBool()
	}
	if v.IsNumeric() {
		if v.IsNumInt() {
			return v.AsNumInt() != 0
		}
		f := v.AsNumDouble()
		return !(f == 0 || math.IsNaN(f))
	}
	if v.IsString() {
		return len(strings.TrimSpace(v.AsString())) > 0
	}
	return false
}
// SqlCmpEq(a, b) → lBool
// PRG-callable wrapper around sqlCmpEq: equality that ignores case on
// strings and coerces between numeric and character operands.
func SqlCmpEq(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()
	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpEq(lhs, rhs))
}
// sqlCmpEq reports whether a and b are equal.
//
// Rules, checked in order:
//   - If either side is NIL the result is true only when both are NIL.
//   - numeric vs numeric: two integers are compared exactly as integers;
//     any pairing that involves a double is compared as float64, so an
//     integer 1 equals a double 1.0.
//   - string vs string: surrounding whitespace is trimmed and the
//     comparison ignores case (strings.EqualFold).
//   - logical vs logical and date vs date: compared by value.
//   - numeric vs string (either order): the string side is converted
//     with parseLeadingNumeric and compared as float64.
//   - Any other type pairing is unequal.
func sqlCmpEq(a, b hbrt.Value) bool {
	aNil, bNil := a.IsNil(), b.IsNil()
	if aNil || bNil {
		return aNil && bNil
	}
	if a.IsNumeric() && b.IsNumeric() {
		// Two integers must not go through float64: values beyond 2^53
		// lose their low bits in the conversion, which would make
		// distinct integers compare equal.
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() == b.AsNumInt()
		}
		// Mixed Int/Double (or Double/Double): normalize to float64.
		return a.AsNumDouble() == b.AsNumDouble()
	}
	if a.IsString() && b.IsString() {
		return strings.EqualFold(
			strings.TrimSpace(a.AsString()),
			strings.TrimSpace(b.AsString()),
		)
	}
	if a.IsLogical() && b.IsLogical() {
		return a.AsBool() == b.AsBool()
	}
	if a.IsDate() && b.IsDate() {
		return a.AsJulian() == b.AsJulian()
	}
	// Cross-type N / C coercion.
	if a.IsNumeric() && b.IsString() {
		return a.AsNumDouble() == parseLeadingNumeric(b.AsString())
	}
	if a.IsString() && b.IsNumeric() {
		return parseLeadingNumeric(a.AsString()) == b.AsNumDouble()
	}
	return false
}
// SqlCmpLt(a, b) → lBool
// PRG-callable wrapper around sqlCmpLt: strict less-than that ignores
// case on strings and coerces between numeric and character operands.
func SqlCmpLt(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()
	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpLt(lhs, rhs))
}
// sqlCmpLt reports whether a sorts strictly before b.
//
// Rules, checked in order:
//   - A NIL on either side yields false.
//   - numeric vs numeric: two integers are compared exactly as integers;
//     any pairing that involves a double is compared as float64.
//   - string vs string: both sides are trimmed and upper-cased, then
//     compared byte-wise.
//   - date vs date: compared by Julian day number.
//   - logical vs logical: false sorts before true.
//   - numeric vs string (either order): the string side is converted
//     with parseLeadingNumeric and compared as float64.
//   - Any other type pairing yields false.
func sqlCmpLt(a, b hbrt.Value) bool {
	if a.IsNil() || b.IsNil() {
		return false
	}
	if a.IsNumeric() && b.IsNumeric() {
		// Two integers must not go through float64: values beyond 2^53
		// collapse onto the same double, so a genuinely smaller integer
		// would be reported as "not less".
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() < b.AsNumInt()
		}
		// Mixed Int/Double (or Double/Double): normalize to float64.
		return a.AsNumDouble() < b.AsNumDouble()
	}
	if a.IsString() && b.IsString() {
		return strings.ToUpper(strings.TrimSpace(a.AsString())) <
			strings.ToUpper(strings.TrimSpace(b.AsString()))
	}
	if a.IsDate() && b.IsDate() {
		return a.AsJulian() < b.AsJulian()
	}
	if a.IsLogical() && b.IsLogical() {
		// false < true is the only strictly-less combination.
		return !a.AsBool() && b.AsBool()
	}
	// Cross-type N / C coercion.
	if a.IsNumeric() && b.IsString() {
		return a.AsNumDouble() < parseLeadingNumeric(b.AsString())
	}
	if a.IsString() && b.IsNumeric() {
		return parseLeadingNumeric(a.AsString()) < b.AsNumDouble()
	}
	return false
}

File diff suppressed because it is too large Load Diff

142
hbrtl/sqlwacache.go Normal file
View File

@@ -0,0 +1,142 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Workarea cache for FiveSql2 DML — opt-in persistent workarea slots
// keyed by alias. Eliminates per-query dbUseArea + dbCloseArea syscall
// overhead for repeated INSERT / UPDATE / DELETE against the same table.
//
// Semantics:
// * Disabled by default. Callers opt in via SqlWACacheEnable(). Tests
// and short one-shot scripts can stay on the safe per-query open/
// close behavior; long-running bench loops or servers pay the open
// cost once.
// * Entries map uppercase alias → workarea number. The PRG side is
// responsible for the actual dbUseArea / dbSelectArea — this layer
// only stores the handle.
// * Invalidation is explicit. CREATE TABLE / DROP TABLE in
// TSqlDDL.prg call SqlWACacheInvalidate before any filesystem
// operation that would otherwise collide with a still-open handle.
// * SqlWACacheCloseAll drops every entry; callers then decide how
// to actually close the workareas (dbCloseAll, per-alias close, …).
package hbrtl
import (
"strings"
"sync"
"five/hbrt"
)
// Package-level cache state. Every function in this file takes
// waCacheMu before touching the other two variables.
var (
// waCacheMu serializes access to waCacheEntries and waCacheEnabled.
waCacheMu sync.Mutex
// waCacheEntries maps an upper-cased alias to its workarea number.
waCacheEntries = map[string]int{}
// waCacheEnabled is the opt-in switch; the zero value leaves the
// cache off until SqlWACacheEnable is called.
waCacheEnabled bool
)
// SqlWACacheEnable() → NIL
// Switches the workarea cache on. Only the enabled flag changes here;
// the entry map is left as it is, so nothing is registered until a
// later SqlWACachePut.
func SqlWACacheEnable(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()
	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		waCacheEnabled = true
	}()
	t.RetNil()
}
// SqlWACacheDisable() → NIL
// Switches the cache off and forgets every registered alias. No
// workarea is closed here — only the bookkeeping is reset, so the
// caller is expected to close the areas itself.
func SqlWACacheDisable(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()
	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		waCacheEnabled = false
		waCacheEntries = map[string]int{}
	}()
	t.RetNil()
}
// SqlWACacheIsEnabled() → lBool
// Returns the current state of the cache switch, read under the lock.
func SqlWACacheIsEnabled(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()
	enabled := func() bool {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		return waCacheEnabled
	}()
	t.RetBool(enabled)
}
// SqlWACacheGet(cAlias) → nWA | 0
// Looks up the workarea number registered for an alias; the alias is
// upper-cased before the lookup. The result is 0 when the cache is
// disabled or the alias has no entry. The number is only what was last
// stored — this function does not check that the workarea is still
// open, so the caller must verify it before use.
func SqlWACacheGet(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()
	var slot int
	waCacheMu.Lock()
	if waCacheEnabled {
		// The argument is only read when the cache is on.
		key := strings.ToUpper(t.Local(1).AsString())
		slot = waCacheEntries[key]
	}
	waCacheMu.Unlock()
	t.RetInt(int64(slot))
}
// SqlWACachePut(cAlias, nWA) → NIL
// Stores nWA under the upper-cased alias, replacing any previous entry.
// Nothing is stored when the cache is disabled or nWA is not positive,
// so callers may invoke it unconditionally after an open.
func SqlWACachePut(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()
	key := strings.ToUpper(t.Local(1).AsString())
	slot := int(t.Local(2).AsNumInt())
	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		if !waCacheEnabled || slot <= 0 {
			return
		}
		waCacheEntries[key] = slot
	}()
	t.RetNil()
}
// SqlWACacheInvalidate(cAlias) → NIL
// Removes the entry for one alias (upper-cased before lookup); a
// missing entry is not an error. The removal happens whether or not
// the cache is currently enabled. Intended to run before the table's
// file is recreated or erased so no stale handle stays registered.
func SqlWACacheInvalidate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()
	key := strings.ToUpper(t.Local(1).AsString())
	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		delete(waCacheEntries, key)
	}()
	t.RetNil()
}
// SqlWACacheCloseAll() → aKeys
// Clears the cache and returns an array holding every alias that was
// registered, in no particular order (Go map iteration). The workareas
// themselves are not closed here; the caller closes them using the
// returned aliases.
func SqlWACacheCloseAll(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()

	// Detach the populated map under the lock and install a fresh one.
	// After the swap nothing else can reach the old map, so it can be
	// walked without holding the mutex.
	waCacheMu.Lock()
	drained := waCacheEntries
	waCacheEntries = map[string]int{}
	waCacheMu.Unlock()

	aliases := make([]hbrt.Value, 0, len(drained))
	for alias := range drained {
		aliases = append(aliases, hbrt.MakeString(alias))
	}
	t.PushValue(hbrt.MakeArrayFrom(aliases))
	t.RetValue()
}