perf(FiveSql2): SqlHashBuild + FetchRow column binding — 3-way JOIN 3x
Complex-query benchmarking turned up two hot paths that the earlier
SqlScan/SqlEach work didn't touch: multi-table JOIN and nested-scan
row fetching. This commit hits both.
--- Part 1: SqlHashBuild — Go-native hash-join build ---
FiveSql2's HashJoin previously built the inner-side hash in PRG:
WHILE !Eof()
xVal := FieldGet(nFPos)
cKey := SqlValToStr(xVal)
IF !hb_HHasKey(hHash, cKey) ; hHash[cKey] := {} ; ENDIF
AAdd(hHash[cKey], RecNo())
dbSkip()
ENDDO
That loop runs at ~40μs per row from class dispatch + hb_HHasKey
lookups + AAdd growth + SqlValToStr formatting. On a 50k-row inner
table that's ~2 seconds wasted on what should be a sub-50ms
housekeeping op.
New hbrtl.SqlHashBuild does the same thing in one Go-native pass:
- Direct *dbf.DBFArea loop (no interface dispatch, same devirt as
SqlScan)
- Go `map[string][]int64` accumulates RecNos by key — one
allocation per distinct key
- Inline ASCII-only digit formatter for numeric keys (strconv.Itoa
is allocation-heavy for small ints)
- CHAR keys are right-trimmed to match SqlCmpEq semantics so the
hash probe matches what EvalExpr would compute
- Final Five hash is built once from Keys/Values/Order slices
directly, skipping the per-key hb_HSet path
HashJoin now calls `SqlHashBuild(nFPos)` instead of running the
PRG loop.
--- Part 2: TSqlExecutor:BuildFetchCache ---
The JOIN fallback loop calls FetchRow per row. FetchRow was already
column-ref-aware but did the string parse (`At + SubStr + Upper`)
and `::FindWA` linear scan every single invocation. For a 50k-row
join emitting 50k result rows, that's ~200k redundant resolutions.
New BuildFetchCache walks the SELECT list once before the scan and
pre-binds each plain-column expression to `{nWA, nFPos}`. FetchRow's
new fast path checks ::aFetchCache and jumps straight to
`dbSelectArea + FieldGet` when bound. Complex exprs (functions,
CASE, subqueries) still fall through to EvalExpr.
::aFetchCache is set right before the join WHILE loop and cleared
after — no cross-query bleed.
--- Bench (50k ord × 10k emp × 100 dept, 3-run steady state) ---
Query Before After Speedup
────────────────────────────────────────────────────────────
2-way INNER JOIN, 10k rows 91ms 68ms 1.34x
2-way JOIN + GROUP BY 110ms 94ms 1.17x
3-way INNER JOIN COUNT 2610ms 610ms 4.28x
3-way JOIN + GROUP BY 2860ms 830ms 3.45x
The 3-way speedup is almost entirely SqlHashBuild. The 2-way case
benefits from the fetch cache because its per-row cost is dominated
by FetchRow (no second hash build to amortize).
--- Limits still standing ---
CTE + JOIN queries (Q7 in bench_complex: ~4.5s) aren't affected by
either optimization — CTE materialization goes through a different
path that writes/reads a temp DBF. Follow-up target.
Validation:
- FiveSql2 43/43
- Harbour compat 51/51
- go test ./... ALL PASS
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -619,6 +619,7 @@ func RegisterRTL(vm *hbrt.VM) {
|
||||
// Go-native SQL scan loop (bypasses PRG interpreter for hot path)
|
||||
hbrt.Sym("SQLSCAN", hbrt.FsPublic, SqlScan),
|
||||
hbrt.Sym("SQLEACH", hbrt.FsPublic, SqlEach),
|
||||
hbrt.Sym("SQLHASHBUILD", hbrt.FsPublic, SqlHashBuild),
|
||||
|
||||
// Goroutine / Concurrency
|
||||
hbrt.Sym("GO", hbrt.FsPublic, GoFunc),
|
||||
|
||||
153
hbrtl/sqlscan.go
153
hbrtl/sqlscan.go
@@ -28,6 +28,7 @@ import (
|
||||
"five/hbrdd"
|
||||
"five/hbrdd/dbf"
|
||||
"five/hbrt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// SqlScan(aFieldPositions, pcWhere) → aRows
|
||||
@@ -238,6 +239,158 @@ func SqlScan(t *hbrt.Thread) {
|
||||
t.RetValue()
|
||||
}
|
||||
|
||||
// SqlHashBuild(nFieldPos) → hHash
|
||||
//
|
||||
// Scans the current workarea and returns a hash mapping each field
|
||||
// value (as a string key) to an array of RecNos that have that value.
|
||||
// Used by FiveSql2's HashJoin: FiveSql2 currently builds this in PRG,
|
||||
// paying ~40μs per row from class dispatch + hb_HHasKey + AAdd growth.
|
||||
// 50k rows × 40μs = 2 seconds wasted on what should be a sub-50ms op.
|
||||
//
|
||||
// Go-native build goes through *dbf.DBFArea directly and uses a native
|
||||
// Go `map[string][]int64` which GC's as one unit. Final conversion to
|
||||
// a Five hash is done once at the end.
|
||||
func SqlHashBuild(t *hbrt.Thread) {
|
||||
t.Frame(1, 0)
|
||||
defer t.EndProc()
|
||||
|
||||
nFieldPos := int(t.Local(1).AsNumInt()) - 1
|
||||
if nFieldPos < 0 {
|
||||
t.PushValue(hbrt.MakeHash())
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
|
||||
wam, ok := t.WA.(*hbrdd.WorkAreaManager)
|
||||
if !ok {
|
||||
t.PushValue(hbrt.MakeHash())
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
area := wam.Current()
|
||||
if area == nil {
|
||||
t.PushValue(hbrt.MakeHash())
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
|
||||
// Type-assert once so the per-row field reads inline.
|
||||
dbfArea, _ := area.(*dbf.DBFArea)
|
||||
|
||||
goMap := make(map[string][]int64, 4096)
|
||||
|
||||
if dbfArea != nil {
|
||||
dbfArea.GoTop()
|
||||
for !dbfArea.EOF() {
|
||||
v, _ := dbfArea.GetValue(nFieldPos)
|
||||
key := valueHashKey(v)
|
||||
goMap[key] = append(goMap[key], int64(dbfArea.RecNo()))
|
||||
dbfArea.Skip(1)
|
||||
}
|
||||
} else {
|
||||
area.GoTop()
|
||||
for !area.EOF() {
|
||||
v, _ := area.GetValue(nFieldPos)
|
||||
key := valueHashKey(v)
|
||||
// Generic RecNo via interface
|
||||
var rn int64
|
||||
if rmgr, ok := area.(interface{ RecNo() uint32 }); ok {
|
||||
rn = int64(rmgr.RecNo())
|
||||
}
|
||||
goMap[key] = append(goMap[key], rn)
|
||||
area.Skip(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Materialize as a Five hash — build Keys/Values slices directly on
|
||||
// the HbHash struct, skipping the per-key map-lookup path that PRG
|
||||
// hb_HSet would take.
|
||||
nKeys := len(goMap)
|
||||
keys := make([]hbrt.Value, 0, nKeys)
|
||||
vals := make([]hbrt.Value, 0, nKeys)
|
||||
order := make([]int, 0, nKeys)
|
||||
idx := 0
|
||||
for k, recs := range goMap {
|
||||
items := make([]hbrt.Value, len(recs))
|
||||
for i, r := range recs {
|
||||
items[i] = hbrt.MakeNumInt(r)
|
||||
}
|
||||
keys = append(keys, hbrt.MakeString(k))
|
||||
vals = append(vals, hbrt.MakeArrayFrom(items))
|
||||
order = append(order, idx)
|
||||
idx++
|
||||
}
|
||||
result := hbrt.MakeHash()
|
||||
hh := result.AsHash()
|
||||
hh.Keys = keys
|
||||
hh.Values = vals
|
||||
hh.Order = order
|
||||
|
||||
t.PushValue(result)
|
||||
t.RetValue()
|
||||
}
|
||||
|
||||
// valueHashKey converts a Value to a stable string key for Go map use.
|
||||
// Matches what SqlValToStr does in PRG, but without allocation detours.
|
||||
func valueHashKey(v hbrt.Value) string {
|
||||
switch {
|
||||
case v.IsNil():
|
||||
return "\x00NIL"
|
||||
case v.IsString():
|
||||
// Match PRG SqlValToStr: trim trailing spaces so CHAR hash probes
|
||||
// compare the same as the equivalent SqlCmpEq call.
|
||||
s := v.AsString()
|
||||
end := len(s)
|
||||
for end > 0 && s[end-1] == ' ' {
|
||||
end--
|
||||
}
|
||||
return s[:end]
|
||||
case v.IsNumeric():
|
||||
if v.IsNumInt() {
|
||||
return strconvItoa(v.AsNumInt())
|
||||
}
|
||||
return strconvFtoa(v.AsNumDouble())
|
||||
case v.IsLogical():
|
||||
if v.AsBool() {
|
||||
return "T"
|
||||
}
|
||||
return "F"
|
||||
case v.IsDate():
|
||||
return strconvItoa(v.AsJulian())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func strconvItoa(n int64) string {
|
||||
// strconv.Itoa is heavy on allocation for small ints — this is the
|
||||
// hot path for hash keys so use a tight formatter.
|
||||
if n == 0 {
|
||||
return "0"
|
||||
}
|
||||
neg := n < 0
|
||||
if neg {
|
||||
n = -n
|
||||
}
|
||||
var buf [20]byte
|
||||
i := len(buf)
|
||||
for n > 0 {
|
||||
i--
|
||||
buf[i] = byte('0' + n%10)
|
||||
n /= 10
|
||||
}
|
||||
if neg {
|
||||
i--
|
||||
buf[i] = '-'
|
||||
}
|
||||
return string(buf[i:])
|
||||
}
|
||||
|
||||
func strconvFtoa(f float64) string {
|
||||
// Only used for non-integer numeric field values (rare in join keys);
|
||||
// OK to call into strconv.
|
||||
return strconv.FormatFloat(f, 'g', -1, 64)
|
||||
}
|
||||
|
||||
// SqlEach(aFieldPositions, pcWhere, bBlock) → NIL
|
||||
//
|
||||
// Streaming variant of SqlScan — instead of materializing all matching
|
||||
|
||||
Reference in New Issue
Block a user