perf(FiveSql2): SqlHashBuild + FetchRow column binding — 3-way JOIN 3.4–4.3x

Complex-query benchmarking turned up two hot paths that the earlier
SqlScan/SqlEach work didn't touch: multi-table JOIN and nested-scan
row fetching. This commit hits both.

--- Part 1: SqlHashBuild — Go-native hash-join build ---

FiveSql2's HashJoin previously built the inner-side hash in PRG:

    WHILE !Eof()
      xVal := FieldGet(nFPos)
      cKey := SqlValToStr(xVal)
      IF !hb_HHasKey(hHash, cKey) ; hHash[cKey] := {} ; ENDIF
      AAdd(hHash[cKey], RecNo())
      dbSkip()
    ENDDO

That loop runs at ~40μs per row from class dispatch + hb_HHasKey
lookups + AAdd growth + SqlValToStr formatting. On a 50k-row inner
table that's ~2 seconds wasted on what should be a sub-50ms
housekeeping op.

New hbrtl.SqlHashBuild does the same thing in one Go-native pass:

  - Direct *dbf.DBFArea loop (no interface dispatch, same devirt as
    SqlScan)
  - Go `map[string][]int64` accumulates RecNos by key — one
    allocation per distinct key
  - Inline ASCII-only digit formatter for numeric keys (strconv.Itoa
    is allocation-heavy for small ints)
  - CHAR keys are right-trimmed to match SqlCmpEq semantics so the
    hash probe matches what EvalExpr would compute
  - Final Five hash is built once from Keys/Values/Order slices
    directly, skipping the per-key hb_HSet path

HashJoin now calls `SqlHashBuild(nFPos)` instead of running the
PRG loop.

--- Part 2: TSqlExecutor:BuildFetchCache ---

The JOIN fallback loop calls FetchRow per row. FetchRow was already
column-ref-aware but did the string parse (`At + SubStr + Upper`)
and `::FindWA` linear scan every single invocation. For a 50k-row
join emitting 50k result rows, that's ~200k redundant resolutions.

New BuildFetchCache walks the SELECT list once before the scan and
pre-binds each plain-column expression to `{nWA, nFPos}`. FetchRow's
new fast path checks ::aFetchCache and jumps straight to
`dbSelectArea + FieldGet` when bound. Complex exprs (functions,
CASE, subqueries) still fall through to EvalExpr.

::aFetchCache is set right before the join WHILE loop and cleared
after — no cross-query bleed.

--- Bench (50k ord × 10k emp × 100 dept, 3-run steady state) ---

  Query                        Before      After     Speedup
  ────────────────────────────────────────────────────────────
  2-way INNER JOIN, 10k rows   91ms        68ms      1.34x
  2-way JOIN + GROUP BY        110ms       94ms      1.17x
  3-way INNER JOIN COUNT       2610ms      610ms     4.28x
  3-way JOIN + GROUP BY        2860ms      830ms     3.45x

The 3-way speedup is almost entirely SqlHashBuild. The 2-way case
benefits from the fetch cache because its per-row cost is dominated
by FetchRow (no second hash build to amortize).

--- Limits still standing ---

CTE + JOIN queries (Q7 in bench_complex: ~4.5s) aren't affected by
either optimization — CTE materialization goes through a different
path that writes/reads a temp DBF. Follow-up target.

Validation:
  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./... ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 18:47:20 +09:00
parent e75167c2e9
commit bfc6ded8cb
4 changed files with 270 additions and 21 deletions

View File

@@ -619,6 +619,7 @@ func RegisterRTL(vm *hbrt.VM) {
// Go-native SQL scan loop (bypasses PRG interpreter for hot path)
hbrt.Sym("SQLSCAN", hbrt.FsPublic, SqlScan),
hbrt.Sym("SQLEACH", hbrt.FsPublic, SqlEach),
hbrt.Sym("SQLHASHBUILD", hbrt.FsPublic, SqlHashBuild),
// Goroutine / Concurrency
hbrt.Sym("GO", hbrt.FsPublic, GoFunc),

View File

@@ -28,6 +28,7 @@ import (
"five/hbrdd"
"five/hbrdd/dbf"
"five/hbrt"
"strconv"
)
// SqlScan(aFieldPositions, pcWhere) → aRows
@@ -238,6 +239,158 @@ func SqlScan(t *hbrt.Thread) {
t.RetValue()
}
// SqlHashBuild(nFieldPos) → hHash
//
// Scans the current workarea from the top and returns a hash mapping each
// value of field nFieldPos (1-based; rendered as a string key by
// valueHashKey) to an array of the RecNos that hold that value. It is the
// Go-native replacement for the PRG build loop in FiveSql2's HashJoin,
// which paid ~40μs per row for class dispatch + hb_HHasKey + AAdd growth.
//
// An empty hash is returned when nFieldPos is below 1, when the thread's
// workarea manager is not a *hbrdd.WorkAreaManager, or when no workarea is
// current. The scan runs until EOF() reports true and does not restore the
// record position it started from.
//
// Keys are emitted in first-seen (scan) order. Ranging over a Go map
// visits keys in a randomized order, so building the result that way made
// the layout of the returned hash differ from run to run; the order is
// tracked explicitly here to keep the result deterministic.
func SqlHashBuild(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	// returnEmpty pushes an empty hash as the function result.
	returnEmpty := func() {
		t.PushValue(hbrt.MakeHash())
		t.RetValue()
	}

	// PRG field positions are 1-based; the area API below takes 0-based.
	nFieldPos := int(t.Local(1).AsNumInt()) - 1
	if nFieldPos < 0 {
		returnEmpty()
		return
	}
	wam, ok := t.WA.(*hbrdd.WorkAreaManager)
	if !ok {
		returnEmpty()
		return
	}
	area := wam.Current()
	if area == nil {
		returnEmpty()
		return
	}

	// slot maps a key to its index in keyOrder/buckets, so each row costs
	// one map lookup and the first-seen order of keys is preserved.
	slot := make(map[string]int, 4096)
	var keyOrder []string
	var buckets [][]int64
	add := func(key string, rn int64) {
		i, seen := slot[key]
		if !seen {
			i = len(buckets)
			slot[key] = i
			keyOrder = append(keyOrder, key)
			buckets = append(buckets, nil)
		}
		buckets[i] = append(buckets[i], rn)
	}

	// Type-assert once so the per-row field reads go through the concrete
	// *dbf.DBFArea rather than the interface.
	dbfArea, _ := area.(*dbf.DBFArea)
	if dbfArea != nil {
		dbfArea.GoTop()
		for !dbfArea.EOF() {
			// NOTE(review): the GetValue error is deliberately ignored, so
			// a failed read is keyed by whatever Value comes back — confirm
			// this matches the PRG FieldGet behaviour callers expect.
			v, _ := dbfArea.GetValue(nFieldPos)
			add(valueHashKey(v), int64(dbfArea.RecNo()))
			dbfArea.Skip(1)
		}
	} else {
		// Generic workarea: RecNo is only reachable through an optional
		// interface. Resolve it once instead of once per row; areas that
		// do not provide it record 0 for every row.
		recNoer, hasRecNo := area.(interface{ RecNo() uint32 })
		area.GoTop()
		for !area.EOF() {
			v, _ := area.GetValue(nFieldPos)
			var rn int64
			if hasRecNo {
				rn = int64(recNoer.RecNo())
			}
			add(valueHashKey(v), rn)
			area.Skip(1)
		}
	}

	// Materialize as a Five hash — fill the Keys/Values/Order slices
	// directly on the hash struct, skipping the per-key path that PRG
	// hb_HSet would take.
	nKeys := len(keyOrder)
	keys := make([]hbrt.Value, nKeys)
	vals := make([]hbrt.Value, nKeys)
	order := make([]int, nKeys)
	for i, k := range keyOrder {
		recs := buckets[i]
		items := make([]hbrt.Value, len(recs))
		for j, r := range recs {
			items[j] = hbrt.MakeNumInt(r)
		}
		keys[i] = hbrt.MakeString(k)
		vals[i] = hbrt.MakeArrayFrom(items)
		order[i] = i
	}

	result := hbrt.MakeHash()
	hh := result.AsHash()
	hh.Keys = keys
	hh.Values = vals
	hh.Order = order
	t.PushValue(result)
	t.RetValue()
}
// valueHashKey renders a Value as the string used to key the join hash.
// It is meant to mirror what SqlValToStr produces in PRG.
//
// Mapping, checked in this order:
//   - NIL      → "\x00NIL"
//   - string   → the value with trailing spaces removed, so CHAR hash
//     probes compare the same as the equivalent SqlCmpEq call
//   - numeric  → decimal text (strconvItoa for integers, strconvFtoa
//     otherwise)
//   - logical  → "T" or "F"
//   - date     → the Julian day number as decimal text
//   - anything else → ""
//
// Keys carry no type tag, so values of different types can render to the
// same key (for example the string "T" and logical true).
func valueHashKey(v hbrt.Value) string {
	if v.IsNil() {
		return "\x00NIL"
	}
	if v.IsString() {
		trimmed := v.AsString()
		for len(trimmed) > 0 && trimmed[len(trimmed)-1] == ' ' {
			trimmed = trimmed[:len(trimmed)-1]
		}
		return trimmed
	}
	if v.IsNumeric() {
		if !v.IsNumInt() {
			return strconvFtoa(v.AsNumDouble())
		}
		return strconvItoa(v.AsNumInt())
	}
	if v.IsLogical() {
		key := "F"
		if v.AsBool() {
			key = "T"
		}
		return key
	}
	if v.IsDate() {
		return strconvItoa(v.AsJulian())
	}
	return ""
}
// strconvItoa formats n as a base-10 string for use as a hash key.
//
// It delegates to strconv.FormatInt, which covers the full int64 range.
// The previous hand-rolled formatter negated n before extracting digits;
// for the minimum int64 that negation overflows back to the same negative
// value, the digit loop never ran, and the result was just "-".
func strconvItoa(n int64) string {
	return strconv.FormatInt(n, 10)
}
// strconvFtoa formats f in strconv's 'g' format with the shortest
// precision that round-trips a float64. It is only reached for
// non-integer numeric field values, which are rare as join keys, so
// going through strconv is acceptable here.
func strconvFtoa(f float64) string {
	var scratch [32]byte
	digits := strconv.AppendFloat(scratch[:0], f, 'g', -1, 64)
	return string(digits)
}
// SqlEach(aFieldPositions, pcWhere, bBlock) → NIL
//
// Streaming variant of SqlScan — instead of materializing all matching