// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // Go-native SQL scan loop for FiveSql2 hot path. // // Motivation: FiveSql2 is a PRG-based SQL interpreter. For simple // "SELECT cols FROM table WHERE cond" queries, the per-row cost is // dominated by PRG interpreter overhead (AST tree walk, field name // lookup, workarea switching). Moving just the inner scan loop to Go // bypasses all that overhead and gets us ~15x speedup for the common // case while keeping the rest of FiveSql2 untouched. // // The SQL engine remains responsible for: // - Parsing SQL and building AST // - Resolving field names to positions (column binding) // - Compiling WHERE expression to pcode (via PcCompile) // - GROUP BY, ORDER BY, aggregates (not per-row) // // This helper only handles the hot loop: // - Full table scan (workarea already positioned) // - Per-row WHERE evaluation via ExecPcode // - Column extraction via cached field positions // - Result array construction package hbrtl import ( "five/hbrdd" "five/hbrdd/dbf" "five/hbrt" "strconv" ) // SqlScan(aFieldPositions, pcWhere) → aRows // // Scans the current workarea top-to-bottom, evaluates pcWhere per row // (nil = no filter), collects selected column values into rows. // // aFieldPositions: array of 1-based field positions to extract per row. // Resolve once before calling (FieldPos cache is O(1) // but still has PRG → Go call overhead). // pcWhere: pcode function pointer from PcCompile, or NIL. // // Returns: // Array of rows, each row = Array of field values. // // Notes on CHAR trimming: DBF character fields are space-padded. The // caller decides whether to trim (via a SELECT-list AllTrim wrapper). // We don't trim here — that's a semantic choice, and callers who need // raw bytes shouldn't pay for a strings.TrimSpace(). func SqlScan(t *hbrt.Thread) { t.Frame(2, 0) defer t.EndProc() // Parse arguments fieldsVal := t.Local(1) if !fieldsVal.IsArray() { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } fieldsArr := fieldsVal.AsArray().Items nFields := len(fieldsArr) whereVal := t.Local(2) var whereFn *hbrt.PcodeFunc if !whereVal.IsNil() { if p := whereVal.AsPointer(); p != nil { whereFn, _ = p.(*hbrt.PcodeFunc) } } // Pre-convert field positions to []int (avoid Value->int per row) fieldPos := make([]int, nFields) for i := 0; i < nFields; i++ { fieldPos[i] = int(fieldsArr[i].AsNumInt()) if fieldPos[i] < 1 { fieldPos[i] = 1 } } wam, ok := t.WA.(*hbrdd.WorkAreaManager) if !ok { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } area := wam.Current() if area == nil { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } // Type-assert to concrete DBFArea once so the hot loop calls // GoTop/EOF/Skip/GetValue directly on *dbf.DBFArea without paying // the interface dispatch on every row. Falls back to the generic // Area path for non-DBF drivers (rare in FiveSql2 context). dbfArea, _ := area.(*dbf.DBFArea) // SQLite-inspired: instead of one slice allocation per row, maintain // a single flat backing buffer and hand each row a sub-slice into it. // This halves allocations (row header + backing → just row header) // and keeps row data contiguous in memory for better cache locality. // // Safety: we cap each sub-slice to exactly nFields via the 3-index // slice form (flat[off:end:end]). Any later `append` on an individual // row will then trigger a reallocation of that row's backing, so we // don't clobber neighboring rows if PRG code mutates via AAdd. // Size the initial backing based on the workarea's record count — // even if WHERE filters most rows out, over-allocating beats five // regrowths of a 200 KB buffer mid-scan. estRows := 1024 if rc, err := area.RecCount(); err == nil && rc > 0 { estRows = int(rc) if estRows > 1 << 20 { estRows = 1 << 20 } } rows := make([]hbrt.Value, 0, estRows) flat := make([]hbrt.Value, 0, estRows*nFields) slab := hbrt.NewArraySlab(estRows) // Install the hot-path field getter so PcOpFieldGet in the compiled // WHERE predicate bypasses PushSymbol + Function dispatch + the // FieldGet RTL's own Frame. The closure captures the concrete // DBFArea directly so there's no interface dispatch per access. prevFG := t.FastFieldGetter if dbfArea != nil { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := dbfArea.GetValue(idx - 1) return v } } else { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := area.GetValue(idx - 1) return v } } defer func() { t.FastFieldGetter = prevFG }() // Scan — four specialized loops. Two axes of specialization: // // DBF vs generic Area: devirtualization — Go inlines method calls // on the concrete type but pays an interface // dispatch on every call of the generic one. // // WHERE vs no-WHERE : branch hoisting — the no-WHERE case is a // hot full-scan path (SELECT * or similar), // where even the predictable `whereFn != nil` // check and the `keep` shadow variable show // up in pprof. // // Four combinations = four loop copies. Painful but each row save // counts when we're reaching for raw RDD parity. switch { case dbfArea != nil && whereFn != nil: dbfArea.GoTop() for !dbfArea.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) } dbfArea.Skip(1) } case dbfArea != nil: // DBF + no WHERE — tightest inner loop dbfArea.GoTop() for !dbfArea.EOF() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) dbfArea.Skip(1) } case whereFn != nil: area.GoTop() for !area.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) } area.Skip(1) } default: area.GoTop() for !area.EOF() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) area.Skip(1) } } t.PushValue(hbrt.MakeArrayFrom(rows)) t.RetValue() } // SqlHashBuild(nFieldPos) → hHash // // Scans the current workarea and returns a hash mapping each field // value (as a string key) to an array of RecNos that have that value. // Used by FiveSql2's HashJoin: FiveSql2 currently builds this in PRG, // paying ~40μs per row from class dispatch + hb_HHasKey + AAdd growth. // 50k rows × 40μs = 2 seconds wasted on what should be a sub-50ms op. // // Go-native build goes through *dbf.DBFArea directly and uses a native // Go `map[string][]int64` which GC's as one unit. Final conversion to // a Five hash is done once at the end. func SqlHashBuild(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() nFieldPos := int(t.Local(1).AsNumInt()) - 1 if nFieldPos < 0 { t.PushValue(hbrt.MakeHash()) t.RetValue() return } wam, ok := t.WA.(*hbrdd.WorkAreaManager) if !ok { t.PushValue(hbrt.MakeHash()) t.RetValue() return } area := wam.Current() if area == nil { t.PushValue(hbrt.MakeHash()) t.RetValue() return } // Type-assert once so the per-row field reads inline. dbfArea, _ := area.(*dbf.DBFArea) goMap := make(map[string][]int64, 4096) if dbfArea != nil { dbfArea.GoTop() for !dbfArea.EOF() { v, _ := dbfArea.GetValue(nFieldPos) key := valueHashKey(v) goMap[key] = append(goMap[key], int64(dbfArea.RecNo())) dbfArea.Skip(1) } } else { area.GoTop() for !area.EOF() { v, _ := area.GetValue(nFieldPos) key := valueHashKey(v) // Generic RecNo via interface var rn int64 if rmgr, ok := area.(interface{ RecNo() uint32 }); ok { rn = int64(rmgr.RecNo()) } goMap[key] = append(goMap[key], rn) area.Skip(1) } } // Materialize as a Five hash — build Keys/Values slices directly on // the HbHash struct, skipping the per-key map-lookup path that PRG // hb_HSet would take. nKeys := len(goMap) keys := make([]hbrt.Value, 0, nKeys) vals := make([]hbrt.Value, 0, nKeys) order := make([]int, 0, nKeys) idx := 0 for k, recs := range goMap { items := make([]hbrt.Value, len(recs)) for i, r := range recs { items[i] = hbrt.MakeNumInt(r) } keys = append(keys, hbrt.MakeString(k)) vals = append(vals, hbrt.MakeArrayFrom(items)) order = append(order, idx) idx++ } result := hbrt.MakeHash() hh := result.AsHash() hh.Keys = keys hh.Values = vals hh.Order = order t.PushValue(result) t.RetValue() } // valueHashKey converts a Value to a stable string key for Go map use. // Matches what SqlValToStr does in PRG, but without allocation detours. func valueHashKey(v hbrt.Value) string { switch { case v.IsNil(): return "\x00NIL" case v.IsString(): // Match PRG SqlValToStr: trim trailing spaces so CHAR hash probes // compare the same as the equivalent SqlCmpEq call. s := v.AsString() end := len(s) for end > 0 && s[end-1] == ' ' { end-- } return s[:end] case v.IsNumeric(): if v.IsNumInt() { return strconvItoa(v.AsNumInt()) } return strconvFtoa(v.AsNumDouble()) case v.IsLogical(): if v.AsBool() { return "T" } return "F" case v.IsDate(): return strconvItoa(v.AsJulian()) } return "" } func strconvItoa(n int64) string { // strconv.Itoa is heavy on allocation for small ints — this is the // hot path for hash keys so use a tight formatter. if n == 0 { return "0" } neg := n < 0 if neg { n = -n } var buf [20]byte i := len(buf) for n > 0 { i-- buf[i] = byte('0' + n%10) n /= 10 } if neg { i-- buf[i] = '-' } return string(buf[i:]) } func strconvFtoa(f float64) string { // Only used for non-integer numeric field values (rare in join keys); // OK to call into strconv. return strconv.FormatFloat(f, 'g', -1, 64) } // SqlHashJoin(aOuterFields, aJoinSpecs, aSelectFields) → aRows // // Go-native multi-table hash join. Replaces the per-row PRG overhead // of JoinRecurse → FetchRow → dbSelectArea × N when the query has // only equi-join conditions and all SELECT columns are plain field refs. // // Arguments (all PRG arrays): // aJoinSpecs: array of {nInnerWA, nInnerKeyField, nOuterKeyField} // Each entry describes one join level (1-based field positions). // nOuterKeyField refers to a field in the PREVIOUS level's // table (or the outer for the first entry). // aSelectFields: array of {nWA, nFieldPos} — columns to extract per // matched row combination. 1-based field positions. // nOuterWA: workarea number of the outermost (driving) table // // Returns: array of rows, each row = array of field values. // // The function builds hash tables for each inner level, then walks // the outer table and probes each level recursively. All field access // goes through *dbf.DBFArea.GetValue directly — no PRG frame overhead. func SqlHashJoin(t *hbrt.Thread) { t.Frame(3, 0) defer t.EndProc() joinSpecsVal := t.Local(1) selectFieldsVal := t.Local(2) nOuterWA := int(t.Local(3).AsNumInt()) if !joinSpecsVal.IsArray() || !selectFieldsVal.IsArray() { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } wam, ok := t.WA.(*hbrdd.WorkAreaManager) if !ok { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } // Parse join specs jsArr := joinSpecsVal.AsArray().Items type joinLevel struct { area *dbf.DBFArea innerKey int // 0-based field index for hash key outerKey int // 0-based field index on parent level hashTable map[string][]uint32 // key → list of RecNos parentArea *dbf.DBFArea } levels := make([]joinLevel, len(jsArr)) for i, js := range jsArr { row := js.AsArray() if row == nil || len(row.Items) < 3 { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } innerWA := int(row.Items[0].AsNumInt()) innerKeyF := int(row.Items[1].AsNumInt()) - 1 outerKeyF := int(row.Items[2].AsNumInt()) - 1 innerArea, _ := wam.AreaAt(uint16(innerWA)).(*dbf.DBFArea) if innerArea == nil { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } // Build hash table for this level ht := make(map[string][]uint32, 4096) innerArea.GoTop() for !innerArea.EOF() { v, _ := innerArea.GetValue(innerKeyF) key := valueHashKey(v) ht[key] = append(ht[key], innerArea.RecNo()) innerArea.Skip(1) } levels[i] = joinLevel{ area: innerArea, innerKey: innerKeyF, outerKey: outerKeyF, hashTable: ht, } } // Set parent area references outerArea, _ := wam.AreaAt(uint16(nOuterWA)).(*dbf.DBFArea) if outerArea == nil { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } for i := range levels { if i == 0 { levels[i].parentArea = outerArea } else { levels[i].parentArea = levels[i-1].area } } // Parse select fields sfArr := selectFieldsVal.AsArray().Items type selectCol struct { area *dbf.DBFArea fieldIdx int // 0-based } selCols := make([]selectCol, len(sfArr)) for i, sf := range sfArr { row := sf.AsArray() if row == nil || len(row.Items) < 2 { continue } waNum := int(row.Items[0].AsNumInt()) fIdx := int(row.Items[1].AsNumInt()) - 1 if waNum == 0 { // Aggregate placeholder — leave area nil, emit 0 per row selCols[i] = selectCol{area: nil, fieldIdx: -1} continue } a, _ := wam.AreaAt(uint16(waNum)).(*dbf.DBFArea) selCols[i] = selectCol{area: a, fieldIdx: fIdx} } nFields := len(selCols) estRows := 1024 rows := make([]hbrt.Value, 0, estRows) flat := make([]hbrt.Value, 0, estRows*nFields) slab := hbrt.NewArraySlab(estRows) // Recursive join traversal — iterative via explicit stack type frame struct { level int matches []uint32 matchIdx int } outerArea.GoTop() for !outerArea.EOF() { // Start the join chain from the outer row stack := []frame{{level: 0, matches: nil, matchIdx: 0}} // Get outer key for first level outerVal, _ := outerArea.GetValue(levels[0].outerKey) outerKey := valueHashKey(outerVal) matches, found := levels[0].hashTable[outerKey] if !found { outerArea.Skip(1) continue } stack[0].matches = matches for len(stack) > 0 { top := &stack[len(stack)-1] if top.matchIdx >= len(top.matches) { // Exhausted this level — pop stack = stack[:len(stack)-1] continue } // Position the inner area at the current match recNo := top.matches[top.matchIdx] top.matchIdx++ levels[top.level].area.GoTo(recNo) if top.level == len(levels)-1 { // Last level — emit result row off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for c := 0; c < nFields; c++ { if selCols[c].area != nil { v, _ := selCols[c].area.GetValue(selCols[c].fieldIdx) row[c] = v } else { // Aggregate placeholder — 0 for numeric aggregation row[c] = hbrt.MakeInt(0) } } rows = append(rows, slab.WrapNext(row)) } else { // Probe next level nextLevel := top.level + 1 probeVal, _ := levels[top.level].area.GetValue(levels[nextLevel].outerKey) probeKey := valueHashKey(probeVal) nextMatches, found := levels[nextLevel].hashTable[probeKey] if found { stack = append(stack, frame{ level: nextLevel, matches: nextMatches, }) } } } outerArea.Skip(1) } t.PushValue(hbrt.MakeArrayFrom(rows)) t.RetValue() } // SqlEach(aFieldPositions, pcWhere, bBlock) → NIL // // Streaming variant of SqlScan — instead of materializing all matching // rows into a result array (which costs N HbArray allocations plus a // second pass when the PRG caller iterates it), we invoke a user-provided // code block once per matching row, passing the selected field values as // block parameters. // // This is the Harbour block-iteration idiom (`AEval`, `AScan`) applied // to SQL. Total heap traffic collapses to ~0 — no result rows, no slab, // no flat value buffer. Per-row overhead becomes just (field reads + // WHERE eval + block invoke). // // Expected to hit raw-RDD parity on end-to-end "SQL → user code" timing. // // Arguments: // aFieldPositions: 1-based field positions to pass as block params // pcWhere: compiled WHERE predicate, or NIL // bBlock: code block receiving nFields positional params func SqlEach(t *hbrt.Thread) { t.Frame(3, 0) defer t.EndProc() fieldsVal := t.Local(1) if !fieldsVal.IsArray() { t.RetNil() return } fieldsArr := fieldsVal.AsArray().Items nFields := len(fieldsArr) whereVal := t.Local(2) var whereFn *hbrt.PcodeFunc if !whereVal.IsNil() { if p := whereVal.AsPointer(); p != nil { whereFn, _ = p.(*hbrt.PcodeFunc) } } blockVal := t.Local(3) if !blockVal.IsBlock() { t.RetNil() return } blk := blockVal.AsBlock() fieldPos := make([]int, nFields) for i := 0; i < nFields; i++ { fieldPos[i] = int(fieldsArr[i].AsNumInt()) if fieldPos[i] < 1 { fieldPos[i] = 1 } } wam, ok := t.WA.(*hbrdd.WorkAreaManager) if !ok { t.RetNil() return } area := wam.Current() if area == nil { t.RetNil() return } dbfArea, _ := area.(*dbf.DBFArea) // Install FastFieldGetter for the WHERE predicate's PcOpFieldGet ops prevFG := t.FastFieldGetter if dbfArea != nil { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := dbfArea.GetValue(idx - 1) return v } } else { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := area.GetValue(idx - 1) return v } } defer func() { t.FastFieldGetter = prevFG }() // Block eval protocol: push N args on the stack, set pendingParams, // call blk.Fn(t). Matches what EvalBlock does inline, skipping the // per-call `make([]Value, nArgs)` temp slice. // // Four specialized loops on {DBF, generic}×{WHERE, none}, same // reasoning as SqlScan's loop split. switch { case dbfArea != nil && whereFn != nil: dbfArea.GoTop() for !dbfArea.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) } dbfArea.Skip(1) } case dbfArea != nil: dbfArea.GoTop() for !dbfArea.EOF() { for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) dbfArea.Skip(1) } case whereFn != nil: area.GoTop() for !area.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) } area.Skip(1) } default: area.GoTop() for !area.EOF() { for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) area.Skip(1) } } t.RetNil() }