// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // Go-native SQL scan loop for FiveSql2 hot path. // // Motivation: FiveSql2 is a PRG-based SQL interpreter. For simple // "SELECT cols FROM table WHERE cond" queries, the per-row cost is // dominated by PRG interpreter overhead (AST tree walk, field name // lookup, workarea switching). Moving just the inner scan loop to Go // bypasses all that overhead and gets us ~15x speedup for the common // case while keeping the rest of FiveSql2 untouched. // // The SQL engine remains responsible for: // - Parsing SQL and building AST // - Resolving field names to positions (column binding) // - Compiling WHERE expression to pcode (via PcCompile) // - GROUP BY, ORDER BY, aggregates (not per-row) // // This helper only handles the hot loop: // - Full table scan (workarea already positioned) // - Per-row WHERE evaluation via ExecPcode // - Column extraction via cached field positions // - Result array construction package hbrtl import ( "five/hbrdd" "five/hbrdd/dbf" "five/hbrt" ) // SqlScan(aFieldPositions, pcWhere) → aRows // // Scans the current workarea top-to-bottom, evaluates pcWhere per row // (nil = no filter), collects selected column values into rows. // // aFieldPositions: array of 1-based field positions to extract per row. // Resolve once before calling (FieldPos cache is O(1) // but still has PRG → Go call overhead). // pcWhere: pcode function pointer from PcCompile, or NIL. // // Returns: // Array of rows, each row = Array of field values. // // Notes on CHAR trimming: DBF character fields are space-padded. The // caller decides whether to trim (via a SELECT-list AllTrim wrapper). // We don't trim here — that's a semantic choice, and callers who need // raw bytes shouldn't pay for a strings.TrimSpace(). func SqlScan(t *hbrt.Thread) { t.Frame(2, 0) defer t.EndProc() // Parse arguments fieldsVal := t.Local(1) if !fieldsVal.IsArray() { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } fieldsArr := fieldsVal.AsArray().Items nFields := len(fieldsArr) whereVal := t.Local(2) var whereFn *hbrt.PcodeFunc if !whereVal.IsNil() { if p := whereVal.AsPointer(); p != nil { whereFn, _ = p.(*hbrt.PcodeFunc) } } // Pre-convert field positions to []int (avoid Value->int per row) fieldPos := make([]int, nFields) for i := 0; i < nFields; i++ { fieldPos[i] = int(fieldsArr[i].AsNumInt()) if fieldPos[i] < 1 { fieldPos[i] = 1 } } wam, ok := t.WA.(*hbrdd.WorkAreaManager) if !ok { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } area := wam.Current() if area == nil { t.PushValue(hbrt.MakeArray(0)) t.RetValue() return } // Type-assert to concrete DBFArea once so the hot loop calls // GoTop/EOF/Skip/GetValue directly on *dbf.DBFArea without paying // the interface dispatch on every row. Falls back to the generic // Area path for non-DBF drivers (rare in FiveSql2 context). dbfArea, _ := area.(*dbf.DBFArea) // SQLite-inspired: instead of one slice allocation per row, maintain // a single flat backing buffer and hand each row a sub-slice into it. // This halves allocations (row header + backing → just row header) // and keeps row data contiguous in memory for better cache locality. // // Safety: we cap each sub-slice to exactly nFields via the 3-index // slice form (flat[off:end:end]). Any later `append` on an individual // row will then trigger a reallocation of that row's backing, so we // don't clobber neighboring rows if PRG code mutates via AAdd. // Size the initial backing based on the workarea's record count — // even if WHERE filters most rows out, over-allocating beats five // regrowths of a 200 KB buffer mid-scan. estRows := 1024 if rc, err := area.RecCount(); err == nil && rc > 0 { estRows = int(rc) if estRows > 1 << 20 { estRows = 1 << 20 } } rows := make([]hbrt.Value, 0, estRows) flat := make([]hbrt.Value, 0, estRows*nFields) slab := hbrt.NewArraySlab(estRows) // Install the hot-path field getter so PcOpFieldGet in the compiled // WHERE predicate bypasses PushSymbol + Function dispatch + the // FieldGet RTL's own Frame. The closure captures the concrete // DBFArea directly so there's no interface dispatch per access. prevFG := t.FastFieldGetter if dbfArea != nil { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := dbfArea.GetValue(idx - 1) return v } } else { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := area.GetValue(idx - 1) return v } } defer func() { t.FastFieldGetter = prevFG }() // Scan — four specialized loops. Two axes of specialization: // // DBF vs generic Area: devirtualization — Go inlines method calls // on the concrete type but pays an interface // dispatch on every call of the generic one. // // WHERE vs no-WHERE : branch hoisting — the no-WHERE case is a // hot full-scan path (SELECT * or similar), // where even the predictable `whereFn != nil` // check and the `keep` shadow variable show // up in pprof. // // Four combinations = four loop copies. Painful but each row save // counts when we're reaching for raw RDD parity. switch { case dbfArea != nil && whereFn != nil: dbfArea.GoTop() for !dbfArea.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) } dbfArea.Skip(1) } case dbfArea != nil: // DBF + no WHERE — tightest inner loop dbfArea.GoTop() for !dbfArea.EOF() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) dbfArea.Skip(1) } case whereFn != nil: area.GoTop() for !area.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) } area.Skip(1) } default: area.GoTop() for !area.EOF() { off := len(flat) end := off + nFields if end > cap(flat) { flat = append(flat, make([]hbrt.Value, nFields)...) } else { flat = flat[:end] } row := flat[off:end:end] for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) row[i] = v } rows = append(rows, slab.WrapNext(row)) area.Skip(1) } } t.PushValue(hbrt.MakeArrayFrom(rows)) t.RetValue() } // SqlEach(aFieldPositions, pcWhere, bBlock) → NIL // // Streaming variant of SqlScan — instead of materializing all matching // rows into a result array (which costs N HbArray allocations plus a // second pass when the PRG caller iterates it), we invoke a user-provided // code block once per matching row, passing the selected field values as // block parameters. // // This is the Harbour block-iteration idiom (`AEval`, `AScan`) applied // to SQL. Total heap traffic collapses to ~0 — no result rows, no slab, // no flat value buffer. Per-row overhead becomes just (field reads + // WHERE eval + block invoke). // // Expected to hit raw-RDD parity on end-to-end "SQL → user code" timing. // // Arguments: // aFieldPositions: 1-based field positions to pass as block params // pcWhere: compiled WHERE predicate, or NIL // bBlock: code block receiving nFields positional params func SqlEach(t *hbrt.Thread) { t.Frame(3, 0) defer t.EndProc() fieldsVal := t.Local(1) if !fieldsVal.IsArray() { t.RetNil() return } fieldsArr := fieldsVal.AsArray().Items nFields := len(fieldsArr) whereVal := t.Local(2) var whereFn *hbrt.PcodeFunc if !whereVal.IsNil() { if p := whereVal.AsPointer(); p != nil { whereFn, _ = p.(*hbrt.PcodeFunc) } } blockVal := t.Local(3) if !blockVal.IsBlock() { t.RetNil() return } blk := blockVal.AsBlock() fieldPos := make([]int, nFields) for i := 0; i < nFields; i++ { fieldPos[i] = int(fieldsArr[i].AsNumInt()) if fieldPos[i] < 1 { fieldPos[i] = 1 } } wam, ok := t.WA.(*hbrdd.WorkAreaManager) if !ok { t.RetNil() return } area := wam.Current() if area == nil { t.RetNil() return } dbfArea, _ := area.(*dbf.DBFArea) // Install FastFieldGetter for the WHERE predicate's PcOpFieldGet ops prevFG := t.FastFieldGetter if dbfArea != nil { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := dbfArea.GetValue(idx - 1) return v } } else { t.FastFieldGetter = func(idx int) hbrt.Value { v, _ := area.GetValue(idx - 1) return v } } defer func() { t.FastFieldGetter = prevFG }() // Block eval protocol: push N args on the stack, set pendingParams, // call blk.Fn(t). Matches what EvalBlock does inline, skipping the // per-call `make([]Value, nArgs)` temp slice. // // Four specialized loops on {DBF, generic}×{WHERE, none}, same // reasoning as SqlScan's loop split. switch { case dbfArea != nil && whereFn != nil: dbfArea.GoTop() for !dbfArea.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) } dbfArea.Skip(1) } case dbfArea != nil: dbfArea.GoTop() for !dbfArea.EOF() { for i := 0; i < nFields; i++ { v, _ := dbfArea.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) dbfArea.Skip(1) } case whereFn != nil: area.GoTop() for !area.EOF() { hbrt.ExecPcodeFast(t, whereFn, nil) if t.GetRetValue().AsBool() { for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) } area.Skip(1) } default: area.GoTop() for !area.EOF() { for i := 0; i < nFields; i++ { v, _ := area.GetValue(fieldPos[i] - 1) t.PushValue(v) } t.PendingParams2(nFields) blk.Fn(t) area.Skip(1) } } t.RetNil() }