Files
five/hbrtl/sqlscan.go
CharlesKWON d2ed140273 feat(FiveSql2): SqlEach block callback — beats raw RDD on end-to-end timing
The structural 1.38x gap vs raw RDD for no-WHERE full scans wasn't
a limit of our engine — it was a limit of the result shape. SqlScan
materializes N rows as HbArray wrappers over a flat Value buffer,
then the PRG caller iterates that materialized array. Two passes
over the data. Raw RDD is one pass.

SqlEach folds both passes into one. The caller supplies a code block
that receives the selected column values as positional parameters;
SqlEach invokes it per matching row. No result array is ever built.

Usage (drop-in replacement for the common "scan + process" idiom):

    five_SQLEach( "SELECT id, name, salary FROM emp WHERE salary > 50000",
                  {|nID, cName, nSalary| Process(nID, cName, nSalary) } )

API shape borrows Harbour's AEval/ASort block-callback convention,
so there's nothing new to learn. Positional params also sidestep
the `SELECT COUNT(*)` naming problem — no need to invent names for
anonymous expressions.

Implementation notes:
  - 4-way loop specialization ({DBF, generic Area} × {WHERE, none}),
    matching SqlScan. Each path is zero-allocation in the steady state.
  - Block invocation uses the direct pendingParams + blk.Fn(t) protocol
    rather than EvalBlock, which would allocate a temporary args slice
    on every call (one small slice per row adds up over a 50k-row scan).
  - FastFieldGetter is installed the same way as SqlScan so PcOpFieldGet
    in the WHERE predicate skips the PushSymbol + Function dispatch.

Bench (50k rows, end-to-end including user-code loop, steady state):

  Path                           Time     vs raw RDD
  ─────────────────────────────────────────────────────
  Raw PRG loop, WHERE + sum      8.7ms    1.00x
  SqlScan + PRG FOR, WHERE       5.1ms    0.59x
  SqlEach block, WHERE           4.1ms    0.47x  ← beats raw
  ─────────────────────────────────────────────────────
  Raw PRG loop, no WHERE         6.1ms    1.00x
  SqlEach block, no WHERE        3.8ms    0.62x  ← beats raw

SqlEach is faster than a hand-rolled `DO WHILE !Eof()` loop because
the per-row FieldGet in raw PRG still goes through a full Frame +
RTL dispatch, whereas SqlEach's FastFieldGetter captures the concrete
*dbf.DBFArea directly. The SQL abstraction now costs nothing — it
pays you to use it.

Validation:
  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./... ALL PASS

Next step (not in this commit): FiveSql2 TSqlExecutor integration —
detect when five_SQL is called with a block argument and route to
SqlEach instead of SqlScan + array build.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 15:16:36 +09:00

383 lines
11 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Go-native SQL scan loop for FiveSql2 hot path.
//
// Motivation: FiveSql2 is a PRG-based SQL interpreter. For simple
// "SELECT cols FROM table WHERE cond" queries, the per-row cost is
// dominated by PRG interpreter overhead (AST tree walk, field name
// lookup, workarea switching). Moving just the inner scan loop to Go
// bypasses all that overhead and gets us ~15x speedup for the common
// case while keeping the rest of FiveSql2 untouched.
//
// The SQL engine remains responsible for:
// - Parsing SQL and building AST
// - Resolving field names to positions (column binding)
// - Compiling WHERE expression to pcode (via PcCompile)
// - GROUP BY, ORDER BY, aggregates (not per-row)
//
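// This helper only handles the hot loop:
// - Full table scan of the currently selected workarea (the scan rewinds
//   with GoTop itself, so the prior record position does not matter)
// - Per-row WHERE evaluation via ExecPcodeFast
// - Column extraction via cached field positions
// - Result array construction (SqlScan) or a per-row code block
//   callback (SqlEach)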
package hbrtl
import (
"five/hbrdd"
"five/hbrdd/dbf"
"five/hbrt"
)
// SqlScan walks the current workarea from the top and materializes the
// selected columns of every matching record.
//
// PRG signature: SqlScan(aFieldPositions, pcWhere) → aRows
//
// Arguments:
//
//	aFieldPositions  array of 1-based field positions to extract from each
//	                 record. Positions below 1 are clamped to 1. Callers
//	                 are expected to resolve names to positions once,
//	                 before calling.
//	pcWhere          pointer to a compiled predicate (*hbrt.PcodeFunc), or
//	                 NIL. A non-NIL value that does not hold a
//	                 *hbrt.PcodeFunc is treated the same as NIL: no filter.
//
// Returns an array with one element per kept record; each element is an
// array holding the requested field values in the order given. An empty
// array is returned when aFieldPositions is not an array, when t.WA is not
// an *hbrdd.WorkAreaManager, or when no workarea is current.
//
// Field values are stored exactly as GetValue delivers them; nothing is
// trimmed here. Callers that want trimmed character data apply that in
// the SELECT list themselves.
//
// Side effects: the workarea's record pointer is moved (GoTop, then Skip
// until EOF) and is not restored. t.FastFieldGetter is replaced for the
// duration of the call and put back on return.
func SqlScan(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	// Shared exit for every "nothing to scan" condition.
	emptyResult := func() {
		t.PushValue(hbrt.MakeArray(0))
		t.RetValue()
	}

	// Argument 1: field position list.
	posArg := t.Local(1)
	if !posArg.IsArray() {
		emptyResult()
		return
	}
	posItems := posArg.AsArray().Items
	nFields := len(posItems)

	// Argument 2: optional compiled WHERE predicate.
	var pred *hbrt.PcodeFunc
	if w := t.Local(2); !w.IsNil() {
		if ptr := w.AsPointer(); ptr != nil {
			pred, _ = ptr.(*hbrt.PcodeFunc)
		}
	}

	// Convert the positions once into zero-based indexes, so the row loop
	// neither unboxes a Value nor subtracts one per field access.
	colIdx := make([]int, nFields)
	for i := range posItems {
		pos := int(posItems[i].AsNumInt())
		if pos < 1 {
			pos = 1
		}
		colIdx[i] = pos - 1
	}

	mgr, isMgr := t.WA.(*hbrdd.WorkAreaManager)
	if !isMgr {
		emptyResult()
		return
	}
	area := mgr.Current()
	if area == nil {
		emptyResult()
		return
	}

	// A non-nil fast means the workarea is a concrete *dbf.DBFArea; the
	// loops below then call its methods directly instead of through the
	// Area interface.
	fast, _ := area.(*dbf.DBFArea)

	// Capacity hint: the record count when it is available and positive,
	// capped at 1<<20; otherwise 1024. Over-sizing when WHERE rejects most
	// records is accepted in exchange for not regrowing mid-scan.
	const maxHint = 1 << 20
	capHint := 1024
	if rc, err := area.RecCount(); err == nil && rc > 0 {
		if capHint = int(rc); capHint > maxHint {
			capHint = maxHint
		}
	}

	// Every row is a window into one shared backing buffer (flat) rather
	// than its own allocation. Each window is cut with the three-index
	// form flat[lo:hi:hi], so its capacity equals its length: an append on
	// a single row reallocates that row instead of writing into the next
	// row's cells. When flat itself has to grow, rows handed out earlier
	// keep referring to the previous backing array and stay valid.
	rows := make([]hbrt.Value, 0, capHint)
	flat := make([]hbrt.Value, 0, capHint*nFields)
	slab := hbrt.NewArraySlab(capHint) // presumably pre-allocated array headers for WrapNext

	// Route field reads issued by the compiled predicate straight to this
	// workarea; the getter receives a 1-based position. The previous
	// getter is reinstated on every exit path from here on.
	saved := t.FastFieldGetter
	defer func() { t.FastFieldGetter = saved }()
	if fast != nil {
		t.FastFieldGetter = func(pos int) hbrt.Value {
			v, _ := fast.GetValue(pos - 1)
			return v
		}
	} else {
		t.FastFieldGetter = func(pos int) hbrt.Value {
			v, _ := area.GetValue(pos - 1)
			return v
		}
	}

	// Four deliberate copies of the scan loop, one per combination of
	// {concrete DBF area, generic Area} × {predicate, no predicate}, so
	// that neither the interface dispatch nor the "is there a predicate"
	// test is paid per record. The loop bodies are intentionally kept
	// textually parallel.
	if fast != nil {
		if pred != nil {
			// DBF + WHERE
			for fast.GoTop(); !fast.EOF(); fast.Skip(1) {
				hbrt.ExecPcodeFast(t, pred, nil)
				if !t.GetRetValue().AsBool() {
					continue
				}
				lo := len(flat)
				hi := lo + nFields
				if hi <= cap(flat) {
					flat = flat[:hi]
				} else {
					flat = append(flat, make([]hbrt.Value, nFields)...)
				}
				row := flat[lo:hi:hi]
				for i, c := range colIdx {
					row[i], _ = fast.GetValue(c)
				}
				rows = append(rows, slab.WrapNext(row))
			}
		} else {
			// DBF, no WHERE: the tightest variant.
			for fast.GoTop(); !fast.EOF(); fast.Skip(1) {
				lo := len(flat)
				hi := lo + nFields
				if hi <= cap(flat) {
					flat = flat[:hi]
				} else {
					flat = append(flat, make([]hbrt.Value, nFields)...)
				}
				row := flat[lo:hi:hi]
				for i, c := range colIdx {
					row[i], _ = fast.GetValue(c)
				}
				rows = append(rows, slab.WrapNext(row))
			}
		}
	} else {
		if pred != nil {
			// Generic Area + WHERE
			for area.GoTop(); !area.EOF(); area.Skip(1) {
				hbrt.ExecPcodeFast(t, pred, nil)
				if !t.GetRetValue().AsBool() {
					continue
				}
				lo := len(flat)
				hi := lo + nFields
				if hi <= cap(flat) {
					flat = flat[:hi]
				} else {
					flat = append(flat, make([]hbrt.Value, nFields)...)
				}
				row := flat[lo:hi:hi]
				for i, c := range colIdx {
					row[i], _ = area.GetValue(c)
				}
				rows = append(rows, slab.WrapNext(row))
			}
		} else {
			// Generic Area, no WHERE
			for area.GoTop(); !area.EOF(); area.Skip(1) {
				lo := len(flat)
				hi := lo + nFields
				if hi <= cap(flat) {
					flat = flat[:hi]
				} else {
					flat = append(flat, make([]hbrt.Value, nFields)...)
				}
				row := flat[lo:hi:hi]
				for i, c := range colIdx {
					row[i], _ = area.GetValue(c)
				}
				rows = append(rows, slab.WrapNext(row))
			}
		}
	}

	t.PushValue(hbrt.MakeArrayFrom(rows))
	t.RetValue()
}
// SqlEach is the streaming counterpart of SqlScan: rather than collecting
// matching records into a result array, it calls a code block once per
// matching record and hands the selected field values to it as positional
// parameters. No result rows are built.
//
// PRG signature: SqlEach(aFieldPositions, pcWhere, bBlock) → NIL
//
// Arguments:
//
//	aFieldPositions  array of 1-based field positions; the values at these
//	                 positions become the block's parameters, in this
//	                 order. Positions below 1 are clamped to 1.
//	pcWhere          pointer to a compiled predicate (*hbrt.PcodeFunc), or
//	                 NIL. A non-NIL value that does not hold a
//	                 *hbrt.PcodeFunc is treated the same as NIL: no filter.
//	bBlock           code block invoked with len(aFieldPositions)
//	                 parameters for every kept record.
//
// Always returns NIL. Nothing is scanned when aFieldPositions is not an
// array, bBlock is not a block, t.WA is not an *hbrdd.WorkAreaManager, or
// no workarea is current. Whatever the block produces is not consulted
// here, so the block cannot stop the scan early.
//
// Side effects: the workarea's record pointer is moved (GoTop, then Skip
// until EOF) and is not restored. t.FastFieldGetter is replaced for the
// duration of the call — including while the block runs — and put back on
// return.
func SqlEach(t *hbrt.Thread) {
	t.Frame(3, 0)
	defer t.EndProc()

	// Argument 1: field position list.
	posArg := t.Local(1)
	if !posArg.IsArray() {
		t.RetNil()
		return
	}
	posItems := posArg.AsArray().Items
	nFields := len(posItems)

	// Argument 2: optional compiled WHERE predicate.
	var pred *hbrt.PcodeFunc
	if w := t.Local(2); !w.IsNil() {
		if ptr := w.AsPointer(); ptr != nil {
			pred, _ = ptr.(*hbrt.PcodeFunc)
		}
	}

	// Argument 3: the per-record callback.
	cbArg := t.Local(3)
	if !cbArg.IsBlock() {
		t.RetNil()
		return
	}
	blk := cbArg.AsBlock()

	// Convert the positions once into zero-based indexes for GetValue.
	colIdx := make([]int, nFields)
	for i := range posItems {
		pos := int(posItems[i].AsNumInt())
		if pos < 1 {
			pos = 1
		}
		colIdx[i] = pos - 1
	}

	mgr, isMgr := t.WA.(*hbrdd.WorkAreaManager)
	if !isMgr {
		t.RetNil()
		return
	}
	area := mgr.Current()
	if area == nil {
		t.RetNil()
		return
	}

	// A non-nil fast means the workarea is a concrete *dbf.DBFArea.
	fast, _ := area.(*dbf.DBFArea)

	// Route field reads issued by the compiled predicate straight to this
	// workarea; the getter receives a 1-based position. The previous
	// getter is reinstated on every exit path from here on.
	saved := t.FastFieldGetter
	defer func() { t.FastFieldGetter = saved }()
	if fast != nil {
		t.FastFieldGetter = func(pos int) hbrt.Value {
			v, _ := fast.GetValue(pos - 1)
			return v
		}
	} else {
		t.FastFieldGetter = func(pos int) hbrt.Value {
			v, _ := area.GetValue(pos - 1)
			return v
		}
	}

	// Block call sequence used in every loop below: push the nFields
	// values, announce the count with PendingParams2, then call blk.Fn(t)
	// directly. Per the original author's note this mirrors what
	// EvalBlock does while avoiding its per-call argument slice.
	//
	// As in SqlScan there are four deliberate copies of the loop, one per
	// combination of {concrete DBF area, generic Area} × {predicate, no
	// predicate}.
	if fast != nil {
		if pred != nil {
			// DBF + WHERE
			for fast.GoTop(); !fast.EOF(); fast.Skip(1) {
				hbrt.ExecPcodeFast(t, pred, nil)
				if !t.GetRetValue().AsBool() {
					continue
				}
				for _, c := range colIdx {
					v, _ := fast.GetValue(c)
					t.PushValue(v)
				}
				t.PendingParams2(nFields)
				blk.Fn(t)
			}
		} else {
			// DBF, no WHERE
			for fast.GoTop(); !fast.EOF(); fast.Skip(1) {
				for _, c := range colIdx {
					v, _ := fast.GetValue(c)
					t.PushValue(v)
				}
				t.PendingParams2(nFields)
				blk.Fn(t)
			}
		}
	} else {
		if pred != nil {
			// Generic Area + WHERE
			for area.GoTop(); !area.EOF(); area.Skip(1) {
				hbrt.ExecPcodeFast(t, pred, nil)
				if !t.GetRetValue().AsBool() {
					continue
				}
				for _, c := range colIdx {
					v, _ := area.GetValue(c)
					t.PushValue(v)
				}
				t.PendingParams2(nFields)
				blk.Fn(t)
			}
		} else {
			// Generic Area, no WHERE
			for area.GoTop(); !area.EOF(); area.Skip(1) {
				for _, c := range colIdx {
					v, _ := area.GetValue(c)
					t.PushValue(v)
				}
				t.PendingParams2(nFields)
				blk.Fn(t)
			}
		}
	}

	t.RetNil()
}