perf(sqlscan): specialize four loop variants (DBF×WHERE matrix)

SqlScan's inner scan was already split into two loops (DBF vs generic
Area), but each loop still carried an `if whereFn != nil` check and a
`keep` shadow variable. The branch is well predicted, but it still
costs a few extra operations per row, and it prevented Go from
inlining the non-nil interface call on the Area branch.

Split into four specialized loop bodies on the two axes that drive
per-row cost:

  1. dbfArea != nil && whereFn != nil
  2. dbfArea != nil && whereFn == nil       ← tightest path (SELECT *)
  3. dbfArea == nil && whereFn != nil       ← generic Area
  4. dbfArea == nil && whereFn == nil

Each body has exactly the instructions it needs — no dead branches,
no shadow variables, no interface dispatch where avoidable. The
copy-paste cost is real, but every per-row saving adds up over 50k
iterations.

Bench impact (50k rows, 3-run steady state):

  No WHERE            9.1ms → 8.7ms   1.38x vs raw (was 1.47x)
  Numeric WHERE       6.9ms → 7.0ms   ~flat (within noise)
  String WHERE        6.2ms → 6.4ms   ~flat (within noise)
  Raw RDD             6.3ms baseline

Validation:
  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./hbrtl/... PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 14:04:48 +09:00
parent b1d89b9783
commit 5dd212c761

View File

@@ -138,20 +138,26 @@ func SqlScan(t *hbrt.Thread) {
}
defer func() { t.FastFieldGetter = prevFG }()
// Scan — dispatch two nearly-identical loops for devirtualization.
// The DBF branch is the common case; Go's compiler inlines the
// direct method calls, whereas the generic Area branch pays one
// interface dispatch per call as before.
if dbfArea != nil {
// Scan — four specialized loops. Two axes of specialization:
//
// DBF vs generic Area: devirtualization — Go inlines method calls
// on the concrete type but pays an interface
// dispatch on every call of the generic one.
//
// WHERE vs no-WHERE : branch hoisting — the no-WHERE case is a
// hot full-scan path (SELECT * or similar),
// where even the predictable `whereFn != nil`
// check and the `keep` shadow variable show
// up in pprof.
//
// Four combinations = four loop copies. Painful, but every per-row
// saving counts when we're reaching for raw RDD parity.
switch {
case dbfArea != nil && whereFn != nil:
dbfArea.GoTop()
for !dbfArea.EOF() {
keep := true
if whereFn != nil {
hbrt.ExecPcodeFast(t, whereFn, nil)
keep = t.GetRetValue().AsBool()
}
if keep {
hbrt.ExecPcodeFast(t, whereFn, nil)
if t.GetRetValue().AsBool() {
off := len(flat)
end := off + nFields
if end > cap(flat) {
@@ -166,19 +172,32 @@ func SqlScan(t *hbrt.Thread) {
}
rows = append(rows, slab.WrapNext(row))
}
dbfArea.Skip(1)
}
} else {
case dbfArea != nil:
// DBF + no WHERE — tightest inner loop
dbfArea.GoTop()
for !dbfArea.EOF() {
off := len(flat)
end := off + nFields
if end > cap(flat) {
flat = append(flat, make([]hbrt.Value, nFields)...)
} else {
flat = flat[:end]
}
row := flat[off:end:end]
for i := 0; i < nFields; i++ {
v, _ := dbfArea.GetValue(fieldPos[i] - 1)
row[i] = v
}
rows = append(rows, slab.WrapNext(row))
dbfArea.Skip(1)
}
case whereFn != nil:
area.GoTop()
for !area.EOF() {
keep := true
if whereFn != nil {
hbrt.ExecPcodeFast(t, whereFn, nil)
keep = t.GetRetValue().AsBool()
}
if keep {
hbrt.ExecPcodeFast(t, whereFn, nil)
if t.GetRetValue().AsBool() {
off := len(flat)
end := off + nFields
if end > cap(flat) {
@@ -193,7 +212,24 @@ func SqlScan(t *hbrt.Thread) {
}
rows = append(rows, slab.WrapNext(row))
}
area.Skip(1)
}
default:
area.GoTop()
for !area.EOF() {
off := len(flat)
end := off + nFields
if end > cap(flat) {
flat = append(flat, make([]hbrt.Value, nFields)...)
} else {
flat = flat[:end]
}
row := flat[off:end:end]
for i := 0; i < nFields; i++ {
v, _ := area.GetValue(fieldPos[i] - 1)
row[i] = v
}
rows = append(rows, slab.WrapNext(row))
area.Skip(1)
}
}