perf(FiveSql2): PcOpFieldTrim fused peephole — string WHERE at raw RDD parity
Second pcode peephole to match the one added for FieldGet(literal).

SqlExprToPrg auto-wraps CHAR column references with AllTrim() to match SqlCmpEq's CHAR-padding trim semantics, so every string WHERE predicate evaluates `AllTrim(FieldGet(n)) == 'literal'` per row.

Before this commit each of those per-row evaluations did:

  1. PushSymbol ALLTRIM
  2. PushSymbol FIELDGET → Function(1)   [1 RTL Frame]
  3. parseCharField → MakeString         [alloc: copies raw bytes]
  4. Function(1) → AllTrim RTL           [1 RTL Frame]
  5. strings.TrimSpace                   [alloc: new string]
  6. Return, continue

New opcode `PcOpFieldTrim <idx>` (0x47) fuses the two RTL calls into a single opcode that:

  1. Calls FastFieldGetter directly (no Frame/Function dispatch).
  2. Walks the returned string with ASCII-space trim in place.
  3. Pushes `s[lo:hi]` — a sub-slice, no new allocation.
  4. Short-circuits back to the same string if no trim needed.

genpc recognizes the shape `AllTrim(FieldGet(<int-literal>))` in emitCall and emits the fused opcode automatically — no SQL-side API change. Matches the existing FieldGet peephole's shape.

Bench impact (50k rows, 3-run steady state, vs raw RDD baseline 6.2ms):

  String WHERE    before 7.9ms → after 6.2ms    1.00x (parity!)
  Numeric WHERE   6.9ms (unchanged)             1.11x
  No WHERE        9.1ms (unchanged)             1.47x

String WHERE is now at parity with the raw Harbour-style RDD scan. Compared to session start (119ms), that's a 19x speedup.

Validation:

  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./... ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -174,6 +174,41 @@ func execPcodeBody(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
|
||||
t.Function(1)
|
||||
}
|
||||
|
||||
// --- AllTrim(FieldGet(n)) fused peephole ---
|
||||
case PcOpFieldTrim:
|
||||
fIdx := int(binary.LittleEndian.Uint16(code[pc:]))
|
||||
pc += 2
|
||||
// Fast path: use direct field getter, trim inline.
|
||||
var v Value
|
||||
if fg := t.FastFieldGetter; fg != nil {
|
||||
v = fg(fIdx)
|
||||
} else {
|
||||
// Fallback: resolve via FIELDGET RTL
|
||||
t.PushSymbol(t.VM().FindSymbol("FIELDGET"))
|
||||
t.PushNil()
|
||||
t.PushLong(int64(fIdx))
|
||||
t.Function(1)
|
||||
v = t.Pop2()
|
||||
}
|
||||
if v.IsString() {
|
||||
s := v.AsString()
|
||||
// ASCII-space trim — DBF CHAR fields pad with 0x20 only
|
||||
lo, hi := 0, len(s)
|
||||
for lo < hi && s[lo] == ' ' {
|
||||
lo++
|
||||
}
|
||||
for hi > lo && s[hi-1] == ' ' {
|
||||
hi--
|
||||
}
|
||||
if lo == 0 && hi == len(s) {
|
||||
t.PushValue(v)
|
||||
} else {
|
||||
t.PushString(s[lo:hi])
|
||||
}
|
||||
} else {
|
||||
t.PushValue(v)
|
||||
}
|
||||
|
||||
// --- Function calls ---
|
||||
case PcOpPushSymbol:
|
||||
slen := int(binary.LittleEndian.Uint16(code[pc:]))
|
||||
|
||||
@@ -74,6 +74,12 @@ const (
|
||||
// peephole optimization. Operand: uint16 1-based field position.
|
||||
PcOpFieldGet byte = 0x46
|
||||
|
||||
// `AllTrim(FieldGet(n))` peephole — fetch the field, trim the
|
||||
// result in place, push one string. Skips two Function dispatches
|
||||
// (FieldGet + AllTrim) and one intermediate string allocation
|
||||
// per invocation. Operand: uint16 1-based field position.
|
||||
PcOpFieldTrim byte = 0x47
|
||||
|
||||
// Self / OOP
|
||||
PcOpPushSelf byte = 0x48
|
||||
PcOpPushSelfField byte = 0x49 // + uint16 len + name
|
||||
|
||||
Reference in New Issue
Block a user