From b1d89b97830bd984b4f2d78d77f7304dfe4086f0 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Tue, 14 Apr 2026 14:03:03 +0900 Subject: [PATCH] =?UTF-8?q?perf(FiveSql2):=20PcOpFieldTrim=20fused=20peeph?= =?UTF-8?q?ole=20=E2=80=94=20string=20WHERE=20at=20raw=20RDD=20parity?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second pcode peephole to match the one added for FieldGet(literal). SqlExprToPrg auto-wraps CHAR column references with AllTrim() to match SqlCmpEq's CHAR-padding trim semantics, so every string WHERE predicate evaluates `AllTrim(FieldGet(n)) == 'literal'` per row. Before this commit each of those per-row evaluations did: 1. PushSymbol ALLTRIM 2. PushSymbol FIELDGET → Function(1) [1 RTL Frame] 3. parseCharField → MakeString [alloc: copies raw bytes] 4. Function(1) → AllTrim RTL [1 RTL Frame] 5. strings.TrimSpace [alloc: new string] 6. Return, continue New opcode `PcOpFieldTrim` (0x47, operand: uint16 1-based field position) fuses the two RTL calls into a single opcode that: 1. Calls FastFieldGetter directly when it is set (no Frame/Function dispatch); otherwise falls back to the FIELDGET RTL call. 2. Walks the returned string with ASCII-space trim in place (only 0x20 is stripped, unlike strings.TrimSpace, because DBF CHAR fields pad with 0x20 only). 3. Pushes `s[lo:hi]` — a sub-slice, no new allocation. 4. Short-circuits back to the same string if no trim needed. genpc recognizes the shape `AllTrim(FieldGet(n))`, where n is an integer literal in 1..65535, in emitCall and emits the fused opcode automatically — no SQL-side API change. Matches the existing FieldGet peephole's shape. Bench impact (50k rows, 3-run steady state, vs raw RDD baseline 6.2ms): String WHERE before 7.9ms → after 6.2ms 1.00x (parity!) Numeric WHERE 6.9ms (unchanged) 1.11x No WHERE 9.1ms (unchanged) 1.47x String WHERE is now at parity with the raw Harbour-style RDD scan. Compared to session start (119ms), that's a 19x speedup. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - go test ./... 
ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) --- compiler/genpc/genpc.go | 19 +++++++++++++++++++ hbrt/pcinterp.go | 35 +++++++++++++++++++++++++++++++++++ hbrt/pcode.go | 6 ++++++ 3 files changed, 60 insertions(+) diff --git a/compiler/genpc/genpc.go b/compiler/genpc/genpc.go index 28548a9..9523c45 100644 --- a/compiler/genpc/genpc.go +++ b/compiler/genpc/genpc.go @@ -498,6 +498,25 @@ func (g *generator) emitCall(e *ast.CallExpr) { } } } + // Peephole: AllTrim(FieldGet()) → PcOpFieldTrim . + // Fuses the character-field CHAR-trim normalization that + // SqlExprToPrg auto-wraps into one opcode, saving one Function + // dispatch + one intermediate string allocation per row. + if strings.EqualFold(ident.Name, "AllTrim") && len(e.Args) == 1 { + if inner, ok := e.Args[0].(*ast.CallExpr); ok { + if innerIdent, ok := inner.Func.(*ast.IdentExpr); ok && + strings.EqualFold(innerIdent.Name, "FieldGet") && + len(inner.Args) == 1 { + if lit, ok := inner.Args[0].(*ast.LiteralExpr); ok && lit.Kind == token.INT { + if n, err := strconv.Atoi(lit.Value); err == nil && n > 0 && n <= 0xFFFF { + g.emit(hbrt.PcOpFieldTrim) + g.emitU16(uint16(n)) + return + } + } + } + } + } g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(ident.Name)) g.emit(hbrt.PcOpPushNil) for _, arg := range e.Args { diff --git a/hbrt/pcinterp.go b/hbrt/pcinterp.go index adc02ba..975f3a5 100644 --- a/hbrt/pcinterp.go +++ b/hbrt/pcinterp.go @@ -174,6 +174,41 @@ func execPcodeBody(t *Thread, fn *PcodeFunc, mod *PcodeModule) { t.Function(1) } + // --- AllTrim(FieldGet(n)) fused peephole --- + case PcOpFieldTrim: + fIdx := int(binary.LittleEndian.Uint16(code[pc:])) + pc += 2 + // Fast path: use direct field getter, trim inline. 
+ var v Value + if fg := t.FastFieldGetter; fg != nil { + v = fg(fIdx) + } else { + // Fallback: resolve via FIELDGET RTL + t.PushSymbol(t.VM().FindSymbol("FIELDGET")) + t.PushNil() + t.PushLong(int64(fIdx)) + t.Function(1) + v = t.Pop2() + } + if v.IsString() { + s := v.AsString() + // ASCII-space trim — DBF CHAR fields pad with 0x20 only + lo, hi := 0, len(s) + for lo < hi && s[lo] == ' ' { + lo++ + } + for hi > lo && s[hi-1] == ' ' { + hi-- + } + if lo == 0 && hi == len(s) { + t.PushValue(v) + } else { + t.PushString(s[lo:hi]) + } + } else { + t.PushValue(v) + } + // --- Function calls --- case PcOpPushSymbol: slen := int(binary.LittleEndian.Uint16(code[pc:])) diff --git a/hbrt/pcode.go b/hbrt/pcode.go index 1a16be5..e17219a 100644 --- a/hbrt/pcode.go +++ b/hbrt/pcode.go @@ -74,6 +74,12 @@ const ( // peephole optimization. Operand: uint16 1-based field position. PcOpFieldGet byte = 0x46 + // `AllTrim(FieldGet(n))` peephole — fetch the field, trim the + // result in place, push one string. Skips two Function dispatches + // (FieldGet + AllTrim) and one intermediate string allocation + // per invocation. Operand: uint16 1-based field position. + PcOpFieldTrim byte = 0x47 + // Self / OOP PcOpPushSelf byte = 0x48 PcOpPushSelfField byte = 0x49 // + uint16 len + name