Files
five/hbrt/pcinterp.go
CharlesKWON b1d89b9783 perf(FiveSql2): PcOpFieldTrim fused peephole — string WHERE at raw RDD parity
Second pcode peephole to match the one added for FieldGet(literal).
SqlExprToPrg auto-wraps CHAR column references with AllTrim() to
match SqlCmpEq's CHAR-padding trim semantics, so every string WHERE
predicate evaluates `AllTrim(FieldGet(n)) == 'literal'` per row.

Before this commit each of those per-row evaluations did:
  1. PushSymbol ALLTRIM
  2. PushSymbol FIELDGET → Function(1)  [1 RTL Frame]
  3. parseCharField → MakeString       [alloc: copies raw bytes]
  4. Function(1) → AllTrim RTL         [1 RTL Frame]
  5. strings.TrimSpace                  [alloc: new string]
  6. Return, continue

New opcode `PcOpFieldTrim <idx>` (0x47) fuses the two RTL calls into
a single opcode that:
  1. Calls FastFieldGetter directly (no Frame/Function dispatch).
  2. Walks the returned string with ASCII-space trim in place.
  3. Pushes `s[lo:hi]` — a sub-slice, no new allocation.
  4. Short-circuits back to the same string if no trim needed.

genpc recognizes the shape `AllTrim(FieldGet(<int-literal>))` in
emitCall and emits the fused opcode automatically — no SQL-side
API change. Matches the existing FieldGet peephole's shape.

Bench impact (50k rows, 3-run steady state, vs raw RDD baseline 6.2ms):

  String WHERE          before 7.9ms → after 6.2ms   1.00x (parity!)
  Numeric WHERE         6.9ms (unchanged)            1.11x
  No WHERE              9.1ms (unchanged)            1.47x

String WHERE is now at parity with the raw Harbour-style RDD scan.
Compared to session start (119ms), that's a 19x speedup.

Validation:
  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./... ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 14:03:03 +09:00

308 lines
7.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Five pcode interpreter — executes pcode bytecode on a Thread.
// Each opcode directly calls the corresponding Thread method,
// so pcode execution is semantically identical to gengo-compiled code.
package hbrt
import (
"encoding/binary"
"fmt"
"math"
)
// ExecPcode runs a pcode function on the given thread.
// Full variant — installs a defer/recover so panics from inside the
// pcode body (HbError, BreakValue, user Break) are re-panicked with
// proper frame unwinding. Used for general-purpose pcode evaluation.
func ExecPcode(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
t.Frame(fn.Params, fn.Locals)
defer t.EndProc()
execPcodeBody(t, fn, mod)
}
// ExecPcodeFast is a hot-path variant for short, pure expressions
// (FiveSql2 WHERE predicates, inline lambdas) where the caller has
// already guaranteed that the body will not panic with HbError /
// BreakValue. Skips the defer+recover dance in EndProc, saving ~15ns
// per call × tens of thousands of rows in scan loops.
//
// Contract: caller is responsible for panic discipline. If the pcode
// body panics, the frame stack is still cleaned up (EndProcFast) but
// no diagnostic is logged and SEQUENCE/RECOVER will not see the panic.
func ExecPcodeFast(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
t.Frame(fn.Params, fn.Locals)
execPcodeBody(t, fn, mod)
t.EndProcFast()
}
// execPcodeBody is the shared opcode dispatch loop.
func execPcodeBody(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
code := fn.Code
pc := 0 // program counter
for pc < len(code) {
op := code[pc]
pc++
switch op {
case PcOpNop:
// do nothing
// --- Stack ---
case PcOpPushNil:
t.PushNil()
case PcOpPushTrue:
t.PushBool(true)
case PcOpPushFalse:
t.PushBool(false)
case PcOpPushInt:
v := int64(binary.LittleEndian.Uint64(code[pc:]))
pc += 8
t.PushLong(v)
case PcOpPushDouble:
bits := binary.LittleEndian.Uint64(code[pc:])
pc += 8
t.PushDouble(math.Float64frombits(bits), 0, 0)
case PcOpPushString:
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.PushString(string(code[pc : pc+slen]))
pc += slen
case PcOpPushBool:
t.PushBool(code[pc] != 0)
pc++
case PcOpPushLocal:
idx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.PushLocal(idx)
case PcOpPopLocal:
idx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.PopLocal(idx)
case PcOpPop:
t.Pop()
case PcOpDup:
t.Dup()
// --- Arithmetic ---
case PcOpPlus:
t.Plus()
case PcOpMinus:
t.Minus()
case PcOpMult:
t.Mult()
case PcOpDivide:
t.Divide()
case PcOpMod:
t.Modulus()
case PcOpPower:
t.Power()
case PcOpNegate:
t.Negate()
// --- Comparison ---
case PcOpEqual:
t.Equal()
case PcOpNotEqual:
t.NotEqual()
case PcOpLess:
t.Less()
case PcOpGreater:
t.Greater()
case PcOpLessEq:
t.LessEqual()
case PcOpGreaterEq:
t.GreaterEqual()
case PcOpInString:
t.InString()
// --- Logical ---
case PcOpAnd:
t.And()
case PcOpOr:
t.Or()
case PcOpNot:
t.Not()
// --- Flow control ---
case PcOpJump:
offset := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
pc += int(offset)
case PcOpJumpFalse:
offset := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
if !t.PopLogical() {
pc += int(offset)
}
case PcOpJumpTrue:
offset := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
if t.PopLogical() {
pc += int(offset)
}
case PcOpReturn:
return
case PcOpRetValue:
t.RetValue()
return
// --- Frame ---
case PcOpFrame:
// Already called at function entry; skip if re-encountered
pc += 4 // params + locals
case PcOpEndProc:
return
// --- Workarea field access (peephole for FieldGet(literal)) ---
case PcOpFieldGet:
fIdx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
// Hot path — SqlScan plugs a direct field getter closure into
// t.FastFieldGetter before running the predicate, so we skip
// PushSymbol + Function dispatch + FieldGet RTL's own Frame.
if fg := t.FastFieldGetter; fg != nil {
t.PushValue(fg(fIdx))
} else {
// Generic fallback: resolve through RTL symbol table
t.PushSymbol(t.VM().FindSymbol("FIELDGET"))
t.PushNil()
t.PushLong(int64(fIdx))
t.Function(1)
}
// --- AllTrim(FieldGet(n)) fused peephole ---
case PcOpFieldTrim:
fIdx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
// Fast path: use direct field getter, trim inline.
var v Value
if fg := t.FastFieldGetter; fg != nil {
v = fg(fIdx)
} else {
// Fallback: resolve via FIELDGET RTL
t.PushSymbol(t.VM().FindSymbol("FIELDGET"))
t.PushNil()
t.PushLong(int64(fIdx))
t.Function(1)
v = t.Pop2()
}
if v.IsString() {
s := v.AsString()
// ASCII-space trim — DBF CHAR fields pad with 0x20 only
lo, hi := 0, len(s)
for lo < hi && s[lo] == ' ' {
lo++
}
for hi > lo && s[hi-1] == ' ' {
hi--
}
if lo == 0 && hi == len(s) {
t.PushValue(v)
} else {
t.PushString(s[lo:hi])
}
} else {
t.PushValue(v)
}
// --- Function calls ---
case PcOpPushSymbol:
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
sym := t.VM().FindSymbol(name)
t.PushSymbol(sym)
case PcOpPushNilArg:
t.PushNil()
case PcOpFunction:
nArgs := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.Function(nArgs)
case PcOpDo:
nArgs := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.Do(nArgs)
// --- Self / OOP ---
case PcOpPushSelf:
t.PushSelf()
case PcOpPushSelfField:
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
t.PushSelfField(name)
case PcOpSetSelfField:
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
t.SetSelfField(name)
case PcOpSend:
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
nArgs := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.Send(name, nArgs)
// --- Array ---
case PcOpArrayGen:
count := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.ArrayGen(count)
case PcOpArrayPush:
t.ArrayPush()
case PcOpArrayPop:
t.ArrayPop()
// --- Block ---
case PcOpPushBlock:
codeLen := int(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
blockCode := make([]byte, codeLen)
copy(blockCode, code[pc:pc+codeLen])
pc += codeLen
nDetached := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
// Create a Go function that interprets the block's pcode
blockFn := &PcodeFunc{Code: blockCode}
modCopy := mod
t.PushBlock(func(t2 *Thread) {
ExecPcode(t2, blockFn, modCopy)
}, nDetached)
// --- Local ops ---
case PcOpLocalAddInt:
idx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
val := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
t.LocalAddInt(idx, int64(val))
case PcOpInc:
t.Inc()
case PcOpDec:
t.Dec()
case PcOpPopLogical:
t.PopLogical()
case PcOpLine:
pc += 2 // skip line number (for debugging)
case PcOpHalt:
return
default:
panic(fmt.Sprintf("unknown pcode opcode: 0x%02X at pc=%d", op, pc-1))
}
}
}