Files
five/hbrt/pcinterp.go
CharlesKWON 29ca02e1bc fix(genpc,parser,pcinterp): pcode wider regression sweep (Tier 1 #3)
Six more silent miscompiles in the pcode path, all uncovered by a
new pcode regression sweep that exercises the full PRG surface a
dynamic FrbCompile body could legitimately use.

  * **xBase-keyword shadowing of variable names.** parseIdentStmt
    and parseExprStmt's fallback switches consumed an entire line
    when the leading IDENT matched LABEL / REPORT / ACCEPT / INPUT
    / NOTE / etc. Those words are also extremely common LOCAL /
    PRIVATE names — `LOCAL label ; label := "x"` had the
    assignment swallowed because the switch didn't peek at the
    next token. Both switches now look at peek(1): an assignment
    operator, [], (, -, ++, --, or `.` means it's a variable /
    call / member access, not the xBase command, and we fall
    through to expression parsing. Real silent bug — bit
    test_frb_pcode_sweep's `LOCAL label` declaration.

  * **`arr[i]` indexing not implemented in genpc.** ast.IndexExpr
    fell through to the default PushNil path, so any indexed read
    in a pcode-mode body returned NIL. New case emits the array,
    the index, and PcOpArrayPush (the get-op; PcOpArrayPop is the
    set-op — naming follows Harbour convention). Hashes go
    through the same opcode, which already special-cases
    IsHash() in ops_collection.go.

  * **Hash literals not implemented in genpc + dispatch missing
    in pcinterp.** `{ "k" => v, ... }` fell to PushNil. Added
    HashLitExpr emit (Push key, Push value pairs, then PcOpHashGen
    with count). Also wired up the PcOpHashGen dispatch in
    execPcodeBody — it had been declared in pcode.go since the
    initial design but the case statement was never added, so
    even hand-written modules couldn't use hashes.

  * **`x++` / `x--` postfix were silent no-ops.** PostfixExpr fell
    to PushNil and the surrounding ExprStmt then popped the NIL.
    DO WHILE loops with `n--` couldn't terminate; FOR loops with
    `i++` in the body were broken too. New case: PushLocal +
    LocalAddInt(±1).

  * **BlockExpr (`{|p| body }`) wasn't compiled.** Eval(b, n)
    inside a pcode body returned NIL. Added: build the body in a
    sub-codebuffer with the block's params occupying its locals,
    emit PcOpRetValue at the end, then PushBlock with the
    serialized bytes. Format extended with a uint16 nParams field
    so the runtime's PcOpPushBlock dispatch can set
    PcodeFunc.Params correctly — without it, ExecPcode's
    Frame(0, 0) pulled none of Eval's args and the block saw
    every parameter as NIL.

  * **All g.locals accesses were case-sensitive.** PRG is case-
    insensitive, but the pcode generator stored block params via
    strings.ToUpper while every other lookup site (function decl,
    mid-decl, ForStmt, IdentExpr read, AssignExpr write,
    PostfixExpr) used the raw .Name. So `{|x| x*x }` stored "X"
    but read "x" and missed. Normalized: all insertions and all
    lookups now go through strings.ToUpper.

  * **SeqExpr in pcode** — added the matching emit for comma-
    separated expression lists in code blocks (`{|| a, b, c }`).
    Same shape as the gengo SeqExpr case from Wave 1.

Test fixture: tests/frb/test_frb_pcode_sweep.prg covers 14 shapes
(string ops, arithmetic, comparison chains, array indexing, DO
WHILE with postfix, nested IF, IIf, hash literal + indexing,
block + Eval, character iteration). All 14 pass. Wired into the
FRB runner — suite now stands at 7/7.

Other gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56
  std.ch suite       : 15/15
  FRB suite          : 7/7

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 11:32:38 +09:00

338 lines
8.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Five pcode interpreter — executes pcode bytecode on a Thread.
// Each opcode directly calls the corresponding Thread method,
// so pcode execution is semantically identical to gengo-compiled code.
package hbrt
import (
"encoding/binary"
"fmt"
"math"
)
// ExecPcode executes the pcode function fn on thread t. This is the
// general-purpose entry point.
//
// A frame is opened from fn.Params and fn.Locals, and t.EndProc is
// deferred before the body starts, so EndProc runs whether the body
// returns normally or unwinds through a panic (HbError, BreakValue,
// user Break). The recover / re-panic handling lives inside EndProc,
// which is defined elsewhere.
//
// mod is passed through untouched to the dispatch loop.
func ExecPcode(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
	nParams, nLocals := fn.Params, fn.Locals
	t.Frame(nParams, nLocals)
	defer t.EndProc()

	execPcodeBody(t, fn, mod)
}
// ExecPcodeFast is a hot-path variant of ExecPcode for short, pure
// expressions (FiveSql2 WHERE predicates, inline lambdas) where the
// caller has already guaranteed that the body will not panic with
// HbError / BreakValue. It opens the frame, runs the body, and then
// calls EndProcFast as a plain statement instead of deferring EndProc;
// the stated motivation is saving ~15ns per call across tens of
// thousands of rows in scan loops.
//
// Contract: caller is responsible for panic discipline. No recover is
// installed here, and EndProcFast is NOT deferred, so it only runs when
// execPcodeBody returns normally. If the body panics, the panic
// propagates to the caller and this function performs no frame cleanup.
//
// NOTE(review): as far as this function is concerned, a panicking body
// leaves the frame opened by Frame in place — confirm that callers (or
// an outer handler) restore the frame stack in that case.
func ExecPcodeFast(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
t.Frame(fn.Params, fn.Locals)
execPcodeBody(t, fn, mod)
// Plain call, not a defer: skipped entirely if the body panics.
t.EndProcFast()
}
// execPcodeBody is the shared opcode dispatch loop used by ExecPcode and
// ExecPcodeFast.
//
// It walks fn.Code from offset 0, reading one opcode byte at a time and
// decoding any inline operands (all little-endian) that follow it. Most
// opcodes forward directly to the Thread method of the same name. The
// loop ends when pc reaches the end of the code, or when a Return,
// RetValue, EndProc or Halt opcode is executed. An unrecognized opcode
// panics with a message carrying the opcode value and its offset.
//
// Frame setup and teardown are the caller's job; this function does
// neither. mod is not inspected here; it is only handed on to blocks
// created by PcOpPushBlock.
//
// NOTE(review): operands are read without explicit bounds checks, so
// truncated bytecode surfaces as a Go runtime slice/index panic rather
// than a pcode-level diagnostic.
func execPcodeBody(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
code := fn.Code
pc := 0 // program counter
for pc < len(code) {
// Fetch the opcode and step past it; pc now points at its operands.
op := code[pc]
pc++
switch op {
case PcOpNop:
// do nothing
// --- Stack ---
case PcOpPushNil:
t.PushNil()
case PcOpPushTrue:
t.PushBool(true)
case PcOpPushFalse:
t.PushBool(false)
case PcOpPushInt:
// Operand: 8 bytes, reinterpreted as a signed 64-bit integer.
v := int64(binary.LittleEndian.Uint64(code[pc:]))
pc += 8
t.PushLong(v)
case PcOpPushDouble:
// Operand: 8 bytes holding the IEEE-754 bit pattern of the float64.
bits := binary.LittleEndian.Uint64(code[pc:])
pc += 8
// NOTE(review): the two trailing 0 arguments are presumably
// width/decimals formatting hints — confirm against PushDouble.
t.PushDouble(math.Float64frombits(bits), 0, 0)
case PcOpPushString:
// Operand: uint16 byte length, followed by that many bytes.
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.PushString(string(code[pc : pc+slen]))
pc += slen
case PcOpPushBool:
// Operand: one byte; any non-zero value means true.
t.PushBool(code[pc] != 0)
pc++
case PcOpPushLocal:
// Operand: uint16 local index.
idx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.PushLocal(idx)
case PcOpPushMemvar:
// Operand: uint16 byte length, followed by the memvar name.
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
// Resolve through Memvars (PRIVATE shadows PUBLIC).
// Unknown names push NIL — matches Harbour behavior for
// undeclared memvars inside `&(expr)`.
if t.Memvars != nil {
if v, ok := t.Memvars.Get(name); ok {
t.push(v)
// Resume the dispatch loop, skipping the PushNil fallback below.
continue
}
}
t.PushNil()
case PcOpPopLocal:
// Operand: uint16 local index.
idx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.PopLocal(idx)
case PcOpPop:
t.Pop()
case PcOpDup:
t.Dup()
// --- Arithmetic ---
case PcOpPlus:
t.Plus()
case PcOpMinus:
t.Minus()
case PcOpMult:
t.Mult()
case PcOpDivide:
t.Divide()
case PcOpMod:
t.Modulus()
case PcOpPower:
t.Power()
case PcOpNegate:
t.Negate()
// --- Comparison ---
case PcOpEqual:
t.Equal()
case PcOpNotEqual:
t.NotEqual()
case PcOpLess:
t.Less()
case PcOpGreater:
t.Greater()
case PcOpLessEq:
t.LessEqual()
case PcOpGreaterEq:
t.GreaterEqual()
case PcOpInString:
t.InString()
// --- Logical ---
case PcOpAnd:
t.And()
case PcOpOr:
t.Or()
case PcOpNot:
t.Not()
// --- Flow control ---
// Jump operands are signed 32-bit offsets applied after pc has been
// advanced past the operand, i.e. relative to the next instruction.
case PcOpJump:
offset := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
pc += int(offset)
case PcOpJumpFalse:
offset := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
// The condition is popped whether or not the jump is taken.
if !t.PopLogical() {
pc += int(offset)
}
case PcOpJumpTrue:
offset := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
// The condition is popped whether or not the jump is taken.
if t.PopLogical() {
pc += int(offset)
}
case PcOpReturn:
return
case PcOpRetValue:
t.RetValue()
return
// --- Frame ---
case PcOpFrame:
// Already called at function entry; skip if re-encountered
pc += 4 // params + locals
case PcOpEndProc:
// Only leaves the loop; the caller performs the actual EndProc.
return
// --- Workarea field access (peephole for FieldGet(literal)) ---
case PcOpFieldGet:
// Operand: uint16 field index.
fIdx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
// Hot path — SqlScan plugs a direct field getter closure into
// t.FastFieldGetter before running the predicate, so we skip
// PushSymbol + Function dispatch + FieldGet RTL's own Frame.
if fg := t.FastFieldGetter; fg != nil {
t.PushValue(fg(fIdx))
} else {
// Generic fallback: resolve through RTL symbol table
t.PushSymbol(t.VM().FindSymbol("FIELDGET"))
t.PushNil()
t.PushLong(int64(fIdx))
t.Function(1)
}
// --- AllTrim(FieldGet(n)) fused peephole ---
case PcOpFieldTrim:
// Operand: uint16 field index.
fIdx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
// Fast path: use direct field getter, trim inline.
var v Value
if fg := t.FastFieldGetter; fg != nil {
v = fg(fIdx)
} else {
// Fallback: resolve via FIELDGET RTL
t.PushSymbol(t.VM().FindSymbol("FIELDGET"))
t.PushNil()
t.PushLong(int64(fIdx))
t.Function(1)
// Take the call's result back off the stack so it can be trimmed.
v = t.Pop2()
}
if v.IsString() {
s := v.AsString()
// ASCII-space trim — DBF CHAR fields pad with 0x20 only
lo, hi := 0, len(s)
for lo < hi && s[lo] == ' ' {
lo++
}
for hi > lo && s[hi-1] == ' ' {
hi--
}
// Nothing trimmed: push the original value instead of building
// a new string.
if lo == 0 && hi == len(s) {
t.PushValue(v)
} else {
t.PushString(s[lo:hi])
}
} else {
// Non-string values are pushed through unchanged.
t.PushValue(v)
}
// --- Function calls ---
case PcOpPushSymbol:
// Operand: uint16 byte length, followed by the symbol name, which
// is looked up in the VM on every execution of this opcode.
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
sym := t.VM().FindSymbol(name)
t.PushSymbol(sym)
case PcOpPushNilArg:
t.PushNil()
case PcOpFunction:
// Operand: uint16 argument count.
nArgs := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.Function(nArgs)
case PcOpDo:
// Operand: uint16 argument count.
nArgs := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.Do(nArgs)
// --- Self / OOP ---
case PcOpPushSelf:
t.PushSelf()
case PcOpPushSelfField:
// Operand: uint16 byte length, followed by the field name.
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
t.PushSelfField(name)
case PcOpSetSelfField:
// Operand: uint16 byte length, followed by the field name.
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
t.SetSelfField(name)
case PcOpSend:
// Operands: uint16 byte length + message name, then uint16
// argument count.
slen := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
name := string(code[pc : pc+slen])
pc += slen
nArgs := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.Send(name, nArgs)
// --- Array ---
case PcOpArrayGen:
// Operand: uint16 element count.
count := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.ArrayGen(count)
case PcOpArrayPush:
t.ArrayPush()
case PcOpArrayPop:
t.ArrayPop()
// --- Hash --- (PcOpHashGen has been declared since the
// initial pcode design but its dispatch case was missing,
// so any pcode body that built a hash literal panicked
// with "unknown pcode opcode: 0x51".)
case PcOpHashGen:
// Operand: uint16 count, passed straight to HashGen.
count := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
t.HashGen(count)
// --- Block ---
case PcOpPushBlock:
// Operand layout: uint32 code length, the block's bytecode,
// uint16 parameter count, uint16 detached count.
codeLen := int(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
// Copy the bytes so the block owns its code instead of aliasing
// the enclosing function's slice.
blockCode := make([]byte, codeLen)
copy(blockCode, code[pc:pc+codeLen])
pc += codeLen
nParams := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
nDetached := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
// Create a Go function that interprets the block's pcode.
// Params count must be threaded through so ExecPcode's
// Frame() pulls Eval()'s args off the stack into the
// block's locals — without it, `{|x| x*x }` saw x=NIL
// and `x * x` panicked on the multiplication.
// Locals is left at its zero value for the block's PcodeFunc.
blockFn := &PcodeFunc{Code: blockCode, Params: nParams}
modCopy := mod
// The block runs through the full ExecPcode path (deferred EndProc)
// on whichever thread evaluates it.
t.PushBlock(func(t2 *Thread) {
ExecPcode(t2, blockFn, modCopy)
}, nDetached)
// --- Local ops ---
case PcOpLocalAddInt:
// Operands: uint16 local index, then a signed 32-bit delta that
// is widened to int64 for the call.
idx := int(binary.LittleEndian.Uint16(code[pc:]))
pc += 2
val := int32(binary.LittleEndian.Uint32(code[pc:]))
pc += 4
t.LocalAddInt(idx, int64(val))
case PcOpInc:
t.Inc()
case PcOpDec:
t.Dec()
case PcOpPopLogical:
// The popped logical value is discarded.
t.PopLogical()
case PcOpLine:
pc += 2 // skip line number (for debugging)
case PcOpHalt:
return
default:
// pc was already advanced past the opcode, hence pc-1 in the message.
panic(fmt.Sprintf("unknown pcode opcode: 0x%02X at pc=%d", op, pc-1))
}
}
}