Files
five/compiler/genpc/genpc.go
CharlesKWON efb615bed9 fix(frb,genpc): in-process compile + 4 pcode bugs
Compiling _FiveSql2/test/test_sql_extreme.prg + a sweep of the FRB
demos surfaced four real bugs in the dynamic-compilation pipeline.
All fixes shipped together because they were on the same critical
path; each is independently revertible.

  * **pcode FOR loop ignored STEP and direction.** emitFor in
    compiler/genpc emitted a fixed `<= to` comparison and a hardcoded
    `+1` increment, then deleted the actual step expression with
    slice arithmetic on the byte buffer. Result: `FOR 5 TO 1 STEP
    -1` exited on the first iteration; `FOR 1 TO 10 STEP 2` summed
    1..10 (55) instead of 1+3+5+7+9 (25). Rewritten to mirror
    gengo's emitFor: detect negative step from a literal `-N` or
    unary MINUS, pick `<=` vs `>=` accordingly, and emit a clean
    `var := var + step` increment per iteration.

  * **pcode compound `+=` operator stored only the RHS.** emitAssign
    looked at AssignExpr.Op only for the := case; +=/-=/etc.
    silently took the same path, so `n += i` compiled as `n := i`,
    discarding the accumulator. Loop reduces were wrong: `Reverse`
    returned "" and `n := 0; FOR i ... n += i; NEXT` returned only
    the last increment. New compoundBinOp helper maps PLUSEQ /
    MINUSEQ / STAREQ / SLASHEQ / PERCENTEQ / POWEREQ to their
    matching binary opcode; emitAssign emits `local + rhs ; pop
    local` for compound forms.

  * **Pcode body stack leaks polluted the caller's frame.** A pcode
    function whose body left intermediate values on the data stack
    (FOR control values, etc.) returned with extra entries past
    its declared retVal. FrbDoFunc / FrbExecFunc / FrbRunFunc then
    pushed retVal on top of those leaks, so the caller saw the
    leaked values where its own preceding arguments should have
    been: `? "Fibonacci(10) =", FrbDo(...), "(expect 55)"` printed
    `1 55 (expect 55)` because the FOR loop's `1` lived in arg-1's
    slot. Two new Thread methods (`SP()` / `SetSP(int)`) let the
    three FRB dispatchers snapshot stack depth before the inner
    call and clamp it back afterward, so the leaks evaporate before
    they reach the caller's frame.

  * **FrbExec / FrbRun recursed into the host's Main forever.** Both
    looked up "MAIN" via t.VM().FindSymbol, which always resolved
    to the OUTER program's Main since FRB modules deliberately keep
    Main local. Compile + run + unload became compile + recurse +
    OOM. Both now look up Main via mod.FindFunc("MAIN") (module
    scope) — Frbload's policy of leaving Main module-local now
    actually has the intended effect.

Plus an architectural improvement: in-memory compilation no longer
depends on shelling out to an external `five` binary. New
hbrtl.frbCompileInProc parses + preprocesses + generates pcode in
process, building a FrbModule directly. FrbCompile and FrbExec use
this exclusively, which means dynamic compilation works from any
directory regardless of PATH and without a second process. The
plugin-mode path (with its runtime-version-mismatch fragility) is
left available via hbrt.FrbCompileSource for callers that want it,
but FrbCompile no longer reaches for it by default.

Test suite: tests/frb/ holds five fixtures + a runner. 5/5 pass:
test_frb_simple / test_frb_pcode_load / test_frb_compile /
test_frb_loop / test_frb_step.

Other gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56
  std.ch suite       : 14/14
  FRB suite          : 5/5

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 10:25:35 +09:00

677 lines
16 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// genpc — Five pcode generator. Compiles AST to bytecode for FRB interpreter mode.
// Mirrors gengo's logic but emits bytecode opcodes instead of Go source code.
package genpc
import (
"encoding/binary"
"five/compiler/ast"
"five/compiler/token"
"five/hbrt"
"math"
"strconv"
"strings"
)
// Generate walks the top-level declarations of file and compiles every
// function declaration into pcode. Declarations of any other kind are
// ignored. The returned module carries file.Name and one PcodeFunc per
// compiled function.
func Generate(file *ast.File) *hbrt.PcodeModule {
	module := &hbrt.PcodeModule{
		Name:  file.Name,
		Funcs: make(map[string]*hbrt.PcodeFunc),
	}
	g := &generator{mod: module}
	for _, d := range file.Decls {
		if fn, isFunc := d.(*ast.FuncDecl); isFunc {
			g.emitFunc(fn)
		}
	}
	return module
}
// CompileExpr turns one expression AST into a standalone PcodeFunc named
// "_EXPR". The emitted code evaluates the expression and ends with
// PcOpRetValue, so executing it yields the expression's value as the
// return value. FiveSql2 uses this for prepared-statement-style caching:
// WHERE / SELECT expressions are compiled once per query and executed
// once per row.
//
// The function declares zero parameters and zero locals; any identifier
// in expr is therefore compiled as a runtime memvar lookup. Field access
// context comes from the caller's current workarea.
func CompileExpr(expr ast.Expr) *hbrt.PcodeFunc {
	g := new(generator)
	g.mod = &hbrt.PcodeModule{Funcs: make(map[string]*hbrt.PcodeFunc)}
	g.locals = make(map[string]int)

	// Only the expression body plus RetValue is emitted here; per the
	// original note, ExecPcode supplies its own Frame/EndProc around it.
	g.emitExpr(expr)
	g.emit(hbrt.PcOpRetValue)

	compiled := &hbrt.PcodeFunc{
		Name:   "_EXPR",
		Params: 0,
		Locals: 0,
		Code:   g.code,
	}
	return compiled
}
// generator carries the state of a pcode compilation: the module being
// filled, the bytecode buffer of the function currently being emitted,
// and that function's local-variable slot table.
type generator struct {
	mod    *hbrt.PcodeModule // destination module; emitFunc registers functions here
	code   []byte            // bytecode of the function in progress (reset by emitFunc)
	locals map[string]int    // parameter/local name → 1-based slot index
}
// emit appends the given raw bytes (opcodes or operands) to the code
// buffer in order.
func (g *generator) emit(b ...byte) {
	if len(b) == 0 {
		return
	}
	g.code = append(g.code, b...)
}
// emitU16 appends v to the code buffer as two bytes, little-endian
// (low byte first).
func (g *generator) emitU16(v uint16) {
	g.code = append(g.code, byte(v), byte(v>>8))
}
// emitI32 appends v to the code buffer as four bytes, little-endian,
// using its two's-complement bit pattern.
func (g *generator) emitI32(v int32) {
	bits := uint32(v)
	g.code = append(g.code,
		byte(bits),
		byte(bits>>8),
		byte(bits>>16),
		byte(bits>>24),
	)
}
// emitI64 appends v to the code buffer as eight bytes, little-endian,
// using its two's-complement bit pattern.
func (g *generator) emitI64(v int64) {
	bits := uint64(v)
	for shift := uint(0); shift < 64; shift += 8 {
		g.code = append(g.code, byte(bits>>shift))
	}
}
// emitF64 appends the IEEE-754 bit pattern of v to the code buffer as
// eight bytes, little-endian.
func (g *generator) emitF64(v float64) {
	bits := math.Float64bits(v)
	for shift := uint(0); shift < 64; shift += 8 {
		g.code = append(g.code, byte(bits>>shift))
	}
}
// emitString appends opcode op followed by a length-prefixed string
// operand: a little-endian uint16 byte count, then the raw bytes of s.
//
// The length prefix is 16 bits wide, so at most 0xFFFF bytes can be
// encoded. Longer strings are clamped to that limit. Previously the
// prefix wrapped modulo 65536 while every byte was still appended, which
// left the decoder reading string payload as opcodes. Truncating keeps
// the bytecode stream well-formed; this method has no error channel to
// report the overflow through.
func (g *generator) emitString(op byte, s string) {
	const maxOperandLen = 0xFFFF
	if len(s) > maxOperandLen {
		s = s[:maxOperandLen]
	}
	g.emit(op)
	g.emitU16(uint16(len(s)))
	g.code = append(g.code, s...)
}
// pc reports the current emit position: the offset at which the next
// emitted byte will land in the code buffer.
func (g *generator) pc() int {
	offset := len(g.code)
	return offset
}
// emitJumpPlaceholder emits jump opcode op followed by a zeroed 4-byte
// offset operand, and returns the position of that operand so the
// caller can fill it in later with patchJump.
func (g *generator) emitJumpPlaceholder(op byte) int {
	g.emit(op)
	operandAt := len(g.code)
	g.emit(0, 0, 0, 0) // int32 offset, patched once the target is known
	return operandAt
}
// patchJump makes the jump whose offset operand sits at pos land on the
// current emit position. The stored offset is relative to the first
// byte after the 4-byte operand, encoded as a little-endian int32.
func (g *generator) patchJump(pos int) {
	target := len(g.code)
	afterOperand := pos + 4
	rel := int32(target - afterOperand)
	binary.LittleEndian.PutUint32(g.code[pos:], uint32(rel))
}
// --- Function ---
// emitFunc compiles one function declaration and registers the result
// in the module under the upper-cased function name.
//
// Slot layout (1-based): parameters first, then LOCALs from fn.Decls,
// then LOCAL declarations found directly in fn.Body. LOCALs nested
// inside IF/FOR/etc. bodies are not scanned here. The emitted code is:
// initializers of the fn.Decls LOCALs, the body statements, and a
// trailing implicit `RETURN NIL`.
func (g *generator) emitFunc(fn *ast.FuncDecl) {
	// Start from a clean per-function state.
	g.code = nil
	g.locals = make(map[string]int)

	// Assign slots: parameters occupy 1..len(Params).
	next := 1
	for _, p := range fn.Params {
		g.locals[p.Name] = next
		next++
	}
	firstLocal := next

	// bindLocals gives every variable of a LOCAL declaration the next slot.
	bindLocals := func(vd *ast.VarDecl) {
		if vd.Scope != ast.ScopeLocal {
			return
		}
		for _, v := range vd.Vars {
			g.locals[v.Name] = next
			next++
		}
	}
	for _, d := range fn.Decls {
		if vd, ok := d.(*ast.VarDecl); ok {
			bindLocals(vd)
		}
	}
	for _, s := range fn.Body {
		if vd, ok := s.(*ast.VarDecl); ok {
			bindLocals(vd)
		}
	}
	localCount := next - firstLocal

	// Initializers for header LOCALs. Slots are walked in the same order
	// they were assigned above, so slot tracks each variable's index
	// without consulting the map.
	slot := firstLocal
	for _, d := range fn.Decls {
		vd, ok := d.(*ast.VarDecl)
		if !ok || vd.Scope != ast.ScopeLocal {
			continue
		}
		for _, v := range vd.Vars {
			if v.Init != nil {
				g.emitExpr(v.Init)
				g.emit(hbrt.PcOpPopLocal)
				g.emitU16(uint16(slot))
			}
			slot++
		}
	}

	for _, s := range fn.Body {
		g.emitStmt(s)
	}

	// Falling off the end of the body returns NIL.
	g.emit(hbrt.PcOpPushNil, hbrt.PcOpRetValue)

	// Snapshot the buffer: g.code is reused for the next function.
	code := make([]byte, len(g.code))
	copy(code, g.code)
	g.mod.Funcs[strings.ToUpper(fn.Name)] = &hbrt.PcodeFunc{
		Name:   fn.Name,
		Code:   code,
		Params: len(fn.Params),
		Locals: localCount,
	}
}
// --- Statements ---
// emitStmt compiles a single statement. Statement kinds not listed in
// the switch are silently skipped (no code is emitted for them).
func (g *generator) emitStmt(stmt ast.Stmt) {
	switch s := stmt.(type) {
	case *ast.ReturnStmt:
		// A bare RETURN yields NIL.
		if s.Value == nil {
			g.emit(hbrt.PcOpPushNil)
		} else {
			g.emitExpr(s.Value)
		}
		g.emit(hbrt.PcOpRetValue)

	case *ast.ExprStmt:
		switch x := s.X.(type) {
		case *ast.AssignExpr:
			g.emitAssign(x)
		case *ast.CallExpr:
			g.emitCallStmt(x)
		default:
			// Any other expression is evaluated and its value discarded.
			g.emitExpr(s.X)
			g.emit(hbrt.PcOpPop)
		}

	case *ast.IfStmt:
		g.emitIf(s)

	case *ast.DoWhileStmt:
		g.emitDoWhile(s)

	case *ast.ForStmt:
		g.emitFor(s)

	case *ast.ExitStmt:
		// NOTE(review): placeholder carried over from the original —
		// EXIT emits PcOpHalt instead of a jump past the enclosing loop.
		g.emit(hbrt.PcOpHalt)

	case *ast.QOutStmt:
		g.emitQOut(s)

	case *ast.VarDecl:
		// Mid-function declaration: only initializers produce code.
		for _, v := range s.Vars {
			if v.Init == nil {
				continue
			}
			g.emitExpr(v.Init)
			slot, known := g.locals[v.Name]
			if !known {
				// No slot was assigned for this name; drop the value.
				g.emit(hbrt.PcOpPop)
				continue
			}
			g.emit(hbrt.PcOpPopLocal)
			g.emitU16(uint16(slot))
		}

	default:
		// Unsupported statement — nothing emitted.
	}
}
// emitIf compiles IF / ELSEIF / ELSE / ENDIF.
//
// Layout:
//
//	<cond>      JumpFalse → next branch
//	<body>      Jump      → end
//	<elif cond> JumpFalse → next branch
//	<elif body> Jump      → end
//	...
//	<else body>
//	end:
//
// Every taken branch must skip all remaining branches. The end-jump
// after each ELSEIF body used to be left with its zero placeholder
// offset, so after an ELSEIF body ran, execution fell through into the
// following ELSEIF condition or the ELSE body. All end-jumps are now
// collected and patched to the common end position.
func (g *generator) emitIf(s *ast.IfStmt) {
	g.emitExpr(s.Cond)
	skipBranch := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
	for _, stmt := range s.Body {
		g.emitStmt(stmt)
	}

	// Plain IF ... ENDIF: a false condition lands right after the body.
	if len(s.ElseIfs) == 0 && len(s.ElseBody) == 0 {
		g.patchJump(skipBranch)
		return
	}

	// endJumps holds the operand positions of every "branch done, go to
	// end" jump; they are all patched once the end position is known.
	endJumps := make([]int, 0, len(s.ElseIfs)+1)
	endJumps = append(endJumps, g.emitJumpPlaceholder(hbrt.PcOpJump))
	g.patchJump(skipBranch)

	for _, elif := range s.ElseIfs {
		g.emitExpr(elif.Cond)
		skipElif := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
		for _, stmt := range elif.Body {
			g.emitStmt(stmt)
		}
		endJumps = append(endJumps, g.emitJumpPlaceholder(hbrt.PcOpJump))
		g.patchJump(skipElif)
	}

	for _, stmt := range s.ElseBody {
		g.emitStmt(stmt)
	}

	for _, pos := range endJumps {
		g.patchJump(pos)
	}
}
// emitDoWhile compiles DO WHILE <cond> ... ENDDO as a pre-test loop:
//
//	loopStart: <cond>  JumpFalse → exit
//	           <body>  Jump      → loopStart
//	exit:
//
// The condition is checked before every iteration, including the first,
// so a loop whose condition is initially false never runs its body. The
// previous layout emitted the body first and tested afterwards, which
// always executed the body at least once. The new layout has the same
// shape as the one emitFor uses.
func (g *generator) emitDoWhile(s *ast.DoWhileStmt) {
	loopStart := g.pc()

	g.emitExpr(s.Cond)
	exitJump := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)

	for _, stmt := range s.Body {
		g.emitStmt(stmt)
	}

	// Backward jump; the offset is relative to the byte after the
	// 4-byte operand, matching patchJump's convention.
	g.emit(hbrt.PcOpJump)
	g.emitI32(int32(loopStart - g.pc() - 4))

	g.patchJump(exitJump)
}
// emitFor compiles FOR <var> := <start> TO <to> [STEP <step>] ... NEXT.
//
// Layout:
//
//	var := start
//	loopStart: var <= to (or var >= to when descending)  JumpFalse → exit
//	           <body>
//	           var := var + step
//	           Jump → loopStart
//	exit:
//
// The TO and STEP expressions sit inside the loop, so they are
// re-evaluated on every iteration.
//
// NOTE(review): if the loop variable is not a known local, the whole
// loop (body included) is silently dropped, as before.
func (g *generator) emitFor(s *ast.ForStmt) {
	idx, ok := g.locals[s.Var]
	if !ok {
		return
	}

	// Init: var := start
	g.emitExpr(s.Start)
	g.emit(hbrt.PcOpPopLocal)
	g.emitU16(uint16(idx))

	// Detect the step direction statically:
	//   * no STEP                      → +1, ascending
	//   * numeric literal starting '-' → descending
	//   * unary MINUS                  → descending
	// Anything else (variable, expression) defaults to ascending.
	// Without this the comparison was always `var <= to`, which made
	// `FOR 5 TO 1 STEP -1` exit on the first iteration. The literal check
	// covers DOUBLE as well as INT, so a literal fractional negative step
	// is also treated as descending.
	negStep := false
	switch step := s.Step.(type) {
	case *ast.LiteralExpr:
		numeric := step.Kind == token.INT || step.Kind == token.DOUBLE
		if numeric && len(step.Value) > 0 && step.Value[0] == '-' {
			negStep = true
		}
	case *ast.UnaryExpr:
		if step.Op == token.MINUS {
			negStep = true
		}
	}

	loopStart := g.pc()

	// Comparison: ascending → var <= to; descending → var >= to.
	g.emit(hbrt.PcOpPushLocal)
	g.emitU16(uint16(idx))
	g.emitExpr(s.To)
	if negStep {
		g.emit(hbrt.PcOpGreaterEq)
	} else {
		g.emit(hbrt.PcOpLessEq)
	}
	jumpOut := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)

	// Body
	for _, stmt := range s.Body {
		g.emitStmt(stmt)
	}

	// Increment: var := var + step. Push var, push step (or the default
	// of 1), add, store back.
	g.emit(hbrt.PcOpPushLocal)
	g.emitU16(uint16(idx))
	if s.Step != nil {
		g.emitExpr(s.Step)
	} else {
		g.emit(hbrt.PcOpPushInt)
		g.emitI64(1)
	}
	g.emit(hbrt.PcOpPlus)
	g.emit(hbrt.PcOpPopLocal)
	g.emitU16(uint16(idx))

	// Jump back to the comparison; the offset is relative to the byte
	// after the 4-byte operand.
	g.emit(hbrt.PcOpJump)
	g.emitI32(int32(loopStart - g.pc() - 4))

	g.patchJump(jumpOut)
}
// emitQOut compiles the `?` / `??` output statements as a call to QOUT
// (or QQOUT when s.IsQQ is set) with the listed expressions as
// arguments.
//
// The call is made with PcOpFunction, the same opcode emitCall uses when
// it needs a call's value. `?` is a statement, so the value is popped
// right away; otherwise each `?` would leave one stray value on the data
// stack.
func (g *generator) emitQOut(s *ast.QOutStmt) {
	sym := "QOUT"
	if s.IsQQ {
		sym = "QQOUT"
	}
	g.emitString(hbrt.PcOpPushSymbol, sym)
	g.emit(hbrt.PcOpPushNil)
	for _, expr := range s.Exprs {
		g.emitExpr(expr)
	}
	g.emit(hbrt.PcOpFunction)
	g.emitU16(uint16(len(s.Exprs)))
	// Statement context: discard the call's result.
	g.emit(hbrt.PcOpPop)
}
// --- Expressions ---
// emitExpr compiles an expression so that executing the emitted code
// leaves the expression's value on the stack. Expression kinds without
// a case, and literals of a kind without a case, compile to NIL, so
// those paths still push exactly one value.
func (g *generator) emitExpr(expr ast.Expr) {
	switch e := expr.(type) {
	case *ast.LiteralExpr:
		switch e.Kind {
		case token.INT:
			g.emit(hbrt.PcOpPushInt)
			g.emitI64(parseInt64(e.Value))
		case token.DOUBLE:
			g.emit(hbrt.PcOpPushDouble)
			g.emitF64(parseFloat64(e.Value))
		case token.STRING:
			g.emitString(hbrt.PcOpPushString, e.Value)
		case token.TRUE:
			g.emit(hbrt.PcOpPushTrue)
		case token.FALSE:
			g.emit(hbrt.PcOpPushFalse)
		case token.NIL_LIT:
			g.emit(hbrt.PcOpPushNil)
		default:
			// Unhandled literal kind: previously nothing was pushed,
			// leaving the stack one value short for whatever consumed
			// this expression. Fall back to NIL like the outer default.
			g.emit(hbrt.PcOpPushNil)
		}

	case *ast.IdentExpr:
		upper := strings.ToUpper(e.Name)
		if upper == "SELF" {
			g.emit(hbrt.PcOpPushSelf)
			return
		}
		// The locals table is consulted with the identifier exactly as
		// spelled in the source.
		if idx, ok := g.locals[e.Name]; ok {
			g.emit(hbrt.PcOpPushLocal)
			g.emitU16(uint16(idx))
		} else {
			// Unknown at compile time → runtime memvar lookup. This
			// makes `&(expr)` and the debugger's `p` see PRIVATEs
			// (including the frame-local injection the debugger does).
			g.emitString(hbrt.PcOpPushMemvar, upper)
		}

	case *ast.BinaryExpr:
		g.emitExpr(e.Left)
		g.emitExpr(e.Right)
		g.emitBinaryOp(e.Op)

	case *ast.UnaryExpr:
		g.emitExpr(e.X)
		// Operators other than MINUS and NOT leave the operand as-is.
		switch e.Op {
		case token.MINUS:
			g.emit(hbrt.PcOpNegate)
		case token.NOT:
			g.emit(hbrt.PcOpNot)
		}

	case *ast.CallExpr:
		g.emitCall(e)

	case *ast.IIfExpr:
		// cond JumpFalse→else ; <true> Jump→end ; else: <false> ; end:
		g.emitExpr(e.Cond)
		jumpFalse := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
		g.emitExpr(e.True)
		jumpEnd := g.emitJumpPlaceholder(hbrt.PcOpJump)
		g.patchJump(jumpFalse)
		g.emitExpr(e.False)
		g.patchJump(jumpEnd)

	case *ast.SelfExpr:
		g.emit(hbrt.PcOpPushSelf)

	case *ast.SendExpr:
		_, onSelf := e.Object.(*ast.SelfExpr)
		if !e.HasParens && onSelf {
			// Field read on Self uses the dedicated opcode directly. The
			// old code pushed Self, ran a no-op slice statement, popped
			// Self again and only then emitted PushSelfField; the
			// push/pop pair had no net effect and is dropped.
			g.emitString(hbrt.PcOpPushSelfField, strings.ToUpper(e.Method))
			return
		}
		g.emitExpr(e.Object)
		if e.HasParens {
			for _, arg := range e.Args {
				g.emitExpr(arg)
			}
			g.emitString(hbrt.PcOpSend, strings.ToUpper(e.Method))
			g.emitU16(uint16(len(e.Args)))
		}
		// NOTE(review): a paren-less send on a non-Self object emits
		// only the object, so the expression evaluates to the object
		// rather than the member. Unchanged here.

	case *ast.ArrayLitExpr:
		for _, item := range e.Items {
			g.emitExpr(item)
		}
		g.emit(hbrt.PcOpArrayGen)
		g.emitU16(uint16(len(e.Items)))

	default:
		g.emit(hbrt.PcOpPushNil) // fallback
	}
}
// emitBinaryOp emits the single opcode that implements binary operator
// op. Both operands are expected to have been emitted already. An
// operator without a mapping emits nothing.
func (g *generator) emitBinaryOp(op token.Kind) {
	var opcode byte
	switch op {
	// Arithmetic
	case token.PLUS:
		opcode = hbrt.PcOpPlus
	case token.MINUS:
		opcode = hbrt.PcOpMinus
	case token.STAR:
		opcode = hbrt.PcOpMult
	case token.SLASH:
		opcode = hbrt.PcOpDivide
	case token.PERCENT:
		opcode = hbrt.PcOpMod
	case token.POWER:
		opcode = hbrt.PcOpPower
	// Comparison (EQ and EXEQ share one opcode)
	case token.EQ, token.EXEQ:
		opcode = hbrt.PcOpEqual
	case token.NEQ:
		opcode = hbrt.PcOpNotEqual
	case token.LT:
		opcode = hbrt.PcOpLess
	case token.GT:
		opcode = hbrt.PcOpGreater
	case token.LTE:
		opcode = hbrt.PcOpLessEq
	case token.GTE:
		opcode = hbrt.PcOpGreaterEq
	// Logical
	case token.AND:
		opcode = hbrt.PcOpAnd
	case token.OR:
		opcode = hbrt.PcOpOr
	// Substring containment ($)
	case token.DOLLAR:
		opcode = hbrt.PcOpInString
	default:
		return
	}
	g.emit(opcode)
}
// emitCall compiles a call in expression position.
//
// Calls through a plain identifier go PushSymbol / PushNil / args /
// PcOpFunction, except for two peephole rewrites:
//
//   - FieldGet(<int literal>)          → PcOpFieldGet <idx>
//   - AllTrim(FieldGet(<int literal>)) → PcOpFieldTrim <idx>
//
// Both apply only when the literal parses to 1..0xFFFF. They replace the
// whole symbol-push / call sequence with one opcode carrying the field
// index; per the original notes this is the per-row hot path of WHERE
// predicates, and the fused trim form is what SqlExprToPrg's auto-wrap
// produces.
//
// Calls whose callee is any other expression evaluate the callee, then
// the arguments, and finish with PcOpDo.
func (g *generator) emitCall(e *ast.CallExpr) {
	ident, named := e.Func.(*ast.IdentExpr)
	if !named {
		g.emitExpr(e.Func)
		for _, arg := range e.Args {
			g.emitExpr(arg)
		}
		g.emit(hbrt.PcOpDo)
		g.emitU16(uint16(len(e.Args)))
		return
	}

	// fieldIndex recognizes FieldGet(<int literal>) and returns the
	// literal as a field index when it fits the uint16 operand.
	fieldIndex := func(call *ast.CallExpr) (uint16, bool) {
		callee, ok := call.Func.(*ast.IdentExpr)
		if !ok || !strings.EqualFold(callee.Name, "FieldGet") || len(call.Args) != 1 {
			return 0, false
		}
		lit, ok := call.Args[0].(*ast.LiteralExpr)
		if !ok || lit.Kind != token.INT {
			return 0, false
		}
		n, err := strconv.Atoi(lit.Value)
		if err != nil || n <= 0 || n > 0xFFFF {
			return 0, false
		}
		return uint16(n), true
	}

	if n, ok := fieldIndex(e); ok {
		g.emit(hbrt.PcOpFieldGet)
		g.emitU16(n)
		return
	}

	if strings.EqualFold(ident.Name, "AllTrim") && len(e.Args) == 1 {
		if inner, ok := e.Args[0].(*ast.CallExpr); ok {
			if n, ok := fieldIndex(inner); ok {
				g.emit(hbrt.PcOpFieldTrim)
				g.emitU16(n)
				return
			}
		}
	}

	// Generic named call.
	g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(ident.Name))
	g.emit(hbrt.PcOpPushNil)
	for _, arg := range e.Args {
		g.emitExpr(arg)
	}
	g.emit(hbrt.PcOpFunction)
	g.emitU16(uint16(len(e.Args)))
}
// emitCallStmt compiles a call used as a statement. A callee named by a
// plain identifier is pushed as an upper-cased symbol followed by NIL;
// any other callee expression is evaluated as-is. In both cases the
// arguments follow and the call is made with PcOpDo carrying the
// argument count.
func (g *generator) emitCallStmt(e *ast.CallExpr) {
	switch callee := e.Func.(type) {
	case *ast.IdentExpr:
		g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(callee.Name))
		g.emit(hbrt.PcOpPushNil)
	default:
		g.emitExpr(e.Func)
	}

	for _, arg := range e.Args {
		g.emitExpr(arg)
	}
	g.emit(hbrt.PcOpDo)
	g.emitU16(uint16(len(e.Args)))
}
// emitAssign compiles an assignment used as a statement.
//
// Supported targets are local variables and fields of Self. For plain
// `:=` the right-hand side is evaluated and stored. For the compound
// operators (+=, -=, *=, /=, %=, ^=) the current value of the target is
// pushed first, then the right-hand side, then the matching binary
// opcode, and the result is stored back. Without that fold `n += i`
// behaved as `n := i`.
//
// Compound assignment to a Self field previously took the plain-store
// path, so `::n += 1` compiled as `::n := 1`. It now gets the same
// read-modify-write sequence as locals.
//
// An operator that compoundBinOp does not recognise is treated as a
// plain store. For any other target (e.g. a non-local identifier) the
// right-hand side is evaluated and discarded, as before.
func (g *generator) emitAssign(a *ast.AssignExpr) {
	var (
		binOp    byte
		compound bool
	)
	if a.Op != token.ASSIGN {
		binOp, compound = compoundBinOp(a.Op)
	}

	switch left := a.Left.(type) {
	case *ast.IdentExpr:
		if idx, found := g.locals[left.Name]; found {
			if compound {
				// Current value of the local is the left operand.
				g.emit(hbrt.PcOpPushLocal)
				g.emitU16(uint16(idx))
			}
			g.emitExpr(a.Right)
			if compound {
				g.emit(binOp)
			}
			g.emit(hbrt.PcOpPopLocal)
			g.emitU16(uint16(idx))
			return
		}

	case *ast.SendExpr:
		if _, isSelf := left.Object.(*ast.SelfExpr); isSelf {
			field := strings.ToUpper(left.Method)
			if compound {
				// Current value of the field is the left operand.
				g.emitString(hbrt.PcOpPushSelfField, field)
			}
			g.emitExpr(a.Right)
			if compound {
				g.emit(binOp)
			}
			g.emitString(hbrt.PcOpSetSelfField, field)
			return
		}
	}

	// Unsupported target: evaluate the right-hand side and drop the
	// value so the stack stays balanced.
	g.emitExpr(a.Right)
	g.emit(hbrt.PcOpPop)
}
// compoundBinOp translates a compound-assignment token (`+=`, `-=`,
// `*=`, `/=`, `%=`, `^=`) into the binary opcode that combines the
// target's current value with the right-hand side. The boolean is false
// for every other token, including plain ASSIGN, in which case the
// caller should take the plain-store path.
func compoundBinOp(k token.Kind) (byte, bool) {
	var op byte
	switch k {
	case token.PLUSEQ:
		op = hbrt.PcOpPlus
	case token.MINUSEQ:
		op = hbrt.PcOpMinus
	case token.STAREQ:
		op = hbrt.PcOpMult
	case token.SLASHEQ:
		op = hbrt.PcOpDivide
	case token.PERCENTEQ:
		op = hbrt.PcOpMod
	case token.POWEREQ:
		op = hbrt.PcOpPower
	default:
		return 0, false
	}
	return op, true
}
// parseInt64 converts the text of an integer literal to int64.
//
// Hexadecimal literals (`0x…` / `0X…`, optionally preceded by '-') are
// parsed in base 16. Previously the scanner kept only the characters
// '0'..'9', so `0x1F` came out as 1.
//
// Everything else goes through the lenient decimal scan the function
// always had: characters other than '0'..'9' are skipped, a leading '-'
// negates the result, and an empty string yields 0. Overflow wraps; no
// error is reported.
func parseInt64(s string) int64 {
	neg := len(s) > 0 && s[0] == '-'

	digits := s
	if neg {
		digits = s[1:]
	}
	if len(digits) > 2 && digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X') {
		if u, err := strconv.ParseUint(digits[2:], 16, 64); err == nil {
			v := int64(u)
			if neg {
				v = -v
			}
			return v
		}
		// Malformed hex: fall through to the lenient scan below.
	}

	var v int64
	for _, c := range s {
		if c >= '0' && c <= '9' {
			v = v*10 + int64(c-'0')
		}
	}
	if neg {
		v = -v
	}
	return v
}
// parseFloat64 converts the text of a floating-point literal to float64.
//
// Well-formed input is handed to strconv.ParseFloat, which returns the
// correctly rounded nearest float64 and understands exponent notation.
// The previous hand-rolled accumulation multiplied each fractional
// digit by a running power of 0.1, so `0.3` came out as
// 0.30000000000000004, and it ignored the exponent marker, so `1e3`
// came out as 13.
//
// An out-of-range literal returns whatever ParseFloat produced (±Inf on
// overflow). Text that ParseFloat rejects as malformed falls back to the
// original lenient scan: digits and a '.' are consumed, every other
// character is skipped, and a leading '-' negates the result; the empty
// string yields 0.
func parseFloat64(s string) float64 {
	parsed, err := strconv.ParseFloat(s, 64)
	if err == nil {
		return parsed
	}
	if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
		return parsed
	}

	// Lenient fallback for malformed text.
	var v float64
	var dec float64
	inDec := false
	for _, c := range s {
		if c == '.' {
			inDec = true
			dec = 0.1
			continue
		}
		if c >= '0' && c <= '9' {
			if inDec {
				v += float64(c-'0') * dec
				dec *= 0.1
			} else {
				v = v*10 + float64(c-'0')
			}
		}
	}
	if len(s) > 0 && s[0] == '-' {
		v = -v
	}
	return v
}