Files
five/hbrt/thread.go
CharlesKWON 1f63c7fe63 perf(vm): symbol hoist + Function() stack shift — global 3-15%
The VM call path (PushSymbol → Function → Frame) is traversed by every
PRG function call. Three changes together cut per-call overhead across
the entire bench suite.

Changes
 - hbrt/call.go Function(): replace pop-push dance with a single slice
   shift (N+2 pops + N pushes → 1 copy of N slots + sp adjust). Kills
   the per-call `make([]Value, nArgs)` heap alloc. Resolved function
   pointer is cached back into sym.Func so subsequent calls on the
   same Symbol skip the VM lookup entirely.
 - hbrt/vm.go GetSym(): new helper. Generated code calls it with a
   pointer to a package-level `*Symbol` slot so FindSymbol (which takes
   the VM RWMutex + map lookup) runs at most once per symbol per
   process. Nil results are intentionally NOT cached — an init-order
   miss becomes a retry on the next call instead of a permanent sticky
   failure.
 - hbrt/thread.go pushPendingSym(): scalar fast slot for depth=1 call
   nesting (common case). Nil syms still go through the slice so the
   "empty vs stored nil" ambiguity can't produce a false pop.
 - compiler/gengo/gengo.go: emit `t.PushSymbol(t.GetSym(&_sym_<file>_<NAME>, "NAME"))`
   for every function call site, with a per-file prefix so multi-PRG
   builds don't collide on identical symbol names.

Bugs fixed during bring-up
 - pendingSymFast == nil was ambiguous ("unused" vs "nil stored"). Nil
   syms now spill to the slice, preserving distinguishability.
 - The old varName-reuse branch at the PushSymbol emit site skipped
   the GetSym wrapper, emitting a raw `t.PushSymbol(varName)` against
   an uninitialized package-level *Symbol. Every call path now funnels
   through emitPushSymbol.

bench_sql deltas vs prior build
 - B1  SELECT *          114 →  97 µs   (15%)
 - B4  GROUP_HAVING      584 → 554 µs   (5%)
 - B8  RECURSIVE CTE     150 → 141 µs   (6%)
 - B10 RANK PARTITION    310 → 296 µs   (5%)
 - B11 SUM OVER          335 → 320 µs   (4%)
 - B14 COUNT             295 → 281 µs   (5%)
 - B15 CTE+WIN+JOIN     1891 → 1826 µs  (3%)

Verification
 - go test ./...               ALL PASS
 - FiveSql2 test_sql1999       43/43
 - tests/compat_harbour        56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 20:41:48 +09:00

714 lines
19 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package hbrt
import (
"fmt"
"os"
)
// Default sizes and hard limits for the eval stack and the call stack.
const (
DefaultStackSize = 2048 // initial eval stack capacity (length allocated by NewThread)
// MaxStackSize is the eval stack limit: push panics with "stack overflow"
// when the stack is full and sp has reached this value.
MaxStackSize = 65536
// MaxCallDepth is the call stack limit: Frame panics with
// "call stack overflow" once this many frames are active.
MaxCallDepth = 256
InitialCallDepth = 32 // start small, grow if needed (Frame doubles it, capped at MaxCallDepth)
)
// CallFrame saves the state of a function call.
// Harbour equivalent: HB_STACK_STATE
//
// Frames are stored in Thread.calls and populated by Thread.Frame.
type CallFrame struct {
symbol *Symbol // function symbol (for debugging/profiling); copied from Thread.pendingCallSym by Frame
base int // stack base (start of this frame's args); Frame resets sp to this after moving args into locals
localBase int // where locals start in the locals slice
localCount int // number of locals in this frame (declared params + locals)
paramCount int // number of parameters passed by the caller (not the declared count)
retVal Value // return value; Frame initialises it to NIL
}
// CurFrame exposes the frame of the call that is currently executing
// (used for closure capture). The result is nil when no frame is active.
func (t *Thread) CurFrame() *CallFrame {
	return t.curFrame
}
// LocalsSlice hands out the thread's flat locals array itself, not a
// copy (used for closure capture).
func (t *Thread) LocalsSlice() []Value {
	return t.locals
}
// GetLocal returns local variable n (1-based) of a captured frame,
// read from the supplied locals array at f.localBase + n - 1.
func (f *CallFrame) GetLocal(n int, locals []Value) Value {
	return locals[f.localBase+n-1]
}
// SetLocal stores v into local variable n (1-based) of a captured
// frame, writing to the supplied locals array at f.localBase + n - 1.
func (f *CallFrame) SetLocal(n int, v Value, locals []Value) {
	slot := f.localBase + n - 1
	locals[slot] = v
}
// Thread is the per-goroutine execution context.
// Harbour equivalent: HB_STACK (thread-local stack)
//
// Each goroutine that runs Harbour code gets its own Thread.
// No locking needed for stack/locals/calls — they are goroutine-local.
type Thread struct {
// Eval stack (goroutine-local, no lock needed)
stack []Value
sp int // stack pointer (next free slot)
// Local variables: flat array, each frame gets a slice via localBase+localCount
locals []Value
// Call stack
calls []CallFrame
callSP int // call stack pointer
// curFrame points at calls[callSP-1], or is nil when no frame is active.
curFrame *CallFrame
// Return value (passed between caller/callee)
retVal Value
// Pending function call stack (PushSymbol pushes, Function pops).
// Depth=1 is the common case (non-nested call) and gets a scalar
// fast slot to skip slice append/trim; nested calls and nil symbols
// fall back to the heap slice. pushPendingSym fills the fast slot
// only when both the slot and the slice are empty, and spills the
// slot into the slice before appending, so the two are never
// occupied at the same time:
// pendingSymFast set → slice empty
// slice non-empty → pendingSymFast is nil.
pendingSymFast *Symbol
pendingSyms []*Symbol
pendingParams int // number of params for next Frame call
pendingCallSym *Symbol // symbol for next Frame (for PROCNAME)
// STATIC variables (per-module, shared but rarely written)
// Accessed via PushStatic/PopStatic with module reference
statics map[string][]Value
// OOP: current Self object (set during method dispatch)
self Value
// Error handling: last error from BEGIN SEQUENCE
lastError *HbError
// MEMVAR: PUBLIC/PRIVATE variables (shared across call stack)
Memvars *MemvarTable
// WorkArea manager (goroutine-local, no locks needed)
WA interface{} // *hbrdd.WorkAreaManager — set by caller to avoid import cycle
// FastFieldGetter is a hot-path closure set by SqlScan (or any other
// scan loop) to short-circuit PcOpFieldGet. When non-nil, the pcode
// interpreter calls this instead of going through PushSymbol +
// Function dispatch + FieldGet RTL's own Frame/EndProc. Caller is
// responsible for setting and clearing it around a scan.
FastFieldGetter func(int) Value
waStack []uint16 // saved workarea numbers for (expr)->(expr) context switching
// VM reference (shared, read-mostly)
vm *VM
}
// NewThread builds an execution thread bound to vm. The eval stack
// starts with DefaultStackSize slots, the call stack with
// InitialCallDepth frames and the locals array with 256 slots; each
// of them is grown on demand. The statics map and the memvar table
// start out empty.
func NewThread(vm *VM) *Thread {
	t := new(Thread)
	t.stack = make([]Value, DefaultStackSize)
	t.locals = make([]Value, 256) // will grow as needed
	t.calls = make([]CallFrame, InitialCallDepth)
	t.statics = make(map[string][]Value)
	t.Memvars = NewMemvarTable()
	t.vm = vm
	return t
}
// --- Stack operations ---

// push places v on top of the eval stack. When the backing slice is
// full it is doubled; if the stack pointer has already reached
// MaxStackSize at that point, push panics with a "stack overflow"
// runtime error instead.
func (t *Thread) push(v Value) {
	if top := t.sp; top >= len(t.stack) {
		if top >= MaxStackSize {
			panic(t.runtimeError("stack overflow"))
		}
		grown := make([]Value, 2*len(t.stack))
		copy(grown, t.stack[:top])
		t.stack = grown
	}
	t.stack[t.sp] = v
	t.sp++
}
// pop removes and returns the top eval-stack item. The vacated slot
// is overwritten with cachedNil so the stack does not keep the popped
// value alive. Panics with a "stack underflow" runtime error when the
// stack is empty.
func (t *Thread) pop() Value {
	if t.sp <= 0 {
		panic(t.runtimeError("stack underflow"))
	}
	t.sp--
	slot := &t.stack[t.sp]
	top := *slot
	*slot = cachedNil
	return top
}
// peek returns the top eval-stack item without removing it. Panics
// with a runtime error when the stack is empty.
func (t *Thread) peek() Value {
	if t.sp > 0 {
		return t.stack[t.sp-1]
	}
	panic(t.runtimeError("stack underflow (peek)"))
}
// peekPtr returns a pointer to the top eval-stack slot so the caller
// can modify it in place. Panics with a runtime error when the stack
// is empty.
func (t *Thread) peekPtr() *Value {
	if t.sp > 0 {
		return &t.stack[t.sp-1]
	}
	panic(t.runtimeError("stack underflow (peekPtr)"))
}
// setTop overwrites the top eval-stack item with v. Panics with a
// runtime error when the stack is empty.
func (t *Thread) setTop(v Value) {
	top := t.sp - 1
	if top < 0 {
		panic(t.runtimeError("stack underflow (setTop)"))
	}
	t.stack[top] = v
}
// stackAt returns a pointer to the eval-stack item at the given
// offset relative to the top: 0 is the top, -1 the item below it, and
// so on. Offsets that land outside the live part of the stack panic
// with a runtime error.
func (t *Thread) stackAt(offset int) *Value {
	if pos := t.sp - 1 + offset; pos >= 0 && pos < t.sp {
		return &t.stack[pos]
	}
	panic(t.runtimeError("stack access out of range"))
}
// --- Push convenience methods (used by generated code) ---

// PushNil pushes a NIL value onto the eval stack.
func (t *Thread) PushNil() {
	t.push(MakeNil())
}
// PushBool pushes b, wrapped with MakeBool, onto the eval stack.
func (t *Thread) PushBool(b bool) {
	t.push(MakeBool(b))
}
// PushInt pushes n, wrapped with MakeInt, onto the eval stack.
func (t *Thread) PushInt(n int) {
	t.push(MakeInt(n))
}
// PushLong pushes n, wrapped with MakeLong, onto the eval stack.
func (t *Thread) PushLong(n int64) {
	t.push(MakeLong(n))
}
// PushDouble pushes the value built by MakeDouble(v, length, decimal)
// onto the eval stack.
func (t *Thread) PushDouble(v float64, length, decimal uint16) {
	d := MakeDouble(v, length, decimal)
	t.push(d)
}
// PushString pushes s, wrapped with MakeString, onto the eval stack.
func (t *Thread) PushString(s string) {
	t.push(MakeString(s))
}
// PushValue pushes an already-built Value onto the eval stack.
func (t *Thread) PushValue(v Value) {
	t.push(v)
}
// Pop removes the top eval-stack item and discards it.
func (t *Thread) Pop() {
	_ = t.pop()
}
// Pop2 removes the top eval-stack item and returns it.
func (t *Thread) Pop2() Value {
	return t.pop()
}
// Dup pushes a second copy of the top eval-stack item.
func (t *Thread) Dup() {
	top := t.peek()
	t.push(top)
}
// --- Frame management ---

// Frame opens a new call frame for a function that declares `params`
// parameters and `locals` local variables.
// Harbour: hb_xvmFrame(params, locals)
//
// It is called at the start of every function. The arguments left on
// the eval stack by the caller (t.pendingParams of them) are moved into
// the flat locals array, every remaining slot of the frame is set to
// NIL, the stack pointer is reset to the frame base and the frame
// becomes t.curFrame. Panics with a "call stack overflow" runtime
// error once MaxCallDepth frames are active.
//
// NOTE(review): when the caller passed more arguments than `params`,
// only the topmost `params` stack items are copied and removed; the
// surplus items below them stay on the eval stack. Presumably the
// caller accounts for that — confirm against Function().
func (t *Thread) Frame(params, locals int) {
	depth := t.callSP
	if depth >= MaxCallDepth {
		panic(t.runtimeError("call stack overflow"))
	}

	// Double the call stack (capped at MaxCallDepth) when it is full.
	if depth >= len(t.calls) {
		grown := len(t.calls) * 2
		if grown > MaxCallDepth {
			grown = MaxCallDepth
		}
		bigger := make([]CallFrame, grown)
		copy(bigger, t.calls)
		t.calls = bigger
	}

	// This frame's slots start right after the enclosing frame's slots.
	localBase := 0
	if outer := t.curFrame; outer != nil {
		localBase = outer.localBase + outer.localCount
	}
	slots := params + locals
	if need := localBase + slots; need > len(t.locals) {
		bigger := make([]Value, need*2)
		copy(bigger, t.locals)
		t.locals = bigger
	}

	// Take no more arguments than were declared, and no more than the
	// eval stack actually holds (the caller may have pushed fewer).
	passed := t.pendingParams
	actual := passed
	if actual > params {
		actual = params
	}
	if actual > t.sp {
		actual = t.sp
	}

	frame := &t.calls[depth]
	frame.symbol = t.pendingCallSym
	frame.base = t.sp - actual // only the args that are really on the stack
	frame.localBase = localBase
	frame.localCount = slots
	frame.paramCount = passed // what the caller passed, not the declared count
	frame.retVal = MakeNil()
	t.pendingCallSym = nil

	// Move the passed arguments into the first local slots...
	for i := 0; i < actual; i++ {
		t.locals[localBase+i] = t.stack[frame.base+i]
	}
	// ...and start missing parameters and all locals out as NIL.
	for i := actual; i < slots; i++ {
		t.locals[localBase+i] = MakeNil()
	}

	// The arguments now live in locals; drop them from the eval stack.
	t.sp = frame.base
	t.curFrame = frame
	t.callSP = depth + 1
}
// EndProc is called via defer at the end of every function. It always
// restores the previous call frame. If the function is unwinding
// because of a panic, the panic value is recovered and then raised
// again so that the generated SEQUENCE/RECOVER handler can catch it:
//   - *HbError and hbrtl.BreakValue (from Break() in hbrtl) are
//     re-panicked silently;
//   - any other value is re-panicked after a diagnostic line has been
//     written to stderr.
//
// hbrtl cannot be imported here (import cycle), so BreakValue is
// recognised by its type name.
func (t *Thread) EndProc() {
	// recover must be called directly by the deferred function.
	r := recover()
	t.endFrame()
	if r == nil {
		return
	}
	if _, isHbErr := r.(*HbError); !isHbErr && fmt.Sprintf("%T", r) != "hbrtl.BreakValue" {
		fmt.Fprintf(os.Stderr, "Five runtime error: %v [recovered, repanicked]\n", r)
	}
	panic(r)
}
// EndProcFast restores the previous call frame without calling
// recover(). It is meant for RTL functions that do not need the panic
// handling EndProc performs.
func (t *Thread) EndProcFast() {
	t.endFrame()
}
// endFrame pops the innermost call frame and makes the one below it
// current (nil when none is left). It does nothing when no frame is
// active.
func (t *Thread) endFrame() {
	if t.callSP <= 0 {
		return
	}
	t.callSP--
	if t.callSP == 0 {
		t.curFrame = nil
		return
	}
	t.curFrame = &t.calls[t.callSP-1]
}
// EndProcNoRecover restores the previous call frame without calling
// recover() (used by Break). The frame bookkeeping is exactly the one
// done by endFrame, so it is delegated to it.
func (t *Thread) EndProcNoRecover() {
	t.endFrame()
}
// --- Local variable access ---
// Harbour convention: local index 1-based (1 = first param or local)

// PushLocal pushes local n of the current frame onto the eval stack.
// A by-reference slot is dereferenced, so the value held by its
// RefCell is pushed. Panics if n is outside the current frame.
func (t *Thread) PushLocal(n int) {
	v := t.locals[t.localIndex(n)]
	if v.Type() == tByref {
		v = (*HbRefCell)(v.ptr).V
	}
	t.push(v)
}
// PopLocal pops the top eval-stack item into local n of the current
// frame. When the slot holds a RefCell the value is written through
// to the cell instead of replacing it. Panics if n is outside the
// current frame.
func (t *Thread) PopLocal(n int) {
	idx := t.localIndex(n)
	val := t.pop()
	cur := t.locals[idx]
	if cur.Type() != tByref {
		t.locals[idx] = val
		return
	}
	(*HbRefCell)(cur.ptr).V = val
}
// Local returns local n of the current frame, dereferencing a RefCell
// if the slot holds one. Panics if n is outside the current frame.
func (t *Thread) Local(n int) Value {
	v := t.locals[t.localIndex(n)]
	if v.Type() != tByref {
		return v
	}
	return (*HbRefCell)(v.ptr).V
}
// SetLocal stores v into local n of the current frame. When the slot
// holds a RefCell the value is written through to the cell. Panics if
// n is outside the current frame.
func (t *Thread) SetLocal(n int, v Value) {
	idx := t.localIndex(n)
	cur := t.locals[idx]
	if cur.Type() != tByref {
		t.locals[idx] = v
		return
	}
	(*HbRefCell)(cur.ptr).V = v
}
// Fast variants — no bounds checking (gengo guarantees valid indices).
// Byref-aware: transparently dereference/write-through RefCell.

// PushLocalFast is PushLocal without the frame/range check: it reads
// slot curFrame.localBase+n-1 directly and pushes its (dereferenced)
// value.
func (t *Thread) PushLocalFast(n int) {
	v := t.locals[t.curFrame.localBase+n-1]
	if v.Type() == tByref {
		v = (*HbRefCell)(v.ptr).V
	}
	t.push(v)
}
// PopLocalFast is PopLocal without the frame/range check: the popped
// value goes into slot curFrame.localBase+n-1, written through to the
// RefCell when the slot holds one.
func (t *Thread) PopLocalFast(n int) {
	idx := t.curFrame.localBase + n - 1
	val := t.pop()
	cur := t.locals[idx]
	if cur.Type() != tByref {
		t.locals[idx] = val
		return
	}
	(*HbRefCell)(cur.ptr).V = val
}
// LocalFast is Local without the frame/range check: it returns the
// (dereferenced) value of slot curFrame.localBase+n-1.
func (t *Thread) LocalFast(n int) Value {
	v := t.locals[t.curFrame.localBase+n-1]
	if v.Type() != tByref {
		return v
	}
	return (*HbRefCell)(v.ptr).V
}
// SetLocalFast is SetLocal without the frame/range check: v goes into
// slot curFrame.localBase+n-1, written through to the RefCell when
// the slot holds one.
func (t *Thread) SetLocalFast(n int, v Value) {
	idx := t.curFrame.localBase + n - 1
	cur := t.locals[idx]
	if cur.Type() != tByref {
		t.locals[idx] = v
		return
	}
	(*HbRefCell)(cur.ptr).V = v
}
// PushLocalRef pushes a by-reference handle to local n (for @param).
// If the slot is not yet a RefCell, its value is moved into a new
// cell and the slot is replaced by a reference to that cell; an
// existing RefCell is reused. Either way the caller's local and the
// pushed value end up pointing at the same cell.
func (t *Thread) PushLocalRef(n int) {
	idx := t.localIndex(n)
	ref := t.locals[idx]
	if ref.Type() != tByref {
		ref = MakeByref(&HbRefCell{V: ref})
		t.locals[idx] = ref // caller's local becomes the RefCell
	}
	t.push(ref) // callee receives the same RefCell
}
// LocalAsString returns AsString() of local n of the current frame
// (dereferenced like Local).
func (t *Thread) LocalAsString(n int) string {
	v := t.Local(n)
	return v.AsString()
}
// EnsureLocalRef makes sure local n of the current frame is a RefCell,
// wrapping its present value in a new cell when it is not one yet.
// Used by closure capture to enable shared mutable access. The index
// is not range-checked.
func (t *Thread) EnsureLocalRef(n int) {
	idx := t.curFrame.localBase + n - 1
	cur := t.locals[idx]
	if cur.Type() == tByref {
		return
	}
	t.locals[idx] = MakeByref(&HbRefCell{V: cur})
}
// LocalRaw returns local slot n of the current frame exactly as
// stored, i.e. without dereferencing a RefCell. Closure capture uses
// it to grab the RefCell itself. The index is not range-checked.
func (t *Thread) LocalRaw(n int) Value {
	idx := t.curFrame.localBase + n - 1
	return t.locals[idx]
}
// SetLocalRaw overwrites local slot n of the current frame with v as
// is, replacing any RefCell instead of writing through it. Closures
// use it to inject a shared RefCell into block locals. The index is
// not range-checked.
func (t *Thread) SetLocalRaw(n int, v Value) {
	idx := t.curFrame.localBase + n - 1
	t.locals[idx] = v
}
// LocalSetInt stores the integer val into local n of the current
// frame without going through the eval stack. When the slot holds a
// RefCell the value is written through to the cell. Panics if n is
// outside the current frame.
func (t *Thread) LocalSetInt(n int, val int) {
	idx := t.localIndex(n)
	iv := MakeInt(val)
	if cur := t.locals[idx]; cur.Type() == tByref {
		(*HbRefCell)(cur.ptr).V = iv
		return
	}
	t.locals[idx] = iv
}
// localIndex maps the 1-based local number n of the current frame to
// its index in t.locals. It panics with a runtime error when no frame
// is active or when n falls outside the slots owned by the frame.
func (t *Thread) localIndex(n int) int {
	f := t.curFrame
	if f == nil {
		panic(t.runtimeError("no active frame"))
	}
	first := f.localBase
	end := f.localBase + f.localCount
	idx := first + n - 1 // 1-based to 0-based
	if idx < first || idx >= end {
		panic(t.runtimeError(fmt.Sprintf("local variable index out of range: %d", n)))
	}
	return idx
}
// --- Memvar access ---

// PushMemvar pushes the value of memvar `name` onto the eval stack,
// or NIL when the memvar table has no such variable.
// Harbour: M->varname
func (t *Thread) PushMemvar(name string) {
	if v, found := t.Memvars.Get(name); found {
		t.push(v)
		return
	}
	t.push(MakeNil())
}
// PopMemvar pops the top eval-stack item into memvar `name`. When
// Set reports that the assignment did not happen, the variable is
// created as a PRIVATE at the current call depth.
// Harbour: M->varname := expr
func (t *Thread) PopMemvar(name string) {
	val := t.pop()
	if t.Memvars.Set(name, val) {
		return
	}
	// Auto-create as PRIVATE if it does not exist yet.
	t.Memvars.SetPrivate(name, val, t.callSP)
}
// DeclarePublic creates PUBLIC memvar `name` with a NIL value. An
// already existing memvar of that name is left untouched.
func (t *Thread) DeclarePublic(name string) {
	if t.Memvars.Exists(name) {
		return
	}
	t.Memvars.SetPublic(name, MakeNil())
}
// DeclarePrivate creates PRIVATE memvar `name` with a NIL value,
// registered at the current call depth.
func (t *Thread) DeclarePrivate(name string) {
	depth := t.callSP
	t.Memvars.SetPrivate(name, MakeNil(), depth)
}
// --- Return value ---

// RetValue pops the top eval-stack item and makes it the thread's
// return value.
func (t *Thread) RetValue() {
	v := t.pop()
	t.retVal = v
}
// RetInt sets the return value to MakeNumInt(n).
func (t *Thread) RetInt(n int64) {
	v := MakeNumInt(n)
	t.retVal = v
}
// RetNil sets the return value to NIL.
func (t *Thread) RetNil() {
	v := MakeNil()
	t.retVal = v
}
// RetString sets the return value to the string s.
func (t *Thread) RetString(s string) {
	v := MakeString(s)
	t.retVal = v
}
// RetBool sets the return value to the logical b.
func (t *Thread) RetBool(b bool) {
	v := MakeBool(b)
	t.retVal = v
}
// RetLong sets the return value to MakeLong(n).
func (t *Thread) RetLong(n int64) {
	v := MakeLong(n)
	t.retVal = v
}
// RetDouble sets the return value to MakeDouble(v, length, decimal).
func (t *Thread) RetDouble(v float64, length, decimal uint16) {
	d := MakeDouble(v, length, decimal)
	t.retVal = d
}
// RetPointer sets the return value to val wrapped by MakePointer.
func (t *Thread) RetPointer(val interface{}) {
	p := MakePointer(val)
	t.retVal = p
}
// RetVal sets the return value to the already-built Value v.
func (t *Thread) RetVal(v Value) {
	t.retVal = v
}
// PushRetValue pushes the thread's current return value (the result
// of the last call) onto the eval stack. The return value itself is
// left in place.
func (t *Thread) PushRetValue() {
	rv := t.retVal
	t.push(rv)
}
// GetRetValue reports the thread's current return value without
// touching the eval stack.
func (t *Thread) GetRetValue() Value {
	return t.retVal
}
// --- Error handling ---

// HbError represents a Harbour runtime error.
// It implements the error interface and is the value the runtime
// panics with (see runtimeError and argError).
type HbError struct {
Description string // human-readable message; included in Error()
Operation string // name of the failing operation; included in Error()
Args []Value // argument values attached to the error (set by argError)
SubSystem string // originating subsystem; the constructors in this file use "BASE"
GenCode int // generic error code; argError uses 1, runtimeError leaves it 0
}
// Error implements the error interface. The text has the form
// "Five runtime error: <Description> (op: <Operation>)".
func (e *HbError) Error() string {
	return "Five runtime error: " + e.Description + " (op: " + e.Operation + ")"
}
// runtimeError builds an HbError in the "BASE" subsystem carrying msg
// as its description. It only constructs the error; callers panic
// with it.
func (t *Thread) runtimeError(msg string) *HbError {
	err := new(HbError)
	err.Description = msg
	err.SubSystem = "BASE"
	return err
}
// argError builds an "argument error" HbError (subsystem "BASE",
// GenCode 1) for operation op, recording the offending args. It only
// constructs the error.
func (t *Thread) argError(op string, args ...Value) *HbError {
	err := new(HbError)
	err.Description = "argument error"
	err.Operation = op
	err.Args = args
	err.SubSystem = "BASE"
	err.GenCode = 1
	return err
}
// handleSequenceError remembers err as the thread's last error so
// that RECOVER USING can retrieve it (BEGIN SEQUENCE / RECOVER).
// Nothing else happens here: the recover block itself is implemented
// by the defer/recover pattern in the generated code.
func (t *Thread) handleSequenceError(err *HbError) {
	t.lastError = err
}
// VM reports the VM this thread was created for.
func (t *Thread) VM() *VM {
	owner := t.vm
	return owner
}
// ParamCount reports the pending parameter count, i.e. the value of
// t.pendingParams set by Function(nArgs). RTL functions call it
// BEFORE Frame() to learn how many arguments they were given; all
// existing RTL functions depend on this behavior.
//
// For PRG-level PCount(), use CallerParamCount() instead (via the
// PCount RTL).
func (t *Thread) ParamCount() int {
	n := t.pendingParams
	return n
}
// CallerParamCount reports how many parameters were passed to the
// calling PRG function, i.e. the frame one below the current one. It
// is used by the PCount() RTL, which needs its caller's count rather
// than its own. Returns 0 when fewer than two frames are active.
func (t *Thread) CallerParamCount() int {
	if t.callSP < 2 {
		return 0
	}
	caller := &t.calls[t.callSP-2]
	return caller.paramCount
}
// PendingParams2 sets the pending parameter count to n. It is used
// for direct block calls (AEval, ASort etc.).
func (t *Thread) PendingParams2(n int) {
	t.pendingParams = n
}
// pushPendingSym records sym as the innermost pending call symbol.
//
// A non-nil sym pushed while nothing else is pending goes into the
// scalar fast slot, avoiding any slice work for the common depth=1
// case. Everything else goes through the slice, after spilling an
// occupied fast slot into it first so that the order is preserved. A
// nil sym (unresolved symbol, caught later in Function() with a
// descriptive error) never uses the fast slot, because
// `pendingSymFast == nil` already means "empty"; keeping it in the
// slice preserves distinguishability.
func (t *Thread) pushPendingSym(sym *Symbol) {
	fast := t.pendingSymFast
	switch {
	case fast == nil && sym != nil && len(t.pendingSyms) == 0:
		t.pendingSymFast = sym
	case fast != nil:
		t.pendingSymFast = nil
		t.pendingSyms = append(t.pendingSyms, fast, sym)
	default:
		t.pendingSyms = append(t.pendingSyms, sym)
	}
}
// popPendingSym removes and returns the innermost pending call
// symbol. The slice is consulted first; when it is empty the fast
// slot is taken and cleared. Returns nil when nothing is pending (or
// when the stored symbol itself was nil).
func (t *Thread) popPendingSym() *Symbol {
	if last := len(t.pendingSyms) - 1; last >= 0 {
		sym := t.pendingSyms[last]
		t.pendingSyms = t.pendingSyms[:last]
		return sym
	}
	sym := t.pendingSymFast
	t.pendingSymFast = nil
	return sym
}
// PushAliasField pushes the value of `field` from the workarea named
// `alias` onto the eval stack.
// Harbour: alias->field
//
// The lookup is delegated to t.WA when it provides a GetAliasField
// method. If WA is nil or lacks that method, NIL is pushed.
func (t *Thread) PushAliasField(alias, field string) {
	type aliasGetter interface {
		GetAliasField(alias, field string) Value
	}
	// A comma-ok assertion on a nil interface simply yields ok == false,
	// so no separate nil check on t.WA is needed.
	if getter, ok := t.WA.(aliasGetter); ok {
		t.push(getter.GetAliasField(alias, field))
		return
	}
	t.push(MakeNil())
}
// PushDynAliasField pushes a field addressed through a dynamic alias,
// (expr)->field. The alias arrives here already as a string, so the
// work is identical to PushAliasField.
func (t *Thread) PushDynAliasField(alias, field string) {
	t.PushAliasField(alias, field)
}
// GetLastError reports the error most recently stored for BEGIN
// SEQUENCE handling, or nil if none has been stored.
func (t *Thread) GetLastError() *HbError {
	last := t.lastError
	return last
}
// --- STATIC variable access ---

// RegisterStatics associates the slice of STATIC variables with
// module, replacing any slice registered under that name before. The
// slice is stored as is, not copied.
func (t *Thread) RegisterStatics(module string, statics []Value) {
	t.statics[module] = statics
}
// PushStatic pushes STATIC variable n (1-based) of module onto the
// eval stack. Panics with a runtime error when n is out of range,
// which includes the case of a module with no registered statics.
func (t *Thread) PushStatic(module string, n int) {
	vars := t.statics[module]
	if n >= 1 && n <= len(vars) {
		t.push(vars[n-1])
		return
	}
	panic(t.runtimeError(fmt.Sprintf("static index out of range: %s[%d]", module, n)))
}
// PopStatic pops the top eval-stack item into STATIC variable n
// (1-based) of module. The index is validated before anything is
// popped; an out-of-range n panics with a runtime error.
func (t *Thread) PopStatic(module string, n int) {
	vars := t.statics[module]
	if n < 1 || n > len(vars) {
		panic(t.runtimeError(fmt.Sprintf("static index out of range: %s[%d]", module, n)))
	}
	val := t.pop()
	vars[n-1] = val
}
// --- Workarea context switching for (alias)->(expr) ---

// WASaveAndSelect remembers the currently selected workarea on
// t.waStack and then selects workarea number areaNum (converted to
// uint16). It does nothing when t.WA does not provide both Current
// and SelectByNum.
func (t *Thread) WASaveAndSelect(areaNum int) {
	type areaSwitcher interface {
		Current() uint16
		SelectByNum(uint16)
	}
	wam, ok := t.WA.(areaSwitcher)
	if !ok {
		return
	}
	t.waStack = append(t.waStack, wam.Current())
	wam.SelectByNum(uint16(areaNum))
}
// WASaveAndSelectAlias remembers the currently selected workarea on
// t.waStack and then selects the workarea known as alias. It does
// nothing when t.WA does not provide both Current and SelectByAlias.
func (t *Thread) WASaveAndSelectAlias(alias string) {
	type aliasSwitcher interface {
		Current() uint16
		SelectByAlias(string)
	}
	wam, ok := t.WA.(aliasSwitcher)
	if !ok {
		return
	}
	t.waStack = append(t.waStack, wam.Current())
	wam.SelectByAlias(alias)
}
// WARestore pops the most recently saved workarea number from
// t.waStack and re-selects it. The entry is removed even when t.WA
// cannot select by number; with nothing saved the call is a no-op.
func (t *Thread) WARestore() {
	top := len(t.waStack) - 1
	if top < 0 {
		return
	}
	saved := t.waStack[top]
	t.waStack = t.waStack[:top]
	if wam, ok := t.WA.(interface{ SelectByNum(uint16) }); ok {
		wam.SelectByNum(saved)
	}
}