Files
five/hbrt/vm.go
CharlesKWON 1f63c7fe63 perf(vm): symbol hoist + Function() stack shift — global 3-15%
The VM call path (PushSymbol → Function → Frame) is traversed by every
PRG function call. Three changes together cut per-call overhead across
the entire bench suite.

Changes
 - hbrt/call.go Function(): replace pop-push dance with a single slice
   shift (N+2 pops + N pushes → 1 copy of N slots + sp adjust). Kills
   the per-call `make([]Value, nArgs)` heap alloc. Resolved function
   pointer is cached back into sym.Func so subsequent calls on the
   same Symbol skip the VM lookup entirely.
 - hbrt/vm.go GetSym(): new helper. Generated code calls it with a
   pointer to a package-level `*Symbol` slot so FindSymbol (which takes
   the VM RWMutex + map lookup) runs at most once per symbol per
   process. Nil results are intentionally NOT cached — an init-order
   miss becomes a retry on the next call instead of a permanent sticky
   failure.
 - hbrt/thread.go pushPendingSym(): scalar fast slot for depth=1 call
   nesting (common case). Nil syms still go through the slice so the
   "empty vs stored nil" ambiguity can't produce a false pop.
 - compiler/gengo/gengo.go: emit `t.PushSymbol(t.GetSym(&_sym_<file>_<NAME>, "NAME"))`
   for every function call site, with a per-file prefix so multi-PRG
   builds don't collide on identical symbol names.

Bugs fixed during bring-up
 - pendingSymFast == nil was ambiguous ("unused" vs "nil stored"). Nil
   syms now spill to the slice, preserving distinguishability.
 - The old varName-reuse branch at the PushSymbol emit site skipped
   the GetSym wrapper, emitting a raw `t.PushSymbol(varName)` against
   an uninitialized package-level *Symbol. Every call path now funnels
   through emitPushSymbol.

bench_sql deltas vs prior build
 - B1  SELECT *          114 →  97 µs   (15%)
 - B4  GROUP_HAVING      584 → 554 µs   (5%)
 - B8  RECURSIVE CTE     150 → 141 µs   (6%)
 - B10 RANK PARTITION    310 → 296 µs   (5%)
 - B11 SUM OVER          335 → 320 µs   (4%)
 - B14 COUNT             295 → 281 µs   (5%)
 - B15 CTE+WIN+JOIN     1891 → 1826 µs  (3%)

Verification
 - go test ./...               ALL PASS
 - FiveSql2 test_sql1999       43/43
 - tests/compat_harbour        56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 20:41:48 +09:00

198 lines
4.9 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package hbrt
import "sync"
// VM is the shared state across all threads.
type VM struct {
mu sync.RWMutex
modules []*Module
symbols map[string]*Symbol
statics map[string][]Value
threads []*Thread // all threads created (for shutdown cleanup)
waFactory func() interface{} // creates WorkAreaManager for new threads
onExit func() // called when Run() finishes (restore terminal etc.)
Debugger *Debugger // nil = no debugging; set by five debug command
}
// SetWAFactory sets the factory for creating WorkAreaManagers.
func (vm *VM) SetWAFactory(f func() interface{}) {
vm.waFactory = f
}
// SetOnExit sets a callback for when Run() finishes.
func (vm *VM) SetOnExit(f func()) {
vm.onExit = f
}
// Library modules registered via init() — protected by mutex for FRB concurrent loading.
var (
libModules []*Module
dynamicFuncs []Symbol // from HB_FUNC() in #pragma BEGINDUMP
libRegistryMu sync.Mutex
)
// RegisterLibModule registers a module from a library PRG file.
// Called by init() in generated library code.
func RegisterLibModule(m *Module) {
libRegistryMu.Lock()
libModules = append(libModules, m)
libRegistryMu.Unlock()
}
// RegisterDynamicFunc registers a Go function callable from PRG.
// Called from init() in #pragma BEGINDUMP code via HB_FUNC().
func RegisterDynamicFunc(name string, fn func(*Thread)) {
libRegistryMu.Lock()
dynamicFuncs = append(dynamicFuncs, Symbol{
Name: name,
Scope: FsPublic | FsLocal,
Func: fn,
})
libRegistryMu.Unlock()
}
// RegisterLibModules registers any pending lib modules and dynamic functions.
func (vm *VM) RegisterLibModules() {
libRegistryMu.Lock()
mods := libModules
libModules = nil
dyns := dynamicFuncs
dynamicFuncs = nil
libRegistryMu.Unlock()
for _, m := range mods {
vm.RegisterModule(m)
}
for i := range dyns {
sym := &dyns[i]
vm.RegisterSymbol(sym)
}
dynamicFuncs = nil
}
// NewVM creates a new VM instance.
func NewVM() *VM {
return &VM{
modules: make([]*Module, 0),
symbols: make(map[string]*Symbol),
statics: make(map[string][]Value),
}
}
// RegisterModule registers a module's symbols with the VM.
func (vm *VM) RegisterModule(m *Module) {
vm.mu.Lock()
defer vm.mu.Unlock()
vm.modules = append(vm.modules, m)
for i := range m.Symbols {
sym := &m.Symbols[i]
vm.symbols[sym.Name] = sym
}
}
// RegisterSymbol registers a single symbol.
func (vm *VM) RegisterSymbol(sym *Symbol) {
vm.mu.Lock()
defer vm.mu.Unlock()
vm.symbols[sym.Name] = sym
}
// UnregisterSymbol removes a symbol by name. Returns the old symbol if any.
func (vm *VM) UnregisterSymbol(name string) *Symbol {
vm.mu.Lock()
defer vm.mu.Unlock()
old := vm.symbols[name]
delete(vm.symbols, name)
return old
}
// SymbolNames returns all registered symbol names.
func (vm *VM) SymbolNames() []string {
vm.mu.RLock()
defer vm.mu.RUnlock()
names := make([]string, 0, len(vm.symbols))
for n := range vm.symbols {
names = append(names, n)
}
return names
}
// FindSymbol looks up a symbol by name.
func (vm *VM) FindSymbol(name string) *Symbol {
vm.mu.RLock()
defer vm.mu.RUnlock()
return vm.symbols[name]
}
// GetSym returns the cached Symbol, performing a one-time FindSymbol
// lookup on first access and stashing the pointer in *cache for all
// subsequent calls. Generated code (gengo) declares a package-level
// `var _sym_NAME *Symbol` per unique call target and routes every
// PushSymbol through this helper so the hot path becomes a single
// non-nil check instead of vm.symbols map + RWMutex per invocation.
func (t *Thread) GetSym(cache **Symbol, name string) *Symbol {
if s := *cache; s != nil {
return s
}
s := t.vm.FindSymbol(name)
if s != nil {
// Only cache successful resolutions — nil might be due to
// init-order (another module's registrations pending);
// retry on next call once those complete.
*cache = s
}
return s
}
// NewThread creates a new Thread attached to this VM.
func (vm *VM) NewThread() *Thread {
t := NewThread(vm)
vm.mu.Lock()
vm.threads = append(vm.threads, t)
vm.mu.Unlock()
return t
}
// Run starts execution from the named function.
func (vm *VM) Run(funcName string) Value {
// Register any library modules from init()
for _, m := range libModules {
vm.RegisterModule(m)
}
libModules = nil
sym := vm.FindSymbol(funcName)
if sym == nil {
panic("function not found: " + funcName)
}
if sym.Func == nil {
panic("function has no implementation: " + funcName)
}
t := vm.NewThread()
// Auto-initialize WorkAreaManager if not set
if t.WA == nil && vm.waFactory != nil {
t.WA = vm.waFactory()
}
// Copy statics to thread
vm.mu.RLock()
for k, v := range vm.statics {
t.statics[k] = v
}
vm.mu.RUnlock()
// Install signal handlers for clean shutdown
vm.InstallSignalHandlers()
// Call the function, ensure full shutdown on exit
defer vm.Shutdown()
sym.Func(t)
return t.retVal
}