Files
five/hbrt/pcinterp.go
CharlesKWON f4ed42556b checkpoint: season-wide bug fix campaign + infra
Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2
SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved
as a single checkpoint before refactoring the parser to delegate xBase
command translation to the preprocessor.

Highlights:

FiveSql2 engine (_FiveSql2/src/)
- prefix-glob index attach -> explicit convention (<table>_pk.ntx,
  <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop
- DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt)
- COUNT(DISTINCT col) parsed + aggregated via hSeen hash
- UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent)
- DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT)
- Derived table FROM (SELECT...) + JOIN right-side derived
- Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect
- LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs)
- DATE literal round-trip validation (Feb 29 non-leap rejected)
- CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists
- AlterTable type dispatcher comma-wrapped (1-char type "A" no longer
  matches CHARACTER)

Compiler / runtime
- gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity)
- gengo split: emit_block.go, emit_stmt.go, folding.go extracted
- parser/stmtreg.go nudges
- hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*),
  windows debug stubs collapsed
- thread/vm/value/class/pcinterp tightening from panic traces

RDD layer (hbrdd/)
- dbf: null bitmap support (null.go + null_test.go), mmap split
  (mmap_posix.go / mmap_windows.go), byte-level numeric parse
- ntx/cdx: windows mmap parity
- workarea + mem RDD: cross-area state-bleed fixes

RTL (hbrtl/)
- errorlog rewrite with platform-specific FD (errorlog_fd_unix /
  errorlog_fd_other)
- sqlscan, sqlhelpers, indexrtl, datetime extensions

Gates green at checkpoint:
- go test ./...        : PASS
- FiveSql2 SQL:1999    : 43/43
- Harbour compat       : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 09:26:25 +09:00

323 lines
7.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Five pcode interpreter — executes pcode bytecode on a Thread.
// Each opcode directly calls the corresponding Thread method,
// so pcode execution is semantically identical to gengo-compiled code.
package hbrt
import (
"encoding/binary"
"fmt"
"math"
)
// ExecPcode executes the pcode function fn on thread t. It is the
// general-purpose entry point for pcode evaluation.
//
// A frame sized by fn.Params and fn.Locals is pushed first, then t.EndProc
// is deferred so that it runs whether the body returns normally or unwinds
// with a panic (HbError, BreakValue, user Break). The recover / re-panic
// handling for those panics is expected to live inside EndProc, so EndProc
// must remain the directly deferred call: Go's recover only takes effect
// when invoked directly by the deferred function, not from a wrapper.
//
// mod is handed to the dispatch loop unchanged.
func ExecPcode(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
	nParams, nLocals := fn.Params, fn.Locals
	t.Frame(nParams, nLocals)

	// Registered only after Frame has been called, so EndProc never runs
	// for a frame that was not pushed.
	defer t.EndProc()

	execPcodeBody(t, fn, mod)
}
// ExecPcodeFast is a hot-path variant of ExecPcode for short, pure
// expressions (FiveSql2 WHERE predicates, inline lambdas) where the caller
// has already guaranteed that the body will not panic with HbError /
// BreakValue. It tears the frame down with EndProcFast instead of EndProc,
// avoiding EndProc's panic handling on every call of a scan loop.
//
// Contract: caller is responsible for panic discipline. If the pcode body
// panics anyway, the frame pushed here is still released (EndProcFast is
// deferred, so it runs during unwinding as well as on normal return), but
// no diagnostic is logged and SEQUENCE/RECOVER will not see the panic.
func ExecPcodeFast(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
	t.Frame(fn.Params, fn.Locals)
	// Deferred rather than called after the body: a plain trailing call is
	// skipped when execPcodeBody panics, which would leave this frame on
	// the thread and break the clean-up guarantee documented above. A
	// single straight-line defer like this is cheap on modern Go toolchains.
	defer t.EndProcFast()
	execPcodeBody(t, fn, mod)
}
// execPcodeBody is the opcode dispatch loop shared by ExecPcode and
// ExecPcodeFast. Starting at offset 0 of fn.Code it decodes one opcode byte
// per iteration, reads that opcode's inline little-endian operands, and
// forwards the work to the matching Thread method.
//
// The loop ends when the code is exhausted or when a Return, RetValue,
// EndProc or Halt opcode is reached. Pushing and releasing the frame is the
// caller's job; this function does neither.
//
// Operands are not bounds-checked: truncated bytecode surfaces as a Go
// runtime panic from the slice access, and an unrecognised opcode panics
// with its value and offset.
func execPcodeBody(t *Thread, fn *PcodeFunc, mod *PcodeModule) {
	code := fn.Code
	pc := 0 // offset of the next byte to decode

	for pc < len(code) {
		opAt := pc // remembered for the unknown-opcode diagnostic
		op := code[opAt]
		pc = opAt + 1

		switch op {
		case PcOpNop:
			// No operands, no effect.

		// --- Constants and stack shuffling ---
		case PcOpPushNil, PcOpPushNilArg:
			t.PushNil()

		case PcOpPushTrue:
			t.PushBool(true)

		case PcOpPushFalse:
			t.PushBool(false)

		case PcOpPushBool:
			// One operand byte; any non-zero value means true.
			flag := code[pc]
			pc++
			t.PushBool(flag != 0)

		case PcOpPushInt:
			// 8-byte operand reinterpreted as a signed 64-bit integer.
			raw := binary.LittleEndian.Uint64(code[pc:])
			pc += 8
			t.PushLong(int64(raw))

		case PcOpPushDouble:
			// 8-byte operand holding the IEEE-754 bit pattern.
			raw := binary.LittleEndian.Uint64(code[pc:])
			pc += 8
			t.PushDouble(math.Float64frombits(raw), 0, 0)

		case PcOpPushString:
			lit, next := pcOperandStr(code, pc)
			pc = next
			t.PushString(lit)

		case PcOpPushLocal:
			slot := pcOperandU16(code, pc)
			pc += 2
			t.PushLocal(slot)

		case PcOpPopLocal:
			slot := pcOperandU16(code, pc)
			pc += 2
			t.PopLocal(slot)

		case PcOpPushMemvar:
			name, next := pcOperandStr(code, pc)
			pc = next
			// Look the name up in t.Memvars (Get is expected to apply
			// PRIVATE-over-PUBLIC shadowing). When Memvars is nil or the
			// name is absent, NIL is pushed instead of raising an error;
			// this is intended to mirror Harbour's treatment of undeclared
			// memvars inside `&(expr)`.
			resolved := false
			if t.Memvars != nil {
				if val, ok := t.Memvars.Get(name); ok {
					t.push(val)
					resolved = true
				}
			}
			if !resolved {
				t.PushNil()
			}

		case PcOpPop:
			t.Pop()

		case PcOpDup:
			t.Dup()

		// --- Arithmetic ---
		case PcOpPlus:
			t.Plus()
		case PcOpMinus:
			t.Minus()
		case PcOpMult:
			t.Mult()
		case PcOpDivide:
			t.Divide()
		case PcOpMod:
			t.Modulus()
		case PcOpPower:
			t.Power()
		case PcOpNegate:
			t.Negate()
		case PcOpInc:
			t.Inc()
		case PcOpDec:
			t.Dec()

		case PcOpLocalAddInt:
			// Operands: 2-byte local slot, then a signed 32-bit addend.
			slot := pcOperandU16(code, pc)
			pc += 2
			delta := pcOperandI32(code, pc)
			pc += 4
			t.LocalAddInt(slot, int64(delta))

		// --- Comparison ---
		case PcOpEqual:
			t.Equal()
		case PcOpNotEqual:
			t.NotEqual()
		case PcOpLess:
			t.Less()
		case PcOpGreater:
			t.Greater()
		case PcOpLessEq:
			t.LessEqual()
		case PcOpGreaterEq:
			t.GreaterEqual()
		case PcOpInString:
			t.InString()

		// --- Logical ---
		case PcOpAnd:
			t.And()
		case PcOpOr:
			t.Or()
		case PcOpNot:
			t.Not()
		case PcOpPopLogical:
			// Result intentionally discarded.
			t.PopLogical()

		// --- Flow control ---
		// Jump displacements are signed 32-bit values applied relative to
		// the byte that follows the 4-byte operand.
		case PcOpJump:
			rel := pcOperandI32(code, pc)
			pc += 4 + rel

		case PcOpJumpFalse:
			rel := pcOperandI32(code, pc)
			pc += 4
			if !t.PopLogical() {
				pc += rel
			}

		case PcOpJumpTrue:
			rel := pcOperandI32(code, pc)
			pc += 4
			if t.PopLogical() {
				pc += rel
			}

		case PcOpRetValue:
			t.RetValue()
			return

		case PcOpReturn, PcOpEndProc, PcOpHalt:
			return

		// --- Frame / debug metadata ---
		case PcOpFrame:
			// The callers of this loop push the frame before entering it,
			// so only the 4 operand bytes (params + locals) are skipped.
			pc += 4

		case PcOpLine:
			// 2-byte line number operand (debugging aid); skipped here.
			pc += 2

		// --- Workarea field access (peephole for FieldGet(literal)) ---
		case PcOpFieldGet:
			field := pcOperandU16(code, pc)
			pc += 2
			// When t.FastFieldGetter is set it is called directly, which
			// bypasses the symbol lookup and Function dispatch of the
			// generic FIELDGET route.
			if getter := t.FastFieldGetter; getter == nil {
				pcFieldGetViaRTL(t, field)
			} else {
				t.PushValue(getter(field))
			}

		// --- AllTrim(FieldGet(n)) fused peephole ---
		case PcOpFieldTrim:
			field := pcOperandU16(code, pc)
			pc += 2

			var val Value
			if getter := t.FastFieldGetter; getter == nil {
				// Generic route: run FIELDGET, then take its result back
				// off the stack so it can be trimmed below.
				pcFieldGetViaRTL(t, field)
				val = t.Pop2()
			} else {
				val = getter(field)
			}

			// Only strings are trimmed, and only of ASCII spaces (0x20) at
			// either end. A string with nothing to trim, or a non-string
			// value, is pushed back as the original Value.
			trimmed := false
			if val.IsString() {
				str := val.AsString()
				start, end := 0, len(str)
				for ; start < end && str[start] == ' '; start++ {
				}
				for ; end > start && str[end-1] == ' '; end-- {
				}
				if start > 0 || end < len(str) {
					t.PushString(str[start:end])
					trimmed = true
				}
			}
			if !trimmed {
				t.PushValue(val)
			}

		// --- Function calls ---
		case PcOpPushSymbol:
			name, next := pcOperandStr(code, pc)
			pc = next
			t.PushSymbol(t.VM().FindSymbol(name))

		case PcOpFunction:
			argc := pcOperandU16(code, pc)
			pc += 2
			t.Function(argc)

		case PcOpDo:
			argc := pcOperandU16(code, pc)
			pc += 2
			t.Do(argc)

		// --- Self / OOP ---
		case PcOpPushSelf:
			t.PushSelf()

		case PcOpPushSelfField:
			field, next := pcOperandStr(code, pc)
			pc = next
			t.PushSelfField(field)

		case PcOpSetSelfField:
			field, next := pcOperandStr(code, pc)
			pc = next
			t.SetSelfField(field)

		case PcOpSend:
			// Operands: length-prefixed message name, then 2-byte arg count.
			msg, next := pcOperandStr(code, pc)
			pc = next
			argc := pcOperandU16(code, pc)
			pc += 2
			t.Send(msg, argc)

		// --- Array ---
		case PcOpArrayGen:
			n := pcOperandU16(code, pc)
			pc += 2
			t.ArrayGen(n)

		case PcOpArrayPush:
			t.ArrayPush()

		case PcOpArrayPop:
			t.ArrayPop()

		// --- Block ---
		case PcOpPushBlock:
			// Operands: 4-byte unsigned body length, the body bytes, then a
			// 2-byte detached-variable count. The body is copied so the
			// block owns its bytes independently of this function's code.
			bodyLen := int(binary.LittleEndian.Uint32(code[pc:]))
			pc += 4
			body := make([]byte, bodyLen)
			copy(body, code[pc:pc+bodyLen])
			pc += bodyLen
			detached := pcOperandU16(code, pc)
			pc += 2

			// The pushed block re-enters the interpreter through ExecPcode
			// with a PcodeFunc that carries only the copied code.
			blockFn := &PcodeFunc{Code: body}
			t.PushBlock(func(bt *Thread) {
				ExecPcode(bt, blockFn, mod)
			}, detached)

		default:
			panic(fmt.Sprintf("unknown pcode opcode: 0x%02X at pc=%d", op, opAt))
		}
	}
}

// pcOperandU16 decodes the unsigned 16-bit little-endian operand stored at
// code[at:] and returns it as an int. The caller advances its own offset.
func pcOperandU16(code []byte, at int) int {
	return int(binary.LittleEndian.Uint16(code[at:]))
}

// pcOperandI32 decodes the signed 32-bit little-endian operand stored at
// code[at:] and returns it as an int. The caller advances its own offset.
func pcOperandI32(code []byte, at int) int {
	return int(int32(binary.LittleEndian.Uint32(code[at:])))
}

// pcOperandStr decodes a string operand at code[at:]: a 16-bit little-endian
// byte length followed by that many bytes. It returns the string and the
// offset of the first byte after it.
func pcOperandStr(code []byte, at int) (string, int) {
	n := int(binary.LittleEndian.Uint16(code[at:]))
	at += 2
	end := at + n
	return string(code[at:end]), end
}

// pcFieldGetViaRTL is the generic route for the field opcodes: it looks up
// the "FIELDGET" symbol on the thread's VM and calls it with the field index
// as its single argument, using the same push sequence as any other function
// call (symbol, NIL, argument, Function).
func pcFieldGetViaRTL(t *Thread, field int) {
	t.PushSymbol(t.VM().FindSymbol("FIELDGET"))
	t.PushNil()
	t.PushLong(int64(field))
	t.Function(1)
}