Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.
Bench (bench_sql) vs 2026-04-08 baseline
- B1 SELECT * 2,192 → 114 µs (19x)
- B6 INNER JOIN 9,291 → 233 µs (40x)
- B7 CTE simple 8,037 → 129 µs (62x)
- B9 ROW_NUMBER 3,705 → 265 µs (14x)
- B10 RANK PARTITION 4,748 → 309 µs (15x)
- B12 INSERT (WA cache) 4,319 → 63 µs (69x)
- B13 UPDATE (WA cache) 6,144 → 68 µs (90x)
- B15 CTE+WIN+JOIN 18,395 → 1,873 µs (10x)
Infrastructure
- HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
- HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
- MEMRDD auto-imported via gengo; all Five programs get mem:name driver
- SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
- Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML
SQL engine
- FiveSql2 lexer ported to Go (byte FSM) with combined automatic
template parameterization (literals → ?, concat queries share plan)
- Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
- CTE / subquery / driving-table materialize paths use MEMRDD
- SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
- SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
dominant B13 cost — 1.6ms/call → gone)
Correctness fixes uncovered during migration
- ASort default path now sorts dates/logicals/timestamps (was no-op)
- ORDER BY default NULL placement in the Go fast path now matches PRG
SqlRowCompare; explicit NULLS FIRST/LAST is honored by both paths
- SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
- SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)
Verification
- go test ./... ALL PASS
- FiveSql2 test_sql1999 43/43
- tests/compat_harbour 56/56 (+5 new: ASort dates/logicals,
AScan int cross-type)
- Regression test test_null_order.prg for ORDER BY NULL ordering
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
151 lines
4.1 KiB
Go
151 lines
4.1 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// Expression bytecode compilation — PcCompile/PcEval.
|
|
// FiveSql2 and other prepared-statement engines use this to compile
|
|
// hot-path expressions ONCE and execute them per row via bytecode
|
|
// interpreter, avoiding PRG AST tree-walk overhead.
|
|
|
|
package hbrtl
|
|
|
|
import (
|
|
"five/compiler/genpc"
|
|
"five/compiler/parser"
|
|
"five/compiler/pp"
|
|
"five/hbrt"
|
|
"os"
|
|
"sync"
|
|
)
|
|
|
|
// pcCompileCache stores compiled PcodeFunc keyed by the original PRG
|
|
// expression string. Compilation does parser + preprocessor + pcode
|
|
// generation per call (~50-200µs for small expressions); for repeated
|
|
// queries (same SQL template) every call after the first is a
|
|
// sync.Map hit and returns the cached pointer directly.
|
|
//
|
|
// Thread safety: PcodeFunc is immutable after compilation (no
|
|
// per-call mutable state — execution state lives on hbrt.Thread),
|
|
// so sharing the pointer across goroutines is safe.
|
|
//
|
|
// No eviction: the cache grows by one entry per distinct expression
// string. That count is limited only by the caller's query set, which
// for FiveSql2 workloads is a small constant. Switch to an LRU if a
// pathological caller emerges.
|
|
var pcCompileCache sync.Map // map[string]*hbrt.PcodeFunc — key: PRG expression source text, value: its compiled function
|
|
|
|
// PcCompile(cPrgExpr) → pFunc
|
|
//
|
|
// Compile a PRG expression to pcode. Returns an opaque pointer that can
|
|
// be passed to PcEval(). The expression is wrapped in a stub FUNCTION
|
|
// so the full PRG parser can handle it; then the single RETURN value
|
|
// node is extracted and compiled to a standalone PcodeFunc.
|
|
//
|
|
// Example:
|
|
// pc := PcCompile("FieldGet(4) > 50000")
|
|
// WHILE ! Eof()
|
|
// IF PcEval(pc)
|
|
// AAdd(aRows, ...)
|
|
// ENDIF
|
|
// dbSkip()
|
|
// ENDDO
|
|
//
|
|
// Performance: ~3-5x faster than MacroEval for hot loops because the
|
|
// expression AST is walked once at compile time, not per row.
|
|
func PcCompile(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProc()
|
|
|
|
source := t.Local(1).AsString()
|
|
if source == "" {
|
|
t.RetNil()
|
|
return
|
|
}
|
|
|
|
// Cache hit — skip parser/genpc entirely.
|
|
if cached, ok := pcCompileCache.Load(source); ok {
|
|
if fn, ok := cached.(*hbrt.PcodeFunc); ok && fn != nil {
|
|
t.RetPointer(fn)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Wrap expression in a function stub so the parser can handle it.
|
|
wrapped := "FUNCTION _EXPR()\nRETURN " + source + "\n"
|
|
|
|
// Preprocess
|
|
pre := pp.New()
|
|
processed, _ := pre.Process("_expr.prg", wrapped)
|
|
|
|
// Parse
|
|
file, errs := parser.ParseWithGoDumps("_expr.prg", processed, pre.GoDumps)
|
|
if len(errs) > 0 {
|
|
for _, e := range errs {
|
|
_, _ = os.Stderr.WriteString("PcCompile: " + e.Error() + "\n")
|
|
}
|
|
t.RetNil()
|
|
return
|
|
}
|
|
|
|
// Extract the RETURN expression from the first function
|
|
if len(file.Decls) == 0 {
|
|
t.RetNil()
|
|
return
|
|
}
|
|
|
|
// Compile the whole wrapped function to a PcodeModule, then extract
|
|
// the _EXPR function. This reuses all of genpc's mature emit logic.
|
|
mod := genpc.Generate(file)
|
|
if mod == nil {
|
|
t.RetNil()
|
|
return
|
|
}
|
|
fn, ok := mod.Funcs["_EXPR"]
|
|
if !ok {
|
|
// Try uppercase / case variations
|
|
for name, f := range mod.Funcs {
|
|
_ = name
|
|
fn = f
|
|
ok = true
|
|
break
|
|
}
|
|
}
|
|
if !ok || fn == nil {
|
|
t.RetNil()
|
|
return
|
|
}
|
|
|
|
// Populate the cache. sync.Map.Store handles concurrent writers —
|
|
// duplicate compilations of the same source waste a few µs but
|
|
// don't corrupt the map; whichever compilation finishes second
|
|
// overwrites with an identical value.
|
|
pcCompileCache.Store(source, fn)
|
|
t.RetPointer(fn)
|
|
}
|
|
|
|
// PcEval(pFunc) → xValue
|
|
//
|
|
// Execute a compiled pcode function. Returns the value produced by the
|
|
// compiled expression via the retVal slot. The caller's workarea context
|
|
// is used for field access, so position the WA via GoTo first.
|
|
func PcEval(t *hbrt.Thread) {
|
|
t.Frame(1, 0)
|
|
defer t.EndProcFast()
|
|
|
|
ptr := t.Local(1).AsPointer()
|
|
if ptr == nil {
|
|
t.RetNil()
|
|
return
|
|
}
|
|
fn, ok := ptr.(*hbrt.PcodeFunc)
|
|
if !ok || fn == nil {
|
|
t.RetNil()
|
|
return
|
|
}
|
|
|
|
// Execute the pcode. The RetValue opcode inside the pcode sets
|
|
// t.retVal, and ExecPcode's EndProc preserves it across the frame
|
|
// transition. After ExecPcode returns, t.retVal contains the
|
|
// expression's value — our own EndProc will use it as PcEval's
|
|
// return value.
|
|
hbrt.ExecPcode(t, fn, nil)
|
|
}
|