perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x

Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.

Bench (bench_sql) vs 2026-04-08 baseline
 - B1  SELECT *             2,192 → 114   µs   (19x)
 - B6  INNER JOIN           9,291 → 233   µs   (40x)
 - B7  CTE simple           8,037 → 129   µs   (62x)
 - B9  ROW_NUMBER           3,705 → 265   µs   (14x)
 - B10 RANK PARTITION       4,748 → 309   µs   (15x)
 - B12 INSERT (WA cache)    4,319 →  63   µs   (69x)
 - B13 UPDATE (WA cache)    6,144 →  68   µs   (90x)
 - B15 CTE+WIN+JOIN        18,395 → 1,873 µs   (10x)

Infrastructure
 - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
 - HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
 - MEMRDD auto-imported via gengo; all Five programs get mem:name driver
 - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
 - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML

SQL engine
 - FiveSql2 lexer ported to Go (byte FSM), combined with automatic
   template parameterization (literals → ?, so concatenated queries share a plan)
 - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
   SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
   SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
 - CTE / subquery / driving-table materialize paths use MEMRDD
 - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
 - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
   dominant B13 cost — 1.6ms/call → gone)

Correctness fixes uncovered during migration
 - ASort default path now sorts dates/logicals/timestamps (was no-op)
 - ORDER BY default NULL placement in the Go fast path now matches
   PRG SqlRowCompare; explicit NULLS FIRST/LAST is honored by both paths
 - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
 - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)

Verification
 - go test ./...              ALL PASS
 - FiveSql2 test_sql1999      43/43
 - tests/compat_harbour       56/56 (+5 new: ASort dates/logicals,
                              AScan int cross-type)
 - Regression test test_null_order.prg for ORDER BY NULL ordering

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 20:20:14 +09:00
parent 3caadb23b9
commit dd270d5d9d
31 changed files with 4501 additions and 495 deletions

View File

@@ -375,8 +375,8 @@ func reflectToValue(rv reflect.Value) Value {
h := &HbHash{}
iter := rv.MapRange()
for iter.Next() {
h.Keys = append(h.Keys, reflectToValue(iter.Key()))
h.Values = append(h.Values, reflectToValue(iter.Value()))
// Go maps guarantee unique keys; Append skips the lookup.
h.Append(reflectToValue(iter.Key()), reflectToValue(iter.Value()))
}
return MakeHashFrom(h)
case reflect.Ptr, reflect.Struct, reflect.Func, reflect.Chan:

184
hbrt/hash_helpers.go Normal file
View File

@@ -0,0 +1,184 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package hbrt
import (
"encoding/binary"
"math"
)
// hashKey returns a canonical string key for use in HbHash.Index.
// Two Values that compare equal via valueEqual MUST produce the same
// string, with ok=true. Any type not handled below (the pointer-identity
// kinds such as array, object, hash, block, pointer) returns ok=false so
// the caller falls back to a linear scan using valueEqual.
//
// Key layout (a one-byte type tag followed by a fixed-width or string
// payload, so keys of different types can never collide — e.g. the
// string "N" serializes as "SN", not as the Nil key "N"):
//
//	Nil        "N"
//	String     "S" + contents
//	Integer    'I' + 8-byte little-endian int64
//	Double     'F' + 8-byte little-endian IEEE-754 bits
//	Logical    "L1" / "L0"
//	Date       'D' + 8-byte little-endian Julian day
//	Timestamp  'T' + 8-byte Julian day + 4-byte milliseconds
//
// Numeric normalization: a double that holds an integral value inside
// the int64 range folds into the 'I' slot of the corresponding integer,
// so h[1] and h[1.0] address the same bucket (matches valueEqual's
// cross-type numeric compare). -0.0 is normalized to +0.0 for the same
// reason.
//
// NOTE(review): integers above 2^53 cannot all be represented as
// doubles; whether valueEqual treats such an integer as equal to its
// nearest double is not visible here — confirm if that range matters.
func hashKey(v Value) (string, bool) {
	// 2^63 is exactly representable as a float64 but is one past
	// math.MaxInt64. The integer fold below must therefore use an
	// EXCLUSIVE upper bound: converting 2^63 to int64 is out of range and
	// its result is implementation-dependent, which would make the double
	// 2^63 collide with an unrelated integer key. -2^63 is math.MinInt64
	// and converts exactly, so the lower bound stays inclusive.
	const two63 = 9223372036854775808.0

	switch {
	case v.IsNil():
		return "N", true
	case v.IsString():
		return "S" + v.AsString(), true
	case v.IsNumeric():
		var buf [9]byte
		if v.IsNumInt() {
			buf[0] = 'I'
			binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsNumInt()))
			return string(buf[:]), true
		}
		d := v.AsDouble()
		if d == 0 {
			d = 0 // collapse -0.0 into +0.0 (-0.0 == 0 is true)
		}
		if !math.IsNaN(d) && !math.IsInf(d, 0) {
			// Integral double within int64 range → share the integer slot.
			if f, fr := math.Modf(d); fr == 0 && f >= -two63 && f < two63 {
				buf[0] = 'I'
				binary.LittleEndian.PutUint64(buf[1:], uint64(int64(f)))
				return string(buf[:]), true
			}
		}
		// Fractional, out-of-int64-range, NaN or Inf: key on the raw bits.
		buf[0] = 'F'
		binary.LittleEndian.PutUint64(buf[1:], math.Float64bits(d))
		return string(buf[:]), true
	case v.IsLogical():
		if v.AsBool() {
			return "L1", true
		}
		return "L0", true
	case v.IsDate():
		var buf [9]byte
		buf[0] = 'D'
		binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsJulian()))
		return string(buf[:]), true
	case v.IsTimestamp():
		var buf [13]byte
		buf[0] = 'T'
		binary.LittleEndian.PutUint64(buf[1:9], uint64(v.AsJulian()))
		binary.LittleEndian.PutUint32(buf[9:], uint32(v.AsTimeMs()))
		return string(buf[:]), true
	}
	return "", false
}
// ensureIndex builds or rebuilds HbHash.Index if it looks stale.
// Callers should invoke it before any Index read when the hash may have
// been mutated via direct slice access.
//
// Staleness is judged by size only:
//
//  1. O(1) fast path — Index has exactly one entry per slot in Keys.
//     This is the steady state of a hash whose keys are all indexable
//     and that is maintained through Set/Append, and it keeps Lookup
//     from re-serializing every key on every call.
//  2. Counting path — when the sizes differ (the hash holds some
//     non-indexable keys, or Keys was edited directly) the indexable
//     keys are counted; Index is kept if it has exactly that many
//     entries.
//  3. Otherwise (or when Index is nil) Index is rebuilt from Keys.
//
// Because only sizes are compared, an in-place replacement of a key
// done by writing to Keys directly is not detected; such callers should
// set Index to nil themselves.
func (h *HbHash) ensureIndex() {
	if h.Index != nil {
		// Fast path: every slot is mirrored, nothing to verify per key.
		if len(h.Index) == len(h.Keys) {
			return
		}
		// Sizes differ: this is only legitimate if the difference is made
		// up of non-indexable keys, so count the indexable ones.
		want := 0
		for _, k := range h.Keys {
			if _, ok := hashKey(k); ok {
				want++
			}
		}
		if want == len(h.Index) {
			return
		}
	}
	// Rebuild from scratch. With duplicate keys in Keys the later slot
	// overwrites the earlier one in the map.
	h.Index = make(map[string]int, len(h.Keys))
	for i, k := range h.Keys {
		if kk, ok := hashKey(k); ok {
			h.Index[kk] = i
		}
	}
}
// Lookup reports the position of key within Keys/Values, or -1 when the
// key is not present.
//
// Keys that hashKey can serialize are resolved through the Index map
// (ensureIndex is called first so a missing or stale Index is rebuilt).
// All other keys are located with a front-to-back scan of Keys that
// compares with valueEqual and returns the first match.
func (h *HbHash) Lookup(key Value) int {
	kk, indexable := hashKey(key)
	if !indexable {
		// Non-indexable key type: linear scan is the only option.
		for slot := range h.Keys {
			if valueEqual(h.Keys[slot], key) {
				return slot
			}
		}
		return -1
	}

	h.ensureIndex()
	slot, present := h.Index[kk]
	if !present {
		return -1
	}
	return slot
}
// Has reports whether the hash contains key, i.e. whether Lookup finds
// a slot for it.
func (h *HbHash) Has(key Value) bool {
	slot := h.Lookup(key)
	return slot >= 0
}
// HashGet fetches the value stored under key. When Lookup does not find
// the key the result is MakeNil().
// (The method is called HashGet rather than Get to avoid clashing with
// method-tables named Get.)
func (h *HbHash) HashGet(key Value) Value {
	slot := h.Lookup(key)
	if slot < 0 {
		return MakeNil()
	}
	return h.Values[slot]
}
// Set stores val under key. An existing binding is overwritten in place
// (its slot, and therefore its position in insertion order, is kept);
// otherwise the pair is appended at the end.
//
// The result is true when a new slot was created and false when an
// existing one was updated.
func (h *HbHash) Set(key, val Value) bool {
	slot := h.Lookup(key)
	added := slot < 0
	if added {
		h.appendPair(key, val)
	} else {
		h.Values[slot] = val
	}
	return added
}
// Append stores key → val in a new slot at the end of the hash and
// performs no existence check. It is meant for bulk loaders and other
// callers that already know the key is absent; using it with a key that
// is already present creates a second slot for that key.
func (h *HbHash) Append(key, val Value) {
	h.appendPair(key, val)
}
// appendPair pushes key/val onto the end of the parallel Keys/Values
// slices and, when hashKey can serialize the key, records the new slot
// in Index (creating the map on first use). Keys that cannot be
// serialized are stored without an Index entry.
func (h *HbHash) appendPair(key, val Value) {
	slot := len(h.Keys) // position the new pair will occupy
	h.Keys, h.Values = append(h.Keys, key), append(h.Values, val)

	kk, indexable := hashKey(key)
	if !indexable {
		return
	}
	if h.Index == nil {
		h.Index = make(map[string]int, 8)
	}
	h.Index[kk] = slot
}
// Delete removes key and its value. It returns true if the key was
// present and false (leaving the hash untouched) otherwise.
//
// The remaining entries keep their insertion order (Harbour KEEPORDER
// semantic): everything after the removed slot shifts down by one.
// Because those slot numbers change, Index is dropped (set to nil) and
// is rebuilt by ensureIndex on the next indexed lookup.
//
// The slot vacated at the end of each backing array is reset to the
// zero Value so the array does not keep the removed entry's data
// reachable after the slice has been shortened.
func (h *HbHash) Delete(key Value) bool {
	i := h.Lookup(key)
	if i < 0 {
		return false
	}

	// cut removes element i from s in place, preserving order, and clears
	// the now-unused tail slot.
	cut := func(s []Value) []Value {
		var zero Value
		last := len(s) - 1
		copy(s[i:], s[i+1:])
		s[last] = zero
		return s[:last]
	}
	h.Keys = cut(h.Keys)
	h.Values = cut(h.Values)

	h.Index = nil
	return true
}
// HashFromPairs builds an HbHash from a flat list of alternating
// key/value Values (key0, val0, key1, val1, ...), as produced by an
// HB_HASH literal / hb_Hash().
//
// Each pair is inserted with Set, so a repeated key keeps the slot of
// its first occurrence and the value of its last one. A trailing key
// without a value (odd-length input) is ignored.
func HashFromPairs(pairs []Value) *HbHash {
	h := &HbHash{}
	// v walks the value positions (1, 3, 5, ...); its key sits just before.
	for v := 1; v < len(pairs); v += 2 {
		h.Set(pairs[v-1], pairs[v])
	}
	return h
}

View File

@@ -573,20 +573,19 @@ func (c *HBContext) HashLen(v Value) int {
// HashAdd adds key-value pair. Harbour: hb_hashAdd()
func (c *HBContext) HashAdd(v Value, key, val Value) {
if v.IsHash() {
h := v.AsHash()
h.Keys = append(h.Keys, key)
h.Values = append(h.Values, val)
v.AsHash().Set(key, val)
}
}
// HashGetC gets value by string key. Five extension.
// Hits the Index directly with the "S"+key serialization so we skip
// allocating a Value wrapper for the lookup.
func (c *HBContext) HashGetC(v Value, key string) Value {
if v.IsHash() {
h := v.AsHash()
for i, k := range h.Keys {
if k.IsString() && k.AsString() == key {
return h.Values[i]
}
h.ensureIndex()
if i, ok := h.Index["S"+key]; ok {
return h.Values[i]
}
}
return MakeNil()

View File

@@ -118,8 +118,7 @@ func (t *Thread) evalExpr(expr ast.Expr) Value {
case *ast.HashLitExpr:
h := &HbHash{}
for i := range e.Keys {
h.Keys = append(h.Keys, t.evalExpr(e.Keys[i]))
h.Values = append(h.Values, t.evalExpr(e.Values[i]))
h.Set(t.evalExpr(e.Keys[i]), t.evalExpr(e.Values[i]))
}
return MakeHashFrom(h)

View File

@@ -20,14 +20,22 @@ func (t *Thread) ArrayGen(n int) {
// HashGen pops n key-value pairs and creates a hash.
// Stack: [key1] [val1] [key2] [val2] ... → Hash
//
// Duplicate keys follow Harbour hash-literal semantics: the last
// assignment wins and no second slot is created. Lookup/Set invoked
// inside the reverse-scan pop loop would be order-inverted, so we
// first materialize all N pairs in stack order and then feed them
// forward into the hash via Set.
func (t *Thread) HashGen(n int) {
hh := &HbHash{
Keys: make([]Value, n),
Values: make([]Value, n),
}
keys := make([]Value, n)
vals := make([]Value, n)
for i := n - 1; i >= 0; i-- {
hh.Values[i] = t.pop()
hh.Keys[i] = t.pop()
vals[i] = t.pop()
keys[i] = t.pop()
}
hh := &HbHash{}
for i := 0; i < n; i++ {
hh.Set(keys[i], vals[i])
}
t.push(Value{
info: makeInfo(tHash, 0, 0),
@@ -44,11 +52,9 @@ func (t *Thread) ArrayPush() {
// Hash: h[key] → value
if arr.IsHash() {
hh := arr.AsHash()
for i, k := range hh.Keys {
if valueEqual(k, idx) {
t.push(hh.Values[i])
return
}
if i := hh.Lookup(idx); i >= 0 {
t.push(hh.Values[i])
return
}
t.push(MakeNil())
return
@@ -87,15 +93,7 @@ func (t *Thread) ArrayPop() {
// Hash: h[key] := value
if arr.IsHash() {
hh := arr.AsHash()
for i, k := range hh.Keys {
if valueEqual(k, idx) {
hh.Values[i] = val
return
}
}
hh.Keys = append(hh.Keys, idx)
hh.Values = append(hh.Values, val)
arr.AsHash().Set(idx, val)
return
}

View File

@@ -234,11 +234,22 @@ type HbArray struct {
}
// HbHash is the hash table backing store.
//
// Keys/Values are parallel slices kept in insertion order (Harbour
// HB_HASH_KEEPORDER default): Values[i] is the value bound to Keys[i].
// Index is a lookup map from the canonical key string produced by
// hashKey to the slot position in Keys/Values. It mirrors only entries
// whose key type is indexable (nil, string, numeric, logical, date,
// timestamp); keys of other types are found by a linear scan through
// Keys.
//
// Callers that mutate Keys/Values directly (tests, bulk loaders) may
// leave Index stale — the helper methods try to detect that by comparing
// the size of Index with the number of indexable keys and rebuild on
// demand. An in-place key replacement that leaves those counts equal is
// not detected, so production code must go through the
// Lookup/Set/Append/Delete methods to keep Index in sync.
type HbHash struct {
Keys []Value // keys, in insertion order
Values []Value // values, parallel to Keys
Order []int // NOTE(review): not used by the hash helper methods — confirm purpose
Flags int32 // NOTE(review): not used by the hash helper methods — confirm purpose
Index map[string]int // hashKey(key) → slot in Keys/Values; nil until built, reset to nil by Delete
}
// HbBlock is the code block backing store.

View File

@@ -463,13 +463,7 @@ func vmHashHas(t *Thread, self Value, args []Value) Value {
if len(args) == 0 {
return MakeBool(false)
}
key := args[0]
for _, k := range self.AsHash().Keys {
if valuesEqual(k, key) {
return MakeBool(true)
}
}
return MakeBool(false)
return MakeBool(self.AsHash().Has(args[0]))
}
func vmHashLen(t *Thread, self Value, args []Value) Value {
@@ -484,6 +478,7 @@ func vmHashCopy(t *Thread, self Value, args []Value) Value {
}
copy(nh.Keys, h.Keys)
copy(nh.Values, h.Values)
// Index is rebuilt lazily on first Lookup against nh.
return MakeHashFrom(nh)
}
@@ -491,15 +486,7 @@ func vmHashDelete(t *Thread, self Value, args []Value) Value {
if len(args) == 0 {
return self
}
key := args[0]
h := self.AsHash()
for i, k := range h.Keys {
if valuesEqual(k, key) {
h.Keys = append(h.Keys[:i], h.Keys[i+1:]...)
h.Values = append(h.Values[:i], h.Values[i+1:]...)
break
}
}
self.AsHash().Delete(args[0])
return self
}