perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x

Systematic pass through PRG hot paths, promoting them to Go RTL while preserving Harbour/FiveSql2 semantics. Full log in docs/RTL-Go-Native-Migration.md. Bench (bench_sql) vs 2026-04-08 baseline - B1 SELECT * 2,192 → 114 µs (19x) - B6 INNER JOIN 9,291 → 233 µs (40x) - B7 CTE simple 8,037 → 129 µs (62x) - B9 ROW_NUMBER 3,705 → 265 µs (14x) - B10 RANK PARTITION 4,748 → 309 µs (15x) - B12 INSERT (WA cache) 4,319 → 63 µs (69x) - B13 UPDATE (WA cache) 6,144 → 68 µs (90x) - B15 CTE+WIN+JOIN 18,395 → 1,873 µs (10x) Infrastructure - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER) - HbDeepClone Go RTL (scalar-sharing, immutable hash keys) - MEMRDD auto-imported via gengo; all Five programs get mem:name driver - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache) - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML SQL engine - FiveSql2 lexer ported to Go (byte FSM) with combined automatic template parameterization (literals → ?, concat queries share plan) - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions, SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple, SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving - CTE / subquery / driving-table materialize paths use MEMRDD - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was dominant B13 cost — 1.6ms/call → gone) Correctness fixes uncovered during migration - ASort default path now sorts dates/logicals/timestamps (was no-op) - ORDER BY default NULL placement matches PRG SqlRowCompare across Go fast path; explicit NULLS FIRST/LAST honored by both paths - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b) Verification - go test ./... 
ALL PASS - FiveSql2 test_sql1999 43/43 - tests/compat_harbour 56/56 (+5 new: ASort dates/logicals, AScan int cross-type) - Regression test test_null_order.prg for ORDER BY NULL ordering Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 20:20:14 +09:00
parent 3caadb23b9
commit dd270d5d9d
31 changed files with 4501 additions and 495 deletions
--- a/hbrt/gobridge.go
+++ b/hbrt/gobridge.go
@@ -375,8 +375,8 @@ func reflectToValue(rv reflect.Value) Value {
 		h := &HbHash{}
 		iter := rv.MapRange()
 		for iter.Next() {
-			h.Keys = append(h.Keys, reflectToValue(iter.Key()))
-			h.Values = append(h.Values, reflectToValue(iter.Value()))
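+			// Go map keys are unique, so Append skips the lookup. NOTE(review): distinct Go keys (e.g. 1 and 1.0 in a map[any]T) could still convert to equal hash keys — confirm, or use Set.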
+			h.Append(reflectToValue(iter.Key()), reflectToValue(iter.Value()))
 		}
 		return MakeHashFrom(h)
 	case reflect.Ptr, reflect.Struct, reflect.Func, reflect.Chan:
--- a/hbrt/hash_helpers.go
+++ b/hbrt/hash_helpers.go
@@ -0,0 +1,184 @@
+// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
+// All rights reserved.
+
+package hbrt
+
+import (
+	"encoding/binary"
+	"math"
+)
+
+// hashKey returns a canonical string key for use in HbHash.Index.
+// Two Values that compare equal via valueEqual MUST produce the same
+// string and the ok flag must be true. Pointer-identity key types
+// (array, object, hash, block, pointer) return ok=false so the caller
+// falls back to a linear scan using valueEqual.
+//
+// Numeric normalization: doubles that represent an exact int64 fold
+// into the same slot as the corresponding integer, so h[1] and h[1.0]
+// address the same bucket (matches valueEqual's cross-type numeric
+// compare). -0.0 is normalized to +0.0 for the same reason. Doubles
+// outside [-2^63, 2^63), fractional doubles, NaN and ±Inf keep their
+// own 'F' slot keyed by the raw IEEE-754 bits.
+//
+// The single-byte type prefix prevents cross-type collisions
+// (e.g., the string "N" must not collide with a Nil key).
+func hashKey(v Value) (string, bool) {
+	switch {
+	case v.IsNil():
+		return "N", true
+	case v.IsString():
+		return "S" + v.AsString(), true
+	case v.IsNumeric():
+		var buf [9]byte
+		if v.IsNumInt() {
+			buf[0] = 'I'
+			binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsNumInt()))
+			return string(buf[:]), true
+		}
+		d := v.AsDouble()
+		if d == 0 {
+			d = 0 // collapse -0.0 into +0.0
+		}
+		if !math.IsNaN(d) && !math.IsInf(d, 0) {
+			// Both literals are exactly ±2^63 as float64. -2^63 is a valid
+			// int64 but +2^63 is not, so the upper bound is exclusive:
+			// converting an out-of-range float to int64 gives an
+			// implementation-dependent result that could alias a real
+			// integer key.
+			if f, fr := math.Modf(d); fr == 0 && f >= -9.2233720368547758e18 && f < 9.2233720368547758e18 {
+				buf[0] = 'I'
+				binary.LittleEndian.PutUint64(buf[1:], uint64(int64(f)))
+				return string(buf[:]), true
+			}
+		}
+		buf[0] = 'F'
+		binary.LittleEndian.PutUint64(buf[1:], math.Float64bits(d))
+		return string(buf[:]), true
+	case v.IsLogical():
+		if v.AsBool() {
+			return "L1", true
+		}
+		return "L0", true
+	case v.IsDate():
+		var buf [9]byte
+		buf[0] = 'D'
+		binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsJulian()))
+		return string(buf[:]), true
+	case v.IsTimestamp():
+		var buf [13]byte
+		buf[0] = 'T'
+		binary.LittleEndian.PutUint64(buf[1:9], uint64(v.AsJulian()))
+		binary.LittleEndian.PutUint32(buf[9:], uint32(v.AsTimeMs()))
+		return string(buf[:]), true
+	}
+	return "", false
+}
+
+// ensureIndex builds HbHash.Index, or rebuilds it when it looks stale.
+// Callers invoke it before any Index read because the hash may have
+// been mutated via direct slice access.
+//
+// Staleness is a size heuristic. Index is trusted when it holds exactly
+// one entry per slot of Keys — an O(1) check that covers hashes whose
+// keys are all indexable — or, failing that, one entry per indexable
+// key in Keys. Anything else triggers a full rebuild from Keys. Direct
+// mutations that leave those counts unchanged are not detected.
+func (h *HbHash) ensureIndex() {
+	if h.Index != nil {
+		// O(1) fast path. Without it every indexed lookup re-hashed all
+		// of Keys (building a throwaway string per string key) merely
+		// to validate Index.
+		if len(h.Index) == len(h.Keys) {
+			return
+		}
+		// Slow path: some keys are not indexable, so compare against
+		// the number of keys hashKey can actually serialize.
+		want := 0
+		for _, k := range h.Keys {
+			if _, ok := hashKey(k); ok {
+				want++
+			}
+		}
+		if want == len(h.Index) {
+			return
+		}
+	}
+	h.Index = make(map[string]int, len(h.Keys))
+	for i, k := range h.Keys {
+		if kk, ok := hashKey(k); ok {
+			h.Index[kk] = i
+		}
+	}
+}
+
+// Lookup reports the slot of key within Keys/Values, or -1 when the
+// key is absent. Keys that hashKey can serialize are resolved through
+// Index (after ensureIndex has validated it); all other keys are found
+// by a linear valueEqual scan over Keys.
+func (h *HbHash) Lookup(key Value) int {
+	kk, indexable := hashKey(key)
+	if !indexable {
+		for slot := range h.Keys {
+			if valueEqual(h.Keys[slot], key) {
+				return slot
+			}
+		}
+		return -1
+	}
+	h.ensureIndex()
+	slot, found := h.Index[kk]
+	if !found {
+		return -1
+	}
+	return slot
+}
+
+// Has reports whether key is present, i.e. whether Lookup finds a slot
+// for it.
+func (h *HbHash) Has(key Value) bool {
+	slot := h.Lookup(key)
+	return slot >= 0
+}
+
+// HashGet returns the value stored under key; a missing key yields NIL.
+// The method is called HashGet rather than Get so it does not clash
+// with method-tables named Get.
+func (h *HbHash) HashGet(key Value) Value {
+	slot := h.Lookup(key)
+	if slot < 0 {
+		return MakeNil()
+	}
+	return h.Values[slot]
+}
+
+// Set stores val under key. When key already exists its value is
+// overwritten in place and Set returns false; otherwise the pair is
+// appended after the existing entries and Set returns true.
+func (h *HbHash) Set(key, val Value) bool {
+	slot := h.Lookup(key)
+	if slot < 0 {
+		h.appendPair(key, val)
+		return true
+	}
+	h.Values[slot] = val
+	return false
+}
+
+// Append adds key → val at the end of the hash without checking
+// whether key already exists. Caller must guarantee the key is not
+// already present (e.g., bulk loaders): a repeated key gets a second
+// slot in Keys/Values, and for indexable keys Index is repointed at
+// the newest slot.
+func (h *HbHash) Append(key, val Value) {
+	h.appendPair(key, val)
+}
+
+// appendPair pushes key/val onto the end of Keys/Values and, when the
+// key is indexable, records the new slot in Index (allocating the map
+// on first use). It performs no duplicate check.
+func (h *HbHash) appendPair(key, val Value) {
+	slot := len(h.Keys)
+	h.Keys, h.Values = append(h.Keys, key), append(h.Values, val)
+	kk, indexable := hashKey(key)
+	if !indexable {
+		return
+	}
+	if h.Index == nil {
+		h.Index = make(map[string]int, 8)
+	}
+	h.Index[kk] = slot
+}
+
+// Delete removes key from the hash and reports whether it was present.
+// Later entries slide down one slot, so the surviving keys keep their
+// insertion order (Harbour KEEPORDER semantic). Because those slots
+// are renumbered, Index is dropped here and rebuilt by ensureIndex on
+// the next indexed lookup.
+func (h *HbHash) Delete(key Value) bool {
+	slot := h.Lookup(key)
+	if slot < 0 {
+		return false
+	}
+	// Shift the tail left over the removed slot, then trim one element.
+	copy(h.Keys[slot:], h.Keys[slot+1:])
+	h.Keys = h.Keys[:len(h.Keys)-1]
+	copy(h.Values[slot:], h.Values[slot+1:])
+	h.Values = h.Values[:len(h.Values)-1]
+	h.Index = nil
+	return true
+}
+
+// HashFromPairs builds an HbHash from alternating key/value Values
+// (as produced by HB_HASH literal / hb_Hash()). Each pair is stored
+// with Set, so a repeated key keeps its first slot and ends up with
+// the last-assigned value. A trailing key without a value is ignored.
+func HashFromPairs(pairs []Value) *HbHash {
+	h := new(HbHash)
+	for len(pairs) >= 2 {
+		h.Set(pairs[0], pairs[1])
+		pairs = pairs[2:]
+	}
+	return h
+}
+
--- a/hbrt/hbfunc.go
+++ b/hbrt/hbfunc.go
@@ -573,20 +573,19 @@ func (c *HBContext) HashLen(v Value) int {
 // HashAdd adds key-value pair. Harbour: hb_hashAdd()
 func (c *HBContext) HashAdd(v Value, key, val Value) {
 	if v.IsHash() {
-		h := v.AsHash()
-		h.Keys = append(h.Keys, key)
-		h.Values = append(h.Values, val)
+		v.AsHash().Set(key, val)
 	}
 }

 // HashGetC gets value by string key. Five extension.
+// Hits the Index directly with the "S"+key serialization so we skip
+// allocating a Value wrapper for the lookup.
 func (c *HBContext) HashGetC(v Value, key string) Value {
 	if v.IsHash() {
 		h := v.AsHash()
-		for i, k := range h.Keys {
-			if k.IsString() && k.AsString() == key {
-				return h.Values[i]
-			}
+		h.ensureIndex()
+		if i, ok := h.Index["S"+key]; ok {
+			return h.Values[i]
 		}
 	}
 	return MakeNil()
--- a/hbrt/macroeval.go
+++ b/hbrt/macroeval.go
@@ -118,8 +118,7 @@ func (t *Thread) evalExpr(expr ast.Expr) Value {
 	case *ast.HashLitExpr:
 		h := &HbHash{}
 		for i := range e.Keys {
-			h.Keys = append(h.Keys, t.evalExpr(e.Keys[i]))
-			h.Values = append(h.Values, t.evalExpr(e.Values[i]))
+			h.Set(t.evalExpr(e.Keys[i]), t.evalExpr(e.Values[i]))
 		}
 		return MakeHashFrom(h)

--- a/hbrt/ops_collection.go
+++ b/hbrt/ops_collection.go
@@ -20,14 +20,22 @@ func (t *Thread) ArrayGen(n int) {

 // HashGen pops n key-value pairs and creates a hash.
 // Stack: [key1] [val1] [key2] [val2] ... → Hash
+//
+// Duplicate keys follow Harbour hash-literal semantics: the last
+// assignment wins and no second slot is created. Lookup/Set invoked
+// inside the reverse-scan pop loop would be order-inverted, so we
+// first materialize all N pairs in stack order and then feed them
+// forward into the hash via Set.
 func (t *Thread) HashGen(n int) {
-	hh := &HbHash{
-		Keys:   make([]Value, n),
-		Values: make([]Value, n),
-	}
+	keys := make([]Value, n)
+	vals := make([]Value, n)
 	for i := n - 1; i >= 0; i-- {
-		hh.Values[i] = t.pop()
-		hh.Keys[i] = t.pop()
+		vals[i] = t.pop()
+		keys[i] = t.pop()
+	}
+	hh := &HbHash{}
+	for i := 0; i < n; i++ {
+		hh.Set(keys[i], vals[i])
 	}
 	t.push(Value{
 		info: makeInfo(tHash, 0, 0),
@@ -44,11 +52,9 @@ func (t *Thread) ArrayPush() {
 	// Hash: h[key] → value
 	if arr.IsHash() {
 		hh := arr.AsHash()
-		for i, k := range hh.Keys {
-			if valueEqual(k, idx) {
-				t.push(hh.Values[i])
-				return
-			}
+		if i := hh.Lookup(idx); i >= 0 {
+			t.push(hh.Values[i])
+			return
 		}
 		t.push(MakeNil())
 		return
@@ -87,15 +93,7 @@ func (t *Thread) ArrayPop() {

 	// Hash: h[key] := value
 	if arr.IsHash() {
-		hh := arr.AsHash()
-		for i, k := range hh.Keys {
-			if valueEqual(k, idx) {
-				hh.Values[i] = val
-				return
-			}
-		}
-		hh.Keys = append(hh.Keys, idx)
-		hh.Values = append(hh.Values, val)
+		arr.AsHash().Set(idx, val)
 		return
 	}

--- a/hbrt/value.go
+++ b/hbrt/value.go
@@ -234,11 +234,22 @@ type HbArray struct {
 }

 // HbHash is the hash table backing store.
+//
+// Keys/Values are parallel slices kept in insertion order (Harbour
+// HB_HASH_KEEPORDER default). Index is an O(1) lookup map mirroring
+// entries whose key type is indexable (nil, string, numeric, logical,
+// date, timestamp); other key types fall back to a linear scan of Keys.
+//
+// Callers that mutate Keys/Values directly (tests, bulk loaders) may
+// leave Index stale — the helper methods detect that via a length
+// mismatch and rebuild on demand. Production code must go through the
+// Lookup/Set/Append/Delete methods to keep Index in sync.
 type HbHash struct {
 	Keys   []Value
 	Values []Value
 	Order  []int
 	Flags  int32
+	Index  map[string]int
 }

 // HbBlock is the code block backing store.
--- a/hbrt/valuemethods.go
+++ b/hbrt/valuemethods.go
@@ -463,13 +463,7 @@ func vmHashHas(t *Thread, self Value, args []Value) Value {
 	if len(args) == 0 {
 		return MakeBool(false)
 	}
-	key := args[0]
-	for _, k := range self.AsHash().Keys {
-		if valuesEqual(k, key) {
-			return MakeBool(true)
-		}
-	}
-	return MakeBool(false)
+	return MakeBool(self.AsHash().Has(args[0]))
 }

 func vmHashLen(t *Thread, self Value, args []Value) Value {
@@ -484,6 +478,7 @@ func vmHashCopy(t *Thread, self Value, args []Value) Value {
 	}
 	copy(nh.Keys, h.Keys)
 	copy(nh.Values, h.Values)
+	// Index is rebuilt lazily on first Lookup against nh.
 	return MakeHashFrom(nh)
 }

@@ -491,15 +486,7 @@ func vmHashDelete(t *Thread, self Value, args []Value) Value {
 	if len(args) == 0 {
 		return self
 	}
-	key := args[0]
-	h := self.AsHash()
-	for i, k := range h.Keys {
-		if valuesEqual(k, key) {
-			h.Keys = append(h.Keys[:i], h.Keys[i+1:]...)
-			h.Values = append(h.Values[:i], h.Values[i+1:]...)
-			break
-		}
-	}
+	self.AsHash().Delete(args[0])
 	return self
 }