Files
five/hbrt/hash_helpers.go
CharlesKWON dd270d5d9d perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x
Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.

Bench (bench_sql) vs 2026-04-08 baseline
 - B1  SELECT *             2,192 → 114   µs   (19x)
 - B6  INNER JOIN           9,291 → 233   µs   (40x)
 - B7  CTE simple           8,037 → 129   µs   (62x)
 - B9  ROW_NUMBER           3,705 → 265   µs   (14x)
 - B10 RANK PARTITION       4,748 → 309   µs   (15x)
 - B12 INSERT (WA cache)    4,319 →  63   µs   (69x)
 - B13 UPDATE (WA cache)    6,144 →  68   µs   (90x)
 - B15 CTE+WIN+JOIN        18,395 → 1,873 µs   (10x)

Infrastructure
 - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
 - HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
 - MEMRDD auto-imported via gengo; all Five programs get mem:name driver
 - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
 - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML

SQL engine
 - FiveSql2 lexer ported to Go (byte FSM) with combined automatic
   template parameterization (literals → ?, concat queries share plan)
 - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
   SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
   SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
 - CTE / subquery / driving-table materialize paths use MEMRDD
 - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
 - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
   dominant B13 cost — 1.6ms/call → gone)

Correctness fixes uncovered during migration
 - ASort default path now sorts dates/logicals/timestamps (was no-op)
 - ORDER BY default NULL placement matches PRG SqlRowCompare across
   Go fast path; explicit NULLS FIRST/LAST honored by both paths
 - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
 - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)

Verification
 - go test ./...              ALL PASS
 - FiveSql2 test_sql1999      43/43
 - tests/compat_harbour       56/56 (+5 new: ASort dates/logicals,
                              AScan int cross-type)
 - Regression test test_null_order.prg for ORDER BY NULL ordering

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 20:20:14 +09:00

185 lines
5.0 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package hbrt
import (
"encoding/binary"
"math"
)
// hashKey returns the canonical string under which v is stored in
// HbHash.Index. ok is false for every Value kind not handled below; the
// returned string is then empty and the caller is expected to fall back
// to a linear scan.
//
// Intended contract (relied on by the index, not enforced here): two
// Values that compare equal via valueEqual must produce the same key.
//
// Key layout: a one-byte type tag followed by the payload, so keys of
// different kinds cannot collide (the string "N" encodes as "SN", Nil
// as "N"):
//
//	Nil        "N"
//	String     'S' + the string bytes
//	Integer    'I' + 8-byte little-endian int64
//	Double     'I' + int64 when the value is integral and fits in int64,
//	           otherwise 'F' + 8-byte little-endian IEEE-754 bits
//	Logical    "L1" / "L0"
//	Date       'D' + 8-byte little-endian Julian day
//	Timestamp  'T' + 8-byte Julian day + 4-byte milliseconds
//
// Numeric folding makes h[1] and h[1.0] address the same slot. Both
// +0.0 and -0.0 are integral and therefore fold to integer 0.
//
// NOTE(review): integers whose magnitude exceeds 2^53 are not exactly
// representable as doubles. If valueEqual compares int against double by
// converting to float64, such pairs could compare equal yet hash apart;
// confirm against valueEqual.
func hashKey(v Value) (string, bool) {
	switch {
	case v.IsNil():
		return "N", true
	case v.IsString():
		return "S" + v.AsString(), true
	case v.IsNumeric():
		var buf [9]byte
		if v.IsNumInt() {
			buf[0] = 'I'
			binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsNumInt()))
			return string(buf[:]), true
		}
		d := v.AsDouble()
		// int64 covers the half-open range [-2^63, 2^63). The upper bound
		// must be exclusive: 2^63 is exactly representable as a float64
		// but not as an int64, and converting an out-of-range float to an
		// integer is implementation-dependent in Go, so admitting it
		// would alias that double onto an unrelated integer key.
		// NaN and ±Inf fail these comparisons and take the 'F' form.
		const int64Limit = 1 << 63
		if d >= -int64Limit && d < int64Limit && d == math.Trunc(d) {
			buf[0] = 'I'
			binary.LittleEndian.PutUint64(buf[1:], uint64(int64(d)))
			return string(buf[:]), true
		}
		buf[0] = 'F'
		binary.LittleEndian.PutUint64(buf[1:], math.Float64bits(d))
		return string(buf[:]), true
	case v.IsLogical():
		if v.AsBool() {
			return "L1", true
		}
		return "L0", true
	case v.IsDate():
		var buf [9]byte
		buf[0] = 'D'
		binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsJulian()))
		return string(buf[:]), true
	case v.IsTimestamp():
		var buf [13]byte
		buf[0] = 'T'
		binary.LittleEndian.PutUint64(buf[1:9], uint64(v.AsJulian()))
		binary.LittleEndian.PutUint32(buf[9:], uint32(v.AsTimeMs()))
		return string(buf[:]), true
	}
	return "", false
}
// ensureIndex makes h.Index usable for lookups, rebuilding it from Keys
// when it looks out of date. The index counts as current when it is
// non-nil and holds exactly as many entries as Keys has indexable keys
// (keys for which hashKey reports ok). Call it before reading Index if
// Keys may have been changed by direct slice access.
//
// The check is a count comparison only: a key swapped in place for
// another indexable key leaves the count unchanged and is not detected.
// On rebuild, when several keys share a canonical string the highest
// slot wins.
//
// NOTE(review): the currency check hashes every key on each call, so
// this costs O(len(Keys)) even when nothing is rebuilt. Tracking the
// indexable count (or a dirty flag) on HbHash would make it O(1).
func (h *HbHash) ensureIndex() {
	current := h.Index != nil
	if current {
		indexable := 0
		for pos := range h.Keys {
			if _, ok := hashKey(h.Keys[pos]); ok {
				indexable++
			}
		}
		current = indexable == len(h.Index)
	}
	if current {
		return
	}

	h.Index = make(map[string]int, len(h.Keys))
	for pos := range h.Keys {
		canon, ok := hashKey(h.Keys[pos])
		if !ok {
			continue
		}
		h.Index[canon] = pos
	}
}
// Lookup returns the slot of key in Keys/Values, or -1 when the key is
// absent.
//
// Indexable keys (hashKey reports ok) are resolved with ensureIndex
// followed by a single probe of Index. The probe is a map lookup, but
// ensureIndex itself inspects Keys before it.
//
// Non-indexable keys never touch Index: Keys is scanned front to back
// and the first slot whose key satisfies valueEqual is returned.
func (h *HbHash) Lookup(key Value) int {
	canon, indexable := hashKey(key)
	if !indexable {
		for pos := range h.Keys {
			if valueEqual(h.Keys[pos], key) {
				return pos
			}
		}
		return -1
	}

	h.ensureIndex()
	pos, present := h.Index[canon]
	if !present {
		return -1
	}
	return pos
}
// Has reports whether key is present in the hash, i.e. whether Lookup
// finds a slot for it.
func (h *HbHash) Has(key Value) bool {
	slot := h.Lookup(key)
	return slot >= 0
}
// HashGet returns the value stored under key, or the result of MakeNil
// when Lookup does not find the key.
// (The name is HashGet rather than Get to avoid clashing with
// method tables that use the name Get.)
func (h *HbHash) HashGet(key Value) Value {
	slot := h.Lookup(key)
	if slot < 0 {
		return MakeNil()
	}
	return h.Values[slot]
}
// Set binds key to val. When Lookup finds the key, its value is
// overwritten in place and the key keeps its slot; otherwise the pair is
// appended as a new last slot. The result is true only when a new slot
// was added.
func (h *HbHash) Set(key, val Value) bool {
	slot := h.Lookup(key)
	added := slot < 0
	if added {
		h.appendPair(key, val)
	} else {
		h.Values[slot] = val
	}
	return added
}
// Append adds key → val as a new last slot without checking whether key
// already exists; it simply delegates to appendPair. The caller must
// guarantee the key is not already present (e.g., bulk loaders): a
// repeated key would occupy two slots in Keys/Values, and for an
// indexable key the Index entry would be overwritten to point at the
// newer slot.
func (h *HbHash) Append(key, val Value) {
h.appendPair(key, val)
}
// appendPair pushes key/val onto the end of Keys/Values and, when the
// key is indexable (hashKey reports ok), records the new slot in Index.
// Index is created on first use. No duplicate check is performed, so an
// existing Index entry for the same canonical key is overwritten with
// the new slot. Non-indexable keys leave Index untouched.
func (h *HbHash) appendPair(key, val Value) {
	slot := len(h.Keys)
	h.Keys, h.Values = append(h.Keys, key), append(h.Values, val)

	canon, indexable := hashKey(key)
	if !indexable {
		return
	}
	if h.Index == nil {
		h.Index = make(map[string]int, 8)
	}
	h.Index[canon] = slot
}
// Delete removes key and its value. It reports whether the key was
// present; a missing key leaves the hash untouched.
//
// Entries after the removed one shift down by one slot, so the remaining
// keys keep their insertion order (Harbour KEEPORDER semantic). Because
// every later slot is renumbered, Index is dropped (set to nil) instead
// of being patched; ensureIndex rebuilds it on demand.
//
// The slot vacated at the end of each backing array is reset to the zero
// Value. Without that, the array would keep a stale copy of the last
// element beyond the new length, keeping whatever it references
// reachable for as long as the hash lives.
func (h *HbHash) Delete(key Value) bool {
	i := h.Lookup(key)
	if i < 0 {
		return false
	}

	// drop removes element i from s in place and clears the freed tail
	// slot before shrinking the slice.
	drop := func(s []Value) []Value {
		last := len(s) - 1
		copy(s[i:], s[i+1:])
		var zero Value
		s[last] = zero
		return s[:last]
	}
	h.Keys = drop(h.Keys)
	h.Values = drop(h.Values)
	h.Index = nil
	return true
}
// HashFromPairs builds an HbHash from a flat slice of alternating
// key/value Values (the layout the original note attributes to HB_HASH
// literals / hb_Hash()).
//
// Each pair is inserted with Set, so a repeated key keeps the slot of
// its first occurrence and ends up bound to the last value given for it.
// An unpaired trailing element is ignored. The result is never nil.
func HashFromPairs(pairs []Value) *HbHash {
	result := new(HbHash)
	// Round down to an even count so a dangling key without a value is
	// skipped.
	paired := len(pairs) - len(pairs)%2
	for at := 0; at < paired; at += 2 {
		k, v := pairs[at], pairs[at+1]
		result.Set(k, v)
	}
	return result
}