Files
five/hbrdd/mem/memrdd.go
CharlesKWON dd270d5d9d perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x
Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.

Bench (bench_sql) vs 2026-04-08 baseline
 - B1  SELECT *             2,192 → 114   µs   (19x)
 - B6  INNER JOIN           9,291 → 233   µs   (40x)
 - B7  CTE simple           8,037 → 129   µs   (62x)
 - B9  ROW_NUMBER           3,705 → 265   µs   (14x)
 - B10 RANK PARTITION       4,748 → 309   µs   (15x)
 - B12 INSERT (WA cache)    4,319 →  63   µs   (69x)
 - B13 UPDATE (WA cache)    6,144 →  68   µs   (90x)
 - B15 CTE+WIN+JOIN        18,395 → 1,873 µs   (10x)

Infrastructure
 - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
 - HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
 - MEMRDD auto-imported via gengo; all Five programs get mem:name driver
 - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
 - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML

SQL engine
 - FiveSql2 lexer ported to Go (byte FSM) with combined automatic
   template parameterization (literals → ?, concat queries share plan)
 - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
   SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
   SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
 - CTE / subquery / driving-table materialize paths use MEMRDD
 - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
 - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
   dominant B13 cost — 1.6ms/call → gone)

Correctness fixes uncovered during migration
 - ASort default path now sorts dates/logicals/timestamps (was no-op)
 - ORDER BY default NULL placement matches PRG SqlRowCompare across
   Go fast path; explicit NULLS FIRST/LAST honored by both paths
 - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
 - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)

Verification
 - go test ./...              ALL PASS
 - FiveSql2 test_sql1999      43/43
 - tests/compat_harbour       56/56 (+5 new: ASort dates/logicals,
                              AScan int cross-type)
 - Regression test test_null_order.prg for ORDER BY NULL ordering

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 20:20:14 +09:00

669 lines
13 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// memrdd.go — In-memory RDD for Five.
//
// Stores records as Go slices in RAM. No disk I/O at all.
// Supports full Area interface: CRUD, navigation, index, filter.
//
// Usage:
// USE "mem:customers" VIA "MEMRDD" NEW
// dbCreate("mem:temp", aStruct, "MEMRDD")
//
// Compared to file-based DBF:
// - 10-100x faster (no disk, no byte packing)
// - Data lost on exit (intentional — for temp tables)
// - Perfect for: query results, pivot tables, reports, caching
package mem
import (
"five/hbrdd"
"five/hbrt"
"fmt"
"sort"
"strings"
"sync"
)
// --- Driver ---
// MemDriver implements hbrdd.Driver for in-memory tables.
// The type has no fields; table data lives in the package-level registry,
// so every MemDriver value operates on the same set of tables.
type MemDriver struct{}
// Package-level registry of in-memory tables.
var (
	// tables maps a normalized table name (see normalizeName) to the
	// table data shared by all work areas opened on it.
	tables = make(map[string]*memTable) // uppercase name → table
	// tablesMu guards the tables map itself; each table's contents are
	// guarded separately by memTable.mu.
	tablesMu sync.RWMutex
)
// Name returns the driver identifier, "MEMRDD" — the name shown in the
// VIA clause of the usage examples at the top of this file.
func (d *MemDriver) Name() string {
	const driverName = "MEMRDD"
	return driverName
}
// Open attaches a new work area to a table that already exists in the
// registry. params.Path is normalized before lookup; an unknown table
// yields a "table not found" error. On success the table's open counter
// is incremented and a fresh memArea is returned.
func (d *MemDriver) Open(params hbrdd.OpenParams) (hbrdd.Area, error) {
	// Registry lookup under the read lock only; the lock is released
	// before the table's own mutex is taken.
	lookup := func(key string) (*memTable, bool) {
		tablesMu.RLock()
		defer tablesMu.RUnlock()
		t, present := tables[key]
		return t, present
	}

	shared, present := lookup(normalizeName(params.Path))
	if !present {
		return nil, fmt.Errorf("table not found: %s", params.Path)
	}

	shared.mu.Lock()
	shared.openCount++
	shared.mu.Unlock()

	return newMemArea(shared, params.Alias, d), nil
}
// Create registers a new, empty in-memory table under the normalized
// params.Path and returns a work area opened on it. An existing registry
// entry with the same name is replaced. The new table starts with an
// open count of 1 (the returned area).
func (d *MemDriver) Create(params hbrdd.CreateParams) (hbrdd.Area, error) {
	key := normalizeName(params.Path)

	// Field names often arrive padded to a fixed DBF-style width (the SQL
	// engine pads them so a DBF header encodes cleanly). Memory tables
	// have no such constraint, so the trailing blanks are stripped here;
	// otherwise field-name lookups would miss on the padding.
	cols := make([]hbrdd.FieldInfo, 0, len(params.Fields))
	for _, col := range params.Fields {
		col.Name = strings.TrimRight(col.Name, " ")
		cols = append(cols, col)
	}

	created := &memTable{name: key, fields: cols, openCount: 1}

	tablesMu.Lock()
	tables[key] = created
	tablesMu.Unlock()

	return newMemArea(created, params.Alias, d), nil
}
// DropTable removes a table from memory.
func DropTable(name string) {
tablesMu.Lock()
delete(tables, normalizeName(name))
tablesMu.Unlock()
}
// TableExists checks if a table exists in memory.
func TableExists(name string) bool {
tablesMu.RLock()
_, ok := tables[normalizeName(name)]
tablesMu.RUnlock()
return ok
}
// normalizeName converts a user-supplied table path into the registry
// key: surrounding whitespace is removed, one leading "mem:" prefix is
// stripped, and the remainder is trimmed again and upper-cased.
//
// The prefix is matched case-insensitively and after trimming, so
// "MEM:orders" and "  mem:orders" resolve to the same key ("ORDERS") as
// "mem:orders". Table names are already case-insensitive (the key is
// upper-cased), so the prefix is treated the same way.
func normalizeName(s string) string {
	const prefix = "mem:"

	s = strings.TrimSpace(s)
	if len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix) {
		s = s[len(prefix):]
	}
	// Trim again: "mem: orders" leaves a leading blank after the prefix.
	return strings.ToUpper(strings.TrimSpace(s))
}
// --- Table (shared data) ---
// memTable holds the data of one in-memory table. A single memTable is
// shared by every work area (memArea) opened on that table; area methods
// take mu around their access to the record slice and open counter.
type memTable struct {
	mu sync.RWMutex // taken by area methods around access to the fields below
	name string // normalized table name (the registry key)
	fields []hbrdd.FieldInfo // column definitions, fixed when the table is created
	records []memRecord // all records; record number N is stored at index N-1
	indexes []*memIndex // indexes built by CreateIndex, in creation order
	openCount int // incremented by Open, decremented by Close
}
// memRecord is one stored row: its field values plus the deletion mark
// set by Delete and cleared by Recall.
type memRecord struct {
	data []hbrt.Value // field values (0-based), one per table field
	deleted bool // true once the record is marked deleted; Pack removes such records
}
// memIndex is one in-memory index over a table: a sorted list of
// (key, record number) pairs identified by an upper-cased tag.
type memIndex struct {
	tag string // upper-cased tag name, matched by SetOrder
	keyExpr string // NOTE(review): not assigned anywhere in this file — presumably the key expression text
	keyFunc func(rec []hbrt.Value) hbrt.Value // NOTE(review): not assigned anywhere in this file
	entries []memIndexEntry // sorted by key (ascending, or descending when desc is set)
	desc bool // true when the entries are sorted in descending key order
}
// memIndexEntry pairs an index key with the record it was taken from.
type memIndexEntry struct {
	key hbrt.Value // key value captured when the index was built
	recNo uint32 // 1-based record number of the indexed record
}
// --- Area (per work area state) ---
// memArea is the per-work-area cursor over a shared memTable. It holds
// the navigation state (current record, BOF/EOF/FOUND flags, active
// index) and the filter/locate settings; the record data itself lives
// in tbl.
type memArea struct {
	tbl *memTable // shared table data
	alias string // work-area alias
	driver *MemDriver // driver that created this area
	recNo uint32 // 1-based, 0 = phantom
	bof bool // set when a backward skip ran past the first record
	eof bool // set when positioned past the last record (or the table is empty)
	found bool // result of the last Seek, or whatever SetFound stored
	curIndex int // -1 = natural order, 0+ = index
	indexPos int // position in current index
	closed bool // set by Close so a second Close is a no-op
	// Filter/Locate: the expression text and its compiled block are only
	// stored here; nothing in this file evaluates them.
	filterExpr string
	filterBlock func(*hbrt.Thread) bool
	locateExpr string
	locateBlock func(*hbrt.Thread) bool
}
// newMemArea builds a work area over tbl in natural order. The cursor
// starts on record 1 when the table has records; otherwise it starts at
// EOF with record number 0.
//
// The record slice is inspected under tbl.mu, matching the other area
// methods: Open can hand out an area on a table that another area is
// appending to. Callers must not hold tbl.mu.
func newMemArea(tbl *memTable, alias string, drv *MemDriver) *memArea {
	tbl.mu.RLock()
	hasRecords := len(tbl.records) > 0
	tbl.mu.RUnlock()

	a := &memArea{
		tbl:      tbl,
		alias:    alias,
		driver:   drv,
		recNo:    0,
		eof:      true,
		curIndex: -1,
	}
	if hasRecords {
		a.recNo = 1
		a.eof = false
	}
	return a
}
// --- Identity ---
// Driver returns the driver value this work area was created with.
func (a *memArea) Driver() hbrdd.Driver {
	return a.driver
}
// Alias returns the work-area alias.
func (a *memArea) Alias() string {
	return a.alias
}
// SetAlias replaces the work-area alias with s.
func (a *memArea) SetAlias(s string) {
	a.alias = s
}
// --- Lifecycle ---
// Close detaches the work area from its table by decrementing the
// table's open counter. Only the first call has an effect; the table and
// its data stay in the registry. Close always returns nil.
func (a *memArea) Close() error {
	if !a.closed {
		a.closed = true

		shared := a.tbl
		shared.mu.Lock()
		shared.openCount--
		shared.mu.Unlock()
	}
	return nil
}
// Flush does nothing: the data lives only in memory, so there is nothing
// to write out. It always returns nil.
func (a *memArea) Flush() error {
	return nil
}
// --- Navigation ---
// BOF reports the area's beginning-of-file flag.
func (a *memArea) BOF() bool {
	return a.bof
}
// EOF reports the area's end-of-file flag.
func (a *memArea) EOF() bool {
	return a.eof
}
// Found reports the area's found flag (set by Seek or SetFound).
func (a *memArea) Found() bool {
	return a.found
}
// SetFound overwrites the area's found flag with b.
func (a *memArea) SetFound(b bool) {
	a.found = b
}
// SetLocate stores the locate condition for this area: its source text
// and the compiled block. Nothing is evaluated here.
func (a *memArea) SetLocate(expr string, block func(*hbrt.Thread) bool) {
	a.locateExpr, a.locateBlock = expr, block
}
// LocateBlock returns the block stored by SetLocate (nil if none).
func (a *memArea) LocateBlock() func(*hbrt.Thread) bool {
	return a.locateBlock
}
// SetFilter stores the filter condition for this area: its source text
// and the compiled block. The navigation methods in this file do not
// apply the filter themselves. It always returns nil.
func (a *memArea) SetFilter(expr string, block func(*hbrt.Thread) bool) error {
	a.filterExpr, a.filterBlock = expr, block
	return nil
}
// ClearFilter removes any stored filter (text and block). It always
// returns nil.
func (a *memArea) ClearFilter() error {
	a.filterExpr, a.filterBlock = "", nil
	return nil
}
// HasFilter reports whether a filter block is currently stored.
func (a *memArea) HasFilter() bool {
	active := a.filterBlock != nil
	return active
}
// GoTo positions the cursor on record recNo (1-based). A record number
// outside 1..record-count moves the cursor to the phantom position
// (record-count + 1) and sets EOF. BOF and FOUND are always cleared.
// It always returns nil.
func (a *memArea) GoTo(recNo uint32) error {
	a.tbl.mu.RLock()
	total := uint32(len(a.tbl.records))
	a.tbl.mu.RUnlock()

	a.bof, a.found = false, false

	if valid := recNo >= 1 && recNo <= total; valid {
		a.recNo, a.eof = recNo, false
	} else {
		a.recNo, a.eof = total+1, true
	}
	return nil
}
// GoTop moves the cursor to the first record: the first entry of the
// active index when one is selected, otherwise record 1. When there is
// nothing to land on, EOF is set. BOF and FOUND are always cleared.
// It always returns nil.
func (a *memArea) GoTop() error {
	a.tbl.mu.RLock()
	total := uint32(len(a.tbl.records))
	a.tbl.mu.RUnlock()

	a.bof, a.found = false, false

	indexed := a.curIndex >= 0 && a.curIndex < len(a.tbl.indexes)
	switch {
	case indexed:
		entries := a.tbl.indexes[a.curIndex].entries
		if len(entries) > 0 {
			a.indexPos = 0
			a.recNo = entries[0].recNo
			a.eof = false
		} else {
			// Empty index: park on the phantom record.
			a.eof = true
			a.recNo = total + 1
		}
	default:
		// Natural order: record 1 either way; EOF only when the table is empty.
		a.recNo = 1
		a.eof = total == 0
	}
	return nil
}
// GoBottom moves the cursor to the last record: the last entry of the
// active index when one is selected, otherwise the highest record
// number. When there is nothing to land on, EOF is set. BOF and FOUND
// are always cleared. It always returns nil.
func (a *memArea) GoBottom() error {
	a.tbl.mu.RLock()
	total := uint32(len(a.tbl.records))
	a.tbl.mu.RUnlock()

	a.bof, a.found = false, false

	if a.curIndex >= 0 && a.curIndex < len(a.tbl.indexes) {
		entries := a.tbl.indexes[a.curIndex].entries
		last := len(entries) - 1
		if last < 0 {
			// Empty index: park on the phantom record.
			a.eof, a.recNo = true, total+1
			return nil
		}
		a.indexPos = last
		a.recNo = entries[last].recNo
		a.eof = false
		return nil
	}

	// Natural order.
	if total == 0 {
		a.recNo, a.eof = 1, true
	} else {
		a.recNo, a.eof = total, false
	}
	return nil
}
// Skip moves the cursor count records forward (count > 0) or backward
// (count < 0). With an active index the move is delegated to
// skipIndexed; otherwise it follows record-number order.
//
// Forward past the last record: cursor goes to the phantom position
// (record-count + 1) and EOF is set. Backward past the first record:
// cursor stays on record 1 and BOF is set. count == 0 only clears FOUND.
// It always returns nil.
//
// Fixes over the previous version:
//   - A backward skip on an empty table no longer clears EOF; there is
//     no record to land on, so the area stays at EOF (as GoTop leaves it)
//     with BOF set as well.
//   - A record number beyond the phantom position (e.g. left over after
//     the table shrank) is treated as the phantom position, so a
//     backward skip cannot land on a non-existent record.
//   - The bounds tests are arranged so recNo + count cannot overflow.
func (a *memArea) Skip(count int64) error {
	if a.curIndex >= 0 && a.curIndex < len(a.tbl.indexes) {
		return a.skipIndexed(count)
	}

	a.tbl.mu.RLock()
	total := int64(len(a.tbl.records))
	a.tbl.mu.RUnlock()

	a.found = false

	cur := int64(a.recNo)
	if cur > total+1 {
		cur = total + 1
	}

	switch {
	case count > 0:
		a.bof = false
		// cur + count > total, written without the addition.
		if count > total-cur {
			a.recNo = uint32(total) + 1
			a.eof = true
		} else {
			a.recNo = uint32(cur + count)
			a.eof = false
		}
	case count < 0:
		if total == 0 {
			a.recNo = 1
			a.bof = true
			a.eof = true
			return nil
		}
		a.eof = false
		// cur + count < 1, written without the addition.
		if count < 1-cur {
			a.recNo = 1
			a.bof = true
		} else {
			a.recNo = uint32(cur + count)
			a.bof = false
		}
	}
	return nil
}
// skipIndexed moves the cursor count entries along the active index.
// The caller (Skip) has already checked that curIndex selects an
// existing index and must not hold tbl.mu.
//
// Forward past the last entry: cursor goes to the phantom record and EOF
// is set. Backward past the first entry: cursor stays on the first entry
// and BOF is set. count == 0 only clears FOUND. It always returns nil.
//
// Fixes over the previous version:
//   - The starting position is re-derived when indexPos does not match
//     the current record (GoTo, SetOrder, a failed Seek and Append all
//     move the cursor without updating indexPos). At EOF, or when the
//     current record is not in the index, the start is "past the last
//     entry". Previously a stale indexPos could move from the wrong place
//     or index past the entry slice and panic.
//   - On an empty index the area ends at EOF in both directions instead
//     of clearing EOF on a backward skip.
//   - The record count is read under tbl.mu, as Skip's natural-order
//     path does, and the bounds tests cannot overflow.
func (a *memArea) skipIndexed(count int64) error {
	a.tbl.mu.RLock()
	defer a.tbl.mu.RUnlock()

	entries := a.tbl.indexes[a.curIndex].entries
	n := len(entries)
	phantom := uint32(len(a.tbl.records)) + 1

	a.found = false
	if count == 0 {
		return nil
	}

	if n == 0 {
		a.indexPos = 0
		a.recNo = phantom
		a.eof = true
		a.bof = count < 0
		return nil
	}

	// Re-derive the starting position if indexPos is out of step with
	// the cursor. The common case (already in step) costs one comparison.
	pos := a.indexPos
	switch {
	case a.eof:
		pos = n
	case pos < 0 || pos >= n || entries[pos].recNo != a.recNo:
		pos = n
		for i := range entries {
			if entries[i].recNo == a.recNo {
				pos = i
				break
			}
		}
	}

	if count > 0 {
		a.bof = false
		// pos + count >= n, written without the addition.
		if count >= int64(n-pos) {
			a.indexPos = n
			a.recNo = phantom
			a.eof = true
			return nil
		}
		pos += int(count)
		a.indexPos = pos
		a.recNo = entries[pos].recNo
		a.eof = false
		return nil
	}

	// count < 0
	a.eof = false
	// pos + count < 0, written without the addition.
	if count < -int64(pos) {
		a.indexPos = 0
		a.recNo = entries[0].recNo
		a.bof = true
		return nil
	}
	pos += int(count)
	a.indexPos = pos
	a.recNo = entries[pos].recNo
	a.bof = false
	return nil
}
// --- Record info ---
// RecNo returns the current 1-based record number (0 or record-count+1
// when the cursor is on the phantom record).
func (a *memArea) RecNo() uint32 {
	return a.recNo
}
// RecCount returns the number of stored records, including those marked
// deleted. The error is always nil.
func (a *memArea) RecCount() (uint32, error) {
	a.tbl.mu.RLock()
	n := len(a.tbl.records)
	a.tbl.mu.RUnlock()

	return uint32(n), nil
}
// Deleted reports whether the current record carries the deletion mark.
// It returns false when the cursor is not on a stored record.
func (a *memArea) Deleted() bool {
	a.tbl.mu.RLock()
	defer a.tbl.mu.RUnlock()

	rows := a.tbl.records
	if slot := int(a.recNo) - 1; slot >= 0 && slot < len(rows) {
		return rows[slot].deleted
	}
	return false
}
// --- Field access ---
// FieldCount returns the number of fields defined for the table.
func (a *memArea) FieldCount() int {
	return len(a.tbl.fields)
}
// GetFieldInfo returns the definition of the field at the 0-based index,
// or a zero hbrdd.FieldInfo when the index is out of range.
func (a *memArea) GetFieldInfo(index int) hbrdd.FieldInfo {
	defs := a.tbl.fields
	if index < 0 || index >= len(defs) {
		return hbrdd.FieldInfo{}
	}
	return defs[index]
}
// GetValue returns the value of the 0-based field in the current record.
// On the phantom record it returns a nil value and no error; a field
// index outside the record yields a nil value plus an error.
func (a *memArea) GetValue(fieldIndex int) (hbrt.Value, error) {
	a.tbl.mu.RLock()
	defer a.tbl.mu.RUnlock()

	rows := a.tbl.records
	slot := int(a.recNo) - 1
	if slot < 0 || slot >= len(rows) {
		// Phantom record: not an error, just empty.
		return hbrt.MakeNil(), nil
	}

	cells := rows[slot].data
	if fieldIndex >= 0 && fieldIndex < len(cells) {
		return cells[fieldIndex], nil
	}
	return hbrt.MakeNil(), fmt.Errorf("field index %d out of range", fieldIndex)
}
// PutValue stores val in the 0-based field of the current record. It
// returns an error when the cursor is not on a stored record or the
// field index is out of range. Existing indexes are not updated.
func (a *memArea) PutValue(fieldIndex int, val hbrt.Value) error {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	rows := a.tbl.records
	slot := int(a.recNo) - 1
	switch {
	case slot < 0, slot >= len(rows):
		return fmt.Errorf("no current record")
	case fieldIndex < 0, fieldIndex >= len(rows[slot].data):
		return fmt.Errorf("field index %d out of range", fieldIndex)
	}

	rows[slot].data[fieldIndex] = val
	return nil
}
// --- Record operations ---
// Append adds a blank record at the end of the table and positions the
// cursor on it, clearing BOF and EOF. Each field starts with a default
// chosen by its type code: 'C' a blank string of the field length,
// 'N'/'I'/'B' integer zero, 'L' false, 'D' the date built from 0, and
// anything else a nil value. Existing indexes are not updated.
// It always returns nil.
func (a *memArea) Append() error {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	defs := a.tbl.fields
	cells := make([]hbrt.Value, len(defs))
	for pos := range defs {
		var blank hbrt.Value
		switch defs[pos].Type {
		case 'C':
			blank = hbrt.MakeString(strings.Repeat(" ", defs[pos].Len))
		case 'N', 'I', 'B':
			blank = hbrt.MakeInt(0)
		case 'L':
			blank = hbrt.MakeBool(false)
		case 'D':
			blank = hbrt.MakeDate(0)
		default:
			blank = hbrt.MakeNil()
		}
		cells[pos] = blank
	}

	a.tbl.records = append(a.tbl.records, memRecord{data: cells})
	a.recNo = uint32(len(a.tbl.records))
	a.eof, a.bof = false, false
	return nil
}
// Delete marks the current record as deleted. The record stays in the
// table until Pack. It does nothing when the cursor is not on a stored
// record, and always returns nil.
func (a *memArea) Delete() error {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	rows := a.tbl.records
	slot := int(a.recNo) - 1
	if slot < 0 || slot >= len(rows) {
		return nil
	}
	rows[slot].deleted = true
	return nil
}
// Recall clears the deletion mark on the current record. It does nothing
// when the cursor is not on a stored record, and always returns nil.
func (a *memArea) Recall() error {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	rows := a.tbl.records
	slot := int(a.recNo) - 1
	if slot < 0 || slot >= len(rows) {
		return nil
	}
	rows[slot].deleted = false
	return nil
}
// Pack physically removes every record marked deleted, renumbering the
// survivors 1..n in their existing order, and repositions this area at
// the top. It always returns nil.
//
// Fixes over the previous version:
//   - Index entries are rewritten to the new record numbers and entries
//     of removed records are dropped. Previously the indexes kept the
//     pre-pack numbers, so indexed navigation landed on the wrong record
//     or on a record that no longer exists. Key order is unaffected, so
//     no re-sort is needed.
//   - EOF is cleared when records remain (it used to be left set if the
//     area was at EOF before the pack), and indexPos is reset.
//   - With an active index the cursor lands on the index's first entry
//     (EOF if the index is empty), mirroring GoTop.
//
// Other work areas open on the same table keep their own, now possibly
// stale, record numbers.
func (a *memArea) Pack() error {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	old := a.tbl.records
	// remap[i] is the new 1-based number of old record i+1; 0 = removed.
	remap := make([]uint32, len(old))
	var kept []memRecord
	for i := range old {
		if old[i].deleted {
			continue
		}
		kept = append(kept, old[i])
		remap[i] = uint32(len(kept))
	}
	a.tbl.records = kept

	for _, idx := range a.tbl.indexes {
		live := make([]memIndexEntry, 0, len(idx.entries))
		for _, e := range idx.entries {
			slot := int(e.recNo) - 1
			if slot < 0 || slot >= len(remap) || remap[slot] == 0 {
				continue
			}
			e.recNo = remap[slot]
			live = append(live, e)
		}
		idx.entries = live
	}

	// Reposition at the top.
	a.indexPos = 0
	a.recNo = 1
	a.eof = len(kept) == 0
	if a.curIndex >= 0 && a.curIndex < len(a.tbl.indexes) {
		if entries := a.tbl.indexes[a.curIndex].entries; len(entries) > 0 {
			a.recNo = entries[0].recNo
			a.eof = false
		} else {
			a.recNo = uint32(len(kept)) + 1
			a.eof = true
		}
	}
	return nil
}
// Zap removes all records and all indexes from the table and leaves this
// area at EOF on record number 1. It always returns nil.
//
// Because the indexes are dropped, the area's index selection is reset
// to natural order as well. Previously curIndex kept its old value, so
// the area would silently attach to whichever index was next created in
// that slot (possibly by another work area), with a stale indexPos.
// FOUND is cleared since no record remains to have been found.
func (a *memArea) Zap() error {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	a.tbl.records = nil
	a.tbl.indexes = nil

	a.curIndex = -1
	a.indexPos = 0
	a.found = false
	a.recNo = 1
	a.eof = true
	return nil
}
// --- Index support ---
// CreateIndex builds an in-memory index on a single field and makes it
// the active order for this area.
//
// tag is the index name (stored upper-cased), fieldIndex the 0-based
// field whose value is the key (an out-of-range index gives every entry
// a nil key), and desc selects descending order. Records marked deleted
// at build time are left out. Entries with equal keys keep record-number
// order (stable sort). The index is a snapshot: later Append/PutValue
// calls do not update it.
//
// Fixes over the previous version:
//   - Re-creating an existing tag replaces that index in place. It used
//     to append a second index with the same tag, which SetOrder could
//     never select because it returns the first (stale) match.
//   - When the new index is empty the area is placed at EOF with
//     indexPos reset, instead of keeping a cursor state that does not
//     correspond to any index entry.
func (a *memArea) CreateIndex(tag string, fieldIndex int, desc bool) {
	a.tbl.mu.Lock()
	defer a.tbl.mu.Unlock()

	idx := &memIndex{
		tag:     strings.ToUpper(tag),
		desc:    desc,
		entries: make([]memIndexEntry, 0, len(a.tbl.records)),
	}

	// Build entries.
	for i := range a.tbl.records {
		rec := &a.tbl.records[i]
		if rec.deleted {
			continue
		}
		key := hbrt.MakeNil()
		if fieldIndex >= 0 && fieldIndex < len(rec.data) {
			key = rec.data[fieldIndex]
		}
		idx.entries = append(idx.entries, memIndexEntry{
			key:   key,
			recNo: uint32(i + 1),
		})
	}

	// Sort.
	sort.SliceStable(idx.entries, func(i, j int) bool {
		cmp := compareValues(idx.entries[i].key, idx.entries[j].key)
		if desc {
			return cmp > 0
		}
		return cmp < 0
	})

	// Install: reuse the slot of an existing index with the same tag.
	slot := -1
	for i, existing := range a.tbl.indexes {
		if existing.tag == idx.tag {
			slot = i
			break
		}
	}
	if slot >= 0 {
		a.tbl.indexes[slot] = idx
	} else {
		a.tbl.indexes = append(a.tbl.indexes, idx)
		slot = len(a.tbl.indexes) - 1
	}

	a.curIndex = slot
	a.indexPos = 0
	if len(idx.entries) > 0 {
		a.recNo = idx.entries[0].recNo
		a.eof = false
	} else {
		a.recNo = uint32(len(a.tbl.records)) + 1
		a.eof = true
	}
}
// Seek looks key up in the active index by binary search and returns
// whether an exact match was found (also stored in FOUND).
//
//   - Exact match: the cursor moves to the first entry with that key.
//   - No match, soft == true: the cursor moves to the first entry that
//     sorts at or after key in index order, if any; FOUND is false.
//   - Otherwise: the cursor moves to the phantom record and EOF is set.
//   - No active index: only FOUND is cleared; the cursor does not move.
//
// Changes over the previous version:
//   - On a miss, indexPos is set past the last entry so it agrees with
//     the EOF cursor; it used to keep its previous value.
//   - BOF is cleared whenever the cursor is repositioned, as GoTo does.
//   - The shared index and record slices are read under tbl.mu.
//   - The hand-written search is replaced by sort.Search, which yields
//     the same position: the first entry not ordered before key.
func (a *memArea) Seek(key hbrt.Value, soft bool) bool {
	a.tbl.mu.RLock()
	defer a.tbl.mu.RUnlock()

	a.found = false
	if a.curIndex < 0 || a.curIndex >= len(a.tbl.indexes) {
		return false
	}
	idx := a.tbl.indexes[a.curIndex]
	entries := idx.entries

	// First position whose key is not ordered before the target. For a
	// descending index the comparison is inverted to follow index order.
	pos := sort.Search(len(entries), func(i int) bool {
		cmp := compareValues(entries[i].key, key)
		if idx.desc {
			cmp = -cmp
		}
		return cmp >= 0
	})

	inRange := pos < len(entries)
	exact := inRange && compareValues(entries[pos].key, key) == 0
	if exact || (soft && inRange) {
		a.indexPos = pos
		a.recNo = entries[pos].recNo
		a.eof = false
		a.bof = false
		a.found = exact
		return exact
	}

	// Not found.
	a.indexPos = len(entries)
	a.recNo = uint32(len(a.tbl.records)) + 1
	a.eof = true
	a.bof = false
	return false
}
// SetOrder selects the active index by tag name (case-insensitive).
// An empty tag, or a tag that matches no index, selects natural
// record-number order (curIndex = -1). When several indexes share a tag
// the first one wins. The cursor itself is not moved.
func (a *memArea) SetOrder(tag string) {
	// Start from natural order; only a matching tag overrides it.
	a.curIndex = -1
	if tag == "" {
		return
	}

	want := strings.ToUpper(tag)
	for pos := range a.tbl.indexes {
		if a.tbl.indexes[pos].tag == want {
			a.curIndex = pos
			return
		}
	}
}
// --- Value comparison ---
// compareValues orders two values for index building and seeking. It
// returns -1, 0 or 1 when a sorts before, equal to, or after b.
//
// Values of the same kind compare naturally: strings byte-wise,
// numerics as doubles, dates by Julian day, logicals with false before
// true.
//
// Values of different kinds are ordered by kind: anything not matched
// below (e.g. nil) first, then logical, numeric, date, string. The
// previous version returned 0 for every mixed pair. That made unrelated
// values "equal", which is not a consistent ordering for sorting (nil
// equal to both "a" and "b" while "a" < "b") and let Seek report a match
// for a key of the wrong type. Two values that are both of an unmatched
// kind still compare equal. The cross-kind order is arbitrary but total.
func compareValues(a, b hbrt.Value) int {
	if a.IsString() && b.IsString() {
		return strings.Compare(a.AsString(), b.AsString())
	}
	if a.IsNumeric() && b.IsNumeric() {
		fa, fb := a.AsNumDouble(), b.AsNumDouble()
		switch {
		case fa < fb:
			return -1
		case fa > fb:
			return 1
		}
		return 0
	}
	if a.IsDate() && b.IsDate() {
		ja, jb := a.AsJulian(), b.AsJulian()
		switch {
		case ja < jb:
			return -1
		case ja > jb:
			return 1
		}
		return 0
	}
	if a.IsLogical() && b.IsLogical() {
		ba, bb := a.AsBool(), b.AsBool()
		switch {
		case !ba && bb:
			return -1
		case ba && !bb:
			return 1
		}
		return 0
	}

	// Different kinds: fall back to a fixed ranking of the kinds. The
	// tests run in the same order as the same-kind checks above.
	rank := func(v hbrt.Value) int {
		switch {
		case v.IsString():
			return 4
		case v.IsNumeric():
			return 2
		case v.IsDate():
			return 3
		case v.IsLogical():
			return 1
		}
		return 0
	}
	ra, rb := rank(a), rank(b)
	switch {
	case ra < rb:
		return -1
	case ra > rb:
		return 1
	}
	return 0
}
// --- Registration ---
func init() {
hbrdd.RegisterDriver(&MemDriver{})
}