Files
five/hbrdd/driver.go
CharlesKWON 7cc729f394 perf(index): compiled key evaluator — UDF INDEX 2.7x faster
Eliminate MacroEval overhead for INDEX ON with UDF/complex expressions.

Before: gengo passed KeyExpr as a string → indexer called MacroEval()
        per record (50k × string parse + symbol lookup + function call).

After:  gengo emits a Go closure (_keyFunc) that inlines the AST of
        the key expression as direct Go code. The indexer calls the
        closure directly — zero string parsing, zero runtime symbol
        lookup for the hot loop.

Three code paths in the closure, depending on expression type:
  1. UDF call:          FindSymbol("FULLNAME") + Function(0)
                        (symbol lookup once per closure creation, not per record)
  2. Field reference:   GetValue(fieldIndex) inline
                        (no MacroEval, no FIELD-> alias resolution)
  3. UPPER/LOWER(expr): strings.ToUpper/Lower inline
                        (no RTL function call overhead)

Architecture (Go compiler design principle):
  Compile time knows the AST → emit native code.
  Don't serialize to string → re-parse at runtime 50k times.

Benchmark (50k records, 3 UDF indexes):
                  before    after     Harbour     ratio
  3 UDF INDEX    163.0ms   60.0ms    55.0ms      Five/HB = 1.09x
  SEEK 10k         7.6ms    7.6ms    14.0ms      Five 1.8x faster
  SCAN 50k         3.4ms    3.4ms     4.0ms      Five 15% faster
  TOTAL          233.0ms  130.0ms   147.0ms      Five 12% faster overall

UDF INDEX build went from 3x SLOWER than Harbour to nearly EQUAL.
SEEK/SCAN remain faster than Harbour (mmap + NTX optimizations).

Changes:
  hbrdd/driver.go     KeyFunc field in OrderCreateParams
  hbrdd/dbf/indexer.go  compiled path using KeyFunc before MacroEval fallback
  compiler/gengo/gengo.go  emitIndexKeyExpr: field-aware AST→Go emitter
                           for INDEX ON key expressions

Correctness: Harbour vs Five UDF diff = 0 (25-line output match)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 02:36:37 +09:00

224 lines
5.9 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// RDD (Replaceable Database Driver) interface definitions for Five.
//
// Design: Harbour's 101-method RDDFUNCS vtable → Go interface composition.
// Each interface is small and focused (Go philosophy).
// Drivers implement only what they need; BaseArea provides defaults.
//
// Inheritance via Go embedding:
// BaseArea (WAAREA) → DBFArea → NTXArea / CDXArea
//
// Reference:
// harbour-core/include/hbapirdd.h (lines 640-816)
// docs/rdd-architecture-spec.md
package hbrdd
import (
"five/hbrt"
"fmt"
"strings"
"sync"
)
// --- Driver registry ---
// driversMu guards the drivers map; hold it for every read or write.
var driversMu sync.RWMutex

// drivers is the package-level driver registry. Keys are the upper-cased
// driver names, which is what makes lookups case-insensitive.
var drivers = make(map[string]Driver)
// RegisterDriver adds d to the driver registry under its upper-cased name.
// Registering a second driver with the same name (ignoring case) replaces
// the earlier entry.
//
// d.Name() is deliberately evaluated before the registry lock is taken, and
// the unlock is deferred: if Name panics (for example because d is nil) the
// panic propagates exactly as before, but the mutex is never left locked,
// so a caller that recovers cannot wedge later registry calls.
func RegisterDriver(d Driver) {
	key := strings.ToUpper(d.Name())

	driversMu.Lock()
	defer driversMu.Unlock()
	drivers[key] = d
}
// GetDriver looks up a registered driver by name. The name is upper-cased
// before the lookup, so matching is case-insensitive.
//
// It returns the driver and a nil error on a hit, or a nil Driver and an
// "unknown RDD driver" error naming the requested driver on a miss.
func GetDriver(name string) (Driver, error) {
	key := strings.ToUpper(name)

	driversMu.RLock()
	drv, registered := drivers[key]
	driversMu.RUnlock()

	if registered {
		return drv, nil
	}
	return nil, fmt.Errorf("unknown RDD driver: %s", name)
}
// --- Core interfaces ---
// Driver is the factory side of an RDD: it has a name and hands out work
// areas, either for an existing table or for a newly created one.
//
// Harbour counterpart: an RDD node with its RDDFUNCS table.
type Driver interface {
	// Name returns the driver's name. The registry upper-cases it to form
	// the lookup key.
	Name() string

	// Open returns a work area for the existing table described by params.
	Open(params OpenParams) (Area, error)

	// Create returns a work area for the new table described by params.
	Create(params CreateParams) (Area, error)
}
// OpenParams carries the options for opening an existing table.
type OpenParams struct {
	// Path is the file path, given without an extension.
	Path string
	// Alias is the workarea alias.
	Alias string
	// Shared selects shared access mode; ReadOnly selects read-only mode.
	Shared, ReadOnly bool
	// CodePage is the code page name.
	CodePage string
}
// CreateParams carries the options for creating a new table.
type CreateParams struct {
	// Path and Alias mirror the same-named OpenParams fields.
	// NOTE(review): whether Path is also extension-less here is not stated
	// in this file — confirm against the drivers.
	Path, Alias string
	// Fields holds one FieldInfo per field of the table to create.
	Fields []FieldInfo
	// CodePage is the code page name.
	CodePage string
}
// FieldInfo is the runtime description of one database field.
//
// Harbour counterpart: DBFFIELD, which takes 32 bytes in the file; this
// struct is the in-memory representation.
type FieldInfo struct {
	// Name is the field name, up to 10 chars (Harbour limit).
	Name string
	// Type is the field type code:
	// 'C', 'N', 'L', 'D', 'M', 'I', 'B', '@', '+', '=', '^', 'Y', etc.
	Type byte
	// Len is the field length; Dec is the number of decimal places.
	Len, Dec int
	// Flags is a bit set: 0x01=system, 0x02=nullable, 0x04=binary.
	Flags byte
}
// --- Area interface (WAAREA + DBF core) ---
// Area is the main interface for working with one database table (a work
// area).
//
// Harbour counterpart: AREAP, dispatched through the SELF_* macros.
type Area interface {
	// --- Identity ---

	Driver() Driver
	Alias() string
	SetAlias(alias string)

	// --- Lifecycle ---

	Close() error
	Flush() error

	// --- Movement ---
	// Harbour: hb_waBof, hb_waEof, hb_waFound, hb_waGoTo, etc.

	BOF() bool
	EOF() bool
	Found() bool
	GoTo(recNo uint32) error
	GoTop() error
	GoBottom() error
	Skip(count int64) error

	// --- Record info ---

	RecNo() uint32
	RecCount() (uint32, error)
	Deleted() bool

	// --- Field access ---
	// Harbour: SELF_FIELDCOUNT, SELF_GETVALUE, SELF_PUTVALUE

	FieldCount() int
	GetFieldInfo(index int) FieldInfo
	GetValue(fieldIndex int) (hbrt.Value, error)
	PutValue(fieldIndex int, val hbrt.Value) error

	// --- Record operations ---
	// Harbour: SELF_APPEND, SELF_DELETE, SELF_RECALL

	Append() error
	Delete() error
	Recall() error

	// --- Bulk operations ---

	Pack() error
	Zap() error

	// --- State ---
	// Harbour: hb_waSetFound, plus locate support. SetLocate takes both
	// the expression text and a block that evaluates it; LocateBlock
	// returns a block of the same type.

	SetFound(b bool)
	SetLocate(expr string, block func(*hbrt.Thread) bool)
	LocateBlock() func(*hbrt.Thread) bool

	// --- Filter ---
	// SetFilter takes the expression text together with a block that
	// evaluates it.

	SetFilter(expr string, block func(*hbrt.Thread) bool) error
	ClearFilter() error
	HasFilter() bool
}
// --- Optional interfaces (drivers implement as needed) ---
// Indexer is the optional interface for index (order) management and
// key-based seeking.
//
// Harbour counterpart: the order* methods in RDDFUNCS (9 methods).
// Only DBFNTX and DBFCDX implement this.
type Indexer interface {
	// --- Order management ---

	OrderCreate(params OrderCreateParams) error
	OrderListAdd(path string) error
	OrderListClear() error
	OrderListFocus(tagName string) error
	OrderListRebuild() error
	OrderDestroy(tagName string) error
	OrderInfo(ordNo int) (*OrderInfo, error)

	// --- Key-based seeking (Harbour: SELF_SEEK) ---

	Seek(key hbrt.Value, softSeek, findLast bool) (bool, error)
}
// OrderCreateParams collects the arguments of an INDEX ON request.
type OrderCreateParams struct {
	// TagName is the index tag name.
	TagName string
	// KeyExpr is the key expression text, e.g. "UPPER(lastname+firstname)".
	KeyExpr string
	// ForExpr is the FOR condition text, e.g. "active = .T.".
	ForExpr string
	// FilePath is the index file path.
	FilePath string
	// Unique and Descending are option flags for the new order; their
	// exact effect is defined by the implementing driver.
	Unique, Descending bool

	// KeyFunc is an optional, pre-compiled key evaluator. If it is
	// non-nil the indexer invokes it directly and skips MacroEval on the
	// KeyExpr string. gengo generates it as an inline Go closure that
	// follows the key expression's AST, so nothing is parsed at runtime
	// and symbol lookups happen outside the per-record loop.
	//
	// Contract: the caller positions the workarea (GoTo) before each
	// call; the result is the key value of the current record.
	KeyFunc func() hbrt.Value
}
// OrderInfo describes one index order, as returned by Indexer.OrderInfo.
type OrderInfo struct {
	// Name, KeyExpr and ForExpr are the order's name, key expression and
	// FOR condition.
	Name, KeyExpr, ForExpr string
	// Unique and Descending are the order's option flags.
	Unique, Descending bool
	// KeyCount is presumably the number of keys in the order — TODO
	// confirm against the drivers.
	KeyCount uint32
	// Custom presumably marks a custom (manually maintained) order —
	// TODO confirm against the drivers.
	Custom bool
}
// Locker is the optional interface for record-level and file-level
// locking.
//
// Harbour counterpart: SELF_LOCK, SELF_UNLOCK, SELF_RAWLOCK.
type Locker interface {
	// --- Record locks, addressed by record number ---

	LockRecord(recNo uint32) (bool, error)
	UnlockRecord(recNo uint32) error
	IsLocked(recNo uint32) bool

	// --- Whole-file lock ---

	LockFile() (bool, error)
	UnlockFile() error
}
// Relater is the optional interface behind SET RELATION.
//
// Harbour counterpart: SELF_SETREL, SELF_CLEARREL, SELF_FORCEREL.
type Relater interface {
	// SetRelation takes the child work area, a block that produces the
	// relation key, and a scoped flag.
	SetRelation(child Area, keyExpr func(*hbrt.Thread) hbrt.Value, scoped bool) error

	ClearRelation() error
	ForceRel() error
	SyncChildren() error
}
// MemoHandler is the optional interface for reading and writing memo
// fields.
//
// Harbour counterpart: SELF_OPENMEMFILE, SELF_CLOSEMEMFILE,
// SELF_GETVALUEFILE, SELF_PUTVALUEFILE.
type MemoHandler interface {
	// --- Memo file lifecycle ---

	OpenMemo(path string) error
	CloseMemo() error

	// --- Memo data ---
	// ReadMemo takes a block number and returns bytes; WriteMemo takes
	// bytes and returns a uint32 (presumably the block number written —
	// TODO confirm against the implementations).

	ReadMemo(blockNo uint32) ([]byte, error)
	WriteMemo(data []byte) (uint32, error)
}
// Scoper is the optional interface behind SET SCOPE, used for index-based
// range queries.
type Scoper interface {
	// SetScope installs the top and bottom scope values.
	SetScope(top, bottom hbrt.Value) error

	// ClearScope removes the scope.
	ClearScope() error
}