perf: FieldPos O(1) cache + xbase import detection for function-call PRGs
Two changes: a SQLite-inspired lookup optimization for RDD and SQL workloads, and a compiler fix for hbrdd import detection:
1. FieldPos() O(1) column binding cache
Before: FieldPos(name) linear scan — O(n) per call with string
comparison. In SQL engines that call FieldPos per row per
column, this is hundreds of thousands of calls.
After: DBFArea builds a map[UPPER(name)]→pos on first lookup.
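All subsequent lookups are O(1) hash lookups. This is similar in
spirit to how SQLite resolves column names to column indexes once,
at statement-prepare time, rather than once per row. (SQLite's
"column affinity" is a separate, type-related concept.) Here the
map is built lazily on the first lookup, not at prepare time.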
Implementation:
- hbrdd/dbf/dbf.go: DBFArea.FieldPosCache(name) method
- hbrtl/procinfo.go: FieldPos RTL uses fieldPosCacher interface
- Lazy init: only pays for tables that get queried
2. hbrdd import auto-detection for function-call style PRGs
Before: compiler only added hbrdd import when PRG used xBase commands
(USE, SKIP, INDEX...). Pure function-call style like
`dbUseArea(.T.,,"t")`, `FieldPut(1, val)` was missed —
generated Go failed to compile ("undefined: hbrdd").
After: scanStmtsForXBase walks ExprStmt bodies too, detecting
CallExpr to any of the ~40 xBase RTL function names.
FIELD->NAME alias expressions also trigger the import.
Resolves: small PRGs that use only dbUseArea/FieldGet/FieldPut.
Benchmark notes (50k records):
Raw RDD scan: 7 ms (baseline)
FiveSql2 SELECT WHERE: 157 ms (unchanged — the bottleneck is not FieldPos but the PRG-level expression-tree walk per row)
compat_harbour 51/51: PASS
FiveSql2 43/43: 100%
The FieldPos cache helps heavy field-name-based code paths but the
primary FiveSql2 bottleneck is the PRG interpreter walking expression
ASTs per row (needs bytecode compilation to close the gap).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -3,7 +3,10 @@
|
||||
|
||||
package gengo
|
||||
|
||||
import "five/compiler/ast"
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// hasXBaseCommands checks if the file contains any xBase commands.
|
||||
func hasXBaseCommands(file *ast.File) bool {
|
||||
@@ -22,6 +25,50 @@ func hasXBaseCommands(file *ast.File) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// xbaseFuncNames is the set of upper-cased RTL function names whose use
// requires the hbrdd import in the generated Go code. A PRG may reach these
// purely through function-call syntax, without any xBase command such as
// USE, and the compiler still needs hbrdd in that case.
var xbaseFuncNames = func() map[string]bool {
	names := []string{
		"DBUSEAREA", "DBCREATE", "DBAPPEND", "DBSKIP",
		"DBGOTO", "DBGOTOP", "DBGOBOTTOM", "DBSEEK",
		"DBDELETE", "DBRECALL", "DBCLOSEAREA", "DBCLOSEALL",
		"DBSELECTAREA", "DBCOMMIT", "DBPACK", "DBZAP",
		"DBRLOCK", "DBRUNLOCK", "FLOCK", "DBUNLOCK",
		"FIELDGET", "FIELDPUT", "FIELDPOS", "FIELDNAME",
		"FCOUNT", "RECNO", "RECCOUNT", "EOF", "BOF",
		"FOUND", "DELETED", "LASTREC", "ALIAS",
		"ORDSETFOCUS", "ORDCOUNT", "ORDNAME", "ORDKEY",
		"USED", "SELECT", "SETDELETED",
		"FIVE_SQL", // FiveSql2 entry point also needs hbrdd
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
|
||||
|
||||
func scanExprForXBase(expr ast.Expr) bool {
|
||||
if expr == nil {
|
||||
return false
|
||||
}
|
||||
switch e := expr.(type) {
|
||||
case *ast.CallExpr:
|
||||
if ident, ok := e.Func.(*ast.IdentExpr); ok {
|
||||
if xbaseFuncNames[strings.ToUpper(ident.Name)] {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, arg := range e.Args {
|
||||
if scanExprForXBase(arg) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
case *ast.BinaryExpr:
|
||||
return scanExprForXBase(e.Left) || scanExprForXBase(e.Right)
|
||||
case *ast.AssignExpr:
|
||||
return scanExprForXBase(e.Right)
|
||||
case *ast.AliasExpr:
|
||||
// FIELD->NAME always needs hbrdd
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func scanStmtsForXBase(stmts []ast.Stmt) bool {
|
||||
for _, s := range stmts {
|
||||
switch v := s.(type) {
|
||||
@@ -29,6 +76,10 @@ func scanStmtsForXBase(stmts []ast.Stmt) bool {
|
||||
*ast.ReplaceCmd, *ast.AppendCmd, *ast.DeleteCmd,
|
||||
*ast.SelectCmd, *ast.IndexCmd, *ast.SetCmd:
|
||||
return true
|
||||
case *ast.ExprStmt:
|
||||
if scanExprForXBase(v.X) {
|
||||
return true
|
||||
}
|
||||
case *ast.IfStmt:
|
||||
if scanStmtsForXBase(v.Body) || scanStmtsForXBase(v.ElseBody) {
|
||||
return true
|
||||
|
||||
@@ -63,9 +63,13 @@ type DBFArea struct {
|
||||
idxState *indexState
|
||||
|
||||
// File locking state (byte-range locks via fcntl)
|
||||
// Harbour: hb_fsLockLarge with FL_LOCK + FLX_SHARED/FLX_EXCLUSIVE
|
||||
fileLocked bool // FLOCK() held
|
||||
lockedRecs map[uint32]bool // records locked by DBRLOCK()
|
||||
|
||||
// Field position cache — UPPER(name) → 1-based index.
|
||||
// Built lazily on first FieldPosCache() call.
|
||||
// Gives O(1) name lookup instead of an O(n) linear scan (similar in spirit to SQLite resolving column names once at prepare time).
|
||||
fieldPosMap map[string]int
|
||||
}
|
||||
|
||||
// DBFDriver is the driver factory for DBF files.
|
||||
@@ -357,6 +361,29 @@ func (a *DBFArea) Close() error {
|
||||
// MemoFile returns the FPT memo file, or nil if no memo fields.
|
||||
func (a *DBFArea) MemoFile() *FPTFile { return a.memoFile }
|
||||
|
||||
// FieldPosCache returns the 1-based field position for a field name.
|
||||
// Uses a lazily-built hash map for O(1) lookup instead of O(n) linear scan.
|
||||
// SQLite: "column affinity binding" — critical for SQL engines that call
|
||||
// FieldPos() hundreds of thousands of times per query.
|
||||
func (a *DBFArea) FieldPosCache(name string) int {
|
||||
if a.fieldPosMap == nil {
|
||||
a.fieldPosMap = make(map[string]int, len(a.fieldDescs))
|
||||
for i, fd := range a.fieldDescs {
|
||||
// Trim null bytes + spaces from the [11]byte field name
|
||||
n := 0
|
||||
for n < 11 && fd.Name[n] != 0 {
|
||||
n++
|
||||
}
|
||||
fname := strings.ToUpper(strings.TrimSpace(string(fd.Name[:n])))
|
||||
a.fieldPosMap[fname] = i + 1
|
||||
}
|
||||
}
|
||||
if pos, ok := a.fieldPosMap[name]; ok {
|
||||
return pos
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (a *DBFArea) Flush() error {
|
||||
if a.dirty {
|
||||
if err := a.flushRecord(); err != nil {
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"five/hbrt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -109,8 +110,8 @@ func Center(t *hbrt.Thread) {
|
||||
// FIELDPOS(cFieldName) → nPos
|
||||
func FieldPos(t *hbrt.Thread) {
|
||||
t.Frame(1, 0)
|
||||
defer t.EndProc()
|
||||
fname := t.Local(1).AsString()
|
||||
defer t.EndProcFast()
|
||||
fname := strings.ToUpper(t.Local(1).AsString())
|
||||
wam := getWA(t)
|
||||
if wam == nil {
|
||||
t.RetInt(0)
|
||||
@@ -121,10 +122,23 @@ func FieldPos(t *hbrt.Thread) {
|
||||
t.RetInt(0)
|
||||
return
|
||||
}
|
||||
|
||||
// Try DBFArea's built-in field position cache (O(1) hash lookup).
|
||||
// Falls back to linear scan for non-DBF areas (mem RDD, etc.).
|
||||
type fieldPosCacher interface {
|
||||
FieldPosCache(name string) int
|
||||
}
|
||||
if fpc, ok := area.(fieldPosCacher); ok {
|
||||
pos := fpc.FieldPosCache(fname)
|
||||
t.RetInt(int64(pos))
|
||||
return
|
||||
}
|
||||
|
||||
// Fallback: linear scan
|
||||
for i := 0; i < area.FieldCount(); i++ {
|
||||
fi := area.GetFieldInfo(i)
|
||||
if eqFold(fi.Name, fname) {
|
||||
t.RetInt(int64(i + 1)) // Harbour: 1-based position
|
||||
if strings.EqualFold(fi.Name, fname) {
|
||||
t.RetInt(int64(i + 1))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user