perf: FieldPos O(1) cache + xbase import detection for function-call PRGs

Two changes for RDD and SQL workloads — one SQLite-style lookup
optimization and one compiler import-detection fix:

1. FieldPos() O(1) column binding cache

   Before: FieldPos(name) linear scan — O(n) per call with string
           comparison. In SQL engines that call FieldPos per row per
           column, this is hundreds of thousands of calls.

   After:  DBFArea builds a map[UPPER(name)]→pos on first lookup.
           All subsequent lookups are O(1) hash lookups. This mirrors
           how SQLite binds column names to column indexes once, when
           a statement is prepared, instead of resolving them per row.

   Implementation:
     - hbrdd/dbf/dbf.go: DBFArea.FieldPosCache(name) method
     - hbrtl/procinfo.go: FieldPos RTL uses fieldPosCacher interface
     - Lazy init: only pays for tables that get queried

2. hbrdd import auto-detection for function-call style PRGs

   Before: compiler only added hbrdd import when PRG used xBase commands
           (USE, SKIP, INDEX...). Pure function-call style like
           `dbUseArea(.T.,,"t")`, `FieldPut(1, val)` was missed —
           generated Go failed to compile ("undefined: hbrdd").

   After:  scanStmtsForXBase walks ExprStmt bodies too, detecting
           CallExpr to any of the ~40 xBase RTL function names.
           FIELD->NAME alias expressions also trigger the import.

   Resolves: Go compile failures in small PRGs that use only
             function-call style RDD access (dbUseArea/FieldGet/FieldPut).

Benchmark notes (50k records):
  Raw RDD scan:              7 ms    (baseline)
  FiveSql2 SELECT WHERE:   157 ms    (unchanged — bottleneck is
                                      not FieldPos, it's PRG-level
                                      expression tree walk per row)
  compat_harbour 51/51:    PASS
  FiveSql2 43/43:          100%

The FieldPos cache helps heavy field-name-based code paths but the
primary FiveSql2 bottleneck is the PRG interpreter walking expression
ASTs per row (needs bytecode compilation to close the gap).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 07:42:00 +09:00
parent 7cc729f394
commit ed33af41c5
3 changed files with 98 additions and 6 deletions

View File

@@ -3,7 +3,10 @@
package gengo
import "five/compiler/ast"
import (
"five/compiler/ast"
"strings"
)
// hasXBaseCommands checks if the file contains any xBase commands.
func hasXBaseCommands(file *ast.File) bool {
@@ -22,6 +25,50 @@ func hasXBaseCommands(file *ast.File) bool {
return false
}
// xbaseFuncNames is the set of upper-cased RTL function names whose use
// requires the hbrdd import in the generated Go code. It covers PRGs that
// reach the RDD layer purely through function-call syntax, without any xBase
// command such as USE.
var xbaseFuncNames = func() map[string]bool {
	names := []string{
		// Work-area lifecycle and navigation.
		"DBUSEAREA", "DBCREATE", "DBAPPEND", "DBSKIP",
		"DBGOTO", "DBGOTOP", "DBGOBOTTOM", "DBSEEK",
		"DBDELETE", "DBRECALL", "DBCLOSEAREA", "DBCLOSEALL",
		"DBSELECTAREA", "DBCOMMIT", "DBPACK", "DBZAP",
		// Locking.
		"DBRLOCK", "DBRUNLOCK", "FLOCK", "DBUNLOCK",
		// Field access.
		"FIELDGET", "FIELDPUT", "FIELDPOS", "FIELDNAME",
		// Record and work-area state.
		"FCOUNT", "RECNO", "RECCOUNT", "EOF", "BOF",
		"FOUND", "DELETED", "LASTREC", "ALIAS",
		// Index orders.
		"ORDSETFOCUS", "ORDCOUNT", "ORDNAME", "ORDKEY",
		"USED", "SELECT", "SETDELETED",
		// FiveSql2 entry point also needs hbrdd.
		"FIVE_SQL",
	}
	set := make(map[string]bool, len(names))
	for _, fn := range names {
		set[fn] = true
	}
	return set
}()
// scanExprForXBase reports whether expr needs the hbrdd import.
//
// It returns true for any alias expression, and for a call whose callee is a
// plain identifier listed in xbaseFuncNames (compared after upper-casing).
// Call arguments, both operands of a binary expression, and the right-hand
// side of an assignment are searched recursively. A nil expression and every
// other expression kind yield false.
func scanExprForXBase(expr ast.Expr) bool {
	switch node := expr.(type) {
	case nil:
		return false
	case *ast.AliasExpr:
		// FIELD->NAME always needs hbrdd.
		return true
	case *ast.AssignExpr:
		return scanExprForXBase(node.Right)
	case *ast.BinaryExpr:
		if scanExprForXBase(node.Left) {
			return true
		}
		return scanExprForXBase(node.Right)
	case *ast.CallExpr:
		callee, isIdent := node.Func.(*ast.IdentExpr)
		if isIdent && xbaseFuncNames[strings.ToUpper(callee.Name)] {
			return true
		}
		// The callee itself is not an xBase function; one of its arguments
		// may still contain one.
		for i := range node.Args {
			if scanExprForXBase(node.Args[i]) {
				return true
			}
		}
	}
	return false
}
func scanStmtsForXBase(stmts []ast.Stmt) bool {
for _, s := range stmts {
switch v := s.(type) {
@@ -29,6 +76,10 @@ func scanStmtsForXBase(stmts []ast.Stmt) bool {
*ast.ReplaceCmd, *ast.AppendCmd, *ast.DeleteCmd,
*ast.SelectCmd, *ast.IndexCmd, *ast.SetCmd:
return true
case *ast.ExprStmt:
if scanExprForXBase(v.X) {
return true
}
case *ast.IfStmt:
if scanStmtsForXBase(v.Body) || scanStmtsForXBase(v.ElseBody) {
return true

View File

@@ -63,9 +63,13 @@ type DBFArea struct {
idxState *indexState
// File locking state (byte-range locks via fcntl)
// Harbour: hb_fsLockLarge with FL_LOCK + FLX_SHARED/FLX_EXCLUSIVE
fileLocked bool // FLOCK() held
lockedRecs map[uint32]bool // records locked by DBRLOCK()
// Field position cache — UPPER(name) → 1-based index.
// Built lazily on first FieldPosCache() call.
// Analogous to SQLite resolving column names to column indexes once at
// prepare time — O(1) map lookup vs O(n) linear scan.
fieldPosMap map[string]int
}
// DBFDriver is the driver factory for DBF files.
@@ -357,6 +361,29 @@ func (a *DBFArea) Close() error {
// MemoFile exposes the FPT memo file attached to this work area.
// The result is nil when the table has no memo fields.
func (a *DBFArea) MemoFile() *FPTFile {
	memo := a.memoFile
	return memo
}
// FieldPosCache returns the 1-based position of the field called name, or 0
// when the work area has no field with that name.
//
// The first call builds a map from upper-cased, space-trimmed field names to
// positions; later calls are a single map lookup instead of a linear scan
// over the field descriptors. This is analogous to a SQL engine resolving
// column names to column indexes once rather than once per row.
//
// The lookup is case-insensitive: name is upper-cased before it is looked
// up, so callers do not have to normalise it themselves. When two
// descriptors share a name, the lowest position wins, which is what a
// front-to-back linear scan returns.
//
// The map is built once and is not refreshed by this method.
func (a *DBFArea) FieldPosCache(name string) int {
	if a.fieldPosMap == nil {
		// Width of the fixed-size, NUL-padded field name in a descriptor.
		const nameWidth = 11

		cache := make(map[string]int, len(a.fieldDescs))
		for i := range a.fieldDescs {
			raw := a.fieldDescs[i].Name[:nameWidth]
			// Keep only the bytes before the first NUL terminator.
			end := 0
			for end < nameWidth && raw[end] != 0 {
				end++
			}
			key := strings.ToUpper(strings.TrimSpace(string(raw[:end])))
			// First occurrence wins so duplicates resolve like a linear scan.
			if _, dup := cache[key]; !dup {
				cache[key] = i + 1
			}
		}
		a.fieldPosMap = cache
	}
	// A missing key yields the zero value, which is the "not found" result.
	return a.fieldPosMap[strings.ToUpper(name)]
}
func (a *DBFArea) Flush() error {
if a.dirty {
if err := a.flushRecord(); err != nil {

View File

@@ -9,6 +9,7 @@ import (
"five/hbrt"
"os"
"strconv"
"strings"
"time"
)
@@ -109,8 +110,8 @@ func Center(t *hbrt.Thread) {
// FIELDPOS(cFieldName) → nPos
func FieldPos(t *hbrt.Thread) {
t.Frame(1, 0)
defer t.EndProc()
fname := t.Local(1).AsString()
defer t.EndProcFast()
fname := strings.ToUpper(t.Local(1).AsString())
wam := getWA(t)
if wam == nil {
t.RetInt(0)
@@ -121,10 +122,23 @@ func FieldPos(t *hbrt.Thread) {
t.RetInt(0)
return
}
// Try DBFArea's built-in field position cache (O(1) hash lookup).
// Falls back to linear scan for non-DBF areas (mem RDD, etc.).
type fieldPosCacher interface {
FieldPosCache(name string) int
}
if fpc, ok := area.(fieldPosCacher); ok {
pos := fpc.FieldPosCache(fname)
t.RetInt(int64(pos))
return
}
// Fallback: linear scan
for i := 0; i < area.FieldCount(); i++ {
fi := area.GetFieldInfo(i)
if eqFold(fi.Name, fname) {
t.RetInt(int64(i + 1)) // Harbour: 1-based position
if strings.EqualFold(fi.Name, fname) {
t.RetInt(int64(i + 1))
return
}
}