perf(FiveSql2): SqlHashBuild + FetchRow column binding — 3-way JOIN 3x
Complex-query benchmarking turned up two hot paths that the earlier
SqlScan/SqlEach work didn't touch: multi-table JOIN and nested-scan
row fetching. This commit hits both.
--- Part 1: SqlHashBuild — Go-native hash-join build ---
FiveSql2's HashJoin previously built the inner-side hash in PRG:
WHILE !Eof()
xVal := FieldGet(nFPos)
cKey := SqlValToStr(xVal)
IF !hb_HHasKey(hHash, cKey) ; hHash[cKey] := {} ; ENDIF
AAdd(hHash[cKey], RecNo())
dbSkip()
ENDDO
That loop runs at ~40μs per row from class dispatch + hb_HHasKey
lookups + AAdd growth + SqlValToStr formatting. On a 50k-row inner
table that's ~2 seconds wasted on what should be a sub-50ms
housekeeping op.
New hbrtl.SqlHashBuild does the same thing in one Go-native pass:
- Direct *dbf.DBFArea loop (no interface dispatch, same devirt as
SqlScan)
- Go `map[string][]int64` accumulates RecNos by key — one
allocation per distinct key
- Inline ASCII-only digit formatter for numeric keys (strconv.Itoa
is allocation-heavy for small ints)
- CHAR keys are right-trimmed to match SqlCmpEq semantics so the
hash probe matches what EvalExpr would compute
- Final Five hash is built once from Keys/Values/Order slices
directly, skipping the per-key hb_HSet path
HashJoin now calls `SqlHashBuild(nFPos)` instead of running the
PRG loop.
--- Part 2: TSqlExecutor:BuildFetchCache ---
The JOIN fallback loop calls FetchRow per row. FetchRow was already
column-ref-aware but did the string parse (`At + SubStr + Upper`)
and `::FindWA` linear scan every single invocation. For a 50k-row
join emitting 50k result rows, that's ~200k redundant resolutions.
New BuildFetchCache walks the SELECT list once before the scan and
pre-binds each plain-column expression to `{nWA, nFPos}`. FetchRow's
new fast path checks ::aFetchCache and jumps straight to
`dbSelectArea + FieldGet` when bound. Complex exprs (functions,
CASE, subqueries) still fall through to EvalExpr.
::aFetchCache is set right before the join WHILE loop and cleared
after — no cross-query bleed.
--- Bench (50k ord × 10k emp × 100 dept, 3-run steady state) ---
Query Before After Speedup
────────────────────────────────────────────────────────────
2-way INNER JOIN, 10k rows 91ms 68ms 1.34x
2-way JOIN + GROUP BY 110ms 94ms 1.17x
3-way INNER JOIN COUNT 2610ms 610ms 4.28x
3-way JOIN + GROUP BY 2860ms 830ms 3.45x
The 3-way speedup is almost entirely SqlHashBuild. The 2-way case
benefits from the fetch cache because its per-row cost is dominated
by FetchRow (no second hash build to amortize).
--- Limits still standing ---
CTE + JOIN queries (Q7 in bench_complex: ~4.5s) aren't affected by
either optimization — CTE materialization goes through a different
path that writes/reads a temp DBF. Follow-up target.
Validation:
- FiveSql2 43/43
- Harbour compat 51/51
- go test ./... ALL PASS
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -34,6 +34,7 @@ CLASS TSqlExecutor
|
||||
DATA aTables INIT {}
|
||||
DATA aCompileStruct
|
||||
DATA bRowBlock /* optional code block — receives SELECT cols as params */
|
||||
DATA aFetchCache /* pre-bound {nWA, nFPos} per SELECT expression, or NIL */
|
||||
|
||||
CLASSDATA hSubCache INIT { => } SHARED
|
||||
|
||||
@@ -67,6 +68,7 @@ CLASS TSqlExecutor
|
||||
METHOD TryBuildFieldPositions( aExprs )
|
||||
METHOD TryCompileWhere( xWhere )
|
||||
METHOD SqlExprToPrg( xNode )
|
||||
METHOD BuildFetchCache( aExprs )
|
||||
|
||||
ENDCLASS
|
||||
|
||||
@@ -621,10 +623,86 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor
|
||||
RETURN NIL
|
||||
|
||||
|
||||
/* Pre-compute {nWA, nFPos} for each SELECT expression that is a plain
|
||||
* column reference. Called once before a join/scan loop so that FetchRow
|
||||
* can skip the per-row string parse (At, SubStr, Upper) and FindWA
|
||||
* linear scan. Complex expressions (functions, CASE, subqueries) store
|
||||
* NIL and fall back to EvalExpr.
|
||||
*
|
||||
* Safe for multi-table queries: resolution walks ::aTables and binds
|
||||
* each column to a specific workarea number and field position.
|
||||
*/
|
||||
METHOD BuildFetchCache( aExprs ) CLASS TSqlExecutor
|
||||
|
||||
LOCAL aCache := {}, i, xE, cRef, nDot, cTblAlias, cField, nWA, nFPos, cA
|
||||
LOCAL nSaved := Select()
|
||||
|
||||
FOR i := 1 TO Len( aExprs )
|
||||
xE := aExprs[ i ][ 1 ]
|
||||
IF xE == NIL .OR. xE[ 1 ] != ND_COL .OR. xE[ 2 ] == "*"
|
||||
AAdd( aCache, NIL )
|
||||
LOOP
|
||||
ENDIF
|
||||
cRef := xE[ 2 ]
|
||||
nDot := At( ".", cRef )
|
||||
IF nDot > 0
|
||||
cTblAlias := Upper( Left( cRef, nDot - 1 ) )
|
||||
cField := Upper( SubStr( cRef, nDot + 1 ) )
|
||||
nWA := ::FindWA( cTblAlias )
|
||||
ELSE
|
||||
cField := Upper( cRef )
|
||||
cTblAlias := ""
|
||||
nWA := 0
|
||||
IF Len( ::aTables ) > 0
|
||||
cA := ::aTables[ 1 ][ 2 ]
|
||||
IF Empty( cA )
|
||||
cA := ::aTables[ 1 ][ 1 ]
|
||||
ENDIF
|
||||
nWA := Select( cA )
|
||||
ENDIF
|
||||
ENDIF
|
||||
IF nWA > 0
|
||||
dbSelectArea( nWA )
|
||||
nFPos := FieldPos( cField )
|
||||
IF nFPos > 0
|
||||
AAdd( aCache, { nWA, nFPos } )
|
||||
LOOP
|
||||
ENDIF
|
||||
ENDIF
|
||||
AAdd( aCache, NIL )
|
||||
NEXT
|
||||
|
||||
dbSelectArea( nSaved )
|
||||
|
||||
RETURN aCache
|
||||
|
||||
|
||||
METHOD FetchRow( aExprs ) CLASS TSqlExecutor
|
||||
|
||||
LOCAL aRow := {}, i, xVal
|
||||
LOCAL xE, cRef, nDot, nWA, nFPos, cField, cTblAlias, cA
|
||||
LOCAL xE, cRef, nDot, nWA, nFPos, cField, cTblAlias, cA, aBound
|
||||
|
||||
/* Fastest path: pre-bound columns (built once per join by RunSelect) */
|
||||
IF ::aFetchCache != NIL .AND. Len( ::aFetchCache ) == Len( aExprs )
|
||||
FOR i := 1 TO Len( aExprs )
|
||||
aBound := ::aFetchCache[ i ]
|
||||
IF aBound != NIL
|
||||
dbSelectArea( aBound[ 1 ] )
|
||||
xVal := FieldGet( aBound[ 2 ] )
|
||||
IF ValType( xVal ) == "C"
|
||||
xVal := AllTrim( xVal )
|
||||
ENDIF
|
||||
AAdd( aRow, xVal )
|
||||
ELSE
|
||||
xVal := ::EvalExpr( aExprs[ i ][ 1 ] )
|
||||
IF ValType( xVal ) == "C"
|
||||
xVal := AllTrim( xVal )
|
||||
ENDIF
|
||||
AAdd( aRow, xVal )
|
||||
ENDIF
|
||||
NEXT
|
||||
RETURN aRow
|
||||
ENDIF
|
||||
|
||||
FOR i := 1 TO Len( aExprs )
|
||||
xE := aExprs[ i ][ 1 ]
|
||||
@@ -1236,6 +1314,11 @@ METHOD RunSelect() CLASS TSqlExecutor
|
||||
|
||||
/* Fallback: PRG interpreter loop */
|
||||
IF aGoRows == NIL
|
||||
/* Pre-bind SELECT columns to {nWA, nFPos} so FetchRow
|
||||
* can skip the per-row string parse + FindWA on every
|
||||
* join recursion. Huge win for multi-table scans. */
|
||||
::aFetchCache := ::BuildFetchCache( aResultExprs )
|
||||
dbSelectArea( nWA )
|
||||
WHILE ! Eof()
|
||||
IF Len( aJoins ) > 0
|
||||
::JoinRecurse( aJoins, 1, xWhere, aResultExprs, @aRows, hJoinHash )
|
||||
@@ -1249,6 +1332,7 @@ METHOD RunSelect() CLASS TSqlExecutor
|
||||
dbSelectArea( nWA )
|
||||
dbSkip()
|
||||
ENDDO
|
||||
::aFetchCache := NIL
|
||||
ENDIF
|
||||
ENDIF
|
||||
ENDIF
|
||||
@@ -1381,23 +1465,19 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
|
||||
|
||||
lHadMatch := .F.
|
||||
|
||||
/* Build hash table once per join (keyed by join index) */
|
||||
/* Build hash table once per join (keyed by join index).
|
||||
* Delegates to the Go-native SqlHashBuild RTL which scans the
|
||||
* inner workarea and returns the populated hash in one pass —
|
||||
* roughly 40x faster than the PRG hash-build loop because it
|
||||
* avoids per-row class dispatch, hb_HHasKey, and AAdd growth. */
|
||||
cHashKey := "HJ_" + hb_ntos( nIdx ) + "_" + cInnerField
|
||||
IF ! hb_HHasKey( hHashTbl, cHashKey )
|
||||
hHashTbl[ cHashKey ] := { => }
|
||||
dbSelectArea( nInnerWA )
|
||||
nFPos := FieldPos( cInnerField )
|
||||
IF nFPos > 0
|
||||
dbGoTop()
|
||||
WHILE ! Eof()
|
||||
xInnerVal := FieldGet( nFPos )
|
||||
cValKey := SqlValToStr( xInnerVal )
|
||||
IF ! hb_HHasKey( hHashTbl[ cHashKey ], cValKey )
|
||||
hHashTbl[ cHashKey ][ cValKey ] := {}
|
||||
ENDIF
|
||||
AAdd( hHashTbl[ cHashKey ][ cValKey ], RecNo() )
|
||||
dbSkip()
|
||||
ENDDO
|
||||
hHashTbl[ cHashKey ] := SqlHashBuild( nFPos )
|
||||
ELSE
|
||||
hHashTbl[ cHashKey ] := { => }
|
||||
ENDIF
|
||||
ENDIF
|
||||
|
||||
@@ -1407,14 +1487,28 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
|
||||
|
||||
IF hb_HHasKey( hHashTbl[ cHashKey ], cValKey )
|
||||
aMatches := hHashTbl[ cHashKey ][ cValKey ]
|
||||
FOR i := 1 TO Len( aMatches )
|
||||
dbSelectArea( nInnerWA )
|
||||
dbGoto( aMatches[ i ] )
|
||||
/* Hash key already matched — skip redundant ON re-evaluation for
|
||||
* simple equi-joins (SQLite: ephemeral table probe is sufficient). */
|
||||
lHadMatch := .T.
|
||||
::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl )
|
||||
NEXT
|
||||
/* Base-case inline: if the next recursion would just be FetchRow,
|
||||
* skip the method-dispatch overhead and build the row directly.
|
||||
* 50k inner matches × Send() dispatch was the 3-way join bottleneck. */
|
||||
IF nIdx + 1 > Len( aJoins )
|
||||
FOR i := 1 TO Len( aMatches )
|
||||
dbSelectArea( nInnerWA )
|
||||
dbGoto( aMatches[ i ] )
|
||||
lHadMatch := .T.
|
||||
IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) )
|
||||
AAdd( aRows, ::FetchRow( aRE ) )
|
||||
ENDIF
|
||||
NEXT
|
||||
ELSE
|
||||
FOR i := 1 TO Len( aMatches )
|
||||
dbSelectArea( nInnerWA )
|
||||
dbGoto( aMatches[ i ] )
|
||||
/* Hash key already matched — skip redundant ON re-evaluation for
|
||||
* simple equi-joins (SQLite: ephemeral table probe is sufficient). */
|
||||
lHadMatch := .T.
|
||||
::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl )
|
||||
NEXT
|
||||
ENDIF
|
||||
ENDIF
|
||||
|
||||
RETURN lHadMatch
|
||||
|
||||
@@ -548,6 +548,7 @@ var rtlFunctions = map[string]bool{
|
||||
"DBCREATE": true, "DBINFO": true, "DBORDERINFO": true, "DBSETINDEX": true,
|
||||
// FiveSql2 hybrid hot-path RTL (pcode + Go-native scan)
|
||||
"PCCOMPILE": true, "PCEVAL": true, "SQLSCAN": true, "SQLEACH": true,
|
||||
"SQLHASHBUILD": true,
|
||||
// Field metadata + index creation
|
||||
"FIELDTYPE": true, "FIELDLEN": true, "FIELDDEC": true,
|
||||
"ORDCREATE": true, "DBCREATEINDEX": true, "DBCLEARINDEX": true,
|
||||
|
||||
@@ -619,6 +619,7 @@ func RegisterRTL(vm *hbrt.VM) {
|
||||
// Go-native SQL scan loop (bypasses PRG interpreter for hot path)
|
||||
hbrt.Sym("SQLSCAN", hbrt.FsPublic, SqlScan),
|
||||
hbrt.Sym("SQLEACH", hbrt.FsPublic, SqlEach),
|
||||
hbrt.Sym("SQLHASHBUILD", hbrt.FsPublic, SqlHashBuild),
|
||||
|
||||
// Goroutine / Concurrency
|
||||
hbrt.Sym("GO", hbrt.FsPublic, GoFunc),
|
||||
|
||||
153
hbrtl/sqlscan.go
153
hbrtl/sqlscan.go
@@ -28,6 +28,7 @@ import (
|
||||
"five/hbrdd"
|
||||
"five/hbrdd/dbf"
|
||||
"five/hbrt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// SqlScan(aFieldPositions, pcWhere) → aRows
|
||||
@@ -238,6 +239,158 @@ func SqlScan(t *hbrt.Thread) {
|
||||
t.RetValue()
|
||||
}
|
||||
|
||||
// SqlHashBuild(nFieldPos) → hHash
|
||||
//
|
||||
// Scans the current workarea and returns a hash mapping each field
|
||||
// value (as a string key) to an array of RecNos that have that value.
|
||||
// Used by FiveSql2's HashJoin: FiveSql2 currently builds this in PRG,
|
||||
// paying ~40μs per row from class dispatch + hb_HHasKey + AAdd growth.
|
||||
// 50k rows × 40μs = 2 seconds wasted on what should be a sub-50ms op.
|
||||
//
|
||||
// Go-native build goes through *dbf.DBFArea directly and uses a native
|
||||
// Go `map[string][]int64` which GC's as one unit. Final conversion to
|
||||
// a Five hash is done once at the end.
|
||||
func SqlHashBuild(t *hbrt.Thread) {
|
||||
t.Frame(1, 0)
|
||||
defer t.EndProc()
|
||||
|
||||
nFieldPos := int(t.Local(1).AsNumInt()) - 1
|
||||
if nFieldPos < 0 {
|
||||
t.PushValue(hbrt.MakeHash())
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
|
||||
wam, ok := t.WA.(*hbrdd.WorkAreaManager)
|
||||
if !ok {
|
||||
t.PushValue(hbrt.MakeHash())
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
area := wam.Current()
|
||||
if area == nil {
|
||||
t.PushValue(hbrt.MakeHash())
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
|
||||
// Type-assert once so the per-row field reads inline.
|
||||
dbfArea, _ := area.(*dbf.DBFArea)
|
||||
|
||||
goMap := make(map[string][]int64, 4096)
|
||||
|
||||
if dbfArea != nil {
|
||||
dbfArea.GoTop()
|
||||
for !dbfArea.EOF() {
|
||||
v, _ := dbfArea.GetValue(nFieldPos)
|
||||
key := valueHashKey(v)
|
||||
goMap[key] = append(goMap[key], int64(dbfArea.RecNo()))
|
||||
dbfArea.Skip(1)
|
||||
}
|
||||
} else {
|
||||
area.GoTop()
|
||||
for !area.EOF() {
|
||||
v, _ := area.GetValue(nFieldPos)
|
||||
key := valueHashKey(v)
|
||||
// Generic RecNo via interface
|
||||
var rn int64
|
||||
if rmgr, ok := area.(interface{ RecNo() uint32 }); ok {
|
||||
rn = int64(rmgr.RecNo())
|
||||
}
|
||||
goMap[key] = append(goMap[key], rn)
|
||||
area.Skip(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Materialize as a Five hash — build Keys/Values slices directly on
|
||||
// the HbHash struct, skipping the per-key map-lookup path that PRG
|
||||
// hb_HSet would take.
|
||||
nKeys := len(goMap)
|
||||
keys := make([]hbrt.Value, 0, nKeys)
|
||||
vals := make([]hbrt.Value, 0, nKeys)
|
||||
order := make([]int, 0, nKeys)
|
||||
idx := 0
|
||||
for k, recs := range goMap {
|
||||
items := make([]hbrt.Value, len(recs))
|
||||
for i, r := range recs {
|
||||
items[i] = hbrt.MakeNumInt(r)
|
||||
}
|
||||
keys = append(keys, hbrt.MakeString(k))
|
||||
vals = append(vals, hbrt.MakeArrayFrom(items))
|
||||
order = append(order, idx)
|
||||
idx++
|
||||
}
|
||||
result := hbrt.MakeHash()
|
||||
hh := result.AsHash()
|
||||
hh.Keys = keys
|
||||
hh.Values = vals
|
||||
hh.Order = order
|
||||
|
||||
t.PushValue(result)
|
||||
t.RetValue()
|
||||
}
|
||||
|
||||
// valueHashKey converts a Value to a stable string key for Go map use.
|
||||
// Matches what SqlValToStr does in PRG, but without allocation detours.
|
||||
func valueHashKey(v hbrt.Value) string {
|
||||
switch {
|
||||
case v.IsNil():
|
||||
return "\x00NIL"
|
||||
case v.IsString():
|
||||
// Match PRG SqlValToStr: trim trailing spaces so CHAR hash probes
|
||||
// compare the same as the equivalent SqlCmpEq call.
|
||||
s := v.AsString()
|
||||
end := len(s)
|
||||
for end > 0 && s[end-1] == ' ' {
|
||||
end--
|
||||
}
|
||||
return s[:end]
|
||||
case v.IsNumeric():
|
||||
if v.IsNumInt() {
|
||||
return strconvItoa(v.AsNumInt())
|
||||
}
|
||||
return strconvFtoa(v.AsNumDouble())
|
||||
case v.IsLogical():
|
||||
if v.AsBool() {
|
||||
return "T"
|
||||
}
|
||||
return "F"
|
||||
case v.IsDate():
|
||||
return strconvItoa(v.AsJulian())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func strconvItoa(n int64) string {
|
||||
// strconv.Itoa is heavy on allocation for small ints — this is the
|
||||
// hot path for hash keys so use a tight formatter.
|
||||
if n == 0 {
|
||||
return "0"
|
||||
}
|
||||
neg := n < 0
|
||||
if neg {
|
||||
n = -n
|
||||
}
|
||||
var buf [20]byte
|
||||
i := len(buf)
|
||||
for n > 0 {
|
||||
i--
|
||||
buf[i] = byte('0' + n%10)
|
||||
n /= 10
|
||||
}
|
||||
if neg {
|
||||
i--
|
||||
buf[i] = '-'
|
||||
}
|
||||
return string(buf[i:])
|
||||
}
|
||||
|
||||
func strconvFtoa(f float64) string {
|
||||
// Only used for non-integer numeric field values (rare in join keys);
|
||||
// OK to call into strconv.
|
||||
return strconv.FormatFloat(f, 'g', -1, 64)
|
||||
}
|
||||
|
||||
// SqlEach(aFieldPositions, pcWhere, bBlock) → NIL
|
||||
//
|
||||
// Streaming variant of SqlScan — instead of materializing all matching
|
||||
|
||||
Reference in New Issue
Block a user