perf(fivesql2): Go-native FetchRow fast path — 1.3-1.7x on agg/window
TSqlExecutor:FetchRow was the per-row workhorse for aggregation, HAVING, and window queries. Even with the pre-built aFetchCache binding columns to (nWA, nFPos), the PRG FOR loop paid one method dispatch per column per row (dbSelectArea, FieldGet, AllTrim, AAdd) — profile pinned it at ~30% of B4 CPU.

SqlFetchRowFast collapses the cache-path loop into a single Go call:
- bound entry: SelectByNum + area.GetValue directly
- unbound (aggregate/expression): self:EvalExpr via Send
- character values: TrimSpace inline

The PRG FetchRow keeps its original cache-miss fallback path unchanged for rare queries where aFetchCache isn't built.

Bench deltas (median of 3 steady runs, 1000 iters):
  B4_GROUP_HAVING    418 →  327 us  -22% (1.28x)
  B9_ROW_NUMBER      191 →  120 us  -37% (1.59x)
  B10_RANK_PART      228 →  135 us  -41% (1.69x)
  B11_SUM_OVER       249 →  156 us  -37% (1.60x)
  B14_COUNT          235 →  219 us   -7%
  B15_CTE_WIN_JOIN  1577 → 1452 us   -8%

Single-table SELECT (B1-B3, B5-B7, B8) stays flat — those already hit the column-binding fast path and don't need aggregate dispatch.

FiveSql2 43/43, Harbour compat 56/56.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -753,28 +753,14 @@ RETURN aCache
|
||||
METHOD FetchRow( aExprs ) CLASS TSqlExecutor
|
||||
|
||||
LOCAL aRow := {}, i, xVal
|
||||
LOCAL xE, cRef, nDot, nWA, nFPos, cField, cTblAlias, cA, aBound
|
||||
LOCAL xE, cRef, nDot, nWA, nFPos, cField, cTblAlias, cA
|
||||
|
||||
/* Fastest path: pre-bound columns (built once per join by RunSelect) */
|
||||
/* Fastest path: pre-bound columns (built once per join by RunSelect).
|
||||
* Go-native: SqlFetchRowFast collapses the per-row Harbour FOR loop
|
||||
* into a single Go call, targeting the ~30% of GROUP BY CPU spent in PRG
|
||||
* method dispatch. Falls back to self:EvalExpr for unbound entries. */
|
||||
IF ::aFetchCache != NIL .AND. Len( ::aFetchCache ) == Len( aExprs )
|
||||
FOR i := 1 TO Len( aExprs )
|
||||
aBound := ::aFetchCache[ i ]
|
||||
IF aBound != NIL
|
||||
dbSelectArea( aBound[ 1 ] )
|
||||
xVal := FieldGet( aBound[ 2 ] )
|
||||
IF ValType( xVal ) == "C"
|
||||
xVal := AllTrim( xVal )
|
||||
ENDIF
|
||||
AAdd( aRow, xVal )
|
||||
ELSE
|
||||
xVal := ::EvalExpr( aExprs[ i ][ 1 ] )
|
||||
IF ValType( xVal ) == "C"
|
||||
xVal := AllTrim( xVal )
|
||||
ENDIF
|
||||
AAdd( aRow, xVal )
|
||||
ENDIF
|
||||
NEXT
|
||||
RETURN aRow
|
||||
RETURN SqlFetchRowFast( Self, aExprs, ::aFetchCache )
|
||||
ENDIF
|
||||
|
||||
FOR i := 1 TO Len( aExprs )
|
||||
|
||||
@@ -638,6 +638,7 @@ func RegisterRTL(vm *hbrt.VM) {
|
||||
hbrt.Sym("SQLCOERCEFORCMP", hbrt.FsPublic, SqlCoerceForCmp),
|
||||
hbrt.Sym("SQLISTRUE", hbrt.FsPublic, SqlIsTrue),
|
||||
hbrt.Sym("SQLISAGGNAME", hbrt.FsPublic, SqlIsAggName),
|
||||
hbrt.Sym("SQLFETCHROWFAST", hbrt.FsPublic, SqlFetchRowFast),
|
||||
hbrt.Sym("SQLCMPEQ", hbrt.FsPublic, SqlCmpEq),
|
||||
hbrt.Sym("SQLCMPLT", hbrt.FsPublic, SqlCmpLt),
|
||||
hbrt.Sym("SQLEXTRACTTEMPLATE", hbrt.FsPublic, SqlExtractTemplate),
|
||||
|
||||
@@ -2405,3 +2405,90 @@ func SqlEach(t *hbrt.Thread) {
|
||||
|
||||
t.RetNil()
|
||||
}
|
||||
|
||||
// SqlFetchRowFast(oSelf, aExprs, aFetchCache) → aRow
|
||||
//
|
||||
// Go-native replacement for TSqlExecutor:FetchRow. Profile showed
|
||||
// FetchRow at ~30% of B4 GROUP+HAVING CPU — 100 rows × 1000 iters of
|
||||
// PRG method dispatch per column per row, even with the aFetchCache
|
||||
// fast path. This collapses the per-row loop into one Go call: bound
|
||||
// cache entries (`{nWA, nFPos}`) do a direct SelectByNum+GetValue;
|
||||
// unbound entries fall back to `self:EvalExpr(exprs[i][1])` via Send.
|
||||
// Character values get trimmed inline (mirrors PRG AllTrim, which is
|
||||
// really TrimSpace in practice since DBF pads with ASCII space).
|
||||
func SqlFetchRowFast(t *hbrt.Thread) {
|
||||
t.Frame(3, 0)
|
||||
defer t.EndProc()
|
||||
|
||||
self := t.Local(1)
|
||||
exprsVal := t.Local(2)
|
||||
cacheVal := t.Local(3)
|
||||
|
||||
if !exprsVal.IsArray() {
|
||||
t.PushValue(hbrt.MakeArrayFrom(nil))
|
||||
t.RetValue()
|
||||
return
|
||||
}
|
||||
exprs := exprsVal.AsArray().Items
|
||||
n := len(exprs)
|
||||
|
||||
var cache []hbrt.Value
|
||||
useCache := false
|
||||
if cacheVal.IsArray() {
|
||||
cache = cacheVal.AsArray().Items
|
||||
useCache = len(cache) == n
|
||||
}
|
||||
|
||||
wa := getWA(t)
|
||||
out := make([]hbrt.Value, 0, n)
|
||||
|
||||
for i := 0; i < n; i++ {
|
||||
var val hbrt.Value
|
||||
hit := false
|
||||
|
||||
if useCache {
|
||||
entry := cache[i]
|
||||
if !entry.IsNil() && entry.IsArray() {
|
||||
items := entry.AsArray().Items
|
||||
if len(items) >= 2 && wa != nil {
|
||||
nWA := uint16(items[0].AsNumInt())
|
||||
nFPos := int(items[1].AsNumInt())
|
||||
wa.SelectByNum(nWA)
|
||||
if area := wa.Current(); area != nil {
|
||||
if v, err := area.GetValue(nFPos - 1); err == nil {
|
||||
val = v
|
||||
hit = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !hit {
|
||||
// Fallback: self:EvalExpr(exprs[i][1])
|
||||
var exprNode hbrt.Value
|
||||
if exprs[i].IsArray() {
|
||||
items := exprs[i].AsArray().Items
|
||||
if len(items) > 0 {
|
||||
exprNode = items[0]
|
||||
}
|
||||
}
|
||||
t.PushValue(self)
|
||||
t.PushValue(exprNode)
|
||||
t.Send("EVALEXPR", 1)
|
||||
val = t.Pop2()
|
||||
}
|
||||
|
||||
if val.IsString() {
|
||||
s := val.AsString()
|
||||
trimmed := strings.TrimSpace(s)
|
||||
if len(trimmed) != len(s) {
|
||||
val = hbrt.MakeString(trimmed)
|
||||
}
|
||||
}
|
||||
out = append(out, val)
|
||||
}
|
||||
|
||||
t.PushValue(hbrt.MakeArrayFrom(out))
|
||||
t.RetValue()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user