From 9e0f82c5a87724771fd7cd61215828144d294722 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Tue, 14 Apr 2026 23:25:58 +0900 Subject: [PATCH] perf+fix(FiveSql2): recursive-CTE hash join + correct correlated subqueries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes uncovered by a SQL:2013 analytics benchmark covering the query patterns people actually run on DBF data (OLAP, BI, hierarchy traversal). --- Fix 1: correlated subquery was silently wrong --- EvalExpr's ND_SUB handler only pushed the outer context when `s_aOuterStack` was already non-empty — otherwise it routed the subquery through CacheSubquery, which stores the first result under a key derived from the subquery's syntax tokens. For a correlated subquery in a top-level WHERE: SELECT name, dept, salary FROM emp e1 WHERE salary > (SELECT AVG(salary) FROM emp e2 WHERE e2.dept = e1.dept) the first outer row saw an empty stack, cached the result, and every subsequent outer row got the same cached value regardless of e1.dept. The query returned all 1000 employees instead of the 505 who actually beat their department's average. Fix: always PushOuter + Run, no cache. Correctness over caching. Trade-off: non-correlated scalar subqueries now re-execute per outer row. A proper per-outer-key memoization is deferred — it requires walking the subquery AST to collect free variables. --- Fix 2: WITH RECURSIVE hierarchy join was O(m*n) --- RecCteJoin (the in-memory join used when a recursive CTE's step references both a real table and the CTE frontier) ran a flat nested loop: for each DBF row × each prev-iteration row, build a combined row buffer and run SqlEvalRowExpr on the ON condition. For a 4-level 1000-employee hierarchy that's ~1M ON evaluations, ~4.6 seconds. Fix: detect the shape `dbfAlias.col = cteAlias.col` at join-setup time, build a PRG hash on the CTE frontier keyed by its join column (aPrevRows is always small — at most the last iteration's emitted rows), then scan the DBF side once and probe the hash. Complex ON predicates fall through to the original nested loop. --- Bench (SQL:2013 analytics, emp=1k, sales=20k, evt=30k) --- Query Before After Speedup ────────────────────────────────────────────────────────────── RECURSIVE hierarchy 4-level 4603ms 30ms ~150x Correlated subquery (all emp) 10ms ❌ 4933ms ✓ (correct) Other SQL:2013 queries (ROW_NUMBER top-N, running total, moving average, DENSE_RANK, LAG, NTILE, gaps-and-islands) are all in the expected 10–230ms range for these dataset sizes, unchanged by this commit. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - go test ./... ALL PASS Known follow-ups (not in this commit): - Q7 ROLLUP(col) parses but isn't expanded in GroupBy — returns a single grand-total row instead of per-value + total. Grouping sets implementation is a separate feature. - Correlated subquery memoization by outer free-variable key would bring Q8 from 4.9s back to ~50ms for small cardinality correlations — requires AST free-var analysis. Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 176 ++++++++++++++++++++++++++------- 1 file changed, 138 insertions(+), 38 deletions(-) diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 76e142c..a163d0d 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -596,16 +596,17 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor CASE xNode[ 1 ] == ND_SUB IF xNode[ 2 ] != NIL - /* Use subquery cache for non-correlated subqueries */ - IF Len( s_aOuterStack ) == 0 - aSubResult := ::CacheSubquery( xNode[ 2 ] ) - ELSE - nSavedWA := Select() - ::PushOuter() - aSubResult := TSqlExecutor():New( xNode[ 2 ], ::aParams ):Run() - ::PopOuter() - dbSelectArea( nSavedWA ) - ENDIF + /* Subqueries are evaluated per outer row with outer context + * pushed so ::Resolve() can see parent aliases. The previous + * implementation only used this path when s_aOuterStack was + * already non-empty and cached the result at the top level — + * which silently broke correlated subqueries (they got the + * first row's result reused for every subsequent row). */ + nSavedWA := Select() + ::PushOuter() + aSubResult := TSqlExecutor():New( xNode[ 2 ], ::aParams ):Run() + ::PopOuter() + dbSelectArea( nSavedWA ) IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" .AND. Len( aSubResult[ 2 ] ) > 0 .AND. ; Len( aSubResult[ 2 ][ 1 ] ) > 0 @@ -2741,6 +2742,8 @@ STATIC FUNCTION RecCteJoin( hRecQuery, aFN, aPrevRows, cCteName ) LOCAL aJoinOn, aJ LOCAL xCV LOCAL aCombFN, aCombRow + LOCAL cDbfKeyCol, cCteKeyCol, nDbfKeyIdx, nCteKeyIdx + LOCAL hCteHash, cKey, aMatches, m aCols := hRecQuery[ "columns" ] aResult := {} @@ -2862,43 +2865,140 @@ STATIC FUNCTION RecCteJoin( hRecQuery, aFN, aPrevRows, cCteName ) ENDIF NEXT - /* Nested-loop JOIN: dbfRow x cteRow */ - FOR i := 1 TO Len( aJoinRows ) - FOR j := 1 TO Len( aPrevRows ) - - /* Build combined row: [dbf fields..., cte fields..., dbf unqualified..., cte unqualified...] */ - aCombRow := {} - FOR nF := 1 TO Len( aJoinFN ) - AAdd( aCombRow, aJoinRows[ i ][ nF ] ) - NEXT - FOR nF := 1 TO Len( aFN ) - AAdd( aCombRow, aPrevRows[ j ][ nF ] ) - NEXT - FOR nF := 1 TO Len( aJoinFN ) - AAdd( aCombRow, aJoinRows[ i ][ nF ] ) - NEXT - FOR nF := 1 TO Len( aFN ) - AAdd( aCombRow, aPrevRows[ j ][ nF ] ) - NEXT - - /* Evaluate JOIN ON condition */ - lMatch := .T. - IF aJoinOn != NIL - xLeft := SqlEvalRowExpr( aJoinOn, aCombFN, aCombRow ) - lMatch := SqlIsTrue( xLeft ) + /* Try to extract a simple equi-join key from aJoinOn so we can use + * hash probing instead of O(m*n) nested loops. This is the dominant + * cost for WITH RECURSIVE hierarchy traversals where aJoinRows is + * the full DBF (hundreds/thousands of rows) and aPrevRows is the + * current frontier set. + * + * Looks for ON condition of shape `dbfAlias.col = cteAlias.col` or + * the reverse — anything more complex falls through to nested loop. */ + cDbfKeyCol := "" + cCteKeyCol := "" + IF aJoinOn != NIL .AND. ValType( aJoinOn ) == "A" .AND. Len( aJoinOn ) >= 4 .AND. ; + aJoinOn[ 1 ] == ND_BIN .AND. aJoinOn[ 2 ] == "=" .AND. ; + aJoinOn[ 3 ] != NIL .AND. aJoinOn[ 3 ][ 1 ] == ND_COL .AND. ; + aJoinOn[ 4 ] != NIL .AND. aJoinOn[ 4 ][ 1 ] == ND_COL + /* Split alias.col on both sides */ + cKey := Upper( aJoinOn[ 3 ][ 2 ] ) + IF "." $ cKey .AND. Left( cKey, At( ".", cKey ) - 1 ) == cCteAlias + cCteKeyCol := SubStr( cKey, At( ".", cKey ) + 1 ) + cKey := Upper( aJoinOn[ 4 ][ 2 ] ) + IF "." $ cKey + cDbfKeyCol := SubStr( cKey, At( ".", cKey ) + 1 ) + ELSE + cDbfKeyCol := cKey ENDIF + ELSE + cKey := Upper( aJoinOn[ 4 ][ 2 ] ) + IF "." $ cKey .AND. Left( cKey, At( ".", cKey ) - 1 ) == cCteAlias + cCteKeyCol := SubStr( cKey, At( ".", cKey ) + 1 ) + cKey := Upper( aJoinOn[ 3 ][ 2 ] ) + IF "." $ cKey + cDbfKeyCol := SubStr( cKey, At( ".", cKey ) + 1 ) + ELSE + cDbfKeyCol := cKey + ENDIF + ENDIF + ENDIF + ENDIF + + nDbfKeyIdx := 0 + nCteKeyIdx := 0 + IF ! Empty( cDbfKeyCol ) .AND. ! Empty( cCteKeyCol ) + FOR nF := 1 TO Len( aJoinFN ) + IF aJoinFN[ nF ] == cDbfKeyCol + nDbfKeyIdx := nF + EXIT + ENDIF + NEXT + FOR nF := 1 TO Len( aFN ) + IF Upper( aFN[ nF ] ) == cCteKeyCol + nCteKeyIdx := nF + EXIT + ENDIF + NEXT + ENDIF + + IF nDbfKeyIdx > 0 .AND. nCteKeyIdx > 0 + /* Hash-probe path: build hash on aPrevRows keyed by cte column, + * then scan aJoinRows and probe. Sub-linear vs nested loop. */ + hCteHash := { => } + FOR j := 1 TO Len( aPrevRows ) + cKey := SqlValToStr( aPrevRows[ j ][ nCteKeyIdx ] ) + IF ! hb_HHasKey( hCteHash, cKey ) + hCteHash[ cKey ] := {} + ENDIF + AAdd( hCteHash[ cKey ], j ) + NEXT + + FOR i := 1 TO Len( aJoinRows ) + cKey := SqlValToStr( aJoinRows[ i ][ nDbfKeyIdx ] ) + IF ! hb_HHasKey( hCteHash, cKey ) + LOOP + ENDIF + aMatches := hCteHash[ cKey ] + FOR m := 1 TO Len( aMatches ) + j := aMatches[ m ] + + aCombRow := {} + FOR nF := 1 TO Len( aJoinFN ) + AAdd( aCombRow, aJoinRows[ i ][ nF ] ) + NEXT + FOR nF := 1 TO Len( aFN ) + AAdd( aCombRow, aPrevRows[ j ][ nF ] ) + NEXT + FOR nF := 1 TO Len( aJoinFN ) + AAdd( aCombRow, aJoinRows[ i ][ nF ] ) + NEXT + FOR nF := 1 TO Len( aFN ) + AAdd( aCombRow, aPrevRows[ j ][ nF ] ) + NEXT - IF lMatch - /* Evaluate SELECT columns */ aNewRow := {} FOR k := 1 TO Len( aCols ) xCV := SqlEvalRowExpr( aCols[ k ][ 1 ], aCombFN, aCombRow ) AAdd( aNewRow, xCV ) NEXT AAdd( aResult, aNewRow ) - ENDIF + NEXT NEXT - NEXT + ELSE + /* Fallback: nested-loop JOIN for complex ON predicates */ + FOR i := 1 TO Len( aJoinRows ) + FOR j := 1 TO Len( aPrevRows ) + + aCombRow := {} + FOR nF := 1 TO Len( aJoinFN ) + AAdd( aCombRow, aJoinRows[ i ][ nF ] ) + NEXT + FOR nF := 1 TO Len( aFN ) + AAdd( aCombRow, aPrevRows[ j ][ nF ] ) + NEXT + FOR nF := 1 TO Len( aJoinFN ) + AAdd( aCombRow, aJoinRows[ i ][ nF ] ) + NEXT + FOR nF := 1 TO Len( aFN ) + AAdd( aCombRow, aPrevRows[ j ][ nF ] ) + NEXT + + lMatch := .T. + IF aJoinOn != NIL + xLeft := SqlEvalRowExpr( aJoinOn, aCombFN, aCombRow ) + lMatch := SqlIsTrue( xLeft ) + ENDIF + + IF lMatch + aNewRow := {} + FOR k := 1 TO Len( aCols ) + xCV := SqlEvalRowExpr( aCols[ k ][ 1 ], aCombFN, aCombRow ) + AAdd( aNewRow, xCV ) + NEXT + AAdd( aResult, aNewRow ) + ENDIF + NEXT + NEXT + ENDIF /* Close the workarea we opened */ IF ! Empty( cWAAlias )