diff --git a/_FiveSql2/src/TSqlAgg.prg b/_FiveSql2/src/TSqlAgg.prg index 95f0783..1296a38 100644 --- a/_FiveSql2/src/TSqlAgg.prg +++ b/_FiveSql2/src/TSqlAgg.prg @@ -17,6 +17,8 @@ CLASS TSqlAgg METHOD New() CONSTRUCTOR METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) METHOD FindGroupIdx( xGroupExpr, aCols, aFN ) + METHOD ExpandGroupingSets( aGroupBy ) + METHOD ExprInSet( xSelExpr, aSet ) METHOD ComputeAgg( xE, aGR, aFN ) METHOD FindColIdx( xExpr, aFN ) METHOD FindColIdx2( cN, aFN ) @@ -51,6 +53,7 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T LOCAL aNewRow LOCAL nGCol, cN, nCI, lPass LOCAL aGroupIdx := {} + LOCAL aSets, aCurSet, nSet, hOmitIdx, aSubResult /* Aggregate on empty set */ IF Len( aRows ) == 0 .AND. ::HasAgg( aCols ) @@ -65,15 +68,42 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T RETURN { aNewRow } ENDIF + /* SQL:2003 ROLLUP / CUBE / GROUPING SETS — expand into a list of + * flat grouping key sets and run aggregation once per set. Columns + * absent from the current set emit NIL (the standard "subtotal" + * placeholder). */ + aSets := ::ExpandGroupingSets( aGroupBy ) + IF Len( aSets ) > 1 + FOR nSet := 1 TO Len( aSets ) + aCurSet := aSets[ nSet ] + /* Recurse with the plain expanded set; no ROLLUP/CUBE nodes */ + aSubResult := ::GroupBy( aRows, aFN, aCols, aCurSet, xHaving, aTables, aParams ) + /* For each result row, NIL-out any SELECT column whose source + * GROUP BY expression is not in the current set. */ + hOmitIdx := { => } + FOR i := 1 TO Len( aCols ) + IF ! SqlExprHasAgg( aCols[ i ][ 1 ] ) + IF ! ::ExprInSet( aCols[ i ][ 1 ], aCurSet ) + hOmitIdx[ i ] := .T. + ENDIF + ENDIF + NEXT + FOR i := 1 TO Len( aSubResult ) + FOR j := 1 TO Len( aSubResult[ i ] ) + IF hb_HHasKey( hOmitIdx, j ) + aSubResult[ i ][ j ] := NIL + ENDIF + NEXT + AAdd( aResult, aSubResult[ i ] ) + NEXT + NEXT + RETURN aResult + ENDIF + /* Build group buckets. 
* Pre-resolve the GROUP BY columns to their position in the SELECT * list by matching against the SOURCE expressions in aCols, not the - * alias list in aFN. Matching on aFN breaks as soon as the user - * writes `SELECT d.name AS foo ... GROUP BY d.name` — the group - * column's ND_COL name "D.NAME" wouldn't appear in aFN (which has - * "FOO"), FindColIdx would return 0, and every row would end up in - * the empty-key bucket collapsing to a single output row. - * (Regression found in complex-query bench 2026-04-14.) */ + * alias list in aFN. */ FOR j := 1 TO Len( aGroupBy ) nGCol := ::FindGroupIdx( aGroupBy[ j ], aCols, aFN ) AAdd( aGroupIdx, nGCol )
@@ -128,6 +158,189 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T
 RETURN aResult
 
 
+/* Expand SQL:2003 ROLLUP / CUBE / GROUPING SETS into a list of flat
+ * grouping sets.  Each returned set is an array of expressions that
+ * serves as the plain GROUP BY for one aggregation pass.
+ *
+ *   GROUP BY a, ROLLUP(b, c)                 → {(a,b,c), (a,b), (a)}
+ *   GROUP BY CUBE(a, b)                      → {(a,b), (a), (b), ()}
+ *   GROUP BY GROUPING SETS ((a,b), (a), ())  → as written
+ *
+ * Several modifiers in one GROUP BY combine as a cross product, taken
+ * left to right.  Sets come out most-detailed first: ROLLUP walks from
+ * the full column list down to (), CUBE from all columns down to ()
+ * keeping earlier columns longest.
+ *
+ * aGroupBy  array of GROUP BY terms.
+ * Returns   array of grouping sets.  Without any ROLLUP / CUBE /
+ *           GROUPING SETS term the result is { aGroupBy } - the same
+ *           array, not a copy - so the caller can stay on its
+ *           single-pass path.
+ */
+METHOD ExpandGroupingSets( aGroupBy ) CLASS TSqlAgg
+
+   LOCAL aSets, aNewSets, aCurrent, aBase, xTerm, xItem, cKind
+   LOCAL i, j, k, n, nCols, nMask
+   LOCAL lExpand := .F.
+
+   /* Fast path: no modifier term, the list itself is the only set */
+   IF ValType( aGroupBy ) == "A"
+      FOR i := 1 TO Len( aGroupBy )
+         IF ! Empty( SqlGroupModKind( aGroupBy[ i ] ) )
+            lExpand := .T.
+            EXIT
+         ENDIF
+      NEXT
+   ENDIF
+   IF ! lExpand
+      RETURN { aGroupBy }
+   ENDIF
+
+   /* Seed with one empty set, then cross-expand term by term */
+   aSets := { {} }
+
+   FOR i := 1 TO Len( aGroupBy )
+      xTerm    := aGroupBy[ i ]
+      cKind    := SqlGroupModKind( xTerm )
+      aBase    := iif( Empty( cKind ), NIL, xTerm[ 3 ] )
+      aNewSets := {}
+
+      FOR j := 1 TO Len( aSets )
+         DO CASE
+         CASE cKind == "ROLLUP"
+            /* N+1 prefixes, longest first: (c1..cN) ... (c1), () */
+            FOR n := Len( aBase ) TO 0 STEP -1
+               aCurrent := AClone( aSets[ j ] )
+               FOR k := 1 TO n
+                  AAdd( aCurrent, aBase[ k ] )
+               NEXT
+               AAdd( aNewSets, aCurrent )
+            NEXT
+
+         CASE cKind == "CUBE"
+            /* Every subset, one per bitmask.  The first column owns the
+             * highest bit and the masks count down, giving the order
+             * (a,b), (a), (b), () for CUBE(a, b). */
+            nCols := Len( aBase )
+            FOR nMask := hb_BitShift( 1, nCols ) - 1 TO 0 STEP -1
+               aCurrent := AClone( aSets[ j ] )
+               FOR k := 1 TO nCols
+                  IF hb_BitAnd( nMask, hb_BitShift( 1, nCols - k ) ) != 0
+                     AAdd( aCurrent, aBase[ k ] )
+                  ENDIF
+               NEXT
+               AAdd( aNewSets, aCurrent )
+            NEXT
+
+         CASE cKind == "GROUPING SETS"
+            /* Explicit list.  An element is either a list of expression
+             * nodes (empty for "()") or a single bare node.  A node is
+             * told from a list by slot 1: a list's first entry is an array,
+             * a node's ND_* tag is assumed to be a scalar. */
+            FOR n := 1 TO Len( aBase )
+               xItem    := aBase[ n ]
+               aCurrent := AClone( aSets[ j ] )
+               IF ValType( xItem ) == "A" .AND. Len( xItem ) > 0
+                  IF ValType( xItem[ 1 ] ) == "A"
+                     FOR k := 1 TO Len( xItem )
+                        AAdd( aCurrent, xItem[ k ] )
+                     NEXT
+                  ELSE
+                     AAdd( aCurrent, xItem )
+                  ENDIF
+               ENDIF
+               AAdd( aNewSets, aCurrent )
+            NEXT
+
+         OTHERWISE
+            /* Plain column or opaque expression: extend the set */
+            aCurrent := AClone( aSets[ j ] )
+            AAdd( aCurrent, xTerm )
+            AAdd( aNewSets, aCurrent )
+         ENDCASE
+      NEXT
+
+      aSets := aNewSets
+   NEXT
+
+RETURN aSets
+
+
+/* Classify a GROUP BY term.  Returns "ROLLUP", "CUBE" or
+ * "GROUPING SETS" for an ND_FN node of that name (any letter case)
+ * that carries an argument array in slot 3, and "" otherwise. */
+STATIC FUNCTION SqlGroupModKind( xTerm )
+
+   LOCAL cName
+
+   IF ValType( xTerm ) == "A" .AND. Len( xTerm ) >= 3 .AND. ;
+      xTerm[ 1 ] == ND_FN .AND. ValType( xTerm[ 2 ] ) == "C" .AND. ;
+      ValType( xTerm[ 3 ] ) == "A"
+      cName := Upper( xTerm[ 2 ] )
+      IF cName == "ROLLUP" .OR. cName == "CUBE" .OR. cName == "GROUPING SETS"
+         RETURN cName
+      ENDIF
+   ENDIF
+
+RETURN ""
+
+
+/* Tell whether a SELECT-list expression is one of the grouping columns
+ * of aSet.  GroupBy uses the answer to pick the output columns that are
+ * blanked (NIL) on the rows of a partial grouping set.
+ *
+ * Only plain column references (ND_COL) can match; every other kind of
+ * expression yields .F.  Names compare without regard to letter case.
+ * When both names are qualified ("D.NAME" vs "E.NAME") the full names
+ * must agree, so equally named columns of two tables stay distinct.
+ * When either side is unqualified only the part after the first dot is
+ * compared.
+ * NOTE(review): a table name and its alias are different qualifiers
+ * here ("EMP.ID" vs "E.ID" do not match) - confirm that is intended.
+ *
+ * xSelExpr  SELECT-list expression node.
+ * aSet      one grouping set as produced by ExpandGroupingSets().
+ * Returns   .T. when xSelExpr names a column of aSet, .F. otherwise.
+ */
+METHOD ExprInSet( xSelExpr, aSet ) CLASS TSqlAgg
+
+   LOCAL i, xG, nDot, lSelQual
+   LOCAL cSelFull, cSelBase, cGrpFull, cGrpBase
+
+   IF ValType( xSelExpr ) != "A" .OR. Len( xSelExpr ) < 2 .OR. ;
+      xSelExpr[ 1 ] != ND_COL .OR. ValType( xSelExpr[ 2 ] ) != "C" .OR. ;
+      ValType( aSet ) != "A"
+      RETURN .F.
+   ENDIF
+
+   cSelFull := Upper( xSelExpr[ 2 ] )
+   nDot     := At( ".", cSelFull )
+   lSelQual := nDot > 0
+   cSelBase := iif( lSelQual, SubStr( cSelFull, nDot + 1 ), cSelFull )
+
+   FOR i := 1 TO Len( aSet )
+      xG := aSet[ i ]
+      IF ValType( xG ) == "A" .AND. Len( xG ) >= 2 .AND. ;
+         xG[ 1 ] == ND_COL .AND. ValType( xG[ 2 ] ) == "C"
+         cGrpFull := Upper( xG[ 2 ] )
+         nDot     := At( ".", cGrpFull )
+         IF lSelQual .AND. nDot > 0
+            /* Both qualified: "D.NAME" must not match "E.NAME" */
+            IF cGrpFull == cSelFull
+               RETURN .T.
+            ENDIF
+         ELSE
+            cGrpBase := iif( nDot > 0, SubStr( cGrpFull, nDot + 1 ), cGrpFull )
+            IF cGrpBase == cSelBase
+               RETURN .T.
+            ENDIF
+         ENDIF
+      ENDIF
+   NEXT
+
+RETURN .F.
+
+
 /* Resolve a GROUP BY expression to its column position in the output row.
  * Walks the SELECT list's source expressions (aCols[i][1]) rather than
  * the alias list (aFN[i]). For `SELECT d.name AS foo GROUP BY d.name`,
diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg
index a163d0d..ca2d279 100644
--- a/_FiveSql2/src/TSqlExecutor.prg
+++ b/_FiveSql2/src/TSqlExecutor.prg
@@ -35,6 +35,8 @@ CLASS TSqlExecutor
    DATA aCompileStruct
    DATA bRowBlock /* optional code block — receives SELECT cols as params */
    DATA aFetchCache /* pre-bound {nWA, nFPos} per SELECT expression, or NIL */
+   DATA hSubCorrCache INIT { => } /* per-outer-key subquery result cache */
+   DATA nSubCacheSeq INIT 0 /* monotonic ID for subqueries */
 
    CLASSDATA hSubCache INIT { => } SHARED
 
@@ -69,6 +71,9 @@ CLASS TSqlExecutor
    METHOD TryCompileWhere( xWhere )
    METHOD SqlExprToPrg( xNode )
    METHOD BuildFetchCache( aExprs )
+   METHOD SubqueryCached( xSubNode )
+   METHOD CollectFreeVars( hQ )
+   METHOD CollectExprFreeVars( xE, aLocalAliases, aFree )
 
 ENDCLASS
 
@@ -456,15 +461,9 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN .F. ENDIF IF xR != NIL .AND. xR[ 1 ] == ND_SUB .AND.
xR[ 2 ] != NIL - /* Use subquery cache for non-correlated subqueries */ - IF Len( s_aOuterStack ) == 0 - aSubResult := ::CacheSubquery( xR[ 2 ] ) - ELSE - nSavedWA := Select() - ::PushOuter() - aSubResult := TSqlExecutor():New( xR[ 2 ], ::aParams ):Run() - ::PopOuter() - dbSelectArea( nSavedWA ) + aSubResult := ::SubqueryCached( xR ) + IF aSubResult == NIL .OR. ValType( aSubResult ) != "A" + /* Cache miss-fallback */ ENDIF IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" @@ -596,17 +595,12 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor CASE xNode[ 1 ] == ND_SUB IF xNode[ 2 ] != NIL - /* Subqueries are evaluated per outer row with outer context - * pushed so ::Resolve() can see parent aliases. The previous - * implementation only used this path when s_aOuterStack was - * already non-empty and cached the result at the top level — - * which silently broke correlated subqueries (they got the - * first row's result reused for every subsequent row). */ - nSavedWA := Select() - ::PushOuter() - aSubResult := TSqlExecutor():New( xNode[ 2 ], ::aParams ):Run() - ::PopOuter() - dbSelectArea( nSavedWA ) + /* Subqueries use a per-outer-key cache. SubqueryCached + * does static free-variable analysis on the first call, + * then memoizes results keyed by the current values of + * those free variables. Non-correlated subqueries reduce + * to a trivial single-entry cache. */ + aSubResult := ::SubqueryCached( xNode ) IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" .AND. Len( aSubResult[ 2 ] ) > 0 .AND. 
; Len( aSubResult[ 2 ][ 1 ] ) > 0
@@ -1121,7 +1115,13 @@ METHOD RunSelect() CLASS TSqlExecutor
       IF Empty( cAlias )
          cAlias := cTable
       ENDIF
-      IF Len( cAlias ) <= 1
+      /* Always stash the user-written alias in slot [3] so that FindWA /
+       * Resolve can still match queries that reference the alias by its
+       * SQL name even after we re-alias the workarea with a depth-
+       * suffixed temp name. Previously this was only done for 1-char
+       * aliases, which left multi-char aliases (e.g. `emp e2`) invisible
+       * to correlated subquery lookups once the rename kicked in. */
+      IF Empty( ::aTables[ i ][ 3 ] )
          ::aTables[ i ][ 3 ] := cAlias
       ENDIF
       IF Len( cAlias ) <= 1 .OR. ::nDepth > 1
@@ -1546,6 +1546,244 @@ RETURN lHadMatch
 
 
 /* Subquery result cache for non-correlated subqueries */
+/* SubqueryCached - run a subquery node with per-outer-key memoization.
+ *
+ * On the first call for an ND_SUB node its AST is scanned for free
+ * variables: qualified column references whose alias is not one of the
+ * subquery's own FROM tables, i.e. values supplied by an enclosing
+ * query.  Results are cached under the node's ID plus the current values
+ * of those free variables, so
+ *
+ *   - a non-correlated subquery (no free variables) runs once and the
+ *     result is reused for every outer row;
+ *   - a correlated subquery runs once per distinct combination of outer
+ *     values instead of once per outer row.
+ *
+ * ID and free-variable list are stored on the AST node itself (slot 6),
+ * so the analysis happens once per subquery expression.  Results go to
+ * ::hSubCorrCache of the executor that evaluates the node.
+ *
+ * xSubNode  ND_SUB node; slot 2 holds the parsed subquery.
+ * Returns   the nested executor's Run() result, or NIL when xSubNode is
+ *           not a usable subquery node.  A cached result is handed out
+ *           by reference - callers must not modify it.
+ *
+ * Limitation: unqualified column references are never treated as free
+ * variables, so a subquery that reads an outer column without an alias
+ * prefix is cached as if it were non-correlated.
+ */
+METHOD SubqueryCached( xSubNode ) CLASS TSqlExecutor
+
+   LOCAL hQ, aFreeVars, cCacheKey, cVal, xVal, aResult, nSavedWA, oSub, i
+
+   /* One ID counter for all executors.  The ID lives on the AST node but
+    * the cache lives on the executor, and a nested subquery is visited by
+    * a fresh executor on every cache miss of its parent; a per-executor
+    * counter could give two different nodes the same ID and let them
+    * meet in one cache. */
+   STATIC s_nNextSubId := 0
+
+   IF ValType( xSubNode ) != "A" .OR. Len( xSubNode ) < 2 .OR. ;
+      xSubNode[ 2 ] == NIL
+      RETURN NIL
+   ENDIF
+   hQ := xSubNode[ 2 ]
+
+   /* First sighting of this node: give it an ID and analyse it once */
+   IF Len( xSubNode ) < 6
+      ASize( xSubNode, 6 )
+   ENDIF
+   IF xSubNode[ 6 ] == NIL
+      ::nSubCacheSeq := ++s_nNextSubId
+      xSubNode[ 6 ]  := { ::nSubCacheSeq, ::CollectFreeVars( hQ ) }
+   ENDIF
+   aFreeVars := xSubNode[ 6 ][ 2 ]
+
+   /* Cache key: node ID plus the current value of every free variable as
+    * delivered by Resolve().  Each value is written as
+    * <type><length>:<text>, so neighbouring values cannot run into each
+    * other and values of different types never share a key. */
+   cCacheKey := hb_ntos( xSubNode[ 6 ][ 1 ] ) + "@"
+   FOR i := 1 TO Len( aFreeVars )
+      xVal := ::Resolve( aFreeVars[ i ] )
+      cVal := SqlValToStr( xVal )
+      cCacheKey += ValType( xVal ) + hb_ntos( Len( cVal ) ) + ":" + cVal
+   NEXT
+
+   IF hb_HHasKey( ::hSubCorrCache, cCacheKey )
+      RETURN ::hSubCorrCache[ cCacheKey ]
+   ENDIF
+
+   /* Cache miss: run the subquery with the outer context pushed, then
+    * restore the previously selected workarea */
+   nSavedWA := Select()
+   ::PushOuter()
+   oSub := TSqlExecutor():New( hQ, ::aParams )
+   oSub:nDepth := ::nDepth
+   aResult := oSub:Run()
+   ::PopOuter()
+   dbSelectArea( nSavedWA )
+
+   ::hSubCorrCache[ cCacheKey ] := aResult
+
+RETURN aResult
+
+
+/* Collect the free-variable column references of a subquery: qualified
+ * references ("alias.col") whose alias is not one of the subquery's own
+ * FROM tables and therefore has to resolve in an enclosing query.
+ *
+ * The WHERE clause, the SELECT list and the HAVING clause are scanned,
+ * including subqueries nested inside them.
+ * NOTE(review): JOIN conditions, GROUP BY and ORDER BY are not scanned -
+ * confirm that outer references cannot occur there.
+ *
+ * hQ       parsed subquery: a hash with the optional keys "tables",
+ *          "where", "columns" and "having".
+ * Returns  array of reference strings as written in the query, without
+ *          duplicates; empty when hQ is not a hash.
+ */
+METHOD CollectFreeVars( hQ ) CLASS TSqlExecutor
+
+   LOCAL aFree := {}, aLocalAliases := {}
+   LOCAL aT, xCol, i, k
+
+   IF ValType( hQ ) != "H"
+      RETURN aFree
+   ENDIF
+
+   /* Names bound by the subquery itself: slots 1 and 2 of every
+    * "tables" entry (presumably table name and alias) */
+   IF hb_HHasKey( hQ, "tables" ) .AND. ValType( hQ[ "tables" ] ) == "A"
+      FOR i := 1 TO Len( hQ[ "tables" ] )
+         aT := hQ[ "tables" ][ i ]
+         IF ValType( aT ) == "A"
+            FOR k := 1 TO Min( Len( aT ), 2 )
+               IF ValType( aT[ k ] ) == "C" .AND. ! Empty( aT[ k ] )
+                  AAdd( aLocalAliases, Upper( aT[ k ] ) )
+               ENDIF
+            NEXT
+         ENDIF
+      NEXT
+   ENDIF
+
+   IF hb_HHasKey( hQ, "where" )
+      ::CollectExprFreeVars( hQ[ "where" ], aLocalAliases, aFree )
+   ENDIF
+   IF hb_HHasKey( hQ, "columns" ) .AND. ValType( hQ[ "columns" ] ) == "A"
+      FOR i := 1 TO Len( hQ[ "columns" ] )
+         xCol := hQ[ "columns" ][ i ]
+         IF ValType( xCol ) == "A" .AND. Len( xCol ) >= 1
+            ::CollectExprFreeVars( xCol[ 1 ], aLocalAliases, aFree )
+         ENDIF
+      NEXT
+   ENDIF
+   IF hb_HHasKey( hQ, "having" )
+      ::CollectExprFreeVars( hQ[ "having" ], aLocalAliases, aFree )
+   ENDIF
+
+RETURN aFree
+
+
+/* Append cRef to aFree when it is a qualified reference ("alias.col")
+ * whose alias is not in aLocalAliases and which is not listed yet.
+ * hb_AScan() is called with lExact = .T. because plain AScan() compares
+ * strings like the "=" operator: with SET EXACT OFF the outer alias "E"
+ * would be found in { "EMP" } and "e.dept" would go unnoticed. */
+STATIC PROCEDURE SqlNoteFreeRef( cRef, aLocalAliases, aFree )
+
+   LOCAL nDot := At( ".", cRef )
+
+   IF nDot > 1 .AND. ;
+      hb_AScan( aLocalAliases, Upper( Left( cRef, nDot - 1 ) ),,, .T. ) == 0 .AND. ;
+      hb_AScan( aFree, cRef,,, .T. ) == 0
+      AAdd( aFree, cRef )
+   ENDIF
+
+RETURN
+
+
+/* Walk a SQL expression tree and append to aFree every qualified column
+ * reference whose alias is not listed in aLocalAliases.
+ *
+ * xE             expression node, or NIL.
+ * aLocalAliases  upper-case names bound by the query being analysed.
+ * aFree          result array, extended in place.
+ * Returns        NIL.
+ */
+METHOD CollectExprFreeVars( xE, aLocalAliases, aFree ) CLASS TSqlExecutor
+
+   LOCAL i, aPair, aNested
+
+   IF ValType( xE ) != "A" .OR. Len( xE ) < 1
+      RETURN NIL
+   ENDIF
+
+   DO CASE
+   CASE xE[ 1 ] == ND_COL
+      IF Len( xE ) >= 2 .AND. ValType( xE[ 2 ] ) == "C"
+         SqlNoteFreeRef( xE[ 2 ], aLocalAliases, aFree )
+      ENDIF
+
+   CASE xE[ 1 ] == ND_BIN .OR. xE[ 1 ] == ND_RANGE
+      /* Operands sit in slots 3 and 4, plus slot 5 when present */
+      FOR i := 3 TO Min( Len( xE ), 5 )
+         ::CollectExprFreeVars( xE[ i ], aLocalAliases, aFree )
+      NEXT
+
+   CASE xE[ 1 ] == ND_UNI
+      IF Len( xE ) >= 3
+         ::CollectExprFreeVars( xE[ 3 ], aLocalAliases, aFree )
+      ENDIF
+
+   CASE xE[ 1 ] == ND_FN
+      /* Function arguments */
+      IF Len( xE ) >= 3 .AND. ValType( xE[ 3 ] ) == "A"
+         FOR i := 1 TO Len( xE[ 3 ] )
+            ::CollectExprFreeVars( xE[ 3 ][ i ], aLocalAliases, aFree )
+         NEXT
+      ENDIF
+
+   CASE xE[ 1 ] == ND_CASE
+      /* Slot 2: list of two-element branches; slot 3: optional trailing
+       * expression (presumably the WHEN/THEN pairs and the ELSE part) */
+      IF Len( xE ) >= 2 .AND. ValType( xE[ 2 ] ) == "A"
+         FOR i := 1 TO Len( xE[ 2 ] )
+            aPair := xE[ 2 ][ i ]
+            IF ValType( aPair ) == "A" .AND. Len( aPair ) >= 2
+               ::CollectExprFreeVars( aPair[ 1 ], aLocalAliases, aFree )
+               ::CollectExprFreeVars( aPair[ 2 ], aLocalAliases, aFree )
+            ENDIF
+         NEXT
+      ENDIF
+      IF Len( xE ) >= 3
+         ::CollectExprFreeVars( xE[ 3 ], aLocalAliases, aFree )
+      ENDIF
+
+   CASE xE[ 1 ] == ND_LIST
+      IF Len( xE ) >= 2 .AND. ValType( xE[ 2 ] ) == "A"
+         FOR i := 1 TO Len( xE[ 2 ] )
+            ::CollectExprFreeVars( xE[ 2 ][ i ], aLocalAliases, aFree )
+         NEXT
+      ENDIF
+
+   CASE xE[ 1 ] == ND_SUB
+      /* A nested subquery can reach past the query being analysed.  Such
+       * a reference makes the enclosing subquery correlated too, so it
+       * has to appear in that subquery's cache key.  Take the nested
+       * query's own free variables and keep those that the enclosing
+       * query's tables do not bind either. */
+      IF Len( xE ) >= 2
+         aNested := ::CollectFreeVars( xE[ 2 ] )
+         FOR i := 1 TO Len( aNested )
+            SqlNoteFreeRef( aNested[ i ], aLocalAliases, aFree )
+         NEXT
+      ENDIF
+   ENDCASE
+
+RETURN NIL
+
+
 METHOD CacheSubquery( xSubExpr ) CLASS TSqlExecutor
 
    LOCAL cKey, aSubResult, nSavedWA, oSub