/*
 * TSqlAgg.prg — GROUP BY aggregation and HAVING filter
 *
 * FiveSql — SQL Engine for Harbour DBF/NTX
 *
 * Copyright (c) 2025 Charles KWON (Charles KWON OhJun)
 * Email: charleskwonohjun@gmail.com
 *
 * All rights reserved.
 */

#include "hbclass.ch"
#include "FiveSqlDef.ch"

CLASS TSqlAgg

   METHOD New() CONSTRUCTOR
   METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams )
   METHOD FindGroupIdx( xGroupExpr, aCols, aFN )
   METHOD ExpandGroupingSets( aGroupBy )
   METHOD ExprInSet( xSelExpr, aSet )
   METHOD ComputeAgg( xE, aGR, aFN )
   METHOD FindColIdx( xExpr, aFN )
   METHOD FindColIdx2( cN, aFN )
   METHOD EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
   METHOD HasAgg( aCols )
   METHOD EvalHavingExpr( xE, aNewRow, aCols, aGR, aFN, aParams )

ENDCLASS

/* Constructor — the class keeps no state. */
METHOD New() CLASS TSqlAgg

   RETURN Self

/* .T. when at least one SELECT entry's expression (aCols[i][1])
 * contains an aggregate, as reported by SqlExprHasAgg(). */
METHOD HasAgg( aCols ) CLASS TSqlAgg

   LOCAL i

   FOR i := 1 TO Len( aCols )
      IF SqlExprHasAgg( aCols[ i ][ 1 ] )
         RETURN .T.
      ENDIF
   NEXT

   RETURN .F.

/* Group aRows and produce one output row per group.
 *
 *   aRows    - input rows (arrays of values)
 *   aFN      - names used to locate values inside a row
 *   aCols    - SELECT list; aCols[i][1] is the source expression and
 *              aCols[i][2] is compared against column names in HAVING
 *   aGroupBy - GROUP BY terms; may contain ROLLUP / CUBE /
 *              GROUPING SETS function nodes
 *   xHaving  - HAVING expression tree, or NIL
 *   aTables  - not used here; only forwarded on recursion
 *   aParams  - forwarded to the HAVING evaluator
 *
 * Returns an array of output rows, one value per aCols entry.
 *
 * Empty input needs no special case: without GROUP BY the single
 * "__ALL__" group is aggregated over zero rows (COUNT -> 0, SUM / AVG /
 * MIN / MAX -> NIL) and still goes through HAVING; with a GROUP BY no
 * group exists, so no row is returned.
 */
METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS TSqlAgg

   LOCAL hGroups := { => }
   LOCAL aResult := {}
   LOCAL aGroupIdx := {}
   LOCAL i, j, cKey, aGroupRows, aNewRow
   LOCAL nGCol, cN, nCI
   LOCAL aSets, aCurSet, nSet, aOmit, aSubResult
   LOCAL lHasSets := .F.

   /* SQL:2003 ROLLUP / CUBE / GROUPING SETS — expand into a list of
    * flat grouping key sets and run aggregation once per set. The
    * trigger is the presence of a grouping node rather than the number
    * of resulting sets, so an expansion that yields a single set
    * (e.g. GROUPING SETS ((a))) is not grouped on the raw function node.
    */
   FOR j := 1 TO Len( aGroupBy )
      IF ! Empty( AggGroupingKind( aGroupBy[ j ] ) )
         lHasSets := .T.
         EXIT
      ENDIF
   NEXT

   IF lHasSets
      aSets := ::ExpandGroupingSets( aGroupBy )

      FOR nSet := 1 TO Len( aSets )
         aCurSet := aSets[ nSet ]

         /* Recurse with the flat set. Nested grouping nodes that
          * survive one expansion are expanded by the recursive call. */
         aSubResult := ::GroupBy( aRows, aFN, aCols, aCurSet, xHaving, aTables, aParams )

         /* Columns absent from the current set emit NIL (the standard
          * "subtotal" placeholder). Aggregate columns are never blanked. */
         aOmit := Array( Len( aCols ) )
         FOR i := 1 TO Len( aCols )
            aOmit[ i ] := ! SqlExprHasAgg( aCols[ i ][ 1 ] ) .AND. ;
                          ! ::ExprInSet( aCols[ i ][ 1 ], aCurSet )
         NEXT

         FOR i := 1 TO Len( aSubResult )
            FOR j := 1 TO Min( Len( aSubResult[ i ] ), Len( aOmit ) )
               IF aOmit[ j ]
                  aSubResult[ i ][ j ] := NIL
               ENDIF
            NEXT
            AAdd( aResult, aSubResult[ i ] )
         NEXT
      NEXT

      RETURN aResult
   ENDIF

   /* Pre-resolve the GROUP BY columns to their position in the row by
    * matching against the SOURCE expressions in aCols, not the alias
    * list in aFN. */
   FOR j := 1 TO Len( aGroupBy )
      AAdd( aGroupIdx, ::FindGroupIdx( aGroupBy[ j ], aCols, aFN ) )
   NEXT

   /* Build group buckets */
   IF Len( aGroupBy ) == 0 .AND. ::HasAgg( aCols )
      hGroups[ "__ALL__" ] := aRows
   ELSE
      FOR i := 1 TO Len( aRows )
         /* NOTE(review): key parts are joined with "|" without
          * escaping, so values that contain "|" could collide. */
         cKey := ""
         FOR j := 1 TO Len( aGroupBy )
            nGCol := aGroupIdx[ j ]
            IF nGCol > 0 .AND. nGCol <= Len( aRows[ i ] )
               cKey += SqlValToStr( aRows[ i ][ nGCol ] ) + "|"
            ENDIF
         NEXT
         IF ! hb_HHasKey( hGroups, cKey )
            hGroups[ cKey ] := {}
         ENDIF
         AAdd( hGroups[ cKey ], aRows[ i ] )
      NEXT
   ENDIF

   /* Compute one output row per group */
   FOR EACH aGroupRows IN hb_HValues( hGroups )
      aNewRow := {}

      FOR j := 1 TO Len( aCols )
         IF SqlExprHasAgg( aCols[ j ][ 1 ] )
            AAdd( aNewRow, ::ComputeAgg( aCols[ j ][ 1 ], aGroupRows, aFN ) )
         ELSE
            /* Non-aggregate column: take the value from the group's
             * first row, NIL when it cannot be located. */
            cN  := SqlExprName( aCols[ j ][ 1 ] )
            nCI := ::FindColIdx2( cN, aFN )
            IF nCI > 0 .AND. Len( aGroupRows ) > 0 .AND. ;
               nCI <= Len( aGroupRows[ 1 ] )
               AAdd( aNewRow, aGroupRows[ 1 ][ nCI ] )
            ELSE
               AAdd( aNewRow, NIL )
            ENDIF
         ENDIF
      NEXT

      /* HAVING filter */
      IF xHaving != NIL
         IF ! ::EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
            LOOP
         ENDIF
      ENDIF

      AAdd( aResult, aNewRow )
   NEXT

   RETURN aResult

/* Expand SQL:2003 ROLLUP / CUBE / GROUPING SETS into a list of flat
 * grouping sets. Each returned set is an array of expressions that
 * would be the plain GROUP BY for one pass of aggregation.
 *
 *   GROUP BY a, ROLLUP(b, c)                → {(a,b,c), (a,b), (a)}
 *   GROUP BY CUBE(a, b)                     → {(), (a), (b), (a,b)}
 *                                             (bitmask order)
 *   GROUP BY GROUPING SETS ((a,b), (a), ()) → as-is
 *
 * If aGroupBy is a plain column list with no aggregate-set modifiers,
 * returns a single-element list holding aGroupBy itself.
 */
METHOD ExpandGroupingSets( aGroupBy ) CLASS TSqlAgg

   LOCAL aSets, aNewSets, aCurrent, aBase, xTerm, xItem, cKind
   LOCAL i, j, k, n, nMask
   LOCAL lExpand := .F.

   /* Fast path: no ROLLUP/CUBE/GROUPING SETS node → single set */
   FOR i := 1 TO Len( aGroupBy )
      IF ! Empty( AggGroupingKind( aGroupBy[ i ] ) )
         lExpand := .T.
         EXIT
      ENDIF
   NEXT

   IF ! lExpand
      RETURN { aGroupBy }
   ENDIF

   /* Seed with a single empty set, then cross-expand each term */
   aSets := { {} }

   FOR i := 1 TO Len( aGroupBy )
      xTerm    := aGroupBy[ i ]
      cKind    := AggGroupingKind( xTerm )
      aNewSets := {}

      DO CASE
      CASE cKind == "ROLLUP"
         /* ROLLUP(c1..cN) → N+1 sets: every prefix, longest first,
          * down to the empty prefix. */
         aBase := xTerm[ 3 ]
         FOR j := 1 TO Len( aSets )
            FOR n := Len( aBase ) TO 0 STEP -1
               aCurrent := AClone( aSets[ j ] )
               FOR k := 1 TO n
                  AAdd( aCurrent, aBase[ k ] )
               NEXT
               AAdd( aNewSets, aCurrent )
            NEXT
         NEXT

      CASE cKind == "CUBE"
         /* CUBE(c1..cN) → 2^N sets (every subset). For each bitmask,
          * include the columns whose bit is set. */
         aBase := xTerm[ 3 ]
         FOR j := 1 TO Len( aSets )
            FOR nMask := 0 TO ( 2 ^ Len( aBase ) ) - 1
               aCurrent := AClone( aSets[ j ] )
               FOR k := 1 TO Len( aBase )
                  IF hb_BitAnd( nMask, hb_BitShift( 1, k - 1 ) ) != 0
                     AAdd( aCurrent, aBase[ k ] )
                  ENDIF
               NEXT
               AAdd( aNewSets, aCurrent )
            NEXT
         NEXT

      CASE cKind == "GROUPING SETS"
         /* Explicit list — each element is a list of expressions,
          * an empty list (), or a single bare expression node. */
         aBase := xTerm[ 3 ]
         FOR j := 1 TO Len( aSets )
            FOR n := 1 TO Len( aBase )
               xItem    := aBase[ n ]
               aCurrent := AClone( aSets[ j ] )
               IF ValType( xItem ) == "A"
                  IF Len( xItem ) > 0 .AND. ValType( xItem[ 1 ] ) != "A"
                     /* First element is a node tag, not a node: this
                      * is one expression, not a list of expressions. */
                     AAdd( aCurrent, xItem )
                  ELSE
                     FOR k := 1 TO Len( xItem )
                        AAdd( aCurrent, xItem[ k ] )
                     NEXT
                  ENDIF
               ENDIF
               AAdd( aNewSets, aCurrent )
            NEXT
         NEXT

      OTHERWISE
         /* Plain column, or any other expression — append it to
          * every existing set as an opaque term. */
         FOR j := 1 TO Len( aSets )
            aCurrent := AClone( aSets[ j ] )
            AAdd( aCurrent, xTerm )
            AAdd( aNewSets, aCurrent )
         NEXT
      ENDCASE

      aSets := aNewSets
   NEXT

   RETURN aSets

/* Does a SELECT expression reference a column that appears in the
 * given grouping set? Used to decide which SELECT cols to NIL out
 * when reporting a partial grouping (subtotal) row. Only ND_COL nodes
 * are considered; names are compared upper-cased with any qualifier
 * before the first "." removed.
 */
METHOD ExprInSet( xSelExpr, aSet ) CLASS TSqlAgg

   LOCAL i, cSelName

   IF ! AggNodeIs( xSelExpr, ND_COL )
      RETURN .F.
   ENDIF

   cSelName := AggBareName( xSelExpr[ 2 ] )

   FOR i := 1 TO Len( aSet )
      IF AggNodeIs( aSet[ i ], ND_COL )
         IF AggBareName( aSet[ i ][ 2 ] ) == cSelName
            RETURN .T.
         ENDIF
      ENDIF
   NEXT

   RETURN .F.

/* Resolve a GROUP BY expression to its column position in the row.
 * Walks the SELECT list's source expressions (aCols[i][1]) rather than
 * the alias list (aFN[i]). For `SELECT d.name AS foo GROUP BY d.name`,
 * aFN is {"FOO"} but aCols[1][1] is ND_COL "d.name" — we need to match
 * the latter, otherwise the group key collapses every row into one
 * bucket. Falls back to FindColIdx (name lookup in aFN) when the
 * GROUP BY column is not in the SELECT list. Returns 0 when unresolved.
 */
METHOD FindGroupIdx( xGroupExpr, aCols, aFN ) CLASS TSqlAgg

   LOCAL i, cGName

   IF AggNodeIs( xGroupExpr, ND_COL )
      cGName := AggBareName( xGroupExpr[ 2 ] )

      FOR i := 1 TO Len( aCols )
         IF AggNodeIs( aCols[ i ][ 1 ], ND_COL )
            IF AggBareName( aCols[ i ][ 1 ][ 2 ] ) == cGName
               RETURN i
            ENDIF
         ENDIF
      NEXT
   ENDIF

   /* Last resort: name-based lookup (yields 0 for non-column nodes) */
   RETURN ::FindColIdx( xGroupExpr, aFN )

/* Position in aFN of the column named by an ND_COL node, compared
 * case-insensitively without qualifier; 0 when xExpr is not a column
 * node or the name is not present. */
METHOD FindColIdx( xExpr, aFN ) CLASS TSqlAgg

   IF AggNodeIs( xExpr, ND_COL )
      RETURN ::FindColIdx2( AggBareName( xExpr[ 2 ] ), aFN )
   ENDIF

   RETURN 0

/* Position of name cN in aFN (case-insensitive, exact); 0 if absent. */
METHOD FindColIdx2( cN, aFN ) CLASS TSqlAgg

   LOCAL i

   cN := Upper( cN )

   FOR i := 1 TO Len( aFN )
      IF Upper( aFN[ i ] ) == cN
         RETURN i
      ENDIF
   NEXT

   RETURN 0

/* Compute one aggregate function node (xE) over the rows of a group.
 *
 *   xE  - ND_FN node: xE[2] is the function name, xE[3] the arguments
 *   aGR - rows of the group
 *   aFN - names used to locate the argument column inside a row
 *
 * Supported: COUNT, SUM, AVG, MIN, MAX, GROUP_CONCAT / STRING_AGG
 * (values joined with ", "). NIL values are ignored. SUM, AVG, MIN and
 * MAX return NIL when no non-NIL value was seen. Returns 0 for a node
 * that is not a function and for unsupported function names.
 */
METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg

   LOCAL cFunc, xArg, nCol, i, xVal
   LOCAL nCount := 0, nSum := 0, xMin := NIL, xMax := NIL
   LOCAL cResult := "", lFirst := .T.
   LOCAL lSum, lMinMax, lConcat, lArgIsCol

   IF ! AggNodeIs( xE, ND_FN )
      RETURN 0
   ENDIF

   cFunc := Upper( xE[ 2 ] )

   /* No argument list: only COUNT is meaningful (row count) */
   IF Len( xE ) < 3 .OR. ValType( xE[ 3 ] ) != "A" .OR. Len( xE[ 3 ] ) == 0
      RETURN iif( cFunc == "COUNT", Len( aGR ), 0 )
   ENDIF

   xArg      := xE[ 3 ][ 1 ]
   lArgIsCol := AggNodeIs( xArg, ND_COL )

   /* fn(*): COUNT(*) counts rows, anything else yields 0 */
   IF lArgIsCol .AND. xArg[ 2 ] == "*"
      RETURN iif( cFunc == "COUNT", Len( aGR ), 0 )
   ENDIF

   nCol := ::FindColIdx2( SqlExprName( xArg ), aFN )

   /* Plain column that cannot be located in the row */
   IF nCol == 0 .AND. lArgIsCol
      RETURN iif( cFunc == "COUNT", Len( aGR ), 0 )
   ENDIF

   lSum    := ( cFunc == "SUM" .OR. cFunc == "AVG" )
   lMinMax := ( cFunc == "MIN" .OR. cFunc == "MAX" )
   lConcat := ( cFunc == "GROUP_CONCAT" .OR. cFunc == "STRING_AGG" )

   FOR i := 1 TO Len( aGR )
      IF nCol == 0
         /* Complex expression (CASE, BIN, etc.) inside aggregate:
          * evaluate the expression tree against the current row data. */
         xVal := SqlEvalRowExpr( xArg, aFN, aGR[ i ] )
      ELSEIF nCol <= Len( aGR[ i ] )
         xVal := aGR[ i ][ nCol ]
      ELSE
         xVal := NIL
      ENDIF

      IF xVal != NIL
         nCount++

         /* Only do the work the requested function needs, so that
          * e.g. MIN over dates never goes through numeric coercion. */
         DO CASE
         CASE lSum
            nSum += SqlCoerceNum( xVal )

         CASE lMinMax
            /* Use SqlCmpLt for type-safe comparison (strings, dates) */
            IF xMin == NIL .OR. SqlCmpLt( xVal, xMin )
               xMin := xVal
            ENDIF
            IF xMax == NIL .OR. SqlCmpLt( xMax, xVal )
               xMax := xVal
            ENDIF

         CASE lConcat
            /* A first-item flag (not Empty( cResult )) decides on the
             * separator, so a leading empty or blank value still gets
             * separated from the next one. */
            IF lFirst
               lFirst := .F.
            ELSE
               cResult += ", "
            ENDIF
            cResult += SqlCoerceStr( xVal )
         ENDCASE
      ENDIF
   NEXT

   DO CASE
   CASE cFunc == "COUNT"
      RETURN nCount
   CASE cFunc == "SUM"
      /* SQL standard: SUM of all NULLs = NULL, not 0 */
      RETURN iif( nCount > 0, nSum, NIL )
   CASE cFunc == "AVG"
      RETURN iif( nCount > 0, nSum / nCount, NIL )
   CASE cFunc == "MIN"
      RETURN xMin
   CASE cFunc == "MAX"
      RETURN xMax
   CASE lConcat
      RETURN cResult
   ENDCASE

   RETURN 0

/* Evaluate the HAVING tree for one group and reduce the result to a
 * logical via SqlIsTrue(). */
METHOD EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams ) CLASS TSqlAgg

   RETURN SqlIsTrue( ::EvalHavingExpr( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams ) )

/* Recursively evaluate a HAVING expression node for one group.
 *
 *   xE      - expression node (ND_LIT, ND_NIL, ND_COL, ND_FN, ND_BIN,
 *             ND_UNI); anything else evaluates to NIL
 *   aNewRow - the output row already computed for the group
 *   aCols   - SELECT list; aCols[i][2] is matched against column names
 *   aGR     - rows of the group (for aggregates and fallback lookup)
 *   aFN     - names used to locate values inside a group row
 *   aParams - only forwarded on recursion
 *
 * NIL stands for SQL NULL / UNKNOWN and is propagated: a comparison
 * with a NIL operand yields NIL, NOT NIL is NIL, and AND / OR follow
 * three-valued logic.
 */
METHOD EvalHavingExpr( xE, aNewRow, aCols, aGR, aFN, aParams ) CLASS TSqlAgg

   LOCAL xL, xR, cOp, i, nCI, cN
   LOCAL lL, lR

   IF ValType( xE ) != "A" .OR. Len( xE ) == 0
      RETURN NIL
   ENDIF

   DO CASE
   CASE xE[ 1 ] == ND_LIT
      RETURN xE[ 2 ]

   CASE xE[ 1 ] == ND_NIL
      RETURN NIL

   CASE xE[ 1 ] == ND_COL
      cN := AggBareName( xE[ 2 ] )

      /* First try the SELECT list: return the already computed value */
      FOR i := 1 TO Min( Len( aCols ), Len( aNewRow ) )
         IF Len( aCols[ i ] ) >= 2 .AND. ValType( aCols[ i ][ 2 ] ) == "C"
            IF Upper( aCols[ i ][ 2 ] ) == cN
               RETURN aNewRow[ i ]
            ENDIF
         ENDIF
      NEXT

      /* Otherwise read the column from the group's first row */
      nCI := ::FindColIdx2( cN, aFN )
      IF nCI > 0 .AND. Len( aGR ) > 0 .AND. nCI <= Len( aGR[ 1 ] )
         RETURN aGR[ 1 ][ nCI ]
      ENDIF
      RETURN NIL

   CASE xE[ 1 ] == ND_FN
      IF SqlIsAggName( xE[ 2 ] )
         RETURN ::ComputeAgg( xE, aGR, aFN )
      ENDIF
      RETURN NIL

   CASE xE[ 1 ] == ND_BIN
      cOp := xE[ 2 ]
      xL  := ::EvalHavingExpr( xE[ 3 ], aNewRow, aCols, aGR, aFN, aParams )
      xR  := ::EvalHavingExpr( xE[ 4 ], aNewRow, aCols, aGR, aFN, aParams )

      IF cOp == "AND" .OR. cOp == "OR"
         lL := SqlIsTrue( xL )
         lR := SqlIsTrue( xR )

         IF cOp == "AND"
            IF lL .AND. lR
               RETURN .T.
            ENDIF
            /* A definite FALSE on either side decides the result */
            IF ( xL != NIL .AND. ! lL ) .OR. ( xR != NIL .AND. ! lR )
               RETURN .F.
            ENDIF
            RETURN NIL
         ENDIF

         IF lL .OR. lR
            RETURN .T.
         ENDIF
         IF xL == NIL .OR. xR == NIL
            RETURN NIL
         ENDIF
         RETURN .F.
      ENDIF

      /* Comparison with NULL is UNKNOWN */
      IF xL == NIL .OR. xR == NIL
         RETURN NIL
      ENDIF

      xL := SqlCoerceForCmp( xL )
      xR := SqlCoerceForCmp( xR )

      DO CASE
      CASE cOp == "=" .OR. cOp == "=="
         RETURN SqlCmpEq( xL, xR )
      CASE cOp == "<>" .OR. cOp == "!="
         RETURN ! SqlCmpEq( xL, xR )
      CASE cOp == ">"
         RETURN SqlCmpLt( xR, xL )
      CASE cOp == "<"
         RETURN SqlCmpLt( xL, xR )
      CASE cOp == ">="
         RETURN SqlCmpEq( xL, xR ) .OR. SqlCmpLt( xR, xL )
      CASE cOp == "<="
         RETURN SqlCmpEq( xL, xR ) .OR. SqlCmpLt( xL, xR )
      ENDCASE
      RETURN NIL

   CASE xE[ 1 ] == ND_UNI
      IF xE[ 2 ] == "NOT"
         xL := ::EvalHavingExpr( xE[ 3 ], aNewRow, aCols, aGR, aFN, aParams )
         IF xL == NIL
            RETURN NIL
         ENDIF
         RETURN ! SqlIsTrue( xL )
      ENDIF
      RETURN NIL
   ENDCASE

   RETURN NIL

/* .T. when xNode is an expression node (array of at least two
 * elements) whose type tag xNode[1] equals xTag. Never raises on
 * NIL, scalars or lists of nodes. */
STATIC FUNCTION AggNodeIs( xNode, xTag )

   RETURN ValType( xNode ) == "A" .AND. Len( xNode ) >= 2 .AND. ;
          ValType( xNode[ 1 ] ) == ValType( xTag ) .AND. xNode[ 1 ] == xTag

/* Upper-cased column name with everything up to and including the
 * first "." (the qualifier) removed. */
STATIC FUNCTION AggBareName( cName )

   LOCAL nDot

   cName := Upper( cName )
   nDot  := At( ".", cName )

   RETURN iif( nDot > 0, SubStr( cName, nDot + 1 ), cName )

/* "ROLLUP", "CUBE" or "GROUPING SETS" when xTerm is such a function
 * node carrying an argument array in xTerm[3]; "" otherwise. */
STATIC FUNCTION AggGroupingKind( xTerm )

   LOCAL cName

   IF AggNodeIs( xTerm, ND_FN ) .AND. Len( xTerm ) >= 3 .AND. ;
      ValType( xTerm[ 2 ] ) == "C" .AND. ValType( xTerm[ 3 ] ) == "A"
      cName := Upper( xTerm[ 2 ] )
      IF cName == "ROLLUP" .OR. cName == "CUBE" .OR. cName == "GROUPING SETS"
         RETURN cName
      ENDIF
   ENDIF

   RETURN ""