Files
five/_FiveSql2/src/TSqlAgg.prg
CharlesKWON 7babfb7281 fix(FiveSql2): 9 latent bugs from static analysis sweep
Systematic bug-hunt driven by an automated analysis of all FiveSql2
source files. Each fix is targeted — no speculative refactoring.

--- #1 CLASSDATA hSubCache leaked across queries (CRITICAL) ---

  CLASSDATA hSubCache INIT { => } SHARED

shared one hash across ALL TSqlExecutor instances. A non-correlated
subquery cached in query A was silently returned for an unrelated
query B if the subquery text happened to produce the same cache key.
Converted to instance DATA initialized in New().

--- #5+#21 IS NULL / COALESCE treated empty string as NULL (HIGH) ---

  RETURN xL == NIL .OR. ( ValType(xL) == "C" .AND. Empty(AllTrim(xL)) )

SQL standard: '' is a valid non-NULL value. Removed the empty-string
check from both IS NULL evaluation and COALESCE skip logic.

--- #4 Multiple ? parameters all returned first value (HIGH) ---

ND_PAR nodes had no index — EvalExpr always returned ::aParams[1].
Parser now stamps each ? with a sequential 1-based index in xNode[2].
EvalExpr uses it to return the correct ::aParams[n].

--- #10+#11 SqlEvalRowExpr missing / and || operators, single-arg
    function eval (MEDIUM) ---

Division and string concatenation fell through to RETURN NIL in the
row-expression evaluator used by recursive CTEs and aggregate
ComputeAgg. Also, multi-argument functions like SUBSTR(x,2,3) only
received the first argument. Both fixed.

--- #9 SUM/AVG/MIN/MAX of all NULLs returned 0 instead of NULL
    (MEDIUM) ---

SQL standard requires NULL. Changed the aggregate return path to
return NIL when nCount == 0 (SUM/AVG) or when xMin/xMax == NIL.

--- #8 MIN/MAX used SqlCoerceNum for comparison (MEDIUM) ---

Strings and dates were coerced to numbers (Val()) before comparing,
making MIN('banana') == MIN('apple') == 0. Switched to SqlCmpLt
which handles type-appropriate comparison.

--- #7 SqlExprHasAgg only checked top-level node (MEDIUM) ---

Expressions like `salary + COUNT(*)` were not detected as containing
an aggregate because the top node was ND_BIN, not ND_FN. Made the
function recursive — walks ND_BIN, ND_UNI, ND_FN args, ND_CASE
branches.

--- #13 SELECT * only expanded first table in JOINs (MEDIUM) ---

`SELECT * FROM orders o JOIN customers c ON ...` only included
fields from orders. Changed the expansion loop to iterate ALL
entries in ::aTables.

--- #2 s_aOuterStack not unwound on subquery error (HIGH) ---

SubqueryCached's PushOuter/PopOuter pair was not protected by
BEGIN SEQUENCE. A runtime error inside the subquery left a stale
entry on the module-level outer stack, corrupting all subsequent
queries' correlated column resolution. Wrapped in SEQUENCE/RECOVER.

Validation:
  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./... ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 17:26:05 +09:00

567 lines
16 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*
* TSqlAgg.prg — GROUP BY aggregation and HAVING filter
*
* FiveSql — SQL Engine for Harbour DBF/NTX
*
* Copyright (c) 2025 Charles KWON (Charles KWON OhJun)
* Email: charleskwonohjun@gmail.com
*
* All rights reserved.
*/
#include "hbclass.ch"
#include "FiveSqlDef.ch"
/* TSqlAgg — GROUP BY aggregation and HAVING evaluation helpers.
 * The declaration lists methods only; no instance DATA is declared, so
 * each method works from the arguments it receives. */
CLASS TSqlAgg
/* Construction */
METHOD New() CONSTRUCTOR
/* Grouping driver: buckets rows, computes aggregates, applies HAVING */
METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams )
/* Maps a GROUP BY expression to a column position */
METHOD FindGroupIdx( xGroupExpr, aCols, aFN )
/* Expands ROLLUP / CUBE / GROUPING SETS into flat grouping sets */
METHOD ExpandGroupingSets( aGroupBy )
/* Tests whether a SELECT column expression names a column of a grouping set */
METHOD ExprInSet( xSelExpr, aSet )
/* Computes one aggregate function over the rows of a group */
METHOD ComputeAgg( xE, aGR, aFN )
/* Column lookups in the name list aFN: by ND_COL node, and by plain name */
METHOD FindColIdx( xExpr, aFN )
METHOD FindColIdx2( cN, aFN )
/* HAVING: boolean wrapper and the recursive expression evaluator */
METHOD EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
/* Reports whether any SELECT item contains an aggregate */
METHOD HasAgg( aCols )
METHOD EvalHavingExpr( xE, aNewRow, aCols, aGR, aFN, aParams )
ENDCLASS
/* Constructor. Nothing is initialised here; the freshly created object
 * is returned as-is. */
METHOD New() CLASS TSqlAgg

   RETURN Self
/* HasAgg — does any SELECT item contain an aggregate?
 *
 * aCols : SELECT list; slot 1 of each entry is the item's expression.
 * Returns .T. as soon as SqlExprHasAgg() accepts one expression,
 * .F. when none does (including an empty list).
 */
METHOD HasAgg( aCols ) CLASS TSqlAgg

   LOCAL aItem

   FOR EACH aItem IN aCols
      IF SqlExprHasAgg( aItem[ 1 ] )
         RETURN .T.
      ENDIF
   NEXT

   RETURN .F.
/* GroupBy — bucket rows by the GROUP BY keys, compute one output row
 * per bucket and drop the buckets rejected by HAVING.
 *
 * aRows    : input rows (array of arrays).
 * aFN      : column names used to locate values inside an input row.
 * aCols    : SELECT list; aCols[ i ][ 1 ] is the source expression.
 * aGroupBy : GROUP BY terms; may contain ROLLUP / CUBE / GROUPING SETS
 *            function nodes. A non-array value is treated as "no GROUP BY".
 * xHaving  : HAVING expression tree, or NIL.
 * aTables  : not used here; only forwarded on recursion.
 * aParams  : forwarded to the HAVING evaluator.
 *
 * Returns an array of output rows, one value per SELECT item.
 *
 * Empty input needs no special case:
 *   - without GROUP BY, an aggregate query yields exactly one row, in
 *     which each aggregate is whatever ComputeAgg() reports for an
 *     empty group (COUNT = 0, SUM/AVG/MIN/MAX = NIL) and plain
 *     columns are NIL;
 *   - with GROUP BY, there are no groups and therefore no rows.
 */
METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS TSqlAgg

   LOCAL hGroups   := { => }
   LOCAL aResult   := {}
   LOCAL aGroupIdx := {}
   LOCAL aSets, aCurSet, nSet, hOmitIdx, aSubResult
   LOCAL aGroupRows, aNewRow
   LOCAL i, j, nGCol, nCI, cN, cKey, cPart, xKeyVal

   IF ValType( aGroupBy ) != "A"
      aGroupBy := {}
   ENDIF

   /* SQL:2003 ROLLUP / CUBE / GROUPING SETS: expand into flat grouping
    * sets and aggregate once per set. */
   aSets := ::ExpandGroupingSets( aGroupBy )

   IF Len( aSets ) > 1
      FOR nSet := 1 TO Len( aSets )
         aCurSet := aSets[ nSet ]

         /* Recurse with the plain, already expanded set */
         aSubResult := ::GroupBy( aRows, aFN, aCols, aCurSet, xHaving, aTables, aParams )

         /* Non-aggregate SELECT columns that are not part of the
          * current set are reported as NIL (subtotal placeholder). */
         hOmitIdx := { => }
         FOR i := 1 TO Len( aCols )
            IF ! SqlExprHasAgg( aCols[ i ][ 1 ] )
               IF ! ::ExprInSet( aCols[ i ][ 1 ], aCurSet )
                  hOmitIdx[ i ] := .T.
               ENDIF
            ENDIF
         NEXT

         FOR i := 1 TO Len( aSubResult )
            FOR j := 1 TO Len( aSubResult[ i ] )
               IF hb_HHasKey( hOmitIdx, j )
                  aSubResult[ i ][ j ] := NIL
               ENDIF
            NEXT
            AAdd( aResult, aSubResult[ i ] )
         NEXT
      NEXT
      RETURN aResult
   ENDIF

   /* A modifier that expands to exactly one set (for example
    * GROUPING SETS ((a, b))) must still be replaced by its expansion;
    * otherwise the modifier node itself would be used as a key and
    * every row would land in the same bucket. For a plain GROUP BY
    * the single set is aGroupBy itself, so this is a no-op. */
   IF Len( aSets ) == 1
      aGroupBy := aSets[ 1 ]
   ENDIF

   /* Resolve every GROUP BY term to a column position once, matching
    * against the SELECT source expressions rather than the aliases. */
   FOR j := 1 TO Len( aGroupBy )
      AAdd( aGroupIdx, ::FindGroupIdx( aGroupBy[ j ], aCols, aFN ) )
   NEXT

   IF Len( aGroupBy ) == 0 .AND. ::HasAgg( aCols )
      /* Aggregate without GROUP BY: the whole input is one group,
       * even when it is empty. */
      hGroups[ "__ALL__" ] := aRows
   ELSE
      FOR i := 1 TO Len( aRows )
         cKey := ""
         FOR j := 1 TO Len( aGroupBy )
            nGCol := aGroupIdx[ j ]
            IF nGCol > 0 .AND. nGCol <= Len( aRows[ i ] )
               xKeyVal := aRows[ i ][ nGCol ]
               cPart   := SqlValToStr( xKeyVal )
               /* Type tag + length prefix keep the key unambiguous:
                * values that contain "|" cannot run into the next key
                * part, and NIL cannot collide with a string value. */
               cKey += ValType( xKeyVal ) + hb_ntos( Len( cPart ) ) + ":" + cPart + "|"
            ENDIF
         NEXT
         IF ! hb_HHasKey( hGroups, cKey )
            hGroups[ cKey ] := {}
         ENDIF
         AAdd( hGroups[ cKey ], aRows[ i ] )
      NEXT
   ENDIF

   /* One output row per group */
   FOR EACH aGroupRows IN hb_HValues( hGroups )
      aNewRow := {}
      FOR j := 1 TO Len( aCols )
         IF SqlExprHasAgg( aCols[ j ][ 1 ] )
            AAdd( aNewRow, ::ComputeAgg( aCols[ j ][ 1 ], aGroupRows, aFN ) )
         ELSE
            /* Plain column: take the value from the group's first row */
            cN  := SqlExprName( aCols[ j ][ 1 ] )
            nCI := ::FindColIdx2( cN, aFN )
            IF nCI > 0 .AND. Len( aGroupRows ) > 0 .AND. nCI <= Len( aGroupRows[ 1 ] )
               AAdd( aNewRow, aGroupRows[ 1 ][ nCI ] )
            ELSE
               AAdd( aNewRow, NIL )
            ENDIF
         ENDIF
      NEXT

      /* HAVING filter */
      IF xHaving != NIL
         IF ! ::EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
            LOOP
         ENDIF
      ENDIF

      AAdd( aResult, aNewRow )
   NEXT

   RETURN aResult
/* ExpandGroupingSets — turn SQL:2003 ROLLUP / CUBE / GROUPING SETS
 * into a list of flat grouping sets. Each returned set is an array of
 * expressions that acts as the plain GROUP BY of one aggregation pass.
 *
 *   GROUP BY a, ROLLUP(b, c)                → (a,b,c), (a,b), (a)
 *   GROUP BY CUBE(a, b)                     → (), (a), (b), (a,b)
 *   GROUP BY GROUPING SETS ((a,b), a, ())   → (a,b), (a), ()
 *
 * aGroupBy : array of GROUP BY terms. A modifier is an ND_FN node whose
 *            name (slot 2) is ROLLUP, CUBE or GROUPING SETS and whose
 *            argument list is slot 3.
 *
 * When no modifier is present the result is { aGroupBy } — the very
 * same array, not a copy — so the caller can continue unchanged.
 * Several terms are combined as a cross product, in term order.
 */
METHOD ExpandGroupingSets( aGroupBy ) CLASS TSqlAgg

   LOCAL aSets, aNewSets, aCurrent, aBase
   LOCAL xTerm, xItem, cKind
   LOCAL lExpand := .F.
   LOCAL i, j, k, nLen, nMask

   /* Fast path: look for the first modifier node */
   FOR i := 1 TO Len( aGroupBy )
      xTerm := aGroupBy[ i ]
      IF ValType( xTerm ) == "A" .AND. xTerm[ 1 ] == ND_FN
         cKind := Upper( xTerm[ 2 ] )
         IF cKind == "ROLLUP" .OR. cKind == "CUBE" .OR. cKind == "GROUPING SETS"
            lExpand := .T.
            EXIT
         ENDIF
      ENDIF
   NEXT

   IF ! lExpand
      RETURN { aGroupBy }
   ENDIF

   /* Start from one empty set and cross-expand it with every term */
   aSets := { {} }

   FOR i := 1 TO Len( aGroupBy )
      xTerm    := aGroupBy[ i ]
      aNewSets := {}
      cKind    := ""
      IF ValType( xTerm ) == "A" .AND. xTerm[ 1 ] == ND_FN
         cKind := Upper( xTerm[ 2 ] )
      ENDIF

      DO CASE
      CASE cKind == "ROLLUP"
         /* ROLLUP(c1..cN) → N+1 prefixes, longest first, ending with () */
         aBase := xTerm[ 3 ]
         FOR j := 1 TO Len( aSets )
            FOR nLen := Len( aBase ) TO 0 STEP -1
               aCurrent := AClone( aSets[ j ] )
               FOR k := 1 TO nLen
                  AAdd( aCurrent, aBase[ k ] )
               NEXT
               AAdd( aNewSets, aCurrent )
            NEXT
         NEXT

      CASE cKind == "CUBE"
         /* CUBE(c1..cN) → all 2^N subsets; bit k-1 of the mask selects ck */
         aBase := xTerm[ 3 ]
         FOR j := 1 TO Len( aSets )
            FOR nMask := 0 TO ( 2 ^ Len( aBase ) ) - 1
               aCurrent := AClone( aSets[ j ] )
               FOR k := 1 TO Len( aBase )
                  IF hb_BitAnd( nMask, hb_BitShift( 1, k - 1 ) ) != 0
                     AAdd( aCurrent, aBase[ k ] )
                  ENDIF
               NEXT
               AAdd( aNewSets, aCurrent )
            NEXT
         NEXT

      CASE cKind == "GROUPING SETS"
         /* Explicit list. An element is either a list of expressions
          * (possibly empty) or a single bare expression. */
         aBase := xTerm[ 3 ]
         FOR j := 1 TO Len( aSets )
            FOR k := 1 TO Len( aBase )
               aCurrent := AClone( aSets[ j ] )
               xItem    := aBase[ k ]
               IF ValType( xItem ) == "A"
                  IF Len( xItem ) > 0 .AND. ! ( ValType( xItem[ 1 ] ) $ "AU" )
                     /* First slot is a node tag rather than a nested
                      * node, so this is one expression: add it whole
                      * instead of splitting it into its slots. */
                     AAdd( aCurrent, xItem )
                  ELSE
                     FOR nLen := 1 TO Len( xItem )
                        AAdd( aCurrent, xItem[ nLen ] )
                     NEXT
                  ENDIF
               ENDIF
               AAdd( aNewSets, aCurrent )
            NEXT
         NEXT

      OTHERWISE
         /* Plain column, or any other function node: an opaque term
          * appended to every set built so far. */
         FOR j := 1 TO Len( aSets )
            aCurrent := AClone( aSets[ j ] )
            AAdd( aCurrent, xTerm )
            AAdd( aNewSets, aCurrent )
         NEXT
      ENDCASE

      aSets := aNewSets
   NEXT

   RETURN aSets
/* ExprInSet — is the column named by a SELECT expression part of the
 * given grouping set?
 *
 * xSelExpr : SELECT item expression; only ND_COL nodes can match.
 * aSet     : one flat grouping set (array of expressions).
 *
 * Names are compared upper-cased and without the part up to the first
 * "." (the qualifier). Returns .F. for NIL or non-column expressions.
 * The caller uses the answer to decide which SELECT columns to NIL out
 * in a subtotal row.
 */
METHOD ExprInSet( xSelExpr, aSet ) CLASS TSqlAgg

   LOCAL xMember, cWanted, cCandidate

   IF xSelExpr == NIL .OR. xSelExpr[ 1 ] != ND_COL
      RETURN .F.
   ENDIF

   /* At() yields 0 when there is no dot, so SubStr( c, 1 ) keeps the
    * whole name in that case. */
   cWanted := Upper( xSelExpr[ 2 ] )
   cWanted := SubStr( cWanted, At( ".", cWanted ) + 1 )

   FOR EACH xMember IN aSet
      IF ValType( xMember ) == "A" .AND. xMember[ 1 ] == ND_COL
         cCandidate := Upper( xMember[ 2 ] )
         cCandidate := SubStr( cCandidate, At( ".", cCandidate ) + 1 )
         IF cCandidate == cWanted
            RETURN .T.
         ENDIF
      ENDIF
   NEXT

   RETURN .F.
/* FindGroupIdx — resolve a GROUP BY expression to a column position.
 *
 * The SELECT list's source expressions (aCols[ i ][ 1 ]) are searched
 * rather than the alias list aFN: for
 *    SELECT d.name AS foo ... GROUP BY d.name
 * aFN holds "FOO" while aCols[ 1 ][ 1 ] is the ND_COL "d.name", and it
 * is the latter that has to match — otherwise no key column is found
 * and every row collapses into one bucket.
 *
 * Matching order:
 *   1. exact name including the qualifier, so that GROUP BY b.id picks
 *      b.id and not an earlier a.id in the same SELECT list;
 *   2. first SELECT column with the same unqualified name;
 *   3. FindColIdx() against aFN (GROUP BY column absent from SELECT).
 *
 * xGroupExpr : GROUP BY term. NIL or non-ND_COL terms go straight to
 *              FindColIdx().
 * Returns the 1-based position, or 0 when nothing matches.
 */
METHOD FindGroupIdx( xGroupExpr, aCols, aFN ) CLASS TSqlAgg

   LOCAL i, xSel, cFull, cBare, cSel
   LOCAL nBareHit := 0

   IF xGroupExpr == NIL .OR. xGroupExpr[ 1 ] != ND_COL
      RETURN ::FindColIdx( xGroupExpr, aFN )
   ENDIF

   cFull := Upper( xGroupExpr[ 2 ] )
   /* At() is 0 without a dot, which leaves the name untouched */
   cBare := SubStr( cFull, At( ".", cFull ) + 1 )

   FOR i := 1 TO Len( aCols )
      xSel := aCols[ i ][ 1 ]
      IF xSel != NIL .AND. xSel[ 1 ] == ND_COL
         cSel := Upper( xSel[ 2 ] )
         IF cSel == cFull
            RETURN i
         ENDIF
         /* Remember only the first unqualified match as a fallback */
         IF nBareHit == 0 .AND. SubStr( cSel, At( ".", cSel ) + 1 ) == cBare
            nBareHit := i
         ENDIF
      ENDIF
   NEXT

   IF nBareHit > 0
      RETURN nBareHit
   ENDIF

   /* Last resort: name lookup in aFN */
   RETURN ::FindColIdx( xGroupExpr, aFN )
/* FindColIdx — position of an ND_COL expression's name inside aFN.
 *
 * xExpr : expression node; anything but an ND_COL node yields 0.
 * aFN   : array of column names.
 *
 * The name is upper-cased and stripped of everything up to the first
 * "." before being compared with the upper-cased entries of aFN.
 * Returns the 1-based index of the first match, or 0.
 */
METHOD FindColIdx( xExpr, aFN ) CLASS TSqlAgg

   LOCAL cWanted, cField

   IF xExpr == NIL .OR. xExpr[ 1 ] != ND_COL
      RETURN 0
   ENDIF

   cWanted := Upper( xExpr[ 2 ] )
   /* No dot → At() is 0 → SubStr( c, 1 ) returns the name unchanged */
   cWanted := SubStr( cWanted, At( ".", cWanted ) + 1 )

   FOR EACH cField IN aFN
      IF Upper( cField ) == cWanted
         RETURN cField:__enumIndex()
      ENDIF
   NEXT

   RETURN 0
/* FindColIdx2 — case-insensitive lookup of a plain name in aFN.
 *
 * cN  : name to look for (compared as given, no qualifier stripping).
 * aFN : array of column names.
 * Returns the 1-based index of the first match, or 0.
 */
METHOD FindColIdx2( cN, aFN ) CLASS TSqlAgg

   LOCAL cWanted := Upper( cN )
   LOCAL nTotal  := Len( aFN )
   LOCAL nPos    := 0

   DO WHILE ++nPos <= nTotal
      IF Upper( aFN[ nPos ] ) == cWanted
         RETURN nPos
      ENDIF
   ENDDO

   RETURN 0
/* ComputeAgg — evaluate one aggregate function over the rows of a group.
 *
 * xE  : ND_FN node; slot 2 is the function name, slot 3 the argument list.
 * aGR : rows of the group.
 * aFN : column names used to locate the argument inside a row.
 *
 * Results:
 *   COUNT                     number of non-NIL values; the row count
 *                             for COUNT(*), COUNT() and for a column
 *                             argument that is not found in aFN
 *   SUM, AVG                  NIL when no non-NIL value was seen
 *   MIN, MAX                  NIL when no non-NIL value was seen;
 *                             compared with SqlCmpLt()
 *   GROUP_CONCAT, STRING_AGG  non-NIL values joined with the separator;
 *                             "" when there are none. The separator is
 *                             ", " unless the second argument is a
 *                             string literal.
 *   anything else, or xE not an ND_FN node: 0
 *
 * An argument that is not a plain column is evaluated per row with
 * SqlEvalRowExpr().
 */
METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg

   LOCAL cFunc, xArg, xSepArg, nCol, i, xVal
   LOCAL nCount := 0, nSum := 0, xMin := NIL, xMax := NIL
   LOCAL cResult := "", cSep := ", "
   LOCAL lConcat

   IF xE == NIL .OR. xE[ 1 ] != ND_FN
      RETURN 0
   ENDIF

   cFunc   := Upper( xE[ 2 ] )
   lConcat := ( cFunc == "GROUP_CONCAT" .OR. cFunc == "STRING_AGG" )

   /* No argument at all: only COUNT has a meaning */
   IF Len( xE[ 3 ] ) == 0
      RETURN iif( cFunc == "COUNT", Len( aGR ), 0 )
   ENDIF

   /* "*" argument: same rule */
   xArg := xE[ 3 ][ 1 ]
   IF xArg[ 1 ] == ND_COL .AND. xArg[ 2 ] == "*"
      RETURN iif( cFunc == "COUNT", Len( aGR ), 0 )
   ENDIF

   /* A plain column that cannot be located is handled like "*" */
   nCol := ::FindColIdx2( SqlExprName( xArg ), aFN )
   IF nCol == 0 .AND. xArg[ 1 ] == ND_COL
      RETURN iif( cFunc == "COUNT", Len( aGR ), 0 )
   ENDIF

   /* Optional separator: second argument given as a string literal */
   IF lConcat .AND. Len( xE[ 3 ] ) >= 2
      xSepArg := xE[ 3 ][ 2 ]
      IF ValType( xSepArg ) == "A" .AND. Len( xSepArg ) >= 2 .AND. ;
         xSepArg[ 1 ] == ND_LIT .AND. ValType( xSepArg[ 2 ] ) == "C"
         cSep := xSepArg[ 2 ]
      ENDIF
   ENDIF

   /* Single pass: every row's value is fetched or evaluated once */
   FOR i := 1 TO Len( aGR )
      IF nCol > 0 .AND. nCol <= Len( aGR[ i ] )
         xVal := aGR[ i ][ nCol ]
      ELSEIF nCol == 0
         /* Complex expression (CASE, BIN, ...) inside the aggregate */
         xVal := SqlEvalRowExpr( xArg, aFN, aGR[ i ] )
      ELSE
         xVal := NIL
      ENDIF

      IF xVal != NIL
         nCount++
         IF lConcat
            /* Decide on the separator by counting values, not by
             * testing the text built so far: Empty() is also true for
             * "" and for all-blank strings, which made separators
             * disappear after blank leading values. */
            IF nCount > 1
               cResult += cSep
            ENDIF
            cResult += SqlCoerceStr( xVal )
         ELSE
            nSum += SqlCoerceNum( xVal )
            /* SqlCmpLt gives a type-aware ordering (strings, dates) */
            IF xMin == NIL .OR. SqlCmpLt( xVal, xMin )
               xMin := xVal
            ENDIF
            IF xMax == NIL .OR. SqlCmpLt( xMax, xVal )
               xMax := xVal
            ENDIF
         ENDIF
      ENDIF
   NEXT

   DO CASE
   CASE cFunc == "COUNT"
      RETURN nCount
   CASE cFunc == "SUM"
      /* SQL standard: SUM over no values is NULL, not 0 */
      RETURN iif( nCount > 0, nSum, NIL )
   CASE cFunc == "AVG"
      RETURN iif( nCount > 0, nSum / nCount, NIL )
   CASE cFunc == "MIN"
      RETURN xMin
   CASE cFunc == "MAX"
      RETURN xMax
   CASE lConcat
      RETURN cResult
   ENDCASE

   RETURN 0
/* EvalHaving — boolean verdict of the HAVING clause for one group.
 *
 * Evaluates the expression tree with EvalHavingExpr() and reduces the
 * outcome to a logical through SqlIsTrue().
 */
METHOD EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams ) CLASS TSqlAgg

   RETURN SqlIsTrue( ::EvalHavingExpr( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams ) )
/* EvalHavingExpr — recursive evaluator for a HAVING expression tree.
 *
 * xE      : expression node (NIL evaluates to NIL).
 * aNewRow : output row already computed for the group.
 * aCols   : SELECT list; slot 2 of an entry is the name a HAVING
 *           column reference is matched against.
 * aGR     : rows of the group (aggregates, fallback column lookup).
 * aFN     : column names of the group rows.
 * aParams : values bound to "?" placeholders.
 *
 * Node handling:
 *   ND_LIT  the literal in slot 2
 *   ND_NIL  NIL
 *   ND_PAR  aParams[ n ], n being the 1-based index in slot 2;
 *           NIL when the index or aParams is unusable
 *   ND_COL  value of the SELECT item with that name, else the value
 *           from the group's first row, else NIL
 *   ND_FN   ComputeAgg() for aggregate names, NIL otherwise
 *   ND_BIN  AND, OR and the comparisons = == <> != > < >= <=
 *   ND_UNI  NOT
 * Anything else evaluates to NIL.
 */
METHOD EvalHavingExpr( xE, aNewRow, aCols, aGR, aFN, aParams ) CLASS TSqlAgg

   LOCAL xL, xR, cOp, i, nCI, cN, cUpperN

   IF xE == NIL
      RETURN NIL
   ENDIF

   DO CASE
   CASE xE[ 1 ] == ND_LIT
      RETURN xE[ 2 ]

   CASE xE[ 1 ] == ND_NIL
      RETURN NIL

   CASE xE[ 1 ] == ND_PAR
      /* aParams used to be passed around without ever being read, so
       * a placeholder in HAVING always evaluated to NIL. */
      IF ValType( aParams ) == "A" .AND. Len( xE ) >= 2 .AND. ;
         ValType( xE[ 2 ] ) == "N" .AND. ;
         xE[ 2 ] >= 1 .AND. xE[ 2 ] <= Len( aParams )
         RETURN aParams[ xE[ 2 ] ]
      ENDIF
      RETURN NIL

   CASE xE[ 1 ] == ND_COL
      cN := xE[ 2 ]
      IF "." $ cN
         cN := SubStr( cN, At( ".", cN ) + 1 )
      ENDIF
      cUpperN := Upper( cN )
      /* First choice: a SELECT item carrying that name. Entries
       * without a string name are skipped instead of raising an
       * argument error in Upper(). */
      FOR i := 1 TO Len( aCols )
         IF Len( aCols[ i ] ) >= 2 .AND. ValType( aCols[ i ][ 2 ] ) == "C" .AND. ;
            Upper( aCols[ i ][ 2 ] ) == cUpperN .AND. i <= Len( aNewRow )
            RETURN aNewRow[ i ]
         ENDIF
      NEXT
      /* Otherwise read the column from the group's first row */
      nCI := ::FindColIdx2( cN, aFN )
      IF nCI > 0 .AND. Len( aGR ) > 0 .AND. nCI <= Len( aGR[ 1 ] )
         RETURN aGR[ 1 ][ nCI ]
      ENDIF
      RETURN NIL

   CASE xE[ 1 ] == ND_FN
      IF SqlIsAggName( xE[ 2 ] )
         RETURN ::ComputeAgg( xE, aGR, aFN )
      ENDIF
      RETURN NIL

   CASE xE[ 1 ] == ND_BIN
      cOp := xE[ 2 ]
      /* Both operands are always evaluated, for every operator */
      xL := ::EvalHavingExpr( xE[ 3 ], aNewRow, aCols, aGR, aFN, aParams )
      xR := ::EvalHavingExpr( xE[ 4 ], aNewRow, aCols, aGR, aFN, aParams )

      IF cOp == "AND"
         RETURN SqlIsTrue( xL ) .AND. SqlIsTrue( xR )
      ENDIF
      IF cOp == "OR"
         RETURN SqlIsTrue( xL ) .OR. SqlIsTrue( xR )
      ENDIF

      xL := SqlCoerceForCmp( xL )
      xR := SqlCoerceForCmp( xR )

      DO CASE
      CASE cOp == "=" .OR. cOp == "=="
         RETURN SqlCmpEq( xL, xR )
      CASE cOp == "<>" .OR. cOp == "!="
         RETURN ! SqlCmpEq( xL, xR )
      CASE cOp == ">"
         RETURN SqlCmpLt( xR, xL )
      CASE cOp == "<"
         RETURN SqlCmpLt( xL, xR )
      CASE cOp == ">="
         RETURN SqlCmpEq( xL, xR ) .OR. SqlCmpLt( xR, xL )
      CASE cOp == "<="
         RETURN SqlCmpEq( xL, xR ) .OR. SqlCmpLt( xL, xR )
      ENDCASE
      RETURN NIL

   CASE xE[ 1 ] == ND_UNI
      IF xE[ 2 ] == "NOT"
         xL := ::EvalHavingExpr( xE[ 3 ], aNewRow, aCols, aGR, aFN, aParams )
         RETURN ! SqlIsTrue( xL )
      ENDIF
      RETURN NIL
   ENDCASE

   RETURN NIL