From c6799a599e89130cca6da64ca4769080f2341555 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Tue, 14 Apr 2026 20:25:02 +0900 Subject: [PATCH] fix(FiveSql2): GROUP BY with aliased SELECT collapses all rows into one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Surfaced by complex-query benchmarking. Query like: SELECT d.name AS dept, COUNT(*) AS n, SUM(o.amount) AS total FROM dept d INNER JOIN emp e ON ... INNER JOIN ord o ON ... GROUP BY d.name returned exactly 1 row instead of 100. Removing the AS aliases made it work correctly. Semantic bug, not a performance issue. Root cause: TSqlAgg:GroupBy resolved each GROUP BY column by calling FindColIdx against aFN — the output alias list. For GROUP BY d.name with d.name AS dept, the group expression's column name was looked up in {"dept","n","total"} and missed. FindColIdx returned 0, every row got an empty group key, and the hash collapsed everything into one bucket. Fix: new FindGroupIdx walks aCols (SELECT list expressions) instead, matching the GROUP BY column against each SELECT item's source expression ND_COL name. Handles qualified refs (d.name -> NAME) and falls back to FindColIdx for cases where GROUP BY uses a column not in the SELECT list. Also hoisted the resolution out of the per-row loop — GROUP BY columns resolve once into aGroupIdx[] so each row just indexes. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - Complex bench Q4: 1 row -> 100 rows (correct) Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlAgg.prg | 58 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/_FiveSql2/src/TSqlAgg.prg b/_FiveSql2/src/TSqlAgg.prg index 112e110..95f0783 100644 --- a/_FiveSql2/src/TSqlAgg.prg +++ b/_FiveSql2/src/TSqlAgg.prg @@ -16,6 +16,7 @@ CLASS TSqlAgg METHOD New() CONSTRUCTOR METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) + METHOD FindGroupIdx( xGroupExpr, aCols, aFN ) METHOD ComputeAgg( xE, aGR, aFN ) METHOD FindColIdx( xExpr, aFN ) METHOD FindColIdx2( cN, aFN ) @@ -49,6 +50,7 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T LOCAL i, j, cKey, aGroupRows, aResult := {} LOCAL aNewRow LOCAL nGCol, cN, nCI, lPass + LOCAL aGroupIdx := {} /* Aggregate on empty set */ IF Len( aRows ) == 0 .AND. ::HasAgg( aCols ) @@ -63,14 +65,27 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T RETURN { aNewRow } ENDIF - /* Build group buckets */ + /* Build group buckets. + * Pre-resolve the GROUP BY columns to their position in the SELECT + * list by matching against the SOURCE expressions in aCols, not the + * alias list in aFN. Matching on aFN breaks as soon as the user + * writes `SELECT d.name AS foo ... GROUP BY d.name` — the group + * column's ND_COL name "D.NAME" wouldn't appear in aFN (which has + * "FOO"), FindColIdx would return 0, and every row would end up in + * the empty-key bucket collapsing to a single output row. + * (Regression found in complex-query bench 2026-04-14.) */ + FOR j := 1 TO Len( aGroupBy ) + nGCol := ::FindGroupIdx( aGroupBy[ j ], aCols, aFN ) + AAdd( aGroupIdx, nGCol ) + NEXT + IF Len( aGroupBy ) == 0 .AND. ::HasAgg( aCols ) hGroups[ "__ALL__" ] := aRows ELSE FOR i := 1 TO Len( aRows ) cKey := "" FOR j := 1 TO Len( aGroupBy ) - nGCol := ::FindColIdx( aGroupBy[ j ], aFN ) + nGCol := aGroupIdx[ j ] IF nGCol > 0 .AND. nGCol <= Len( aRows[ i ] ) cKey += SqlValToStr( aRows[ i ][ nGCol ] ) + "|" ENDIF @@ -113,6 +128,45 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T RETURN aResult +/* Resolve a GROUP BY expression to its column position in the output row. + * Walks the SELECT list's source expressions (aCols[i][1]) rather than + * the alias list (aFN[i]). For `SELECT d.name AS foo GROUP BY d.name`, + * aFN is {"FOO"} but aCols[1][1] is ND_COL "d.name" — we need to match + * the latter, otherwise the group key collapses every row into one + * bucket. Falls back to FindColIdx (alias/name lookup) for cases where + * the GROUP BY uses a simple identifier that isn't in the SELECT list. + */ +METHOD FindGroupIdx( xGroupExpr, aCols, aFN ) CLASS TSqlAgg + + LOCAL i, xSel, cGName, cSName, nDot + + IF xGroupExpr == NIL .OR. xGroupExpr[ 1 ] != ND_COL + RETURN ::FindColIdx( xGroupExpr, aFN ) + ENDIF + + cGName := Upper( xGroupExpr[ 2 ] ) + nDot := At( ".", cGName ) + IF nDot > 0 + cGName := SubStr( cGName, nDot + 1 ) + ENDIF + + FOR i := 1 TO Len( aCols ) + xSel := aCols[ i ][ 1 ] + IF xSel != NIL .AND. xSel[ 1 ] == ND_COL + cSName := Upper( xSel[ 2 ] ) + IF "." $ cSName + cSName := SubStr( cSName, At( ".", cSName ) + 1 ) + ENDIF + IF cSName == cGName + RETURN i + ENDIF + ENDIF + NEXT + + /* Last resort: alias-based lookup (handles GROUP BY of unrelated cols) */ +RETURN ::FindColIdx( xGroupExpr, aFN ) + + METHOD FindColIdx( xExpr, aFN ) CLASS TSqlAgg LOCAL cN, i