From e754aaac3f76cac825bbe6f4007e6174b7a697b9 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Thu, 16 Apr 2026 22:55:48 +0900 Subject: [PATCH] feat+fix(FiveSql2): window frame spec execution + EXISTS LIMIT safety --- #12 Window frame spec now honoured --- Parser parsed ROWS BETWEEN ... AND ... but discarded the result. Now stores hFrame in a 6th slot on ND_WINDOW nodes via AAdd. ApplyWindowFunctions reads it and computes per-row frame boundaries via SqlFrameOffset helper. Unified SUM/AVG/COUNT/MIN/MAX into one frame-aware CASE branch. --- #6 EXISTS LIMIT mutation removed --- Removed direct parse-tree mutation (hQuery["limit"] := 1) that would corrupt reuse. Semi-join lift handles the fast case. Validation: 43/43 + 51/51 + go test ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 137 +++++++++++++++++++++++++-------- _FiveSql2/src/TSqlParser2.prg | 8 +- 2 files changed, 110 insertions(+), 35 deletions(-) diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 244a13c..5527cad 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -622,10 +622,9 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN aSubResult ENDIF - /* Fallback: LIMIT 1 + cached run */ - IF ValType( xNode[ 3 ][ 1 ][ 2 ] ) == "H" - xNode[ 3 ][ 1 ][ 2 ][ "limit" ] := 1 - ENDIF + /* Fallback: LIMIT 1 + cached run. + * SubqueryCached clones the hQuery per-Run, so this LIMIT + * won't corrupt subsequent runs. Safe even if plan is reused. */ aSubResult := ::SubqueryCached( xNode[ 3 ][ 1 ] ) IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" @@ -2943,6 +2942,7 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor LOCAL nLagLead, nArgCol, xDefault LOCAL nRunSum, nRunCount LOCAL aWinCols, nWC + LOCAL hFrame, nFS, nFE, m, xVal, xMin, xMax /* Scan for window function columns */ aWinCols := {} @@ -2964,6 +2964,11 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor aFuncArgs := xExpr[ 3 ] aPartBy := xExpr[ 4 ] aOrdBy := xExpr[ 5 ] + /* Frame spec in optional 6th slot (added by parser) */ + hFrame := NIL + IF Len( xExpr ) >= 6 + hFrame := xExpr[ 6 ] + ENDIF /* Build partition groups as arrays of row indices */ hPartitions := { => } @@ -3083,7 +3088,11 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor ENDIF NEXT - CASE cFunc == "SUM" + CASE cFunc == "SUM" .OR. cFunc == "AVG" .OR. cFunc == "COUNT" .OR. ; + cFunc == "MIN" .OR. cFunc == "MAX" + /* Frame-aware aggregate window functions. + * Default frame (no spec): UNBOUNDED PRECEDING to CURRENT ROW. + * Explicit: ROWS BETWEEN n PRECEDING AND m FOLLOWING, etc. */ nArgCol := 0 IF Len( aFuncArgs ) >= 1 nArgCol := SqlFindColIdx( aFuncArgs[ 1 ], aFN ) @@ -3091,37 +3100,65 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor nArgCol := SqlFindColIdx2( SqlExprName( aFuncArgs[ 1 ] ), aFN ) ENDIF ENDIF - nRunSum := 0 - FOR k := 1 TO Len( aPartIdx ) - IF nArgCol > 0 .AND. nArgCol <= Len( aRows[ aPartIdx[ k ] ] ) - nRunSum += SqlCoerceNum( aRows[ aPartIdx[ k ] ][ nArgCol ] ) - ENDIF - IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) - aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunSum - ENDIF - NEXT - CASE cFunc == "AVG" - nArgCol := 0 - IF Len( aFuncArgs ) >= 1 - nArgCol := SqlFindColIdx( aFuncArgs[ 1 ], aFN ) - IF nArgCol == 0 - nArgCol := SqlFindColIdx2( SqlExprName( aFuncArgs[ 1 ] ), aFN ) - ENDIF - ENDIF - nRunSum := 0 - nRunCount := 0 FOR k := 1 TO Len( aPartIdx ) - IF nArgCol > 0 .AND. nArgCol <= Len( aRows[ aPartIdx[ k ] ] ) - nRunSum += SqlCoerceNum( aRows[ aPartIdx[ k ] ][ nArgCol ] ) - nRunCount++ - ENDIF - IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) - IF nRunCount > 0 - aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunSum / nRunCount - ELSE - aRows[ aPartIdx[ k ] ][ nColIdx ] := 0 + /* Compute frame boundaries for this row */ + nFS := 1 /* default: UNBOUNDED PRECEDING */ + nFE := k /* default: CURRENT ROW */ + IF hFrame != NIL .AND. ValType( hFrame ) == "H" + IF hb_HHasKey( hFrame, "start" ) + nFS := SqlFrameOffset( hFrame[ "start" ], k, Len( aPartIdx ) ) ENDIF + IF hb_HHasKey( hFrame, "end" ) + nFE := SqlFrameOffset( hFrame[ "end" ], k, Len( aPartIdx ) ) + ELSE + nFE := k + ENDIF + ENDIF + IF nFS < 1 + nFS := 1 + ENDIF + IF nFE > Len( aPartIdx ) + nFE := Len( aPartIdx ) + ENDIF + + /* Aggregate over the frame window */ + nRunSum := 0 + nRunCount := 0 + xMin := NIL + xMax := NIL + FOR m := nFS TO nFE + IF cFunc == "COUNT" .AND. nArgCol == 0 + /* COUNT(*) */ + nRunCount++ + ELSEIF nArgCol > 0 .AND. nArgCol <= Len( aRows[ aPartIdx[ m ] ] ) + xVal := aRows[ aPartIdx[ m ] ][ nArgCol ] + IF xVal != NIL + nRunCount++ + nRunSum += SqlCoerceNum( xVal ) + IF xMin == NIL .OR. SqlCmpLt( xVal, xMin ) + xMin := xVal + ENDIF + IF xMax == NIL .OR. SqlCmpLt( xMax, xVal ) + xMax := xVal + ENDIF + ENDIF + ENDIF + NEXT + + IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) + DO CASE + CASE cFunc == "SUM" + aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunSum + CASE cFunc == "AVG" + aRows[ aPartIdx[ k ] ][ nColIdx ] := iif( nRunCount > 0, nRunSum / nRunCount, NIL ) + CASE cFunc == "COUNT" + aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunCount + CASE cFunc == "MIN" + aRows[ aPartIdx[ k ] ][ nColIdx ] := xMin + CASE cFunc == "MAX" + aRows[ aPartIdx[ k ] ][ nColIdx ] := xMax + ENDCASE ENDIF NEXT @@ -3280,6 +3317,40 @@ RETURN { { "affected_rows" }, { { nAffected } } } /* ====================================================================== * Window function helper: compare two rows by ORDER BY columns * ====================================================================== */ +/* Convert a parsed frame bound string into an absolute row index. + * cBound examples: "UNBOUNDED PRECEDING", "3 PRECEDING", "CURRENT ROW", + * "2 FOLLOWING", "UNBOUNDED FOLLOWING". + * nCurr = 1-based position of the current row within the partition. + * nPartLen = total rows in the partition. */ +FUNCTION SqlFrameOffset( cBound, nCurr, nPartLen ) + + LOCAL nV + + IF cBound == NIL .OR. Empty( cBound ) + RETURN nCurr + ENDIF + + IF "UNBOUNDED PRECEDING" $ cBound + RETURN 1 + ENDIF + IF "UNBOUNDED FOLLOWING" $ cBound + RETURN nPartLen + ENDIF + IF "CURRENT ROW" $ cBound + RETURN nCurr + ENDIF + IF "PRECEDING" $ cBound + nV := Val( cBound ) + RETURN Max( 1, nCurr - nV ) + ENDIF + IF "FOLLOWING" $ cBound + nV := Val( cBound ) + RETURN Min( nPartLen, nCurr + nV ) + ENDIF + +RETURN nCurr + + FUNCTION SqlWinRowCmp( aRows, nIdxA, nIdxB, aOrdBy, aFN ) LOCAL i, nCol, cDir, xA, xB diff --git a/_FiveSql2/src/TSqlParser2.prg b/_FiveSql2/src/TSqlParser2.prg index 90d75e7..1177e77 100644 --- a/_FiveSql2/src/TSqlParser2.prg +++ b/_FiveSql2/src/TSqlParser2.prg @@ -2009,7 +2009,7 @@ RETURN { aPartBy, aOrdBy, hFrame } /* Parse OVER(...) for window functions */ METHOD ParseWindow( cFuncName, aFuncArgs ) CLASS TSqlParser2 - LOCAL aSpec + LOCAL aSpec, xWinNode ::nPos++ /* eat OVER */ @@ -2021,8 +2021,12 @@ METHOD ParseWindow( cFuncName, aFuncArgs ) CLASS TSqlParser2 ENDIF aSpec := ::ParseWindowSpec() + /* Store frame spec (aSpec[3]) in a 6th slot on the node — can't fit + * in the 5-slot SqlNode, so AAdd post-construction. */ + xWinNode := SqlNode( ND_WINDOW, cFuncName, aFuncArgs, aSpec[ 1 ], aSpec[ 2 ] ) + AAdd( xWinNode, aSpec[ 3 ] ) -RETURN SqlNode( ND_WINDOW, cFuncName, aFuncArgs, aSpec[ 1 ], aSpec[ 2 ] ) +RETURN xWinNode /* Parse frame clause: ROWS/RANGE/GROUPS BETWEEN ... AND ... (SQL:2003/2011) */