From 63f75bf2bc25cabb60a7067f91b3b2180088e623 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Thu, 16 Apr 2026 20:34:42 +0900 Subject: [PATCH] =?UTF-8?q?fix(FiveSql2):=205=20more=20latent=20bugs=20?= =?UTF-8?q?=E2=80=94=20Resolve=20NULL,=20LEFT=20JOIN,=20UNION=20order,=20D?= =?UTF-8?q?ATEADD,=20VIEW=20cleanup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Continues the static-analysis sweep from 7babfb7. --- #3 Resolve NIL ambiguity (HIGH) --- ResolveFromOuter returned NIL for both "column not found" and "column value is NULL". Callers tested `xVal != NIL` to decide success, which silently dropped legitimate NULL outer-row values in correlated subqueries. Added a by-reference lFound flag so callers distinguish the two cases. --- #14 Multi-level LEFT JOIN null-fill (MEDIUM) --- LEFT JOIN null-fill only fired at the last join level (`nIdx >= Len(aJoins)`). For `a LEFT JOIN b ON ... JOIN c ON ...` where b had no match, the null-fill for b was skipped and the outer row was dropped entirely. Now recurses into subsequent joins when the match fails, so the base case can still emit a row with NULLs for b's columns. --- #18 UNION/INTERSECT/EXCEPT applied after LIMIT (MEDIUM) --- SQL standard requires set operations before ORDER BY / DISTINCT / OFFSET / LIMIT. Reordered to: RIGHT JOIN pass → UNION/INTERSECT/EXCEPT → DISTINCT → ORDER BY → OFFSET → LIMIT. Previously LIMIT clipped the first SELECT before UNION merged the second's rows, producing more rows than intended. --- #22 DATEADD month overflow (LOW) --- `DATEADD('MONTH', 1, '2024-01-31')` produced `SToD("20240231")` (Feb 31) → empty date. Now normalizes month overflow/underflow into year rollover and clamps the day to the target month's last day. Year addition also handles Feb 29 → Feb 28 on non-leap years. --- #23 VIEW temp file leak (LOW) --- TSqlIndex:CheckView creates `__view_.dbf` temp files that were never cleaned up. Added post-scan cleanup in RunSelect's close section (after CTE cleanup) that erases matching temp files. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - go test ./... ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 110 +++++++++++++++++++++------------ _FiveSql2/src/TSqlFunc.prg | 29 ++++++++- 2 files changed, 99 insertions(+), 40 deletions(-) diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 2e48304..244a13c 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -60,7 +60,7 @@ CLASS TSqlExecutor METHOD ColBelongsTo( cColRef, cAlias ) METHOD PushOuter() METHOD PopOuter() - METHOD ResolveFromOuter( cRef, cTblAlias, cField ) + METHOD ResolveFromOuter( cRef, cTblAlias, cField, lFound ) METHOD MakeError( nCode, cMsg ) METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) METHOD CacheSubquery( xSubExpr ) @@ -274,7 +274,7 @@ RETURN 0 METHOD Resolve( cRef ) CLASS TSqlExecutor LOCAL cField, cTblAlias, nDot, nWA, nFPos, xVal, nSavedArea - LOCAL i, cA + LOCAL i, cA, lOuterFound LOCAL aCTEInfo, aCTEFN, aCTERows, nCTERow IF cRef == "*" @@ -305,8 +305,9 @@ METHOD Resolve( cRef ) CLASS TSqlExecutor dbSelectArea( nSavedArea ) ENDIF IF Len( s_aOuterStack ) > 0 - xVal := ::ResolveFromOuter( cRef, cTblAlias, cField ) - IF xVal != NIL + lOuterFound := .F. + xVal := ::ResolveFromOuter( cRef, cTblAlias, cField, @lOuterFound ) + IF lOuterFound dbSelectArea( nSavedArea ) RETURN xVal ENDIF @@ -342,8 +343,9 @@ METHOD Resolve( cRef ) CLASS TSqlExecutor /* Correlated subquery outer context */ IF Len( s_aOuterStack ) > 0 - xVal := ::ResolveFromOuter( cRef, cTblAlias, cField ) - IF xVal != NIL + lOuterFound := .F. + xVal := ::ResolveFromOuter( cRef, cTblAlias, cField, @lOuterFound ) + IF lOuterFound dbSelectArea( nSavedArea ) RETURN xVal ENDIF @@ -354,10 +356,17 @@ METHOD Resolve( cRef ) CLASS TSqlExecutor RETURN NIL -METHOD ResolveFromOuter( cRef, cTblAlias, cField ) CLASS TSqlExecutor +/* ResolveFromOuter — resolve a column reference in the outer + * context stack. Sets lFound to .T. (by ref) when the column is + * located, even if its value is NIL. Callers must check lFound + * rather than testing `xVal != NIL` — the latter conflates a + * legitimate NULL column value with "column not found", silently + * breaking correlated subqueries where the outer row has NULLs. */ +METHOD ResolveFromOuter( cRef, cTblAlias, cField, lFound ) CLASS TSqlExecutor LOCAL i, j, aOuterTbls, cA, nWA, nFPos, xVal, nSavedArea + lFound := .F. nSavedArea := Select() FOR i := Len( s_aOuterStack ) TO 1 STEP -1 @@ -380,6 +389,7 @@ METHOD ResolveFromOuter( cRef, cTblAlias, cField ) CLASS TSqlExecutor nFPos := FieldPos( cField ) IF nFPos > 0 xVal := FieldGet( nFPos ) + lFound := .T. dbSelectArea( nSavedArea ) RETURN xVal ENDIF @@ -1013,13 +1023,24 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu ENDDO ENDIF - /* LEFT JOIN NULL fill */ + /* LEFT JOIN NULL fill — when no match was found for the current + * join level, emit a NULL-filled row. For multi-level JOINs + * (a LEFT JOIN b ON ... JOIN c ON ...) we must recurse into + * subsequent join levels rather than only emitting at the last + * one — otherwise the middle LEFT JOIN's NULL fill never reaches + * the base case and the entire outer row is silently dropped. */ IF ! lHadMatch .AND. ( cJoinType == "LEFT" .OR. cJoinType == "FULL" ) IF nIdx >= Len( aJoins ) + /* Last join — emit directly */ aRow := ::FetchRowNull( aRE, cJAlias ) IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) AAdd( aRows, aRow ) ENDIF + ELSE + /* Middle join — recurse with NULL-filled state for this level + * so subsequent joins can still process and emit their own + * NULL rows or matches. */ + ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) ENDIF ENDIF @@ -1465,35 +1486,8 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF ENDIF - /* DISTINCT */ - IF lDistinct - aRows := ::oSort:Distinct( aRows ) - ENDIF - - /* OFFSET */ - IF nOffset > 0 .AND. nOffset < Len( aRows ) - aTmp := {} - FOR i := nOffset + 1 TO Len( aRows ) - AAdd( aTmp, aRows[ i ] ) - NEXT - aRows := aTmp - ELSEIF nOffset >= Len( aRows ) - aRows := {} - ENDIF - - /* TOP / LIMIT */ - nMaxRows := 0 - IF nTop > 0 - nMaxRows := nTop - ENDIF - IF nLimit > 0 - nMaxRows := nLimit - ENDIF - IF nMaxRows > 0 .AND. Len( aRows ) > nMaxRows - ASize( aRows, nMaxRows ) - ENDIF - - /* RIGHT JOIN second pass */ + /* RIGHT JOIN second pass — must run before set operations and + * LIMIT so unmatched inner rows are included in the full result. */ IF Len( aJoins ) > 0 FOR i := 1 TO Len( aJoins ) IF Upper( aJoins[ i ][ 1 ] ) == "RIGHT" .OR. Upper( aJoins[ i ][ 1 ] ) == "FULL" @@ -1502,7 +1496,11 @@ METHOD RunSelect() CLASS TSqlExecutor NEXT ENDIF - /* UNION / INTERSECT / EXCEPT */ + /* UNION / INTERSECT / EXCEPT — per SQL standard, set operations + * are applied to the full result of each SELECT before ORDER BY / + * DISTINCT / OFFSET / LIMIT. Previous order applied them last, + * which meant LIMIT clipped the first SELECT before UNION added + * the second's rows, producing more rows than intended. */ IF hUnion != NIL aU := TSqlExecutor():New( hUnion, ::aParams ):Run() IF hb_HHasKey( hUnion, "set_op" ) @@ -1525,6 +1523,34 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF ENDIF + /* DISTINCT */ + IF lDistinct + aRows := ::oSort:Distinct( aRows ) + ENDIF + + /* OFFSET */ + IF nOffset > 0 .AND. nOffset < Len( aRows ) + aTmp := {} + FOR i := nOffset + 1 TO Len( aRows ) + AAdd( aTmp, aRows[ i ] ) + NEXT + aRows := aTmp + ELSEIF nOffset >= Len( aRows ) + aRows := {} + ENDIF + + /* TOP / LIMIT */ + nMaxRows := 0 + IF ValType( nTop ) == "N" .AND. nTop > 0 + nMaxRows := nTop + ENDIF + IF ValType( nLimit ) == "N" .AND. nLimit > 0 + nMaxRows := nLimit + ENDIF + IF nMaxRows > 0 .AND. Len( aRows ) > nMaxRows + ASize( aRows, nMaxRows ) + ENDIF + /* Close opened tables */ ::CloseOpened() @@ -1545,6 +1571,14 @@ METHOD RunSelect() CLASS TSqlExecutor NEXT ENDIF + /* Clean up VIEW temp files — created by TSqlIndex:CheckView when + * a query references a .fsv view. Not tracked elsewhere. */ + FOR i := 1 TO Len( ::aTables ) + IF hb_FileExists( "__view_" + Lower( ::aTables[ i ][ 1 ] ) + ".dbf" ) + FErase( "__view_" + Lower( ::aTables[ i ][ 1 ] ) + ".dbf" ) + ENDIF + NEXT + ::nDepth-- IF Len( aSavedAreas ) > 0 diff --git a/_FiveSql2/src/TSqlFunc.prg b/_FiveSql2/src/TSqlFunc.prg index 95c9ba4..4537d19 100644 --- a/_FiveSql2/src/TSqlFunc.prg +++ b/_FiveSql2/src/TSqlFunc.prg @@ -17,6 +17,7 @@ STATIC s_cCollation := "" FUNCTION SqlEvalFunc( cName, aArgs ) LOCAL i, xV, xV2, cS, nN, nN2, cRev + LOCAL nNewM, nNewY, nNewD, nYr, nDy, dTmp /* Aggregate functions return placeholder during row-level fetch */ IF SqlIsAggName( cName ) @@ -181,9 +182,33 @@ FUNCTION SqlEvalFunc( cName, aArgs ) IF cS == "D" .OR. cS == "DAY" .OR. cS == "DD" RETURN xV + nN ELSEIF cS == "M" .OR. cS == "MONTH" .OR. cS == "MM" - RETURN SToD( StrZero( Year(xV), 4 ) + StrZero( Month(xV) + nN, 2 ) + StrZero( Day(xV), 2 ) ) + /* Normalize month overflow/underflow and clamp day to + * end-of-month (Jan 31 + 1 month → Feb 28/29, not Feb 31) */ + nNewM := Month(xV) + nN - 1 + nNewY := Year(xV) + Int( nNewM / 12 ) + nNewD := Day(xV) + nNewM := ( nNewM % 12 ) + 1 + IF nNewM <= 0 + nNewM += 12 + nNewY-- + ENDIF + /* Clamp day: find last day of target month */ + dTmp := SToD( StrZero( nNewY, 4 ) + StrZero( nNewM, 2 ) + "01" ) + dTmp := dTmp + 32 + dTmp := SToD( StrZero( Year(dTmp), 4 ) + StrZero( Month(dTmp), 2 ) + "01" ) - 1 + IF nNewD > Day(dTmp) + nNewD := Day(dTmp) + ENDIF + RETURN SToD( StrZero( nNewY, 4 ) + StrZero( nNewM, 2 ) + StrZero( nNewD, 2 ) ) ELSEIF cS == "Y" .OR. cS == "YEAR" .OR. cS == "YY" .OR. cS == "YYYY" - RETURN SToD( StrZero( Year(xV) + nN, 4 ) + StrZero( Month(xV), 2 ) + StrZero( Day(xV), 2 ) ) + /* Clamp Feb 29 → Feb 28 on non-leap year */ + nYr := Year(xV) + nN + nDy := Day(xV) + IF Month(xV) == 2 .AND. nDy == 29 + dTmp := SToD( StrZero( nYr, 4 ) + "0301" ) - 1 + nDy := Day(dTmp) + ENDIF + RETURN SToD( StrZero( nYr, 4 ) + StrZero( Month(xV), 2 ) + StrZero( nDy, 2 ) ) ENDIF ENDIF RETURN xV