From 6c8d5f8b3b6d303030b8b12558d5d0d0863a4cb0 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Thu, 16 Apr 2026 11:33:35 +0900 Subject: [PATCH] =?UTF-8?q?fix(FiveSql2):=20correlated=20scalar=20subquery?= =?UTF-8?q?=20with=20JOIN=20=E2=80=94=203=20interacting=20bugs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A scalar correlated subquery with a JOIN inside: SELECT e.name, (SELECT SUM(o.qty * p.price) FROM ord o INNER JOIN prod p ON o.prod_id = p.id WHERE o.emp_id = e.id) AS revenue FROM emp e WHERE e.dept = 'SALES' returned wrong values (equal to SUM(qty) instead of SUM(qty*price)) or zero for all but the first outer row. Root cause was a triple interaction between three independent bugs. --- Bug 1: Subquery cache leaked across five_SQL invocations --- hSubCorrCache, aSubCacheSlots, aSemiJoinSlots, nSubCacheSeq were declared as DATA ... INIT { => } / {} / 0. In Five's compiled output, hash/array INIT literals may share the same backing instance across New() calls, so the cache from query A (SUM qty, no join) was still there when query B ran, providing a hit on the same key — returning A's cached (wrong) value instead of re-executing B's subquery. Fix: explicit initialization in New(). --- Bug 2: aJoins alias mutation across subquery invocations --- RunSelect's join-alias sync loop mutated aJoins[i][3] from the user alias ("p") to the depth-suffixed temp alias ("FA_0003"). aJoins was a direct reference into hQuery["joins"], so the mutation persisted across re-executions of the same hQuery. On the 2nd call, the sync loop couldn't find a matching aTables entry because the stale temp alias ("FA_0003") didn't match the new one ("FA_0005"). The join table's workarea was positioned wrong → empty join result. Fix: deep-clone both ::aTables and aJoins at the start of RunSelect so each invocation starts from the parsed originals. --- Bug 3: SqlCollectCols stripped alias prefixes --- When adding hidden columns for complex aggregate arguments (e.g. SUM(o.qty * p.price)), SqlCollectCols returned bare names like "qty" and "price" instead of qualified "o.qty" / "p.price". In a JOIN context, unqualified "price" routed FetchRow to the first table (ord) instead of prod — FieldPos returned 0, the column was silently NIL, and the multiplication collapsed to qty*1 = qty. Fix: new SqlCollectColExprs returns the original ND_COL AST nodes with qualified names preserved. The hidden-column loop now inserts these directly so FetchRow's dot-qualified path resolves to the correct workarea via FindWA. --- Verification --- Deterministic 5-emp / 6-order / 3-product test: Expected revenues per emp: Emp 1: 2*10 + 3*20 = 80 → got 80.00 ✓ Emp 2: 1*10 + 4*30 = 130 → got 130.00 ✓ Emp 3: 5*20 = 100 → got 100.00 ✓ Emp 4: no orders = 0 → got 0 ✓ Emp 5: 7*10 = 70 → got 70.00 ✓ Also verified SUM(qty*2) and SUM(p.price) variants. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - go test ./... ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 46 ++++++++++++++++++++++++----- _FiveSql2/src/TSqlExpr.prg | 54 ++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 7 deletions(-) diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 67c5101..2084054 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -95,6 +95,14 @@ METHOD New( hQuery, aParams ) CLASS TSqlExecutor ::nDepth := 0 ::aOpened := {} ::aTables := {} + /* Explicit fresh initialization — DATA INIT on hash/array literals + * can end up sharing the same instance across New() calls depending + * on the compile path, which would let one query's subquery cache + * leak into the next query's results. */ + ::hSubCorrCache := { => } + ::aSubCacheSlots := {} + ::aSemiJoinSlots := {} + ::nSubCacheSeq := 0 RETURN SELF @@ -1103,8 +1111,26 @@ METHOD RunSelect() CLASS TSqlExecutor LOCAL nEarlyLimit aCols := ::hQuery[ "columns" ] - ::aTables := ::hQuery[ "tables" ] - aJoins := ::hQuery[ "joins" ] + /* Deep-clone tables and joins so cross-run state (alias renames, + * fetch-cache references, etc.) doesn't leak between invocations + * of the same hQuery. A scalar correlated subquery that opens its + * FROM tables gets depth-suffixed temp aliases written back into + * aTables[i][2] and aJoins[i][3]; without this clone, the second + * call inherits the first call's dead alias and the JOIN sync + * loop below fails to match, leaving stale aliases that resolve + * to closed workareas. */ + ::aTables := AClone( ::hQuery[ "tables" ] ) + FOR i := 1 TO Len( ::aTables ) + IF ValType( ::aTables[ i ] ) == "A" + ::aTables[ i ] := AClone( ::aTables[ i ] ) + ENDIF + NEXT + aJoins := AClone( ::hQuery[ "joins" ] ) + FOR i := 1 TO Len( aJoins ) + IF ValType( aJoins[ i ] ) == "A" + aJoins[ i ] := AClone( aJoins[ i ] ) + ENDIF + NEXT xWhere := ::hQuery[ "where" ] aGroupBy := ::hQuery[ "group_by" ] xHaving := ::hQuery[ "having" ] @@ -1259,11 +1285,16 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF ELSEIF xArgExpr[ 1 ] != ND_COL /* Complex expression (CASE, BIN, etc.) inside aggregate: - * collect all leaf column references and add them as - * hidden result columns so they appear in fetched rows. */ - aLeafCols := SqlCollectCols( xArgExpr, NIL ) + * collect the original ND_COL leaf nodes and add them as + * hidden result columns so they appear in fetched rows. + * Must preserve the qualified name (e.g. "o.qty") so + * subqueries with JOINs resolve to the right workarea. + * Using bare names here used to send `price` to ord in + * a `FROM ord o JOIN prod p` query, silently yielding + * NIL/wrong row data. */ + aLeafCols := SqlCollectColExprs( xArgExpr, NIL ) FOR k := 1 TO Len( aLeafCols ) - cBare := aLeafCols[ k ] + cBare := aLeafCols[ k ][ 2 ] lFound := .F. FOR j := 1 TO Len( aResultExprs ) IF Upper( aResultExprs[ j ][ 2 ] ) == Upper( cBare ) @@ -1272,7 +1303,7 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF NEXT IF ! lFound - AAdd( aResultExprs, { SqlNode( ND_COL, cBare, NIL, NIL, NIL ), cBare } ) + AAdd( aResultExprs, { aLeafCols[ k ], cBare } ) ENDIF NEXT ENDIF @@ -1284,6 +1315,7 @@ METHOD RunSelect() CLASS TSqlExecutor AAdd( aFieldNames, aResultExprs[ i ][ 2 ] ) NEXT + /* Constant folding */ IF xWhere != NIL xWhere := SqlFoldConst( xWhere ) diff --git a/_FiveSql2/src/TSqlExpr.prg b/_FiveSql2/src/TSqlExpr.prg index 5f00c84..47fb282 100644 --- a/_FiveSql2/src/TSqlExpr.prg +++ b/_FiveSql2/src/TSqlExpr.prg @@ -282,6 +282,60 @@ FUNCTION SqlEvalRowExpr( xExpr, aFN, aRow ) RETURN NIL +/* Collect all ND_COL leaf nodes from an expression tree. + * Returns an array of the original ND_COL AST nodes so callers can + * re-emit them with their qualified names preserved — e.g. FetchRow + * needs "o.qty" / "p.price" rather than the bare "qty" / "price" so + * it can route through FindWA to the right workarea in JOIN contexts. + */ +FUNCTION SqlCollectColExprs( xE, aCols ) + + LOCAL i + + IF aCols == NIL + aCols := {} + ENDIF + + IF xE == NIL + RETURN aCols + ENDIF + + DO CASE + CASE xE[ 1 ] == ND_COL + IF xE[ 2 ] != "*" + AAdd( aCols, xE ) + ENDIF + + CASE xE[ 1 ] == ND_BIN + SqlCollectColExprs( xE[ 3 ], aCols ) + SqlCollectColExprs( xE[ 4 ], aCols ) + + CASE xE[ 1 ] == ND_UNI + SqlCollectColExprs( xE[ 3 ], aCols ) + + CASE xE[ 1 ] == ND_FN + IF ValType( xE[ 3 ] ) == "A" + FOR i := 1 TO Len( xE[ 3 ] ) + SqlCollectColExprs( xE[ 3 ][ i ], aCols ) + NEXT + ENDIF + + CASE xE[ 1 ] == ND_CASE + IF ValType( xE[ 2 ] ) == "A" + FOR i := 1 TO Len( xE[ 2 ] ) + SqlCollectColExprs( xE[ 2 ][ i ][ 1 ], aCols ) + SqlCollectColExprs( xE[ 2 ][ i ][ 2 ], aCols ) + NEXT + ENDIF + IF xE[ 3 ] != NIL + SqlCollectColExprs( xE[ 3 ], aCols ) + ENDIF + + ENDCASE + +RETURN aCols + + /* Collect all ND_COL leaf column names from an expression tree. * Returns an array of bare (unqualified) column name strings. */ FUNCTION SqlCollectCols( xE, aCols )