From 64b7cf66762e04e1b21749febf14b703a1ae309e Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Tue, 14 Apr 2026 20:31:27 +0900 Subject: [PATCH] =?UTF-8?q?perf(FiveSql2):=20compound-AND=20equi-join=20pi?= =?UTF-8?q?cks=20up=20hash=20path=20=E2=80=94=20CTE+JOIN=2022x?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FiveSql2's HashJoin only recognized bare equi-terms (xOnCond[1]=ND_BIN, xOnCond[2]="="), so a compound ON predicate like ON e.dept_id = t.dept_id AND e.salary = t.max_sal fell through to the nested-loop ELSE branch: dbSelectArea(nInnerWA) dbGoTop() WHILE !Eof() IF SqlIsTrue(EvalExpr(xOnCond)) JoinRecurse(...) ENDIF dbSkip() ENDDO That's O(inner) per outer row — O(outer × inner) overall — re-evaluating the full AND tree on every probe. Query Q7 in the complex benchmark (CTE top_emp joined back to emp on compound key) took 4.6 seconds for 100 inner × 10k outer. The fix has two pieces: 1. **Probe-term extraction in JoinRecurse**: when xOnCond is an AND, walk the left-associative chain looking for the first equi-term (`a.x = b.x`). Use that as the hash-probe key, drive the normal hash-join code path through it. 2. **Post-filter in HashJoin**: after a hash match, if the *original* xOnCond was compound, re-evaluate the full predicate with EvalExpr to drop matches that satisfied the hash key but not the rest of the AND (e.g. same dept but different salary). Bare equi-joins still skip the re-eval — the hash match is conclusive. Bench (10k × 100 × compound ON predicate): Query Before After Speedup ───────────────────────────────────────────────────────── Q7 CTE + JOIN compound ON 4573ms 209ms 21.9x Still works for the existing bare equi case (43-test suite unchanged) and the 3-way JOIN case (no regression). Falls back to the generic nested loop only when no probe-term can be extracted at all. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - go test ./... 
ALL PASS - Q7 result: 100 rows (correct) Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 54 ++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 3dba821..76e142c 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -840,6 +840,7 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu LOCAL lUseIndex, lFound, nPI LOCAL cJoinType, lHadMatch LOCAL nRecCount, lUseHash + LOCAL xProbe IF hHashTbl == NIL hHashTbl := { => } @@ -888,16 +889,35 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu cInnerCol := "" cInnerField := "" - /* Analyze ON condition for index or hash join optimization */ - IF xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] == "=" - IF xOnCond[ 3 ] != NIL .AND. xOnCond[ 3 ][ 1 ] == ND_COL .AND. ; - xOnCond[ 4 ] != NIL .AND. xOnCond[ 4 ][ 1 ] == ND_COL - IF ::ColBelongsTo( xOnCond[ 4 ][ 2 ], cJAlias ) - cOuterCol := xOnCond[ 3 ][ 2 ] - cInnerCol := xOnCond[ 4 ][ 2 ] - ELSEIF ::ColBelongsTo( xOnCond[ 3 ][ 2 ], cJAlias ) - cOuterCol := xOnCond[ 4 ][ 2 ] - cInnerCol := xOnCond[ 3 ][ 2 ] + /* Analyze ON condition for index or hash join optimization. + * Handles both `a.x = b.x` and `a.x = b.x AND ...` — for the AND + * case we pick the first equi-join term as the hash key and the + * HashJoin method re-evaluates the full xOnCond after probe to + * filter out spurious matches. This is how SQLite's hash-join + * fallback handles compound predicates. */ + xProbe := xOnCond + IF xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] == "AND" + /* Walk left-associative AND chain until we find an equi-term */ + xProbe := xOnCond + WHILE xProbe != NIL .AND. xProbe[ 1 ] == ND_BIN .AND. xProbe[ 2 ] == "AND" + /* Prefer left operand if it's an equi-join */ + IF xProbe[ 3 ] != NIL .AND. 
xProbe[ 3 ][ 1 ] == ND_BIN .AND. xProbe[ 3 ][ 2 ] == "=" + xProbe := xProbe[ 3 ] + EXIT + ENDIF + xProbe := xProbe[ 4 ] /* descend right */ + ENDDO + ENDIF + + IF xProbe != NIL .AND. xProbe[ 1 ] == ND_BIN .AND. xProbe[ 2 ] == "=" + IF xProbe[ 3 ] != NIL .AND. xProbe[ 3 ][ 1 ] == ND_COL .AND. ; + xProbe[ 4 ] != NIL .AND. xProbe[ 4 ][ 1 ] == ND_COL + IF ::ColBelongsTo( xProbe[ 4 ][ 2 ], cJAlias ) + cOuterCol := xProbe[ 3 ][ 2 ] + cInnerCol := xProbe[ 4 ][ 2 ] + ELSEIF ::ColBelongsTo( xProbe[ 3 ][ 2 ], cJAlias ) + cOuterCol := xProbe[ 4 ][ 2 ] + cInnerCol := xProbe[ 3 ][ 2 ] ENDIF ENDIF @@ -1462,6 +1482,7 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere LOCAL cHashKey, aMatches, xOuterVal, xInnerVal, cValKey LOCAL nFPos, nSavedRec, i, lHadMatch + LOCAL lCompound lHadMatch := .F. @@ -1487,6 +1508,11 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere IF hb_HHasKey( hHashTbl[ cHashKey ], cValKey ) aMatches := hHashTbl[ cHashKey ][ cValKey ] + /* If xOnCond is a compound AND (not a bare equi-term), re-evaluate + * the full condition after the hash probe to filter out partial + * matches. xOnCond[2] == "=" indicates a bare equi-join where the + * hash match is sufficient. */ + lCompound := ( xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] != "=" ) /* Base-case inline: if the next recursion would just be FetchRow, * skip the method-dispatch overhead and build the row directly. * 50k inner matches × Send() dispatch was the 3-way join bottleneck. */ @@ -1494,6 +1520,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere FOR i := 1 TO Len( aMatches ) dbSelectArea( nInnerWA ) dbGoto( aMatches[ i ] ) + IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) ) + LOOP + ENDIF lHadMatch := .T. IF xWhere == NIL .OR. 
SqlIsTrue( ::EvalExpr( xWhere ) ) AAdd( aRows, ::FetchRow( aRE ) ) @@ -1503,8 +1532,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere FOR i := 1 TO Len( aMatches ) dbSelectArea( nInnerWA ) dbGoto( aMatches[ i ] ) - /* Hash key already matched — skip redundant ON re-evaluation for - * simple equi-joins (SQLite: ephemeral table probe is sufficient). */ + IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) ) + LOOP + ENDIF lHadMatch := .T. ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) NEXT