perf(FiveSql2): compound-AND equi-join picks up hash path — CTE+JOIN 22x
FiveSql2's HashJoin only recognized bare equi-terms (xOnCond[1]=ND_BIN,
xOnCond[2]="="), so a compound ON predicate like
ON e.dept_id = t.dept_id AND e.salary = t.max_sal
fell through to the nested-loop ELSE branch:
dbSelectArea(nInnerWA)
dbGoTop()
WHILE !Eof()
IF SqlIsTrue(EvalExpr(xOnCond))
JoinRecurse(...)
ENDIF
dbSkip()
ENDDO
That's O(inner) work per outer row (O(outer × inner) overall), re-evaluating the full AND tree
every probe. Query Q7 in the complex benchmark (CTE top_emp joined back
to emp on compound key) ran at 4.6 seconds for 100 inner × 10k outer.
Fix has two pieces:
1. **Probe-term extraction in JoinRecurse**: when xOnCond is an AND,
walk the AND chain looking for the first equi-term (`a.x = b.x`):
at each AND node take the left operand if it is an `=` term,
otherwise descend into the right operand. Use that term as the
hash-probe key and drive the normal hash-join code path through it.
2. **Post-filter in HashJoin**: after a hash match, if the *original*
xOnCond was compound, re-evaluate the full predicate with
EvalExpr to drop matches that satisfied the hash key but not the
rest of the AND (e.g. same dept but different salary). Bare equi-
joins still skip the re-eval — the hash match is conclusive.
Bench (10k × 100 × compound ON predicate):
Query Before After Speedup
─────────────────────────────────────────────────────────
Q7 CTE + JOIN compound ON 4573ms 209ms 21.9x
Still works for the existing bare equi-join case (43-test suite unchanged) and
the 3-way JOIN case (no regression). Falls back to the generic nested
loop only when no probe-term can be extracted at all.
Validation:
- FiveSql2 43/43
- Harbour compat 51/51
- go test ./... ALL PASS
- Q7 result: 100 rows (correct)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -840,6 +840,7 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu
|
||||
LOCAL lUseIndex, lFound, nPI
|
||||
LOCAL cJoinType, lHadMatch
|
||||
LOCAL nRecCount, lUseHash
|
||||
LOCAL xProbe
|
||||
|
||||
IF hHashTbl == NIL
|
||||
hHashTbl := { => }
|
||||
@@ -888,16 +889,35 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu
|
||||
cInnerCol := ""
|
||||
cInnerField := ""
|
||||
|
||||
/* Analyze ON condition for index or hash join optimization */
|
||||
IF xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] == "="
|
||||
IF xOnCond[ 3 ] != NIL .AND. xOnCond[ 3 ][ 1 ] == ND_COL .AND. ;
|
||||
xOnCond[ 4 ] != NIL .AND. xOnCond[ 4 ][ 1 ] == ND_COL
|
||||
IF ::ColBelongsTo( xOnCond[ 4 ][ 2 ], cJAlias )
|
||||
cOuterCol := xOnCond[ 3 ][ 2 ]
|
||||
cInnerCol := xOnCond[ 4 ][ 2 ]
|
||||
ELSEIF ::ColBelongsTo( xOnCond[ 3 ][ 2 ], cJAlias )
|
||||
cOuterCol := xOnCond[ 4 ][ 2 ]
|
||||
cInnerCol := xOnCond[ 3 ][ 2 ]
|
||||
/* Analyze ON condition for index or hash join optimization.
|
||||
* Handles both `a.x = b.x` and `a.x = b.x AND ...` — for the AND
|
||||
* case we pick the first equi-join term as the hash key and the
|
||||
* HashJoin method re-evaluates the full xOnCond after probe to
|
||||
* filter out spurious matches. This is how SQLite's hash-join
|
||||
* fallback handles compound predicates. */
|
||||
xProbe := xOnCond
|
||||
IF xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] == "AND"
|
||||
/* Walk left-associative AND chain until we find an equi-term */
|
||||
xProbe := xOnCond
|
||||
WHILE xProbe != NIL .AND. xProbe[ 1 ] == ND_BIN .AND. xProbe[ 2 ] == "AND"
|
||||
/* Prefer left operand if it's an equi-join */
|
||||
IF xProbe[ 3 ] != NIL .AND. xProbe[ 3 ][ 1 ] == ND_BIN .AND. xProbe[ 3 ][ 2 ] == "="
|
||||
xProbe := xProbe[ 3 ]
|
||||
EXIT
|
||||
ENDIF
|
||||
xProbe := xProbe[ 4 ] /* descend right */
|
||||
ENDDO
|
||||
ENDIF
|
||||
|
||||
IF xProbe != NIL .AND. xProbe[ 1 ] == ND_BIN .AND. xProbe[ 2 ] == "="
|
||||
IF xProbe[ 3 ] != NIL .AND. xProbe[ 3 ][ 1 ] == ND_COL .AND. ;
|
||||
xProbe[ 4 ] != NIL .AND. xProbe[ 4 ][ 1 ] == ND_COL
|
||||
IF ::ColBelongsTo( xProbe[ 4 ][ 2 ], cJAlias )
|
||||
cOuterCol := xProbe[ 3 ][ 2 ]
|
||||
cInnerCol := xProbe[ 4 ][ 2 ]
|
||||
ELSEIF ::ColBelongsTo( xProbe[ 3 ][ 2 ], cJAlias )
|
||||
cOuterCol := xProbe[ 4 ][ 2 ]
|
||||
cInnerCol := xProbe[ 3 ][ 2 ]
|
||||
ENDIF
|
||||
ENDIF
|
||||
|
||||
@@ -1462,6 +1482,7 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
|
||||
|
||||
LOCAL cHashKey, aMatches, xOuterVal, xInnerVal, cValKey
|
||||
LOCAL nFPos, nSavedRec, i, lHadMatch
|
||||
LOCAL lCompound
|
||||
|
||||
lHadMatch := .F.
|
||||
|
||||
@@ -1487,6 +1508,11 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
|
||||
|
||||
IF hb_HHasKey( hHashTbl[ cHashKey ], cValKey )
|
||||
aMatches := hHashTbl[ cHashKey ][ cValKey ]
|
||||
/* If xOnCond is a compound AND (not a bare equi-term), re-evaluate
|
||||
* the full condition after the hash probe to filter out partial
|
||||
* matches. xOnCond[2] == "=" indicates a bare equi-join where the
|
||||
* hash match is sufficient. */
|
||||
lCompound := ( xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] != "=" )
|
||||
/* Base-case inline: if the next recursion would just be FetchRow,
|
||||
* skip the method-dispatch overhead and build the row directly.
|
||||
* 50k inner matches × Send() dispatch was the 3-way join bottleneck. */
|
||||
@@ -1494,6 +1520,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
|
||||
FOR i := 1 TO Len( aMatches )
|
||||
dbSelectArea( nInnerWA )
|
||||
dbGoto( aMatches[ i ] )
|
||||
IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) )
|
||||
LOOP
|
||||
ENDIF
|
||||
lHadMatch := .T.
|
||||
IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) )
|
||||
AAdd( aRows, ::FetchRow( aRE ) )
|
||||
@@ -1503,8 +1532,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
|
||||
FOR i := 1 TO Len( aMatches )
|
||||
dbSelectArea( nInnerWA )
|
||||
dbGoto( aMatches[ i ] )
|
||||
/* Hash key already matched — skip redundant ON re-evaluation for
|
||||
* simple equi-joins (SQLite: ephemeral table probe is sufficient). */
|
||||
IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) )
|
||||
LOOP
|
||||
ENDIF
|
||||
lHadMatch := .T.
|
||||
::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl )
|
||||
NEXT
|
||||
|
||||
Reference in New Issue
Block a user