perf(FiveSql2): compound-AND equi-join picks up hash path — CTE+JOIN 22x

FiveSql2's HashJoin only recognized bare equi-terms (xOnCond[1]=ND_BIN,
xOnCond[2]="="), so a compound ON predicate like

    ON e.dept_id = t.dept_id AND e.salary = t.max_sal

fell through to the nested-loop ELSE branch:

    dbSelectArea(nInnerWA)
    dbGoTop()
    WHILE !Eof()
        IF SqlIsTrue(EvalExpr(xOnCond))
            JoinRecurse(...)
        ENDIF
        dbSkip()
    ENDDO

That's O(inner) work per outer row — O(outer × inner) overall — with the
full AND tree re-evaluated on every probe. Query Q7 in the complex
benchmark (CTE top_emp joined back to emp on compound key) ran at
4.6 seconds for 100 inner × 10k outer.

Fix has two pieces:

1. **Probe-term extraction in JoinRecurse**: when xOnCond is an AND,
   walk the AND chain: at each AND node, take the left operand if it is
   itself an equi-term (`a.x = b.x`); otherwise descend into the right
   operand. Use the term found as the hash-probe key and drive the
   normal hash-join code path through it.

2. **Post-filter in HashJoin**: after a hash match, if the *original*
   xOnCond was compound, re-evaluate the full predicate with
   EvalExpr to drop matches that satisfied the hash key but not the
   rest of the AND (e.g. same dept but different salary). Bare equi-
   joins still skip the re-eval — the hash match is conclusive.

Bench (10k × 100 × compound ON predicate):

  Query                          Before     After    Speedup
  ─────────────────────────────────────────────────────────
  Q7 CTE + JOIN compound ON      4573ms     209ms    21.9x

Still works for the existing bare equi case (43-test suite unchanged) and
the 3-way JOIN case (no regression). Falls back to the generic nested
loop only when no probe-term can be extracted at all.

Validation:
  - FiveSql2 43/43
  - Harbour compat 51/51
  - go test ./... ALL PASS
  - Q7 result: 100 rows (correct)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 20:31:27 +09:00
parent c6799a599e
commit 64b7cf6676

View File

@@ -840,6 +840,7 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu
LOCAL lUseIndex, lFound, nPI
LOCAL cJoinType, lHadMatch
LOCAL nRecCount, lUseHash
LOCAL xProbe
IF hHashTbl == NIL
hHashTbl := { => }
@@ -888,16 +889,35 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu
cInnerCol := ""
cInnerField := ""
/* Analyze ON condition for index or hash join optimization */
IF xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] == "="
IF xOnCond[ 3 ] != NIL .AND. xOnCond[ 3 ][ 1 ] == ND_COL .AND. ;
xOnCond[ 4 ] != NIL .AND. xOnCond[ 4 ][ 1 ] == ND_COL
IF ::ColBelongsTo( xOnCond[ 4 ][ 2 ], cJAlias )
cOuterCol := xOnCond[ 3 ][ 2 ]
cInnerCol := xOnCond[ 4 ][ 2 ]
ELSEIF ::ColBelongsTo( xOnCond[ 3 ][ 2 ], cJAlias )
cOuterCol := xOnCond[ 4 ][ 2 ]
cInnerCol := xOnCond[ 3 ][ 2 ]
/* Analyze ON condition for index or hash join optimization.
* Handles both `a.x = b.x` and `a.x = b.x AND ...` — for the AND
* case we pick the first equi-join term as the hash key and the
* HashJoin method re-evaluates the full xOnCond after probe to
* filter out spurious matches. This is how SQLite's hash-join
* fallback handles compound predicates. */
xProbe := xOnCond
IF xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] == "AND"
/* Walk left-associative AND chain until we find an equi-term */
xProbe := xOnCond
WHILE xProbe != NIL .AND. xProbe[ 1 ] == ND_BIN .AND. xProbe[ 2 ] == "AND"
/* Prefer left operand if it's an equi-join */
IF xProbe[ 3 ] != NIL .AND. xProbe[ 3 ][ 1 ] == ND_BIN .AND. xProbe[ 3 ][ 2 ] == "="
xProbe := xProbe[ 3 ]
EXIT
ENDIF
xProbe := xProbe[ 4 ] /* descend right */
ENDDO
ENDIF
IF xProbe != NIL .AND. xProbe[ 1 ] == ND_BIN .AND. xProbe[ 2 ] == "="
IF xProbe[ 3 ] != NIL .AND. xProbe[ 3 ][ 1 ] == ND_COL .AND. ;
xProbe[ 4 ] != NIL .AND. xProbe[ 4 ][ 1 ] == ND_COL
IF ::ColBelongsTo( xProbe[ 4 ][ 2 ], cJAlias )
cOuterCol := xProbe[ 3 ][ 2 ]
cInnerCol := xProbe[ 4 ][ 2 ]
ELSEIF ::ColBelongsTo( xProbe[ 3 ][ 2 ], cJAlias )
cOuterCol := xProbe[ 4 ][ 2 ]
cInnerCol := xProbe[ 3 ][ 2 ]
ENDIF
ENDIF
@@ -1462,6 +1482,7 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
LOCAL cHashKey, aMatches, xOuterVal, xInnerVal, cValKey
LOCAL nFPos, nSavedRec, i, lHadMatch
LOCAL lCompound
lHadMatch := .F.
@@ -1487,6 +1508,11 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
IF hb_HHasKey( hHashTbl[ cHashKey ], cValKey )
aMatches := hHashTbl[ cHashKey ][ cValKey ]
/* If xOnCond is a compound AND (not a bare equi-term), re-evaluate
* the full condition after the hash probe to filter out partial
* matches. xOnCond[2] == "=" indicates a bare equi-join where the
* hash match is sufficient. */
lCompound := ( xOnCond != NIL .AND. xOnCond[ 1 ] == ND_BIN .AND. xOnCond[ 2 ] != "=" )
/* Base-case inline: if the next recursion would just be FetchRow,
* skip the method-dispatch overhead and build the row directly.
* 50k inner matches × Send() dispatch was the 3-way join bottleneck. */
@@ -1494,6 +1520,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
FOR i := 1 TO Len( aMatches )
dbSelectArea( nInnerWA )
dbGoto( aMatches[ i ] )
IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) )
LOOP
ENDIF
lHadMatch := .T.
IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) )
AAdd( aRows, ::FetchRow( aRE ) )
@@ -1503,8 +1532,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere
FOR i := 1 TO Len( aMatches )
dbSelectArea( nInnerWA )
dbGoto( aMatches[ i ] )
/* Hash key already matched — skip redundant ON re-evaluation for
* simple equi-joins (SQLite: ephemeral table probe is sufficient). */
IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) )
LOOP
ENDIF
lHadMatch := .T.
::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl )
NEXT