diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index ca2d279..b6ede21 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -36,6 +36,7 @@ CLASS TSqlExecutor DATA bRowBlock /* optional code block — receives SELECT cols as params */ DATA aFetchCache /* pre-bound {nWA, nFPos} per SELECT expression, or NIL */ DATA hSubCorrCache INIT { => } /* per-outer-key subquery result cache */ + DATA aSubCacheSlots INIT {} /* list of {xSubNode, {id, aFreeVars}} */ DATA nSubCacheSeq INIT 0 /* monotonic ID for subqueries */ CLASSDATA hSubCache INIT { => } SHARED @@ -570,20 +571,33 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN NIL CASE xNode[ 1 ] == ND_FN - /* EXISTS must be handled before argument evaluation */ - IF xNode[ 2 ] == "EXISTS" .AND. Len( xNode[ 3 ] ) > 0 .AND. ; + /* EXISTS and NOT EXISTS — we only need to know whether the + * subquery returns at least one row, not compute the full + * result. Force a LIMIT 1 into the subquery's hQuery so the + * inner scan short-circuits on the first match. Then route + * through SubqueryCached so correlated EXISTS still memoizes + * on free-variable values (helps when correlation is low + * cardinality; no-op when every outer row is unique). */ + IF ( xNode[ 2 ] == "EXISTS" .OR. xNode[ 2 ] == "NOT EXISTS" ) .AND. ; + Len( xNode[ 3 ] ) > 0 .AND. ; xNode[ 3 ][ 1 ] != NIL .AND. ValType( xNode[ 3 ][ 1 ] ) == "A" .AND. ; xNode[ 3 ][ 1 ][ 1 ] == ND_SUB .AND. xNode[ 3 ][ 1 ][ 2 ] != NIL - nSavedWA := Select() - ::PushOuter() - aSubResult := TSqlExecutor():New( xNode[ 3 ][ 1 ][ 2 ], ::aParams ):Run() - ::PopOuter() - dbSelectArea( nSavedWA ) + /* Install LIMIT 1 on the subquery hQuery. EXISTS only cares + * about the existence of a match, so the subquery scan can + * stop at the first row — the scan loop in RunSelect honours + * hQuery["limit"] as an early-termination target. */ + IF ValType( xNode[ 3 ][ 1 ][ 2 ] ) == "H" + xNode[ 3 ][ 1 ][ 2 ][ "limit" ] := 1 + ENDIF + aSubResult := ::SubqueryCached( xNode[ 3 ][ 1 ] ) IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" + IF xNode[ 2 ] == "NOT EXISTS" + RETURN Len( aSubResult[ 2 ] ) == 0 + ENDIF RETURN Len( aSubResult[ 2 ] ) > 0 ENDIF - RETURN .F. + RETURN iif( xNode[ 2 ] == "NOT EXISTS", .T., .F. ) ENDIF /* Evaluate arguments */ @@ -1069,6 +1083,7 @@ METHOD RunSelect() CLASS TSqlExecutor LOCAL hJoinHash LOCAL lIndexUsed, aTmp LOCAL aFP, pcW, aGoRows + LOCAL nEarlyLimit aCols := ::hQuery[ "columns" ] ::aTables := ::hQuery[ "tables" ] @@ -1340,6 +1355,20 @@ METHOD RunSelect() CLASS TSqlExecutor * join recursion. Huge win for multi-table scans. */ ::aFetchCache := ::BuildFetchCache( aResultExprs ) dbSelectArea( nWA ) + /* Early-termination LIMIT: when the query has a plain + * LIMIT / TOP and no ORDER BY, GROUP BY, aggregates, + * or DISTINCT, we can stop scanning as soon as aRows + * reaches the cap. Huge win for `EXISTS` which plants + * an implicit LIMIT 1 into the subquery's hQuery. */ + nEarlyLimit := 0 + IF ( ValType( nLimit ) == "N" .AND. nLimit > 0 ) .OR. ; + ( ValType( nTop ) == "N" .AND. nTop > 0 ) + IF Len( aOrderBy ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ; + ! ::oAgg:HasAgg( aCols ) .AND. ! lDistinct + nEarlyLimit := iif( ValType( nLimit ) == "N" .AND. nLimit > 0, ; + nLimit, nTop ) + ENDIF + ENDIF WHILE ! Eof() IF Len( aJoins ) > 0 ::JoinRecurse( aJoins, 1, xWhere, aResultExprs, @aRows, hJoinHash ) @@ -1350,6 +1379,9 @@ METHOD RunSelect() CLASS TSqlExecutor AAdd( aRows, aRow ) ENDIF ENDIF + IF nEarlyLimit > 0 .AND. Len( aRows ) >= nEarlyLimit + EXIT + ENDIF dbSelectArea( nWA ) dbSkip() ENDDO @@ -1568,7 +1600,7 @@ RETURN lHadMatch METHOD SubqueryCached( xSubNode ) CLASS TSqlExecutor LOCAL hQ, aFreeVars, cCacheKey, aResult, nSavedWA, oSub - LOCAL i, xVal, nId + LOCAL i, xVal, nId, nSlot, aSlot IF xSubNode == NIL .OR. ValType( xSubNode ) != "A" .OR. Len( xSubNode ) < 2 RETURN NIL @@ -1578,17 +1610,26 @@ METHOD SubqueryCached( xSubNode ) CLASS TSqlExecutor RETURN NIL ENDIF - /* First call for this subquery: assign ID + analyze free variables */ - IF Len( xSubNode ) < 6 .OR. xSubNode[ 6 ] == NIL + /* Identify this subquery: linear-search the slots list for a prior + * entry that references the SAME AST node (array `==` is reference + * compare in Harbour). Most queries have only a handful of sub- + * queries so the scan is trivial. Avoids mutating the parse tree. */ + nSlot := 0 + FOR i := 1 TO Len( ::aSubCacheSlots ) + IF ::aSubCacheSlots[ i ][ 1 ] == xSubNode + nSlot := i + EXIT + ENDIF + NEXT + IF nSlot == 0 ::nSubCacheSeq++ aFreeVars := ::CollectFreeVars( hQ ) - IF Len( xSubNode ) < 6 - ASize( xSubNode, 6 ) - ENDIF - xSubNode[ 6 ] := { ::nSubCacheSeq, aFreeVars } + AAdd( ::aSubCacheSlots, { xSubNode, { ::nSubCacheSeq, aFreeVars } } ) + nSlot := Len( ::aSubCacheSlots ) ENDIF - nId := xSubNode[ 6 ][ 1 ] - aFreeVars := xSubNode[ 6 ][ 2 ] + aSlot := ::aSubCacheSlots[ nSlot ][ 2 ] + nId := aSlot[ 1 ] + aFreeVars := aSlot[ 2 ] /* Build cache key from current values of free variables via * Resolve(), which walks the outer context stack. */