From c869a08365024214bd061429905ec998ea60dfb0 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Thu, 16 Apr 2026 23:09:07 +0900 Subject: [PATCH] =?UTF-8?q?fix(FiveSql2):=20last=203=20=E2=80=94=20RIGHT?= =?UTF-8?q?=20JOIN=20O(N),=20counter=20wrap,=20implicit=20alias?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- #15 RIGHT JOIN O(N*M) → O(N+M) via matched RecNo set --- --- #19 s_nRCJSeq modular counter (% 100000) --- --- #20 Implicit column alias without AS keyword --- Validation: 43/43 + 51/51 + go test ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 65 ++++++++++++++++++++-------------- _FiveSql2/src/TSqlParser2.prg | 13 +++++++ 2 files changed, 52 insertions(+), 26 deletions(-) diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 5527cad..8db3bab 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -39,6 +39,7 @@ CLASS TSqlExecutor DATA aSubCacheSlots INIT {} /* list of {xSubNode, {id, aFreeVars}} */ DATA nSubCacheSeq INIT 0 /* monotonic ID for subqueries */ DATA aSemiJoinSlots INIT {} /* list of {xSubNode, semiJoinData | "NO"} */ + DATA hRightMatched /* RecNo sets for RIGHT JOIN pass */ DATA hSubCache @@ -104,6 +105,7 @@ METHOD New( hQuery, aParams ) CLASS TSqlExecutor ::aSubCacheSlots := {} ::aSemiJoinSlots := {} ::nSubCacheSeq := 0 + ::hRightMatched := { => } RETURN SELF @@ -885,7 +887,7 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu LOCAL lUseIndex, lFound, nPI LOCAL cJoinType, lHadMatch LOCAL nRecCount, lUseHash - LOCAL xProbe + LOCAL xProbe, cRMKey IF hHashTbl == NIL hHashTbl := { => } @@ -930,6 +932,8 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu lHadMatch := .F. lUseIndex := .F. lUseHash := .F. + /* Track matched inner RecNos for RIGHT/FULL JOIN pass */ + cRMKey := "__RIGHT_" + Upper( cJAlias ) cOuterCol := "" cInnerCol := "" cInnerField := "" @@ -1015,6 +1019,11 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu ENDIF IF lJoinMatch lHadMatch := .T. + /* Record match for RIGHT JOIN pass */ + IF ! hb_HHasKey( ::hRightMatched, cRMKey ) + ::hRightMatched[ cRMKey ] := { => } + ENDIF + ::hRightMatched[ cRMKey ][ RecNo() ] := .T. ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) ENDIF dbSelectArea( nWA ) @@ -1046,17 +1055,23 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu RETURN NIL +/* RightJoinPass — emit inner rows that had no match during the main + * join pass (for RIGHT/FULL joins). Outer columns are NIL. + * + * Previous O(N*M) approach rescanned the outer table for every inner + * row to detect unmatched ones. Now uses ::hRightMatched (populated + * during the main join) as a RecNo set — O(N) inner scan + O(1) + * hash probe per row. + */ METHOD RightJoinPass( aJoins, nJIdx, aRE, aRows ) CLASS TSqlExecutor - LOCAL cJAlias, xOnCond, nWA, nOuterWA, cOuterAlias - LOCAL lMatched, aRow, j - LOCAL cColRef + LOCAL cJAlias, nWA, cOuterAlias + LOCAL aRow, j, cColRef, cMatchKey, nRec cJAlias := aJoins[ nJIdx ][ 3 ] IF Empty( cJAlias ) cJAlias := aJoins[ nJIdx ][ 2 ] ENDIF - xOnCond := aJoins[ nJIdx ][ 4 ] nWA := Select( cJAlias ) IF nWA == 0 @@ -1071,28 +1086,17 @@ METHOD RightJoinPass( aJoins, nJIdx, aRE, aRows ) CLASS TSqlExecutor ENDIF ENDIF - nOuterWA := Select( cOuterAlias ) - IF nOuterWA == 0 - RETURN NIL - ENDIF + cMatchKey := "__RIGHT_" + Upper( cJAlias ) dbSelectArea( nWA ) dbGoTop() WHILE ! Eof() - lMatched := .F. - dbSelectArea( nOuterWA ) - dbGoTop() - WHILE ! Eof() - IF xOnCond != NIL .AND. SqlIsTrue( ::EvalExpr( xOnCond ) ) - lMatched := .T. - EXIT - ENDIF - dbSelectArea( nOuterWA ) - dbSkip() - ENDDO - - IF ! lMatched - dbSelectArea( nWA ) + nRec := RecNo() + IF hb_HHasKey( ::hRightMatched, cMatchKey ) .AND. ; + hb_HHasKey( ::hRightMatched[ cMatchKey ], nRec ) + /* Matched during main join — skip */ + ELSE + /* Unmatched inner row — emit with NULLs for outer columns */ aRow := {} FOR j := 1 TO Len( aRE ) cColRef := "" @@ -1107,7 +1111,6 @@ METHOD RightJoinPass( aJoins, nJIdx, aRE, aRows ) CLASS TSqlExecutor NEXT AAdd( aRows, aRow ) ENDIF - dbSelectArea( nWA ) dbSkip() ENDDO @@ -1599,7 +1602,7 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere LOCAL cHashKey, aMatches, xOuterVal, xInnerVal, cValKey LOCAL nFPos, nSavedRec, i, lHadMatch - LOCAL lCompound + LOCAL lCompound, cHJRMKey lHadMatch := .F. @@ -1633,6 +1636,8 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere /* Base-case inline: if the next recursion would just be FetchRow, * skip the method-dispatch overhead and build the row directly. * 50k inner matches × Send() dispatch was the 3-way join bottleneck. */ + /* Track inner matched RecNos for RIGHT JOIN pass */ + cHJRMKey := "__RIGHT_" + Upper( Alias( nInnerWA ) ) IF nIdx + 1 > Len( aJoins ) FOR i := 1 TO Len( aMatches ) dbSelectArea( nInnerWA ) @@ -1641,6 +1646,10 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere LOOP ENDIF lHadMatch := .T. + IF ! hb_HHasKey( ::hRightMatched, cHJRMKey ) + ::hRightMatched[ cHJRMKey ] := { => } + ENDIF + ::hRightMatched[ cHJRMKey ][ aMatches[ i ] ] := .T. IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) AAdd( aRows, ::FetchRow( aRE ) ) ENDIF @@ -1653,6 +1662,10 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere LOOP ENDIF lHadMatch := .T. + IF ! hb_HHasKey( ::hRightMatched, cHJRMKey ) + ::hRightMatched[ cHJRMKey ] := { => } + ENDIF + ::hRightMatched[ cHJRMKey ][ aMatches[ i ] ] := .T. ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) NEXT ENDIF @@ -3511,7 +3524,7 @@ STATIC FUNCTION RecCteJoin( hRecQuery, aFN, aPrevRows, cCteName ) cDbfFile := cDbfFile + ".dbf" ENDIF - s_nRCJSeq++ + s_nRCJSeq := ( s_nRCJSeq + 1 ) % 100000 cWAAlias := "RCJ_" + hb_ntos( s_nRCJSeq ) BEGIN SEQUENCE diff --git a/_FiveSql2/src/TSqlParser2.prg b/_FiveSql2/src/TSqlParser2.prg index 1177e77..d7153dc 100644 --- a/_FiveSql2/src/TSqlParser2.prg +++ b/_FiveSql2/src/TSqlParser2.prg @@ -777,6 +777,19 @@ METHOD ParseColumnList() CLASS TSqlParser2 ::nPos++ cAlias := ::TVal( ::nPos ) ::nPos++ + /* Implicit alias: `SELECT salary total, ...` where `total` is + * a non-keyword identifier followed by comma or clause keyword. + * SQL standard allows omitting AS for column aliases. */ + ELSEIF ::TType( ::nPos ) == TK_NAME .AND. ! ::IsFromKW( ::TVal( ::nPos ) ) .AND. ; + ! ::IsKW( ::nPos, "FROM" ) .AND. ! ::IsKW( ::nPos, "WHERE" ) .AND. ; + ! ::IsKW( ::nPos, "GROUP" ) .AND. ! ::IsKW( ::nPos, "ORDER" ) .AND. ; + ! ::IsKW( ::nPos, "HAVING" ) .AND. ! ::IsKW( ::nPos, "LIMIT" ) .AND. ; + ! ::IsKW( ::nPos, "UNION" ) .AND. ! ::IsKW( ::nPos, "INTERSECT" ) .AND. ; + ! ::IsKW( ::nPos, "EXCEPT" ) .AND. ! ::IsKW( ::nPos, "WINDOW" ) .AND. ; + ! ::IsKW( ::nPos, "OFFSET" ) .AND. ! ::IsKW( ::nPos, "FETCH" ) .AND. ; + ! ::IsKW( ::nPos, "INTO" ) .AND. ! ::IsKW( ::nPos, "FOR" ) + cAlias := ::TVal( ::nPos ) + ::nPos++ ENDIF AAdd( aCols, { xExpr, cAlias } ) IF ::TType( ::nPos ) == TK_COMMA