Files
five/_FiveSql2/src/TSqlSort.prg
CharlesKWON dd270d5d9d perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x
Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.

Bench (bench_sql) vs 2026-04-08 baseline
 - B1  SELECT *             2,192 → 114   µs   (19x)
 - B6  INNER JOIN           9,291 → 233   µs   (40x)
 - B7  CTE simple           8,037 → 129   µs   (62x)
 - B9  ROW_NUMBER           3,705 → 265   µs   (14x)
 - B10 RANK PARTITION       4,748 → 309   µs   (15x)
 - B12 INSERT (WA cache)    4,319 →  63   µs   (69x)
 - B13 UPDATE (WA cache)    6,144 →  68   µs   (90x)
 - B15 CTE+WIN+JOIN        18,395 → 1,873 µs   (10x)

Infrastructure
 - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
 - HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
 - MEMRDD auto-imported via gengo; all Five programs get mem:name driver
 - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
 - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML

SQL engine
 - FiveSql2 lexer ported to Go (byte FSM) with combined automatic
   template parameterization (literals → ?, concat queries share plan)
 - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
   SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
   SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
 - CTE / subquery / driving-table materialize paths use MEMRDD
 - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
 - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
   dominant B13 cost — 1.6ms/call → gone)

Correctness fixes uncovered during migration
 - ASort default path now sorts dates/logicals/timestamps (was no-op)
 - ORDER BY default NULL placement matches PRG SqlRowCompare across
   Go fast path; explicit NULLS FIRST/LAST honored by both paths
 - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
 - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)

Verification
 - go test ./...              ALL PASS
 - FiveSql2 test_sql1999      43/43
 - tests/compat_harbour       56/56 (+5 new: ASort dates/logicals,
                              AScan int cross-type)
 - Regression test test_null_order.prg for ORDER BY NULL ordering

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 20:20:14 +09:00

171 lines
4.1 KiB
Plaintext

/*
* TSqlSort.prg — ORDER BY sorting and DISTINCT elimination
*
* FiveSql — SQL Engine for Harbour DBF/NTX
*
* Copyright (c) 2025 Charles KWON (Charles KWON OhJun)
* Email: charleskwonohjun@gmail.com
*
* All rights reserved.
*/
#include "hbclass.ch"
#include "FiveSqlDef.ch"
/* Module-level state for the sort comparator callback.
 * Both variables are reassigned by TSqlSort:OrderBy immediately before it
 * calls ASort, because the comparator (SqlRowCompare) receives only the two
 * rows being compared.
 * NOTE(review): being file-wide statics, a nested or concurrent OrderBy
 * would overwrite them mid-sort — confirm callers never overlap. */
/* One { nColIdx, cDirection, cNulls } entry per ORDER BY key; read by
 * SqlRowCompare. */
STATIC s_aOBCols := {}
/* Field-name array (aFN) handed to the most recent OrderBy call. It is
 * assigned there but not read by any code visible in this file. */
STATIC s_aOBNames := {}
/* TSqlSort — ORDER BY sorting and DISTINCT elimination over arrays of result
 * rows. The class declares no instance data; OrderBy keeps its per-sort
 * state in the module-level statics s_aOBCols / s_aOBNames. */
CLASS TSqlSort
/* Constructor; returns Self without initialising anything. */
METHOD New() CONSTRUCTOR
/* Sorts aRows by the ORDER BY key list aOB, resolving key columns against
 * the field-name array aFN. aTables and aParams are accepted but not used
 * by the implementation in this file. Returns aRows. */
METHOD OrderBy( aRows, aFN, aOB, aTables, aParams )
/* Returns the result of SqlDistinct( aRows ). */
METHOD Distinct( aRows )
/* Builds a string key for one row: SqlValToStr() of every element, each
 * followed by "|". */
METHOD RowKey( aR )
ENDCLASS
/*
 * TSqlSort:New() -> Self
 * Constructor. There is nothing to initialise, so the freshly created
 * object is handed straight back to the caller.
 */
METHOD New() CLASS TSqlSort

RETURN Self
/*
 * TSqlSort:OrderBy( aRows, aFN, aOB, aTables, aParams ) -> aRows
 *
 * Sorts aRows according to the ORDER BY key list aOB and returns the same
 * array.
 *
 *   aRows   - array of result rows (each row is an array of values)
 *   aFN     - field-name array used to resolve key expressions to columns
 *   aOB     - ORDER BY keys; each entry is { xExpr, cDirection [, cNulls] }
 *   aTables - not used by this method
 *   aParams - not used by this method
 *
 * The resolved keys are published through the module-level static
 * s_aOBCols, which SqlRowCompare reads while ASort is running.
 */
METHOD OrderBy( aRows, aFN, aOB, aTables, aParams ) CLASS TSqlSort

   LOCAL nKey, aKey, nColIdx, cNullSpec

   /* Fewer than two rows, or no sort keys: nothing to reorder. */
   IF Len( aRows ) < 2 .OR. Len( aOB ) == 0
      RETURN aRows
   ENDIF

   /* Reset the comparator state for this sort. */
   s_aOBCols  := {}
   s_aOBNames := aFN

   /* Resolve every key to a column index once, up front. The optional
    * third element of a key is the explicit NULLS FIRST/LAST spec parsed by
    * TSqlParser2:ParseOrderBy; an empty string means "use the default
    * (NIL as largest)". */
   FOR nKey := 1 TO Len( aOB )
      aKey := aOB[ nKey ]

      /* First try the key as a column node; if that finds nothing, retry
       * using the name SqlExprName() derives from the expression. */
      nColIdx := SqlFindColIdx( aKey[ 1 ], aFN )
      IF nColIdx == 0
         nColIdx := SqlFindColIdx2( SqlExprName( aKey[ 1 ] ), aFN )
      ENDIF

      IF Len( aKey ) >= 3
         cNullSpec := Upper( aKey[ 3 ] )
      ELSE
         cNullSpec := ""
      ENDIF

      AAdd( s_aOBCols, { nColIdx, aKey[ 2 ], cNullSpec } )
   NEXT

   /* ASort wants a "does a come before b" predicate. */
   ASort( aRows,,, {|a, b| SqlRowCompare( a, b ) < 0 } )

RETURN aRows
/*
 * TSqlSort:Distinct( aRows ) -> aUniqueRows
 *
 * Removes duplicate rows by delegating to the Go RTL function SqlDistinct.
 * Per the original implementation notes, SqlDistinct is a single-pass dedup
 * over a Go map[string]bool whose key construction matches ::RowKey
 * byte-for-byte (same SqlValToStr mapping and '|' separator), so its output
 * is identical to the former PRG loop, only much faster on large result
 * sets.
 */
METHOD Distinct( aRows ) CLASS TSqlSort

   LOCAL aUnique := SqlDistinct( aRows )

RETURN aUnique
/*
 * TSqlSort:RowKey( aR ) -> cKey
 *
 * Builds a string key for one row: the SqlValToStr() text of every element
 * in order, each one followed by a "|" separator. An empty row yields "".
 */
METHOD RowKey( aR ) CLASS TSqlSort

   LOCAL cKey := ""
   LOCAL nPos := 1

   DO WHILE nPos <= Len( aR )
      cKey += SqlValToStr( aR[ nPos ] ) + "|"
      nPos++
   ENDDO

RETURN cKey
/*
 * SqlFindColIdx( xExpr, aFN ) -> nIndex
 *
 * Finds the position in the field-name array aFN of the column referenced
 * by the expression node xExpr.
 *
 *   xExpr - expression node; only nodes shaped { ND_COL, cName, ... } are
 *           resolved
 *   aFN   - array of field names
 *
 * Returns the 1-based index of the first case-insensitive match, or 0 when
 * xExpr is NIL, is not an array, has fewer than two elements, is not an
 * ND_COL node, carries a non-string name, or names no field in aFN.
 * Everything up to and including the first "." in the name is dropped
 * before matching, so "T.COL" is looked up as "COL".
 */
FUNCTION SqlFindColIdx( xExpr, aFN )

   LOCAL cName, nDot

   /* Anything that is not at least a two-element array cannot be a column
    * node; report "not found" instead of failing on the subscripts below. */
   IF !( ValType( xExpr ) == "A" ) .OR. Len( xExpr ) < 2
      RETURN 0
   ENDIF

   IF !( xExpr[ 1 ] == ND_COL ) .OR. !( ValType( xExpr[ 2 ] ) == "C" )
      RETURN 0
   ENDIF

   cName := Upper( xExpr[ 2 ] )

   /* Strip the qualifier: keep what follows the first dot. */
   nDot := At( ".", cName )
   IF nDot > 0
      cName := SubStr( cName, nDot + 1 )
   ENDIF

   /* Same case-insensitive scan as the by-name lookup. */
RETURN SqlFindColIdx2( cName, aFN )
/*
 * SqlFindColIdx2( cN, aFN ) -> nIndex
 *
 * Case-insensitive lookup of the field name cN in the field-name array aFN.
 * Returns the 1-based position of the first match, or 0 when no element
 * matches.
 */
FUNCTION SqlFindColIdx2( cN, aFN )

   LOCAL nFound := 0
   LOCAL nPos   := 0

   cN := Upper( cN )

   DO WHILE ++nPos <= Len( aFN )
      IF Upper( aFN[ nPos ] ) == cN
         nFound := nPos
         EXIT
      ENDIF
   ENDDO

RETURN nFound
/*
 * SqlRowCompare( aRowA, aRowB ) -> nCmp
 *
 * Multi-key row comparator for ASort. Walks the ORDER BY keys held in the
 * module-level static s_aOBCols (entries of { nCol, cDir [, cNulls] }) and
 * returns at the first key on which the two rows differ.
 *
 *   aRowA, aRowB - the two rows (arrays of values) to compare
 *
 * Returns a negative number when aRowA sorts before aRowB, a positive
 * number when it sorts after, and 0 when no key distinguishes the rows.
 *
 * Per key:
 *   - a key whose column index is <= 0 or beyond the length of either row
 *     is skipped;
 *   - NIL handling is decided first (see the comment in the body);
 *   - values of the same ValType() are compared with "<";
 *   - a numeric/character pair is compared numerically via
 *     Val( AllTrim() ) of the character side;
 *   - any other type mix counts as equal on that key.
 * A direction of "DESC" negates the result for non-NIL values.
 */
FUNCTION SqlRowCompare( aRowA, aRowB )

   LOCAL i, nCol, cDir, cNulls, lNullsFirst, xA, xB, nCmp, nNum

   FOR i := 1 TO Len( s_aOBCols )
      nCol   := s_aOBCols[ i ][ 1 ]
      cDir   := s_aOBCols[ i ][ 2 ]
      cNulls := iif( Len( s_aOBCols[ i ] ) >= 3, s_aOBCols[ i ][ 3 ], "" )

      /* Unresolved key, or a row too short to hold the column: skip it. */
      IF nCol <= 0 .OR. nCol > Len( aRowA ) .OR. nCol > Len( aRowB )
         LOOP
      ENDIF

      xA := aRowA[ nCol ]
      xB := aRowB[ nCol ]

      /* NULL ordering — default: NIL is largest (NULLs last in ASC,
       * NULLs first in DESC). Explicit NULLS FIRST/LAST (SQL:2003)
       * from the parser overrides direction. */
      IF xA == NIL .AND. xB == NIL
         LOOP
      ENDIF
      IF xA == NIL .OR. xB == NIL
         DO CASE
         CASE cNulls == "FIRST"
            lNullsFirst := .T.
         CASE cNulls == "LAST"
            lNullsFirst := .F.
         OTHERWISE
            lNullsFirst := ( cDir == "DESC" )
         ENDCASE
         /* The NIL placement is absolute: it is not negated for DESC. */
         IF xA == NIL
            RETURN iif( lNullsFirst, -1, 1 )
         ENDIF
         RETURN iif( lNullsFirst, 1, -1 )
      ENDIF

      nCmp := 0
      IF ValType( xA ) == ValType( xB )
         /* Both directions are tested with "<" on purpose. With Harbour's
          * default SET EXACT OFF, a string comparison stops at the end of
          * the right-hand operand, so "abc" > "ab" is .F. although
          * "ab" < "abc" is .T. Using xA > xB here made the comparator
          * asymmetric for strings sharing a prefix, which lets two rows
          * each sort before the other once a later key is consulted.
          * For numbers, dates and logicals xB < xA equals xA > xB. */
         IF xA < xB
            nCmp := -1
         ELSEIF xB < xA
            nCmp := 1
         ENDIF
      ELSEIF ValType( xA ) == "N" .AND. ValType( xB ) == "C"
         nNum := Val( AllTrim( xB ) )
         nCmp := iif( xA < nNum, -1, iif( xA > nNum, 1, 0 ) )
      ELSEIF ValType( xA ) == "C" .AND. ValType( xB ) == "N"
         nNum := Val( AllTrim( xA ) )
         nCmp := iif( nNum < xB, -1, iif( nNum > xB, 1, 0 ) )
      ENDIF

      IF nCmp != 0
         IF cDir == "DESC"
            RETURN -nCmp
         ENDIF
         RETURN nCmp
      ENDIF
   NEXT

RETURN 0