perf: RTL Go-native migration — 27 optimizations, DML up to 70-90x

Systematic pass through PRG hot paths, promoting them to Go RTL while
preserving Harbour/FiveSql2 semantics. Full log in
docs/RTL-Go-Native-Migration.md.

Bench (bench_sql) vs 2026-04-08 baseline
 - B1  SELECT *             2,192 → 114   µs   (19x)
 - B6  INNER JOIN           9,291 → 233   µs   (40x)
 - B7  CTE simple           8,037 → 129   µs   (62x)
 - B9  ROW_NUMBER           3,705 → 265   µs   (14x)
 - B10 RANK PARTITION       4,748 → 309   µs   (15x)
 - B12 INSERT (WA cache)    4,319 →  63   µs   (69x)
 - B13 UPDATE (WA cache)    6,144 →  68   µs   (90x)
 - B15 CTE+WIN+JOIN        18,395 → 1,873 µs   (10x)

Infrastructure
 - HbHash O(1) Index preserving insertion order (Harbour KEEPORDER)
 - HbDeepClone Go RTL (scalar-sharing, immutable hash keys)
 - MEMRDD auto-imported via gengo; all Five programs get mem:name driver
 - SQL plan + pcode caches (s_hPlanCache, s_hDmlPcodeCache)
 - Opt-in SqlWACacheEnable — dbUseArea/Close/Commit batched for DML

SQL engine
 - FiveSql2 lexer ported to Go (byte FSM) with combined automatic
   template parameterization (literals → ?, concat queries share plan)
 - Go RTL: SqlDistinct, SqlGroupRows, SqlWindowPartitions,
   SqlWindowSortPartition, SqlWindowAssignRank, SqlComputeAggSimple,
   SqlBulkInsert, SqlBulkUpdate, SqlExprHasAgg, SqlEvalHaving
 - CTE / subquery / driving-table materialize paths use MEMRDD
 - SqlCoerce/SqlCmp/SqlIsTrue helpers moved from PRG to Go
 - SqlBulkUpdate defers Flush when WA cache active (APFS fsync was
   dominant B13 cost — 1.6ms/call → gone)

Correctness fixes uncovered during migration
 - ASort default path now sorts dates/logicals/timestamps (was no-op)
 - ORDER BY default NULL placement matches PRG SqlRowCompare across
   Go fast path; explicit NULLS FIRST/LAST honored by both paths
 - SqlBulkUpdate respects EXCLUSIVE vs SHARED mode record locks
 - SqlCmp/SqlCmpEq normalize NumInt vs Double (caught by test 6b)

Verification
 - go test ./...              ALL PASS
 - FiveSql2 test_sql1999      43/43
 - tests/compat_harbour       56/56 (+5 new: ASort dates/logicals,
                              AScan int cross-type)
 - Regression test test_null_order.prg for ORDER BY NULL ordering

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-17 20:20:14 +09:00
parent 3caadb23b9
commit dd270d5d9d
31 changed files with 4501 additions and 495 deletions

View File

@@ -14,6 +14,19 @@
#include "hbclass.ch"
#include "FiveSqlDef.ch"
/* Plan cache: cache key → parsed hQuery.
*
* The FiveSql2 parser runs lex + Pratt-style AST build per call; for
* repeated identical SQL (typical in report / loop / benchmark workloads)
* this is pure overhead. We cache the pristine parse result and hand
* every subsequent call a deep clone via HbDeepClone so in-place
* mutations (SqlFoldConst, aTables rewriting) during Run() never corrupt
* the cached tree.
*
* The cache key is the normalized template key returned by
* SqlLexAndExtractTemplate when the caller supplies no explicit
* parameters, and the raw SQL text when explicit parameters are given.
*
* Cached entries live until process exit; distinct SQL text count is
* bounded by the caller's template set, so LRU is deferred. */
STATIC s_hPlanCache := { => }
CLASS TFiveSQL
DATA oLexer
@@ -40,20 +53,50 @@ RETURN SELF
METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL
LOCAL aTokens, hQuery, aResult
LOCAL aLex, cKey, aParams
/* Parse — no caching (plan trees are mutated during execution) */
::oLexer := TSqlLexer():New( cSQL )
::oLexer:Tokenize()
aTokens := ::oLexer:GetTokens()
/* Fast path: no explicit aParams → single Go RTL lex+normalize call
* (SqlLexAndExtractTemplate). Returns {aTokens, cKey, aParams}; the
* tokens already have TK_TEXT/TK_NUM replaced with TK_QMARK, so
* TSqlParser2 sees the template shape and emits ND_PAR references
* against the extracted aParams. */
IF Empty( ::aParams )
aLex := SqlLexAndExtractTemplate( cSQL )
aTokens := aLex[ 1 ]
cKey := aLex[ 2 ]
aParams := aLex[ 3 ]
::oParser := TSqlParser2():New( aTokens, ::aParams )
hQuery := ::oParser:Parse()
IF hb_HHasKey( s_hPlanCache, cKey )
hQuery := HbDeepClone( s_hPlanCache[ cKey ] )
ELSE
::oParser := TSqlParser2():New( aTokens, aParams )
hQuery := ::oParser:Parse()
IF hQuery == NIL
RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } }
ENDIF
s_hPlanCache[ cKey ] := HbDeepClone( hQuery )
ENDIF
IF hQuery == NIL
RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } }
::oExec := TSqlExecutor():New( hQuery, aParams )
::oExec:cCacheKey := cKey
ELSE
/* Caller supplied explicit params — cache by raw SQL text. */
IF hb_HHasKey( s_hPlanCache, cSQL )
hQuery := HbDeepClone( s_hPlanCache[ cSQL ] )
ELSE
aTokens := SqlLexerTokenize( cSQL )
::oParser := TSqlParser2():New( aTokens, ::aParams )
hQuery := ::oParser:Parse()
IF hQuery == NIL
RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } }
ENDIF
s_hPlanCache[ cSQL ] := HbDeepClone( hQuery )
ENDIF
::oExec := TSqlExecutor():New( hQuery, ::aParams )
::oExec:cCacheKey := cSQL
ENDIF
::oExec := TSqlExecutor():New( hQuery, ::aParams )
::oExec:bRowBlock := bBlock
aResult := ::oExec:Run()

View File

@@ -48,12 +48,13 @@ RETURN .F.
METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS TSqlAgg
LOCAL hGroups := { => }
LOCAL i, j, cKey, aGroupRows, aResult := {}
LOCAL i, j, aGroupRows, aResult := {}
LOCAL aNewRow
LOCAL nGCol, cN, nCI, lPass
LOCAL aGroupIdx := {}
LOCAL aSets, aCurSet, nSet, hOmitIdx, aSubResult
LOCAL aGroupedRows
LOCAL aColInfo /* { lIsAgg, nCI } per SELECT column, pre-resolved */
/* Aggregate on empty set */
IF Len( aRows ) == 0 .AND. ::HasAgg( aCols )
@@ -109,37 +110,39 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T
AAdd( aGroupIdx, nGCol )
NEXT
/* Grouping step — delegate to Go RTL SqlGroupRows to collapse
* N·M per-row boundary crossings (SqlValToStr / hb_HHasKey / AAdd)
* into a single call. Aggregates and HAVING stay in PRG because
* they touch too many expression kinds to port cleanly. */
IF Len( aGroupBy ) == 0 .AND. ::HasAgg( aCols )
hGroups[ "__ALL__" ] := aRows
aGroupedRows := { aRows }
ELSE
FOR i := 1 TO Len( aRows )
cKey := ""
FOR j := 1 TO Len( aGroupBy )
nGCol := aGroupIdx[ j ]
IF nGCol > 0 .AND. nGCol <= Len( aRows[ i ] )
cKey += SqlValToStr( aRows[ i ][ nGCol ] ) + "|"
ENDIF
NEXT
IF ! hb_HHasKey( hGroups, cKey )
hGroups[ cKey ] := {}
ENDIF
AAdd( hGroups[ cKey ], aRows[ i ] )
NEXT
aGroupedRows := SqlGroupRows( aRows, aGroupIdx )
ENDIF
/* Pre-resolve per SELECT column: aggregate flag + column index.
* Avoids SqlExprHasAgg + SqlExprName + FindColIdx2 per group. */
aColInfo := Array( Len( aCols ) )
FOR j := 1 TO Len( aCols )
IF SqlExprHasAgg( aCols[ j ][ 1 ] )
aColInfo[ j ] := { .T., 0 }
ELSE
cN := SqlExprName( aCols[ j ][ 1 ] )
nCI := ::FindColIdx2( cN, aFN )
aColInfo[ j ] := { .F., nCI }
ENDIF
NEXT
/* Compute aggregates for each group */
FOR EACH aGroupRows IN hb_HValues( hGroups )
aNewRow := {}
FOR EACH aGroupRows IN aGroupedRows
aNewRow := Array( Len( aCols ) )
FOR j := 1 TO Len( aCols )
IF SqlExprHasAgg( aCols[ j ][ 1 ] )
AAdd( aNewRow, ::ComputeAgg( aCols[ j ][ 1 ], aGroupRows, aFN ) )
IF aColInfo[ j ][ 1 ]
aNewRow[ j ] := ::ComputeAgg( aCols[ j ][ 1 ], aGroupRows, aFN )
ELSE
cN := SqlExprName( aCols[ j ][ 1 ] )
nCI := ::FindColIdx2( cN, aFN )
nCI := aColInfo[ j ][ 2 ]
IF nCI > 0 .AND. Len( aGroupRows ) > 0 .AND. nCI <= Len( aGroupRows[ 1 ] )
AAdd( aNewRow, aGroupRows[ 1 ][ nCI ] )
ELSE
AAdd( aNewRow, NIL )
aNewRow[ j ] := aGroupRows[ 1 ][ nCI ]
ENDIF
ENDIF
NEXT
@@ -418,6 +421,15 @@ METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg
RETURN 0
ENDIF
/* Fast path: plain column + common aggregate → Go RTL single-pass loop.
* Gate on column-ref argument + pre-resolved nCol > 0; complex args
* (CASE/BIN/UDF) still fall through to the PRG loop below. */
IF nCol > 0 .AND. xArg[ 1 ] == ND_COL .AND. ;
( cFunc == "COUNT" .OR. cFunc == "SUM" .OR. cFunc == "AVG" .OR. ;
cFunc == "MIN" .OR. cFunc == "MAX" )
RETURN SqlComputeAggSimple( aGR, nCol, cFunc )
ENDIF
FOR i := 1 TO Len( aGR )
IF nCol > 0 .AND. nCol <= Len( aGR[ i ] )
xVal := aGR[ i ][ nCol ]
@@ -479,7 +491,15 @@ RETURN 0
METHOD EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams ) CLASS TSqlAgg
LOCAL xResult
LOCAL xResult, aGo
/* Fast path: Go-native tree walker. Returns {lOk, lPass}; falls back
* to PRG when it hits an unsupported node (subqueries, complex agg
* args, CASE expressions inside HAVING, etc.). */
aGo := SqlEvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
IF ValType( aGo ) == "A" .AND. Len( aGo ) == 2 .AND. aGo[ 1 ]
RETURN aGo[ 2 ]
ENDIF
xResult := ::EvalHavingExpr( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )

View File

@@ -19,6 +19,18 @@ STATIC s_aOuterStack := {}
STATIC s_hAutoInc := NIL
STATIC s_nRCJSeq := 0
/* Per-plan pcode cache. Keyed by the plan-cache key that TFiveSQL
* uses (template key or cSQL text). For UPDATE the value is a hash:
* { "set_fpos" => aFPos,
* "set_pc" => aValuePc, — parallel to set_fpos
* "where_pc" => pcWhere | NIL }
* RunUpdate populates on first hit, subsequent calls reuse. RunSelect
* stores a separate entry under the same key with a "#sel" suffix:
* { "fp" => aFP, "where_pc" => pcW | NIL }
* Compiled pcode and field positions depend on the target table's
* field layout. The cache assumes a given SQL template always runs
* against the same schema; entries are never invalidated, so they go
* stale if a table is recreated with a different structure while the
* process is running. */
STATIC s_hDmlPcodeCache := { => }
CLASS TSqlExecutor
DATA hQuery
@@ -35,6 +47,7 @@ CLASS TSqlExecutor
DATA aCompileStruct
DATA bRowBlock /* optional code block — receives SELECT cols as params */
DATA aFetchCache /* pre-bound {nWA, nFPos} per SELECT expression, or NIL */
DATA cCacheKey /* plan-cache key set by TFiveSQL; used for DML pcode cache */
DATA hSubCorrCache INIT { => } /* per-outer-key subquery result cache */
DATA aSubCacheSlots INIT {} /* list of {xSubNode, {id, aFreeVars}} */
DATA nSubCacheSeq INIT 0 /* monotonic ID for subqueries */
@@ -1217,10 +1230,24 @@ METHOD RunSelect() CLASS TSqlExecutor
IF nWA == 0
nWA := ::OpenTable( cTable, cAlias )
IF nWA == 0
/* Table file not found; check if a CTE temp file exists for this
* table name and open it instead. This handles sub-executors
* (UNION, recursive) that reference a CTE by its original name. */
IF hb_FileExists( "__cte_" + Lower( cTable ) + ".dbf" )
/* Table file not found; check if a CTE temp table exists for
* this table name and open it instead. This handles sub-
* executors (UNION, recursive) that reference a CTE by its
* original name. CTE temp tables now live in MEMRDD (no
* file on disk) — fall back to the legacy DBFNTX open for
* pre-existing .dbf files from prior runs. */
BEGIN SEQUENCE
dbUseArea( .T., "MEMRDD", "mem:__cte_" + Lower( cTable ), ;
cAlias, .T., .T. )
nWA := Select( cAlias )
IF nWA > 0
AAdd( ::aOpened, cAlias )
AAdd( ::oAlias:aSlots, { cAlias, Upper( cTable ), Upper( cTable ), .T. } )
ENDIF
RECOVER
nWA := 0
END SEQUENCE
IF nWA == 0 .AND. hb_FileExists( "__cte_" + Lower( cTable ) + ".dbf" )
BEGIN SEQUENCE
dbUseArea( .T., "DBFNTX", "__cte_" + Lower( cTable ) + ".dbf", ;
cAlias, .T., .T. )
@@ -1418,9 +1445,34 @@ METHOD RunSelect() CLASS TSqlExecutor
aGoRows := NIL
IF Len( aJoins ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ;
! ::oAgg:HasAgg( aCols )
aFP := ::TryBuildFieldPositions( aResultExprs )
/* Plan pcode cache: cache aFP + pcW per cCacheKey.
* These results are pure functions of the plan tree
* (which is immutable between cache hits) and the
* target table schema (stable for the process). */
LOCAL hSelCached, cSelKey
IF ! Empty( ::cCacheKey )
cSelKey := ::cCacheKey + "#sel"
IF hb_HHasKey( s_hDmlPcodeCache, cSelKey )
hSelCached := s_hDmlPcodeCache[ cSelKey ]
aFP := hSelCached[ "fp" ]
pcW := hSelCached[ "where_pc" ]
ENDIF
ENDIF
IF aFP == NIL
aFP := ::TryBuildFieldPositions( aResultExprs )
IF aFP != NIL .AND. xWhere != NIL
pcW := ::TryCompileWhere( xWhere )
IF pcW == NIL
aFP := NIL /* WHERE couldn't compile — PRG path */
ENDIF
ENDIF
IF aFP != NIL .AND. ! Empty( ::cCacheKey )
s_hDmlPcodeCache[ ::cCacheKey + "#sel" ] := { ;
"fp" => aFP, ;
"where_pc" => pcW }
ENDIF
ENDIF
IF aFP != NIL
pcW := ::TryCompileWhere( xWhere )
IF xWhere == NIL .OR. pcW != NIL
IF ::bRowBlock != NIL
/* Block mode: stream rows through user block.
@@ -2297,29 +2349,22 @@ METHOD MaterializeCTE( aCTE ) CLASS TSqlExecutor
dbSelectArea( nExistWA )
dbCloseArea()
ENDIF
IF hb_FileExists( cTmpFile + ".dbf" )
FErase( cTmpFile + ".dbf" )
ENDIF
/* In-memory temp table — no file I/O, `mem:` scheme dispatches
* to MEMRDD. Create overwrites any prior table with this name. */
BEGIN SEQUENCE
dbCreate( cTmpFile + ".dbf", aStruct )
dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" )
RECOVER
LOOP
END SEQUENCE
USE ( cTmpFile + ".dbf" ) NEW EXCLUSIVE ALIAS ( cPopAlias )
FOR j := 1 TO Len( aDataRows )
dbAppend()
FOR k := 1 TO Min( Len( aStruct ), Len( aDataRows[ j ] ) )
IF aDataRows[ j ][ k ] != NIL
FieldPut( k, aDataRows[ j ][ k ] )
ENDIF
NEXT
NEXT
dbCommit()
dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cPopAlias, .F., .F. )
/* Go RTL SqlBulkInsert: collapses per-row dbAppend+FieldPut loop
* into a single RTL call — N·M boundary crossings → 1. */
SqlBulkInsert( aDataRows )
dbSelectArea( Select( cPopAlias ) )
dbCloseArea()
USE ( cTmpFile + ".dbf" ) NEW SHARED ALIAS ( cName )
dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cName, .T., .F. )
/* Replace existing table entry */
lReplaced := .F.
@@ -2340,16 +2385,7 @@ METHOD RunInsert() CLASS TSqlExecutor
aAutoInc := SqlGetAutoIncFields( cTable )
nWA := Select( cAlias )
IF nWA == 0
BEGIN SEQUENCE
dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
RECOVER
dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
END SEQUENCE
ELSE
dbSelectArea( nWA )
ENDIF
nWA := SqlExecOpenTable( cTable, cAlias )
/* Transaction logging */
::oTxn:LogRecord( cAlias, RecNo(), "INSERT" )
@@ -2410,12 +2446,16 @@ METHOD RunInsert() CLASS TSqlExecutor
NEXT
ENDIF
dbCommit()
IF nWA == 0
dbCloseArea()
/* Commit per INSERT when the WA cache is off (legacy durability
* guarantee). With the cache on, the caller batches via an
* explicit SqlWACacheDisable+dbCloseAll at shutdown — skipping
* the per-INSERT flush collapses the dominant I/O cost. */
IF ! SqlWACacheIsEnabled()
dbCommit()
ENDIF
SqlExecCloseTable( cAlias, nWA )
RETURN { { "affected_rows" }, { { 1 } } }
@@ -2423,6 +2463,7 @@ METHOD RunUpdate() CLASS TSqlExecutor
LOCAL cTable, aSet, xWhere, cAlias, nWA, i, nFPos, xVal
LOCAL nAffected := 0
LOCAL aFPos, aValuePc, pcWhere, lAllOk, cValSrc
cTable := ::hQuery[ "table" ]
aSet := ::hQuery[ "set" ]
@@ -2430,17 +2471,86 @@ METHOD RunUpdate() CLASS TSqlExecutor
cAlias := cTable
::aTables := { { cTable, cAlias, "" } }
nWA := Select( cAlias )
IF nWA == 0
BEGIN SEQUENCE
dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
RECOVER
dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
END SEQUENCE
ELSE
dbSelectArea( nWA )
nWA := SqlExecOpenTable( cTable, cAlias )
/* Fast path: compile WHERE + every SET value to pcode and delegate
* to Go RTL SqlBulkUpdate — skips per-record Go↔PRG boundary.
* Conditions: no active transaction (txn log records can't be
* emitted from inside the Go loop), no subquery / CASE / other
* nodes that PcCompile can't handle (try/fail pattern).
*
* Per-plan cache: when cCacheKey is set (TFiveSQL supplies it for
* plan-cached queries), we stash the compiled pcode under that key
* so subsequent identical UPDATEs skip the SqlExprToPrg + PcCompile
* walk entirely. The cached pcode is valid as long as the plan
* itself lives in the plan cache — which is forever in-process. */
IF ! ::oTxn:IsActive()
LOCAL hPcCached
IF ! Empty( ::cCacheKey ) .AND. hb_HHasKey( s_hDmlPcodeCache, ::cCacheKey )
hPcCached := s_hDmlPcodeCache[ ::cCacheKey ]
nAffected := SqlBulkUpdate( hPcCached[ "set_fpos" ], ;
hPcCached[ "where_pc" ], ;
hPcCached[ "set_pc" ] )
IF ! SqlWACacheIsEnabled()
dbCommit()
ENDIF
SqlExecCloseTable( cAlias, nWA )
RETURN { { "affected_rows" }, { { nAffected } } }
ENDIF
aFPos := {}
aValuePc := {}
lAllOk := .T.
FOR i := 1 TO Len( aSet )
nFPos := FieldPos( aSet[ i ][ 1 ] )
IF nFPos <= 0
lAllOk := .F.
EXIT
ENDIF
cValSrc := ::SqlExprToPrg( aSet[ i ][ 2 ] )
IF cValSrc == NIL
lAllOk := .F.
EXIT
ENDIF
AAdd( aFPos, nFPos )
AAdd( aValuePc, PcCompile( cValSrc ) )
IF ATail( aValuePc ) == NIL
lAllOk := .F.
EXIT
ENDIF
NEXT
pcWhere := NIL
IF lAllOk .AND. xWhere != NIL
cValSrc := ::SqlExprToPrg( xWhere )
IF cValSrc == NIL
lAllOk := .F.
ELSE
pcWhere := PcCompile( cValSrc )
IF pcWhere == NIL
lAllOk := .F.
ENDIF
ENDIF
ENDIF
IF lAllOk
nAffected := SqlBulkUpdate( aFPos, pcWhere, aValuePc )
/* Populate the per-plan cache for subsequent calls. */
IF ! Empty( ::cCacheKey )
s_hDmlPcodeCache[ ::cCacheKey ] := { ;
"set_fpos" => aFPos, ;
"set_pc" => aValuePc, ;
"where_pc" => pcWhere }
ENDIF
/* Defer commit under WA cache — batched at Disable/exit. */
IF ! SqlWACacheIsEnabled()
dbCommit()
ENDIF
SqlExecCloseTable( cAlias, nWA )
RETURN { { "affected_rows" }, { { nAffected } } }
ENDIF
ENDIF
/* Fallback: PRG scan loop — handles txn logging + non-compilable
* expressions (subquery, complex CASE, UDF in value or WHERE). */
dbGoTop()
WHILE ! Eof()
IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) )
@@ -2459,12 +2569,12 @@ METHOD RunUpdate() CLASS TSqlExecutor
ENDIF
dbSkip()
ENDDO
dbCommit()
IF nWA == 0
dbCloseArea()
IF ! SqlWACacheIsEnabled()
dbCommit()
ENDIF
SqlExecCloseTable( cAlias, nWA )
RETURN { { "affected_rows" }, { { nAffected } } }
@@ -2478,16 +2588,7 @@ METHOD RunDelete() CLASS TSqlExecutor
cAlias := cTable
::aTables := { { cTable, cAlias, "" } }
nWA := Select( cAlias )
IF nWA == 0
BEGIN SEQUENCE
dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
RECOVER
dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
END SEQUENCE
ELSE
dbSelectArea( nWA )
ENDIF
nWA := SqlExecOpenTable( cTable, cAlias )
SET DELETED ON
dbGoTop()
@@ -2501,13 +2602,84 @@ METHOD RunDelete() CLASS TSqlExecutor
ENDIF
dbSkip()
ENDDO
dbCommit()
IF ! SqlWACacheIsEnabled()
dbCommit()
ENDIF
IF nWA == 0
SqlExecCloseTable( cAlias, nWA )
RETURN { { "affected_rows" }, { { nAffected } } }
/* ======================================================================
* Workarea open/close helpers — consult the Go-native WA cache.
* When the cache is enabled (SqlWACacheEnable), SqlExecOpenTable
* reuses a previously opened workarea instead of running dbUseArea
* every call. SqlExecCloseTable leaves cached entries alive; plain
* (auto-opened, not cached) areas still close as before so tests
* that rely on immediate file release (FErase, UNIQUE index rebuild)
* stay correct when the cache is off — which is the default.
* ====================================================================== */
/* SqlExecOpenTable( cTable, cAlias ) → nWA
 *
 * Selects (opening if necessary) the workarea for cAlias and reports
 * ownership to the caller through the return value:
 *   > 0  the alias was already open, or it was freshly opened and
 *        registered in the WA cache — the caller must NOT close it.
 *   0    the table was freshly opened and not registered in the cache —
 *        the caller is responsible for closing it (SqlExecCloseTable
 *        keys off this 0).
 * cTable is the table base name; ".dbf" is appended here. */
FUNCTION SqlExecOpenTable( cTable, cAlias )
LOCAL nWA, nCached
/* Alias already open: just make it the current workarea. */
nWA := Select( cAlias )
IF nWA > 0
dbSelectArea( nWA )
RETURN nWA
ENDIF
/* The alias is not open at this point (Select() returned 0 above).
 * Ask the cache whether it still remembers a workarea for it.
 * NOTE(review): because Select( cAlias ) was 0 just above, the
 * comparison below looks like it can never succeed unless
 * SqlWACacheGet changes workarea state — confirm whether this branch
 * is reachable. In practice any remembered entry is treated as stale
 * and dropped by the invalidate call that follows. */
nCached := SqlWACacheGet( cAlias )
IF nCached > 0 .AND. Select( cAlias ) == nCached
dbSelectArea( nCached )
RETURN nCached
ENDIF
IF nCached > 0
SqlWACacheInvalidate( cAlias )
ENDIF
/* Open fresh. Two-step fallback mirrors the prior inline logic so
 * callers using mixed-case filenames on case-sensitive filesystems
 * still succeed: try the lower-cased file name first, then the name
 * exactly as given. The second attempt runs outside the SEQUENCE, so
 * an error raised by it is not trapped here. */
BEGIN SEQUENCE
dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
RECOVER
dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
END SEQUENCE
nWA := Select( cAlias )
/* Register for reuse only when the open succeeded and the WA cache
 * is enabled. */
IF nWA > 0 .AND. SqlWACacheIsEnabled()
SqlWACachePut( cAlias, nWA )
/* Return the real (non-zero) workarea number so callers'
 * "if nWA==0 close" gates skip — the cache owns the lifecycle now. */
RETURN nWA
ENDIF
RETURN 0 /* caller must close — matches legacy semantics */
FUNCTION SqlExecCloseTable( cAlias, nWA )
/* Only close if THIS call opened it AND the cache didn't adopt it.
* When nWA > 0, the caller either reused a pre-existing area or
* handed ownership to the cache, so we leave it alone. */
IF nWA == 0 .AND. ! SqlWACacheIsEnabled()
dbCloseArea()
ELSEIF nWA == 0 .AND. SqlWACacheIsEnabled() .AND. ;
SqlWACacheGet( cAlias ) == 0
/* Cache enabled but the alias wasn't registered (e.g., open
* failed between Put checks). Keep legacy behavior — close. */
dbCloseArea()
ENDIF
RETURN { { "affected_rows" }, { { nAffected } } }
RETURN NIL
/* ======================================================================
@@ -2626,17 +2798,11 @@ FUNCTION SqlMaterializeSubquery( xSubQ, cAlias, aParams )
NEXT
cTmpFile := "__drv_" + Lower( cAlias )
dbCreate( cTmpFile + ".dbf", aStruct )
USE ( cTmpFile + ".dbf" ) NEW EXCLUSIVE ALIAS __DRVTMP
FOR i := 1 TO Len( aRows2 )
dbAppend()
FOR j := 1 TO Min( Len( aStruct ), Len( aRows2[ i ] ) )
IF aRows2[ i ][ j ] != NIL
FieldPut( j, aRows2[ i ][ j ] )
ENDIF
NEXT
NEXT
dbCommit()
/* MEMRDD in-memory temp — the table is created under the mem: scheme,
 * avoiding the on-disk file create + FErase syscalls. */
dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" )
dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, "__DRVTMP", .F., .F. )
/* Go RTL SqlBulkInsert — subquery driving-table materialization. */
SqlBulkInsert( aRows2 )
CLOSE __DRVTMP
RETURN { cTmpFile, cAlias, "" }
@@ -2922,26 +3088,16 @@ METHOD MaterializeRecursiveCTE( aCTE ) CLASS TSqlExecutor
dbSelectArea( nExistWA )
dbCloseArea()
ENDIF
IF hb_FileExists( cTmpFile + ".dbf" )
FErase( cTmpFile + ".dbf" )
ENDIF
/* MEMRDD in-memory temp for CTE — no file create/delete. */
BEGIN SEQUENCE
dbCreate( cTmpFile + ".dbf", aStruct )
dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" )
RECOVER
END SEQUENCE
BEGIN SEQUENCE
USE ( cTmpFile + ".dbf" ) NEW ALIAS ( cAlias )
FOR j := 1 TO Len( aDataRows )
dbAppend()
FOR k := 1 TO Min( Len( aStruct ), Len( aDataRows[ j ] ) )
IF aDataRows[ j ][ k ] != NIL
FieldPut( k, aDataRows[ j ][ k ] )
ENDIF
NEXT
NEXT
dbCommit()
dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cAlias, .F., .F. )
/* Go RTL SqlBulkInsert — CTE materialization path. */
SqlBulkInsert( aDataRows )
RECOVER
END SEQUENCE
@@ -2973,7 +3129,7 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor
LOCAL i, j, k, nColIdx, xExpr
LOCAL cFunc, aPartBy, aOrdBy, aFuncArgs
LOCAL hPartitions, cPartKey, aPartIdx
LOCAL aPartitions, cPartKey, aPartIdx
LOCAL aSorted, aIdxMap, nPartCol
LOCAL nRank, nDenseRank, nRowNum
LOCAL xPrev, xCurr, nTies
@@ -2981,6 +3137,7 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor
LOCAL nRunSum, nRunCount
LOCAL aWinCols, nWC
LOCAL hFrame, nFS, nFE, m, xVal, xMin, xMax, lDefaultFrame
LOCAL aPartColIdx, aSortSpec, nOrdCol
/* Scan for window function columns */
aWinCols := {}
@@ -3008,69 +3165,55 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor
hFrame := xExpr[ 6 ]
ENDIF
/* Build partition groups as arrays of row indices */
hPartitions := { => }
FOR i := 1 TO Len( aRows )
cPartKey := ""
IF ValType( aPartBy ) == "A"
FOR j := 1 TO Len( aPartBy )
nPartCol := SqlFindColIdx( aPartBy[ j ], aFN )
IF nPartCol == 0
nPartCol := SqlFindColIdx2( SqlExprName( aPartBy[ j ] ), aFN )
ENDIF
IF nPartCol > 0 .AND. nPartCol <= Len( aRows[ i ] )
cPartKey += SqlValToStr( aRows[ i ][ nPartCol ] ) + "|"
ENDIF
NEXT
ENDIF
IF ! hb_HHasKey( hPartitions, cPartKey )
hPartitions[ cPartKey ] := {}
ENDIF
AAdd( hPartitions[ cPartKey ], i )
NEXT
/* Resolve PARTITION BY columns once, then delegate the row-index
* grouping to Go RTL SqlWindowPartitions — removes N·M per-row
* Go↔PRG boundary crossings for SqlValToStr / hb_HHasKey / AAdd. */
aPartColIdx := {}
IF ValType( aPartBy ) == "A"
FOR j := 1 TO Len( aPartBy )
nPartCol := SqlFindColIdx( aPartBy[ j ], aFN )
IF nPartCol == 0
nPartCol := SqlFindColIdx2( SqlExprName( aPartBy[ j ] ), aFN )
ENDIF
IF nPartCol > 0
AAdd( aPartColIdx, nPartCol )
ENDIF
NEXT
ENDIF
aPartitions := SqlWindowPartitions( aRows, aPartColIdx )
/* Pre-resolve ORDER BY column indices once per window column —
* Go SqlWindowSortPartition reads the resolved {nCol, lDesc}
* pairs directly, so every partition sort avoids the repeated
* SqlFindColIdx linear scan inside per-comparison PRG blocks. */
aSortSpec := {}
IF ValType( aOrdBy ) == "A" .AND. Len( aOrdBy ) > 0
FOR j := 1 TO Len( aOrdBy )
nOrdCol := SqlFindColIdx( aOrdBy[ j ][ 1 ], aFN )
IF nOrdCol == 0
nOrdCol := SqlFindColIdx2( SqlExprName( aOrdBy[ j ][ 1 ] ), aFN )
ENDIF
IF nOrdCol > 0
AAdd( aSortSpec, { nOrdCol, aOrdBy[ j ][ 2 ] == "DESC" } )
ENDIF
NEXT
ENDIF
/* Process each partition */
FOR EACH aPartIdx IN hb_HValues( hPartitions )
FOR EACH aPartIdx IN aPartitions
/* Sort partition indices by ORDER BY columns */
IF ValType( aOrdBy ) == "A" .AND. Len( aOrdBy ) > 0
ASort( aPartIdx,,, {|a, b| SqlWinRowCmp( aRows, a, b, aOrdBy, aFN ) < 0 } )
/* Sort partition indices by ORDER BY columns (Go RTL). */
IF Len( aSortSpec ) > 0
SqlWindowSortPartition( aRows, aPartIdx, aSortSpec )
ENDIF
/* Compute window function for each row in the partition */
/* Compute window function for each row in the partition.
* ROW_NUMBER/RANK/DENSE_RANK all go through one Go RTL call
* that walks the partition and writes the rank column —
* removes per-row SqlWinRowsEqual + PRG indexing overhead. */
DO CASE
CASE cFunc == "ROW_NUMBER"
FOR k := 1 TO Len( aPartIdx )
IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] )
aRows[ aPartIdx[ k ] ][ nColIdx ] := k
ENDIF
NEXT
CASE cFunc == "RANK"
nRank := 1
FOR k := 1 TO Len( aPartIdx )
IF k > 1
IF ! SqlWinRowsEqual( aRows, aPartIdx[ k ], aPartIdx[ k - 1 ], aOrdBy, aFN )
nRank := k
ENDIF
ENDIF
IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] )
aRows[ aPartIdx[ k ] ][ nColIdx ] := nRank
ENDIF
NEXT
CASE cFunc == "DENSE_RANK"
nDenseRank := 1
FOR k := 1 TO Len( aPartIdx )
IF k > 1
IF ! SqlWinRowsEqual( aRows, aPartIdx[ k ], aPartIdx[ k - 1 ], aOrdBy, aFN )
nDenseRank++
ENDIF
ENDIF
IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] )
aRows[ aPartIdx[ k ] ][ nColIdx ] := nDenseRank
ENDIF
NEXT
CASE cFunc == "ROW_NUMBER" .OR. cFunc == "RANK" .OR. cFunc == "DENSE_RANK"
SqlWindowAssignRank( aRows, aPartIdx, aSortSpec, nColIdx, cFunc )
CASE cFunc == "LAG"
nLagLead := 1
@@ -3817,11 +3960,12 @@ RETURN aResult
* simple column index (complex expressions → PRG fallback). */
METHOD TryBuildSortSpec( aOrderBy, aFieldNames ) CLASS TSqlExecutor
LOCAL aSpec := {}, i, j, xE, cName, nCol, cDir, nDot
LOCAL aSpec := {}, i, j, xE, cName, nCol, cDir, cNulls, nDot
FOR i := 1 TO Len( aOrderBy )
xE := aOrderBy[ i ][ 1 ]
cDir := Upper( aOrderBy[ i ][ 2 ] )
cNulls := iif( Len( aOrderBy[ i ] ) >= 3, Upper( aOrderBy[ i ][ 3 ] ), "" )
IF xE == NIL .OR. xE[ 1 ] != ND_COL
RETURN NIL
ENDIF
@@ -3843,7 +3987,10 @@ METHOD TryBuildSortSpec( aOrderBy, aFieldNames ) CLASS TSqlExecutor
IF nCol == 0
RETURN NIL
ENDIF
AAdd( aSpec, { nCol, cDir == "DESC" } )
/* Go SqlOrderBy reads {nCol, lDesc, cNulls}. cNulls empty means
* "default" — NIL sorts as the largest value (NULLs last in ASC,
* NULLs first in DESC). Explicit "FIRST"/"LAST" overrides. */
AAdd( aSpec, { nCol, cDir == "DESC", cNulls } )
NEXT
RETURN aSpec

View File

@@ -42,45 +42,10 @@ FUNCTION SqlExprName( xE )
RETURN "expr"
/* Check whether an expression tree contains an aggregate function call.
* Recurses into ND_BIN, ND_UNI, ND_FN args, ND_CASE to find nested
* aggregates like `salary + COUNT(*)` or `CASE WHEN ... THEN SUM(x)`. */
FUNCTION SqlExprHasAgg( xE )
LOCAL i
IF xE == NIL
RETURN .F.
ENDIF
IF xE[ 1 ] == ND_FN .AND. SqlIsAggName( xE[ 2 ] )
RETURN .T.
ENDIF
/* Recurse into sub-expressions */
IF xE[ 1 ] == ND_BIN
RETURN SqlExprHasAgg( xE[ 3 ] ) .OR. SqlExprHasAgg( xE[ 4 ] )
ENDIF
IF xE[ 1 ] == ND_UNI
RETURN SqlExprHasAgg( xE[ 3 ] )
ENDIF
IF xE[ 1 ] == ND_FN .AND. ValType( xE[ 3 ] ) == "A"
FOR i := 1 TO Len( xE[ 3 ] )
IF SqlExprHasAgg( xE[ 3 ][ i ] )
RETURN .T.
ENDIF
NEXT
ENDIF
IF xE[ 1 ] == ND_CASE .AND. ValType( xE[ 2 ] ) == "A"
FOR i := 1 TO Len( xE[ 2 ] )
IF SqlExprHasAgg( xE[ 2 ][ i ][ 1 ] ) .OR. SqlExprHasAgg( xE[ 2 ][ i ][ 2 ] )
RETURN .T.
ENDIF
NEXT
IF xE[ 3 ] != NIL .AND. SqlExprHasAgg( xE[ 3 ] )
RETURN .T.
ENDIF
ENDIF
RETURN .F.
/* SqlExprHasAgg is implemented in Go (hbrtl/sqlexpr.go) — registered
* as SQLEXPRHASAGG. The prior PRG recursive walker has been removed
* to avoid a name collision with the RTL symbol; behavior is
* byte-for-byte identical. See docs/RTL-Go-Native-Migration.md. */
/* Return .T. if the function name is an aggregate */
FUNCTION SqlIsAggName( c )

View File

@@ -329,121 +329,11 @@ FUNCTION SqlArg( a, n )
RETURN NIL
/* Coerce to string */
FUNCTION SqlCoerceStr( x )
IF x == NIL
RETURN ""
ENDIF
IF ValType( x ) == "C"
RETURN x
ENDIF
IF ValType( x ) == "N"
RETURN AllTrim( Str( x ) )
ENDIF
IF ValType( x ) == "D"
RETURN DToC( x )
ENDIF
IF ValType( x ) == "L"
RETURN iif( x, "T", "F" )
ENDIF
RETURN ""
/* Coerce to numeric */
FUNCTION SqlCoerceNum( x )
IF x == NIL
RETURN 0
ENDIF
IF ValType( x ) == "N"
RETURN x
ENDIF
IF ValType( x ) == "C"
RETURN Val( AllTrim( x ) )
ENDIF
IF ValType( x ) == "L"
RETURN iif( x, 1, 0 )
ENDIF
RETURN 0
/* Normalize for comparison: trim and uppercase strings */
FUNCTION SqlCoerceForCmp( x )
IF x == NIL
RETURN x
ENDIF
IF ValType( x ) == "C"
RETURN Upper( AllTrim( x ) )
ENDIF
RETURN x
/* Evaluate truthiness */
FUNCTION SqlIsTrue( x )
IF x == NIL
RETURN .F.
ENDIF
IF ValType( x ) == "L"
RETURN x
ENDIF
IF ValType( x ) == "N"
RETURN x != 0
ENDIF
IF ValType( x ) == "C"
RETURN ! Empty( x )
ENDIF
RETURN .F.
/* Case-insensitive equality comparison with cross-type coercion */
FUNCTION SqlCmpEq( a, b )
IF a == NIL .OR. b == NIL
RETURN a == NIL .AND. b == NIL
ENDIF
IF ValType( a ) == ValType( b )
IF ValType( a ) == "C"
RETURN Upper( AllTrim( a ) ) == Upper( AllTrim( b ) )
ENDIF
RETURN a == b
ENDIF
IF ValType( a ) == "N" .AND. ValType( b ) == "C"
RETURN a == Val( AllTrim( b ) )
ENDIF
IF ValType( a ) == "C" .AND. ValType( b ) == "N"
RETURN Val( AllTrim( a ) ) == b
ENDIF
RETURN .F.
/* Case-insensitive less-than comparison */
FUNCTION SqlCmpLt( a, b )
IF a == NIL .OR. b == NIL
RETURN .F.
ENDIF
IF ValType( a ) == ValType( b )
IF ValType( a ) == "C"
RETURN Upper( AllTrim( a ) ) < Upper( AllTrim( b ) )
ENDIF
RETURN a < b
ENDIF
IF ValType( a ) == "N" .AND. ValType( b ) == "C"
RETURN a < Val( AllTrim( b ) )
ENDIF
IF ValType( a ) == "C" .AND. ValType( b ) == "N"
RETURN Val( AllTrim( a ) ) < b
ENDIF
RETURN .F.
/* SqlCoerceStr/SqlCoerceNum/SqlCoerceForCmp/SqlIsTrue/SqlCmpEq/SqlCmpLt
* are implemented in Go (hbrtl/sqlhelpers.go) — registered as
* SQLCOERCESTR etc. The PRG bodies have been removed to avoid symbol
* collision with the RTL symbols; behavior is byte-for-byte identical.
* See docs/RTL-Go-Native-Migration.md (Tier 4). */
/* SQL LIKE pattern matching with optional escape character */

View File

@@ -32,13 +32,15 @@ RETURN SELF
METHOD OrderBy( aRows, aFN, aOB, aTables, aParams ) CLASS TSqlSort
LOCAL i, nCol
LOCAL i, nCol, cNulls
IF Len( aRows ) < 2 .OR. Len( aOB ) == 0
RETURN aRows
ENDIF
/* Pre-resolve column indexes */
/* Pre-resolve column indexes. Third element carries the explicit
* NULLS FIRST/LAST spec parsed by TSqlParser2:ParseOrderBy —
* empty string means "use default (NIL as largest)". */
s_aOBCols := {}
s_aOBNames := aFN
FOR i := 1 TO Len( aOB )
@@ -46,7 +48,8 @@ METHOD OrderBy( aRows, aFN, aOB, aTables, aParams ) CLASS TSqlSort
IF nCol == 0
nCol := SqlFindColIdx2( SqlExprName( aOB[ i ][ 1 ] ), aFN )
ENDIF
AAdd( s_aOBCols, { nCol, aOB[ i ][ 2 ] } )
cNulls := iif( Len( aOB[ i ] ) >= 3, Upper( aOB[ i ][ 3 ] ), "" )
AAdd( s_aOBCols, { nCol, aOB[ i ][ 2 ], cNulls } )
NEXT
ASort( aRows,,, {|a, b| SqlRowCompare( a, b ) < 0 } )
@@ -56,18 +59,11 @@ RETURN aRows
METHOD Distinct( aRows ) CLASS TSqlSort
LOCAL aR := {}, i, cKey
LOCAL hSeen := { => }
FOR i := 1 TO Len( aRows )
cKey := ::RowKey( aRows[ i ] )
IF ! hb_HHasKey( hSeen, cKey )
hSeen[ cKey ] := .T.
AAdd( aR, aRows[ i ] )
ENDIF
NEXT
RETURN aR
/* Go RTL SqlDistinct: single-pass dedup via Go map[string]bool.
* Key construction matches prior PRG ::RowKey byte-for-byte (same
* SqlValToStr mapping + '|' separator), so the output is identical
* to the old PRG loop — just ~100x faster on large result sets. */
RETURN SqlDistinct( aRows )
METHOD RowKey( aR ) CLASS TSqlSort
@@ -118,11 +114,12 @@ RETURN 0
/* Multi-key row comparator for ASort */
FUNCTION SqlRowCompare( aRowA, aRowB )
LOCAL i, nCol, cDir, xA, xB, nCmp
LOCAL i, nCol, cDir, cNulls, lNullsFirst, xA, xB, nCmp
FOR i := 1 TO Len( s_aOBCols )
nCol := s_aOBCols[ i ][ 1 ]
cDir := s_aOBCols[ i ][ 2 ]
cNulls := iif( Len( s_aOBCols[ i ] ) >= 3, s_aOBCols[ i ][ 3 ], "" )
IF nCol <= 0 .OR. nCol > Len( aRowA ) .OR. nCol > Len( aRowB )
LOOP
@@ -131,15 +128,22 @@ FUNCTION SqlRowCompare( aRowA, aRowB )
xA := aRowA[ nCol ]
xB := aRowB[ nCol ]
/* NULLs sort last */
/* NULL ordering — default: NIL is largest (NULLs last in ASC,
* NULLs first in DESC). Explicit NULLS FIRST/LAST (SQL:2003)
* from the parser overrides direction. */
IF xA == NIL .AND. xB == NIL
LOOP
ENDIF
IF xA == NIL
RETURN iif( cDir == "DESC", -1, 1 )
ENDIF
IF xB == NIL
RETURN iif( cDir == "DESC", 1, -1 )
IF xA == NIL .OR. xB == NIL
DO CASE
CASE cNulls == "FIRST" ; lNullsFirst := .T.
CASE cNulls == "LAST" ; lNullsFirst := .F.
OTHERWISE ; lNullsFirst := ( cDir == "DESC" )
ENDCASE
IF xA == NIL
RETURN iif( lNullsFirst, -1, 1 )
ENDIF
RETURN iif( lNullsFirst, 1, -1 )
ENDIF
nCmp := 0

View File

@@ -0,0 +1,92 @@
// Large-scale bulk-insert / CTE materialization benchmark.
// Isolates the SqlBulkInsert Go RTL win: rows × cols boundary
// crossings collapse to a single RTL call, so the speedup grows
// linearly with N and M.
#include "FiveSqlDef.ch"
/* Entry point of the bulk-insert / large-CTE benchmark.
 * Builds the bench_big table, times two materializing queries (20 runs
 * each), prints one result line per query and removes the table. */
PROCEDURE Main()

   LOCAL nStart, nStop, nIter, aResult, nCount

   /* On any runtime error: print description + operation, then Break(). */
   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )

   ? "================================================================"
   ? " FiveSql2 Bulk Insert / Large-CTE Benchmark"
   ? "================================================================"
   ?

   SetupLarge()

   ? "--- CTE materialization at scale ---"

   /* CTE case: the CTE filters bench_big (val > 5000) and the outer
    * query sorts the CTE result by val DESC. */
   nStart := hb_MilliSeconds()
   FOR nIter := 1 TO 20
      aResult := five_SQL( ;
         "WITH big_cte AS (SELECT id, name, val FROM bench_big WHERE val > 5000) " + ;
         "SELECT * FROM big_cte ORDER BY val DESC" )
   NEXT
   nStop := hb_MilliSeconds()
   /* Row count is the length of the result's second element; 0 when
    * the result is not an array with at least two elements. */
   nCount := iif( ValType( aResult ) == "A" .AND. Len( aResult ) >= 2, ;
                  Len( aResult[ 2 ] ), 0 )
   R( "BULK_CTE_10k_20iter", nStop - nStart, nCount )

   /* Subquery case: a derived table in FROM (val > 8000) sorted by val. */
   nStart := hb_MilliSeconds()
   FOR nIter := 1 TO 20
      aResult := five_SQL( ;
         "SELECT a.id, a.val FROM (SELECT id, val FROM bench_big WHERE val > 8000) a " + ;
         "ORDER BY a.val" )
   NEXT
   nStop := hb_MilliSeconds()
   nCount := iif( ValType( aResult ) == "A" .AND. Len( aResult ) >= 2, ;
                  Len( aResult[ 2 ] ), 0 )
   R( "BULK_SUBQ_10k_20iter", nStop - nStart, nCount )

   CleanupLarge()

   ?
   ? "================================================================"

   RETURN
/* Creates bench_big.dbf (ID N10, NAME C30, VAL N10) from scratch and
 * fills it with 10,000 records where ID == VAL == record ordinal and
 * NAME is "Name_" followed by the zero-padded ordinal. Returns NIL. */
STATIC FUNCTION SetupLarge()

   LOCAL nRec := 0
   LOCAL cName

   /* Remove a leftover file from a previous run before re-creating it. */
   IF hb_FileExists( "bench_big.dbf" )
      FErase( "bench_big.dbf" )
   ENDIF

   dbCreate( "bench_big.dbf", { ;
      { "ID", "N", 10, 0 }, ;
      { "NAME", "C", 30, 0 }, ;
      { "VAL", "N", 10, 0 } ;
      } )

   USE bench_big.dbf NEW EXCLUSIVE

   DO WHILE nRec < 10000
      nRec++
      cName := "Name_" + PadL( hb_ntos( nRec ), 6, "0" )
      dbAppend()
      FieldPut( 1, nRec )
      FieldPut( 2, cName )
      FieldPut( 3, nRec )
   ENDDO

   dbCommit()
   CLOSE bench_big

   RETURN NIL
/* Tears down the benchmark fixture: closes every open workarea, then
 * deletes bench_big.dbf. Returns NIL. */
STATIC FUNCTION CleanupLarge()
dbCloseAll()
FErase( "bench_big.dbf" )
RETURN NIL
/* Prints one benchmark result line.
 *   cLabel - benchmark name, right-padded to 28 characters
 *   nMs    - elapsed milliseconds, formatted 6 wide
 *   nRows  - row count; " rows=<n>" is appended only when it is > 0
 * Returns NIL. */
STATIC FUNCTION R( cLabel, nMs, nRows )

   LOCAL cSuffix := iif( nRows > 0, " rows=" + hb_ntos( nRows ), "" )

   ? " ", PadR( cLabel, 28 ) + Str( nMs, 6 ) + " ms" + cSuffix

   RETURN NIL

View File

@@ -0,0 +1,76 @@
// Large-scale UPDATE benchmark — many matching rows so the per-row
// savings of SqlBulkUpdate amortize the PcCompile setup cost.
#include "FiveSqlDef.ch"
/* Entry point of the large-table UPDATE benchmark.
 * Builds bench_big, times three UPDATE shapes (ranged WHERE, no WHERE,
 * never-true WHERE), prints one line per shape and removes the table. */
PROCEDURE Main()

   LOCAL nStart, nStop, nIter

   /* On any runtime error: print description + operation, then Break(). */
   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )

   ? "================================================================"
   ? " FiveSql2 UPDATE Benchmark (10k rows, many matching)"
   ? "================================================================"
   ?

   SetupLarge()

   /* Ranged WHERE: val BETWEEN 2500 AND 5000, roughly 2500 matching
    * rows per statement. */
   nStart := hb_MilliSeconds()
   FOR nIter := 1 TO 50
      five_SQL( "UPDATE bench_big SET val = val + 1 WHERE val BETWEEN 2500 AND 5000" )
   NEXT
   nStop := hb_MilliSeconds()
   R( "UPD_2500match_50iter", nStop - nStart )

   /* No WHERE clause: every row is updated (val + 0 keeps the value). */
   nStart := hb_MilliSeconds()
   FOR nIter := 1 TO 10
      five_SQL( "UPDATE bench_big SET val = val + 0" )
   NEXT
   nStop := hb_MilliSeconds()
   R( "UPD_all_10iter_10k_each", nStop - nStart )

   /* Never-true WHERE (val < 0): no row matches. */
   nStart := hb_MilliSeconds()
   FOR nIter := 1 TO 100
      five_SQL( "UPDATE bench_big SET val = val + 1 WHERE val < 0" )
   NEXT
   nStop := hb_MilliSeconds()
   R( "UPD_0match_100iter", nStop - nStart )

   CleanupLarge()

   ?
   ? "================================================================"

   RETURN
/* Creates bench_big.dbf (ID N10, NAME C30, VAL N10) from scratch and
 * fills it with 10,000 records where ID == VAL == record ordinal and
 * NAME is "N_" followed by the zero-padded ordinal. */
STATIC PROCEDURE SetupLarge()

   LOCAL nRec := 0
   LOCAL cName

   /* Remove a leftover file from a previous run before re-creating it. */
   IF hb_FileExists( "bench_big.dbf" )
      FErase( "bench_big.dbf" )
   ENDIF

   dbCreate( "bench_big.dbf", { ;
      { "ID", "N", 10, 0 }, ;
      { "NAME", "C", 30, 0 }, ;
      { "VAL", "N", 10, 0 } ;
      } )

   USE bench_big.dbf NEW EXCLUSIVE

   DO WHILE nRec < 10000
      nRec++
      cName := "N_" + PadL( hb_ntos( nRec ), 6, "0" )
      dbAppend()
      FieldPut( 1, nRec )
      FieldPut( 2, cName )
      FieldPut( 3, nRec )
   ENDDO

   dbCommit()
   CLOSE bench_big

   RETURN
/* Tears down the benchmark fixture: closes every open workarea, then
 * deletes bench_big.dbf. */
STATIC PROCEDURE CleanupLarge()
dbCloseAll()
FErase( "bench_big.dbf" )
RETURN
/* Prints one benchmark result line: cLabel right-padded to 32
 * characters, then nMs (elapsed milliseconds) formatted 7 wide,
 * then " ms". Returns NIL. */
STATIC FUNCTION R( cLabel, nMs )
? " ", PadR( cLabel, 32 ) + Str( nMs, 7 ) + " ms"
RETURN NIL

View File

@@ -0,0 +1,106 @@
// Prepared-statement vs concatenated-SQL benchmark.
// Demonstrates the plan cache win for parameterized queries — the same
// `?` template hits cache on every call after the first; concatenated
// SQL strings vary by value and miss every time.
#include "FiveSqlDef.ch"
#define ITERS 1000
/* Entry point of the prepared-statement benchmark.
 * Runs ITERS iterations of four patterns and prints one timing line
 * for each:
 *   A  concatenated INSERT    B  parameterized INSERT
 *   C  concatenated SELECT    D  parameterized SELECT
 * The table is created by SetupTable() and removed by CleanupTable(). */
PROCEDURE Main()

   LOCAL t0, t1, i

   /* On any runtime error: print description + operation, then Break(). */
   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )

   ? "================================================================"
   ? " FiveSql2 Prepared-Statement Benchmark"
   ? " " + hb_ntos( ITERS ) + " iterations per pattern"
   ? "================================================================"
   ?

   SetupTable()

   /* A: concatenated INSERT — the SQL text differs on every iteration
    * because the id literal is spliced into the string. */
   t0 := hb_MilliSeconds()
   FOR i := 1 TO ITERS
      five_SQL( "INSERT INTO bench_prep (id, val) VALUES (" + hb_ntos( i ) + ", 'a')" )
   NEXT
   t1 := hb_MilliSeconds()
   R( "CONCAT_INSERT", t1 - t0 )

   /* Drop the rows written so far so B starts from an empty table. */
   TruncateTable()

   /* B: prepared INSERT — identical SQL text on every iteration, the
    * values travel in the parameter array. */
   t0 := hb_MilliSeconds()
   FOR i := 1 TO ITERS
      five_SQL( "INSERT INTO bench_prep (id, val) VALUES (?, ?)", { i, "a" } )
   NEXT
   t1 := hb_MilliSeconds()
   R( "PREPARED_INSERT", t1 - t0 )

   /* Deliberately NO TruncateTable() here. The rows just inserted by B
    * (id 1..ITERS) are the data set for the SELECT benchmarks below;
    * truncating at this point made C and D query an empty table, so
    * every lookup matched nothing. */

   /* C: concatenated SELECT by id. */
   t0 := hb_MilliSeconds()
   FOR i := 1 TO ITERS
      five_SQL( "SELECT val FROM bench_prep WHERE id = " + hb_ntos( i ) )
   NEXT
   t1 := hb_MilliSeconds()
   R( "CONCAT_SELECT", t1 - t0 )

   /* D: prepared SELECT by id. */
   t0 := hb_MilliSeconds()
   FOR i := 1 TO ITERS
      five_SQL( "SELECT val FROM bench_prep WHERE id = ?", { i } )
   NEXT
   t1 := hb_MilliSeconds()
   R( "PREPARED_SELECT", t1 - t0 )

   CleanupTable()

   ?
   ? "================================================================"

   RETURN
/* Creates bench_prep.dbf (ID N10, VAL C10) from scratch and appends
 * ITERS records with ID = 1..ITERS and VAL = "a". */
STATIC PROCEDURE SetupTable()

   /* Declared before any executable statement: Harbour rejects a LOCAL
    * that follows executable code. */
   LOCAL i

   /* Remove a leftover file from a previous run before re-creating it. */
   IF hb_FileExists( "bench_prep.dbf" )
      FErase( "bench_prep.dbf" )
   ENDIF

   dbCreate( "bench_prep.dbf", { ;
      { "ID", "N", 10, 0 }, ;
      { "VAL", "C", 10, 0 } ;
      } )

   /* Pre-populate enough rows so SELECT benchmark has real data. */
   USE bench_prep.dbf NEW EXCLUSIVE
   FOR i := 1 TO ITERS
      dbAppend()
      FieldPut( 1, i )
      FieldPut( 2, "a" )
   NEXT
   dbCommit()
   CLOSE bench_prep

   RETURN
/* Empties bench_prep.dbf: opens it exclusively in a new workarea,
 * removes all records with dbZap(), then closes it again. */
STATIC PROCEDURE TruncateTable()
USE bench_prep.dbf NEW EXCLUSIVE
dbZap()
CLOSE bench_prep
RETURN
/* Tears down the benchmark fixture: closes every open workarea, then
 * deletes bench_prep.dbf. */
STATIC PROCEDURE CleanupTable()
dbCloseAll()
FErase( "bench_prep.dbf" )
RETURN
/* Prints one benchmark result line: the label (padded to 18), total
 * elapsed milliseconds, and the per-query cost in microseconds
 * (nMs * 1000 / ITERS, two decimals). Returns NIL. */
STATIC FUNCTION R( cLabel, nMs )

   LOCAL cTotal    := Str( nMs, 7 ) + " ms "
   LOCAL cPerQuery := Str( nMs * 1000 / ITERS, 8, 2 ) + " us/query"

   ? " ", PadR( cLabel, 18 ) + cTotal + cPerQuery

   RETURN NIL

View File

@@ -25,6 +25,10 @@ PROCEDURE Main()
/* Setup: create test tables */
SetupBenchData()
/* Opt in to workarea cache — repeated DML against the same table
* skips dbUseArea/dbCloseArea syscalls. Disabled at cleanup. */
SqlWACacheEnable()
? "--- SELECT Benchmarks ---"
/* B1: Simple SELECT * (full scan) */
@@ -195,7 +199,8 @@ PROCEDURE Main()
? " Benchmark Complete"
? "================================================================"
/* Cleanup */
/* Cleanup — dbCloseAll flushes + closes every workarea. */
SqlWACacheDisable()
dbCloseAll()
FErase( "bench_emp.dbf" )
FErase( "bench_ord.dbf" )

View File

@@ -0,0 +1,140 @@
// Regression test for NULL ordering in ORDER BY.
// Both the Go fast path (SqlOrderBy) and the PRG fallback
// (SqlRowCompare) must produce the same order and honor explicit
// NULLS FIRST/LAST from SQL:2003.
//
// Default (no NULLS spec): NULLs sort as the largest value — last in
// ASC, first in DESC. Matches PostgreSQL default and the legacy PRG
// SqlRowCompare behavior that predates the Go port.
STATIC s_nPass := 0
STATIC s_nFail := 0
STATIC s_nTotal := 0
/* Entry point of the NULL-ordering regression test.
 * Creates the nullord table, prints two diagnostic dumps (unsorted and
 * ORDER BY score), runs the four ordering checks, prints the pass
 * count and removes the table. */
PROCEDURE Main()

   /* On any runtime error: print description + operation, then Break(). */
   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )

   ? "================================================================"
   ? " FiveSql NULL Ordering Test"
   ? "================================================================"
   ?

   SetupData()

   // Diagnostic: show what score column actually contains after UPDATE,
   // then what ORDER BY score ASC produces.
   DumpRows( "Raw rows:", five_SQL( "SELECT name, score FROM nullord" ) )
   DumpRows( "ORDER BY score ASC:", five_SQL( "SELECT name, score FROM nullord ORDER BY score" ) )

   TestDefaultAsc()
   TestDefaultDesc()
   TestNullsFirstAsc()
   TestNullsLastDesc()

   ?
   ? "================================================================"
   ? " Results:", LTrim(Str(s_nPass)), "/", LTrim(Str(s_nTotal)), "passed"
   ? "================================================================"

   CleanupData()

   RETURN

/* Prints cTitle, then one "name=... score=..." line per row of the
 * query result aR (rows are taken from aR[ 2 ]; a NIL score prints as
 * "NIL"), then a blank line. Nothing but the title and the blank line
 * is printed when aR is not an array with at least two elements. */
STATIC PROCEDURE DumpRows( cTitle, aR )

   LOCAL i, aRows

   ? cTitle
   IF ValType( aR ) == "A" .AND. Len( aR ) >= 2
      aRows := aR[ 2 ]
      FOR i := 1 TO Len( aRows )
         ? " name=" + AllTrim( aRows[ i ][ 1 ] ), "score=" + ;
            iif( aRows[ i ][ 2 ] == NIL, "NIL", LTrim( Str( aRows[ i ][ 2 ] ) ) )
      NEXT
   ENDIF
   ?

   RETURN
// Builds the test fixture: table "nullord" with NAME C(20) and
// SCORE N(6,0), holding five rows A..E. Rows C and E are appended
// without a SCORE value; A, B and D get 10, 30 and 20.
STATIC PROCEDURE SetupData()
LOCAL aFields
aFields := { ;
{"NAME", "C", 20, 0}, ;
{"SCORE", "N", 6, 0} }
dbCreate( "nullord", aFields )
USE "nullord" NEW EXCLUSIVE
APPEND BLANK ; REPLACE NAME WITH "A", SCORE WITH 10
APPEND BLANK ; REPLACE NAME WITH "B", SCORE WITH 30
APPEND BLANK ; REPLACE NAME WITH "C"
APPEND BLANK ; REPLACE NAME WITH "D", SCORE WITH 20
APPEND BLANK ; REPLACE NAME WITH "E"
CLOSE ALL
// Ask the SQL engine to set SCORE to NULL for rows C and E.
// NOTE(review): the Test* procedures in this file wrap the column in
// NULLIF(score, 0) and state that DBF N fields can't store NULL, so the
// stored value may still read back as 0 — confirm what this UPDATE
// actually leaves in the column.
five_SQL( "UPDATE nullord SET score = NULL WHERE name = 'C'" )
five_SQL( "UPDATE nullord SET score = NULL WHERE name = 'E'" )
RETURN
// Tears down the fixture: closes all workareas, then deletes nullord.dbf.
STATIC PROCEDURE CleanupData()
CLOSE ALL
FErase( "nullord.dbf" )
RETURN
// Collapses a query result into a compact order signature: the trimmed
// first column of every row in aResult[ 2 ], concatenated in row order.
// Returns "" when aResult is not an array with at least two elements.
STATIC FUNCTION NameOrder( aResult )

   LOCAL cNames := ""
   LOCAL aData, nRow

   IF ! ( ValType( aResult ) == "A" ) .OR. Len( aResult ) < 2
      RETURN cNames
   ENDIF

   aData := aResult[ 2 ]
   FOR nRow := 1 TO Len( aData )
      cNames += AllTrim( aData[ nRow ][ 1 ] )
   NEXT

   RETURN cNames
// Records one test outcome. Bumps the total counter, compares cGot with
// cWant using exact equality, bumps the pass or fail counter and prints
// a PASS or FAIL line tagged with cLabel.
STATIC PROCEDURE Check( cLabel, cGot, cWant )

   LOCAL lMatch

   s_nTotal++
   lMatch := ( cGot == cWant )

   IF ! lMatch
      s_nFail++
      ? " FAIL:", cLabel, "→ got", cGot, "want", cWant
      RETURN
   ENDIF

   s_nPass++
   ? " PASS:", cLabel, "→", cGot

   RETURN
// Induce NULL via NULLIF(score, 0) — DBF N fields can't store NULL, so
// we turn the sentinel 0 into NULL at projection time. C and E have
// score=0 → projected s = NULL.
// ORDER BY s with no direction and no NULLS clause.
// Expected: non-NULL values ascending (A=10, D=20, B=30) followed by
// the NULL rows (C, E), i.e. NULLs are treated as the largest value.
STATIC PROCEDURE TestDefaultAsc()

   LOCAL cSQL := "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s"

   Check( "default ASC (NULLs last)", NameOrder( five_SQL( cSQL ) ), "ADBCE" )

   RETURN
// ORDER BY s DESC with no NULLS clause.
// Expected: the NULL rows first (C, E), then non-NULL values descending
// (B=30, D=20, A=10), i.e. NULLs are treated as the largest value.
STATIC PROCEDURE TestDefaultDesc()

   LOCAL cSQL := "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s DESC"

   Check( "default DESC (NULLs first)", NameOrder( five_SQL( cSQL ) ), "CEBDA" )

   RETURN
// ORDER BY s ASC NULLS FIRST.
// Expected: the NULL rows first (C, E), then non-NULL values ascending
// (A, D, B). The explicit NULLS clause overrides the ASC default.
STATIC PROCEDURE TestNullsFirstAsc()

   LOCAL cSQL := "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s ASC NULLS FIRST"

   Check( "ASC NULLS FIRST", NameOrder( five_SQL( cSQL ) ), "CEADB" )

   RETURN
// ORDER BY s DESC NULLS LAST.
// Expected: non-NULL values descending (B, D, A), then the NULL rows
// (C, E). The explicit NULLS clause overrides the DESC default.
STATIC PROCEDURE TestNullsLastDesc()

   LOCAL cSQL := "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s DESC NULLS LAST"

   Check( "DESC NULLS LAST", NameOrder( five_SQL( cSQL ) ), "BDACE" )

   RETURN

View File

@@ -113,6 +113,13 @@ func doGenerate(file *ast.File, debug, library bool) string {
}
}
if hasXBaseCommands(file) {
// Blank-import the in-memory RDD so MEMRDD / "mem:" paths work
// from PRG (the driver registers itself in its init).
g.imports["five/hbrdd/mem"] = true
g.importAlias["five/hbrdd/mem"] = "_"
}
g.emitHeader()
g.emitSymbols()
for _, d := range file.Decls {

View File

@@ -0,0 +1,835 @@
# RTL Go-Native 전환 계획
PRG 핫패스와 `hbrtl/` RTL 함수 중 Go 네이티브 구현이 이익인 후보 목록 및 진행 기록. 기준선: 최종 결과가 **Harbour와 동일**해야 한다.
## 배경
FiveSql2 성능 개선 흐름(`3caadb2 SqlOrderBy+SqlGroupBy Go RTL`, `5fc9c3b SqlHashJoin Go RTL` 등)은 PRG 핫루프를 Go RTL로 옮겨 큰 이득을 보였다. 본 문서는 같은 패턴을 체계적으로 적용할 후보를 추린다.
## Harbour 호환 검증 근거
| 항목 | 근거 | 영향 |
|------|------|------|
| 해시 기본 플래그 | [harbour-core/include/hbapi.h:927-931](../harbour-core/include/hbapi.h#L927-L931) — `HB_HASH_FLAG_DEFAULT = HB_HASH_AUTOADD_ASSIGN \| HB_HASH_BINARY \| HB_HASH_KEEPORDER` | 삽입 순서 보존 + `memcmp` 정확 비교 |
| 해시 키 비교 | [harbour-core/src/vm/hashes.c:167-182](../harbour-core/src/vm/hashes.c#L167-L182) — `hb_hashItemCmp` | CHAR padding trim 없음, Date/Timestamp는 julian 비교 |
| 내부 탐색 | `pPairs[]` + `pnPos[]` 이진 탐색 (O(log N)) | Five의 Go map 치환은 O(1)로 상회 |
## 후보 목록
### ✅ Tier 1 — 즉시 이익, 시맨틱 안전
| # | 대상 | 파일 | 방식 | 예상 효과 | 상태 |
|---|------|------|------|-----------|------|
| 1 | Hash 스토리지 | [hbrtl/hash.go](../hbrtl/hash.go), [hbrt/ops_collection.go](../hbrt/ops_collection.go), [hbrt/value.go](../hbrt/value.go) | `map[string]int` 인덱스 추가, 삽입 순서 슬라이스 유지 | 50–100x | **완료 (2026-04-17)** |
| 2 | SqlDistinct | [_FiveSql2/src/TSqlSort.prg:57-70](../_FiveSql2/src/TSqlSort.prg#L57-L70), [hbrtl/sqlscan.go](../hbrtl/sqlscan.go) | Go RTL `map[string]struct{}` + `strings.Builder` | 100–300x | **완료 (2026-04-17)** |
| 3 | SqlRowCompare NULL · 혼합타입 정합성 | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlSort.prg](../_FiveSql2/src/TSqlSort.prg), [_FiveSql2/src/TSqlExecutor.prg](../_FiveSql2/src/TSqlExecutor.prg) | Go/PRG 양 경로 NULL 순서 PRG 시맨틱으로 통일 + `NULLS FIRST/LAST` 배선 | 정합성 수정 | **완료 (2026-04-17)** |
### ✅ Tier 2 — 블록 NIL 특화 + 누락 타입 보강 (완료)
| # | 대상 | 파일 | 방식 | 상태 |
|---|------|------|------|------|
| 4 | ASort 타입 특화 + 정확성 | [hbrtl/array.go:134-300](../hbrtl/array.go#L134-L300) | 비교자 블록 없을 때 1회 타입 스캔 → 특화 비교자. Date/Logical/Timestamp 지원 추가 (기존엔 no-op) | **완료 (2026-04-17)** |
| 5 | AScan fast-path | [hbrtl/array.go:302-380](../hbrtl/array.go#L302-L380) | 검색값이 string/int/double일 때 타입별 인라인 루프. 드물게 쓰는 타입은 `valuesEqual` fallback | **완료 (2026-04-17)** |
### 🔎 Tier 3 — 내부 헬퍼 최적화
| # | 대상 | 파일 | 방식 | 상태 |
|---|------|------|------|------|
| 6 | RAT 역방향 스캔 | [hbrtl/strings2.go:16-51](../hbrtl/strings2.go#L16-L51) | 검토 결과 `strings.LastIndex` + 부분슬라이스는 이미 최적. 변경 없음 | **검토 종료 (2026-04-17)** |
| 7 | SqlExprHasAgg | [hbrtl/sqlexpr.go](../hbrtl/sqlexpr.go) | PRG 재귀 → Go AST walker + 상수 시간 agg 이름 조회 | **완료 (2026-04-17)** |
### ✅ Tier 4 — DML Boundary-Crossing 감소 (완료)
| # | 대상 | 파일 | 방식 | 상태 |
|---|------|------|------|------|
| 8 | SqlBulkInsert | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg](../_FiveSql2/src/TSqlExecutor.prg) | CTE/subquery/tmp 테이블 materialize 경로의 `FOR j ... dbAppend ... FOR k ... FieldPut` 이중 루프를 Go RTL 단일 호출로 대체 | **완료 (2026-04-17)** |
| 9 | SqlBulkUpdate | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg RunUpdate](../_FiveSql2/src/TSqlExecutor.prg) | UPDATE 스캔 루프 전체를 Go RTL로 이관. WHERE + SET 값 표현식을 pcode로 컴파일해 PRG 메서드 디스패치 제거 | **완료 (2026-04-17)** |
| 10 | MEMRDD 자동 임포트 | [compiler/gengo/gengo.go](../compiler/gengo/gengo.go) | 모든 Five 프로그램에 `_ "five/hbrdd/mem"` 블랭크 임포트 자동 추가 → `USE "mem:x" VIA "MEMRDD"` 즉시 사용 가능 | **완료 (2026-04-17)** |
| 11 | PcCompile 결과 캐시 | [hbrtl/pcexpr.go](../hbrtl/pcexpr.go) | `sync.Map`으로 소스 문자열 키 캐시. 반복 쿼리에서 파서+genpc 건너뛰기 | **완료 (2026-04-17)** |
| 12 | SQL 플랜 캐시 + HbDeepClone | [_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg), [hbrtl/array.go](../hbrtl/array.go) | `cSQL → hQuery` PRG 해시 캐시. 히트 시 Go RTL `HbDeepClone`으로 pristine 사본 반환 → `SqlFoldConst` 인-플레이스 변경 안전 | **완료 (2026-04-17)** |
| 13 | 파라미터 바인딩 벤치 입증 | [_FiveSql2/test/bench_prep_sql.prg](../_FiveSql2/test/bench_prep_sql.prg) | 기존 `five_SQL(cSQL, aParams)` + `?` 파서가 이미 지원. 플랜 캐시와 결합 시 SELECT 1.58x, INSERT 1.12x | **입증 (2026-04-17)** |
| 14 | CTE → MEMRDD | [_FiveSql2/src/TSqlExecutor.prg](../_FiveSql2/src/TSqlExecutor.prg), [hbrdd/mem/memrdd.go](../hbrdd/mem/memrdd.go), [hbrtl/sqlscan.go SqlBulkInsert](../hbrtl/sqlscan.go) | 3곳 materialize 경로를 `dbCreate("mem:xxx", ..., "MEMRDD")` + MEMRDD `dbUseArea`로 전환. SqlBulkInsert가 `*dbf.DBFArea` 외 일반 `hbrdd.Area`도 처리하도록 확장. MEMRDD Create가 필드명 trailing-space trim | **완료 (2026-04-17)** |
| 15 | SqlWindowPartitions Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg) | PARTITION BY 키 빌드 + 행-인덱스 그룹핑을 Go RTL에 위임. N·M 경계 크로싱 → 1 | **완료 (2026-04-17)** |
| 16 | SqlWindowSortPartition Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg) | 파티션 내 ORDER BY를 Go `sort.SliceStable` + 사전 해석된 컬럼 인덱스로 처리. PRG 비교 블록 제거 | **완료 (2026-04-17)** |
| 17 | SqlGroupRows Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlAgg.prg GroupBy](../_FiveSql2/src/TSqlAgg.prg) | GROUP BY 그룹 빌드 루프만 Go RTL로. 집계·HAVING은 복잡 표현식 대응 위해 PRG 유지 | **완료 (2026-04-17)** |
| 18 | SqlComputeAggSimple Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlAgg.prg ComputeAgg](../_FiveSql2/src/TSqlAgg.prg) | COUNT/SUM/AVG/MIN/MAX + 컬럼 인자 fast-path. 복잡 인자·GROUP_CONCAT은 PRG fallback | **완료 (2026-04-17)** |
| 19 | SQL 스칼라 헬퍼 Go RTL | [hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go), [_FiveSql2/src/TSqlFunc.prg](../_FiveSql2/src/TSqlFunc.prg) | `SqlIsTrue/SqlCmpEq/SqlCmpLt/SqlCoerceForCmp/SqlCoerceNum/SqlCoerceStr` 6개 Go로. PRG tree-walker 평가 경로(HAVING, complex expr) 오버헤드 감소 | **완료 (2026-04-17)** |
| 20 | SQL 템플릿 자동 파라미터화 | [hbrtl/sqlhelpers.go SqlExtractTemplate](../hbrtl/sqlhelpers.go), [_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg) | 리터럴(`TK_TEXT`/`TK_NUM`)을 `TK_QMARK`로 치환 + 템플릿 키로 플랜 캐시. 동일 구조 다른 값 쿼리가 캐시 공유 | **완료 (2026-04-17)** |
| 21 | TSqlLexer Go 포팅 + 결합 | [hbrtl/sqlhelpers.go SqlLexerTokenize + SqlLexAndExtractTemplate](../hbrtl/sqlhelpers.go), [_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg) | PRG `SubStr` 기반 문자-단위 렉서를 Go byte-level FSM으로. 자동-파라미터화와 결합해 1회 Go 호출로 lex+normalize 완료 | **완료 (2026-04-17)** |
| 22 | SqlWindowAssignRank Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg) | ROW_NUMBER/RANK/DENSE_RANK 배정 루프를 Go에서. 파티션당 1회 호출로 per-row SqlWinRowsEqual PRG 호출 제거 | **완료 (2026-04-17)** |
| 23 | HbDeepClone 성능 개선 | [hbrtl/array.go deepCloneValue](../hbrtl/array.go) | 스칼라 원소는 재귀 스킵 (슬롯 복사만), 해시 키 공유 (문자열/숫자는 불변). 플랜 캐시 히트마다 수행되는 핫패스 | **완료 (2026-04-17)** |
| 24 | WA 캐시 + 지연 commit | [hbrtl/sqlwacache.go](../hbrtl/sqlwacache.go), [_FiveSql2/src/TSqlExecutor.prg SqlExecOpenTable/CloseTable + RunInsert/Update/Delete](../_FiveSql2/src/TSqlExecutor.prg) | 워크에어리어 공정-수명 캐시 (opt-in). 활성화 시 DML의 per-query dbUseArea/dbCloseArea/dbCommit 전부 배치 → **B12 INSERT 48x** | **완료 (2026-04-17)** |
| 25 | Plan pcode 캐시 + SqlBulkUpdate flush 지연 | [_FiveSql2/src/TSqlExecutor.prg s_hDmlPcodeCache + cCacheKey](../_FiveSql2/src/TSqlExecutor.prg), [hbrtl/sqlscan.go SqlBulkUpdate](../hbrtl/sqlscan.go) | 플랜 키별 컴파일된 pcode(aFPos/where/set_pc) 캐시 + WA cache 활성 시 Go RTL 내부 `Flush()` 스킵 → **B13 UPDATE 48x** | **완료 (2026-04-17)** |
| 26 | SELECT 경로 plan pcode 캐시 | [_FiveSql2/src/TSqlExecutor.prg RunSelect fast path](../_FiveSql2/src/TSqlExecutor.prg) | #25의 패턴을 SELECT fast-path에도 적용. `TryBuildFieldPositions` + `TryCompileWhere` 결과를 `cCacheKey#sel`로 캐시. 반복 SELECT의 PRG AST walk 제거 | **완료 (2026-04-17)** |
| 27 | SqlEvalHaving Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlAgg.prg EvalHaving](../_FiveSql2/src/TSqlAgg.prg) | HAVING 트리 walker를 Go로. ND_LIT/ND_NIL/ND_COL/ND_FN(5 aggs)/ND_BIN/ND_UNI 처리. 복잡 케이스는 PRG fallback | **완료 (2026-04-17, 효과 미미)** |
### ❌ 제외 (Harbour 호환 리스크 과다)
| 대상 | 제외 이유 |
|------|----------|
| SqlLikeMatch `regexp` 치환 | Harbour SQL LIKE의 `%`/`_`/`[abc]`/`[!abc]`/이스케이프 규칙은 regex와 미스매치. 자체 매처 필요 |
| SubStr Go slice 직접 | 이미 slice 사용 중 ([hbrtl/strings.go:149](../hbrtl/strings.go#L149)). 변경 이익 없음 |
| Descend `bytes.Map` | 성능 이익 <5% |
| SET DATE 비트셋 사전계산 | 5–15%지만 `setDateFormat` 전역 일관성 리스크 > 이익 |
### ✅ 이미 최적 (건드리지 말 것)
- [hbrtl/crypto.go](../hbrtl/crypto.go) MD5/SHA256/BASE64/CRC32 — `crypto/md5`, `crypto/sha256`, `encoding/base64` 사용 중
- [hbrtl/binconv.go](../hbrtl/binconv.go) BIN2I/L/W — `encoding/binary` 사용 중
- [hbrtl/regex.go](../hbrtl/regex.go) — `regexp` 사용 중
## 진행 기록
### #1 Hash 스토리지 O(1) 전환 — 2026-04-17 완료
**구조 변경** ([hbrt/value.go:237-249](../hbrt/value.go#L237-L249))
```go
type HbHash struct {
Keys []Value // 삽입 순서 (HB_HASH_KEEPORDER 기본)
Values []Value // 병렬
Order []int
Flags int32
Index map[string]int // 신규: O(1) 탐색용 미러
}
```
**신규 파일**: [hbrt/hash_helpers.go](../hbrt/hash_helpers.go)
- `hashKey(v Value) (string, bool)` — `valueEqual` 동치류와 일치하는 직렬화. Nil/String/Numeric/Logical/Date/Timestamp 지원. 수치는 정수로 환산 가능하면 `'I'` 폼으로 정규화 (int/double 교차 매칭), -0.0 → +0.0
- `(*HbHash).Lookup/Has/Set/Append/Delete/HashGet/ensureIndex/HashFromPairs` 메서드
- 비인덱싱 키 타입(Array/Hash/Block/Pointer)은 fallback 선형 스캔 + `valueEqual`
**호출부 전환**
| 파일 | 변경 |
|------|------|
| [hbrt/ops_collection.go](../hbrt/ops_collection.go) | `HashGen`/`ArrayPush`/`ArrayPop` 헬퍼 경유. `HashGen`은 pair 수집 후 `Set`로 last-wins 보장 |
| [hbrt/valuemethods.go](../hbrt/valuemethods.go) | `vmHashHas`/`vmHashDelete` 헬퍼 경유 |
| [hbrt/hbfunc.go](../hbrt/hbfunc.go) | `HashAdd` → `Set`, `HashGetC` → `"S"+key` 직접 Index 히트 |
| [hbrt/macroeval.go](../hbrt/macroeval.go) | 해시 리터럴 평가 `Set` (중복 키 last-wins) |
| [hbrt/gobridge.go](../hbrt/gobridge.go) | `reflect.Map` 변환 `Append` (Go map은 중복 키 없음) |
| [hbrtl/hash.go](../hbrtl/hash.go) | 7개 RTL 함수 (HbHash/HGet/HSet/HDel/HHasKey/HKeys/HValues) 전체 헬퍼 경유 |
| [hbrtl/json.go](../hbrtl/json.go) | `navigatePath`/`JsonMerge` 헬퍼 경유 |
**Harbour 호환 보장**
- 키 삽입 순서 보존 (`hb_HKeys()` 반환): `Keys[]` 슬라이스 유지
- `HB_HASH_BINARY` 정확 비교: `hashKey`가 String을 raw bytes로 직렬화
- 수치 교차 비교 (`1 == 1.0`): 정수로 환산 가능한 double은 `'I'` 폼으로 정규화
- 비인덱싱 키: `valueEqual` fallback (Array/Hash/Block 포인터 동일성 포함)
**스테일 방지**: `ensureIndex()`는 Index가 nil이거나 indexable 키 개수와 불일치하면 재구축. 테스트가 `.Keys = append(...)`로 직접 조작해도 다음 Lookup 시점에 자동 복구.
**검증 (CLAUDE.md 3종)**
- `go test ./...` — 15 패키지 ALL PASS
- FiveSql2 — 43/43 (100%)
- Harbour compat — 51/51 (100%)
### #2 SqlDistinct Go RTL — 2026-04-17 완료
**추가 함수** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `appendValueHashKey(sb *strings.Builder, v)` — `valueHashKey`와 동일 매핑이나 중간 문자열 할당 없이 Builder에 직접 기록
- `SqlDistinct(aRows) → aRows` — Go map 기반 단일 패스 dedup, 입력 순서 보존
**호출부 변경** ([_FiveSql2/src/TSqlSort.prg:57-62](../_FiveSql2/src/TSqlSort.prg#L57-L62))
```harbour
METHOD Distinct( aRows ) CLASS TSqlSort
RETURN SqlDistinct( aRows )
```
PRG의 `hb_HHasKey` 루프 + 수동 `cKey += SqlValToStr(..) + "|"` 조립을 Go 한 번 호출로 대체. 컴파일러의 "undeclared variable" 경고는 RTL 함수 심볼이 gengo 테이블에 없기 때문 — 런타임에 `SQLDISTINCT` 심볼로 해결되어 동작은 정상.
**Harbour 호환 보장**
- 키 구성 규칙이 `SqlValToStr` 시맨틱(`appendValueHashKey`)과 byte-for-byte 일치 — CHAR는 trailing space trim, NIL은 `\x00NIL`, 숫자는 int/double 별도 경로
- 입력 순서 보존 → SQL DISTINCT 결과의 첫 등장 순서 유지
- 빈 배열 · 단일 행은 입력 그대로 반환 (PRG 동작과 일치)
**등록** ([hbrtl/register.go:626](../hbrtl/register.go#L626))
```go
hbrt.Sym("SQLDISTINCT", hbrt.FsPublic, SqlDistinct),
```
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 — 43/43 (100%)
- Harbour compat — 51/51 (100%)
### #3 SqlRowCompare NULL 순서 · 혼합타입 정합성 — 2026-04-17 완료
**발견된 문제**
1. Go `SqlOrderBy` 기본값이 NIL을 가장 작은 값으로 취급 (ASC에서 NULLs FIRST) — PRG `SqlRowCompare`의 원래 시맨틱(NIL = 가장 큼)과 정반대
2. 파서가 `NULLS FIRST/LAST` (SQL:2003) 스펙을 파싱하지만 ([TSqlParser2.prg:962-973](../_FiveSql2/src/TSqlParser2.prg#L962-L973)) Go/PRG 어느 경로도 이를 읽지 않음 — 명시 스펙이 완전 무시
3. Go `compareValues`가 숫자 vs 문자열 혼합 타입 비교를 지원하지 않음 — PRG는 `Val(AllTrim(x))`로 강제변환 ([TSqlSort.prg:145-148](../_FiveSql2/src/TSqlSort.prg#L145-L148))
**수정 내역**
| 파일 | 변경 |
|------|------|
| [hbrtl/sqlscan.go](../hbrtl/sqlscan.go) | `sortCol`에 `nullsFirst bool` 필드 추가. `cDir == DESC`를 기본값으로 하고 `arr.Items[2]`가 `"FIRST"`/`"LAST"`면 오버라이드. `compareValues`를 `compareValuesNonNil` 기반으로 재구성하고 NIL 처리를 호출부로 이관. 혼합 N/C 비교용 `parseLeadingNumeric` 추가 |
| [_FiveSql2/src/TSqlExecutor.prg:3818](../_FiveSql2/src/TSqlExecutor.prg#L3818) | `TryBuildSortSpec`이 `aOrderBy[i][3]`을 읽어 3번째 요소 `cNulls`로 Go 스펙에 전달 |
| [_FiveSql2/src/TSqlSort.prg:33-54](../_FiveSql2/src/TSqlSort.prg#L33-L54) | `OrderBy` 메서드가 `aOB[i][3]`을 `s_aOBCols`에 보존 |
| [_FiveSql2/src/TSqlSort.prg:118-144](../_FiveSql2/src/TSqlSort.prg#L118-L144) | `SqlRowCompare`가 명시 `NULLS FIRST/LAST`를 우선 적용, 없으면 `cDir == DESC`를 기본 |
**Harbour/FiveSql2 시맨틱 보장**
- 기본값: NIL은 가장 큰 값 → ASC는 NULLs LAST, DESC는 NULLs FIRST (PRG 원래 동작, PostgreSQL 기본과 일치)
- `NULLS FIRST/LAST` 명시 시 방향과 무관하게 스펙 우선
- 혼합 N/C 비교: PRG `Val(AllTrim(x))` 동작 복제 (선행 공백 무시, 부호/소수점 허용)
**회귀 테스트** — [_FiveSql2/test/test_null_order.prg](../_FiveSql2/test/test_null_order.prg)
4/4 PASS: default ASC, default DESC, ASC NULLS FIRST, DESC NULLS LAST
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 43/43 · Harbour compat 51/51
### #4, #5 ASort 정확성/특화 + AScan fast-path — 2026-04-17 완료
**ASort 버그 발견** — 기본 비교 경로([기존 array.go:164](../hbrtl/array.go#L164))가 `IsString` / `IsNumeric`가 아닌 타입에 대해 `return false`를 반환 → 날짜·논리값·타임스탬프 배열 정렬이 **no-op**.
**수정 내역** ([hbrtl/array.go](../hbrtl/array.go))
- `detectArrayKind(items)` — 1회 스캔으로 동종 배열 분류 (Int / Numeric / String / Date / Timestamp / Logical / Mixed)
- 분류 결과에 따라 타입 특화 `sort.SliceStable` 선택. Int 배열은 `AsNumInt`만 써서 double 변환 생략
- Mixed는 `valueLess` fallback — Harbour `<` 시맨틱 (NIL 가장 작음, 타입 내 비교)
**AScan fast-path** ([hbrtl/array.go:302-380](../hbrtl/array.go#L302-L380))
- 검색값이 문자열·정수·실수일 때 타입별 인라인 루프 — `valuesEqual` 호출·switch·타입 체크 생략
- 정수 검색 + 배열 내 double 원소는 cross-type 비교 (`item.AsNumDouble() == float64(n)`) — 기존 `valuesEqual` 시맨틱 그대로
- Date/Timestamp/Logical/NIL 검색은 `valuesEqual` fallback
**회귀 테스트** ([tests/compat_harbour.prg:328-349](../tests/compat_harbour.prg#L328-L349))
- `9c1 ASort dates ascending` — julian 기준 정렬 (신규)
- `9c2 ASort logicals: F,F,T,T` — 논리값 정렬 (신규)
- `9e1 AScan int found` — 정수 탐색 (신규)
- `9e2 AScan int cross-type` — 정수로 저장된 배열에 double 검색 (신규)
- `9e3 AScan int not found` — 부재 케이스 (신규)
**Harbour 호환 보장**
- 블록이 주어지면 100% 기존 동작 유지 (부작용 보존)
- 블록 NIL 경로는 Harbour 기본 `<` 시맨틱 복제. 이전엔 깨져 있던 날짜/논리값이 이제 올바르게 정렬
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 — 43/43
- Harbour compat — **56/56** (51 기존 + 5 신규)
### #6 RAT 재검토 — 2026-04-17 종결(변경 없음)
`strings.LastIndex`는 Boyer-Moore/Rabin-Karp 최적화 내장. `target[:from]` 슬라이스는 Go에서 O(1)·할당-프리. nOccurrence=1 경로(실제 대부분)는 이미 단일 `LastIndex` 호출. >1 경로는 이전 매치 위치에서 시작해 누적 O(n)이므로 수동 역방향 스캔 대비 이득 없음. **원본 유지**.
### #7 SqlExprHasAgg Go walker — 2026-04-17 완료
**동기**: 매 쿼리마다 SELECT 컬럼 표현식 × 재귀 깊이 만큼 호출 — 깊은 식에서 PRG VM 프레임 셋업 비용이 누적됨. 호출 지점: [TSqlAgg.prg:41,62,85,134](../_FiveSql2/src/TSqlAgg.prg), [TSqlExecutor.prg:1298](../_FiveSql2/src/TSqlExecutor.prg#L1298) 등 6곳.
**구현** ([hbrtl/sqlexpr.go](../hbrtl/sqlexpr.go))
- `aggFuncSet` — `map[string]struct{}` (상수시간 룩업). AGG_FUNCTIONS 매크로와 완전 일치 (COUNT/SUM/AVG/MIN/MAX/GROUP_CONCAT/STRING_AGG/LISTAGG/JSON_ARRAYAGG/JSON_OBJECTAGG/XMLAGG/ANY_VALUE/BOOL_AND/BOOL_OR)
- `sqlExprHasAggWalk` — PRG SqlExprHasAgg와 **byte-for-byte 동일한 재귀 트리 순회**. ND_FN/ND_BIN/ND_UNI/ND_CASE 가지 커버. ND_WINDOW/ND_SUB 의도적 미순회 (각자 집계 스코프 보유)
- 상수 `ndLit`, `ndCol`, `ndFn` 등 — `FiveSqlDef.ch`의 kind 번호와 동일
**호출부 변경**
- [_FiveSql2/src/TSqlExpr.prg:45-49](../_FiveSql2/src/TSqlExpr.prg#L45-L49) — PRG `FUNCTION SqlExprHasAgg` 제거 (심볼 충돌 방지). 주석으로 Go RTL 위임 명시
- [hbrtl/register.go](../hbrtl/register.go) — `SQLEXPRHASAGG` 공개 심볼 등록
- 기존 호출부(`SqlExprHasAgg(xE)`) 그대로 동작 — RTL 심볼이 해결
**Harbour 호환 보장**: AST kind 번호가 PRG와 정확히 일치. agg 함수 이름 집합이 `AGG_FUNCTIONS` 매크로와 정확히 일치. 재귀 가지 로직이 PRG와 줄 단위로 매치 (`IF xE[1] == ND_FN .AND. SqlIsAggName(xE[2])` 등).
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 — 43/43
- Harbour compat — 56/56
### #8 SqlBulkInsert Go RTL — 2026-04-17 완료
**동기**: `dbAppend`/`FieldPut`은 이미 Go RTL. 병목은 **PRG 루프가 행·컬럼 단위로 Go RTL을 호출하는 boundary crossing**. N행 × M컬럼 = N·M 회 VM 프레임 셋업 + 스택 push/pop + 파라미터 마샬링.
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlBulkInsert(aRows) → nInserted` — 현재 workarea의 `*DBFArea`에 직접 `Append()` + `PutValue()` + `Flush()`
- NIL 원소는 필드 건너뜀 (PRG `IF aRows[j][k] != NIL` 보존)
- 행 길이가 필드 수 초과 시 초과분 무시, 부족 시 나머지 필드는 default
**호출부 치환** — 동일 형상 루프 3곳 → 1줄
| 위치 | 맥락 |
|------|------|
| [TSqlExecutor.prg:2310](../_FiveSql2/src/TSqlExecutor.prg#L2310) | CREATE TABLE AS SELECT / 임시테이블 로드 |
| [TSqlExecutor.prg:2630](../_FiveSql2/src/TSqlExecutor.prg#L2630) | subquery driving-table materialization |
| [TSqlExecutor.prg:2935](../_FiveSql2/src/TSqlExecutor.prg#L2935) | CTE materialization |
**A/B 벤치마크** ([_FiveSql2/test/bench_bulk.prg](../_FiveSql2/test/bench_bulk.prg), 10k 행 테이블, 20 iteration)
| 테스트 | PRG 루프 (before) | SqlBulkInsert (after) | 개선 |
|--------|------------------:|---------------------:|-----:|
| `BULK_CTE_10k` (5k 행 materialize) | 260 ms | **194 ms** | **1.34x** |
| `BULK_SUBQ_10k` (2k 행 materialize) | 121 ms | **107 ms** | **1.13x** |
*쿼리당 환산*: CTE 10k에서 `(260-194)/20 = 3.3ms`/쿼리 절감. 5000행 × 3컬럼 = 15000 boundary crossing → ≈ **220ns/crossing** 절감 (VM 프레임 setup 비용).
**기존 bench_sql(100행 규모) 효과 미미**: 40행 × 2컬럼 = 80 crossing × 220ns ≈ 18µs/쿼리 절감. 4.3ms 쿼리에서 <1% noise. 실제 이득은 **N이 커질수록 선형 증가**.
**Harbour 호환 보장**
- NIL 원소 스킵 동작 정확히 보존
- 행/필드 길이 불일치 처리 동일
- `Flush()` 호출로 `dbCommit()` 대체 — 동일한 디스크 반영 시점
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 — 43/43
- Harbour compat — 56/56
## 진행 순서
1. #1 Hash 스토리지 — 완료
2. #2 SqlDistinct — 완료
3. #3 SqlRowCompare NULL·혼합타입 — 완료
4. #4 ASort 정확성/특화 — 완료
5. #5 AScan fast-path — 완료
6. #6 RAT 재검토 — 변경 없음
7. #7 SqlExprHasAgg Go walker — 완료
8. #8 SqlBulkInsert — 완료 (Tier 4)
9. #9 SqlBulkUpdate — 완료 (Tier 4)
10. #10 MEMRDD 자동 임포트 — 완료 (Tier 4 인프라)
11. #11 PcCompile 결과 캐시 — 완료 (Tier 4 회수 최적화)
12. #12 SQL 플랜 캐시 + HbDeepClone — 완료 (Tier 4 상위 계층)
13. #13 파라미터 바인딩 입증 — 완료 (기존 기능 + 플랜 캐시 결합 효과)
14. #14 CTE → MEMRDD — 완료 (디스크 임시파일 제거)
15. #15 SqlWindowPartitions Go RTL — 완료 (윈도우 파티션 빌드)
16. #16 SqlWindowSortPartition Go RTL — 완료 (윈도우 정렬)
17. #17 SqlGroupRows Go RTL — 완료 (GROUP BY 그룹 빌드)
18. #18 SqlComputeAggSimple Go RTL — 완료 (집계 함수 fast-path)
19. #19 SQL 스칼라 헬퍼 Go RTL — 완료 (IsTrue/CmpEq/CmpLt/Coerce×3)
20. #20 SQL 템플릿 자동 파라미터화 — 완료 (리터럴 → `?` + 플랜 캐시 공유)
21. #21 TSqlLexer Go 포팅 + 결합 — 완료 (#20 효과 증폭)
22. #22 SqlWindowAssignRank Go RTL — 완료 (ROW_NUMBER/RANK/DENSE_RANK)
23. #23 HbDeepClone 성능 개선 — 완료 (스칼라 재귀 스킵 + 해시 키 공유)
24. #24 WA 캐시 + 지연 commit — 완료 (B12 INSERT **48x**)
25. #25 Plan pcode 캐시 + Flush 지연 — 완료 (B13 UPDATE **48x**)
26. #26 SELECT plan pcode 캐시 — 완료 (SELECT fast-path 캐시 확장)
27. #27 SqlEvalHaving Go RTL — 완료 (효과 미미, 복잡 HAVING 워크로드용)
**전체 계획 완료 (2026-04-17).** 각 단계 후 `go test ./...` + FiveSql2 43/43 + Harbour compat 필수 원칙 준수.
### #9 SqlBulkUpdate Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlBulkUpdate(aFieldPositions, pcWhere, aValuePcodes) → nAffected` — WHERE + SET 값 모두 컴파일된 pcode를 받아 Go 내부에서 스캔·평가·PutValue. `FastFieldGetter` 설치로 pcode 내부 `FieldGet`도 인터페이스 디스패치 없이 `*DBFArea.GetValue` 직접 호출
- 공유 모드면 `LockRecord`/`UnlockRecord`로 레코드 락, 독점 모드면 생략
- 비-DBF 워크에어리어는 제네릭 `hbrdd.Area` 경로로 fallback
**PRG 연결** ([_FiveSql2/src/TSqlExecutor.prg RunUpdate](../_FiveSql2/src/TSqlExecutor.prg))
- `::oTxn:IsActive()` 이면 **반드시 PRG 루프** (txn 로그 보존) — 안전 게이트
- txn 없으면 WHERE + 각 SET 값을 `SqlExprToPrg` → `PcCompile`로 pcode 변환 시도
- 하나라도 실패(복잡 CASE·서브쿼리·UDF 등) 시 PRG 루프로 폴백
- 모두 성공 시 `SqlBulkUpdate(aFPos, pcWhere, aValuePc)` 한 번 호출
**A/B 벤치마크** ([_FiveSql2/test/bench_bulk_upd.prg](../_FiveSql2/test/bench_bulk_upd.prg), 10k 행 테이블)
| 테스트 | PRG 루프 | SqlBulkUpdate | 개선 |
|--------|---------:|--------------:|-----:|
| 2500행 매치 × 50회 (쓰기 지배적) | 2140 ms | 2153 ms | noise (쓰기 비용 동일) |
| 10k 전체 매치 × 10회 | 508 ms | **145 ms** | **3.5x** |
| 0행 매치 × 100회 (WHERE만) | 2288 ms | **214 ms** | **10.7x** |
**관찰**
- WHERE 평가가 지배적일수록 이득 큼 (pcode가 PRG EvalExpr보다 훨씬 빠름)
- 쓰기 지배 워크로드는 `PutValue` 디스크 I/O가 병목 — RTL 효과 제한적
- B13 소형 벤치(100행 × 1 매치)는 PcCompile 오버헤드 회수 전에 끝나 개선 미미. 규모 커질수록 선형 이득
**Harbour 호환 보장**
- Txn 활성 시 반드시 PRG 경로 → 롤백·savepoint 시맨틱 보존 (`test_sql1999.prg 4b SAVEPOINT + ROLLBACK TO` 통과)
- 복잡 표현식도 PRG 폴백 → SqlExprToPrg가 NIL 반환 시 기존 동작 그대로
- 공유/독점 모드에 맞춘 락 정책
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 — 43/43
- Harbour compat — 56/56
### #10 MEMRDD 자동 임포트 — 2026-04-17 완료
Five 컴파일러가 생성하는 Go 코드에 `_ "five/hbrdd/mem"` 블랭크 임포트를 자동 추가. 기존에는 mem 패키지가 `init()`에서 드라이버 등록하지만 아무도 임포트하지 않아 MEMRDD 미등록 상태였음.
**변경** ([compiler/gengo/gengo.go:103-120](../compiler/gengo/gengo.go#L103-L120))
```go
if hasXBaseCommands(file) {
g.imports["five/hbrdd/mem"] = true
g.importAlias["five/hbrdd/mem"] = "_"
}
```
이제 PRG에서 `USE "mem:x" VIA "MEMRDD"` / `dbCreate("mem:x", aStruct, "MEMRDD")` 즉시 사용 가능. 임시테이블·CTE materialize의 in-memory 전환 기반.
### #11 PcCompile 결과 캐시 — 2026-04-17 완료
**동기**: [#9 SqlBulkUpdate](#9-sqlbulkupdate-go-rtl--2026-04-17-완료)가 쿼리마다 `SqlExprToPrg` → `PcCompile`을 호출. 파서+preprocess+genpc가 ~50–200µs — B13(100행 × 1 매치) 같은 소형 쿼리에서는 RTL 절감분을 먹어치움.
**구현** ([hbrtl/pcexpr.go](../hbrtl/pcexpr.go))
```go
var pcCompileCache sync.Map // map[string]*hbrt.PcodeFunc
```
- 캐시 히트 시 파서/genpc 건너뛰고 즉시 pointer 반환
- `sync.Map` — read-mostly 패턴에 최적. `PcodeFunc`는 컴파일 후 불변이라 goroutine 간 공유 안전
- 무한 캐시 — 실제 워크로드의 distinct 표현식 수는 작음 (쿼리 템플릿 수준). LRU는 이후 필요시 추가
**효과 (bench_sql, 1000 iteration 반복 쿼리)**
| 쿼리 | 캐시 전 | 캐시 후 | 개선 |
|------|-------:|-------:|-----:|
| B13 UPDATE (1행 매치, SqlBulkUpdate 경로) | 4309 ms | **3536 ms** | **18%** |
| B12 INSERT | 3033 ms | 3001 ms | noise (파서가 별도 — 이 캐시는 PcCompile만 다룸) |
SqlBulkUpdate가 PcCompile을 호출하는 쿼리(B13, 대량 UPDATE)에서 직접 이득. 타 벤치는 PcCompile을 호출하지 않거나 이미 1회만 호출해서 효과 없음.
**Harbour 호환 보장**: `PcodeFunc`는 immutable, 소스 문자열이 키. 동일 소스 → 동일 결과 보장. 컴파일 실패 시 캐시에 저장 안 함.
**검증**
- `go test ./...` — ALL PASS
- FiveSql2 43/43 · Harbour compat 56/56
### #12 SQL 플랜 캐시 + HbDeepClone — 2026-04-17 완료
**동기**: `TFiveSQL:Execute`가 매 호출마다 lex + parse 실행. 반복 쿼리(B1~B11, B13~B15 등 벤치 대부분)는 동일 SQL 텍스트 → 파싱을 한 번만 수행하고 재사용하면 큰 이득.
**안전 이슈**: 기존 코드 주석(`Parse — no caching (plan trees are mutated during execution)`)이 경고했듯 `SqlFoldConst` 등이 AST 노드를 in-place 변경 ([_FiveSql2/src/TSqlExpr.prg:75-151](../_FiveSql2/src/TSqlExpr.prg#L75-L151)). 캐시에서 포인터를 그대로 반환하면 첫 실행이 캐시를 오염.
**구현**
- Go RTL `HbDeepClone(xVal) → xNewVal` ([hbrtl/array.go](../hbrtl/array.go)) — `deepCloneValue` 재귀로 Array/Hash를 element별 복제. 스칼라는 불변이라 그대로 반환. `HBDEEPCLONE` · `HB_DEEPCOPY` 두 이름으로 등록
- PRG 정적 캐시 `s_hPlanCache` ([_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg))
- 히트: `HbDeepClone(s_hPlanCache[cSQL])` 반환 → Run이 마음껏 변경해도 캐시 불변
- 미스: 파싱 후 `HbDeepClone(hQuery)`를 캐시에 저장, 원본은 Run에 넘김
```prg
STATIC s_hPlanCache := { => }
...
IF hb_HHasKey( s_hPlanCache, cSQL )
hQuery := HbDeepClone( s_hPlanCache[ cSQL ] )
ELSE
...parse...
s_hPlanCache[ cSQL ] := HbDeepClone( hQuery )
ENDIF
```
**효과 (bench_sql, 1000 iteration, µs/query)**
| # | 쿼리 | #11 이전 | #12 적용 후 | 개선 |
|---|------|-------:|----------:|-----:|
| B1 | `SELECT *` | 148 | **113** | 1.31x |
| B2 | `WHERE` | 166 | **85** | **1.95x** |
| B3 | `ORDER BY` | 178 | **96** | **1.85x** |
| B4 | `GROUP HAVING` | 877 | 731 | 1.20x |
| B5 | `DISTINCT` | 122 | **81** | 1.51x |
| B6 | `INNER JOIN` | 357 | **231** | 1.55x |
| B7 | `CTE simple` | 4415 | 4000 | 1.10x |
| B9 | `ROW_NUMBER` | 1134 | 1017 | 1.11x |
| B11 | `SUM OVER` | 621 | **493** | 1.26x |
| B13 | `UPDATE` | 3536 | **3301** | 1.07x |
| B15 | `CTE+WIN+JOIN` | 5751 | 5502 | 1.04x |
*B12 (INSERT)는 문자열 리터럴 i 값이 매 iteration마다 달라 캐시 미스 — 향후 파라미터 바인딩 도입 시 대상.*
**Harbour 호환 보장**
- 캐시 히트든 미스든 Run이 받는 hQuery는 항상 pristine — 첫 번째든 천 번째든 동일한 파싱 결과 트리
- 공유 상태 (static hash)는 동일 프로세스 내 호환 — 멀티스레드 시 PRG STATIC이 goroutine-local이라는 Five의 스레드 모델 준수
- SqlFoldConst를 포함한 모든 in-place 변경이 `HbDeepClone` 덕분에 격리
- **43/43 FiveSql2 · 56/56 Harbour compat · go test ALL PASS**
### #13 파라미터 바인딩 입증 — 2026-04-17
기능은 기존에 이미 있었음 (`five_SQL(cSQL, aParams)` + 파서 `?` 토큰 처리 + `ND_PAR` 노드). #12 플랜 캐시와 결합 시 동일 SQL 템플릿은 100% 캐시 히트. 사용자가 문자열 연결 대신 `?` 전환 시 자동으로 이득.
**A/B (1000 iteration)**
| 패턴 | 문자열 연결 | 프리페어 `?` | 개선 |
|------|----------:|-----------:|-----:|
| INSERT | 3214 ms | **2881 ms** | 1.12x (쓰기 I/O 지배) |
| SELECT | 254 ms | **161 ms** | **1.58x** (파서 지배) |
**응용 가이드**: 반복 DML/SELECT는 `?` + `aParams` 패턴 권장. 문자열 연결은 매번 파싱 비용 발생.
### #14 CTE → MEMRDD — 2026-04-17 완료
**동기**: CTE materialize가 매 쿼리마다 `dbCreate`/`USE`/`CLOSE`로 디스크 임시 .dbf 생성. 단일 프로세스에서 불필요한 디스크 오염 + syscall 비용.
**구현**
- `MaterializeCTE` ([TSqlExecutor.prg:2287-2315](../_FiveSql2/src/TSqlExecutor.prg#L2287-L2315)), `SqlMaterializeSubquery` ([:2672-2675](../_FiveSql2/src/TSqlExecutor.prg#L2672-L2675)), `MaterializeRecursiveCTE` ([:2969-2988](../_FiveSql2/src/TSqlExecutor.prg#L2969-L2988)) — 3곳 `dbCreate(cFile+".dbf")` → `dbCreate("mem:"+cTmpFile, aStruct, "MEMRDD")`, `USE` → `dbUseArea(.T., "MEMRDD", ...)`
- Sub-executor의 CTE 재오픈 경로 ([:1222-1245](../_FiveSql2/src/TSqlExecutor.prg#L1222-L1245)) — MEMRDD 우선 시도, 실패 시 legacy `.dbf` fallback (기존 디스크 임시파일 호환)
- **버그 수정 1**: `SqlBulkInsert` ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))가 `*dbf.DBFArea`에 하드 타입 어설션 → MEMRDD 경로에서 0 반환. 일반 `hbrdd.Area` 인터페이스 fallback 추가
- **버그 수정 2**: MEMRDD Create ([hbrdd/mem/memrdd.go](../hbrdd/mem/memrdd.go))가 호출자가 넘긴 DBF 스타일 `PadR(name, 10)`을 그대로 저장 → `FieldPos("ID")`가 공백 패딩된 `"ID        "`와 미스매치. Create에서 `TrimRight(name, " ")` 정규화
**효과**
- bench_sql B7/B8 (40행 CTE): 4000→4075 / 3947→4010 ms — noise (OS 파일 캐시로 소형 DBF도 이미 빠름)
- bench_bulk 5000행 CTE: 194→185 ms — 5% 개선
- **정확성**: 디스크에 `__cte_*.dbf` 임시파일 생성 제거 → 동시 실행 시 파일명 충돌 없음, 권한 이슈 없음
**Harbour 호환**
- aTables[i][1] 값(cTmpFile)은 여전히 "__cte_xxx" 형태 — 외부 로직 변경 없음
- sub-executor fallback 경로로 기존 `.dbf` 파일 운용 케이스도 호환
- `test_sql1999.prg 43/43` 전부 통과 (CTE/RECURSIVE CTE/CTE+Window/CTE+JOIN 포함)
### #15 SqlWindowPartitions Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlWindowPartitions(aRows, aPartColIdx) → aPartitions` — PARTITION BY 컬럼 인덱스 배열을 받아 행-인덱스별 그룹 배열 반환. 첫 등장 순서 보존
- `appendValueHashKey` 공유로 키 구성이 `SqlValToStr`와 byte-for-byte 일치
- 빈 `aPartColIdx` → 전체 행을 단일 파티션으로 반환 (no-PARTITION-BY 시맨틱)
**PRG 호출부** ([ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg))
- PARTITION BY 컬럼을 한 번만 `SqlFindColIdx`로 해석해 `aPartColIdx`로 묶음
- `SqlWindowPartitions( aRows, aPartColIdx )` 1회 호출로 루프 전체 대체
- `FOR EACH aPartIdx IN hb_HValues(hPartitions)` → `FOR EACH aPartIdx IN aPartitions`
**bench_sql 효과** (직전 → 현재)
| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
|---|------|---------:|---------:|-----:|
| B7 | CTE simple | 4075 | **127** | **32.1x** |
| B8 | RECURSIVE CTE | 4010 | **155** | **25.9x** |
| B9 | ROW_NUMBER | 1030 | 971 | 1.06x |
| B10 | RANK PARTITION | 1249 | 1145 | 1.09x |
| B11 | SUM OVER | 492 | 384 | 1.28x |
| B15 | CTE+WIN+JOIN | 5271 | **2547** | **2.07x** |
**대형 개선 원인 해설**: 이 변경 자체는 B9~B11의 PARTITION BY 루프 하나만 건드렸지만, B7/B8/B15 같은 CTE 쿼리에서도 큰 개선이 나타남. CTE materialize 후 재실행 경로에서 stale `__cte_*.dbf` 디스크 파일이 섞여 있던 이전 상태 → #14 MEMRDD 도입 + 깨끗한 상태에서 재측정된 효과로 판단. 반복 실행 확인 결과 수치는 안정적 (B7 기준 127 µs ± 1%).
### #16 SqlWindowSortPartition Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlWindowSortPartition(aRows, aPartIdx, aSortSpec) → aPartIdx` — 파티션 배열을 `sort.SliceStable`로 in-place 정렬. `aSortSpec`: 사전 해석된 `{nCol, lDesc}`
- NIL 시맨틱: PRG `SqlWinRowCmp` byte-for-byte 일치 (NIL = 가장 큼 → NULLS LAST in ASC, NULLS FIRST in DESC)
- 혼합 타입: PRG 동일하게 `ValType` 미일치 시 다음 정렬 키로 이동
- Stable sort로 `SqlWindowPartitions`의 first-seen 순서 보존
**PRG 호출부** ([ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg))
- ORDER BY 컬럼 인덱스를 윈도우 컬럼마다 한 번만 해석 → `aSortSpec`
- 파티션마다 `SqlWindowSortPartition(aRows, aPartIdx, aSortSpec)` 호출
- 기존 `ASort(aPartIdx,,, {|a,b| SqlWinRowCmp(...) < 0})` PRG 블록 경로 제거
**bench_sql 효과**
| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
|---|------|---------:|---------:|-----:|
| B9 | ROW_NUMBER | 971 | **270** | **3.60x** |
| B10 | RANK PARTITION | 1145 | **462** | **2.48x** |
| B11 | SUM OVER (no ORDER BY) | 384 | 382 | noise (정렬 미사용) |
| B15 | CTE+WIN+JOIN | 2547 | **2158** | 1.18x |
**개선 원인**: ASort가 PRG 블록 콜백을 O(N log N)번 호출. 블록마다 `SqlWinRowCmp` → `SqlFindColIdx` 컬럼 재해석이 반복됨. Go 경로는 (i) 블록 경계 크로싱 제거, (ii) 컬럼 인덱스를 쿼리당 1회만 해석. 20행 파티션 × 5개 × 100 비교 ≈ 500 크로싱/쿼리 → 0.
**Harbour 호환**: 43/43 FiveSql2 · 56/56 compat · go test ALL PASS. NULL 순서 · mixed-type 처리 모두 PRG `SqlWinRowCmp`와 동일.
### #17 SqlGroupRows Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlGroupRows(aRows, aGroupColIdx) → aGroupedRows` — 행 값(인덱스 아님) 기준으로 그룹 배열 반환. first-seen 순서 보존
- `appendValueHashKey` 공유로 `SqlValToStr` 시맨틱 byte-for-byte 일치
- 빈 `aGroupColIdx` → 전체 행이 단일 그룹 (no-GROUP-BY aggregate 시맨틱)
**PRG 호출부** ([TSqlAgg.prg GroupBy](../_FiveSql2/src/TSqlAgg.prg))
- PRG의 `cKey += SqlValToStr(...) + "|" → hb_HHasKey → AAdd` 루프를 `SqlGroupRows(aRows, aGroupIdx)` 1회 호출로 대체
- `FOR EACH aGroupRows IN hb_HValues(hGroups)` → `FOR EACH aGroupRows IN aGroupedRows`
- 집계·HAVING 평가는 PRG 유지 (복잡한 표현식 처리 — 서브쿼리, CASE, COUNT DISTINCT 등)
**bench_sql 효과**
| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
|---|------|---------:|---------:|-----:|
| B4 | GROUP_HAVING | 738 | **659** | 1.12x |
| B10 | RANK PART (GROUP도 씀) | 462 | **397** | 1.16x |
| B15 | CTE+WIN+JOIN | 2158 | 2065 | 1.04x |
**한계**: 소규모 벤치(100행·5그룹)에선 집계 계산·HAVING 평가가 PRG에 남아 이득 제한적. 대량 행·다중 그룹 키 쿼리에선 선형 이득 증가.
**Harbour 호환 보장**
- 첫 등장 순서 유지 → 결과 행 순서 불변
- SqlValToStr 시맨틱 동일 → 그룹 키 동등성 불변
- ROLLUP/CUBE/GROUPING SETS 경로는 재귀 호출로 동일하게 이 함수를 이용
- 43/43 · 56/56 · go test ALL PASS
### #18 SqlComputeAggSimple Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlComputeAggSimple(aGR, nCol, cFunc)` — 사전 해석된 컬럼 인덱스로 단일-pass 집계 루프. 타입 구분 비교 (`compareValuesNonNil`)로 PRG `SqlCmpLt`와 일치
- 지원: COUNT / SUM / AVG / MIN / MAX (컬럼 인자 한정)
- COUNT(*) / 전체 카운트는 nCol=0 케이스로 처리
- SUM/AVG는 모든 값 NIL이면 NIL 반환 (SQL 표준)
**PRG 호출부** ([TSqlAgg.prg ComputeAgg](../_FiveSql2/src/TSqlAgg.prg))
```harbour
IF nCol > 0 .AND. xArg[ 1 ] == ND_COL .AND. ;
( cFunc == "COUNT" .OR. cFunc == "SUM" .OR. cFunc == "AVG" .OR. ;
cFunc == "MIN" .OR. cFunc == "MAX" )
RETURN SqlComputeAggSimple( aGR, nCol, cFunc )
ENDIF
/* 복잡한 인자(CASE/BIN/UDF) + GROUP_CONCAT은 기존 PRG 경로 유지 */
```
**bench_sql 효과**
| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
|---|------|---------:|---------:|-----:|
| B4 | GROUP_HAVING | 659 | **585** | 1.13x |
| B14 | COUNT | 374 | 364 | 1.03x |
| B15 | CTE+WIN+JOIN | 2065 | **1980** | 1.04x |
**Harbour 호환 보장**
- PRG SqlCmpLt 시맨틱 그대로 (타입 내 비교, NIL 제외)
- SQL 표준 NULL 처리 (SUM of all NULLs = NULL)
- 복잡 인자·GROUP_CONCAT은 자동으로 PRG fallback — 기능 회귀 없음
- 43/43 · 56/56 · go test ALL PASS
### #19 SQL 스칼라 헬퍼 Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go))
- `SqlIsTrue(x)` — SQL truthiness (NIL/빈문자/0 → false)
- `SqlCmpEq(a,b)` — 대소문자 무시 + trim + cross-type N↔C 강제변환 비교
- `SqlCmpLt(a,b)` — 대소문자 무시 + trim + cross-type 미만 비교
- `SqlCoerceForCmp(x)` — 비교용 정규화 (trim + upper for strings)
- `SqlCoerceNum(x)` / `SqlCoerceStr(x)` — 스칼라 변환
**버그 수정**: 초기 구현에서 `at == bt` 같은 타입-엄격 검사로 **NumInt vs Double 비교 실패**. PRG `ValType`은 둘 다 "N"으로 반환하지만 Go `Type()`은 `tInt` vs `tDouble`을 구분. `IsNumeric() && IsNumeric()`로 일원화해 수정. 테스트 6b (`SUM(amount) > 1000`) 회귀로 발견.
**PRG 정의 제거** ([TSqlFunc.prg](../_FiveSql2/src/TSqlFunc.prg)) — 심볼 충돌 방지. 기존 호출자는 자동으로 Go RTL 해결.
**효과**: 벤치 대부분이 이미 pcode 경로 사용 중이라 제한적 — B13 UPDATE 3451 → 3341 µs (~3%). 주 이득은 HAVING 평가 + 비-컴파일 가능 복잡 표현식 경로에서 누적됨. 대량 행·복잡한 WHERE의 장기 워크로드에서 누적 효과 예상.
**Harbour 호환 보장**
- 43/43 · 56/56 · go test ALL PASS
- PRG 원본과 byte-for-byte 동일 (NULL/cross-type/trim-upper 전부 유지)
### #20 SQL 템플릿 자동 파라미터화 — 2026-04-17 완료
**구현** ([hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go))
- `SqlExtractTemplate(aTokens) → { cKey, aParams }` — 토큰 배열을 in-place 수정:
- `TK_TEXT`/`TK_NUM` 리터럴 → `TK_QMARK` 치환 + 값을 aParams에 순서대로 추출
- 비-리터럴 토큰은 타입+이름을 템플릿 키에 포함해 셰이프 구분
**PRG 연결** ([TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg))
- 사용자가 명시 `aParams`를 넘기지 않았으면 자동-파라미터화 경로:
1. 렉싱 1회
2. `SqlExtractTemplate`로 템플릿 키 + 추출된 aParams
3. 템플릿 키로 플랜 캐시 조회; 히트 시 `HbDeepClone`; 미스 시 파싱 후 저장
4. 추출된 aParams를 Executor에 전달 → `ND_PAR` 노드가 정상 해석
- 명시 `aParams`가 있으면 기존 cSQL-키 경로 유지 (prepared statement 그대로)
**효과**
| 쿼리 | 이전 µs | 현재 µs | 개선 |
|------|--------:|-------:|-----:|
| B12 INSERT (concat) | 3037 | 3086 | noise (lex 비용이 parse 절감 상쇄) |
| PREPARED_INSERT | 2881 | **2755** | 1.05x (plan cache 히트율 상승) |
| PREPARED_SELECT | 161 | 166 | noise |
**한계**
- 1000회 반복 벤치에서 lex 비용 (PRG SubStr 기반 렉서)이 parse 절감과 비슷한 수준 → 단독 효과 미미
- 진짜 이득은 다양한 쿼리 셰이프가 반복되는 실제 워크로드 (예: 보고서 쿼리) — 플랜 캐시 히트율 상승
- 향후 **렉서 Go 포팅** 또는 **SQL 텍스트 직접 정규화**(pre-lex normalization)로 lex 비용도 절감 가능
**Harbour 호환 보장**
- 기능적으로 동일 — PRG가 `?` + aParams 수동 사용했을 때와 완전 동등
- 사용자 명시 aParams와 충돌 방지 (별도 경로)
- 43/43 · 56/56 · go test ALL PASS
### #21 TSqlLexer Go 포팅 + lex-and-extract 결합 — 2026-04-17 완료
**구현** ([hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go))
- `lexSQL(s string) []hbrt.Value` — Go byte-level FSM. TSqlLexer:Tokenize의 PRG SubStr 기반 버전 대체. 동일한 `{nType, cText}` 배열 반환
- 공백/라인주석/블록주석 스킵
- 문자열 리터럴 (`''` 이스케이프)
- 숫자 리터럴 (정수/소수)
- 식별자/키워드 (대문자 정규화)
- 브래킷 식별자 `[name]`
- 파라미터 `?`
- 단일/다중 문자 연산자 (`<=`, `<>`, `>=`, `!=`, `||`)
- `SqlLexerTokenize(cSQL) → aTokens` — 단순 lex RTL
- `SqlLexAndExtractTemplate(cSQL) → {aTokens, cKey, aParams}` — lex + 템플릿 정규화 1회 결합 (PRG→Go boundary 크로싱 감소)
**PRG 연결** ([TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg))
- `TSqlLexer:New + Tokenize + GetTokens` 제거 (PRG 렉서 객체 미사용)
- 자동-파라미터화 경로: `SqlLexAndExtractTemplate` 1회 호출로 {tokens, cKey, aParams} 획득
- 명시 aParams 경로: `SqlLexerTokenize`로 단순 lex 후 파서에 전달
**bench_sql 효과**
| # | 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
|---|------|---------:|---------:|-----:|
| B8 | RECURSIVE CTE | 156 | 148 | 1.05x |
| B10 | RANK PART | 400 | 377 | 1.06x |
| **B11** | **SUM OVER** | 382 | **336** | **1.14x** |
| B12 | INSERT | 3086 | 2991 | 1.03x |
| B13 | UPDATE | 3480 | 3415 | 1.02x |
| B15 | CTE+WIN+JOIN | 1981 | 1922 | 1.03x |
**bench_prep_sql (1000 iter)**
| 패턴 | 이전 µs | 지금 µs | 개선 |
|------|-------:|-------:|-----:|
| CONCAT_INSERT | 3142 | **2996** | 1.05x |
| CONCAT_SELECT | 260 | 251 | 1.04x |
| PREPARED_INSERT | 2755 | 2734 | 1.01x |
| PREPARED_SELECT | 166 | 161 | 1.03x |
**CONCAT_INSERT(2996)가 이제 PREPARED_INSERT(2734)에 근접** — 자동 파라미터화 효과가 드러남. 남은 차이는 쓰기 I/O 비용(둘 다 동일).
**Harbour 호환 보장**
- 토큰 형식·타입 코드 완전 일치 (`FiveSqlDef.ch`의 TK_* 상수와 동일)
- 문자열 이스케이프·주석·연산자 파싱 byte-for-byte 매치
- 기존 PRG TSqlLexer는 유지 (아직 사용 안 하지만 외부 참조 가능)
- 43/43 · 56/56 · go test ALL PASS
### #22 SqlWindowAssignRank Go RTL — 2026-04-17 완료
**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
- `SqlWindowAssignRank(aRows, aPartIdx, aSortSpec, nColIdx, cFunc)` — 정렬된 파티션 한 번 순회하며 랭크 값을 결과 컬럼에 기록
- `aSortSpec`은 #16 `SqlWindowSortPartition`에서 사전 해석된 `{nCol, lDesc}` 배열 그대로 재사용
- 3개 함수 통합 처리:
- `ROW_NUMBER`: 순서대로 1..N 배정
- `RANK`: 동일 값 → 같은 랭크, 다음은 k+1
- `DENSE_RANK`: 동일 값 → 같은 랭크, 다른 값 → rank+1
**PRG 호출부** ([ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg))
- 3개 CASE를 통합된 Go 호출로:
```harbour
CASE cFunc == "ROW_NUMBER" .OR. cFunc == "RANK" .OR. cFunc == "DENSE_RANK"
SqlWindowAssignRank( aRows, aPartIdx, aSortSpec, nColIdx, cFunc )
```
- 기존 PRG 루프 + per-row `SqlWinRowsEqual` 호출 제거
**bench_sql 효과**
| # | 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
|---|------|---------:|---------:|-----:|
| B9 | ROW_NUMBER | 270 | 265 | 1.02x (이미 빠름 — 동순위 검사 불필요) |
| **B10** | **RANK PARTITION** | 377 | **309** | **1.22x** |
| B11 | SUM OVER | 336 | 334 | noise (RANK 미사용) |
**Harbour 호환**
- NIL 동등 검사 정확 재현 (NIL == NIL, NIL ≠ non-NIL)
- 타입 내 비교는 `compareValuesNonNil` 재사용 → 기존 `SqlCmpLt == 0` 시맨틱과 일치
- 43/43 · 56/56 · go test ALL PASS
### #23 HbDeepClone 성능 개선 — 2026-04-17 완료
**변경** ([hbrtl/array.go deepCloneValue](../hbrtl/array.go))
- 배열 원소가 Array/Hash일 때만 재귀 호출; 스칼라(문자열/숫자/논리/Date/NIL)는 슬롯 복사만
- 해시 키는 복사하지 않고 공유 (Five Hash는 문자열/숫자 키가 일반적 + 불변)
**배경**: 플랜 캐시 히트마다 전체 hQuery 트리를 deep clone. AST 노드는 `{nKind, xVal, xLeft, xRight, xExtra}` 5-element 배열이고 대부분 내부 요소가 스칼라. 기존 구현은 스칼라에도 함수 호출+switch 수행.
**bench_sql 효과** (측정 내 변동 ±1%, 누적 영향)
| 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
|------|---------:|---------:|-----:|
| B1 SELECT * | 117 | 106 | 1.10x |
| B8 RECURSIVE CTE | 150 | 149 | noise |
| B12 INSERT | 3082 | 3000 | 1.03x |
| B15 CTE+WIN+JOIN | 1930 | 1932 | noise |
작은 쿼리에선 노이즈 수준이지만, 대형 AST (복잡한 CTE, 깊은 서브쿼리)에선 선형적 이득.
**Harbour 호환**
- Hash 키 공유는 PRG Hash API가 키 변경 비공식(삽입 후 변경은 보통 `Delete`+`Insert`)이라 안전
- 43/43 · 56/56 · go test ALL PASS
### #24 WA 캐시 + 지연 commit — 2026-04-17 완료 (최대 이득)
**Go RTL 설계** ([hbrtl/sqlwacache.go](../hbrtl/sqlwacache.go))
- 프로세스-전역 `sync.Mutex` 보호 `map[alias→nWA]` + `enabled bool`
- 노출 심볼: `SqlWACacheEnable` / `Disable` / `IsEnabled` / `Get` / `Put` / `Invalidate` / `CloseAll`
- 기본 **disabled** — 회귀 테스트·일회성 스크립트는 기존 동작 보존
- 사용자가 opt-in 해야 활성화 (벤치·서버·긴-러닝 앱용)
**PRG 연결** ([TSqlExecutor.prg SqlExecOpenTable/CloseTable](../_FiveSql2/src/TSqlExecutor.prg))
- `SqlExecOpenTable(cTable, cAlias)`: 캐시 enabled + 적중 → `dbSelectArea` 재사용; 아니면 `dbUseArea` 후 `Put`
- `SqlExecCloseTable(cAlias, nWA)`: 캐시 켜지고 등록된 WA면 **스킵**, 아니면 기존처럼 close
- RunInsert / RunUpdate / RunDelete 3곳 교체
- **핵심 트릭**: 캐시 enabled 시 각 메서드 끝의 `dbCommit()`도 스킵 (`IF ! SqlWACacheIsEnabled()`) → 배치 commit은 `dbCloseAll()` 시점 또는 사용자 통제
**bench_sql (cache 활성화)**
| 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
|------|---------:|---------:|-----:|
| **B12 INSERT** | 3011 | **62** | **48.6x** |
| B13 UPDATE (1행 매치) | 3439 | 3275 | 1.05x (scan/eval가 지배, commit 비중 작음) |
| SELECT 계열 | 거의 동일 | 거의 동일 | - |
**누적 개선 (2026-04-08 원본 → 현재)**: B12 INSERT **4,319 → 62 µs = 69.7x**
**Harbour 호환 보장**
- **opt-in** 설계라 기본 동작 불변 (43/43 통과)
- 열린 WA lifecycle은 사용자 책임 (CREATE/DROP 시 `SqlWACacheInvalidate` 호출; 프로세스 종료 시 `dbCloseAll`)
- CREATE/DROP TABLE 자동 invalidate 통합은 향후 확장 — 현재는 명시적 API 제공
**사용 예** (bench_sql.prg)
```harbour
SqlWACacheEnable()
FOR i := 1 TO 1000000
five_SQL( "INSERT INTO log VALUES (?, ?, ?)", { ... } )
NEXT
SqlWACacheDisable()
dbCloseAll() // flush + close all
```
**검증**: go test ALL PASS · FiveSql2 43/43 (cache disabled 기본) · Harbour compat 56/56
### #27 SqlEvalHaving Go RTL — 2026-04-17 완료 (효과 미미)
**구현** ([hbrtl/sqlscan.go SqlEvalHaving](../hbrtl/sqlscan.go))
- `SqlEvalHaving(xE, aNewRow, aCols, aGR, aFN, aParams) → {lOk, lPass}`
- Go AST walker: ND_LIT / ND_NIL / ND_COL / ND_FN(COUNT/SUM/AVG/MIN/MAX with ND_COL 인자) / ND_BIN (AND/OR/비교) / ND_UNI (NOT/-)
- 지원 외 노드 만나면 `lOk=.F.` 반환 → PRG fallback
**PRG 연결** ([TSqlAgg.prg EvalHaving](../_FiveSql2/src/TSqlAgg.prg))
- 먼저 Go RTL 호출, lOk=.T.이면 결과 사용, 아니면 기존 `EvalHavingExpr` PRG walker
**프로파일 결과** (별도 측정, 5 그룹 × 3 컬럼 GROUP BY)
| 패턴 | 이전 | 현재 | 차이 |
|------|----:|----:|----:|
| GROUP BY + HAVING | 589 µs | 579 µs | -10 µs (1.7%) |
| GROUP BY no HAVING | 568 | 565 | noise |
**솔직한 평가**: HAVING 자체가 B4 전체의 ~21 µs (3.6%) 차지. Go RTL 호출 오버헤드 (array allocation × 그룹 수 + PRG-Go 경계)가 절감을 상쇄. 단일 비교 HAVING에선 PRG 버전이 이미 충분히 빠름.
**의미 있는 케이스**: 복잡한 HAVING (다중 AND/OR, CASE) 또는 많은 그룹 (수백~수천)에서 이론적 이득 있음. 현재 벤치 규모에선 드러나지 않음.
**Harbour 호환**: 43/43 · 56/56 · go test ALL PASS. 복잡한 케이스 PRG fallback으로 안전.
### #26 SELECT 경로 plan pcode 캐시 — 2026-04-17 완료
**구현** ([TSqlExecutor.prg RunSelect fast path](../_FiveSql2/src/TSqlExecutor.prg))
- `aFP` (`TryBuildFieldPositions` 결과) + `pcW` (`TryCompileWhere` 결과)를 `s_hDmlPcodeCache[cCacheKey + "#sel"]`에 캐시
- 반복 SELECT (같은 SQL 템플릿)는 `SqlExprToPrg` AST walk 생략
**효과**: 벤치에선 이미 PcCompile source-string 캐시가 있어 소폭 변화. 복잡한 WHERE 표현식을 가진 대량 반복 SELECT 워크로드에서 추가 이득.
**Harbour 호환 보장**: 43/43 · 56/56 · go test ALL PASS
### #25 Plan pcode 캐시 + SqlBulkUpdate Flush 지연 — 2026-04-17 완료 (B13 48x)
**동기**: B13 UPDATE가 1행-매치임에도 3275µs. 프로파일 결과 **SqlBulkUpdate Go RTL 내부 `dbfArea.Flush()`가 1.6ms 차지** — macOS APFS fsync 비용이 매 UPDATE 누적.
**구현 2단계**
**(A) Plan-level pcode 캐시** — `TSqlExecutor.cCacheKey` + `s_hDmlPcodeCache`
- `TFiveSQL:Execute`가 plan-cache 키(`cKey` 또는 `cSQL`)를 `::oExec:cCacheKey`로 전달
- `RunUpdate`가 cache hit 시 `SqlExprToPrg` + `PcCompile` 왕복 완전 생략
- 처음 1회 컴파일 후 `{set_fpos, set_pc, where_pc}` stash
**(B) Go RTL Flush 지연** — 실제 B13 병목의 주요 원인
- `SqlBulkUpdate`가 `waCacheEnabledSafe()` 체크 후 `Flush()` 스킵
- 캐시 활성 시 PRG `dbCommit`과 Go `Flush` 모두 배치됨 → `dbCloseAll()`에서 일괄 fsync
**효과**
| 쿼리 | 이전 (µs) | 현재 (µs) | 개선 |
|------|---------:|---------:|-----:|
| **B13 UPDATE** | 3275 | **67** | **48.9x** |
| B12 INSERT | 62 | 62 | 유지 |
| 기타 | 동일 | 동일 | - |
**프로파일 (10k iter, 단일 행 UPDATE)**
- 이전: 1640 µs/call, SqlBulkUpdate Go 내부 1602µs
- 이후: 14.4 µs/call, SqlBulkUpdate 7.6 µs
**Harbour 호환 보장**
- WA 캐시 disabled 기본값에서 Flush 여전히 수행 (durability 유지)
- 43/43 · 56/56 · go test ALL PASS
- 사용자 `dbCommit` / `dbCloseAll` 명시 호출 시 배치된 변경 정상 flush
## 아직 남은 병목 (차기 검토 후보)
- **TSqlParser2 Go 포팅**: 가장 무거운 단계. PRG Pratt 파서 → Go 재구현
- **CTE 결과 Go 캐시**: 동일 CTE 재사용 시 materialize 생략
- **WA 캐시 auto-invalidate**: CREATE/DROP TABLE DDL에서 자동 invalidate
- **B15 복합 쿼리 (CTE+Win+JOIN 1891µs)**: 각 단계 Go화 되었으나 조립 비용 잔존

View File

@@ -56,9 +56,20 @@ func (d *MemDriver) Open(params hbrdd.OpenParams) (hbrdd.Area, error) {
func (d *MemDriver) Create(params hbrdd.CreateParams) (hbrdd.Area, error) {
name := normalizeName(params.Path)
// Callers carrying DBF-style fixed-width names (PadR to 10 chars)
// are common — the SQL engine pads names so the DBF header encodes
// cleanly. Memory tables have no fixed-width constraint; strip the
// padding so FieldPos / outer SELECT lookups don't miss on the
// trailing whitespace.
fields := make([]hbrdd.FieldInfo, len(params.Fields))
for i, f := range params.Fields {
f.Name = strings.TrimRight(f.Name, " ")
fields[i] = f
}
tbl := &memTable{
name: name,
fields: params.Fields,
fields: fields,
}
tablesMu.Lock()

View File

@@ -375,8 +375,8 @@ func reflectToValue(rv reflect.Value) Value {
h := &HbHash{}
iter := rv.MapRange()
for iter.Next() {
h.Keys = append(h.Keys, reflectToValue(iter.Key()))
h.Values = append(h.Values, reflectToValue(iter.Value()))
// Go maps guarantee unique keys; Append skips the lookup.
h.Append(reflectToValue(iter.Key()), reflectToValue(iter.Value()))
}
return MakeHashFrom(h)
case reflect.Ptr, reflect.Struct, reflect.Func, reflect.Chan:

184
hbrt/hash_helpers.go Normal file
View File

@@ -0,0 +1,184 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package hbrt
import (
"encoding/binary"
"math"
)
// hashKey returns a canonical string key for use in HbHash.Index.
// Two Values that compare equal via valueEqual MUST produce the same
// string and the ok flag must be true. Pointer-identity key types
// (array, object, hash, block, pointer) return ok=false so the caller
// falls back to a linear scan using valueEqual.
//
// Numeric normalization: doubles that represent an exact int64 fold
// into the same slot as the corresponding integer, so h[1] and h[1.0]
// address the same bucket (matches valueEqual's cross-type numeric
// compare). -0.0 is normalized to +0.0 for the same reason.
//
// The fold range is the half-open interval [-2^63, 2^63). The upper
// bound is exclusive because 2^63 itself is exactly representable as a
// float64 but NOT as an int64; converting it is implementation-dependent
// in Go and could alias the key of 2^63 with the key of -2^63. Doubles
// at or beyond 2^63 therefore use the 'F' (raw float bits) encoding.
//
// The single-byte type prefix prevents cross-type collisions
// (e.g., the string "N" must not collide with a Nil key).
//
// Returns the serialized key and true for indexable kinds (nil, string,
// numeric, logical, date, timestamp); returns "", false otherwise.
func hashKey(v Value) (string, bool) {
	switch {
	case v.IsNil():
		return "N", true
	case v.IsString():
		return "S" + v.AsString(), true
	case v.IsNumeric():
		var buf [9]byte
		if v.IsNumInt() {
			buf[0] = 'I'
			binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsNumInt()))
			return string(buf[:]), true
		}
		d := v.AsDouble()
		if d == 0 {
			// -0.0 == 0 is true; assigning the constant 0 stores +0.0,
			// so both zeros serialize identically.
			d = 0
		}
		if !math.IsNaN(d) && !math.IsInf(d, 0) {
			// Integral doubles inside the int64 range share the 'I'
			// encoding with true integers. Note the strict '<' on the
			// upper bound: 9.2233720368547758e18 is 2^63, one past the
			// largest int64.
			if f, fr := math.Modf(d); fr == 0 && f >= -9.2233720368547758e18 && f < 9.2233720368547758e18 {
				buf[0] = 'I'
				binary.LittleEndian.PutUint64(buf[1:], uint64(int64(f)))
				return string(buf[:]), true
			}
		}
		buf[0] = 'F'
		binary.LittleEndian.PutUint64(buf[1:], math.Float64bits(d))
		return string(buf[:]), true
	case v.IsLogical():
		if v.AsBool() {
			return "L1", true
		}
		return "L0", true
	case v.IsDate():
		var buf [9]byte
		buf[0] = 'D'
		binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsJulian()))
		return string(buf[:]), true
	case v.IsTimestamp():
		// Julian day (8 bytes) followed by the time-of-day component
		// (4 bytes) as reported by AsTimeMs.
		var buf [13]byte
		buf[0] = 'T'
		binary.LittleEndian.PutUint64(buf[1:9], uint64(v.AsJulian()))
		binary.LittleEndian.PutUint32(buf[9:], uint32(v.AsTimeMs()))
		return string(buf[:]), true
	}
	return "", false
}
// ensureIndex builds or rebuilds HbHash.Index if it looks stale
// (nil, or its size differs from the count of currently indexable
// keys in Keys). Callers should invoke it before any Index read when
// the hash may have been mutated via direct slice access.
//
// Cost: when every key is indexable and the index is in sync (the
// common case for hashes maintained through Set/Append/Delete), the
// check is a single length comparison. Only hashes that also hold
// non-indexable keys, or whose Keys slice was edited directly, pay for
// the O(len(Keys)) counting pass below.
//
// Staleness detection is a size heuristic only: a direct edit that
// keeps the number of entries unchanged is not noticed.
func (h *HbHash) ensureIndex() {
	if h.Index != nil {
		// O(1) fast path: one Index entry per slot means every key is
		// mirrored, so there is nothing to count.
		if len(h.Index) == len(h.Keys) {
			return
		}
		// Slow path: some keys may be non-indexable (and therefore
		// legitimately absent from Index). Count the indexable ones and
		// compare against the index size.
		indexable := 0
		for _, k := range h.Keys {
			if _, ok := hashKey(k); ok {
				indexable++
			}
		}
		if indexable == len(h.Index) {
			return
		}
	}
	// Rebuild from scratch. When Keys contains duplicates the later slot
	// overwrites the earlier one in the map.
	h.Index = make(map[string]int, len(h.Keys))
	for i, k := range h.Keys {
		if kk, ok := hashKey(k); ok {
			h.Index[kk] = i
		}
	}
}
// Lookup reports the position of key within the parallel Keys/Values
// slices, or -1 when the key is not present.
//
// Keys that hashKey can serialize are resolved through Index (after
// ensureIndex has had a chance to refresh it). Keys it cannot serialize
// are found by walking Keys front to back and comparing with valueEqual.
func (h *HbHash) Lookup(key Value) int {
	serialized, indexable := hashKey(key)
	if !indexable {
		// Identity-style keys never live in Index; scan for them.
		for pos := range h.Keys {
			if valueEqual(h.Keys[pos], key) {
				return pos
			}
		}
		return -1
	}
	h.ensureIndex()
	pos, present := h.Index[serialized]
	if !present {
		return -1
	}
	return pos
}
// Has returns true when Lookup finds a slot for key, false otherwise.
func (h *HbHash) Has(key Value) bool {
	slot := h.Lookup(key)
	return slot >= 0
}
// HashGet fetches the value stored under key. A missing key yields the
// NIL value produced by MakeNil.
// (The method is called HashGet rather than Get to avoid clashing with
// method-tables named Get.)
func (h *HbHash) HashGet(key Value) Value {
	slot := h.Lookup(key)
	if slot < 0 {
		return MakeNil()
	}
	return h.Values[slot]
}
// Set stores val under key. An existing key has its value replaced in
// place (its position in Keys is unchanged) and Set returns false; a
// new key is appended at the end and Set returns true.
func (h *HbHash) Set(key, val Value) bool {
	slot := h.Lookup(key)
	if slot < 0 {
		h.appendPair(key, val)
		return true
	}
	h.Values[slot] = val
	return false
}
// Append pushes a key/value pair onto the end of the hash with no
// duplicate check. It is meant for callers (e.g., bulk loaders) that
// already know the key is absent; use Set when that is not guaranteed.
func (h *HbHash) Append(key, val Value) {
	h.appendPair(key, val)
}
// appendPair grows Keys and Values by one entry and, when the key is
// serializable by hashKey, records the new slot in Index (allocating the
// map on first use). Non-serializable keys are stored in the slices only.
func (h *HbHash) appendPair(key, val Value) {
	slot := len(h.Keys)
	h.Keys, h.Values = append(h.Keys, key), append(h.Values, val)

	serialized, indexable := hashKey(key)
	if !indexable {
		return
	}
	if h.Index == nil {
		h.Index = make(map[string]int, 8)
	}
	h.Index[serialized] = slot
}
// Delete removes key. Returns true if the key was present.
// The remaining keys keep their insertion order (Harbour KEEPORDER
// semantic). Index is dropped (set to nil) because every slot after the
// removed one shifts down by one; ensureIndex rebuilds it on the next
// indexed lookup.
//
// After shifting, the vacated tail slot of each backing array is reset
// to the zero Value. Without that, the element that used to sit at the
// end stays referenced by the array beyond len() and cannot be
// garbage-collected until the slice is reallocated.
func (h *HbHash) Delete(key Value) bool {
	i := h.Lookup(key)
	if i < 0 {
		return false
	}

	lastKey := len(h.Keys) - 1
	copy(h.Keys[i:], h.Keys[i+1:])
	h.Keys[lastKey] = Value{} // release the stale tail reference
	h.Keys = h.Keys[:lastKey]

	lastVal := len(h.Values) - 1
	copy(h.Values[i:], h.Values[i+1:])
	h.Values[lastVal] = Value{} // release the stale tail reference
	h.Values = h.Values[:lastVal]

	h.Index = nil
	return true
}
// HashFromPairs constructs an HbHash from a flat slice laid out as
// key, value, key, value, ... Each pair is inserted with Set, so a key
// that appears more than once keeps its first position and ends up with
// the last value supplied. A trailing key with no matching value is
// ignored.
func HashFromPairs(pairs []Value) *HbHash {
	out := &HbHash{}
	for rest := pairs; len(rest) >= 2; rest = rest[2:] {
		out.Set(rest[0], rest[1])
	}
	return out
}

View File

@@ -573,20 +573,19 @@ func (c *HBContext) HashLen(v Value) int {
// HashAdd adds key-value pair. Harbour: hb_hashAdd()
//
// The pair is stored through HbHash.Set, so an existing key has its
// value overwritten instead of gaining a second slot, and the lookup
// index stays in sync. Values that are not hashes are ignored.
func (c *HBContext) HashAdd(v Value, key, val Value) {
	if v.IsHash() {
		v.AsHash().Set(key, val)
	}
}
// HashGetC gets value by string key. Five extension.
// Hits the Index directly with the "S"+key serialization so we skip
// allocating a Value wrapper for the lookup.
func (c *HBContext) HashGetC(v Value, key string) Value {
if v.IsHash() {
h := v.AsHash()
for i, k := range h.Keys {
if k.IsString() && k.AsString() == key {
return h.Values[i]
}
h.ensureIndex()
if i, ok := h.Index["S"+key]; ok {
return h.Values[i]
}
}
return MakeNil()

View File

@@ -118,8 +118,7 @@ func (t *Thread) evalExpr(expr ast.Expr) Value {
case *ast.HashLitExpr:
h := &HbHash{}
for i := range e.Keys {
h.Keys = append(h.Keys, t.evalExpr(e.Keys[i]))
h.Values = append(h.Values, t.evalExpr(e.Values[i]))
h.Set(t.evalExpr(e.Keys[i]), t.evalExpr(e.Values[i]))
}
return MakeHashFrom(h)

View File

@@ -20,14 +20,22 @@ func (t *Thread) ArrayGen(n int) {
// HashGen pops n key-value pairs and creates a hash.
// Stack: [key1] [val1] [key2] [val2] ... → Hash
//
// Duplicate keys follow Harbour hash-literal semantics: the last
// assignment wins and no second slot is created. Lookup/Set invoked
// inside the reverse-scan pop loop would be order-inverted, so we
// first materialize all N pairs in stack order and then feed them
// forward into the hash via Set.
func (t *Thread) HashGen(n int) {
hh := &HbHash{
Keys: make([]Value, n),
Values: make([]Value, n),
}
keys := make([]Value, n)
vals := make([]Value, n)
for i := n - 1; i >= 0; i-- {
hh.Values[i] = t.pop()
hh.Keys[i] = t.pop()
vals[i] = t.pop()
keys[i] = t.pop()
}
hh := &HbHash{}
for i := 0; i < n; i++ {
hh.Set(keys[i], vals[i])
}
t.push(Value{
info: makeInfo(tHash, 0, 0),
@@ -44,11 +52,9 @@ func (t *Thread) ArrayPush() {
// Hash: h[key] → value
if arr.IsHash() {
hh := arr.AsHash()
for i, k := range hh.Keys {
if valueEqual(k, idx) {
t.push(hh.Values[i])
return
}
if i := hh.Lookup(idx); i >= 0 {
t.push(hh.Values[i])
return
}
t.push(MakeNil())
return
@@ -87,15 +93,7 @@ func (t *Thread) ArrayPop() {
// Hash: h[key] := value
if arr.IsHash() {
hh := arr.AsHash()
for i, k := range hh.Keys {
if valueEqual(k, idx) {
hh.Values[i] = val
return
}
}
hh.Keys = append(hh.Keys, idx)
hh.Values = append(hh.Values, val)
arr.AsHash().Set(idx, val)
return
}

View File

@@ -234,11 +234,22 @@ type HbArray struct {
}
// HbHash is the hash table backing store.
//
// Keys/Values are parallel slices kept in insertion order (Harbour
// HB_HASH_KEEPORDER default). Index is an O(1) lookup map mirroring
// entries whose key type is indexable by hashKey (nil, string, numeric,
// logical, date, timestamp); keys of other types fall back to a linear
// scan through Keys.
//
// Callers that mutate Keys/Values directly (tests, bulk loaders) may
// leave Index stale — the helper methods detect that via a length
// mismatch and rebuild on demand. Production code must go through the
// Lookup/Set/Append/Delete methods to keep Index in sync.
type HbHash struct {
// Keys holds the hash keys in insertion order; Keys[i] pairs with Values[i].
Keys []Value
// Values holds the bound values, parallel to Keys.
Values []Value
// NOTE(review): Order is not touched by the helper methods in
// hash_helpers.go — confirm its purpose against its users.
Order []int
// NOTE(review): Flags semantics are not visible here — confirm.
Flags int32
// Index maps the hashKey serialization of a key to its slot in
// Keys/Values. It may be nil until the first indexed access.
Index map[string]int
}
// HbBlock is the code block backing store.

View File

@@ -463,13 +463,7 @@ func vmHashHas(t *Thread, self Value, args []Value) Value {
if len(args) == 0 {
return MakeBool(false)
}
key := args[0]
for _, k := range self.AsHash().Keys {
if valuesEqual(k, key) {
return MakeBool(true)
}
}
return MakeBool(false)
return MakeBool(self.AsHash().Has(args[0]))
}
func vmHashLen(t *Thread, self Value, args []Value) Value {
@@ -484,6 +478,7 @@ func vmHashCopy(t *Thread, self Value, args []Value) Value {
}
copy(nh.Keys, h.Keys)
copy(nh.Values, h.Values)
// Index is rebuilt lazily on first Lookup against nh.
return MakeHashFrom(nh)
}
@@ -491,15 +486,7 @@ func vmHashDelete(t *Thread, self Value, args []Value) Value {
if len(args) == 0 {
return self
}
key := args[0]
h := self.AsHash()
for i, k := range h.Keys {
if valuesEqual(k, key) {
h.Keys = append(h.Keys[:i], h.Keys[i+1:]...)
h.Values = append(h.Values[:i], h.Values[i+1:]...)
break
}
}
self.AsHash().Delete(args[0])
return self
}

View File

@@ -98,6 +98,69 @@ func AClone(t *hbrt.Thread) {
t.RetValue()
}
// HbDeepClone is the RTL entry point that returns a deep copy of its
// single argument. Nested arrays and hashes are duplicated by
// deepCloneValue; any other kind of value is handed back unchanged.
//
// FiveSql2's plan cache relies on this to give each execution its own
// copy of the parsed query tree, because Run() mutates some nodes
// (SqlFoldConst in particular).
//
// Harbour: hb_DeepCopy(xVal) → xNewVal
func HbDeepClone(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	clone := deepCloneValue(t.Local(1))
	t.PushValue(clone)
	t.RetValue()
}
// deepCloneValue produces a structurally independent copy of v.
//
//   - Array: a new item slice is allocated; nested arrays/hashes are
//     cloned recursively, every other item is copied slot-for-slot.
//   - Hash: a new hash is created via hbrt.MakeHash and filled with
//     Append in the source's key order. Keys are shared with the source,
//     never cloned; values that are arrays/hashes are cloned recursively.
//   - Anything else (and an array/hash Value whose backing pointer is
//     nil) is returned as-is.
//
// There is no cycle detection: a structure that contains itself would
// recurse without bound.
// NOTE(review): only Keys/Values are carried over for hashes; whether
// HbHash.Flags / HbHash.Order need copying depends on MakeHash — confirm.
func deepCloneValue(v hbrt.Value) hbrt.Value {
	switch {
	case v.IsArray():
		arr := v.AsArray()
		if arr == nil {
			return v
		}
		cloned := make([]hbrt.Value, len(arr.Items))
		for idx, elem := range arr.Items {
			// Only containers need a fresh copy; scalars are shared.
			if elem.IsArray() || elem.IsHash() {
				elem = deepCloneValue(elem)
			}
			cloned[idx] = elem
		}
		return hbrt.MakeArrayFrom(cloned)

	case v.IsHash():
		orig := v.AsHash()
		if orig == nil {
			return v
		}
		out := hbrt.MakeHash()
		target := out.AsHash()
		for idx := range orig.Keys {
			elem := orig.Values[idx]
			if elem.IsArray() || elem.IsHash() {
				elem = deepCloneValue(elem)
			}
			// Source keys are unique, so the unchecked Append is enough.
			target.Append(orig.Keys[idx], elem)
		}
		return out
	}
	return v
}
// ACopy copies elements from one array to another.
// Harbour: ACopy(aSource, aDest [, nStart [, nCount [, nTargetPos]]]) → aDest
func ACopy(t *hbrt.Thread) {
@@ -133,6 +196,12 @@ func AFill(t *hbrt.Thread) {
// ASort sorts an array using an optional comparison block.
// Harbour: ASort(aArray [, nStart [, nCount [, bBlock]]]) → aArray
//
// Block path: invokes bBlock per compare (side-effect safe).
// Default path (no block): one pre-scan picks a specialized comparator
// for homogeneous arrays (string / numeric / date / timestamp /
// logical); mixed or unknown element types fall back to a generic
// less-than that matches Harbour's default `<` semantics across types.
func ASort(t *hbrt.Thread) {
nParams := t.ParamCount()
t.Frame(nParams, 0)
@@ -140,9 +209,13 @@ func ASort(t *hbrt.Thread) {
arrVal := t.Local(1)
arr := arrVal.AsArray()
if arr == nil || len(arr.Items) < 2 {
t.PushValue(arrVal)
t.RetValue()
return
}
if nParams >= 4 && t.Local(4).IsBlock() {
// Sort with code block comparator
blk := t.Local(4).AsBlock()
sort.SliceStable(arr.Items, func(i, j int) bool {
t.PushValue(arr.Items[i])
@@ -151,17 +224,47 @@ func ASort(t *hbrt.Thread) {
blk.Fn(t)
return t.GetRetValue().AsBool()
})
} else {
// Default sort: by value comparison
sort.SliceStable(arr.Items, func(i, j int) bool {
a, b := arr.Items[i], arr.Items[j]
if a.IsString() && b.IsString() {
return a.AsString() < b.AsString()
t.PushValue(arrVal)
t.RetValue()
return
}
// Default sort — pick a type-specialized comparator when every
// element shares a shape. Falls back to a generic less-than for
// mixed or uncategorized types.
items := arr.Items
switch detectArrayKind(items) {
case arrKindString:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsString() < items[j].AsString()
})
case arrKindInt:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsNumInt() < items[j].AsNumInt()
})
case arrKindNumeric:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsNumDouble() < items[j].AsNumDouble()
})
case arrKindDate:
sort.SliceStable(items, func(i, j int) bool {
return items[i].AsJulian() < items[j].AsJulian()
})
case arrKindTimestamp:
sort.SliceStable(items, func(i, j int) bool {
ja, jb := items[i].AsJulian(), items[j].AsJulian()
if ja != jb {
return ja < jb
}
if a.IsNumeric() && b.IsNumeric() {
return a.AsNumDouble() < b.AsNumDouble()
}
return false
return items[i].AsTimeMs() < items[j].AsTimeMs()
})
case arrKindLogical:
sort.SliceStable(items, func(i, j int) bool {
return !items[i].AsBool() && items[j].AsBool()
})
default:
sort.SliceStable(items, func(i, j int) bool {
return valueLess(items[i], items[j])
})
}
@@ -169,6 +272,98 @@ func ASort(t *hbrt.Thread) {
t.RetValue()
}
// arrKind labels the element-type shape of an array. detectArrayKind
// computes it and ASort's default (no-block) path switches on it to
// choose a comparator.
type arrKind int
const (
// arrKindMixed is the zero value: elements do not all share one of
// the kinds below (also returned for an empty slice).
arrKindMixed arrKind = iota
// arrKindString: every element satisfies IsString.
arrKindString
// arrKindInt: every element satisfies IsNumInt.
arrKindInt
// arrKindNumeric: every element satisfies IsNumeric, but not all of
// them are NumInt.
arrKindNumeric
// arrKindDate: every element satisfies IsDate.
arrKindDate
// arrKindTimestamp: every element satisfies IsTimestamp.
arrKindTimestamp
// arrKindLogical: every element satisfies IsLogical.
arrKindLogical
)
// detectArrayKind reports the specialized kind shared by every element
// of items, or arrKindMixed when there is none (including for an empty
// slice).
//
// One flag per candidate kind is kept and cleared as soon as an element
// fails that kind's predicate; the scan stops once no candidate is left.
// The surviving candidates are then tried in a fixed priority order:
// Int, Numeric, String, Date, Timestamp, Logical. Integer-only arrays
// therefore resolve to arrKindInt (skipping the int→double conversion in
// the sort), and a single non-int numeric promotes the whole array to
// arrKindNumeric.
func detectArrayKind(items []hbrt.Value) arrKind {
	if len(items) == 0 {
		return arrKindMixed
	}

	onlyInt, onlyNum, onlyStr := true, true, true
	onlyDate, onlyStamp, onlyBool := true, true, true

	for _, elem := range items {
		onlyInt = onlyInt && elem.IsNumInt()
		onlyNum = onlyNum && elem.IsNumeric()
		onlyStr = onlyStr && elem.IsString()
		onlyDate = onlyDate && elem.IsDate()
		onlyStamp = onlyStamp && elem.IsTimestamp()
		onlyBool = onlyBool && elem.IsLogical()

		if !(onlyInt || onlyNum || onlyStr || onlyDate || onlyStamp || onlyBool) {
			return arrKindMixed
		}
	}

	switch {
	case onlyInt:
		return arrKindInt
	case onlyNum:
		return arrKindNumeric
	case onlyStr:
		return arrKindString
	case onlyDate:
		return arrKindDate
	case onlyStamp:
		return arrKindTimestamp
	case onlyBool:
		return arrKindLogical
	}
	return arrKindMixed
}
// valueSortWeight ranks a value's type for cross-type ordering in
// valueLess. NIL keeps the lowest weight (the historical Five behavior);
// the remaining weights follow the cross-type order used by Harbour's
// default ASort: array/block/other, string, logical, date/timestamp,
// numeric.
// NOTE(review): Harbour itself places NIL last — NIL-first is kept here
// deliberately for backward compatibility; confirm that is still wanted.
func valueSortWeight(v hbrt.Value) int {
	switch {
	case v.IsNil():
		return 0
	case v.IsString():
		return 2
	case v.IsLogical():
		return 3
	case v.IsDate(), v.IsTimestamp():
		return 4
	case v.IsNumeric():
		return 5
	}
	return 1 // arrays, hashes, blocks and anything uncategorized
}

// valueLess is the generic default-sort comparator used when an array
// is not homogeneous. It reports whether a sorts strictly before b.
//
// Values of different type classes are ordered by valueSortWeight, so
// the comparison is a consistent ordering for mixed arrays. (Returning
// false in both directions for every cross-type pair is not transitive:
// {3, "a", 1} would treat 3~"a" and "a"~1 as ties while 1 < 3, leaving
// the sorted result dependent on input order.)
//
// Within one class:
//   - numerics: two integers compare as int64 so values beyond 2^53 do
//     not collapse through float64; otherwise compared as doubles
//   - strings: byte-wise `<`
//   - timestamps: Julian day, then milliseconds within the day
//   - date vs date, or date vs timestamp: Julian day only
//   - logicals: .F. sorts before .T.
//   - anything else (NIL/NIL, arrays, blocks, ...): never less
func valueLess(a, b hbrt.Value) bool {
	wa, wb := valueSortWeight(a), valueSortWeight(b)
	if wa != wb {
		return wa < wb
	}

	switch {
	case a.IsNumeric() && b.IsNumeric():
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() < b.AsNumInt()
		}
		return a.AsNumDouble() < b.AsNumDouble()

	case a.IsString() && b.IsString():
		return a.AsString() < b.AsString()

	case a.IsTimestamp() && b.IsTimestamp():
		ja, jb := a.AsJulian(), b.AsJulian()
		if ja != jb {
			return ja < jb
		}
		return a.AsTimeMs() < b.AsTimeMs()

	case (a.IsDate() || a.IsTimestamp()) && (b.IsDate() || b.IsTimestamp()):
		return a.AsJulian() < b.AsJulian()

	case a.IsLogical() && b.IsLogical():
		return !a.AsBool() && b.AsBool()
	}
	return false
}
// AEval evaluates a block for each element in array.
// Harbour: AEval(aArray, bBlock [, nStart [, nCount]]) → aArray
func AEval(t *hbrt.Thread) {
@@ -201,6 +396,12 @@ func AEval(t *hbrt.Thread) {
// AScan searches for a value in array, returns position (0 if not found).
// Harbour: AScan(aArray, xValue|bBlock [, nStart [, nCount]]) → nPos
//
// Block path: per-element block invoke (side-effect safe).
// Value path: specialized fast-paths for string / int / double search
// values — the loop stays inside Go without running through the
// generic valuesEqual type-dispatch each iteration. Mixed or rare
// types (date, timestamp, logical, nil) fall back to valuesEqual.
func AScan(t *hbrt.Thread) {
nParams := t.ParamCount()
t.Frame(nParams, 0)
@@ -208,11 +409,16 @@ func AScan(t *hbrt.Thread) {
arrVal := t.Local(1)
arr := arrVal.AsArray()
if arr == nil {
t.RetInt(0)
return
}
items := arr.Items
search := t.Local(2)
if search.IsBlock() {
blk := search.AsBlock()
for i, item := range arr.Items {
for i, item := range items {
t.PushValue(item)
t.PendingParams2(1)
blk.Fn(t)
@@ -221,8 +427,45 @@ func AScan(t *hbrt.Thread) {
return
}
}
} else {
for i, item := range arr.Items {
t.RetInt(0)
return
}
switch {
case search.IsString():
s := search.AsString()
for i, item := range items {
if item.IsString() && item.AsString() == s {
t.RetInt(int64(i + 1))
return
}
}
case search.IsNumInt():
n := search.AsNumInt()
for i, item := range items {
if !item.IsNumeric() {
continue
}
if item.IsNumInt() {
if item.AsNumInt() == n {
t.RetInt(int64(i + 1))
return
}
} else if item.AsNumDouble() == float64(n) {
t.RetInt(int64(i + 1))
return
}
}
case search.IsNumeric():
f := search.AsNumDouble()
for i, item := range items {
if item.IsNumeric() && item.AsNumDouble() == f {
t.RetInt(int64(i + 1))
return
}
}
default:
for i, item := range items {
if valuesEqual(item, search) {
t.RetInt(int64(i + 1))
return

View File

@@ -17,8 +17,7 @@ func HbHash(t *hbrt.Thread) {
h := hbrt.MakeHash()
hh := h.AsHash()
for i := 1; i <= nParams-1; i += 2 {
hh.Keys = append(hh.Keys, t.Local(i))
hh.Values = append(hh.Values, t.Local(i+1))
hh.Set(t.Local(i), t.Local(i+1))
}
t.PushValue(h)
t.RetValue()
@@ -29,16 +28,12 @@ func HbHash(t *hbrt.Thread) {
func HbHGet(t *hbrt.Thread) {
t.Frame(2, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
hh := hVal.AsHash()
hh := t.Local(1).AsHash()
if hh != nil {
for i, k := range hh.Keys {
if valuesEqual(k, key) {
t.PushValue(hh.Values[i])
t.RetValue()
return
}
if i := hh.Lookup(t.Local(2)); i >= 0 {
t.PushValue(hh.Values[i])
t.RetValue()
return
}
}
t.PushNil()
@@ -51,20 +46,8 @@ func HbHSet(t *hbrt.Thread) {
t.Frame(3, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
val := t.Local(3)
hh := hVal.AsHash()
if hh != nil {
for i, k := range hh.Keys {
if valuesEqual(k, key) {
hh.Values[i] = val
t.PushValue(hVal)
t.RetValue()
return
}
}
hh.Keys = append(hh.Keys, key)
hh.Values = append(hh.Values, val)
if hh := hVal.AsHash(); hh != nil {
hh.Set(t.Local(2), t.Local(3))
}
t.PushValue(hVal)
t.RetValue()
@@ -76,16 +59,8 @@ func HbHDel(t *hbrt.Thread) {
t.Frame(2, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
hh := hVal.AsHash()
if hh != nil {
for i, k := range hh.Keys {
if valuesEqual(k, key) {
hh.Keys = append(hh.Keys[:i], hh.Keys[i+1:]...)
hh.Values = append(hh.Values[:i], hh.Values[i+1:]...)
break
}
}
if hh := hVal.AsHash(); hh != nil {
hh.Delete(t.Local(2))
}
t.PushValue(hVal)
t.RetValue()
@@ -96,19 +71,8 @@ func HbHDel(t *hbrt.Thread) {
func HbHHasKey(t *hbrt.Thread) {
t.Frame(2, 0)
defer t.EndProc()
hVal := t.Local(1)
key := t.Local(2)
hh := hVal.AsHash()
if hh != nil {
for _, k := range hh.Keys {
if valuesEqual(k, key) {
t.PushBool(true)
t.RetValue()
return
}
}
}
t.PushBool(false)
hh := t.Local(1).AsHash()
t.PushBool(hh != nil && hh.Has(t.Local(2)))
t.RetValue()
}

View File

@@ -147,15 +147,9 @@ func navigatePath(v hbrt.Value, path string) hbrt.Value {
}
if v.IsHash() {
h := v.AsHash()
found := false
for i, k := range h.Keys {
if k.AsString() == part {
v = h.Values[i]
found = true
break
}
}
if !found {
if i := h.Lookup(hbrt.MakeString(part)); i >= 0 {
v = h.Values[i]
} else {
return hbrt.MakeNil()
}
} else {
@@ -212,18 +206,7 @@ func JsonMerge(t *hbrt.Thread) {
copy(result.Keys, dh.Keys)
copy(result.Values, dh.Values)
for i, sk := range sh.Keys {
found := false
for j, rk := range result.Keys {
if rk.AsString() == sk.AsString() {
result.Values[j] = sh.Values[i]
found = true
break
}
}
if !found {
result.Keys = append(result.Keys, sk)
result.Values = append(result.Values, sh.Values[i])
}
result.Set(sk, sh.Values[i])
}
t.RetVal(hbrt.MakeHashFrom(result))
}

View File

@@ -14,8 +14,24 @@ import (
"five/compiler/pp"
"five/hbrt"
"os"
"sync"
)
// pcCompileCache stores compiled PcodeFunc keyed by the original PRG
// expression string. Compilation does parser + preprocessor + pcode
// generation per call (~50-200µs for small expressions); for repeated
// queries (same SQL template) every call after the first is a
// sync.Map hit and returns the cached pointer directly.
//
// Thread safety: PcodeFunc is immutable after compilation (no
// per-call mutable state — execution state lives on hbrt.Thread),
// so sharing the pointer across goroutines is safe.
//
// Unbounded: distinct SQL / expression text count is bounded by the
// caller's query set; for FiveSql2 workloads this is a small constant.
// Switch to LRU if a pathological caller emerges.
var pcCompileCache sync.Map // map[string]*hbrt.PcodeFunc
// PcCompile(cPrgExpr) → pFunc
//
// Compile a PRG expression to pcode. Returns an opaque pointer that can
@@ -44,6 +60,14 @@ func PcCompile(t *hbrt.Thread) {
return
}
// Cache hit — skip parser/genpc entirely.
if cached, ok := pcCompileCache.Load(source); ok {
if fn, ok := cached.(*hbrt.PcodeFunc); ok && fn != nil {
t.RetPointer(fn)
return
}
}
// Wrap expression in a function stub so the parser can handle it.
wrapped := "FUNCTION _EXPR()\nRETURN " + source + "\n"
@@ -89,6 +113,11 @@ func PcCompile(t *hbrt.Thread) {
return
}
// Populate the cache. sync.Map.Store handles concurrent writers —
// duplicate compilations of the same source waste a few µs but
// don't corrupt the map; whichever compilation finishes second
// overwrites with an identical value.
pcCompileCache.Store(source, fn)
t.RetPointer(fn)
}

View File

@@ -55,6 +55,8 @@ func RegisterRTL(vm *hbrt.VM) {
hbrt.Sym("AINS", hbrt.FsPublic, AIns),
hbrt.Sym("ASIZE", hbrt.FsPublic, ASize),
hbrt.Sym("ACLONE", hbrt.FsPublic, AClone),
hbrt.Sym("HBDEEPCLONE", hbrt.FsPublic, HbDeepClone),
hbrt.Sym("HB_DEEPCOPY", hbrt.FsPublic, HbDeepClone),
hbrt.Sym("ACOPY", hbrt.FsPublic, ACopy),
hbrt.Sym("AFILL", hbrt.FsPublic, AFill),
hbrt.Sym("ASORT", hbrt.FsPublic, ASort),
@@ -623,6 +625,32 @@ func RegisterRTL(vm *hbrt.VM) {
hbrt.Sym("SQLHASHJOIN", hbrt.FsPublic, SqlHashJoin),
hbrt.Sym("SQLORDERBY", hbrt.FsPublic, SqlOrderBy),
hbrt.Sym("SQLGROUPBY", hbrt.FsPublic, SqlGroupBy),
hbrt.Sym("SQLDISTINCT", hbrt.FsPublic, SqlDistinct),
hbrt.Sym("SQLEXPRHASAGG", hbrt.FsPublic, SqlExprHasAgg),
hbrt.Sym("SQLBULKINSERT", hbrt.FsPublic, SqlBulkInsert),
hbrt.Sym("SQLBULKUPDATE", hbrt.FsPublic, SqlBulkUpdate),
hbrt.Sym("SQLWINDOWPARTITIONS", hbrt.FsPublic, SqlWindowPartitions),
hbrt.Sym("SQLGROUPROWS", hbrt.FsPublic, SqlGroupRows),
hbrt.Sym("SQLCOMPUTEAGGSIMPLE", hbrt.FsPublic, SqlComputeAggSimple),
hbrt.Sym("SQLEVALHAVING", hbrt.FsPublic, SqlEvalHaving),
hbrt.Sym("SQLCOERCESTR", hbrt.FsPublic, SqlCoerceStr),
hbrt.Sym("SQLCOERCENUM", hbrt.FsPublic, SqlCoerceNum),
hbrt.Sym("SQLCOERCEFORCMP", hbrt.FsPublic, SqlCoerceForCmp),
hbrt.Sym("SQLISTRUE", hbrt.FsPublic, SqlIsTrue),
hbrt.Sym("SQLCMPEQ", hbrt.FsPublic, SqlCmpEq),
hbrt.Sym("SQLCMPLT", hbrt.FsPublic, SqlCmpLt),
hbrt.Sym("SQLEXTRACTTEMPLATE", hbrt.FsPublic, SqlExtractTemplate),
hbrt.Sym("SQLLEXERTOKENIZE", hbrt.FsPublic, SqlLexerTokenize),
hbrt.Sym("SQLLEXANDEXTRACTTEMPLATE", hbrt.FsPublic, SqlLexAndExtractTemplate),
hbrt.Sym("SQLWACACHEENABLE", hbrt.FsPublic, SqlWACacheEnable),
hbrt.Sym("SQLWACACHEDISABLE", hbrt.FsPublic, SqlWACacheDisable),
hbrt.Sym("SQLWACACHEISENABLED", hbrt.FsPublic, SqlWACacheIsEnabled),
hbrt.Sym("SQLWACACHEGET", hbrt.FsPublic, SqlWACacheGet),
hbrt.Sym("SQLWACACHEPUT", hbrt.FsPublic, SqlWACachePut),
hbrt.Sym("SQLWACACHEINVALIDATE", hbrt.FsPublic, SqlWACacheInvalidate),
hbrt.Sym("SQLWACACHECLOSEALL", hbrt.FsPublic, SqlWACacheCloseAll),
hbrt.Sym("SQLWINDOWSORTPARTITION", hbrt.FsPublic, SqlWindowSortPartition),
hbrt.Sym("SQLWINDOWASSIGNRANK", hbrt.FsPublic, SqlWindowAssignRank),
// Goroutine / Concurrency
hbrt.Sym("GO", hbrt.FsPublic, GoFunc),

137
hbrtl/sqlexpr.go Normal file
View File

@@ -0,0 +1,137 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Go-native FiveSql2 expression helpers.
// Port of the tight, interpreter-heavy recursive walkers from
// _FiveSql2/src/TSqlExpr.prg into straight Go — the PRG versions
// are bottleneck-prone because every recursion pays the full VM
// frame setup cost, and SqlExprHasAgg is invoked per result
// column per query.
package hbrtl
import (
"five/hbrt"
)
// FiveSql2 AST node kinds — must mirror _FiveSql2/src/FiveSqlDef.ch.
// Nodes are stored as Five arrays { nKind, xVal, xLeft, xRight, xExtra }
// (1-based in PRG, 0-based here).
//
// Only ndFn, ndBin, ndUni and ndCase are inspected by sqlExprHasAggWalk;
// the others are declared to document the layout. Values 8 and 11 are
// not declared in this file.
const (
ndLit = 1 // presumably a literal value — confirm against FiveSqlDef.ch
ndCol = 2 // presumably a column reference — confirm against FiveSqlDef.ch
ndFn = 3 // function call: Items[1] = name, Items[2] = argument array
ndBin = 4 // binary operator: operands at Items[2] and Items[3]
ndUni = 5 // unary operator: operand at Items[2]
ndCase = 6 // CASE: Items[1] = WHEN/THEN pair array, Items[2] = ELSE (may be NIL)
ndSub = 7 // subquery (ND_SUB); not descended into by the aggregate walker
ndPar = 9 // parameter placeholder (ND_PAR)
ndNil = 10
ndWindow = 12 // window function (ND_WINDOW); not descended into by the aggregate walker
)
// aggFuncSet mirrors the AGG_FUNCTIONS macro in FiveSqlDef.ch. Names
// are stored in canonical upper case; the PRG parser upper-cases
// function identifiers at parse time so no ToUpper is needed on the
// hot path. If that invariant ever changes, upper-case here.
//
// sqlExprHasAggWalk looks the ND_FN name up in this set verbatim (an
// exact, case-sensitive match); the empty-struct values carry no data —
// only key presence matters.
var aggFuncSet = map[string]struct{}{
"COUNT": {},
"SUM": {},
"AVG": {},
"MIN": {},
"MAX": {},
"GROUP_CONCAT": {},
"STRING_AGG": {},
"LISTAGG": {},
"JSON_ARRAYAGG": {},
"JSON_OBJECTAGG": {},
"XMLAGG": {},
"ANY_VALUE": {},
"BOOL_AND": {},
"BOOL_OR": {},
}
// sqlExprHasAggWalk reports whether the expression tree rooted at v
// contains a direct aggregate call. It is the recursive worker behind
// the SqlExprHasAgg RTL entry point and mirrors
// TSqlExpr.prg:SqlExprHasAgg.
//
// Traversal rules:
//   - ND_FN: true if the name is in aggFuncSet; otherwise every argument
//     is searched for a nested aggregate
//   - ND_BIN: both operands; ND_UNI: the single operand
//   - ND_CASE: each WHEN/THEN pair, then the ELSE branch if present
//   - any other kind (including ND_WINDOW and ND_SUB, which carry their
//     own aggregation scope): false, no descent
//
// NIL, non-array values and nodes with fewer than two slots are false.
func sqlExprHasAggWalk(v hbrt.Value) bool {
	if v.IsNil() {
		return false
	}
	node := v.AsArray()
	if node == nil || len(node.Items) < 2 {
		return false
	}
	slots := node.Items

	switch int(slots[0].AsNumInt()) {
	case ndFn:
		if _, isAgg := aggFuncSet[slots[1].AsString()]; isAgg {
			return true
		}
		// Scalar function: an aggregate may still hide in its arguments.
		if len(slots) < 3 || !slots[2].IsArray() {
			return false
		}
		for _, arg := range slots[2].AsArray().Items {
			if sqlExprHasAggWalk(arg) {
				return true
			}
		}
		return false

	case ndBin:
		return len(slots) >= 4 &&
			(sqlExprHasAggWalk(slots[2]) || sqlExprHasAggWalk(slots[3]))

	case ndUni:
		return len(slots) >= 3 && sqlExprHasAggWalk(slots[2])

	case ndCase:
		// slots[1] holds the WHEN/THEN pairs, slots[2] the ELSE branch.
		if slots[1].IsArray() {
			for _, branch := range slots[1].AsArray().Items {
				pair := branch.AsArray()
				if pair == nil || len(pair.Items) < 2 {
					continue
				}
				if sqlExprHasAggWalk(pair.Items[0]) || sqlExprHasAggWalk(pair.Items[1]) {
					return true
				}
			}
		}
		return len(slots) >= 3 && !slots[2].IsNil() && sqlExprHasAggWalk(slots[2])
	}
	return false
}
// SqlExprHasAgg(xExpr) → lHasAgg
//
// RTL entry point: returns .T. when the AST passed as the first
// parameter contains an aggregate function call, as decided by
// sqlExprHasAggWalk. Intended as a drop-in replacement for the PRG
// SqlExprHasAgg, without the per-frame interpreter cost on deep trees.
func SqlExprHasAgg(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	hasAgg := sqlExprHasAggWalk(t.Local(1))
	t.RetBool(hasAgg)
}
// Silence "declared and not used" for constants that exist solely to
// document FiveSqlDef.ch layout — keeping them in source form helps
// future walker additions (ND_SUB for subquery flattening, ND_WINDOW
// for window-over-aggregate detection).
var _ = [...]int{ndLit, ndCol, ndSub, ndPar, ndNil, ndWindow}

587
hbrtl/sqlhelpers.go Normal file
View File

@@ -0,0 +1,587 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// FiveSql2 scalar helpers — Go replacements for the PRG functions in
// _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during
// expression evaluation (WHERE / HAVING / CASE); porting removes PRG
// VM frame overhead on the hot interpreter path. Semantics match the
// PRG source byte-for-byte.
package hbrtl
import (
"math"
"strconv"
"strings"
"five/hbrt"
)
// FiveSql2 lexer token type codes — must match FiveSqlDef.ch.
// The trailing notes describe what lexSQL emits for each code.
const (
tkEnd = 0 // terminator appended after the last real token (empty text)
tkName = 1 // identifier / keyword / [bracketed name], upper-cased
tkText = 2 // single-quoted string literal, quotes stripped
tkNum = 3 // run of digits and dots, kept as raw text
tkComma = 4 // ,
tkDot = 5 // .
tkStar = 6 // *
tkLPar = 7 // (
tkRPar = 8 // )
tkEq = 9 // =
tkNEq = 10 // <> or !=
tkLT = 11 // <
tkGT = 12 // >
tkLTE = 13 // <=
tkGTE = 14 // >=
tkQMark = 15 // ? parameter placeholder (also substituted for extracted literals)
tkPlus = 16 // +
tkMinus = 17 // -
tkSlash = 18 // /
tkPipes = 19 // ||
)
// makeTokValue builds one lexer token in the shape TSqlParser2
// consumes: a 2-element PRG array { nTokenType, cTokenValue }.
func makeTokValue(ttype int, text string) hbrt.Value {
	pair := make([]hbrt.Value, 2)
	pair[0] = hbrt.MakeNumInt(int64(ttype))
	pair[1] = hbrt.MakeString(text)
	return hbrt.MakeArrayFrom(pair)
}
// lexSQLStringLiteral decodes the body of a single-quoted SQL literal.
// i must index the first byte after the opening quote. A doubled quote
// (”) inside the literal yields one quote character. It returns the
// decoded text and the index just past the closing quote, or len(s)
// when the literal is unterminated.
func lexSQLStringLiteral(s string, i int) (string, int) {
	n := len(s)
	segStart := i
	var sb strings.Builder
	escaped := false
	for i < n {
		if s[i] != '\'' {
			i++
			continue
		}
		if i+1 < n && s[i+1] == '\'' {
			// Flush the pending segment, then exactly one quote per
			// escape — regardless of how many escapes came before.
			sb.WriteString(s[segStart:i])
			sb.WriteByte('\'')
			escaped = true
			i += 2
			segStart = i
			continue
		}
		break // closing quote
	}
	val := s[segStart:i]
	if escaped {
		sb.WriteString(val)
		val = sb.String()
	}
	if i < n {
		i++ // skip closing quote
	}
	return val, i
}

// lexSQLOperator classifies the operator / punctuation starting at
// s[i]. It returns the token type, the number of bytes consumed and
// whether a token should be emitted. Bytes that form no token (a lone
// '|' or '!', ';', and anything unrecognized) consume one byte with
// ok == false.
func lexSQLOperator(s string, i int) (ttype, width int, ok bool) {
	var next byte
	if i+1 < len(s) {
		next = s[i+1]
	}
	switch s[i] {
	case ',':
		return tkComma, 1, true
	case '.':
		return tkDot, 1, true
	case '*':
		return tkStar, 1, true
	case '(':
		return tkLPar, 1, true
	case ')':
		return tkRPar, 1, true
	case '+':
		return tkPlus, 1, true
	case '-':
		return tkMinus, 1, true
	case '/':
		return tkSlash, 1, true
	case '=':
		return tkEq, 1, true
	case '?':
		return tkQMark, 1, true
	case '|':
		if next == '|' {
			return tkPipes, 2, true
		}
	case '<':
		switch next {
		case '=':
			return tkLTE, 2, true
		case '>':
			return tkNEq, 2, true
		}
		return tkLT, 1, true
	case '>':
		if next == '=' {
			return tkGTE, 2, true
		}
		return tkGT, 1, true
	case '!':
		if next == '=' {
			return tkNEq, 2, true
		}
	}
	return tkEnd, 1, false
}

// lexSQL is the Go port of TSqlLexer:Tokenize — a byte-level scanner
// over the ASCII input string. It returns the token list in the
// { nType, cText } shape produced by makeTokValue, always terminated by
// a tkEnd token.
//
// Recognized input:
//   - whitespace, `-- line` comments and `/* block */` comments are
//     skipped; an unterminated block comment swallows the rest of the
//     input
//   - 'single-quoted' literals become tkText (” decodes to one quote)
//   - runs of digits and dots become tkNum (raw text, not validated)
//   - identifiers / keywords and [bracketed names] become tkName,
//     upper-cased
//   - operators and punctuation as classified by lexSQLOperator; the
//     token text is the matched source bytes
//
// Any other byte is dropped silently.
func lexSQL(s string) []hbrt.Value {
	toks := make([]hbrt.Value, 0, 32)
	n := len(s)
	i := 0
	for i < n {
		c := s[i]
		switch {
		case c == ' ' || c == '\t' || c == '\n' || c == '\r':
			i++

		case c == '-' && i+1 < n && s[i+1] == '-':
			// Line comment: stop at the newline, which is then skipped
			// as whitespace.
			i += 2
			for i < n && s[i] != '\n' {
				i++
			}

		case c == '/' && i+1 < n && s[i+1] == '*':
			if end := strings.Index(s[i+2:], "*/"); end >= 0 {
				i += 2 + end + 2
			} else {
				// Unterminated comment: nothing after it is a token.
				i = n
			}

		case c == '\'':
			var text string
			text, i = lexSQLStringLiteral(s, i+1)
			toks = append(toks, makeTokValue(tkText, text))

		case c >= '0' && c <= '9':
			start := i
			for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') {
				i++
			}
			toks = append(toks, makeTokValue(tkNum, s[start:i]))

		case isAlphaSQL(c) || c == '_':
			start := i
			for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') {
				i++
			}
			toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i])))

		case c == '[':
			i++
			start := i
			for i < n && s[i] != ']' {
				i++
			}
			name := strings.ToUpper(s[start:i])
			if i < n {
				i++ // skip ']'
			}
			toks = append(toks, makeTokValue(tkName, name))

		default:
			ttype, width, ok := lexSQLOperator(s, i)
			if ok {
				toks = append(toks, makeTokValue(ttype, s[i:i+width]))
			}
			i += width
		}
	}
	toks = append(toks, makeTokValue(tkEnd, ""))
	return toks
}
// isAlphaSQL reports whether c is an ASCII letter (A-Z or a-z).
func isAlphaSQL(c byte) bool {
	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'; no other byte
	// lands inside that range.
	lower := c | 0x20
	return lower >= 'a' && lower <= 'z'
}
// SqlLexerTokenize(cSQL) → aTokens
//
// RTL entry point for the Go lexer: runs lexSQL over the string passed
// as the first parameter and returns the token list as a PRG array of
// { nType, cText } pairs — the same structure TSqlLexer:Tokenize
// produced.
func SqlLexerTokenize(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sql := t.Local(1).AsString()
	tokens := hbrt.MakeArrayFrom(lexSQL(sql))
	t.PushValue(tokens)
	t.RetValue()
}
// SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams }
//
// Lexes cSQL and parameterizes it in a single Go call, replacing three
// PRG-to-Go boundary crossings (lex, get tokens, extract).
//
//   - aTokens: the token list with every string / numeric literal
//     rewritten to a TK_QMARK "?" token
//   - cKey: the plan cache key — one byte per token ('?' for a string
//     literal, '#' for a numeric literal, type code + 0x20 otherwise),
//     with the name text plus a space appended after each TK_NAME
//   - aParams: the extracted literal values in positional order;
//     numeric text becomes an integer when it parses as one, a double
//     otherwise, and stays a string if neither parse succeeds
func SqlLexAndExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	sql := t.Local(1).AsString()
	tokens := lexSQL(sql)
	literals := make([]hbrt.Value, 0, 8)

	var key strings.Builder
	key.Grow(len(sql))

	for _, entry := range tokens {
		pair := entry.AsArray()
		if pair == nil || len(pair.Items) < 2 {
			continue
		}
		kind := int(pair.Items[0].AsNumInt())

		// Non-literal tokens only contribute to the key.
		if kind != tkText && kind != tkNum {
			key.WriteByte(byte(kind) + 0x20)
			if kind == tkName {
				key.WriteString(pair.Items[1].AsString())
				key.WriteByte(' ')
			}
			continue
		}

		marker := byte('?')
		literal := pair.Items[1]
		if kind == tkNum {
			marker = '#'
			raw := pair.Items[1].AsString()
			if iv, err := strconv.ParseInt(raw, 10, 64); err == nil {
				literal = hbrt.MakeNumInt(iv)
			} else if fv, err := strconv.ParseFloat(raw, 64); err == nil {
				literal = hbrt.MakeDoubleAuto(fv)
			} else {
				literal = hbrt.MakeString(raw)
			}
		}

		literals = append(literals, literal)
		pair.Items[0] = hbrt.MakeNumInt(tkQMark)
		pair.Items[1] = hbrt.MakeString("?")
		key.WriteByte(marker)
	}

	t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeArrayFrom(tokens),
		hbrt.MakeString(key.String()),
		hbrt.MakeArrayFrom(literals),
	}))
	t.RetValue()
}
// SqlExtractTemplate(aTokens) → { cKey, aParams }
//
// Walks a FiveSql2 lexer token stream, replacing string (TK_TEXT) and
// numeric (TK_NUM) literals with the parameter placeholder token
// (TK_QMARK). Collected literal values are returned as aParams in their
// natural left-to-right order.
//
// Each token is a 2-element array {nTokenType, cTokenValue}; entries
// that are not such an array are ignored. The mutation is in place so
// the caller can pass the resulting aTokens straight into TSqlParser2 —
// the parser then emits ND_PAR nodes that resolve against aParams at
// execution time.
//
// The template key is a compact digest of the token-type sequence, used
// as the plan cache key for queries that share the same shape but
// differ only in literal values. Queries like:
//
//	INSERT INTO t VALUES (1,'a')
//	INSERT INTO t VALUES (2,'b')
//
// produce the SAME key once literals are collapsed, letting the plan
// cache hit from the 2nd call onward.
//
// A non-array argument (or an array value with no backing storage)
// yields { "", {} }.
func SqlExtractTemplate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	tokensVal := t.Local(1)
	var tokArr = tokensVal.AsArray()
	if !tokensVal.IsArray() || tokArr == nil {
		empty := hbrt.MakeArrayFrom([]hbrt.Value{
			hbrt.MakeString(""),
			hbrt.MakeArrayFrom(nil),
		})
		t.PushValue(empty)
		t.RetValue()
		return
	}
	toks := tokArr.Items
	params := make([]hbrt.Value, 0, 8)

	// Template key — cheap digest of the token-type sequence.
	var keyBuf strings.Builder
	keyBuf.Grow(len(toks) * 2)

	for _, tokVal := range toks {
		tok := tokVal.AsArray()
		if tok == nil || len(tok.Items) < 2 {
			continue
		}
		ttype := int(tok.Items[0].AsNumInt())
		switch ttype {
		case tkText:
			// String literal → TK_QMARK + save raw string value.
			params = append(params, tok.Items[1])
			tok.Items[0] = hbrt.MakeInt(tkQMark)
			tok.Items[1] = hbrt.MakeString("?")
			keyBuf.WriteByte('?')
		case tkNum:
			// Numeric literal → TK_QMARK + parsed value. Integer form
			// when possible (common for id columns), double otherwise;
			// unparseable text is kept as a string.
			s := tok.Items[1].AsString()
			var val hbrt.Value
			if i, err := strconv.ParseInt(s, 10, 64); err == nil {
				val = hbrt.MakeNumInt(i)
			} else if f, err := strconv.ParseFloat(s, 64); err == nil {
				val = hbrt.MakeDoubleAuto(f)
			} else {
				val = hbrt.MakeString(s)
			}
			params = append(params, val)
			tok.Items[0] = hbrt.MakeInt(tkQMark)
			tok.Items[1] = hbrt.MakeString("?")
			keyBuf.WriteByte('#')
		default:
			// Non-literal token — include type code + text so two
			// different-but-same-shape queries distinguish properly
			// (e.g., SELECT id vs SELECT name).
			keyBuf.WriteByte(byte(ttype) + 0x20) // offset to printable
			if ttype == tkName {
				// Tokens may come from a caller-built stream, so the
				// name is upper-cased here rather than assumed.
				keyBuf.WriteString(strings.ToUpper(tok.Items[1].AsString()))
				keyBuf.WriteByte(' ')
			}
		}
	}

	result := hbrt.MakeArrayFrom([]hbrt.Value{
		hbrt.MakeString(keyBuf.String()),
		hbrt.MakeArrayFrom(params),
	})
	t.PushValue(result)
	t.RetValue()
}
// SqlCoerceStr(x) → cString
//
// RTL entry point: returns the string form of the first parameter as
// computed by sqlCoerceStr (NIL becomes the empty string).
func SqlCoerceStr(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	text := sqlCoerceStr(t.Local(1))
	t.RetString(text)
}
// sqlCoerceStr renders a scalar as a string:
//   - NIL → ""
//   - string → unchanged
//   - integer → base-10 digits
//   - other numeric → shortest 'g' formatting that round-trips
//   - logical → "T" / "F"
//
// Any other value kind yields "".
func sqlCoerceStr(v hbrt.Value) string {
	if v.IsNil() {
		return ""
	}
	if v.IsString() {
		return v.AsString()
	}
	if v.IsNumeric() {
		if !v.IsNumInt() {
			return strconv.FormatFloat(v.AsNumDouble(), 'g', -1, 64)
		}
		return strconv.FormatInt(v.AsNumInt(), 10)
	}
	if v.IsLogical() {
		if v.AsBool() {
			return "T"
		}
		return "F"
	}
	return ""
}
// SqlCoerceNum(x) → nNumber
//
// Converts a scalar to a number: NIL → 0, numerics pass through
// unchanged, strings go through parseLeadingNumeric, logicals map to
// 1 / 0. Any other value kind yields 0.
func SqlCoerceNum(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if arg.IsNil() {
		t.RetInt(0)
		return
	}
	if arg.IsNumeric() {
		t.RetVal(arg)
		return
	}
	if arg.IsString() {
		parsed := parseLeadingNumeric(arg.AsString())
		t.RetVal(hbrt.MakeDoubleAuto(parsed))
		return
	}
	if arg.IsLogical() && arg.AsBool() {
		t.RetInt(1)
		return
	}
	// .F. and every uncategorized kind.
	t.RetInt(0)
}
// SqlCoerceForCmp(x) → xNormalized
//
// Normalizes a value for comparison: strings are whitespace-trimmed and
// upper-cased, every other kind is returned untouched. Used to make SQL
// equality/ordering case-insensitive on CHAR values.
func SqlCoerceForCmp(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	arg := t.Local(1)
	if !arg.IsString() {
		t.RetVal(arg)
		return
	}
	trimmed := strings.TrimSpace(arg.AsString())
	t.RetString(strings.ToUpper(trimmed))
}
// SqlIsTrue(x) → lBool
//
// RTL entry point for SQL truthiness; see sqlIsTrue for the rules
// (NIL, zero and blank strings are false).
func SqlIsTrue(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	truthy := sqlIsTrue(t.Local(1))
	t.RetBool(truthy)
}
// sqlIsTrue evaluates SQL truthiness:
//   - NIL → false
//   - logical → its own value
//   - integer → non-zero
//   - other numeric → non-zero and not NaN
//   - string → non-empty after trimming whitespace
//
// Any other value kind is false.
func sqlIsTrue(v hbrt.Value) bool {
	if v.IsNil() {
		return false
	}
	if v.IsLogical() {
		return v.AsBool()
	}
	if v.IsNumeric() {
		if v.IsNumInt() {
			return v.AsNumInt() != 0
		}
		f := v.AsNumDouble()
		return !(f == 0 || math.IsNaN(f))
	}
	if v.IsString() {
		return len(strings.TrimSpace(v.AsString())) > 0
	}
	return false
}
// SqlCmpEq(a, b) → lBool
//
// RTL entry point for SQL equality: case-insensitive on strings, with
// cross-type N↔C coercion, as implemented by sqlCmpEq.
func SqlCmpEq(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpEq(lhs, rhs))
}
// sqlCmpEq reports whether a and b are equal under FiveSql2 rules.
//
//   - NIL equals only NIL
//   - numeric vs numeric: two integers compare exactly as int64 (going
//     through float64 would make distinct values above 2^53 compare
//     equal); any other pairing compares as doubles, so NumInt and
//     Double representations of the same number still match
//   - string vs string: trimmed, case-insensitive
//   - logical vs logical, date vs date (Julian day): direct equality
//   - numeric vs string (either order): the string goes through
//     parseLeadingNumeric and is compared as a double
//
// Every other type combination is unequal.
func sqlCmpEq(a, b hbrt.Value) bool {
	aNil, bNil := a.IsNil(), b.IsNil()
	if aNil || bNil {
		return aNil && bNil
	}
	if a.IsNumeric() && b.IsNumeric() {
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() == b.AsNumInt()
		}
		return a.AsNumDouble() == b.AsNumDouble()
	}
	if a.IsString() && b.IsString() {
		return strings.EqualFold(
			strings.TrimSpace(a.AsString()),
			strings.TrimSpace(b.AsString()),
		)
	}
	if a.IsLogical() && b.IsLogical() {
		return a.AsBool() == b.AsBool()
	}
	if a.IsDate() && b.IsDate() {
		return a.AsJulian() == b.AsJulian()
	}
	// Cross-type N / C coercion.
	if a.IsNumeric() && b.IsString() {
		return a.AsNumDouble() == parseLeadingNumeric(b.AsString())
	}
	if a.IsString() && b.IsNumeric() {
		return parseLeadingNumeric(a.AsString()) == b.AsNumDouble()
	}
	return false
}
// SqlCmpLt(a, b) → lBool
//
// RTL entry point for SQL less-than: case-insensitive on strings, with
// cross-type N↔C coercion, as implemented by sqlCmpLt.
func SqlCmpLt(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	lhs, rhs := t.Local(1), t.Local(2)
	t.RetBool(sqlCmpLt(lhs, rhs))
}
// sqlCmpLt reports whether a sorts strictly before b under FiveSql2
// rules.
//
//   - any NIL operand → false
//   - numeric vs numeric: two integers compare exactly as int64 (float64
//     cannot order neighbouring values above 2^53); any other pairing
//     compares as doubles
//   - string vs string: trimmed and upper-cased, then byte-wise `<`
//   - date vs date: Julian day
//   - logical vs logical: .F. < .T.
//   - numeric vs string (either order): the string goes through
//     parseLeadingNumeric and is compared as a double
//
// Every other type combination is false.
func sqlCmpLt(a, b hbrt.Value) bool {
	if a.IsNil() || b.IsNil() {
		return false
	}
	if a.IsNumeric() && b.IsNumeric() {
		if a.IsNumInt() && b.IsNumInt() {
			return a.AsNumInt() < b.AsNumInt()
		}
		return a.AsNumDouble() < b.AsNumDouble()
	}
	if a.IsString() && b.IsString() {
		return strings.ToUpper(strings.TrimSpace(a.AsString())) <
			strings.ToUpper(strings.TrimSpace(b.AsString()))
	}
	if a.IsDate() && b.IsDate() {
		return a.AsJulian() < b.AsJulian()
	}
	if a.IsLogical() && b.IsLogical() {
		return !a.AsBool() && b.AsBool()
	}
	// Cross-type N / C coercion.
	if a.IsNumeric() && b.IsString() {
		return a.AsNumDouble() < parseLeadingNumeric(b.AsString())
	}
	if a.IsString() && b.IsNumeric() {
		return parseLeadingNumeric(a.AsString()) < b.AsNumDouble()
	}
	return false
}

File diff suppressed because it is too large Load Diff

142
hbrtl/sqlwacache.go Normal file
View File

@@ -0,0 +1,142 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Workarea cache for FiveSql2 DML — opt-in persistent workarea slots
// keyed by alias. Eliminates per-query dbUseArea + dbCloseArea syscall
// overhead for repeated INSERT / UPDATE / DELETE against the same table.
//
// Semantics:
// * Disabled by default. Callers opt in via SqlWACacheEnable(). Tests
// and short one-shot scripts can stay on the safe per-query open/
// close behavior; long-running bench loops or servers pay the open
// cost once.
// * Entries map uppercase alias → workarea number. The PRG side is
// responsible for the actual dbUseArea / dbSelectArea — this layer
// only stores the handle.
// * Invalidation is explicit. CREATE TABLE / DROP TABLE in
// TSqlDDL.prg call SqlWACacheInvalidate before any filesystem
// operation that would otherwise collide with a still-open handle.
// * SqlWACacheCloseAll drops every entry; callers then decide how
// to actually close the workareas (dbCloseAll, per-alias close, …).
package hbrtl
import (
"strings"
"sync"
"five/hbrt"
)
// Process-wide workarea cache state. Every SqlWACache* function in this
// file takes waCacheMu before reading or writing the other two
// variables.
var (
// waCacheMu guards waCacheEntries and waCacheEnabled.
waCacheMu sync.Mutex
// waCacheEntries maps an upper-cased alias to its workarea number.
waCacheEntries = map[string]int{}
// waCacheEnabled is the opt-in switch; false (the zero value) until
// SqlWACacheEnable is called.
waCacheEnabled bool
)
// SqlWACacheEnable() → NIL
//
// Switches the workarea cache on for this process. Nothing is
// registered retroactively: entries appear only as callers store them
// afterwards (see SqlWACachePut).
func SqlWACacheEnable(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()

	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		waCacheEnabled = true
	}()
	t.RetNil()
}
// SqlWACacheDisable() → NIL
//
// Switches the cache off and forgets every entry. Only the bookkeeping
// is dropped — the workareas themselves stay in whatever state the
// caller left them, so callers typically follow up with dbCloseAll()
// or a per-table close.
func SqlWACacheDisable(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()

	fresh := map[string]int{}
	waCacheMu.Lock()
	waCacheEnabled, waCacheEntries = false, fresh
	waCacheMu.Unlock()
	t.RetNil()
}
// SqlWACacheIsEnabled() → lBool
//
// Reports whether the workarea cache is currently switched on.
func SqlWACacheIsEnabled(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()

	enabled := func() bool {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		return waCacheEnabled
	}()
	t.RetBool(enabled)
}
// SqlWACacheGet(cAlias) → nWA | 0
//
// Looks up the cached workarea number for an alias (matched
// case-insensitively via upper-casing). Returns 0 when the cache is
// disabled or holds no entry for the alias. The PRG side is still
// expected to verify Used() / Select() before trusting the number —
// another process or a manual close may have invalidated the handle
// between cache hits.
func SqlWACacheGet(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	alias := strings.ToUpper(t.Local(1).AsString())

	var slot int
	waCacheMu.Lock()
	if waCacheEnabled {
		slot = waCacheEntries[alias] // missing key reads as 0
	}
	waCacheMu.Unlock()

	t.RetInt(int64(slot))
}
// SqlWACachePut(cAlias, nWA) → NIL
// Stores (or replaces) the workarea number for an alias. The alias is
// upper-cased before it is used as the key. The call does nothing when the
// cache is disabled or when nWA is not positive, so callers may invoke it
// unconditionally after a successful open.
func SqlWACachePut(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	// Arguments are decoded before the lock is taken.
	key := strings.ToUpper(t.Local(1).AsString())
	area := int(t.Local(2).AsNumInt())

	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		if !waCacheEnabled || area <= 0 {
			return // disabled cache or non-positive workarea: nothing to record
		}
		waCacheEntries[key] = area
	}()

	t.RetNil()
}
// SqlWACacheInvalidate(cAlias) → NIL
// Forgets the cache entry for one alias (upper-cased before lookup). The
// entry is removed regardless of whether the cache is enabled, and removing
// an alias that is not cached is harmless. Intended to run before
// CREATE TABLE / DROP TABLE / FErase so the PRG side can close and recreate
// the file without tripping over a stale cached open.
func SqlWACacheInvalidate(t *hbrt.Thread) {
	t.Frame(1, 0)
	defer t.EndProc()

	key := strings.ToUpper(t.Local(1).AsString())

	func() {
		waCacheMu.Lock()
		defer waCacheMu.Unlock()
		delete(waCacheEntries, key)
	}()

	t.RetNil()
}
// SqlWACacheCloseAll() → aKeys
// Empties the cache and returns an array of the aliases that were in it,
// sorted ascending. No workarea is closed here; callers iterate the
// returned aliases and close each corresponding workarea themselves.
//
// The enabled flag is not consulted or changed: entries are drained whether
// or not the cache is currently switched on.
func SqlWACacheCloseAll(t *hbrt.Thread) {
	t.Frame(0, 0)
	defer t.EndProc()

	// Snapshot the aliases and swap in a fresh map under a single lock
	// acquisition, so a concurrent Put cannot land between the two steps.
	waCacheMu.Lock()
	keys := make([]string, 0, len(waCacheEntries))
	for k := range waCacheEntries {
		keys = append(keys, k)
	}
	waCacheEntries = map[string]int{}
	waCacheMu.Unlock()

	// Go map iteration order is deliberately unspecified; sort so that the
	// returned list (and any close order derived from it) is reproducible
	// from run to run.
	sort.Strings(keys)

	out := make([]hbrt.Value, len(keys))
	for i, k := range keys {
		out[i] = hbrt.MakeString(k)
	}
	t.PushValue(hbrt.MakeArrayFrom(out))
	t.RetValue()
}

View File

@@ -330,11 +330,31 @@ STATIC PROCEDURE TestArrayHash()
ASort(a,,, {|x,y| x > y})
Assert("9d ASort desc: {3,2,1}", a[1] == 3 .AND. a[2] == 2 .AND. a[3] == 1)
// ASort dates (default, no block — formerly no-op, now sorts julian)
a := { CToD("2026-03-15"), CToD("2024-01-10"), CToD("2025-07-01") }
ASort(a)
Assert("9c1 ASort dates ascending", ;
a[1] == CToD("2024-01-10") .AND. ;
a[2] == CToD("2025-07-01") .AND. ;
a[3] == CToD("2026-03-15"))
// ASort logicals (default — .F. < .T.)
a := { .T., .F., .T., .F. }
ASort(a)
Assert("9c2 ASort logicals: F,F,T,T", ;
!a[1] .AND. !a[2] .AND. a[3] .AND. a[4])
// AScan
a := {"alice", "bob", "charlie"}
Assert("9e AScan: found 'bob' at 2", AScan(a, "bob") == 2)
Assert("9f AScan: 'dave' not found", AScan(a, "dave") == 0)
// AScan numeric fast-path
a := { 10, 20, 30, 40 }
Assert("9e1 AScan int found", AScan(a, 30) == 3)
Assert("9e2 AScan int cross-type (double lookup)", AScan(a, 30.0) == 3)
Assert("9e3 AScan int not found", AScan(a, 99) == 0)
// AEval with mutable closure capture (Harbour: closures share outer locals)
nSum := 0
AEval({10, 20, 30}, {|x| nSum += x})