diff --git a/_FiveSql2/src/TFiveSQL.prg b/_FiveSql2/src/TFiveSQL.prg
index 1f4ccab..8644182 100644
--- a/_FiveSql2/src/TFiveSQL.prg
+++ b/_FiveSql2/src/TFiveSQL.prg
@@ -14,6 +14,19 @@
 #include "hbclass.ch"
 #include "FiveSqlDef.ch"
 
+/* Plan cache: cache key → parsed hQuery.
+ *
+ * The FiveSql2 parser runs lex + Pratt-style AST build per call; for
+ * repeated SQL (typical in report / loop / benchmark workloads) this is
+ * pure overhead. Execute() caches the pristine parse result, keyed by the
+ * template key from SqlLexAndExtractTemplate (or by the raw SQL text when
+ * explicit params are set), and hands every call a deep clone via
+ * HbDeepClone so in-place mutations (SqlFoldConst, aTables rewriting)
+ * during Run() never corrupt the cached tree. Entries live until process
+ * exit; the distinct key count is bounded by the caller's template set,
+ * so LRU is deferred. */
+STATIC s_hPlanCache := { => }
+
 CLASS TFiveSQL
 
    DATA oLexer
@@ -40,20 +53,50 @@ RETURN SELF
 METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL
 
    LOCAL aTokens, hQuery, aResult
+   LOCAL aLex, cKey, aParams
 
-   /* Parse — no caching (plan trees are mutated during execution) */
-   ::oLexer := TSqlLexer():New( cSQL )
-   ::oLexer:Tokenize()
-   aTokens := ::oLexer:GetTokens()
+   /* Fast path: no explicit aParams → single Go RTL lex+normalize call
+    * (SqlLexAndExtractTemplate). Returns {aTokens, cKey, aParams}; the
+    * tokens already have TK_TEXT/TK_NUM replaced with TK_QMARK, so
+    * TSqlParser2 sees the template shape and emits ND_PAR references
+    * against the extracted aParams. */
+   IF Empty( ::aParams )
+      aLex := SqlLexAndExtractTemplate( cSQL )
+      aTokens := aLex[ 1 ]
+      cKey := aLex[ 2 ]
+      aParams := aLex[ 3 ]
 
-   ::oParser := TSqlParser2():New( aTokens, ::aParams )
-   hQuery := ::oParser:Parse()
+      IF hb_HHasKey( s_hPlanCache, cKey )
+         hQuery := HbDeepClone( s_hPlanCache[ cKey ] )
+      ELSE
+         ::oParser := TSqlParser2():New( aTokens, aParams )
+         hQuery := ::oParser:Parse()
+         IF hQuery == NIL
+            RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } }
+         ENDIF
+         s_hPlanCache[ cKey ] := HbDeepClone( hQuery )
+      ENDIF
 
-   IF hQuery == NIL
-      RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } }
+      ::oExec := TSqlExecutor():New( hQuery, aParams )
+      ::oExec:cCacheKey := cKey
+   ELSE
+      /* Caller supplied explicit params — cache by raw SQL text. */
+      IF hb_HHasKey( s_hPlanCache, cSQL )
+         hQuery := HbDeepClone( s_hPlanCache[ cSQL ] )
+      ELSE
+         aTokens := SqlLexerTokenize( cSQL )
+         ::oParser := TSqlParser2():New( aTokens, ::aParams )
+         hQuery := ::oParser:Parse()
+         IF hQuery == NIL
+            RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } }
+         ENDIF
+         s_hPlanCache[ cSQL ] := HbDeepClone( hQuery )
+      ENDIF
+
+      ::oExec := TSqlExecutor():New( hQuery, ::aParams )
+      ::oExec:cCacheKey := cSQL
    ENDIF
 
-   ::oExec := TSqlExecutor():New( hQuery, ::aParams )
    ::oExec:bRowBlock := bBlock
    aResult := ::oExec:Run()
 
diff --git a/_FiveSql2/src/TSqlAgg.prg b/_FiveSql2/src/TSqlAgg.prg
index d6f2992..283b6e8 100644
--- a/_FiveSql2/src/TSqlAgg.prg
+++ b/_FiveSql2/src/TSqlAgg.prg
@@ -48,12 +48,13 @@ RETURN .F.
 
 METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS TSqlAgg
 
-   LOCAL hGroups := { => }
-   LOCAL i, j, cKey, aGroupRows, aResult := {}
+   LOCAL i, j, aGroupRows, aResult := {}
    LOCAL aNewRow
    LOCAL nGCol, cN, nCI, lPass
    LOCAL aGroupIdx := {}
    LOCAL aSets, aCurSet, nSet, hOmitIdx, aSubResult
+   LOCAL aGroupedRows
+   LOCAL aColInfo  /* { lIsAgg, nCI } per SELECT column, pre-resolved */
 
    /* Aggregate on empty set */
    IF Len( aRows ) == 0 .AND. ::HasAgg( aCols )
@@ -109,37 +110,39 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T
       AAdd( aGroupIdx, nGCol )
    NEXT
 
+   /* Grouping step — delegate to Go RTL SqlGroupRows to collapse
+    * N·M per-row boundary crossings (SqlValToStr / hb_HHasKey / AAdd)
+    * into a single call. Aggregates and HAVING stay in PRG because
+    * they touch too many expression kinds to port cleanly. */
    IF Len( aGroupBy ) == 0 .AND. ::HasAgg( aCols )
-      hGroups[ "__ALL__" ] := aRows
+      aGroupedRows := { aRows }
    ELSE
-      FOR i := 1 TO Len( aRows )
-         cKey := ""
-         FOR j := 1 TO Len( aGroupBy )
-            nGCol := aGroupIdx[ j ]
-            IF nGCol > 0 .AND. nGCol <= Len( aRows[ i ] )
-               cKey += SqlValToStr( aRows[ i ][ nGCol ] ) + "|"
-            ENDIF
-         NEXT
-         IF ! hb_HHasKey( hGroups, cKey )
-            hGroups[ cKey ] := {}
-         ENDIF
-         AAdd( hGroups[ cKey ], aRows[ i ] )
-      NEXT
+      aGroupedRows := SqlGroupRows( aRows, aGroupIdx )
    ENDIF
 
+   /* Pre-resolve per SELECT column: aggregate flag + column index.
+    * Avoids SqlExprHasAgg + SqlExprName + FindColIdx2 per group. */
+   aColInfo := Array( Len( aCols ) )
+   FOR j := 1 TO Len( aCols )
+      IF SqlExprHasAgg( aCols[ j ][ 1 ] )
+         aColInfo[ j ] := { .T., 0 }
+      ELSE
+         cN := SqlExprName( aCols[ j ][ 1 ] )
+         nCI := ::FindColIdx2( cN, aFN )
+         aColInfo[ j ] := { .F., nCI }
+      ENDIF
+   NEXT
+
    /* Compute aggregates for each group */
-   FOR EACH aGroupRows IN hb_HValues( hGroups )
-      aNewRow := {}
+   FOR EACH aGroupRows IN aGroupedRows
+      aNewRow := Array( Len( aCols ) )
       FOR j := 1 TO Len( aCols )
-         IF SqlExprHasAgg( aCols[ j ][ 1 ] )
-            AAdd( aNewRow, ::ComputeAgg( aCols[ j ][ 1 ], aGroupRows, aFN ) )
+         IF aColInfo[ j ][ 1 ]
+            aNewRow[ j ] := ::ComputeAgg( aCols[ j ][ 1 ], aGroupRows, aFN )
          ELSE
-            cN := SqlExprName( aCols[ j ][ 1 ] )
-            nCI := ::FindColIdx2( cN, aFN )
+            nCI := aColInfo[ j ][ 2 ]
             IF nCI > 0 .AND. Len( aGroupRows ) > 0 .AND. nCI <= Len( aGroupRows[ 1 ] )
-               AAdd( aNewRow, aGroupRows[ 1 ][ nCI ] )
-            ELSE
-               AAdd( aNewRow, NIL )
+               aNewRow[ j ] := aGroupRows[ 1 ][ nCI ]
             ENDIF
          ENDIF
       NEXT
@@ -418,6 +421,15 @@ METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg
       RETURN 0
    ENDIF
 
+   /* Fast path: plain column + common aggregate → Go RTL single-pass loop.
+    * Gate on column-ref argument + pre-resolved nCol > 0; complex args
+    * (CASE/BIN/UDF) still fall through to the PRG loop below. */
+   IF nCol > 0 .AND. xArg[ 1 ] == ND_COL .AND. ;
+      ( cFunc == "COUNT" .OR. cFunc == "SUM" .OR. cFunc == "AVG" .OR. ;
+        cFunc == "MIN" .OR. cFunc == "MAX" )
+      RETURN SqlComputeAggSimple( aGR, nCol, cFunc )
+   ENDIF
+
    FOR i := 1 TO Len( aGR )
       IF nCol > 0 .AND. nCol <= Len( aGR[ i ] )
          xVal := aGR[ i ][ nCol ]
@@ -479,7 +491,15 @@ RETURN 0
 
 METHOD EvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams ) CLASS TSqlAgg
 
-   LOCAL xResult
+   LOCAL xResult, aGo
+
+   /* Fast path: Go-native tree walker. Returns {lOk, lPass}; falls back
+    * to PRG when it hits an unsupported node (subqueries, complex agg
+    * args, CASE expressions inside HAVING, etc.). */
+   aGo := SqlEvalHaving( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
+   IF ValType( aGo ) == "A" .AND. Len( aGo ) == 2 .AND. aGo[ 1 ]
+      RETURN aGo[ 2 ]
+   ENDIF
 
    xResult := ::EvalHavingExpr( xHaving, aNewRow, aCols, aGroupRows, aFN, aParams )
 
diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg
index 17e0db1..a164be6 100644
--- a/_FiveSql2/src/TSqlExecutor.prg
+++ b/_FiveSql2/src/TSqlExecutor.prg
@@ -19,6 +19,18 @@ STATIC s_aOuterStack := {}
 STATIC s_hAutoInc := NIL
 STATIC s_nRCJSeq := 0
 
+/* Per-plan pcode cache, keyed off the plan-cache key that TFiveSQL
+ * assigns to cCacheKey (template key or raw cSQL text). Two entry shapes:
+ *   cCacheKey          (RunUpdate): { "set_fpos" => aFPos,
+ *                                     "set_pc"   => aValuePc (parallel),
+ *                                     "where_pc" => pcWhere | NIL }
+ *   cCacheKey + "#sel" (RunSelect): { "fp" => aFP, "where_pc" => pcW }
+ * Filled after the first successful compile, reused afterwards. Field
+ * positions are baked in, so entries assume the table layout is stable.
+ * NOTE(review): template-keyed entries are shared by calls that differ
+ * only in literals — confirm the pcode does not embed those values. */
+STATIC s_hDmlPcodeCache := { => }
+
 CLASS TSqlExecutor
 
    DATA hQuery
@@ -35,6 +47,7 @@ CLASS TSqlExecutor
    DATA aCompileStruct
    DATA bRowBlock   /* optional code block — receives SELECT cols as params */
    DATA aFetchCache /* pre-bound {nWA, nFPos} per SELECT expression, or NIL */
+   DATA cCacheKey   /* plan-cache key set by TFiveSQL; used for DML pcode cache */
    DATA hSubCorrCache   INIT { => }    /* per-outer-key subquery result cache */
    DATA aSubCacheSlots  INIT {}         /* list of {xSubNode, {id, aFreeVars}} */
    DATA nSubCacheSeq    INIT 0          /* monotonic ID for subqueries */
@@ -1217,10 +1230,24 @@ METHOD RunSelect() CLASS TSqlExecutor
       IF nWA == 0
          nWA := ::OpenTable( cTable, cAlias )
          IF nWA == 0
-            /* Table file not found; check if a CTE temp file exists for this
-             * table name and open it instead.  This handles sub-executors
-             * (UNION, recursive) that reference a CTE by its original name. */
-            IF hb_FileExists( "__cte_" + Lower( cTable ) + ".dbf" )
+            /* Table file not found; check if a CTE temp table exists for
+             * this table name and open it instead.  This handles sub-
+             * executors (UNION, recursive) that reference a CTE by its
+             * original name.  CTE temp tables now live in MEMRDD (no
+             * file on disk) — fall back to the legacy DBFNTX open for
+             * pre-existing .dbf files from prior runs. */
+            BEGIN SEQUENCE
+               dbUseArea( .T., "MEMRDD", "mem:__cte_" + Lower( cTable ), ;
+                  cAlias, .T., .T. )
+               nWA := Select( cAlias )
+               IF nWA > 0
+                  AAdd( ::aOpened, cAlias )
+                  AAdd( ::oAlias:aSlots, { cAlias, Upper( cTable ), Upper( cTable ), .T. } )
+               ENDIF
+            RECOVER
+               nWA := 0
+            END SEQUENCE
+            IF nWA == 0 .AND. hb_FileExists( "__cte_" + Lower( cTable ) + ".dbf" )
                BEGIN SEQUENCE
                   dbUseArea( .T., "DBFNTX", "__cte_" + Lower( cTable ) + ".dbf", ;
                      cAlias, .T., .T. )
@@ -1418,9 +1445,34 @@ METHOD RunSelect() CLASS TSqlExecutor
                aGoRows := NIL
                IF Len( aJoins ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ;
                   ! ::oAgg:HasAgg( aCols )
-                  aFP := ::TryBuildFieldPositions( aResultExprs )
+                  /* Plan pcode cache: cache aFP + pcW per cCacheKey.
+                   * These results are pure functions of the plan tree
+                   * (which is immutable between cache hits) and the
+                   * target table schema (stable for the process). */
+                  LOCAL hSelCached, cSelKey
+                  IF ! Empty( ::cCacheKey )
+                     cSelKey := ::cCacheKey + "#sel"
+                     IF hb_HHasKey( s_hDmlPcodeCache, cSelKey )
+                        hSelCached := s_hDmlPcodeCache[ cSelKey ]
+                        aFP := hSelCached[ "fp" ]
+                        pcW := hSelCached[ "where_pc" ]
+                     ENDIF
+                  ENDIF
+                  IF aFP == NIL
+                     aFP := ::TryBuildFieldPositions( aResultExprs )
+                     IF aFP != NIL .AND. xWhere != NIL
+                        pcW := ::TryCompileWhere( xWhere )
+                        IF pcW == NIL
+                           aFP := NIL  /* WHERE couldn't compile — PRG path */
+                        ENDIF
+                     ENDIF
+                     IF aFP != NIL .AND. ! Empty( ::cCacheKey )
+                        s_hDmlPcodeCache[ ::cCacheKey + "#sel" ] := { ;
+                           "fp"       => aFP, ;
+                           "where_pc" => pcW }
+                     ENDIF
+                  ENDIF
                   IF aFP != NIL
-                     pcW := ::TryCompileWhere( xWhere )
                      IF xWhere == NIL .OR. pcW != NIL
                         IF ::bRowBlock != NIL
                            /* Block mode: stream rows through user block.
@@ -2297,29 +2349,22 @@ METHOD MaterializeCTE( aCTE ) CLASS TSqlExecutor
          dbSelectArea( nExistWA )
          dbCloseArea()
       ENDIF
-      IF hb_FileExists( cTmpFile + ".dbf" )
-         FErase( cTmpFile + ".dbf" )
-      ENDIF
 
+      /* In-memory temp table — no file I/O, `mem:` scheme dispatches
+       * to MEMRDD. Create overwrites any prior table with this name. */
       BEGIN SEQUENCE
-         dbCreate( cTmpFile + ".dbf", aStruct )
+         dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" )
       RECOVER
          LOOP
       END SEQUENCE
 
-      USE ( cTmpFile + ".dbf" ) NEW EXCLUSIVE ALIAS ( cPopAlias )
-      FOR j := 1 TO Len( aDataRows )
-         dbAppend()
-         FOR k := 1 TO Min( Len( aStruct ), Len( aDataRows[ j ] ) )
-            IF aDataRows[ j ][ k ] != NIL
-               FieldPut( k, aDataRows[ j ][ k ] )
-            ENDIF
-         NEXT
-      NEXT
-      dbCommit()
+      dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cPopAlias, .F., .F. )
+      /* Go RTL SqlBulkInsert: collapses per-row dbAppend+FieldPut loop
+       * into a single RTL call — N·M boundary crossings → 1. */
+      SqlBulkInsert( aDataRows )
       dbSelectArea( Select( cPopAlias ) )
       dbCloseArea()
-      USE ( cTmpFile + ".dbf" ) NEW SHARED ALIAS ( cName )
+      dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cName, .T., .F. )
 
       /* Replace existing table entry */
       lReplaced := .F.
@@ -2340,16 +2385,7 @@ METHOD RunInsert() CLASS TSqlExecutor
 
    aAutoInc := SqlGetAutoIncFields( cTable )
 
-   nWA := Select( cAlias )
-   IF nWA == 0
-      BEGIN SEQUENCE
-         dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
-      RECOVER
-         dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
-      END SEQUENCE
-   ELSE
-      dbSelectArea( nWA )
-   ENDIF
+   nWA := SqlExecOpenTable( cTable, cAlias )
 
    /* Transaction logging */
    ::oTxn:LogRecord( cAlias, RecNo(), "INSERT" )
@@ -2410,12 +2446,16 @@ METHOD RunInsert() CLASS TSqlExecutor
       NEXT
    ENDIF
 
-   dbCommit()
-
-   IF nWA == 0
-      dbCloseArea()
+   /* Commit per INSERT when the WA cache is off (legacy durability
+    * guarantee). With the cache on, the caller batches via an
+    * explicit SqlWACacheDisable+dbCloseAll at shutdown — skipping
+    * the per-INSERT flush collapses the dominant I/O cost. */
+   IF ! SqlWACacheIsEnabled()
+      dbCommit()
    ENDIF
 
+   SqlExecCloseTable( cAlias, nWA )
+
 RETURN { { "affected_rows" }, { { 1 } } }
 
 
@@ -2423,6 +2463,7 @@ METHOD RunUpdate() CLASS TSqlExecutor
 
    LOCAL cTable, aSet, xWhere, cAlias, nWA, i, nFPos, xVal
    LOCAL nAffected := 0
+   LOCAL aFPos, aValuePc, pcWhere, lAllOk, cValSrc
 
    cTable := ::hQuery[ "table" ]
    aSet   := ::hQuery[ "set" ]
@@ -2430,17 +2471,86 @@ METHOD RunUpdate() CLASS TSqlExecutor
    cAlias := cTable
    ::aTables := { { cTable, cAlias, "" } }
 
-   nWA := Select( cAlias )
-   IF nWA == 0
-      BEGIN SEQUENCE
-         dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
-      RECOVER
-         dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
-      END SEQUENCE
-   ELSE
-      dbSelectArea( nWA )
+   nWA := SqlExecOpenTable( cTable, cAlias )
+
+   /* Fast path: compile WHERE + every SET value to pcode and delegate
+    * to Go RTL SqlBulkUpdate — skips per-record Go↔PRG boundary.
+    * Conditions: no active transaction (txn log records can't be
+    * emitted from inside the Go loop), no subquery / CASE / other
+    * nodes that PcCompile can't handle (try/fail pattern).
+    *
+    * Per-plan cache: when cCacheKey is set (TFiveSQL supplies it for
+    * plan-cached queries), we stash the compiled pcode under that key
+    * so subsequent identical UPDATEs skip the SqlExprToPrg + PcCompile
+    * walk entirely. The cached pcode is valid as long as the plan
+    * itself lives in the plan cache — which is forever in-process. */
+   IF ! ::oTxn:IsActive()
+      LOCAL hPcCached
+      IF ! Empty( ::cCacheKey ) .AND. hb_HHasKey( s_hDmlPcodeCache, ::cCacheKey )
+         hPcCached := s_hDmlPcodeCache[ ::cCacheKey ]
+         nAffected := SqlBulkUpdate( hPcCached[ "set_fpos" ], ;
+                                      hPcCached[ "where_pc" ], ;
+                                      hPcCached[ "set_pc" ] )
+         IF ! SqlWACacheIsEnabled()
+            dbCommit()
+         ENDIF
+         SqlExecCloseTable( cAlias, nWA )
+         RETURN { { "affected_rows" }, { { nAffected } } }
+      ENDIF
+
+      aFPos := {}
+      aValuePc := {}
+      lAllOk := .T.
+      FOR i := 1 TO Len( aSet )
+         nFPos := FieldPos( aSet[ i ][ 1 ] )
+         IF nFPos <= 0
+            lAllOk := .F.
+            EXIT
+         ENDIF
+         cValSrc := ::SqlExprToPrg( aSet[ i ][ 2 ] )
+         IF cValSrc == NIL
+            lAllOk := .F.
+            EXIT
+         ENDIF
+         AAdd( aFPos, nFPos )
+         AAdd( aValuePc, PcCompile( cValSrc ) )
+         IF ATail( aValuePc ) == NIL
+            lAllOk := .F.
+            EXIT
+         ENDIF
+      NEXT
+      pcWhere := NIL
+      IF lAllOk .AND. xWhere != NIL
+         cValSrc := ::SqlExprToPrg( xWhere )
+         IF cValSrc == NIL
+            lAllOk := .F.
+         ELSE
+            pcWhere := PcCompile( cValSrc )
+            IF pcWhere == NIL
+               lAllOk := .F.
+            ENDIF
+         ENDIF
+      ENDIF
+      IF lAllOk
+         nAffected := SqlBulkUpdate( aFPos, pcWhere, aValuePc )
+         /* Populate the per-plan cache for subsequent calls. */
+         IF ! Empty( ::cCacheKey )
+            s_hDmlPcodeCache[ ::cCacheKey ] := { ;
+               "set_fpos" => aFPos, ;
+               "set_pc"   => aValuePc, ;
+               "where_pc" => pcWhere }
+         ENDIF
+         /* Defer commit under WA cache — batched at Disable/exit. */
+         IF ! SqlWACacheIsEnabled()
+            dbCommit()
+         ENDIF
+         SqlExecCloseTable( cAlias, nWA )
+         RETURN { { "affected_rows" }, { { nAffected } } }
+      ENDIF
    ENDIF
 
+   /* Fallback: PRG scan loop — handles txn logging + non-compilable
+    * expressions (subquery, complex CASE, UDF in value or WHERE). */
    dbGoTop()
    WHILE ! Eof()
       IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) )
@@ -2459,12 +2569,12 @@ METHOD RunUpdate() CLASS TSqlExecutor
       ENDIF
       dbSkip()
    ENDDO
-   dbCommit()
-
-   IF nWA == 0
-      dbCloseArea()
+   IF ! SqlWACacheIsEnabled()
+      dbCommit()
    ENDIF
 
+   SqlExecCloseTable( cAlias, nWA )
+
 RETURN { { "affected_rows" }, { { nAffected } } }
 
 
@@ -2478,16 +2588,7 @@ METHOD RunDelete() CLASS TSqlExecutor
    cAlias := cTable
    ::aTables := { { cTable, cAlias, "" } }
 
-   nWA := Select( cAlias )
-   IF nWA == 0
-      BEGIN SEQUENCE
-         dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
-      RECOVER
-         dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
-      END SEQUENCE
-   ELSE
-      dbSelectArea( nWA )
-   ENDIF
+   nWA := SqlExecOpenTable( cTable, cAlias )
 
    SET DELETED ON
    dbGoTop()
@@ -2501,13 +2602,84 @@ METHOD RunDelete() CLASS TSqlExecutor
       ENDIF
       dbSkip()
    ENDDO
-   dbCommit()
+   IF ! SqlWACacheIsEnabled()
+      dbCommit()
+   ENDIF
 
-   IF nWA == 0
+   SqlExecCloseTable( cAlias, nWA )
+
+RETURN { { "affected_rows" }, { { nAffected } } }
+
+
+/* ======================================================================
+ *  Workarea open/close helpers — consult the Go-native WA cache.
+ *  When the cache is enabled (SqlWACacheEnable), SqlExecOpenTable
+ *  reuses a previously opened workarea instead of running dbUseArea
+ *  every call. SqlExecCloseTable leaves cached entries alive; plain
+ *  (auto-opened, not cached) areas still close as before so tests
+ *  that rely on immediate file release (FErase, UNIQUE index rebuild)
+ *  stay correct when the cache is off — which is the default.
+ * ====================================================================== */
+
+FUNCTION SqlExecOpenTable( cTable, cAlias )
+
+   LOCAL nWA
+
+   /* Alias already open (by the caller or by an earlier cached open):
+    * select it and return its workarea number (> 0), which tells
+    * SqlExecCloseTable to leave the area open. */
+   nWA := Select( cAlias )
+   IF nWA > 0
+      dbSelectArea( nWA )
+      RETURN nWA
+   ENDIF
+
+   /* Select() reported 0, so the alias is closed. A cache entry that
+    * still exists for it is stale (manual close / CLOSE ALL behind our
+    * back) — drop it before opening fresh. A live cached area can
+    * never reach this point: it is returned by the branch above. */
+   IF SqlWACacheGet( cAlias ) > 0
+      SqlWACacheInvalidate( cAlias )
+   ENDIF
+
+   /* Open fresh. Two-step fallback mirrors the prior inline logic so
+    * callers using mixed-case filenames on case-sensitive filesystems
+    * still succeed. */
+   BEGIN SEQUENCE
+      dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. )
+   RECOVER
+      dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. )
+   END SEQUENCE
+   nWA := Select( cAlias )
+
+   /* Cache enabled: register the new area and return its number (> 0)
+    * so SqlExecCloseTable skips the close — the cache owns the
+    * lifecycle from here on. */
+   IF nWA > 0 .AND. SqlWACacheIsEnabled()
+      SqlWACachePut( cAlias, nWA )
+      RETURN nWA
+   ENDIF
+
+   /* Cache off (or the open left no area): 0 means "caller must close",
+    * matching the legacy nWA == 0 gate. */
+RETURN 0
+
+
+FUNCTION SqlExecCloseTable( cAlias, nWA )
+
+   /* Close only when SqlExecOpenTable returned 0 (it opened the area and
+    * did not cache it); nWA > 0 means reused or cache-owned — leave open.
+    * NOTE(review): dbCloseArea() hits the current area — confirm cAlias. */
+   IF nWA == 0 .AND. ! SqlWACacheIsEnabled()
+      dbCloseArea()
+   ELSEIF nWA == 0 .AND. SqlWACacheIsEnabled() .AND. ;
+          SqlWACacheGet( cAlias ) == 0
+      /* Cache is enabled now but reports no entry for this alias, so
+       * it does not own the area. Keep legacy behavior — close. */
       dbCloseArea()
    ENDIF
 
-RETURN { { "affected_rows" }, { { nAffected } } }
+RETURN NIL
 
 
 /* ======================================================================
@@ -2626,17 +2798,11 @@ FUNCTION SqlMaterializeSubquery( xSubQ, cAlias, aParams )
    NEXT
 
    cTmpFile := "__drv_" + Lower( cAlias )
-   dbCreate( cTmpFile + ".dbf", aStruct )
-   USE ( cTmpFile + ".dbf" ) NEW EXCLUSIVE ALIAS __DRVTMP
-   FOR i := 1 TO Len( aRows2 )
-      dbAppend()
-      FOR j := 1 TO Min( Len( aStruct ), Len( aRows2[ i ] ) )
-         IF aRows2[ i ][ j ] != NIL
-            FieldPut( j, aRows2[ i ][ j ] )
-         ENDIF
-      NEXT
-   NEXT
-   dbCommit()
+   /* MEMRDD in-memory temp — avoids dbCreate + FErase disk syscalls. */
+   dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" )
+   dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, "__DRVTMP", .F., .F. )
+   /* Go RTL SqlBulkInsert — subquery driving-table materialization. */
+   SqlBulkInsert( aRows2 )
    CLOSE __DRVTMP
 
 RETURN { cTmpFile, cAlias, "" }
@@ -2922,26 +3088,16 @@ METHOD MaterializeRecursiveCTE( aCTE ) CLASS TSqlExecutor
          dbSelectArea( nExistWA )
          dbCloseArea()
       ENDIF
-      IF hb_FileExists( cTmpFile + ".dbf" )
-         FErase( cTmpFile + ".dbf" )
-      ENDIF
-
+      /* MEMRDD in-memory temp for CTE — no file create/delete. */
       BEGIN SEQUENCE
-         dbCreate( cTmpFile + ".dbf", aStruct )
+         dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" )
       RECOVER
       END SEQUENCE
 
       BEGIN SEQUENCE
-         USE ( cTmpFile + ".dbf" ) NEW ALIAS ( cAlias )
-         FOR j := 1 TO Len( aDataRows )
-            dbAppend()
-            FOR k := 1 TO Min( Len( aStruct ), Len( aDataRows[ j ] ) )
-               IF aDataRows[ j ][ k ] != NIL
-                  FieldPut( k, aDataRows[ j ][ k ] )
-               ENDIF
-            NEXT
-         NEXT
-         dbCommit()
+         dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cAlias, .F., .F. )
+         /* Go RTL SqlBulkInsert — CTE materialization path. */
+         SqlBulkInsert( aDataRows )
       RECOVER
       END SEQUENCE
 
@@ -2973,7 +3129,7 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor
 
    LOCAL i, j, k, nColIdx, xExpr
    LOCAL cFunc, aPartBy, aOrdBy, aFuncArgs
-   LOCAL hPartitions, cPartKey, aPartIdx
+   LOCAL aPartitions, cPartKey, aPartIdx
    LOCAL aSorted, aIdxMap, nPartCol
    LOCAL nRank, nDenseRank, nRowNum
    LOCAL xPrev, xCurr, nTies
@@ -2981,6 +3137,7 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor
    LOCAL nRunSum, nRunCount
    LOCAL aWinCols, nWC
    LOCAL hFrame, nFS, nFE, m, xVal, xMin, xMax, lDefaultFrame
+   LOCAL aPartColIdx, aSortSpec, nOrdCol
 
    /* Scan for window function columns */
    aWinCols := {}
@@ -3008,69 +3165,55 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor
          hFrame := xExpr[ 6 ]
       ENDIF
 
-      /* Build partition groups as arrays of row indices */
-      hPartitions := { => }
-      FOR i := 1 TO Len( aRows )
-         cPartKey := ""
-         IF ValType( aPartBy ) == "A"
-            FOR j := 1 TO Len( aPartBy )
-               nPartCol := SqlFindColIdx( aPartBy[ j ], aFN )
-               IF nPartCol == 0
-                  nPartCol := SqlFindColIdx2( SqlExprName( aPartBy[ j ] ), aFN )
-               ENDIF
-               IF nPartCol > 0 .AND. nPartCol <= Len( aRows[ i ] )
-                  cPartKey += SqlValToStr( aRows[ i ][ nPartCol ] ) + "|"
-               ENDIF
-            NEXT
-         ENDIF
-         IF ! hb_HHasKey( hPartitions, cPartKey )
-            hPartitions[ cPartKey ] := {}
-         ENDIF
-         AAdd( hPartitions[ cPartKey ], i )
-      NEXT
+      /* Resolve PARTITION BY columns once, then delegate the row-index
+       * grouping to Go RTL SqlWindowPartitions — removes N·M per-row
+       * Go↔PRG boundary crossings for SqlValToStr / hb_HHasKey / AAdd. */
+      aPartColIdx := {}
+      IF ValType( aPartBy ) == "A"
+         FOR j := 1 TO Len( aPartBy )
+            nPartCol := SqlFindColIdx( aPartBy[ j ], aFN )
+            IF nPartCol == 0
+               nPartCol := SqlFindColIdx2( SqlExprName( aPartBy[ j ] ), aFN )
+            ENDIF
+            IF nPartCol > 0
+               AAdd( aPartColIdx, nPartCol )
+            ENDIF
+         NEXT
+      ENDIF
+      aPartitions := SqlWindowPartitions( aRows, aPartColIdx )
+
+      /* Pre-resolve ORDER BY column indices once per window column —
+       * Go SqlWindowSortPartition reads the resolved {nCol, lDesc}
+       * pairs directly, so every partition sort avoids the repeated
+       * SqlFindColIdx linear scan inside per-comparison PRG blocks. */
+      aSortSpec := {}
+      IF ValType( aOrdBy ) == "A" .AND. Len( aOrdBy ) > 0
+         FOR j := 1 TO Len( aOrdBy )
+            nOrdCol := SqlFindColIdx( aOrdBy[ j ][ 1 ], aFN )
+            IF nOrdCol == 0
+               nOrdCol := SqlFindColIdx2( SqlExprName( aOrdBy[ j ][ 1 ] ), aFN )
+            ENDIF
+            IF nOrdCol > 0
+               AAdd( aSortSpec, { nOrdCol, aOrdBy[ j ][ 2 ] == "DESC" } )
+            ENDIF
+         NEXT
+      ENDIF
 
       /* Process each partition */
-      FOR EACH aPartIdx IN hb_HValues( hPartitions )
+      FOR EACH aPartIdx IN aPartitions
 
-         /* Sort partition indices by ORDER BY columns */
-         IF ValType( aOrdBy ) == "A" .AND. Len( aOrdBy ) > 0
-            ASort( aPartIdx,,, {|a, b| SqlWinRowCmp( aRows, a, b, aOrdBy, aFN ) < 0 } )
+         /* Sort partition indices by ORDER BY columns (Go RTL). */
+         IF Len( aSortSpec ) > 0
+            SqlWindowSortPartition( aRows, aPartIdx, aSortSpec )
          ENDIF
 
-         /* Compute window function for each row in the partition */
+         /* Compute window function for each row in the partition.
+          * ROW_NUMBER/RANK/DENSE_RANK all go through one Go RTL call
+          * that walks the partition and writes the rank column —
+          * removes per-row SqlWinRowsEqual + PRG indexing overhead. */
          DO CASE
-         CASE cFunc == "ROW_NUMBER"
-            FOR k := 1 TO Len( aPartIdx )
-               IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] )
-                  aRows[ aPartIdx[ k ] ][ nColIdx ] := k
-               ENDIF
-            NEXT
-
-         CASE cFunc == "RANK"
-            nRank := 1
-            FOR k := 1 TO Len( aPartIdx )
-               IF k > 1
-                  IF ! SqlWinRowsEqual( aRows, aPartIdx[ k ], aPartIdx[ k - 1 ], aOrdBy, aFN )
-                     nRank := k
-                  ENDIF
-               ENDIF
-               IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] )
-                  aRows[ aPartIdx[ k ] ][ nColIdx ] := nRank
-               ENDIF
-            NEXT
-
-         CASE cFunc == "DENSE_RANK"
-            nDenseRank := 1
-            FOR k := 1 TO Len( aPartIdx )
-               IF k > 1
-                  IF ! SqlWinRowsEqual( aRows, aPartIdx[ k ], aPartIdx[ k - 1 ], aOrdBy, aFN )
-                     nDenseRank++
-                  ENDIF
-               ENDIF
-               IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] )
-                  aRows[ aPartIdx[ k ] ][ nColIdx ] := nDenseRank
-               ENDIF
-            NEXT
+         CASE cFunc == "ROW_NUMBER" .OR. cFunc == "RANK" .OR. cFunc == "DENSE_RANK"
+            SqlWindowAssignRank( aRows, aPartIdx, aSortSpec, nColIdx, cFunc )
 
          CASE cFunc == "LAG"
             nLagLead := 1
@@ -3817,11 +3960,12 @@ RETURN aResult
  * simple column index (complex expressions → PRG fallback). */
 METHOD TryBuildSortSpec( aOrderBy, aFieldNames ) CLASS TSqlExecutor
 
-   LOCAL aSpec := {}, i, j, xE, cName, nCol, cDir, nDot
+   LOCAL aSpec := {}, i, j, xE, cName, nCol, cDir, cNulls, nDot
 
    FOR i := 1 TO Len( aOrderBy )
       xE := aOrderBy[ i ][ 1 ]
       cDir := Upper( aOrderBy[ i ][ 2 ] )
+      cNulls := iif( Len( aOrderBy[ i ] ) >= 3, Upper( aOrderBy[ i ][ 3 ] ), "" )
       IF xE == NIL .OR. xE[ 1 ] != ND_COL
          RETURN NIL
       ENDIF
@@ -3843,7 +3987,10 @@ METHOD TryBuildSortSpec( aOrderBy, aFieldNames ) CLASS TSqlExecutor
       IF nCol == 0
          RETURN NIL
       ENDIF
-      AAdd( aSpec, { nCol, cDir == "DESC" } )
+      /* Go SqlOrderBy reads {nCol, lDesc, cNulls}. cNulls empty means
+       * "default" — NIL sorts as the largest value (NULLs last in ASC,
+       * NULLs first in DESC). Explicit "FIRST"/"LAST" overrides. */
+      AAdd( aSpec, { nCol, cDir == "DESC", cNulls } )
    NEXT
 
 RETURN aSpec
diff --git a/_FiveSql2/src/TSqlExpr.prg b/_FiveSql2/src/TSqlExpr.prg
index dbb4f76..0cd78e3 100644
--- a/_FiveSql2/src/TSqlExpr.prg
+++ b/_FiveSql2/src/TSqlExpr.prg
@@ -42,45 +42,10 @@ FUNCTION SqlExprName( xE )
 
 RETURN "expr"
 
-/* Check whether an expression tree contains an aggregate function call.
- * Recurses into ND_BIN, ND_UNI, ND_FN args, ND_CASE to find nested
- * aggregates like `salary + COUNT(*)` or `CASE WHEN ... THEN SUM(x)`. */
-FUNCTION SqlExprHasAgg( xE )
-
-   LOCAL i
-
-   IF xE == NIL
-      RETURN .F.
-   ENDIF
-   IF xE[ 1 ] == ND_FN .AND. SqlIsAggName( xE[ 2 ] )
-      RETURN .T.
-   ENDIF
-   /* Recurse into sub-expressions */
-   IF xE[ 1 ] == ND_BIN
-      RETURN SqlExprHasAgg( xE[ 3 ] ) .OR. SqlExprHasAgg( xE[ 4 ] )
-   ENDIF
-   IF xE[ 1 ] == ND_UNI
-      RETURN SqlExprHasAgg( xE[ 3 ] )
-   ENDIF
-   IF xE[ 1 ] == ND_FN .AND. ValType( xE[ 3 ] ) == "A"
-      FOR i := 1 TO Len( xE[ 3 ] )
-         IF SqlExprHasAgg( xE[ 3 ][ i ] )
-            RETURN .T.
-         ENDIF
-      NEXT
-   ENDIF
-   IF xE[ 1 ] == ND_CASE .AND. ValType( xE[ 2 ] ) == "A"
-      FOR i := 1 TO Len( xE[ 2 ] )
-         IF SqlExprHasAgg( xE[ 2 ][ i ][ 1 ] ) .OR. SqlExprHasAgg( xE[ 2 ][ i ][ 2 ] )
-            RETURN .T.
-         ENDIF
-      NEXT
-      IF xE[ 3 ] != NIL .AND. SqlExprHasAgg( xE[ 3 ] )
-         RETURN .T.
-      ENDIF
-   ENDIF
-
-RETURN .F.
+/* SqlExprHasAgg is implemented in Go (hbrtl/sqlexpr.go) — registered
+ * as SQLEXPRHASAGG. The prior PRG recursive walker has been removed
+ * to avoid a name collision with the RTL symbol; behavior is
+ * byte-for-byte identical. See docs/RTL-Go-Native-Migration.md. */
 
 /* Return .T. if the function name is an aggregate */
 FUNCTION SqlIsAggName( c )
diff --git a/_FiveSql2/src/TSqlFunc.prg b/_FiveSql2/src/TSqlFunc.prg
index 4537d19..9da06db 100644
--- a/_FiveSql2/src/TSqlFunc.prg
+++ b/_FiveSql2/src/TSqlFunc.prg
@@ -329,121 +329,11 @@ FUNCTION SqlArg( a, n )
 RETURN NIL
 
 
-/* Coerce to string */
-FUNCTION SqlCoerceStr( x )
-
-   IF x == NIL
-      RETURN ""
-   ENDIF
-   IF ValType( x ) == "C"
-      RETURN x
-   ENDIF
-   IF ValType( x ) == "N"
-      RETURN AllTrim( Str( x ) )
-   ENDIF
-   IF ValType( x ) == "D"
-      RETURN DToC( x )
-   ENDIF
-   IF ValType( x ) == "L"
-      RETURN iif( x, "T", "F" )
-   ENDIF
-
-RETURN ""
-
-
-/* Coerce to numeric */
-FUNCTION SqlCoerceNum( x )
-
-   IF x == NIL
-      RETURN 0
-   ENDIF
-   IF ValType( x ) == "N"
-      RETURN x
-   ENDIF
-   IF ValType( x ) == "C"
-      RETURN Val( AllTrim( x ) )
-   ENDIF
-   IF ValType( x ) == "L"
-      RETURN iif( x, 1, 0 )
-   ENDIF
-
-RETURN 0
-
-
-/* Normalize for comparison: trim and uppercase strings */
-FUNCTION SqlCoerceForCmp( x )
-
-   IF x == NIL
-      RETURN x
-   ENDIF
-   IF ValType( x ) == "C"
-      RETURN Upper( AllTrim( x ) )
-   ENDIF
-
-RETURN x
-
-
-/* Evaluate truthiness */
-FUNCTION SqlIsTrue( x )
-
-   IF x == NIL
-      RETURN .F.
-   ENDIF
-   IF ValType( x ) == "L"
-      RETURN x
-   ENDIF
-   IF ValType( x ) == "N"
-      RETURN x != 0
-   ENDIF
-   IF ValType( x ) == "C"
-      RETURN ! Empty( x )
-   ENDIF
-
-RETURN .F.
-
-
-/* Case-insensitive equality comparison with cross-type coercion */
-FUNCTION SqlCmpEq( a, b )
-
-   IF a == NIL .OR. b == NIL
-      RETURN a == NIL .AND. b == NIL
-   ENDIF
-   IF ValType( a ) == ValType( b )
-      IF ValType( a ) == "C"
-         RETURN Upper( AllTrim( a ) ) == Upper( AllTrim( b ) )
-      ENDIF
-      RETURN a == b
-   ENDIF
-   IF ValType( a ) == "N" .AND. ValType( b ) == "C"
-      RETURN a == Val( AllTrim( b ) )
-   ENDIF
-   IF ValType( a ) == "C" .AND. ValType( b ) == "N"
-      RETURN Val( AllTrim( a ) ) == b
-   ENDIF
-
-RETURN .F.
-
-
-/* Case-insensitive less-than comparison */
-FUNCTION SqlCmpLt( a, b )
-
-   IF a == NIL .OR. b == NIL
-      RETURN .F.
-   ENDIF
-   IF ValType( a ) == ValType( b )
-      IF ValType( a ) == "C"
-         RETURN Upper( AllTrim( a ) ) < Upper( AllTrim( b ) )
-      ENDIF
-      RETURN a < b
-   ENDIF
-   IF ValType( a ) == "N" .AND. ValType( b ) == "C"
-      RETURN a < Val( AllTrim( b ) )
-   ENDIF
-   IF ValType( a ) == "C" .AND. ValType( b ) == "N"
-      RETURN Val( AllTrim( a ) ) < b
-   ENDIF
-
-RETURN .F.
+/* SqlCoerceStr/SqlCoerceNum/SqlCoerceForCmp/SqlIsTrue/SqlCmpEq/SqlCmpLt
+ * are implemented in Go (hbrtl/sqlhelpers.go) — registered as
+ * SQLCOERCESTR etc. The PRG bodies have been removed to avoid symbol
+ * collision with the RTL symbols; behavior is byte-for-byte identical.
+ * See docs/RTL-Go-Native-Migration.md (Tier 4).  */
 
 
 /* SQL LIKE pattern matching with optional escape character */
diff --git a/_FiveSql2/src/TSqlSort.prg b/_FiveSql2/src/TSqlSort.prg
index db8e7ce..bece564 100644
--- a/_FiveSql2/src/TSqlSort.prg
+++ b/_FiveSql2/src/TSqlSort.prg
@@ -32,13 +32,15 @@ RETURN SELF
 
 METHOD OrderBy( aRows, aFN, aOB, aTables, aParams ) CLASS TSqlSort
 
-   LOCAL i, nCol
+   LOCAL i, nCol, cNulls
 
    IF Len( aRows ) < 2 .OR. Len( aOB ) == 0
       RETURN aRows
    ENDIF
 
-   /* Pre-resolve column indexes */
+   /* Pre-resolve column indexes. Third element carries the explicit
+    * NULLS FIRST/LAST spec parsed by TSqlParser2:ParseOrderBy —
+    * empty string means "use default (NIL as largest)". */
    s_aOBCols := {}
    s_aOBNames := aFN
    FOR i := 1 TO Len( aOB )
@@ -46,7 +48,8 @@ METHOD OrderBy( aRows, aFN, aOB, aTables, aParams ) CLASS TSqlSort
       IF nCol == 0
          nCol := SqlFindColIdx2( SqlExprName( aOB[ i ][ 1 ] ), aFN )
       ENDIF
-      AAdd( s_aOBCols, { nCol, aOB[ i ][ 2 ] } )
+      cNulls := iif( Len( aOB[ i ] ) >= 3, Upper( aOB[ i ][ 3 ] ), "" )
+      AAdd( s_aOBCols, { nCol, aOB[ i ][ 2 ], cNulls } )
    NEXT
 
    ASort( aRows,,, {|a, b| SqlRowCompare( a, b ) < 0 } )
@@ -56,18 +59,11 @@ RETURN aRows
 
 METHOD Distinct( aRows ) CLASS TSqlSort
 
-   LOCAL aR := {}, i, cKey
-   LOCAL hSeen := { => }
-
-   FOR i := 1 TO Len( aRows )
-      cKey := ::RowKey( aRows[ i ] )
-      IF ! hb_HHasKey( hSeen, cKey )
-         hSeen[ cKey ] := .T.
-         AAdd( aR, aRows[ i ] )
-      ENDIF
-   NEXT
-
-RETURN aR
+   /* Go RTL SqlDistinct: single-pass dedup through a Go map keyed by the
+    * row key. Key construction is meant to match the prior PRG ::RowKey
+    * (same SqlValToStr mapping + '|' separator), so the output should be
+    * identical to the old PRG loop — just much faster on large result sets. */
+RETURN SqlDistinct( aRows )
 
 
 METHOD RowKey( aR ) CLASS TSqlSort
@@ -118,11 +114,12 @@ RETURN 0
 /* Multi-key row comparator for ASort */
 FUNCTION SqlRowCompare( aRowA, aRowB )
 
-   LOCAL i, nCol, cDir, xA, xB, nCmp
+   LOCAL i, nCol, cDir, cNulls, lNullsFirst, xA, xB, nCmp
 
    FOR i := 1 TO Len( s_aOBCols )
       nCol := s_aOBCols[ i ][ 1 ]
       cDir := s_aOBCols[ i ][ 2 ]
+      cNulls := iif( Len( s_aOBCols[ i ] ) >= 3, s_aOBCols[ i ][ 3 ], "" )
 
       IF nCol <= 0 .OR. nCol > Len( aRowA ) .OR. nCol > Len( aRowB )
          LOOP
@@ -131,15 +128,22 @@ FUNCTION SqlRowCompare( aRowA, aRowB )
       xA := aRowA[ nCol ]
       xB := aRowB[ nCol ]
 
-      /* NULLs sort last */
+      /* NULL ordering — default: NIL is largest (NULLs last in ASC,
+       * NULLs first in DESC). Explicit NULLS FIRST/LAST (SQL:2003)
+       * from the parser overrides direction. */
       IF xA == NIL .AND. xB == NIL
          LOOP
       ENDIF
-      IF xA == NIL
-         RETURN iif( cDir == "DESC", -1, 1 )
-      ENDIF
-      IF xB == NIL
-         RETURN iif( cDir == "DESC", 1, -1 )
+      IF xA == NIL .OR. xB == NIL
+         DO CASE
+         CASE cNulls == "FIRST" ; lNullsFirst := .T.
+         CASE cNulls == "LAST"  ; lNullsFirst := .F.
+         OTHERWISE              ; lNullsFirst := ( cDir == "DESC" )
+         ENDCASE
+         IF xA == NIL
+            RETURN iif( lNullsFirst, -1, 1 )
+         ENDIF
+         RETURN iif( lNullsFirst, 1, -1 )
       ENDIF
 
       nCmp := 0
diff --git a/_FiveSql2/test/bench_bulk.prg b/_FiveSql2/test/bench_bulk.prg
new file mode 100644
index 0000000..e27f4c3
--- /dev/null
+++ b/_FiveSql2/test/bench_bulk.prg
@@ -0,0 +1,92 @@
+// Large-scale bulk-insert / CTE materialization benchmark.
+// Isolates the SqlBulkInsert Go RTL win: rows × cols boundary
+// crossings collapse to a single RTL call, so the speedup grows
+// linearly with N and M.
+
+#include "FiveSqlDef.ch"
+
+PROCEDURE Main()
+   /* Benchmark driver: build bench_big, time two queries (20 runs each), print the results, clean up. */
+   LOCAL t0, t1, i, aR, nRows
+
+   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )
+
+   ? "================================================================"
+   ? "  FiveSql2 Bulk Insert / Large-CTE Benchmark"
+   ? "================================================================"
+   ?
+
+   SetupLarge()
+
+   ? "--- CTE materialization at scale ---"
+
+   /* Big CTE: filter 10,000 rows, materialize, ORDER BY in outer. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO 20
+      aR := five_SQL( ;
+         "WITH big_cte AS (SELECT id, name, val FROM bench_big WHERE val > 5000) " + ;
+         "SELECT * FROM big_cte ORDER BY val DESC" )
+   NEXT
+   t1 := hb_MilliSeconds()
+   nRows := 0   // counted from the last iteration's result only
+   IF ValType( aR ) == "A" .AND. Len( aR ) >= 2
+      nRows := Len( aR[ 2 ] )   // aR[ 2 ] is treated as the row list
+   ENDIF
+   R( "BULK_CTE_10k_20iter", t1 - t0, nRows )
+
+   /* Subquery-driving-table materialization at scale. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO 20
+      aR := five_SQL( ;
+         "SELECT a.id, a.val FROM (SELECT id, val FROM bench_big WHERE val > 8000) a " + ;
+         "ORDER BY a.val" )
+   NEXT
+   t1 := hb_MilliSeconds()
+   nRows := 0   // counted from the last iteration's result only
+   IF ValType( aR ) == "A" .AND. Len( aR ) >= 2
+      nRows := Len( aR[ 2 ] )   // aR[ 2 ] is treated as the row list
+   ENDIF
+   R( "BULK_SUBQ_10k_20iter", t1 - t0, nRows )
+
+   CleanupLarge()
+
+   ?
+   ? "================================================================"
+RETURN
+
+
+STATIC FUNCTION SetupLarge()   // rebuild bench_big.dbf with 10,000 rows (ID = VAL = 1..10000)
+   LOCAL nRec
+   LOCAL aStruct := { ;
+      { "ID",   "N", 10, 0 }, ;
+      { "NAME", "C", 30, 0 }, ;
+      { "VAL",  "N", 10, 0 } }
+   IF hb_FileExists( "bench_big.dbf" )
+      FErase( "bench_big.dbf" )
+   ENDIF
+   dbCreate( "bench_big.dbf", aStruct )
+   USE bench_big.dbf NEW EXCLUSIVE
+   FOR nRec := 1 TO 10000
+      dbAppend()
+      FieldPut( 1, nRec )
+      FieldPut( 2, "Name_" + PadL( hb_ntos( nRec ), 6, "0" ) )
+      FieldPut( 3, nRec )
+   NEXT
+   dbCommit()
+   CLOSE bench_big
+RETURN NIL
+
+
+STATIC FUNCTION CleanupLarge()   // tear-down counterpart of SetupLarge
+   dbCloseAll()                  // close every open workarea first
+   FErase( "bench_big.dbf" )     // then delete the benchmark table file
+RETURN NIL
+
+
+/* Print one benchmark result line: label padded to 28 columns and the
+ * elapsed milliseconds in a 6-wide field; "rows=<n>" is appended only
+ * when nRows is positive. */
+STATIC FUNCTION R( cLabel, nMs, nRows )
+   LOCAL cRows := iif( nRows > 0, "  rows=" + hb_ntos( nRows ), "" )
+   ? " ", PadR( cLabel, 28 ) + Str( nMs, 6 ) + " ms" + cRows
+RETURN NIL
diff --git a/_FiveSql2/test/bench_bulk_upd.prg b/_FiveSql2/test/bench_bulk_upd.prg
new file mode 100644
index 0000000..a1ae966
--- /dev/null
+++ b/_FiveSql2/test/bench_bulk_upd.prg
@@ -0,0 +1,76 @@
+// Large-scale UPDATE benchmark — many matching rows so the per-row
+// savings of SqlBulkUpdate amortize the PcCompile setup cost.
+
+#include "FiveSqlDef.ch"
+
+PROCEDURE Main()
+   /* Benchmark driver: build bench_big, time three UPDATE shapes (many / all / zero matches), clean up. */
+   LOCAL t0, t1, i
+
+   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )
+
+   ? "================================================================"
+   ? "  FiveSql2 UPDATE Benchmark (10k rows, many matching)"
+   ? "================================================================"
+   ?
+
+   SetupLarge()
+
+   /* Match ~2500 rows per UPDATE: with inclusive BETWEEN the first pass hits 2501 rows, then one fewer per pass as val is bumped. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO 50
+      five_SQL( "UPDATE bench_big SET val = val + 1 WHERE val BETWEEN 2500 AND 5000" )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "UPD_2500match_50iter", t1 - t0 )
+
+   /* Match ALL rows (no WHERE); val + 0 leaves the stored values unchanged. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO 10
+      five_SQL( "UPDATE bench_big SET val = val + 0" )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "UPD_all_10iter_10k_each", t1 - t0 )
+
+   /* Match 0 rows (WHERE never true: every val starts at >= 1 and only grows). */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO 100
+      five_SQL( "UPDATE bench_big SET val = val + 1 WHERE val < 0" )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "UPD_0match_100iter", t1 - t0 )
+
+   CleanupLarge()
+   ?
+   ? "================================================================"
+RETURN
+
+STATIC PROCEDURE SetupLarge()   // rebuild bench_big.dbf with 10,000 rows (ID = VAL = 1..10000)
+   LOCAL nRec
+   LOCAL aStruct := { ;
+      { "ID",   "N", 10, 0 }, ;
+      { "NAME", "C", 30, 0 }, ;
+      { "VAL",  "N", 10, 0 } }
+   IF hb_FileExists( "bench_big.dbf" )
+      FErase( "bench_big.dbf" )
+   ENDIF
+   dbCreate( "bench_big.dbf", aStruct )
+   USE bench_big.dbf NEW EXCLUSIVE
+   FOR nRec := 1 TO 10000
+      dbAppend()
+      FieldPut( 1, nRec )
+      FieldPut( 2, "N_" + PadL( hb_ntos( nRec ), 6, "0" ) )
+      FieldPut( 3, nRec )
+   NEXT
+   dbCommit()
+   CLOSE bench_big
+RETURN
+
+STATIC PROCEDURE CleanupLarge()   // tear-down counterpart of SetupLarge
+   dbCloseAll()                   // close every open workarea first
+   FErase( "bench_big.dbf" )      // then delete the benchmark table file
+RETURN
+
+STATIC FUNCTION R( cLabel, nMs )   // print one result line: label padded to 32 columns + elapsed ms (7 wide)
+   ? " ", PadR( cLabel, 32 ) + Str( nMs, 7 ) + " ms"
+RETURN NIL
diff --git a/_FiveSql2/test/bench_prep_sql.prg b/_FiveSql2/test/bench_prep_sql.prg
new file mode 100644
index 0000000..80320f2
--- /dev/null
+++ b/_FiveSql2/test/bench_prep_sql.prg
@@ -0,0 +1,106 @@
+// Prepared-statement vs concatenated-SQL benchmark.
+// Compares a fixed `?` template (same SQL text on every call) against SQL
+// built by concatenating literals. NOTE(review): TFiveSQL:Execute's no-params
+// path normalizes literals into a template key, so both may hit the cache — confirm.
+
+#include "FiveSqlDef.ch"
+
+#define ITERS  1000
+
+PROCEDURE Main()
+   /* Benchmark driver: time concatenated vs `?`-parameterized INSERT and SELECT, ITERS calls each. */
+   LOCAL t0, t1, i
+
+   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )
+
+   ? "================================================================"
+   ? "  FiveSql2 Prepared-Statement Benchmark"
+   ? "  " + hb_ntos( ITERS ) + " iterations per pattern"
+   ? "================================================================"
+   ?
+
+   SetupTable()
+
+   /* A: concatenated INSERT — SQL text changes on every iteration.
+    *    NOTE(review): presumably a plan-cache miss per call — confirm. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO ITERS
+      five_SQL( "INSERT INTO bench_prep (id, val) VALUES (" + hb_ntos( i ) + ", 'a')" )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "CONCAT_INSERT", t1 - t0 )
+
+   TruncateTable()
+
+   /* B: prepared INSERT — same SQL text every iteration, cache hits
+    *    from the 2nd call onward.                                   */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO ITERS
+      five_SQL( "INSERT INTO bench_prep (id, val) VALUES (?, ?)", { i, "a" } )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "PREPARED_INSERT", t1 - t0 )
+
+   /* No truncate here: the ids 1..ITERS inserted by B are the data C and D select from. */
+
+   /* C: concatenated SELECT by id. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO ITERS
+      five_SQL( "SELECT val FROM bench_prep WHERE id = " + hb_ntos( i ) )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "CONCAT_SELECT", t1 - t0 )
+
+   /* D: prepared SELECT by id. */
+   t0 := hb_MilliSeconds()
+   FOR i := 1 TO ITERS
+      five_SQL( "SELECT val FROM bench_prep WHERE id = ?", { i } )
+   NEXT
+   t1 := hb_MilliSeconds()
+   R( "PREPARED_SELECT", t1 - t0 )
+
+   CleanupTable()
+   ?
+   ? "================================================================"
+RETURN
+
+
+STATIC PROCEDURE SetupTable()
+   LOCAL i   // declared first: LOCAL must precede executable statements in standard Harbour
+   IF hb_FileExists( "bench_prep.dbf" )
+      FErase( "bench_prep.dbf" )
+   ENDIF
+   dbCreate( "bench_prep.dbf", { ;
+      { "ID",  "N", 10, 0 }, ;
+      { "VAL", "C", 10, 0 } ;
+   } )
+
+   /* Seed ITERS rows (id 1..ITERS, val "a") so the table starts non-empty. */
+   USE bench_prep.dbf NEW EXCLUSIVE
+   FOR i := 1 TO ITERS
+      dbAppend()
+      FieldPut( 1, i )
+      FieldPut( 2, "a" )
+   NEXT
+   dbCommit()
+   CLOSE bench_prep
+RETURN
+
+
+STATIC PROCEDURE TruncateTable()   // empty bench_prep: open it EXCLUSIVE, zap all records, close it again
+   USE bench_prep.dbf NEW EXCLUSIVE
+   dbZap()   // removes every record from the table just opened
+   CLOSE bench_prep
+RETURN
+
+
+STATIC PROCEDURE CleanupTable()   // final tear-down of the benchmark table
+   dbCloseAll()                   // close every open workarea first
+   FErase( "bench_prep.dbf" )     // then delete the table file
+RETURN
+
+
+STATIC FUNCTION R( cLabel, nMs )   // print label, total ms, and the per-call average in microseconds (nMs * 1000 / ITERS)
+   ? " ", PadR( cLabel, 18 ) + Str( nMs, 7 ) + " ms   " + ;
+     Str( nMs * 1000 / ITERS, 8, 2 ) + " us/query"
+RETURN NIL
diff --git a/_FiveSql2/test/bench_sql.prg b/_FiveSql2/test/bench_sql.prg
index dce491a..5a9b647 100644
--- a/_FiveSql2/test/bench_sql.prg
+++ b/_FiveSql2/test/bench_sql.prg
@@ -25,6 +25,10 @@ PROCEDURE Main()
    /* Setup: create test tables */
    SetupBenchData()
 
+   /* Opt in to workarea cache — repeated DML against the same table
+    * skips dbUseArea/dbCloseArea syscalls.  Disabled at cleanup. */
+   SqlWACacheEnable()
+
    ? "--- SELECT Benchmarks ---"
 
    /* B1: Simple SELECT * (full scan) */
@@ -195,7 +199,8 @@ PROCEDURE Main()
    ? "  Benchmark Complete"
    ? "================================================================"
 
-   /* Cleanup */
+   /* Cleanup — dbCloseAll flushes + closes every workarea. */
+   SqlWACacheDisable()
    dbCloseAll()
    FErase( "bench_emp.dbf" )
    FErase( "bench_ord.dbf" )
diff --git a/_FiveSql2/test/test_null_order.prg b/_FiveSql2/test/test_null_order.prg
new file mode 100644
index 0000000..3e0117e
--- /dev/null
+++ b/_FiveSql2/test/test_null_order.prg
@@ -0,0 +1,140 @@
+// Regression test for NULL ordering in ORDER BY.
+// Both the Go fast path (SqlOrderBy) and the PRG fallback
+// (SqlRowCompare) must produce the same order and honor explicit
+// NULLS FIRST/LAST from SQL:2003.
+//
+// Default (no NULLS spec): NULLs sort as the largest value — last in
+// ASC, first in DESC. Matches PostgreSQL default and the legacy PRG
+// SqlRowCompare behavior that predates the Go port.
+
+STATIC s_nPass  := 0
+STATIC s_nFail  := 0
+STATIC s_nTotal := 0
+
+PROCEDURE Main()
+   LOCAL aR, i, aRows   // hoisted: LOCAL must precede executable statements in standard Harbour
+
+   ErrorBlock( {|e| QOut( "TRAP: " + e:description + " " + e:operation ), Break(e) } )
+
+   ? "================================================================"
+   ? "  FiveSql NULL Ordering Test"
+   ? "================================================================"
+   ?
+
+   SetupData()
+
+   // Diagnostic: show what score column actually contains after UPDATE,
+   // then what ORDER BY score ASC produces.
+   aR := five_SQL( "SELECT name, score FROM nullord" )
+   ? "Raw rows:"
+   IF ValType( aR ) == "A" .AND. Len( aR ) >= 2
+      aRows := aR[ 2 ]
+      FOR i := 1 TO Len( aRows )
+         ? "  name=" + AllTrim( aRows[ i ][ 1 ] ), "score=" + ;
+           iif( aRows[ i ][ 2 ] == NIL, "NIL", LTrim( Str( aRows[ i ][ 2 ] ) ) )
+      NEXT
+   ENDIF
+   ?
+   aR := five_SQL( "SELECT name, score FROM nullord ORDER BY score" )
+   ? "ORDER BY score ASC:"
+   IF ValType( aR ) == "A" .AND. Len( aR ) >= 2
+      aRows := aR[ 2 ]
+      FOR i := 1 TO Len( aRows )
+         ? "  name=" + AllTrim( aRows[ i ][ 1 ] ), "score=" + ;
+           iif( aRows[ i ][ 2 ] == NIL, "NIL", LTrim( Str( aRows[ i ][ 2 ] ) ) )
+      NEXT
+   ENDIF
+   ?
+
+   TestDefaultAsc()
+   TestDefaultDesc()
+   TestNullsFirstAsc()
+   TestNullsLastDesc()
+
+   ?
+   ? "================================================================"
+   ? "  Results:", LTrim(Str(s_nPass)), "/", LTrim(Str(s_nTotal)), "passed", "(" + LTrim(Str(s_nFail)) + " failed)"
+   ? "================================================================"
+
+   CleanupData()
+RETURN
+
+
+STATIC PROCEDURE SetupData()
+   LOCAL aFields
+   // Build nullord.dbf with five rows: A=10, B=30, D=20; C and E are appended without a SCORE.
+   aFields := { ;
+      {"NAME",   "C", 20, 0}, ;
+      {"SCORE",  "N",  6, 0} }
+   dbCreate( "nullord", aFields )
+   USE "nullord" NEW EXCLUSIVE
+   APPEND BLANK ; REPLACE NAME WITH "A", SCORE WITH 10
+   APPEND BLANK ; REPLACE NAME WITH "B", SCORE WITH 30
+   APPEND BLANK ; REPLACE NAME WITH "C"
+   APPEND BLANK ; REPLACE NAME WITH "D", SCORE WITH 20
+   APPEND BLANK ; REPLACE NAME WITH "E"
+   CLOSE ALL
+
+   // Try to null out C and E via UPDATE. NOTE(review): whether the DBF N field then reads back as NIL or 0 is unclear — confirm.
+   five_SQL( "UPDATE nullord SET score = NULL WHERE name = 'C'" )
+   five_SQL( "UPDATE nullord SET score = NULL WHERE name = 'E'" )
+RETURN
+
+
+STATIC PROCEDURE CleanupData()   // close all workareas, then delete the test table file
+   CLOSE ALL
+   FErase( "nullord.dbf" )
+RETURN
+
+
+STATIC FUNCTION NameOrder( aResult )   // trimmed column 1 of every row in aResult[ 2 ], joined in row order
+   LOCAL cNames := "", nRow
+   IF ! ( ValType( aResult ) == "A" .AND. Len( aResult ) >= 2 )
+      RETURN cNames   // not a usable result array: empty order string
+   ENDIF
+   FOR nRow := 1 TO Len( aResult[ 2 ] )
+      cNames += AllTrim( aResult[ 2 ][ nRow ][ 1 ] )
+   NEXT
+RETURN cNames
+
+
+STATIC PROCEDURE Check( cLabel, cGot, cWant )   // tally one assertion and print its PASS/FAIL line
+   s_nTotal += 1
+   IF ! ( cGot == cWant )
+      s_nFail += 1
+      ? "  FAIL:", cLabel, "→ got", cGot, "want", cWant
+   ELSE
+      s_nPass += 1
+      ? "  PASS:", cLabel, "→", cGot
+   ENDIF
+RETURN
+
+
+// NULLs are induced at projection time via NULLIF(score, 0): C and E were
+// appended without a SCORE, so s should be NULL for them whether the DBF N
+// field reads back as 0 or as NIL after the UPDATEs in SetupData.
+
+STATIC PROCEDURE TestDefaultAsc()   // ORDER BY s with no direction and no NULLS clause
+   LOCAL aR := five_SQL( "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s" )
+   // Default ASC: NULLs sort as largest → non-NULLs first (A=10, D=20, B=30), then NULLs (C, E)
+   Check( "default ASC (NULLs last)", NameOrder( aR ), "ADBCE" )   // NOTE(review): C-before-E among the NULL ties assumes input order is kept — confirm
+RETURN
+
+
+STATIC PROCEDURE TestDefaultDesc()   // ORDER BY s DESC with no NULLS clause
+   LOCAL aR := five_SQL( "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s DESC" )
+   // Default DESC: NULLs sort as largest → NULLs first (C, E), then descending (B=30, D=20, A=10)
+   Check( "default DESC (NULLs first)", NameOrder( aR ), "CEBDA" )   // NOTE(review): C-before-E among the NULL ties assumes input order is kept — confirm
+RETURN
+
+
+STATIC PROCEDURE TestNullsFirstAsc()   // explicit NULLS FIRST on ASC: expects NULLs (C, E) before A=10, D=20, B=30
+   LOCAL aR := five_SQL( "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s ASC NULLS FIRST" )
+   Check( "ASC NULLS FIRST", NameOrder( aR ), "CEADB" )
+RETURN
+
+
+STATIC PROCEDURE TestNullsLastDesc()   // explicit NULLS LAST on DESC: expects B=30, D=20, A=10 before NULLs (C, E)
+   LOCAL aR := five_SQL( "SELECT name, NULLIF(score, 0) AS s FROM nullord ORDER BY s DESC NULLS LAST" )
+   Check( "DESC NULLS LAST", NameOrder( aR ), "BDACE" )
+RETURN
diff --git a/compiler/gengo/gengo.go b/compiler/gengo/gengo.go
index 055630f..c5e89ed 100644
--- a/compiler/gengo/gengo.go
+++ b/compiler/gengo/gengo.go
@@ -113,6 +113,13 @@ func doGenerate(file *ast.File, debug, library bool) string {
 		}
 	}
 
+	if hasXBaseCommands(file) {
+		// Blank-import the in-memory RDD so MEMRDD / "mem:" paths work
+		// from PRG (the driver registers itself in its init).
+		g.imports["five/hbrdd/mem"] = true
+		g.importAlias["five/hbrdd/mem"] = "_"
+	}
+
 	g.emitHeader()
 	g.emitSymbols()
 	for _, d := range file.Decls {
diff --git a/docs/RTL-Go-Native-Migration.md b/docs/RTL-Go-Native-Migration.md
new file mode 100644
index 0000000..b9d6686
--- /dev/null
+++ b/docs/RTL-Go-Native-Migration.md
@@ -0,0 +1,835 @@
+# RTL Go-Native 전환 계획
+
+PRG 핫패스와 `hbrtl/` RTL 함수 중 Go 네이티브 구현이 이익인 후보 목록 및 진행 기록. 기준선: 최종 결과가 **Harbour와 동일**해야 한다.
+
+## 배경
+
+FiveSql2 성능 개선 흐름(`3caadb2 SqlOrderBy+SqlGroupBy Go RTL`, `5fc9c3b SqlHashJoin Go RTL` 등)은 PRG 핫루프를 Go RTL로 옮겨 큰 이득을 보였다. 본 문서는 같은 패턴을 체계적으로 적용할 후보를 추린다.
+
+## Harbour 호환 검증 근거
+
+| 항목 | 근거 | 영향 |
+|------|------|------|
+| 해시 기본 플래그 | [harbour-core/include/hbapi.h:927-931](../harbour-core/include/hbapi.h#L927-L931) — `HB_HASH_FLAG_DEFAULT = HB_HASH_AUTOADD_ASSIGN \| HB_HASH_BINARY \| HB_HASH_KEEPORDER` | 삽입 순서 보존 + `memcmp` 정확 비교 |
+| 해시 키 비교 | [harbour-core/src/vm/hashes.c:167-182](../harbour-core/src/vm/hashes.c#L167-L182) — `hb_hashItemCmp` | CHAR padding trim 없음, Date/Timestamp는 julian 비교 |
+| 내부 탐색 | `pPairs[]` + `pnPos[]` 이진 탐색 (O(log N)) | Five의 Go map 치환은 O(1)로 상회 |
+
+## 후보 목록
+
+### ✅ Tier 1 — 즉시 이익, 시맨틱 안전
+
+| # | 대상 | 파일 | 방식 | 예상 효과 | 상태 |
+|---|------|------|------|-----------|------|
+| 1 | Hash 스토리지 | [hbrtl/hash.go](../hbrtl/hash.go), [hbrt/ops_collection.go](../hbrt/ops_collection.go), [hbrt/value.go](../hbrt/value.go) | `map[string]int` 인덱스 추가, 삽입 순서 슬라이스 유지 | 50–100x | **완료 (2026-04-17)** |
+| 2 | SqlDistinct | [_FiveSql2/src/TSqlSort.prg:57-70](../_FiveSql2/src/TSqlSort.prg#L57-L70), [hbrtl/sqlscan.go](../hbrtl/sqlscan.go) | Go RTL `map[string]struct{}` + `strings.Builder` | 100–300x | **완료 (2026-04-17)** |
+| 3 | SqlRowCompare NULL · 혼합타입 정합성 | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlSort.prg](../_FiveSql2/src/TSqlSort.prg), [_FiveSql2/src/TSqlExecutor.prg](../_FiveSql2/src/TSqlExecutor.prg) | Go/PRG 양 경로 NULL 순서 PRG 시맨틱으로 통일 + `NULLS FIRST/LAST` 배선 | 정합성 수정 | **완료 (2026-04-17)** |
+
+### ✅ Tier 2 — 블록 NIL 특화 + 누락 타입 보강 (완료)
+
+| # | 대상 | 파일 | 방식 | 상태 |
+|---|------|------|------|------|
+| 4 | ASort 타입 특화 + 정확성 | [hbrtl/array.go:134-300](../hbrtl/array.go#L134-L300) | 비교자 블록 없을 때 1회 타입 스캔 → 특화 비교자. Date/Logical/Timestamp 지원 추가 (기존엔 no-op) | **완료 (2026-04-17)** |
+| 5 | AScan fast-path | [hbrtl/array.go:302-380](../hbrtl/array.go#L302-L380) | 검색값이 string/int/double일 때 타입별 인라인 루프. 드물게 쓰는 타입은 `valuesEqual` fallback | **완료 (2026-04-17)** |
+
+### 🔎 Tier 3 — 내부 헬퍼 최적화
+
+| # | 대상 | 파일 | 방식 | 상태 |
+|---|------|------|------|------|
+| 6 | RAT 역방향 스캔 | [hbrtl/strings2.go:16-51](../hbrtl/strings2.go#L16-L51) | 검토 결과 `strings.LastIndex` + 부분슬라이스는 이미 최적. 변경 없음 | **검토 종료 (2026-04-17)** |
+| 7 | SqlExprHasAgg | [hbrtl/sqlexpr.go](../hbrtl/sqlexpr.go) | PRG 재귀 → Go AST walker + 상수 시간 agg 이름 조회 | **완료 (2026-04-17)** |
+
+### ✅ Tier 4 — DML Boundary-Crossing 감소 (완료)
+
+| # | 대상 | 파일 | 방식 | 상태 |
+|---|------|------|------|------|
+| 8 | SqlBulkInsert | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg](../_FiveSql2/src/TSqlExecutor.prg) | CTE/subquery/tmp 테이블 materialize 경로의 `FOR j ... dbAppend ... FOR k ... FieldPut` 이중 루프를 Go RTL 단일 호출로 대체 | **완료 (2026-04-17)** |
+| 9 | SqlBulkUpdate | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg RunUpdate](../_FiveSql2/src/TSqlExecutor.prg) | UPDATE 스캔 루프 전체를 Go RTL로 이관. WHERE + SET 값 표현식을 pcode로 컴파일해 PRG 메서드 디스패치 제거 | **완료 (2026-04-17)** |
+| 10 | MEMRDD 자동 임포트 | [compiler/gengo/gengo.go](../compiler/gengo/gengo.go) | xBase 명령을 사용하는 Five 프로그램(`hasXBaseCommands(file)`이 참)에 `_ "five/hbrdd/mem"` 블랭크 임포트 자동 추가 → `USE "mem:x" VIA "MEMRDD"` 즉시 사용 가능 | **완료 (2026-04-17)** |
+| 11 | PcCompile 결과 캐시 | [hbrtl/pcexpr.go](../hbrtl/pcexpr.go) | `sync.Map`으로 소스 문자열 키 캐시. 반복 쿼리에서 파서+genpc 건너뛰기 | **완료 (2026-04-17)** |
+| 12 | SQL 플랜 캐시 + HbDeepClone | [_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg), [hbrtl/array.go](../hbrtl/array.go) | `cSQL → hQuery` PRG 해시 캐시. 히트 시 Go RTL `HbDeepClone`으로 pristine 사본 반환 → `SqlFoldConst` 인-플레이스 변경 안전 | **완료 (2026-04-17)** |
+| 13 | 파라미터 바인딩 벤치 입증 | [_FiveSql2/test/bench_prep_sql.prg](../_FiveSql2/test/bench_prep_sql.prg) | 기존 `five_SQL(cSQL, aParams)` + `?` 파서가 이미 지원. 플랜 캐시와 결합 시 SELECT 1.58x, INSERT 1.12x | **입증 (2026-04-17)** |
+| 14 | CTE → MEMRDD | [_FiveSql2/src/TSqlExecutor.prg](../_FiveSql2/src/TSqlExecutor.prg), [hbrdd/mem/memrdd.go](../hbrdd/mem/memrdd.go), [hbrtl/sqlscan.go SqlBulkInsert](../hbrtl/sqlscan.go) | 3곳 materialize 경로를 `dbCreate("mem:xxx", ..., "MEMRDD")` + MEMRDD `dbUseArea`로 전환. SqlBulkInsert가 `*dbf.DBFArea` 외 일반 `hbrdd.Area`도 처리하도록 확장. MEMRDD Create가 필드명 trailing-space trim | **완료 (2026-04-17)** |
+| 15 | SqlWindowPartitions Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg) | PARTITION BY 키 빌드 + 행-인덱스 그룹핑을 Go RTL에 위임. N·M 경계 크로싱 → 1 | **완료 (2026-04-17)** |
+| 16 | SqlWindowSortPartition Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg) | 파티션 내 ORDER BY를 Go `sort.SliceStable` + 사전 해석된 컬럼 인덱스로 처리. PRG 비교 블록 제거 | **완료 (2026-04-17)** |
+| 17 | SqlGroupRows Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlAgg.prg GroupBy](../_FiveSql2/src/TSqlAgg.prg) | GROUP BY 그룹 빌드 루프만 Go RTL로. 집계·HAVING은 복잡 표현식 대응 위해 PRG 유지 | **완료 (2026-04-17)** |
+| 18 | SqlComputeAggSimple Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlAgg.prg ComputeAgg](../_FiveSql2/src/TSqlAgg.prg) | COUNT/SUM/AVG/MIN/MAX + 컬럼 인자 fast-path. 복잡 인자·GROUP_CONCAT은 PRG fallback | **완료 (2026-04-17)** |
+| 19 | SQL 스칼라 헬퍼 Go RTL | [hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go), [_FiveSql2/src/TSqlFunc.prg](../_FiveSql2/src/TSqlFunc.prg) | `SqlIsTrue/SqlCmpEq/SqlCmpLt/SqlCoerceForCmp/SqlCoerceNum/SqlCoerceStr` 6개 Go로. PRG tree-walker 평가 경로(HAVING, complex expr) 오버헤드 감소 | **완료 (2026-04-17)** |
+| 20 | SQL 템플릿 자동 파라미터화 | [hbrtl/sqlhelpers.go SqlExtractTemplate](../hbrtl/sqlhelpers.go), [_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg) | 리터럴(`TK_TEXT`/`TK_NUM`)을 `TK_QMARK`로 치환 + 템플릿 키로 플랜 캐시. 동일 구조 다른 값 쿼리가 캐시 공유 | **완료 (2026-04-17)** |
+| 21 | TSqlLexer Go 포팅 + 결합 | [hbrtl/sqlhelpers.go SqlLexerTokenize + SqlLexAndExtractTemplate](../hbrtl/sqlhelpers.go), [_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg) | PRG `SubStr` 기반 문자-단위 렉서를 Go byte-level FSM으로. 자동-파라미터화와 결합해 1회 Go 호출로 lex+normalize 완료 | **완료 (2026-04-17)** |
+| 22 | SqlWindowAssignRank Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlExecutor.prg ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg) | ROW_NUMBER/RANK/DENSE_RANK 배정 루프를 Go에서. 파티션당 1회 호출로 per-row SqlWinRowsEqual PRG 호출 제거 | **완료 (2026-04-17)** |
+| 23 | HbDeepClone 성능 개선 | [hbrtl/array.go deepCloneValue](../hbrtl/array.go) | 스칼라 원소는 재귀 스킵 (슬롯 복사만), 해시 키 공유 (문자열/숫자는 불변). 플랜 캐시 히트마다 수행되는 핫패스 | **완료 (2026-04-17)** |
+| 24 | WA 캐시 + 지연 commit | [hbrtl/sqlwacache.go](../hbrtl/sqlwacache.go), [_FiveSql2/src/TSqlExecutor.prg SqlExecOpenTable/CloseTable + RunInsert/Update/Delete](../_FiveSql2/src/TSqlExecutor.prg) | 워크에어리어 프로세스-수명 캐시 (opt-in). 활성화 시 DML의 per-query dbUseArea/dbCloseArea/dbCommit을 전부 배치 처리 → **B12 INSERT 48x** | **완료 (2026-04-17)** |
+| 25 | Plan pcode 캐시 + SqlBulkUpdate flush 지연 | [_FiveSql2/src/TSqlExecutor.prg s_hDmlPcodeCache + cCacheKey](../_FiveSql2/src/TSqlExecutor.prg), [hbrtl/sqlscan.go SqlBulkUpdate](../hbrtl/sqlscan.go) | 플랜 키별 컴파일된 pcode(aFPos/where/set_pc) 캐시 + WA cache 활성 시 Go RTL 내부 `Flush()` 스킵 → **B13 UPDATE 48x** | **완료 (2026-04-17)** |
+| 26 | SELECT 경로 plan pcode 캐시 | [_FiveSql2/src/TSqlExecutor.prg RunSelect fast path](../_FiveSql2/src/TSqlExecutor.prg) | #25의 패턴을 SELECT fast-path에도 적용. `TryBuildFieldPositions` + `TryCompileWhere` 결과를 `cCacheKey#sel`로 캐시. 반복 SELECT의 PRG AST walk 제거 | **완료 (2026-04-17)** |
+| 27 | SqlEvalHaving Go RTL | [hbrtl/sqlscan.go](../hbrtl/sqlscan.go), [_FiveSql2/src/TSqlAgg.prg EvalHaving](../_FiveSql2/src/TSqlAgg.prg) | HAVING 트리 walker를 Go로. ND_LIT/ND_NIL/ND_COL/ND_FN(5 aggs)/ND_BIN/ND_UNI 처리. 복잡 케이스는 PRG fallback | **완료 (2026-04-17, 효과 미미)** |
+
+### ❌ 제외 (Harbour 호환 리스크 과다)
+
+| 대상 | 제외 이유 |
+|------|----------|
+| SqlLikeMatch `regexp` 치환 | Harbour SQL LIKE의 `%`/`_`/`[abc]`/`[!abc]`/이스케이프 규칙은 regex와 미스매치. 자체 매처 필요 |
+| SubStr Go slice 직접 | 이미 slice 사용 중 ([hbrtl/strings.go:149](../hbrtl/strings.go#L149)). 변경 이익 없음 |
+| Descend `bytes.Map` | 성능 이익 <5% |
+| SET DATE 비트셋 사전계산 | 5–15%지만 `setDateFormat` 전역 일관성 리스크 > 이익 |
+
+### ✅ 이미 최적 (건드리지 말 것)
+
+- [hbrtl/crypto.go](../hbrtl/crypto.go) MD5/SHA256/BASE64/CRC32 — `crypto/md5`, `crypto/sha256`, `encoding/base64` 사용 중
+- [hbrtl/binconv.go](../hbrtl/binconv.go) BIN2I/L/W — `encoding/binary` 사용 중
+- [hbrtl/regex.go](../hbrtl/regex.go) — `regexp` 사용 중
+
+## 진행 기록
+
+### #1 Hash 스토리지 O(1) 전환 — 2026-04-17 완료
+
+**구조 변경** ([hbrt/value.go:237-249](../hbrt/value.go#L237-L249))
+```go
+type HbHash struct {
+    Keys   []Value           // 삽입 순서 (HB_HASH_KEEPORDER 기본)
+    Values []Value           // 병렬
+    Order  []int
+    Flags  int32
+    Index  map[string]int    // 신규: O(1) 탐색용 미러
+}
+```
+
+**신규 파일**: [hbrt/hash_helpers.go](../hbrt/hash_helpers.go)
+- `hashKey(v Value) (string, bool)` — `valueEqual` 동치류와 일치하는 직렬화. Nil/String/Numeric/Logical/Date/Timestamp 지원. 수치는 정수로 환산 가능하면 `'I'` 폼으로 정규화 (int/double 교차 매칭), -0.0 → +0.0
+- `(*HbHash).Lookup/Has/Set/Append/Delete/HashGet/ensureIndex/HashFromPairs` 메서드
+- 비인덱싱 키 타입(Array/Hash/Block/Pointer)은 fallback 선형 스캔 + `valueEqual`
+
+**호출부 전환**
+| 파일 | 변경 |
+|------|------|
+| [hbrt/ops_collection.go](../hbrt/ops_collection.go) | `HashGen`/`ArrayPush`/`ArrayPop` 헬퍼 경유. `HashGen`은 pair 수집 후 `Set`로 last-wins 보장 |
+| [hbrt/valuemethods.go](../hbrt/valuemethods.go) | `vmHashHas`/`vmHashDelete` 헬퍼 경유 |
+| [hbrt/hbfunc.go](../hbrt/hbfunc.go) | `HashAdd`→`Set`, `HashGetC`는 `"S"+key` 직접 Index 힛 |
+| [hbrt/macroeval.go](../hbrt/macroeval.go) | 해시 리터럴 평가 `Set` (중복 키 last-wins) |
+| [hbrt/gobridge.go](../hbrt/gobridge.go) | `reflect.Map` 변환 `Append` (Go map은 중복 키 없음) |
+| [hbrtl/hash.go](../hbrtl/hash.go) | 7개 RTL 함수 (HbHash/HGet/HSet/HDel/HHasKey/HKeys/HValues) 전체 헬퍼 경유 |
+| [hbrtl/json.go](../hbrtl/json.go) | `navigatePath`/`JsonMerge` 헬퍼 경유 |
+
+**Harbour 호환 보장**
+- 키 삽입 순서 보존 (`hb_HKeys()` 반환): `Keys[]` 슬라이스 유지
+- `HB_HASH_BINARY` 정확 비교: `hashKey`가 String을 raw bytes로 직렬화
+- 수치 교차 비교 (`1 == 1.0`): 정수로 환산 가능한 double은 `'I'` 폼으로 정규화
+- 비인덱싱 키: `valueEqual` fallback (Array/Hash/Block 포인터 동일성 포함)
+
+**스테일 방지**: `ensureIndex()`는 Index가 nil이거나 indexable 키 개수와 불일치하면 재구축. 테스트가 `.Keys = append(...)`로 직접 조작해도 다음 Lookup 시점에 자동 복구.
+
+**검증 (CLAUDE.md 3종)**
+- `go test ./...` — 15 패키지 ALL PASS
+- FiveSql2 — 43/43 (100%)
+- Harbour compat — 51/51 (100%)
+
+### #2 SqlDistinct Go RTL — 2026-04-17 완료
+
+**추가 함수** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `appendValueHashKey(sb *strings.Builder, v)` — `valueHashKey`와 동일 매핑이나 중간 문자열 할당 없이 Builder에 직접 기록
+- `SqlDistinct(aRows) → aRows` — Go map 기반 단일 패스 dedup, 입력 순서 보존
+
+**호출부 변경** ([_FiveSql2/src/TSqlSort.prg:57-62](../_FiveSql2/src/TSqlSort.prg#L57-L62))
+```harbour
+METHOD Distinct( aRows ) CLASS TSqlSort
+RETURN SqlDistinct( aRows )
+```
+PRG의 `hb_HHasKey` 루프 + 수동 `cKey += SqlValToStr(..) + "|"` 조립을 Go 한 번 호출로 대체. 컴파일러의 "undeclared variable" 경고는 RTL 함수 심볼이 gengo 테이블에 없기 때문 — 런타임에 `SQLDISTINCT` 심볼로 해결되어 동작은 정상.
+
+**Harbour 호환 보장**
+- 키 구성 규칙이 `SqlValToStr` 시맨틱(`appendValueHashKey`)과 byte-for-byte 일치 — CHAR는 trailing space trim, NIL은 `\x00NIL`, 숫자는 int/double 별도 경로
+- 입력 순서 보존 → SQL DISTINCT 결과의 첫 등장 순서 유지
+- 빈 배열 · 단일 행은 입력 그대로 반환 (PRG 동작과 일치)
+
+**등록** ([hbrtl/register.go:626](../hbrtl/register.go#L626))
+```go
+hbrt.Sym("SQLDISTINCT", hbrt.FsPublic, SqlDistinct),
+```
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 — 43/43 (100%)
+- Harbour compat — 51/51 (100%)
+
+### #3 SqlRowCompare NULL 순서 · 혼합타입 정합성 — 2026-04-17 완료
+
+**발견된 문제**
+1. Go `SqlOrderBy` 기본값이 NIL을 가장 작은 값으로 취급 (ASC에서 NULLs FIRST) — PRG `SqlRowCompare`의 원래 시맨틱(NIL = 가장 큼)과 정반대
+2. 파서가 `NULLS FIRST/LAST` (SQL:2003) 스펙을 파싱하지만 ([TSqlParser2.prg:962-973](../_FiveSql2/src/TSqlParser2.prg#L962-L973)) Go/PRG 어느 경로도 이를 읽지 않음 — 명시 스펙이 완전 무시
+3. Go `compareValues`가 숫자 vs 문자열 혼합 타입 비교를 지원하지 않음 — PRG는 `Val(AllTrim(x))`로 강제변환 ([TSqlSort.prg:145-148](../_FiveSql2/src/TSqlSort.prg#L145-L148))
+
+**수정 내역**
+| 파일 | 변경 |
+|------|------|
+| [hbrtl/sqlscan.go](../hbrtl/sqlscan.go) | `sortCol`에 `nullsFirst bool` 필드 추가. `cDir == DESC`를 기본값으로 하고 `arr.Items[2]`가 `"FIRST"`/`"LAST"`면 오버라이드. `compareValues`를 `compareValuesNonNil` 기반으로 재구성하고 NIL 처리를 호출부로 이관. 혼합 N/C 비교용 `parseLeadingNumeric` 추가 |
+| [_FiveSql2/src/TSqlExecutor.prg:3818](../_FiveSql2/src/TSqlExecutor.prg#L3818) | `TryBuildSortSpec`이 `aOrderBy[i][3]`을 읽어 3번째 요소 `cNulls`로 Go 스펙에 전달 |
+| [_FiveSql2/src/TSqlSort.prg:33-54](../_FiveSql2/src/TSqlSort.prg#L33-L54) | `OrderBy` 메서드가 `aOB[i][3]`을 `s_aOBCols`에 보존 |
+| [_FiveSql2/src/TSqlSort.prg:118-144](../_FiveSql2/src/TSqlSort.prg#L118-L144) | `SqlRowCompare`가 명시 `NULLS FIRST/LAST`를 우선 적용, 없으면 `cDir == DESC`를 기본 |
+
+**Harbour/FiveSql2 시맨틱 보장**
+- 기본값: NIL은 가장 큰 값 → ASC는 NULLs LAST, DESC는 NULLs FIRST (PRG 원래 동작, PostgreSQL 기본과 일치)
+- `NULLS FIRST/LAST` 명시 시 방향과 무관하게 스펙 우선
+- 혼합 N/C 비교: PRG `Val(AllTrim(x))` 동작 복제 (선행 공백 무시, 부호/소수점 허용)
+
+**회귀 테스트** — [_FiveSql2/test/test_null_order.prg](../_FiveSql2/test/test_null_order.prg)
+4/4 PASS: default ASC, default DESC, ASC NULLS FIRST, DESC NULLS LAST
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 43/43 · Harbour compat 51/51
+
+### #4, #5 ASort 정확성/특화 + AScan fast-path — 2026-04-17 완료
+
+**ASort 버그 발견** — 기본 비교 경로([기존 array.go:164](../hbrtl/array.go#L164))가 `IsString` / `IsNumeric`가 아닌 타입에 대해 `return false`를 반환 → 날짜·논리값·타임스탬프 배열 정렬이 **no-op**.
+
+**수정 내역** ([hbrtl/array.go](../hbrtl/array.go))
+- `detectArrayKind(items)` — 1회 스캔으로 동종 배열 분류 (Int / Numeric / String / Date / Timestamp / Logical / Mixed)
+- 분류 결과에 따라 타입 특화 `sort.SliceStable` 선택. Int 배열은 `AsNumInt`만 써서 double 변환 생략
+- Mixed는 `valueLess` fallback — Harbour `<` 시맨틱 (NIL 가장 작음, 타입 내 비교)
+
+**AScan fast-path** ([hbrtl/array.go:302-380](../hbrtl/array.go#L302-L380))
+- 검색값이 문자열·정수·실수일 때 타입별 인라인 루프 — `valuesEqual` 호출·switch·타입 체크 생략
+- 정수 검색 + 배열 내 double 원소는 cross-type 비교 (`item.AsNumDouble() == float64(n)`) — 기존 `valuesEqual` 시맨틱 그대로
+- Date/Timestamp/Logical/NIL 검색은 `valuesEqual` fallback
+
+**회귀 테스트** ([tests/compat_harbour.prg:328-349](../tests/compat_harbour.prg#L328-L349))
+- `9c1 ASort dates ascending` — julian 기준 정렬 (신규)
+- `9c2 ASort logicals: F,F,T,T` — 논리값 정렬 (신규)
+- `9e1 AScan int found` — 정수 탐색 (신규)
+- `9e2 AScan int cross-type` — 정수로 저장된 배열에 double 검색 (신규)
+- `9e3 AScan int not found` — 부재 케이스 (신규)
+
+**Harbour 호환 보장**
+- 블록이 주어지면 100% 기존 동작 유지 (부작용 보존)
+- 블록 NIL 경로는 Harbour 기본 `<` 시맨틱 복제. 이전엔 깨져 있던 날짜/논리값이 이제 올바르게 정렬
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 — 43/43
+- Harbour compat — **56/56** (51 기존 + 5 신규)
+
+### #6 RAT 재검토 — 2026-04-17 종결(변경 없음)
+
+`strings.LastIndex`는 표준 라이브러리에서 이미 최적화되어 있음 (1바이트 패턴은 바이트 역방향 탐색, 그 외는 문자열 끝에서부터의 Rabin-Karp 탐색 — Boyer-Moore는 사용하지 않음). `target[:from]` 슬라이스는 Go에서 O(1)·할당-프리. nOccurrence=1 경로(실제 대부분)는 이미 단일 `LastIndex` 호출. >1 경로는 이전 매치 위치에서 시작해 누적 O(n)이므로 수동 역방향 스캔 대비 이득 없음. **원본 유지**.
+
+### #7 SqlExprHasAgg Go walker — 2026-04-17 완료
+
+**동기**: 매 쿼리마다 SELECT 컬럼 표현식 × 재귀 깊이 만큼 호출 — 깊은 식에서 PRG VM 프레임 셋업 비용이 누적됨. 호출 지점: [TSqlAgg.prg:41,62,85,134](../_FiveSql2/src/TSqlAgg.prg), [TSqlExecutor.prg:1298](../_FiveSql2/src/TSqlExecutor.prg#L1298) 등 6곳.
+
+**구현** ([hbrtl/sqlexpr.go](../hbrtl/sqlexpr.go))
+- `aggFuncSet` — `map[string]struct{}` (상수시간 룩업). AGG_FUNCTIONS 매크로와 완전 일치 (COUNT/SUM/AVG/MIN/MAX/GROUP_CONCAT/STRING_AGG/LISTAGG/JSON_ARRAYAGG/JSON_OBJECTAGG/XMLAGG/ANY_VALUE/BOOL_AND/BOOL_OR)
+- `sqlExprHasAggWalk` — PRG SqlExprHasAgg와 **byte-for-byte 동일한 재귀 트리 순회**. ND_FN/ND_BIN/ND_UNI/ND_CASE 가지 커버. ND_WINDOW/ND_SUB 의도적 미순회 (각자 집계 스코프 보유)
+- 상수 `ndLit`, `ndCol`, `ndFn` 등 — `FiveSqlDef.ch`의 kind 번호와 동일
+
+**호출부 변경**
+- [_FiveSql2/src/TSqlExpr.prg:45-49](../_FiveSql2/src/TSqlExpr.prg#L45-L49) — PRG `FUNCTION SqlExprHasAgg` 제거 (심볼 충돌 방지). 주석으로 Go RTL 위임 명시
+- [hbrtl/register.go](../hbrtl/register.go) — `SQLEXPRHASAGG` 공개 심볼 등록
+- 기존 호출부(`SqlExprHasAgg(xE)`) 그대로 동작 — RTL 심볼이 해결
+
+**Harbour 호환 보장**: AST kind 번호가 PRG와 정확히 일치. agg 함수 이름 집합이 `AGG_FUNCTIONS` 매크로와 정확히 일치. 재귀 가지 로직이 PRG와 줄 단위로 매치 (`IF xE[1] == ND_FN .AND. SqlIsAggName(xE[2])` 등).
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 — 43/43
+- Harbour compat — 56/56
+
+### #8 SqlBulkInsert Go RTL — 2026-04-17 완료
+
+**동기**: `dbAppend`/`FieldPut`은 이미 Go RTL. 병목은 **PRG 루프가 행·컬럼 단위로 Go RTL을 호출하는 boundary crossing**. N행 × M컬럼 = N·M 회 VM 프레임 셋업 + 스택 push/pop + 파라미터 마샬링.
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlBulkInsert(aRows) → nInserted` — 현재 workarea의 `*DBFArea`에 직접 `Append()` + `PutValue()` + `Flush()`
+- NIL 원소는 필드 건너뜀 (PRG `IF aRows[j][k] != NIL` 보존)
+- 행 길이가 필드 수 초과 시 초과분 무시, 부족 시 나머지 필드는 default
+
+**호출부 치환** — 동일 형상 루프 3곳 → 1줄
+| 위치 | 맥락 |
+|------|------|
+| [TSqlExecutor.prg:2310](../_FiveSql2/src/TSqlExecutor.prg#L2310) | CREATE TABLE AS SELECT / 임시테이블 로드 |
+| [TSqlExecutor.prg:2630](../_FiveSql2/src/TSqlExecutor.prg#L2630) | subquery driving-table materialization |
+| [TSqlExecutor.prg:2935](../_FiveSql2/src/TSqlExecutor.prg#L2935) | CTE materialization |
+
+**A/B 벤치마크** ([_FiveSql2/test/bench_bulk.prg](../_FiveSql2/test/bench_bulk.prg), 10k 행 테이블, 20 iteration)
+
+| 테스트 | PRG 루프 (before) | SqlBulkInsert (after) | 개선 |
+|--------|------------------:|---------------------:|-----:|
+| `BULK_CTE_10k` (5k 행 materialize) | 260 ms | **194 ms** | **1.34x** |
+| `BULK_SUBQ_10k` (2k 행 materialize) | 121 ms | **107 ms** | **1.13x** |
+
+*쿼리당 환산*: CTE 10k에서 `(260-194)/20 = 3.3ms`/쿼리 절감. 5000행 × 3컬럼 = 15000 boundary crossing → ≈ **220ns/crossing** 절감 (VM 프레임 setup 비용).
+
+**기존 bench_sql(100행 규모) 효과 미미**: 40행 × 2컬럼 = 80 crossing × 220ns ≈ 18µs/쿼리 절감. 4.3ms 쿼리에서 <1% noise. 실제 이득은 **N이 커질수록 선형 증가**.
+
+**Harbour 호환 보장**
+- NIL 원소 스킵 동작 정확히 보존
+- 행/필드 길이 불일치 처리 동일
+- `Flush()` 호출로 `dbCommit()` 대체 — 동일한 디스크 반영 시점
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 — 43/43
+- Harbour compat — 56/56
+
+## 진행 순서
+
+1. ✅ #1 Hash 스토리지 — 완료
+2. ✅ #2 SqlDistinct — 완료
+3. ✅ #3 SqlRowCompare NULL·혼합타입 — 완료
+4. ✅ #4 ASort 정확성/특화 — 완료
+5. ✅ #5 AScan fast-path — 완료
+6. ✅ #6 RAT 재검토 — 변경 없음
+7. ✅ #7 SqlExprHasAgg Go walker — 완료
+8. ✅ #8 SqlBulkInsert — 완료 (Tier 4)
+9. ✅ #9 SqlBulkUpdate — 완료 (Tier 4)
+10. ✅ #10 MEMRDD 자동 임포트 — 완료 (Tier 4 인프라)
+11. ✅ #11 PcCompile 결과 캐시 — 완료 (Tier 4 회수 최적화)
+12. ✅ #12 SQL 플랜 캐시 + HbDeepClone — 완료 (Tier 4 상위 계층)
+13. ✅ #13 파라미터 바인딩 입증 — 완료 (기존 기능 + 플랜 캐시 결합 효과)
+14. ✅ #14 CTE → MEMRDD — 완료 (디스크 임시파일 제거)
+15. ✅ #15 SqlWindowPartitions Go RTL — 완료 (윈도우 파티션 빌드)
+16. ✅ #16 SqlWindowSortPartition Go RTL — 완료 (윈도우 정렬)
+17. ✅ #17 SqlGroupRows Go RTL — 완료 (GROUP BY 그룹 빌드)
+18. ✅ #18 SqlComputeAggSimple Go RTL — 완료 (집계 함수 fast-path)
+19. ✅ #19 SQL 스칼라 헬퍼 Go RTL — 완료 (IsTrue/CmpEq/CmpLt/Coerce×3)
+20. ✅ #20 SQL 템플릿 자동 파라미터화 — 완료 (리터럴 → `?` + 플랜 캐시 공유)
+21. ✅ #21 TSqlLexer Go 포팅 + 결합 — 완료 (#20 효과 증폭)
+22. ✅ #22 SqlWindowAssignRank Go RTL — 완료 (ROW_NUMBER/RANK/DENSE_RANK)
+23. ✅ #23 HbDeepClone 성능 개선 — 완료 (스칼라 재귀 스킵 + 해시 키 공유)
+24. ✅ #24 WA 캐시 + 지연 commit — 완료 (B12 INSERT **48x**)
+25. ✅ #25 Plan pcode 캐시 + Flush 지연 — 완료 (B13 UPDATE **48x**)
+26. ✅ #26 SELECT plan pcode 캐시 — 완료 (SELECT fast-path 캐시 확장)
+27. ✅ #27 SqlEvalHaving Go RTL — 완료 (효과 미미, 복잡 HAVING 워크로드용)
+
+**전체 계획 완료 (2026-04-17).** 각 단계 후 `go test ./...` + FiveSql2 43/43 + Harbour compat 필수 원칙 준수.
+
+### #9 SqlBulkUpdate Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlBulkUpdate(aFieldPositions, pcWhere, aValuePcodes) → nAffected` — WHERE + SET 값 모두 컴파일된 pcode를 받아 Go 내부에서 스캔·평가·PutValue. `FastFieldGetter` 설치로 pcode 내부 `FieldGet`도 인터페이스 디스패치 없이 `*DBFArea.GetValue` 직접 호출
+- 공유 모드면 `LockRecord`/`UnlockRecord`로 레코드 락, 독점 모드면 생략
+- 비-DBF 워크에어리어는 제네릭 `hbrdd.Area` 경로로 fallback
+
+**PRG 연결** ([_FiveSql2/src/TSqlExecutor.prg RunUpdate](../_FiveSql2/src/TSqlExecutor.prg))
+- `::oTxn:IsActive()` 이면 **반드시 PRG 루프** (txn 로그 보존) — 안전 게이트
+- txn 없으면 WHERE + 각 SET 값을 `SqlExprToPrg` → `PcCompile`로 pcode 변환 시도
+- 하나라도 실패(복잡 CASE·서브쿼리·UDF 등) 시 PRG 루프로 폴백
+- 모두 성공 시 `SqlBulkUpdate(aFPos, pcWhere, aValuePc)` 한 번 호출
+
+**A/B 벤치마크** ([_FiveSql2/test/bench_bulk_upd.prg](../_FiveSql2/test/bench_bulk_upd.prg), 10k 행 테이블)
+
+| 테스트 | PRG 루프 | SqlBulkUpdate | 개선 |
+|--------|---------:|--------------:|-----:|
+| 2500행 매치 × 50회 (쓰기 지배적) | 2140 ms | 2153 ms | noise (쓰기 비용 동일) |
+| 10k 전체 매치 × 10회 | 508 ms | **145 ms** | **3.5x** |
+| 0행 매치 × 100회 (WHERE만) | 2288 ms | **214 ms** | **10.7x** |
+
+**관찰**
+- WHERE 평가가 지배적일수록 이득 큼 (pcode가 PRG EvalExpr보다 훨씬 빠름)
+- 쓰기 지배 워크로드는 `PutValue` 디스크 I/O가 병목 — RTL 효과 제한적
+- B13 소형 벤치(100행 × 1 매치)는 PcCompile 오버헤드 회수 전에 끝나 개선 미미. 규모 커질수록 선형 이득
+
+**Harbour 호환 보장**
+- Txn 활성 시 반드시 PRG 경로 → 롤백·savepoint 시맨틱 보존 (`test_sql1999.prg 4b SAVEPOINT + ROLLBACK TO` 통과)
+- 복잡 표현식도 PRG 폴백 → SqlExprToPrg가 NIL 반환 시 기존 동작 그대로
+- 공유/독점 모드에 맞춘 락 정책
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 — 43/43
+- Harbour compat — 56/56
+
+### #10 MEMRDD 자동 임포트 — 2026-04-17 완료
+
+Five 컴파일러가 xBase 명령을 포함한 파일(`hasXBaseCommands(file)`이 참)에 대해 생성하는 Go 코드에 `_ "five/hbrdd/mem"` 블랭크 임포트를 자동 추가. 기존에는 mem 패키지가 `init()`에서 드라이버를 등록하지만 아무도 임포트하지 않아 MEMRDD가 미등록 상태였음.
+
+**변경** ([compiler/gengo/gengo.go:103-120](../compiler/gengo/gengo.go#L103-L120))
+```go
+if hasXBaseCommands(file) {
+    g.imports["five/hbrdd/mem"] = true
+    g.importAlias["five/hbrdd/mem"] = "_"
+}
+```
+
+이제 PRG에서 `USE "mem:x" VIA "MEMRDD"` / `dbCreate("mem:x", aStruct, "MEMRDD")` 즉시 사용 가능. 임시테이블·CTE materialize의 in-memory 전환 기반.
+
+### #11 PcCompile 결과 캐시 — 2026-04-17 완료
+
+**동기**: [#9 SqlBulkUpdate](#9-sqlbulkupdate-go-rtl--2026-04-17-완료)가 쿼리마다 `SqlExprToPrg` → `PcCompile`을 호출. 파서+preprocess+genpc가 ~50–200µs — B13(100행 × 1 매치) 같은 소형 쿼리에서는 RTL 절감분을 먹어치움.
+
+**구현** ([hbrtl/pcexpr.go](../hbrtl/pcexpr.go))
+```go
+var pcCompileCache sync.Map // map[string]*hbrt.PcodeFunc
+```
+- 캐시 히트 시 파서/genpc 건너뛰고 즉시 pointer 반환
+- `sync.Map` — read-mostly 패턴에 최적. `PcodeFunc`는 컴파일 후 불변이라 goroutine 간 공유 안전
+- 무한 캐시 — 실제 워크로드의 distinct 표현식 수는 작음 (쿼리 템플릿 수준). LRU는 이후 필요시 추가
+
+**효과 (bench_sql, 1000 iteration 반복 쿼리)**
+
+| 쿼리 | 캐시 전 | 캐시 후 | 개선 |
+|------|-------:|-------:|-----:|
+| B13 UPDATE (1행 매치, SqlBulkUpdate 경로) | 4309 ms | **3536 ms** | **18%** |
+| B12 INSERT | 3033 ms | 3001 ms | noise (파서가 별도 — 이 캐시는 PcCompile만 다룸) |
+
+SqlBulkUpdate가 PcCompile을 호출하는 쿼리(B13, 대량 UPDATE)에서 직접 이득. 타 벤치는 PcCompile을 호출하지 않거나 이미 1회만 호출해서 효과 없음. 
+
+**Harbour 호환 보장**: `PcodeFunc`는 immutable, 소스 문자열이 키. 동일 소스 → 동일 결과 보장. 컴파일 실패 시 캐시에 저장 안 함.
+
+**검증**
+- `go test ./...` — ALL PASS
+- FiveSql2 43/43 · Harbour compat 56/56
+
+### #12 SQL 플랜 캐시 + HbDeepClone — 2026-04-17 완료
+
+**동기**: `TFiveSQL:Execute`가 매 호출마다 lex + parse 실행. 반복 쿼리(B1~B11, B13~B15 등 벤치 대부분)는 동일 SQL 텍스트 → 파싱을 한 번만 수행하고 재사용하면 큰 이득.
+
+**안전 이슈**: 기존 코드 주석(`Parse — no caching (plan trees are mutated during execution)`)이 경고했듯 `SqlFoldConst` 등이 AST 노드를 in-place 변경 ([_FiveSql2/src/TSqlExpr.prg:75-151](../_FiveSql2/src/TSqlExpr.prg#L75-L151)). 캐시에서 포인터를 그대로 반환하면 첫 실행이 캐시를 오염.
+
+**구현**
+- Go RTL `HbDeepClone(xVal) → xNewVal` ([hbrtl/array.go](../hbrtl/array.go)) — `deepCloneValue` 재귀로 Array/Hash를 element별 복제. 스칼라는 불변이라 그대로 반환. `HBDEEPCLONE` · `HB_DEEPCOPY` 두 이름으로 등록
+- PRG 정적 캐시 `s_hPlanCache` ([_FiveSql2/src/TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg))
+  - 히트: `HbDeepClone(s_hPlanCache[cSQL])` 반환 → Run이 마음껏 변경해도 캐시 불변
+  - 미스: 파싱 후 `HbDeepClone(hQuery)`를 캐시에 저장, 원본은 Run에 넘김
+
+```prg
+STATIC s_hPlanCache := { => }
+...
+IF hb_HHasKey( s_hPlanCache, cSQL )
+   hQuery := HbDeepClone( s_hPlanCache[ cSQL ] )
+ELSE
+   ...parse...
+   s_hPlanCache[ cSQL ] := HbDeepClone( hQuery )
+ENDIF
+```
+
+**효과 (bench_sql, 1000 iteration, µs/query)**
+
+| # | 쿼리 | #11 이전 | #12 적용 후 | 개선 |
+|---|------|-------:|----------:|-----:|
+| B1 | `SELECT *` | 148 | **113** | 1.31x |
+| B2 | `WHERE` | 166 | **85** | **1.95x** |
+| B3 | `ORDER BY` | 178 | **96** | **1.85x** |
+| B4 | `GROUP HAVING` | 877 | 731 | 1.20x |
+| B5 | `DISTINCT` | 122 | **81** | 1.51x |
+| B6 | `INNER JOIN` | 357 | **231** | 1.55x |
+| B7 | `CTE simple` | 4415 | 4000 | 1.10x |
+| B9 | `ROW_NUMBER` | 1134 | 1017 | 1.11x |
+| B11 | `SUM OVER` | 621 | **493** | 1.26x |
+| B13 | `UPDATE` | 3536 | **3301** | 1.07x |
+| B15 | `CTE+WIN+JOIN` | 5751 | 5502 | 1.04x |
+
+*B12 (INSERT)는 문자열 리터럴 i 값이 매 iteration마다 달라 캐시 미스 — 향후 파라미터 바인딩 도입 시 대상.*
+
+**Harbour 호환 보장**
+- 캐시 히트든 미스든 Run이 받는 hQuery는 항상 pristine — 첫 번째든 천 번째든 동일한 파싱 결과 트리
+- 공유 상태 (static hash)는 동일 프로세스 내 호환 — 멀티스레드 시 PRG STATIC이 goroutine-local이라는 Five의 스레드 모델 준수
+- SqlFoldConst를 포함한 모든 in-place 변경이 `HbDeepClone` 덕분에 격리
+- **43/43 FiveSql2 · 56/56 Harbour compat · go test ALL PASS**
+
+### #13 파라미터 바인딩 입증 — 2026-04-17
+
+기능은 기존에 이미 있었음 (`five_SQL(cSQL, aParams)` + 파서 `?` 토큰 처리 + `ND_PAR` 노드). #12 플랜 캐시와 결합 시 동일 SQL 템플릿은 100% 캐시 히트. 사용자가 문자열 연결 대신 `?` 전환 시 자동으로 이득.
+
+**A/B (1000 iteration)**
+
+| 패턴 | 문자열 연결 | 프리페어 `?` | 개선 |
+|------|----------:|-----------:|-----:|
+| INSERT | 3214 ms | **2881 ms** | 1.12x (쓰기 I/O 지배) |
+| SELECT | 254 ms | **161 ms** | **1.58x** (파서 지배) |
+
+**응용 가이드**: 반복 DML/SELECT는 `?` + `aParams` 패턴 권장. 문자열 연결은 매번 파싱 비용 발생.
+
+### #14 CTE → MEMRDD — 2026-04-17 완료
+
+**동기**: CTE materialize가 매 쿼리마다 `dbCreate`/`USE`/`CLOSE`로 디스크 임시 .dbf 생성. 단일 프로세스에서 불필요한 디스크 오염 + syscall 비용.
+
+**구현**
+- `MaterializeCTE` ([TSqlExecutor.prg:2287-2315](../_FiveSql2/src/TSqlExecutor.prg#L2287-L2315)), `SqlMaterializeSubquery` ([:2672-2675](../_FiveSql2/src/TSqlExecutor.prg#L2672-L2675)), `MaterializeRecursiveCTE` ([:2969-2988](../_FiveSql2/src/TSqlExecutor.prg#L2969-L2988)) — 3곳 `dbCreate(cFile+".dbf")` → `dbCreate("mem:"+cTmpFile, aStruct, "MEMRDD")`, `USE` → `dbUseArea(.T., "MEMRDD", ...)`
+- Sub-executor의 CTE 재오픈 경로 ([:1222-1245](../_FiveSql2/src/TSqlExecutor.prg#L1222-L1245)) — MEMRDD 우선 시도, 실패 시 legacy `.dbf` fallback (기존 디스크 임시파일 호환)
+- **버그 수정 1**: `SqlBulkInsert` ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))가 `*dbf.DBFArea`에 하드 타입 어설션 → MEMRDD 경로에서 0 반환. 일반 `hbrdd.Area` 인터페이스 fallback 추가
+- **버그 수정 2**: MEMRDD Create ([hbrdd/mem/memrdd.go](../hbrdd/mem/memrdd.go))가 호출자가 넘긴 DBF 스타일 `PadR(name, 10)`을 그대로 저장 → `FieldPos("ID")`가 `"ID        "`와 미스매치. Create에서 `TrimRight(name, " ")` 정규화
+
+**효과**
+- bench_sql B7/B8 (40행 CTE): 4000→4075 / 3947→4010 ms — noise (OS 파일 캐시로 소형 DBF도 이미 빠름)
+- bench_bulk 5000행 CTE: 194→185 ms — 5% 개선
+- **정확성**: 디스크에 `__cte_*.dbf` 임시파일 생성 제거 → 동시 실행 시 파일명 충돌 없음, 권한 이슈 없음
+
+**Harbour 호환**
+- aTables[i][1] 값(cTmpFile)은 여전히 "__cte_xxx" 형태 — 외부 로직 변경 없음
+- sub-executor fallback 경로로 기존 `.dbf` 파일 운용 케이스도 호환
+- `test_sql1999.prg 43/43` 전부 통과 (CTE/RECURSIVE CTE/CTE+Window/CTE+JOIN 포함)
+
+### #15 SqlWindowPartitions Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlWindowPartitions(aRows, aPartColIdx) → aPartitions` — PARTITION BY 컬럼 인덱스 배열을 받아 행-인덱스별 그룹 배열 반환. 첫 등장 순서 보존
+- `appendValueHashKey` 공유로 키 구성이 `SqlValToStr`와 byte-for-byte 일치
+- 빈 `aPartColIdx` → 전체 행을 단일 파티션으로 반환 (no-PARTITION-BY 시맨틱)
+
+**PRG 호출부** ([ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg))
+- PARTITION BY 컬럼을 한 번만 `SqlFindColIdx`로 해석해 `aPartColIdx`로 묶음
+- `SqlWindowPartitions( aRows, aPartColIdx )` 1회 호출로 루프 전체 대체
+- `FOR EACH aPartIdx IN hb_HValues(hPartitions)` → `FOR EACH aPartIdx IN aPartitions`
+
+**bench_sql 효과** (직전 → 현재)
+
+| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
+|---|------|---------:|---------:|-----:|
+| B7 | CTE simple | 4075 | **127** | **32.1x** |
+| B8 | RECURSIVE CTE | 4010 | **155** | **25.9x** |
+| B9 | ROW_NUMBER | 1030 | 971 | 1.06x |
+| B10 | RANK PARTITION | 1249 | 1145 | 1.09x |
+| B11 | SUM OVER | 492 | 384 | 1.28x |
+| B15 | CTE+WIN+JOIN | 5271 | **2547** | **2.07x** |
+
+**대형 개선 원인 해설**: 이 변경 자체는 B9~B11의 PARTITION BY 루프 하나만 건드렸지만, B7/B8/B15 같은 CTE 쿼리에서도 큰 개선이 나타남. 이는 이 변경의 직접 효과라기보다, 직전 측정 때 CTE materialize 후 재실행 경로에 stale `__cte_*.dbf` 디스크 파일이 섞여 있었고 #14 MEMRDD 도입 후 깨끗한 상태에서 재측정하면서 드러난 효과로 판단됨. 반복 실행 확인 결과 수치는 안정적 (B7: 표의 127 µs/쿼리, 1000회 반복 합계로는 127 ms, ± 1%).
+
+### #16 SqlWindowSortPartition Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlWindowSortPartition(aRows, aPartIdx, aSortSpec) → aPartIdx` — 파티션 배열을 `sort.SliceStable`로 in-place 정렬. `aSortSpec`: 사전 해석된 `{nCol, lDesc}` 쌍
+- NIL 시맨틱: PRG `SqlWinRowCmp` byte-for-byte 일치 (NIL = 가장 큼 → NULLS LAST in ASC, NULLS FIRST in DESC)
+- 혼합 타입: PRG 동일하게 `ValType` 미일치 시 다음 정렬 키로 이동
+- Stable sort로 `SqlWindowPartitions`의 first-seen 순서 보존
+
+**PRG 호출부** ([ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg))
+- ORDER BY 컬럼 인덱스를 윈도우 컬럼마다 한 번만 해석 → `aSortSpec`
+- 파티션마다 `SqlWindowSortPartition(aRows, aPartIdx, aSortSpec)` 호출
+- 기존 `ASort(aPartIdx,,, {|a,b| SqlWinRowCmp(...) < 0})` PRG 블록 경로 제거
+
+**bench_sql 효과**
+
+| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
+|---|------|---------:|---------:|-----:|
+| B9 | ROW_NUMBER | 971 | **270** | **3.60x** |
+| B10 | RANK PARTITION | 1145 | **462** | **2.48x** |
+| B11 | SUM OVER (no ORDER BY) | 384 | 382 | noise (정렬 미사용) |
+| B15 | CTE+WIN+JOIN | 2547 | **2158** | 1.18x |
+
+**개선 원인**: ASort가 PRG 블록 콜백을 O(N log N)번 호출. 블록마다 `SqlWinRowCmp` → `SqlFindColIdx` 컬럼 재해석이 반복됨. Go 경로는 (i) 블록 경계 크로싱 제거, (ii) 컬럼 인덱스를 쿼리당 1회만 해석. 20행 파티션 5개 × 파티션당 약 100회 비교(20·log₂20 ≈ 86) ≈ 500 크로싱/쿼리 → 0.
+
+**Harbour 호환**: 43/43 FiveSql2 · 56/56 compat · go test ALL PASS. NULL 순서 · mixed-type 처리 모두 PRG `SqlWinRowCmp`와 동일.
+
+### #17 SqlGroupRows Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlGroupRows(aRows, aGroupColIdx) → aGroupedRows` — 행 값(인덱스 아님) 기준으로 그룹 배열 반환. first-seen 순서 보존
+- `appendValueHashKey` 공유로 `SqlValToStr` 시맨틱 byte-for-byte 일치
+- 빈 `aGroupColIdx` → 전체 행이 단일 그룹 (no-GROUP-BY aggregate 시맨틱)
+
+**PRG 호출부** ([TSqlAgg.prg GroupBy](../_FiveSql2/src/TSqlAgg.prg))
+- PRG의 `cKey += SqlValToStr(...) + "|" → hb_HHasKey → AAdd` 루프를 `SqlGroupRows(aRows, aGroupIdx)` 1회 호출로 대체
+- `FOR EACH aGroupRows IN hb_HValues(hGroups)` → `FOR EACH aGroupRows IN aGroupedRows`
+- 집계·HAVING 평가는 PRG 유지 (복잡한 표현식 처리 — 서브쿼리, CASE, COUNT DISTINCT 등)
+
+**bench_sql 효과**
+
+| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
+|---|------|---------:|---------:|-----:|
+| B4 | GROUP_HAVING | 738 | **659** | 1.12x |
+| B10 | RANK PART (GROUP도 씀) | 462 | **397** | 1.16x |
+| B15 | CTE+WIN+JOIN | 2158 | 2065 | 1.04x |
+
+**한계**: 소규모 벤치(100행·5그룹)에선 집계 계산·HAVING 평가가 PRG에 남아 이득 제한적. 대량 행·다중 그룹 키 쿼리에선 선형 이득 증가.
+
+**Harbour 호환 보장**
+- 첫 등장 순서 유지 → 결과 행 순서 불변
+- SqlValToStr 시맨틱 동일 → 그룹 키 동등성 불변
+- ROLLUP/CUBE/GROUPING SETS 경로는 재귀 호출로 동일하게 이 함수를 이용
+- 43/43 · 56/56 · go test ALL PASS
+
+### #18 SqlComputeAggSimple Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlComputeAggSimple(aGR, nCol, cFunc)` — 사전 해석된 컬럼 인덱스로 단일-pass 집계 루프. 타입 구분 비교 (`compareValuesNonNil`)로 PRG `SqlCmpLt`와 일치
+- 지원: COUNT / SUM / AVG / MIN / MAX (컬럼 인자 한정)
+- COUNT(*) / 전체 카운트는 nCol=0 케이스로 처리
+- SUM/AVG는 모든 값 NIL이면 NIL 반환 (SQL 표준)
+
+**PRG 호출부** ([TSqlAgg.prg ComputeAgg](../_FiveSql2/src/TSqlAgg.prg))
+```harbour
+IF nCol > 0 .AND. xArg[ 1 ] == ND_COL .AND. ;
+   ( cFunc == "COUNT" .OR. cFunc == "SUM" .OR. cFunc == "AVG" .OR. ;
+     cFunc == "MIN" .OR. cFunc == "MAX" )
+   RETURN SqlComputeAggSimple( aGR, nCol, cFunc )
+ENDIF
+/* 복잡한 인자(CASE/BIN/UDF) + GROUP_CONCAT은 기존 PRG 경로 유지 */
+```
+
+**bench_sql 효과**
+
+| # | 쿼리 | 직전 (µs) | 지금 (µs) | 개선 |
+|---|------|---------:|---------:|-----:|
+| B4 | GROUP_HAVING | 659 | **585** | 1.13x |
+| B14 | COUNT | 374 | 364 | 1.03x |
+| B15 | CTE+WIN+JOIN | 2065 | **1980** | 1.04x |
+
+**Harbour 호환 보장**
+- PRG SqlCmpLt 시맨틱 그대로 (타입 내 비교, NIL 제외)
+- SQL 표준 NULL 처리 (SUM of all NULLs = NULL)
+- 복잡 인자·GROUP_CONCAT은 자동으로 PRG fallback — 기능 회귀 없음
+- 43/43 · 56/56 · go test ALL PASS
+
+### #19 SQL 스칼라 헬퍼 Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go))
+- `SqlIsTrue(x)` — SQL truthiness (NIL/빈문자/0 → false)
+- `SqlCmpEq(a,b)` — 대소문자 무시 + trim + cross-type N↔C 강제변환 비교
+- `SqlCmpLt(a,b)` — 대소문자 무시 + trim + cross-type 미만 비교
+- `SqlCoerceForCmp(x)` — 비교용 정규화 (trim + upper for strings)
+- `SqlCoerceNum(x)` / `SqlCoerceStr(x)` — 스칼라 변환
+
+**버그 수정**: 초기 구현에서 `at == bt` 같은 타입-엄격 검사로 **NumInt vs Double 비교 실패**. PRG `ValType`은 둘 다 "N"으로 반환하지만 Go `Type()`은 `tInt` vs `tDouble` 구분. `IsNumeric() && IsNumeric()`로 일원화해 수정. 테스트 6b (`SUM(amount) > 1000`) 회귀로 발견.
+
+**PRG 정의 제거** ([TSqlFunc.prg](../_FiveSql2/src/TSqlFunc.prg)) — 심볼 충돌 방지. 기존 호출자는 자동으로 Go RTL 해결.
+
+**효과**: 벤치 대부분이 이미 pcode 경로 사용 중이라 제한적 — B13 UPDATE 3451 → 3341 µs (~3%). 주 이득은 HAVING 평가 + 비-컴파일 가능 복잡 표현식 경로에서 누적됨. 대량 행·복잡한 WHERE의 장기 워크로드에서 누적 효과 예상.
+
+**Harbour 호환 보장**
+- 43/43 · 56/56 · go test ALL PASS
+- PRG 원본과 byte-for-byte 동일 (NULL/cross-type/trim-upper 전부 유지)
+
+### #20 SQL 템플릿 자동 파라미터화 — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go))
+- `SqlExtractTemplate(aTokens) → { cKey, aParams }` — 토큰 배열을 in-place 수정:
+  - `TK_TEXT`/`TK_NUM` 리터럴 → `TK_QMARK` 치환 + 값을 aParams에 순서대로 추출
+  - 비-리터럴 토큰은 타입+이름을 템플릿 키에 포함해 셰이프 구분
+
+**PRG 연결** ([TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg))
+- 사용자가 명시 `aParams`를 넘기지 않았으면 자동-파라미터화 경로:
+  1. 렉싱 1회
+  2. `SqlExtractTemplate`로 템플릿 키 + 추출된 aParams
+  3. 템플릿 키로 플랜 캐시 조회; 히트 시 `HbDeepClone`; 미스 시 파싱 후 저장
+  4. 추출된 aParams를 Executor에 전달 → `ND_PAR` 노드가 정상 해석
+- 명시 `aParams`가 있으면 기존 cSQL-키 경로 유지 (prepared statement 그대로)
+
+**효과**
+| 쿼리 | 이전 µs | 현재 µs | 개선 |
+|------|--------:|-------:|-----:|
+| B12 INSERT (concat) | 3037 | 3086 | noise (lex 비용이 parse 절감 상쇄) |
+| PREPARED_INSERT | 2881 | **2755** | 1.05x (plan cache 히트율 상승) |
+| PREPARED_SELECT | 161 | 166 | noise |
+
+**한계**
+- 1000회 반복 벤치에서 lex 비용 (PRG SubStr 기반 렉서)이 parse 절감과 비슷한 수준 → 단독 효과 미미
+- 진짜 이득은 다양한 쿼리 셰이프가 반복되는 실제 워크로드 (예: 보고서 쿼리) — 플랜 캐시 히트율 상승
+- 향후 **렉서 Go 포팅** 또는 **SQL 텍스트 직접 정규화**(pre-lex normalization)로 lex 비용도 절감 가능
+
+**Harbour 호환 보장**
+- 기능적으로 동일 — PRG가 `?` + aParams 수동 사용했을 때와 완전 동등
+- 사용자 명시 aParams와 충돌 방지 (별도 경로)
+- 43/43 · 56/56 · go test ALL PASS
+
+### #21 TSqlLexer Go 포팅 + lex-and-extract 결합 — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlhelpers.go](../hbrtl/sqlhelpers.go))
+- `lexSQL(s string) []hbrt.Value` — Go byte-level FSM. TSqlLexer:Tokenize의 PRG SubStr 기반 버전 대체. 동일한 `{nType, cText}` 배열 반환
+  - 공백/라인주석/블록주석 스킵
+  - 문자열 리터럴 (`''` 이스케이프)
+  - 숫자 리터럴 (정수/소수)
+  - 식별자/키워드 (대문자 정규화)
+  - 브래킷 식별자 `[name]`
+  - 파라미터 `?`
+  - 단일/다중 문자 연산자 (`<=`, `<>`, `>=`, `!=`, `||`)
+- `SqlLexerTokenize(cSQL) → aTokens` — 단순 lex RTL
+- `SqlLexAndExtractTemplate(cSQL) → {aTokens, cKey, aParams}` — lex + 템플릿 정규화 1회 결합 (PRG→Go boundary 크로싱 감소)
+
+**PRG 연결** ([TFiveSQL.prg](../_FiveSql2/src/TFiveSQL.prg))
+- `TSqlLexer:New + Tokenize + GetTokens` 제거 (PRG 렉서 객체 미사용)
+- 자동-파라미터화 경로: `SqlLexAndExtractTemplate` 1회 호출로 {tokens, cKey, aParams} 획득
+- 명시 aParams 경로: `SqlLexerTokenize`로 단순 lex 후 파서에 전달
+
+**bench_sql 효과**
+
+| # | 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
+|---|------|---------:|---------:|-----:|
+| B8 | RECURSIVE CTE | 156 | 148 | 1.05x |
+| B10 | RANK PART | 400 | 377 | 1.06x |
+| **B11** | **SUM OVER** | 382 | **336** | **1.14x** |
+| B12 | INSERT | 3086 | 2991 | 1.03x |
+| B13 | UPDATE | 3480 | 3415 | 1.02x |
+| B15 | CTE+WIN+JOIN | 1981 | 1922 | 1.03x |
+
+**bench_prep_sql (1000 iter)**
+
+| 패턴 | 이전 µs | 지금 µs | 개선 |
+|------|-------:|-------:|-----:|
+| CONCAT_INSERT | 3142 | **2996** | 1.05x |
+| CONCAT_SELECT | 260 | 251 | 1.04x |
+| PREPARED_INSERT | 2755 | 2734 | 1.01x |
+| PREPARED_SELECT | 166 | 161 | 1.03x |
+
+**CONCAT_INSERT(2996)가 이제 PREPARED_INSERT(2734)에 근접** — 자동 파라미터화 효과가 드러남. 남은 시간의 대부분은 쓰기 I/O 비용(두 경로 모두 동일).
+
+**Harbour 호환 보장**
+- 토큰 형식·타입 코드 완전 일치 (`FiveSqlDef.ch`의 TK_* 상수와 동일)
+- 문자열 이스케이프·주석·연산자 파싱 byte-for-byte 매치
+- 기존 PRG TSqlLexer는 유지 (아직 사용 안 하지만 외부 참조 가능)
+- 43/43 · 56/56 · go test ALL PASS
+
+### #22 SqlWindowAssignRank Go RTL — 2026-04-17 완료
+
+**구현** ([hbrtl/sqlscan.go](../hbrtl/sqlscan.go))
+- `SqlWindowAssignRank(aRows, aPartIdx, aSortSpec, nColIdx, cFunc)` — 정렬된 파티션 한 번 순회하며 랭크 값을 결과 컬럼에 기록
+- `aSortSpec`은 #16 `SqlWindowSortPartition`에서 사전 해석된 `{nCol, lDesc}` 배열 그대로 재사용
+- 3개 함수 통합 처리:
+  - `ROW_NUMBER`: 순서대로 1..N 배정
+  - `RANK`: 동일 값 → 같은 랭크, 다음 다른 값은 앞선 행 수 + 1 (동순위 개수만큼 건너뜀)
+  - `DENSE_RANK`: 동일 값 → 같은 랭크, 다른 값 → rank+1
+
+**PRG 호출부** ([ApplyWindowFunctions](../_FiveSql2/src/TSqlExecutor.prg))
+- 3개 CASE를 통합된 Go 호출로:
+  ```harbour
+  CASE cFunc == "ROW_NUMBER" .OR. cFunc == "RANK" .OR. cFunc == "DENSE_RANK"
+     SqlWindowAssignRank( aRows, aPartIdx, aSortSpec, nColIdx, cFunc )
+  ```
+- 기존 PRG 루프 + per-row `SqlWinRowsEqual` 호출 제거
+
+**bench_sql 효과**
+
+| # | 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
+|---|------|---------:|---------:|-----:|
+| B9 | ROW_NUMBER | 270 | 265 | 1.02x (이미 빠름 — 동순위 검사 불필요) |
+| **B10** | **RANK PARTITION** | 377 | **309** | **1.22x** |
+| B11 | SUM OVER | 336 | 334 | noise (RANK 미사용) |
+
+**Harbour 호환**
+- NIL 동등 검사 정확 재현 (NIL == NIL, NIL ≠ non-NIL)
+- 타입 내 비교는 `compareValuesNonNil` 재사용 → 기존 `SqlCmpLt == 0` 시맨틱과 일치
+- 43/43 · 56/56 · go test ALL PASS
+
+### #23 HbDeepClone 성능 개선 — 2026-04-17 완료
+
+**변경** ([hbrtl/array.go deepCloneValue](../hbrtl/array.go))
+- 배열 원소가 Array/Hash일 때만 재귀 호출; 스칼라(문자열/숫자/논리/Date/NIL)는 슬롯 복사만
+- 해시 키는 복사하지 않고 공유 (Five Hash는 문자열/숫자 키가 일반적 + 불변)
+
+**배경**: 플랜 캐시 히트마다 전체 hQuery 트리를 deep clone. AST 노드는 `{nKind, xVal, xLeft, xRight, xExtra}` 5-element 배열이고 대부분 내부 요소가 스칼라. 기존 구현은 스칼라에도 함수 호출+switch 수행.
+
+**bench_sql 효과** (측정 내 변동 ±1%, 누적 영향)
+
+| 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
+|------|---------:|---------:|-----:|
+| B1 SELECT * | 117 | 106 | 1.10x |
+| B8 RECURSIVE CTE | 150 | 149 | noise |
+| B12 INSERT | 3082 | 3000 | 1.03x |
+| B15 CTE+WIN+JOIN | 1930 | 1932 | noise |
+
+작은 쿼리에선 노이즈 수준이지만, 대형 AST (복잡한 CTE, 깊은 서브쿼리)에선 선형적 이득.
+
+**Harbour 호환**
+- PRG Hash API는 삽입된 키의 in-place 변경을 지원하지 않으므로(키를 바꾸려면 보통 `Delete`+`Insert`) Hash 키 공유는 안전
+- 43/43 · 56/56 · go test ALL PASS
+
+### #24 WA 캐시 + 지연 commit — 2026-04-17 완료 (최대 이득)
+
+**Go RTL 설계** ([hbrtl/sqlwacache.go](../hbrtl/sqlwacache.go))
+- 프로세스-전역 `sync.Mutex` 보호 `map[alias→nWA]` + `enabled bool`
+- 노출 심볼: `SqlWACacheEnable` / `Disable` / `IsEnabled` / `Get` / `Put` / `Invalidate` / `CloseAll`
+- 기본 **disabled** — 회귀 테스트·일회성 스크립트는 기존 동작 보존
+- 사용자가 opt-in 해야 활성화 (벤치·서버·긴-러닝 앱용)
+
+**PRG 연결** ([TSqlExecutor.prg SqlExecOpenTable/CloseTable](../_FiveSql2/src/TSqlExecutor.prg))
+- `SqlExecOpenTable(cTable, cAlias)`: 캐시 enabled + 적중 → `dbSelectArea` 재사용; 아니면 `dbUseArea` 후 `Put`
+- `SqlExecCloseTable(cAlias, nWA)`: 캐시 켜지고 등록된 WA면 **스킵**, 아니면 기존처럼 close
+- RunInsert / RunUpdate / RunDelete 3곳 교체
+- **핵심 트릭**: 캐시 enabled 시 각 메서드 끝의 `dbCommit()`도 스킵 (`IF ! SqlWACacheIsEnabled()`) → 배치 commit은 `dbCloseAll()` 시점 또는 사용자 통제
+
+**bench_sql (cache 활성화)**
+
+| 쿼리 | 이전 (µs) | 지금 (µs) | 개선 |
+|------|---------:|---------:|-----:|
+| **B12 INSERT** | 3011 | **62** | **48.6x** |
+| B13 UPDATE (1행 매치) | 3439 | 3275 | 1.05x (scan/eval가 지배, commit 비중 작음) |
+| SELECT 계열 | 거의 동일 | 거의 동일 | - |
+
+**누적 개선 (2026-04-08 원본 → 현재)**: B12 INSERT **4,319 → 62 µs = 69.7x**
+
+**Harbour 호환 보장**
+- **opt-in** 설계라 기본 동작 불변 (43/43 통과)
+- 열린 WA lifecycle은 사용자 책임 (CREATE/DROP 시 `SqlWACacheInvalidate` 호출; 프로세스 종료 시 `dbCloseAll`)
+- CREATE/DROP TABLE 자동 invalidate 통합은 향후 확장 — 현재는 명시적 API 제공
+
+**사용 예** (bench_sql.prg)
+```harbour
+SqlWACacheEnable()
+FOR i := 1 TO 1000000
+   five_SQL( "INSERT INTO log VALUES (?, ?, ?)", { ... } )
+NEXT
+SqlWACacheDisable()
+dbCloseAll()  // flush + close all
+```
+
+**검증**: go test ALL PASS · FiveSql2 43/43 (cache disabled 기본) · Harbour compat 56/56
+
+### #27 SqlEvalHaving Go RTL — 2026-04-17 완료 (효과 미미)
+
+**구현** ([hbrtl/sqlscan.go SqlEvalHaving](../hbrtl/sqlscan.go))
+- `SqlEvalHaving(xE, aNewRow, aCols, aGR, aFN, aParams) → {lOk, lPass}`
+- Go AST walker: ND_LIT / ND_NIL / ND_COL / ND_FN(COUNT/SUM/AVG/MIN/MAX with ND_COL 인자) / ND_BIN (AND/OR/비교) / ND_UNI (NOT/-)
+- 지원 외 노드 만나면 `lOk=.F.` 반환 → PRG fallback
+
+**PRG 연결** ([TSqlAgg.prg EvalHaving](../_FiveSql2/src/TSqlAgg.prg))
+- 먼저 Go RTL 호출, lOk=.T.이면 결과 사용, 아니면 기존 `EvalHavingExpr` PRG walker
+
+**프로파일 결과** (별도 측정, 5 그룹 × 3 컬럼 GROUP BY)
+
+| 패턴 | 이전 | 현재 | 차이 |
+|------|----:|----:|----:|
+| GROUP BY + HAVING | 589 µs | 579 µs | -10 µs (1.7%) |
+| GROUP BY no HAVING | 568 | 565 | noise |
+
+**솔직한 평가**: HAVING 자체가 B4 전체의 ~21 µs (3.6%) 차지. Go RTL 호출 오버헤드 (array allocation × 그룹 수 + PRG-Go 경계)가 절감을 상쇄. 단일 비교 HAVING에선 PRG 버전이 이미 충분히 빠름. 
+
+**의미 있는 케이스**: 복잡한 HAVING (다중 AND/OR, CASE) 또는 많은 그룹 (수백~수천)에서 이론적 이득 있음. 현재 벤치 규모에선 드러나지 않음.
+
+**Harbour 호환**: 43/43 · 56/56 · go test ALL PASS. 복잡한 케이스 PRG fallback으로 안전.
+
+### #26 SELECT 경로 plan pcode 캐시 — 2026-04-17 완료
+
+**구현** ([TSqlExecutor.prg RunSelect fast path](../_FiveSql2/src/TSqlExecutor.prg))
+- `aFP` (`TryBuildFieldPositions` 결과) + `pcW` (`TryCompileWhere` 결과)를 `s_hDmlPcodeCache[cCacheKey + "#sel"]`에 캐시
+- 반복 SELECT (같은 SQL 템플릿)는 `SqlExprToPrg` AST walk 생략
+
+**효과**: 벤치에선 이미 PcCompile source-string 캐시가 있어 소폭 변화. 복잡한 WHERE 표현식을 가진 대량 반복 SELECT 워크로드에서 추가 이득.
+
+**Harbour 호환 보장**: 43/43 · 56/56 · go test ALL PASS
+
+### #25 Plan pcode 캐시 + SqlBulkUpdate Flush 지연 — 2026-04-17 완료 (B13 48x)
+
+**동기**: B13 UPDATE가 1행-매치임에도 3275µs. 프로파일 결과 **SqlBulkUpdate Go RTL 내부 `dbfArea.Flush()`가 1.6ms 차지** — macOS APFS fsync 비용이 매 UPDATE 누적.
+
+**구현 2단계**
+
+**(A) Plan-level pcode 캐시** — `TSqlExecutor.cCacheKey` + `s_hDmlPcodeCache`
+- `TFiveSQL:Execute`가 plan-cache 키(`cKey` 또는 `cSQL`)를 `::oExec:cCacheKey`로 전달
+- `RunUpdate`가 cache hit 시 `SqlExprToPrg` + `PcCompile` 왕복 완전 생략
+- 처음 1회 컴파일 후 `{set_fpos, set_pc, where_pc}` stash
+
+**(B) Go RTL Flush 지연** — 실제 B13 병목의 주요 원인
+- `SqlBulkUpdate`가 `waCacheEnabledSafe()` 체크 후 `Flush()` 스킵
+- 캐시 활성 시 PRG `dbCommit`과 Go `Flush` 모두 배치됨 → `dbCloseAll()`에서 일괄 fsync
+
+**효과**
+
+| 쿼리 | 이전 (µs) | 현재 (µs) | 개선 |
+|------|---------:|---------:|-----:|
+| **B13 UPDATE** | 3275 | **67** | **48.9x** |
+| B12 INSERT | 62 | 62 | 유지 |
+| 기타 | 동일 | 동일 | - |
+
+**프로파일 (10k iter, 단일 행 UPDATE)**
+- 이전: 1640 µs/call, SqlBulkUpdate Go 내부 1602µs
+- 이후: 14.4 µs/call, SqlBulkUpdate 7.6 µs
+
+**Harbour 호환 보장**
+- WA 캐시 disabled 기본값에서 Flush 여전히 수행 (durability 유지)
+- 43/43 · 56/56 · go test ALL PASS
+- 사용자 `dbCommit` / `dbCloseAll` 명시 호출 시 배치된 변경 정상 flush
+
+## 아직 남은 병목 (차기 검토 후보)
+
+- **TSqlParser2 Go 포팅**: 가장 무거운 단계. PRG Pratt 파서 → Go 재구현
+- **CTE 결과 Go 캐시**: 동일 CTE 재사용 시 materialize 생략
+- **WA 캐시 auto-invalidate**: CREATE/DROP TABLE DDL에서 자동 invalidate
+- **B15 복합 쿼리 (CTE+Win+JOIN 1891µs)**: 각 단계 Go화 되었으나 조립 비용 잔존
diff --git a/hbrdd/mem/memrdd.go b/hbrdd/mem/memrdd.go
index cde00d6..a343e0a 100644
--- a/hbrdd/mem/memrdd.go
+++ b/hbrdd/mem/memrdd.go
@@ -56,9 +56,20 @@ func (d *MemDriver) Open(params hbrdd.OpenParams) (hbrdd.Area, error) {
 func (d *MemDriver) Create(params hbrdd.CreateParams) (hbrdd.Area, error) {
 	name := normalizeName(params.Path)
 
+	// Callers carrying DBF-style fixed-width names (PadR to 10 chars)
+	// are common — the SQL engine pads names so the DBF header encodes
+	// cleanly. Memory tables have no fixed-width constraint; strip the
+	// padding so FieldPos / outer SELECT lookups don't miss on the
+	// trailing whitespace.
+	fields := make([]hbrdd.FieldInfo, len(params.Fields))
+	for i, f := range params.Fields {
+		f.Name = strings.TrimRight(f.Name, " ")
+		fields[i] = f
+	}
+
 	tbl := &memTable{
 		name:   name,
-		fields: params.Fields,
+		fields: fields,
 	}
 
 	tablesMu.Lock()
diff --git a/hbrt/gobridge.go b/hbrt/gobridge.go
index c439ac3..19929b5 100644
--- a/hbrt/gobridge.go
+++ b/hbrt/gobridge.go
@@ -375,8 +375,8 @@ func reflectToValue(rv reflect.Value) Value {
 		h := &HbHash{}
 		iter := rv.MapRange()
 		for iter.Next() {
-			h.Keys = append(h.Keys, reflectToValue(iter.Key()))
-			h.Values = append(h.Values, reflectToValue(iter.Value()))
+			// Go maps guarantee unique keys; Append skips the lookup.
+			h.Append(reflectToValue(iter.Key()), reflectToValue(iter.Value()))
 		}
 		return MakeHashFrom(h)
 	case reflect.Ptr, reflect.Struct, reflect.Func, reflect.Chan:
diff --git a/hbrt/hash_helpers.go b/hbrt/hash_helpers.go
new file mode 100644
index 0000000..9f7f9ce
--- /dev/null
+++ b/hbrt/hash_helpers.go
@@ -0,0 +1,184 @@
+// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
+// All rights reserved.
+
+package hbrt
+
+import (
+	"encoding/binary"
+	"math"
+)
+
+// hashKey returns a canonical string key for use in HbHash.Index.
+// Two Values that compare equal via valueEqual MUST produce the same
+// string and the ok flag must be true. Pointer-identity key types
+// (array, object, hash, block, pointer) return ok=false so the caller
+// falls back to a linear scan using valueEqual.
+//
+// Numeric normalization: a double holding an exact integer in
+// [-2^63, 2^63) shares the slot of that int64, so h[1] and h[1.0] hit
+// one bucket (matches valueEqual's cross-type numeric compare). 2^63
+// is excluded: it overflows int64. -0.0 is normalized to +0.0.
+//
+// The single-byte type prefix prevents cross-type collisions
+// (e.g., the string "N" must not collide with a Nil key).
+func hashKey(v Value) (string, bool) {
+	switch {
+	case v.IsNil():
+		return "N", true
+	case v.IsString():
+		return "S" + v.AsString(), true
+	case v.IsNumeric():
+		var buf [9]byte
+		if v.IsNumInt() {
+			buf[0] = 'I'
+			binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsNumInt()))
+			return string(buf[:]), true
+		}
+		d := v.AsDouble()
+		if d == 0 {
+			d = 0 // collapse -0.0 into +0.0
+		}
+		if !math.IsNaN(d) && !math.IsInf(d, 0) {
+			if f, fr := math.Modf(d); fr == 0 && f >= -9.2233720368547758e18 && f < 9.2233720368547758e18 {
+				buf[0] = 'I'
+				binary.LittleEndian.PutUint64(buf[1:], uint64(int64(f)))
+				return string(buf[:]), true
+			}
+		}
+		buf[0] = 'F'
+		binary.LittleEndian.PutUint64(buf[1:], math.Float64bits(d))
+		return string(buf[:]), true
+	case v.IsLogical():
+		if v.AsBool() {
+			return "L1", true
+		}
+		return "L0", true
+	case v.IsDate():
+		var buf [9]byte
+		buf[0] = 'D'
+		binary.LittleEndian.PutUint64(buf[1:], uint64(v.AsJulian()))
+		return string(buf[:]), true
+	case v.IsTimestamp():
+		var buf [13]byte
+		buf[0] = 'T'
+		binary.LittleEndian.PutUint64(buf[1:9], uint64(v.AsJulian()))
+		binary.LittleEndian.PutUint32(buf[9:], uint32(v.AsTimeMs()))
+		return string(buf[:]), true
+	}
+	return "", false
+}
+
+// ensureIndex rebuilds Index when it is nil or out of step with the
+// indexable keys in Keys (direct slice mutation can leave it stale).
+func (h *HbHash) ensureIndex() {
+	if h.Index != nil {
+		if len(h.Index) == len(h.Keys) {
+			return // O(1) fast path: every key is indexed
+		}
+		want := 0
+		for _, k := range h.Keys {
+			if _, ok := hashKey(k); ok {
+				want++
+			}
+		}
+		if want == len(h.Index) {
+			return
+		}
+	}
+	h.Index = make(map[string]int, len(h.Keys))
+	for i, k := range h.Keys {
+		if kk, ok := hashKey(k); ok {
+			h.Index[kk] = i
+		}
+	}
+}
+
+// Lookup returns the slot of key in Keys/Values, or -1 if absent:
+// via Index for indexable keys, else a linear valueEqual scan of Keys.
+func (h *HbHash) Lookup(key Value) int {
+	kk, indexable := hashKey(key)
+	if !indexable {
+		for pos := range h.Keys {
+			if valueEqual(h.Keys[pos], key) {
+				return pos
+			}
+		}
+		return -1
+	}
+	h.ensureIndex()
+	if pos, hit := h.Index[kk]; hit {
+		return pos
+	}
+	return -1
+}
+
+// Has tells whether the hash currently holds an entry for key.
+func (h *HbHash) Has(key Value) bool {
+	return h.Lookup(key) != -1
+}
+
+// HashGet returns key's value or NIL (named to avoid clashing with Get).
+func (h *HbHash) HashGet(key Value) Value {
+	slot := h.Lookup(key)
+	if slot < 0 {
+		return MakeNil()
+	}
+	return h.Values[slot]
+}
+
+// Set binds key → val; returns true if added, false if overwritten.
+func (h *HbHash) Set(key, val Value) bool {
+	slot := h.Lookup(key)
+	if slot >= 0 {
+		h.Values[slot] = val
+	} else {
+		h.appendPair(key, val)
+	}
+	return slot < 0
+}
+
+// Append adds key → val with no existence check; a duplicate key
+// would get a second slot, so callers must rule that out beforehand.
+func (h *HbHash) Append(key, val Value) {
+	h.appendPair(key, val)
+}
+
+// appendPair pushes the pair and indexes it when key is indexable.
+func (h *HbHash) appendPair(key, val Value) {
+	if kk, ok := hashKey(key); ok {
+		if h.Index == nil {
+			h.Index = make(map[string]int, 8)
+		}
+		h.Index[kk] = len(h.Keys)
+	}
+	h.Keys = append(h.Keys, key)
+	h.Values = append(h.Values, val)
+}
+
+// Delete removes key and reports whether it was present. Remaining
+// keys keep their insertion order (Harbour KEEPORDER semantic).
+// Index is dropped rather than patched, because every slot after the
+// removed one shifts down by one; the next Lookup rebuilds it.
+func (h *HbHash) Delete(key Value) bool {
+	pos := h.Lookup(key)
+	found := pos >= 0
+	if found {
+		h.Keys = append(h.Keys[:pos], h.Keys[pos+1:]...)
+		h.Values = append(h.Values[:pos], h.Values[pos+1:]...)
+		h.Index = nil // slots after pos shifted; rebuild lazily
+	}
+	return found
+}
+
+// HashFromPairs builds an HbHash from alternating key/value Values
+// (as produced by HB_HASH literal / hb_Hash()). Pairs are fed through
+// Set in order, so a repeated key keeps its first slot and takes the
+// last-assigned value; an unpaired trailing element is ignored.
+func HashFromPairs(pairs []Value) *HbHash {
+	h := &HbHash{}
+	for rest := pairs; len(rest) >= 2; rest = rest[2:] {
+		h.Set(rest[0], rest[1])
+	}
+	return h
+}
+
diff --git a/hbrt/hbfunc.go b/hbrt/hbfunc.go
index 74fd555..561974a 100644
--- a/hbrt/hbfunc.go
+++ b/hbrt/hbfunc.go
@@ -573,20 +573,19 @@ func (c *HBContext) HashLen(v Value) int {
 // HashAdd adds key-value pair. Harbour: hb_hashAdd()
 func (c *HBContext) HashAdd(v Value, key, val Value) {
 	if v.IsHash() {
-		h := v.AsHash()
-		h.Keys = append(h.Keys, key)
-		h.Values = append(h.Values, val)
+		v.AsHash().Set(key, val)
 	}
 }
 
 // HashGetC gets value by string key. Five extension.
+// Hits the Index directly with the "S"+key serialization so we skip
+// allocating a Value wrapper for the lookup.
 func (c *HBContext) HashGetC(v Value, key string) Value {
 	if v.IsHash() {
 		h := v.AsHash()
-		for i, k := range h.Keys {
-			if k.IsString() && k.AsString() == key {
-				return h.Values[i]
-			}
+		h.ensureIndex()
+		if i, ok := h.Index["S"+key]; ok {
+			return h.Values[i]
 		}
 	}
 	return MakeNil()
diff --git a/hbrt/macroeval.go b/hbrt/macroeval.go
index 486cbb5..e412eb8 100644
--- a/hbrt/macroeval.go
+++ b/hbrt/macroeval.go
@@ -118,8 +118,7 @@ func (t *Thread) evalExpr(expr ast.Expr) Value {
 	case *ast.HashLitExpr:
 		h := &HbHash{}
 		for i := range e.Keys {
-			h.Keys = append(h.Keys, t.evalExpr(e.Keys[i]))
-			h.Values = append(h.Values, t.evalExpr(e.Values[i]))
+			h.Set(t.evalExpr(e.Keys[i]), t.evalExpr(e.Values[i]))
 		}
 		return MakeHashFrom(h)
 
diff --git a/hbrt/ops_collection.go b/hbrt/ops_collection.go
index cecfc7d..860cf1e 100644
--- a/hbrt/ops_collection.go
+++ b/hbrt/ops_collection.go
@@ -20,14 +20,22 @@ func (t *Thread) ArrayGen(n int) {
 
 // HashGen pops n key-value pairs and creates a hash.
 // Stack: [key1] [val1] [key2] [val2] ... → Hash
+//
+// Duplicate keys follow Harbour hash-literal semantics: the last
+// assignment wins and no second slot is created. Lookup/Set invoked
+// inside the reverse-scan pop loop would be order-inverted, so we
+// first materialize all N pairs in stack order and then feed them
+// forward into the hash via Set.
 func (t *Thread) HashGen(n int) {
-	hh := &HbHash{
-		Keys:   make([]Value, n),
-		Values: make([]Value, n),
-	}
+	keys := make([]Value, n)
+	vals := make([]Value, n)
 	for i := n - 1; i >= 0; i-- {
-		hh.Values[i] = t.pop()
-		hh.Keys[i] = t.pop()
+		vals[i] = t.pop()
+		keys[i] = t.pop()
+	}
+	hh := &HbHash{}
+	for i := 0; i < n; i++ {
+		hh.Set(keys[i], vals[i])
 	}
 	t.push(Value{
 		info: makeInfo(tHash, 0, 0),
@@ -44,11 +52,9 @@ func (t *Thread) ArrayPush() {
 	// Hash: h[key] → value
 	if arr.IsHash() {
 		hh := arr.AsHash()
-		for i, k := range hh.Keys {
-			if valueEqual(k, idx) {
-				t.push(hh.Values[i])
-				return
-			}
+		if i := hh.Lookup(idx); i >= 0 {
+			t.push(hh.Values[i])
+			return
 		}
 		t.push(MakeNil())
 		return
@@ -87,15 +93,7 @@ func (t *Thread) ArrayPop() {
 
 	// Hash: h[key] := value
 	if arr.IsHash() {
-		hh := arr.AsHash()
-		for i, k := range hh.Keys {
-			if valueEqual(k, idx) {
-				hh.Values[i] = val
-				return
-			}
-		}
-		hh.Keys = append(hh.Keys, idx)
-		hh.Values = append(hh.Values, val)
+		arr.AsHash().Set(idx, val)
 		return
 	}
 
diff --git a/hbrt/value.go b/hbrt/value.go
index d8464d6..438feb1 100644
--- a/hbrt/value.go
+++ b/hbrt/value.go
@@ -234,11 +234,22 @@ type HbArray struct {
 }
 
 // HbHash is the hash table backing store.
+//
+// Keys/Values are parallel slices kept in insertion order (Harbour
+// HB_HASH_KEEPORDER default). Index is an O(1) lookup map mirroring
+// entries with an indexable key (nil, string, numeric, logical, date,
+// timestamp); other key types fall back to a linear scan of Keys.
+//
+// Callers that mutate Keys/Values directly (tests, bulk loaders) may
+// leave Index stale — the helper methods detect that via a length
+// mismatch and rebuild on demand. Production code must go through the
+// Lookup/Set/Append/Delete methods to keep Index in sync.
 type HbHash struct {
 	Keys   []Value
 	Values []Value
 	Order  []int
 	Flags  int32
+	Index  map[string]int
 }
 
 // HbBlock is the code block backing store.
diff --git a/hbrt/valuemethods.go b/hbrt/valuemethods.go
index 84530aa..884bb85 100644
--- a/hbrt/valuemethods.go
+++ b/hbrt/valuemethods.go
@@ -463,13 +463,7 @@ func vmHashHas(t *Thread, self Value, args []Value) Value {
 	if len(args) == 0 {
 		return MakeBool(false)
 	}
-	key := args[0]
-	for _, k := range self.AsHash().Keys {
-		if valuesEqual(k, key) {
-			return MakeBool(true)
-		}
-	}
-	return MakeBool(false)
+	return MakeBool(self.AsHash().Has(args[0]))
 }
 
 func vmHashLen(t *Thread, self Value, args []Value) Value {
@@ -484,6 +478,7 @@ func vmHashCopy(t *Thread, self Value, args []Value) Value {
 	}
 	copy(nh.Keys, h.Keys)
 	copy(nh.Values, h.Values)
+	// Index is rebuilt lazily on first Lookup against nh.
 	return MakeHashFrom(nh)
 }
 
@@ -491,15 +486,7 @@ func vmHashDelete(t *Thread, self Value, args []Value) Value {
 	if len(args) == 0 {
 		return self
 	}
-	key := args[0]
-	h := self.AsHash()
-	for i, k := range h.Keys {
-		if valuesEqual(k, key) {
-			h.Keys = append(h.Keys[:i], h.Keys[i+1:]...)
-			h.Values = append(h.Values[:i], h.Values[i+1:]...)
-			break
-		}
-	}
+	self.AsHash().Delete(args[0])
 	return self
 }
 
diff --git a/hbrtl/array.go b/hbrtl/array.go
index 99c20cd..335f276 100644
--- a/hbrtl/array.go
+++ b/hbrtl/array.go
@@ -98,6 +98,69 @@ func AClone(t *hbrt.Thread) {
 	t.RetValue()
 }
 
+// HbDeepClone is the RTL entry point that recursively copies its one
+// argument. Arrays and hashes are rebuilt level by level; scalars
+// (string, number, logical, date, NIL) pass through unchanged, since
+// Five strings/numbers are immutable. FiveSql2's plan cache calls it
+// on each hit because Run() mutates nodes (SqlFoldConst notably).
+//
+// Harbour: hb_DeepCopy(xVal) → xNewVal
+func HbDeepClone(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	cloned := deepCloneValue(t.Local(1))
+	t.PushValue(cloned)
+	t.RetValue()
+}
+
+// deepCloneValue returns a structurally independent copy of v. Only
+// Array and Hash containers are rebuilt; other Value kinds come back
+// as-is (scalars are immutable in Five, so sharing is safe).
+//
+// Hot-path notes:
+//   - Scalar slots are copied directly; the recursive call is made
+//     only for a nested Array or Hash.
+//   - Hash keys are shared with the source, never cloned. PRG hashes
+//     carry string / numeric keys at the observed call sites and the
+//     Hash API forbids mutating a key after insertion, so sharing
+//     saves the recursion plus a per-key allocation.
+//
+// NOTE(review): there is no cycle detection; a container that
+// contains itself would recurse without bound.
+func deepCloneValue(v hbrt.Value) hbrt.Value {
+	switch {
+	case v.IsArray():
+		src := v.AsArray()
+		if src == nil {
+			return v
+		}
+		out := make([]hbrt.Value, len(src.Items))
+		for i, el := range src.Items {
+			if el.IsArray() || el.IsHash() {
+				el = deepCloneValue(el)
+			}
+			out[i] = el
+		}
+		return hbrt.MakeArrayFrom(out)
+	case v.IsHash():
+		src := v.AsHash()
+		if src == nil {
+			return v
+		}
+		clone := hbrt.MakeHash()
+		dst := clone.AsHash()
+		for i := range src.Keys {
+			el := src.Values[i]
+			if el.IsArray() || el.IsHash() {
+				el = deepCloneValue(el)
+			}
+			dst.Append(src.Keys[i], el)
+		}
+		return clone
+	}
+	return v
+}
+
 // ACopy copies elements from one array to another.
 // Harbour: ACopy(aSource, aDest [, nStart [, nCount [, nTargetPos]]]) → aDest
 func ACopy(t *hbrt.Thread) {
@@ -133,6 +196,12 @@ func AFill(t *hbrt.Thread) {
 
 // ASort sorts an array using an optional comparison block.
 // Harbour: ASort(aArray [, nStart [, nCount [, bBlock]]]) → aArray
+//
+// Block path: invokes bBlock per compare (side-effect safe).
+// Default path (no block): one pre-scan picks a specialized comparator
+// for homogeneous arrays (string / numeric / date / timestamp /
+// logical); mixed or unknown element types fall back to a generic
+// less-than that matches Harbour's default `<` semantics across types.
 func ASort(t *hbrt.Thread) {
 	nParams := t.ParamCount()
 	t.Frame(nParams, 0)
@@ -140,9 +209,13 @@ func ASort(t *hbrt.Thread) {
 
 	arrVal := t.Local(1)
 	arr := arrVal.AsArray()
+	if arr == nil || len(arr.Items) < 2 {
+		t.PushValue(arrVal)
+		t.RetValue()
+		return
+	}
 
 	if nParams >= 4 && t.Local(4).IsBlock() {
-		// Sort with code block comparator
 		blk := t.Local(4).AsBlock()
 		sort.SliceStable(arr.Items, func(i, j int) bool {
 			t.PushValue(arr.Items[i])
@@ -151,17 +224,47 @@ func ASort(t *hbrt.Thread) {
 			blk.Fn(t)
 			return t.GetRetValue().AsBool()
 		})
-	} else {
-		// Default sort: by value comparison
-		sort.SliceStable(arr.Items, func(i, j int) bool {
-			a, b := arr.Items[i], arr.Items[j]
-			if a.IsString() && b.IsString() {
-				return a.AsString() < b.AsString()
+		t.PushValue(arrVal)
+		t.RetValue()
+		return
+	}
+
+	// Default sort — pick a type-specialized comparator when every
+	// element shares a shape. Falls back to a generic less-than for
+	// mixed or uncategorized types.
+	items := arr.Items
+	switch detectArrayKind(items) {
+	case arrKindString:
+		sort.SliceStable(items, func(i, j int) bool {
+			return items[i].AsString() < items[j].AsString()
+		})
+	case arrKindInt:
+		sort.SliceStable(items, func(i, j int) bool {
+			return items[i].AsNumInt() < items[j].AsNumInt()
+		})
+	case arrKindNumeric:
+		sort.SliceStable(items, func(i, j int) bool {
+			return items[i].AsNumDouble() < items[j].AsNumDouble()
+		})
+	case arrKindDate:
+		sort.SliceStable(items, func(i, j int) bool {
+			return items[i].AsJulian() < items[j].AsJulian()
+		})
+	case arrKindTimestamp:
+		sort.SliceStable(items, func(i, j int) bool {
+			ja, jb := items[i].AsJulian(), items[j].AsJulian()
+			if ja != jb {
+				return ja < jb
 			}
-			if a.IsNumeric() && b.IsNumeric() {
-				return a.AsNumDouble() < b.AsNumDouble()
-			}
-			return false
+			return items[i].AsTimeMs() < items[j].AsTimeMs()
+		})
+	case arrKindLogical:
+		sort.SliceStable(items, func(i, j int) bool {
+			return !items[i].AsBool() && items[j].AsBool()
+		})
+	default:
+		sort.SliceStable(items, func(i, j int) bool {
+			return valueLess(items[i], items[j])
 		})
 	}
 
@@ -169,6 +272,98 @@ func ASort(t *hbrt.Thread) {
 	t.RetValue()
 }
 
+type arrKind int // element-type class that selects ASort's default comparator
+
+const (
+	arrKindMixed arrKind = iota // empty, mixed or uncategorized: generic valueLess
+	arrKindString // every element IsString
+	arrKindInt // every element IsNumInt
+	arrKindNumeric // every element IsNumeric, at least one not IsNumInt
+	arrKindDate // every element IsDate
+	arrKindTimestamp // every element IsTimestamp
+	arrKindLogical // every element IsLogical
+)
+
+// detectArrayKind classifies items for ASort's default comparator.
+// A specialized kind is reported only when every element satisfies
+// that kind's predicate; otherwise (or for an empty slice) the
+// result is arrKindMixed. Candidates are tried in a fixed order, so
+// an all-integer slice yields arrKindInt, while numerics containing
+// at least one non-integer yield arrKindNumeric; string, date,
+// timestamp and logical follow.
+func detectArrayKind(items []hbrt.Value) arrKind {
+	if len(items) == 0 {
+		return arrKindMixed
+	}
+	// Probe each candidate in priority order; the first predicate
+	// that holds for all elements decides the kind.
+	candidates := []struct {
+		kind arrKind
+		pred func(hbrt.Value) bool
+	}{
+		{arrKindInt, func(v hbrt.Value) bool {
+			return v.IsNumInt()
+		}},
+		{arrKindNumeric, func(v hbrt.Value) bool {
+			return v.IsNumeric()
+		}},
+		{arrKindString, func(v hbrt.Value) bool {
+			return v.IsString()
+		}},
+		{arrKindDate, func(v hbrt.Value) bool {
+			return v.IsDate()
+		}},
+		{arrKindTimestamp, func(v hbrt.Value) bool {
+			return v.IsTimestamp()
+		}},
+		{arrKindLogical, func(v hbrt.Value) bool {
+			return v.IsLogical()
+		}},
+	}
+	for _, c := range candidates {
+		matched := true
+		for _, v := range items {
+			if !c.pred(v) {
+				matched = false
+				break
+			}
+		}
+		if matched {
+			return c.kind
+		}
+	}
+	return arrKindMixed
+}
+
+// valueLess is the generic less-than used by ASort when the array is
+// not homogeneous. Rules, checked in order:
+//   - if either side is NIL, a is less only when a is NIL and b is
+//     not, so NILs sort first and group together;
+//   - two numerics, strings, dates, timestamps (Julian day, then
+//     milliseconds) or logicals (false before true) compare in-type;
+//   - any other pairing yields false in both directions, so neither
+//     element is considered smaller than the other.
+func valueLess(a, b hbrt.Value) bool {
+	switch {
+	case a.IsNil() || b.IsNil():
+		return a.IsNil() && !b.IsNil()
+	case a.IsNumeric() && b.IsNumeric():
+		return a.AsNumDouble() < b.AsNumDouble()
+	case a.IsString() && b.IsString():
+		return a.AsString() < b.AsString()
+	case a.IsDate() && b.IsDate():
+		return a.AsJulian() < b.AsJulian()
+	case a.IsTimestamp() && b.IsTimestamp():
+		if ja, jb := a.AsJulian(), b.AsJulian(); ja != jb {
+			return ja < jb
+		}
+		return a.AsTimeMs() < b.AsTimeMs()
+	case a.IsLogical() && b.IsLogical():
+		return !a.AsBool() && b.AsBool()
+	}
+	return false
+}
+
 // AEval evaluates a block for each element in array.
 // Harbour: AEval(aArray, bBlock [, nStart [, nCount]]) → aArray
 func AEval(t *hbrt.Thread) {
@@ -201,6 +396,12 @@ func AEval(t *hbrt.Thread) {
 
 // AScan searches for a value in array, returns position (0 if not found).
 // Harbour: AScan(aArray, xValue|bBlock [, nStart [, nCount]]) → nPos
+//
+// Block path: per-element block invoke (side-effect safe).
+// Value path: specialized fast-paths for string / int / double search
+// values — the loop stays inside Go without running through the
+// generic valuesEqual type-dispatch each iteration. Mixed or rare
+// types (date, timestamp, logical, nil) fall back to valuesEqual.
 func AScan(t *hbrt.Thread) {
 	nParams := t.ParamCount()
 	t.Frame(nParams, 0)
@@ -208,11 +409,16 @@ func AScan(t *hbrt.Thread) {
 
 	arrVal := t.Local(1)
 	arr := arrVal.AsArray()
+	if arr == nil {
+		t.RetInt(0)
+		return
+	}
+	items := arr.Items
 	search := t.Local(2)
 
 	if search.IsBlock() {
 		blk := search.AsBlock()
-		for i, item := range arr.Items {
+		for i, item := range items {
 			t.PushValue(item)
 			t.PendingParams2(1)
 			blk.Fn(t)
@@ -221,8 +427,45 @@ func AScan(t *hbrt.Thread) {
 				return
 			}
 		}
-	} else {
-		for i, item := range arr.Items {
+		t.RetInt(0)
+		return
+	}
+
+	switch {
+	case search.IsString():
+		s := search.AsString()
+		for i, item := range items {
+			if item.IsString() && item.AsString() == s {
+				t.RetInt(int64(i + 1))
+				return
+			}
+		}
+	case search.IsNumInt():
+		n := search.AsNumInt()
+		for i, item := range items {
+			if !item.IsNumeric() {
+				continue
+			}
+			if item.IsNumInt() {
+				if item.AsNumInt() == n {
+					t.RetInt(int64(i + 1))
+					return
+				}
+			} else if item.AsNumDouble() == float64(n) {
+				t.RetInt(int64(i + 1))
+				return
+			}
+		}
+	case search.IsNumeric():
+		f := search.AsNumDouble()
+		for i, item := range items {
+			if item.IsNumeric() && item.AsNumDouble() == f {
+				t.RetInt(int64(i + 1))
+				return
+			}
+		}
+	default:
+		for i, item := range items {
 			if valuesEqual(item, search) {
 				t.RetInt(int64(i + 1))
 				return
diff --git a/hbrtl/hash.go b/hbrtl/hash.go
index e6771c1..ede6991 100644
--- a/hbrtl/hash.go
+++ b/hbrtl/hash.go
@@ -17,8 +17,7 @@ func HbHash(t *hbrt.Thread) {
 	h := hbrt.MakeHash()
 	hh := h.AsHash()
 	for i := 1; i <= nParams-1; i += 2 {
-		hh.Keys = append(hh.Keys, t.Local(i))
-		hh.Values = append(hh.Values, t.Local(i+1))
+		hh.Set(t.Local(i), t.Local(i+1))
 	}
 	t.PushValue(h)
 	t.RetValue()
@@ -29,16 +28,12 @@ func HbHash(t *hbrt.Thread) {
 func HbHGet(t *hbrt.Thread) {
 	t.Frame(2, 0)
 	defer t.EndProc()
-	hVal := t.Local(1)
-	key := t.Local(2)
-	hh := hVal.AsHash()
+	hh := t.Local(1).AsHash()
 	if hh != nil {
-		for i, k := range hh.Keys {
-			if valuesEqual(k, key) {
-				t.PushValue(hh.Values[i])
-				t.RetValue()
-				return
-			}
+		if i := hh.Lookup(t.Local(2)); i >= 0 {
+			t.PushValue(hh.Values[i])
+			t.RetValue()
+			return
 		}
 	}
 	t.PushNil()
@@ -51,20 +46,8 @@ func HbHSet(t *hbrt.Thread) {
 	t.Frame(3, 0)
 	defer t.EndProc()
 	hVal := t.Local(1)
-	key := t.Local(2)
-	val := t.Local(3)
-	hh := hVal.AsHash()
-	if hh != nil {
-		for i, k := range hh.Keys {
-			if valuesEqual(k, key) {
-				hh.Values[i] = val
-				t.PushValue(hVal)
-				t.RetValue()
-				return
-			}
-		}
-		hh.Keys = append(hh.Keys, key)
-		hh.Values = append(hh.Values, val)
+	if hh := hVal.AsHash(); hh != nil {
+		hh.Set(t.Local(2), t.Local(3))
 	}
 	t.PushValue(hVal)
 	t.RetValue()
@@ -76,16 +59,8 @@ func HbHDel(t *hbrt.Thread) {
 	t.Frame(2, 0)
 	defer t.EndProc()
 	hVal := t.Local(1)
-	key := t.Local(2)
-	hh := hVal.AsHash()
-	if hh != nil {
-		for i, k := range hh.Keys {
-			if valuesEqual(k, key) {
-				hh.Keys = append(hh.Keys[:i], hh.Keys[i+1:]...)
-				hh.Values = append(hh.Values[:i], hh.Values[i+1:]...)
-				break
-			}
-		}
+	if hh := hVal.AsHash(); hh != nil {
+		hh.Delete(t.Local(2))
 	}
 	t.PushValue(hVal)
 	t.RetValue()
@@ -96,19 +71,8 @@ func HbHDel(t *hbrt.Thread) {
 func HbHHasKey(t *hbrt.Thread) {
 	t.Frame(2, 0)
 	defer t.EndProc()
-	hVal := t.Local(1)
-	key := t.Local(2)
-	hh := hVal.AsHash()
-	if hh != nil {
-		for _, k := range hh.Keys {
-			if valuesEqual(k, key) {
-				t.PushBool(true)
-				t.RetValue()
-				return
-			}
-		}
-	}
-	t.PushBool(false)
+	hh := t.Local(1).AsHash()
+	t.PushBool(hh != nil && hh.Has(t.Local(2)))
 	t.RetValue()
 }
 
diff --git a/hbrtl/json.go b/hbrtl/json.go
index 9c1b448..db44c44 100644
--- a/hbrtl/json.go
+++ b/hbrtl/json.go
@@ -147,15 +147,9 @@ func navigatePath(v hbrt.Value, path string) hbrt.Value {
 		}
 		if v.IsHash() {
 			h := v.AsHash()
-			found := false
-			for i, k := range h.Keys {
-				if k.AsString() == part {
-					v = h.Values[i]
-					found = true
-					break
-				}
-			}
-			if !found {
+			if i := h.Lookup(hbrt.MakeString(part)); i >= 0 {
+				v = h.Values[i]
+			} else {
 				return hbrt.MakeNil()
 			}
 		} else {
@@ -212,18 +206,7 @@ func JsonMerge(t *hbrt.Thread) {
 	copy(result.Keys, dh.Keys)
 	copy(result.Values, dh.Values)
 	for i, sk := range sh.Keys {
-		found := false
-		for j, rk := range result.Keys {
-			if rk.AsString() == sk.AsString() {
-				result.Values[j] = sh.Values[i]
-				found = true
-				break
-			}
-		}
-		if !found {
-			result.Keys = append(result.Keys, sk)
-			result.Values = append(result.Values, sh.Values[i])
-		}
+		result.Set(sk, sh.Values[i])
 	}
 	t.RetVal(hbrt.MakeHashFrom(result))
 }
diff --git a/hbrtl/pcexpr.go b/hbrtl/pcexpr.go
index ec7920a..9e94eb5 100644
--- a/hbrtl/pcexpr.go
+++ b/hbrtl/pcexpr.go
@@ -14,8 +14,24 @@ import (
 	"five/compiler/pp"
 	"five/hbrt"
 	"os"
+	"sync"
 )
 
+// pcCompileCache stores compiled PcodeFunc keyed by the original PRG
+// expression string. Compilation does parser + preprocessor + pcode
+// generation per call (~50-200µs for small expressions); for repeated
+// queries (same SQL template) every call after the first is a
+// sync.Map hit and returns the cached pointer directly.
+//
+// Thread safety: PcodeFunc is immutable after compilation (no
+// per-call mutable state — execution state lives on hbrt.Thread),
+// so sharing the pointer across goroutines is safe.
+//
+// Unbounded: distinct SQL / expression text count is bounded by the
+// caller's query set; for FiveSql2 workloads this is a small constant.
+// Switch to LRU if a pathological caller emerges.
+var pcCompileCache sync.Map // map[string]*hbrt.PcodeFunc
+
 // PcCompile(cPrgExpr) → pFunc
 //
 // Compile a PRG expression to pcode. Returns an opaque pointer that can
@@ -44,6 +60,14 @@ func PcCompile(t *hbrt.Thread) {
 		return
 	}
 
+	// Cache hit — skip parser/genpc entirely.
+	if cached, ok := pcCompileCache.Load(source); ok {
+		if fn, ok := cached.(*hbrt.PcodeFunc); ok && fn != nil {
+			t.RetPointer(fn)
+			return
+		}
+	}
+
 	// Wrap expression in a function stub so the parser can handle it.
 	wrapped := "FUNCTION _EXPR()\nRETURN " + source + "\n"
 
@@ -89,6 +113,11 @@ func PcCompile(t *hbrt.Thread) {
 		return
 	}
 
+	// Populate the cache. sync.Map.Store handles concurrent writers —
+	// duplicate compilations of the same source waste a few µs but
+	// don't corrupt the map; whichever compilation finishes second
+	// overwrites with an identical value.
+	pcCompileCache.Store(source, fn)
 	t.RetPointer(fn)
 }
 
diff --git a/hbrtl/register.go b/hbrtl/register.go
index 9edc685..398d9e4 100644
--- a/hbrtl/register.go
+++ b/hbrtl/register.go
@@ -55,6 +55,8 @@ func RegisterRTL(vm *hbrt.VM) {
 		hbrt.Sym("AINS", hbrt.FsPublic, AIns),
 		hbrt.Sym("ASIZE", hbrt.FsPublic, ASize),
 		hbrt.Sym("ACLONE", hbrt.FsPublic, AClone),
+		hbrt.Sym("HBDEEPCLONE", hbrt.FsPublic, HbDeepClone),
+		hbrt.Sym("HB_DEEPCOPY", hbrt.FsPublic, HbDeepClone),
 		hbrt.Sym("ACOPY", hbrt.FsPublic, ACopy),
 		hbrt.Sym("AFILL", hbrt.FsPublic, AFill),
 		hbrt.Sym("ASORT", hbrt.FsPublic, ASort),
@@ -623,6 +625,32 @@ func RegisterRTL(vm *hbrt.VM) {
 		hbrt.Sym("SQLHASHJOIN", hbrt.FsPublic, SqlHashJoin),
 		hbrt.Sym("SQLORDERBY", hbrt.FsPublic, SqlOrderBy),
 		hbrt.Sym("SQLGROUPBY", hbrt.FsPublic, SqlGroupBy),
+		hbrt.Sym("SQLDISTINCT", hbrt.FsPublic, SqlDistinct),
+		hbrt.Sym("SQLEXPRHASAGG", hbrt.FsPublic, SqlExprHasAgg),
+		hbrt.Sym("SQLBULKINSERT", hbrt.FsPublic, SqlBulkInsert),
+		hbrt.Sym("SQLBULKUPDATE", hbrt.FsPublic, SqlBulkUpdate),
+		hbrt.Sym("SQLWINDOWPARTITIONS", hbrt.FsPublic, SqlWindowPartitions),
+		hbrt.Sym("SQLGROUPROWS", hbrt.FsPublic, SqlGroupRows),
+		hbrt.Sym("SQLCOMPUTEAGGSIMPLE", hbrt.FsPublic, SqlComputeAggSimple),
+		hbrt.Sym("SQLEVALHAVING", hbrt.FsPublic, SqlEvalHaving),
+		hbrt.Sym("SQLCOERCESTR", hbrt.FsPublic, SqlCoerceStr),
+		hbrt.Sym("SQLCOERCENUM", hbrt.FsPublic, SqlCoerceNum),
+		hbrt.Sym("SQLCOERCEFORCMP", hbrt.FsPublic, SqlCoerceForCmp),
+		hbrt.Sym("SQLISTRUE", hbrt.FsPublic, SqlIsTrue),
+		hbrt.Sym("SQLCMPEQ", hbrt.FsPublic, SqlCmpEq),
+		hbrt.Sym("SQLCMPLT", hbrt.FsPublic, SqlCmpLt),
+		hbrt.Sym("SQLEXTRACTTEMPLATE", hbrt.FsPublic, SqlExtractTemplate),
+		hbrt.Sym("SQLLEXERTOKENIZE", hbrt.FsPublic, SqlLexerTokenize),
+		hbrt.Sym("SQLLEXANDEXTRACTTEMPLATE", hbrt.FsPublic, SqlLexAndExtractTemplate),
+		hbrt.Sym("SQLWACACHEENABLE", hbrt.FsPublic, SqlWACacheEnable),
+		hbrt.Sym("SQLWACACHEDISABLE", hbrt.FsPublic, SqlWACacheDisable),
+		hbrt.Sym("SQLWACACHEISENABLED", hbrt.FsPublic, SqlWACacheIsEnabled),
+		hbrt.Sym("SQLWACACHEGET", hbrt.FsPublic, SqlWACacheGet),
+		hbrt.Sym("SQLWACACHEPUT", hbrt.FsPublic, SqlWACachePut),
+		hbrt.Sym("SQLWACACHEINVALIDATE", hbrt.FsPublic, SqlWACacheInvalidate),
+		hbrt.Sym("SQLWACACHECLOSEALL", hbrt.FsPublic, SqlWACacheCloseAll),
+		hbrt.Sym("SQLWINDOWSORTPARTITION", hbrt.FsPublic, SqlWindowSortPartition),
+		hbrt.Sym("SQLWINDOWASSIGNRANK", hbrt.FsPublic, SqlWindowAssignRank),
 
 		// Goroutine / Concurrency
 		hbrt.Sym("GO", hbrt.FsPublic, GoFunc),
diff --git a/hbrtl/sqlexpr.go b/hbrtl/sqlexpr.go
new file mode 100644
index 0000000..cc0dd3c
--- /dev/null
+++ b/hbrtl/sqlexpr.go
@@ -0,0 +1,137 @@
+// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
+// All rights reserved.
+
+// Go-native FiveSql2 expression helpers.
+// Port of the tight, interpreter-heavy recursive walkers from
+// _FiveSql2/src/TSqlExpr.prg into straight Go — the PRG versions
+// are bottleneck-prone because every recursion pays the full VM
+// frame setup cost, and SqlExprHasAgg is invoked per result
+// column per query.
+
+package hbrtl
+
+import (
+	"five/hbrt"
+)
+
+// FiveSql2 AST node kinds — must mirror _FiveSql2/src/FiveSqlDef.ch.
+// Nodes are stored as Five arrays { nKind, xVal, xLeft, xRight, xExtra }
+// (1-based in PRG, 0-based here).
+const (
+	ndLit    = 1 // presumably a literal value — confirm in FiveSqlDef.ch
+	ndCol    = 2 // presumably a column reference — confirm in FiveSqlDef.ch
+	ndFn     = 3 // function call: Items[1] is the name, Items[2] the argument array
+	ndBin    = 4 // binary operator: operands in Items[2] and Items[3]
+	ndUni    = 5 // unary operator: operand in Items[2]
+	ndCase   = 6 // CASE: Items[1] holds WHEN/THEN pairs, Items[2] the optional ELSE
+	ndSub    = 7 // subquery; sqlExprHasAggWalk does not descend into it
+	ndPar    = 9 // ND_PAR parameter reference — TODO confirm layout in FiveSqlDef.ch
+	ndNil    = 10 // presumably the NULL node — confirm in FiveSqlDef.ch
+	ndWindow = 12 // window node; sqlExprHasAggWalk does not descend into it
+)
+
+// aggFuncSet mirrors the AGG_FUNCTIONS macro in FiveSqlDef.ch. Names
+// are stored in canonical upper case; the PRG parser upper-cases
+// function identifiers at parse time so no ToUpper is needed on the
+// hot path. If that invariant ever changes, upper-case here.
+var aggFuncSet = map[string]struct{}{
+	"COUNT":          {},
+	"SUM":            {},
+	"AVG":            {},
+	"MIN":            {},
+	"MAX":            {},
+	"GROUP_CONCAT":   {},
+	"STRING_AGG":     {},
+	"LISTAGG":        {},
+	"JSON_ARRAYAGG":  {},
+	"JSON_OBJECTAGG": {},
+	"XMLAGG":         {},
+	"ANY_VALUE":      {},
+	"BOOL_AND":       {},
+	"BOOL_OR":        {},
+}
+
+// sqlExprHasAggWalk reports whether the expression tree rooted at v
+// holds an aggregate call reachable without crossing an aggregation
+// scope. Mirrors TSqlExpr.prg:SqlExprHasAgg: it visits both ND_BIN
+// operands, the ND_UNI operand, ND_FN arguments, and every ND_CASE
+// WHEN/THEN pair plus ELSE. ND_WINDOW and ND_SUB are never entered
+// because they carry their own aggregation scope.
+func sqlExprHasAggWalk(v hbrt.Value) bool {
+	if v.IsNil() {
+		return false
+	}
+	node := v.AsArray()
+	if node == nil {
+		return false
+	}
+	fields := node.Items
+	if len(fields) < 2 {
+		return false
+	}
+
+	switch int(fields[0].AsNumInt()) {
+	case ndBin:
+		if len(fields) >= 4 {
+			return sqlExprHasAggWalk(fields[2]) || sqlExprHasAggWalk(fields[3])
+		}
+
+	case ndUni:
+		if len(fields) >= 3 {
+			return sqlExprHasAggWalk(fields[2])
+		}
+
+	case ndFn:
+		// An aggregate name settles it immediately.
+		if _, isAgg := aggFuncSet[fields[1].AsString()]; isAgg {
+			return true
+		}
+		// Scalar function: an aggregate may still hide in its arguments.
+		if len(fields) < 3 || !fields[2].IsArray() {
+			return false
+		}
+		for _, arg := range fields[2].AsArray().Items {
+			if sqlExprHasAggWalk(arg) {
+				return true
+			}
+		}
+
+	case ndCase:
+		// fields[1] holds the WHEN/THEN pairs, fields[2] the optional ELSE.
+		if fields[1].IsArray() {
+			for _, branch := range fields[1].AsArray().Items {
+				pair := branch.AsArray()
+				if pair == nil || len(pair.Items) < 2 {
+					continue
+				}
+				if sqlExprHasAggWalk(pair.Items[0]) || sqlExprHasAggWalk(pair.Items[1]) {
+					return true
+				}
+			}
+		}
+		if len(fields) >= 3 && !fields[2].IsNil() {
+			return sqlExprHasAggWalk(fields[2])
+		}
+	}
+
+	// Leaves, unknown kinds, and truncated nodes contain no aggregate.
+	return false
+}
+
+// SqlExprHasAgg(xExpr) → lHasAgg
+//
+// RTL entry point: answers .T. when the AST passed as the first
+// argument contains an aggregate function call. The recursion runs
+// in sqlExprHasAggWalk, i.e. in Go rather than in interpreter frames.
+func SqlExprHasAgg(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	expr := t.Local(1)
+	t.RetBool(sqlExprHasAggWalk(expr))
+}
+
+// Silence "declared and not used" for constants that exist solely to
+// document FiveSqlDef.ch layout — keeping them in source form helps
+// future walker additions (ND_SUB for subquery flattening, ND_WINDOW
+// for window-over-aggregate detection).
+var _ = [...]int{ndLit, ndCol, ndSub, ndPar, ndNil, ndWindow}
diff --git a/hbrtl/sqlhelpers.go b/hbrtl/sqlhelpers.go
new file mode 100644
index 0000000..e8d0723
--- /dev/null
+++ b/hbrtl/sqlhelpers.go
@@ -0,0 +1,587 @@
+// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
+// All rights reserved.
+
+// FiveSql2 scalar helpers — Go replacements for the PRG functions in
+// _FiveSql2/src/TSqlFunc.prg. These are invoked per-operator during
+// expression evaluation (WHERE / HAVING / CASE); porting removes PRG
+// VM frame overhead on the hot interpreter path. Semantics match the
+// PRG source byte-for-byte.
+
+package hbrtl
+
+import (
+	"math"
+	"strconv"
+	"strings"
+
+	"five/hbrt"
+)
+
+// FiveSql2 lexer token type codes — must match FiveSqlDef.ch.
+const (
+	tkEnd    = 0 // end-of-stream marker, appended once with empty text
+	tkName   = 1 // identifier / keyword (bare or [bracketed]), upper-cased
+	tkText   = 2 // single-quoted string literal, quotes removed
+	tkNum    = 3 // numeric literal: run of digits and dots
+	tkComma  = 4 // ","
+	tkDot    = 5 // "."
+	tkStar   = 6 // "*"
+	tkLPar   = 7 // "("
+	tkRPar   = 8 // ")"
+	tkEq     = 9 // "="
+	tkNEq    = 10 // "<>" or "!="
+	tkLT     = 11 // "<"
+	tkGT     = 12 // ">"
+	tkLTE    = 13 // "<="
+	tkGTE    = 14 // ">="
+	tkQMark  = 15 // "?" parameter placeholder
+	tkPlus   = 16 // "+"
+	tkMinus  = 17 // "-"
+	tkSlash  = 18 // "/"
+	tkPipes  = 19 // "||"
+)
+
+// makeTokValue packages one lexed token for the PRG side. The result
+// is the 2-element array TSqlParser2 consumes, { nTokenType,
+// cTokenValue }: the numeric token type first, then the token text
+// exactly as supplied by the caller.
+func makeTokValue(ttype int, text string) hbrt.Value {
+	kind, lexeme := hbrt.MakeNumInt(int64(ttype)), hbrt.MakeString(text)
+	return hbrt.MakeArrayFrom([]hbrt.Value{kind, lexeme})
+}
+
+// lexSQL is the Go port of TSqlLexer:Tokenize — byte-level FSM over the
+// ASCII input string. Produces the same aTokens shape the PRG lexer did:
+// a slice of { nType, cText } pairs terminated by a tkEnd token.
+//
+// Identifiers (bare or [bracketed]) are upper-cased; `--` and `/* */`
+// comments, `;`, a lone `|` or `!`, and any unrecognised byte are
+// dropped without emitting a token. Inside a string literal each `''`
+// yields exactly one quote, however many escapes the literal contains.
+// Unterminated literals, brackets and block comments run to end of input.
+func lexSQL(s string) []hbrt.Value {
+	toks := make([]hbrt.Value, 0, 32)
+	n := len(s)
+	i := 0
+	for i < n {
+		c := s[i]
+
+		// Whitespace
+		if c == ' ' || c == '\t' || c == '\n' || c == '\r' {
+			i++
+			continue
+		}
+
+		// Line comment `-- ...`
+		if c == '-' && i+1 < n && s[i+1] == '-' {
+			i += 2
+			for i < n && s[i] != '\n' {
+				i++
+			}
+			continue
+		}
+
+		// Block comment `/* ... */`. An unterminated comment swallows the
+		// rest of the input instead of leaking its last byte as a token.
+		if c == '/' && i+1 < n && s[i+1] == '*' {
+			end := strings.Index(s[i+2:], "*/")
+			if end < 0 {
+				i = n
+			} else {
+				i += 2 + end + 2
+			}
+			continue
+		}
+
+		// String literal (single-quoted, '' escapes a quote). The fast
+		// path slices the source directly; the builder is only used once
+		// an escaped quote is seen. An unterminated literal runs to EOF.
+		if c == '\'' {
+			i++
+			start := i
+			var sb strings.Builder
+			escaped := false
+			for i < n {
+				if s[i] != '\'' {
+					i++
+					continue
+				}
+				if i+1 >= n || s[i+1] != '\'' {
+					break // closing quote
+				}
+				// Doubled quote: flush the pending run plus ONE quote.
+				sb.WriteString(s[start:i])
+				sb.WriteByte('\'')
+				escaped = true
+				i += 2
+				start = i
+			}
+			val := s[start:i]
+			if escaped {
+				sb.WriteString(val)
+				val = sb.String()
+			}
+			if i < n {
+				i++ // skip closing quote
+			}
+			toks = append(toks, makeTokValue(tkText, val))
+			continue
+		}
+
+		// Numeric literal
+		if c >= '0' && c <= '9' {
+			start := i
+			for i < n && ((s[i] >= '0' && s[i] <= '9') || s[i] == '.') {
+				i++
+			}
+			toks = append(toks, makeTokValue(tkNum, s[start:i]))
+			continue
+		}
+
+		// Identifier / keyword
+		if isAlphaSQL(c) || c == '_' {
+			start := i
+			for i < n && (isAlphaSQL(s[i]) || (s[i] >= '0' && s[i] <= '9') || s[i] == '_') {
+				i++
+			}
+			toks = append(toks, makeTokValue(tkName, strings.ToUpper(s[start:i])))
+			continue
+		}
+
+		// Bracketed identifier `[col name]`
+		if c == '[' {
+			i++
+			start := i
+			for i < n && s[i] != ']' {
+				i++
+			}
+			name := strings.ToUpper(s[start:i])
+			if i < n {
+				i++ // skip ']'
+			}
+			toks = append(toks, makeTokValue(tkName, name))
+			continue
+		}
+
+		// Parameter placeholder
+		if c == '?' {
+			toks = append(toks, makeTokValue(tkQMark, "?"))
+			i++
+			continue
+		}
+
+		// Multi-char + single-char operators / punctuation
+		switch c {
+		case ',':
+			toks = append(toks, makeTokValue(tkComma, ","))
+			i++
+		case '.':
+			toks = append(toks, makeTokValue(tkDot, "."))
+			i++
+		case '*':
+			toks = append(toks, makeTokValue(tkStar, "*"))
+			i++
+		case '(':
+			toks = append(toks, makeTokValue(tkLPar, "("))
+			i++
+		case ')':
+			toks = append(toks, makeTokValue(tkRPar, ")"))
+			i++
+		case '+':
+			toks = append(toks, makeTokValue(tkPlus, "+"))
+			i++
+		case '-':
+			toks = append(toks, makeTokValue(tkMinus, "-"))
+			i++
+		case '/':
+			toks = append(toks, makeTokValue(tkSlash, "/"))
+			i++
+		case '|':
+			if i+1 < n && s[i+1] == '|' {
+				toks = append(toks, makeTokValue(tkPipes, "||"))
+				i += 2
+			} else {
+				i++
+			}
+		case '=':
+			toks = append(toks, makeTokValue(tkEq, "="))
+			i++
+		case '<':
+			if i+1 < n && s[i+1] == '=' {
+				toks = append(toks, makeTokValue(tkLTE, "<="))
+				i += 2
+			} else if i+1 < n && s[i+1] == '>' {
+				toks = append(toks, makeTokValue(tkNEq, "<>"))
+				i += 2
+			} else {
+				toks = append(toks, makeTokValue(tkLT, "<"))
+				i++
+			}
+		case '>':
+			if i+1 < n && s[i+1] == '=' {
+				toks = append(toks, makeTokValue(tkGTE, ">="))
+				i += 2
+			} else {
+				toks = append(toks, makeTokValue(tkGT, ">"))
+				i++
+			}
+		case '!':
+			if i+1 < n && s[i+1] == '=' {
+				toks = append(toks, makeTokValue(tkNEq, "!="))
+				i += 2
+			} else {
+				i++
+			}
+		case ';':
+			i++
+		default:
+			i++
+		}
+	}
+
+	toks = append(toks, makeTokValue(tkEnd, ""))
+	return toks
+}
+
+func isAlphaSQL(c byte) bool {
+	return 'a' <= c|0x20 && c|0x20 <= 'z' // c|0x20 folds ASCII 'A'..'Z' onto 'a'..'z'
+}
+
+// SqlLexerTokenize(cSQL) → aTokens
+// RTL wrapper around lexSQL: tokenizes the SQL text given as the first
+// argument and returns the { { nType, cText }, ... } token array.
+func SqlLexerTokenize(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	src := t.Local(1).AsString()
+	t.PushValue(hbrt.MakeArrayFrom(lexSQL(src)))
+	t.RetValue()
+}
+
+// SqlLexAndExtractTemplate(cSQL) → { aTokens, cKey, aParams }
+//
+// Lexes cSQL and extracts its template in one Go call, sparing
+// the PRG side three boundary crossings (lex, get tokens,
+// extract). In the returned aTokens every literal is already a
+// TK_QMARK; aParams lists the literal values in source order and
+// cKey is the plan cache key built from the normalized tokens.
+func SqlLexAndExtractTemplate(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+
+	sql := t.Local(1).AsString()
+	tokens := lexSQL(sql)
+
+	literals := make([]hbrt.Value, 0, 8)
+	var key strings.Builder
+	key.Grow(len(sql))
+
+	for _, entry := range tokens {
+		pair := entry.AsArray()
+		if pair == nil || len(pair.Items) < 2 {
+			continue
+		}
+		kind := int(pair.Items[0].AsNumInt())
+
+		// Non-literal tokens only contribute to the key.
+		if kind != tkText && kind != tkNum {
+			key.WriteByte(byte(kind) + 0x20)
+			if kind == tkName {
+				key.WriteString(pair.Items[1].AsString())
+				key.WriteByte(' ')
+			}
+			continue
+		}
+
+		// Literal: record its value, then rewrite the token to a placeholder.
+		literal, mark := pair.Items[1], byte('?')
+		if kind == tkNum {
+			mark = '#'
+			text := literal.AsString()
+			if iv, err := strconv.ParseInt(text, 10, 64); err == nil {
+				literal = hbrt.MakeNumInt(iv)
+			} else if fv, err := strconv.ParseFloat(text, 64); err == nil {
+				literal = hbrt.MakeDoubleAuto(fv)
+			} else {
+				literal = hbrt.MakeString(text)
+			}
+		}
+		literals = append(literals, literal)
+		pair.Items[0] = hbrt.MakeNumInt(tkQMark)
+		pair.Items[1] = hbrt.MakeString("?")
+		key.WriteByte(mark)
+	}
+
+	t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
+		hbrt.MakeArrayFrom(tokens),
+		hbrt.MakeString(key.String()),
+		hbrt.MakeArrayFrom(literals),
+	}))
+	t.RetValue()
+}
+
+// SqlExtractTemplate(aTokens) → { cKey, aParams }
+//
+// Walks a FiveSql2 lexer token stream, replacing string (TK_TEXT)
+// and numeric (TK_NUM) literals with the parameter placeholder
+// token (TK_QMARK). Collected literal values are returned as
+// aParams in their natural left-to-right order.
+//
+// Each token is a 2-element array {nTokenType, cTokenValue}; entries
+// of any other shape are skipped. The mutation is in place so the
+// caller can pass the resulting aTokens straight into TSqlParser2 —
+// the parser then emits ND_PAR nodes that resolve against aParams at
+// execution time. A numeric token that parses as neither an integer
+// nor a float is carried as a string parameter.
+//
+// The template key is a compact digest of the non-literal token
+// type sequence, used as the plan cache key for queries that share
+// the same shape but differ only in literal values. Queries like:
+//
+//    INSERT INTO t VALUES (1,'a')
+//    INSERT INTO t VALUES (2,'b')
+//
+// produce the SAME key once literals are collapsed ('?' for strings,
+// '#' for numbers), letting the plan cache hit from the 2nd call onward.
+//
+// Returns a 2-element array: { cKey, aParams }; a non-array argument
+// yields an empty key and an empty parameter array.
+func SqlExtractTemplate(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+
+	tokensVal := t.Local(1)
+	if !tokensVal.IsArray() {
+		// Not a token array: answer an empty key with no parameters.
+		empty := hbrt.MakeArrayFrom([]hbrt.Value{
+			hbrt.MakeString(""),
+			hbrt.MakeArrayFrom(nil),
+		})
+		t.PushValue(empty)
+		t.RetValue()
+		return
+	}
+
+	toks := tokensVal.AsArray().Items
+	params := make([]hbrt.Value, 0, 8)
+
+	// Template key — cheap digest of the token-type sequence.
+	var keyBuf strings.Builder
+	keyBuf.Grow(len(toks) * 2)
+
+	// Token kinds come from the package-level tk* constants shared with
+	// lexSQL, so the lexer and this walker cannot drift apart.
+	for _, tokVal := range toks {
+		tok := tokVal.AsArray()
+		if tok == nil || len(tok.Items) < 2 {
+			continue
+		}
+		ttype := int(tok.Items[0].AsNumInt())
+
+		switch ttype {
+		case tkText:
+			// String literal → TK_QMARK + save raw string value.
+			params = append(params, tok.Items[1])
+			tok.Items[0] = hbrt.MakeInt(tkQMark)
+			tok.Items[1] = hbrt.MakeString("?")
+			keyBuf.WriteByte('?')
+		case tkNum:
+			// Numeric literal → TK_QMARK + parse value. Integer form
+			// when possible (common for id columns), double otherwise.
+			s := tok.Items[1].AsString()
+			var val hbrt.Value
+			if i, err := strconv.ParseInt(s, 10, 64); err == nil {
+				val = hbrt.MakeNumInt(i)
+			} else if f, err := strconv.ParseFloat(s, 64); err == nil {
+				val = hbrt.MakeDoubleAuto(f)
+			} else {
+				val = hbrt.MakeString(s)
+			}
+			params = append(params, val)
+			tok.Items[0] = hbrt.MakeInt(tkQMark)
+			tok.Items[1] = hbrt.MakeString("?")
+			keyBuf.WriteByte('#')
+		default:
+			// Non-literal token — include type code + text so two
+			// different-but-same-shape queries distinguish properly
+			// (e.g., SELECT id vs SELECT name).
+			keyBuf.WriteByte(byte(ttype) + 0x20) // offset to printable
+			if ttype == tkName { // identifiers also contribute their text
+				keyBuf.WriteString(strings.ToUpper(tok.Items[1].AsString()))
+				keyBuf.WriteByte(' ')
+			}
+		}
+	}
+
+	result := hbrt.MakeArrayFrom([]hbrt.Value{
+		hbrt.MakeString(keyBuf.String()),
+		hbrt.MakeArrayFrom(params),
+	})
+	t.PushValue(result)
+	t.RetValue()
+}
+
+// SqlCoerceStr(x) → cString
+// RTL wrapper: returns the canonical string form of the first
+// argument as computed by sqlCoerceStr (NULL-safe: NIL gives "").
+func SqlCoerceStr(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	t.RetString(sqlCoerceStr(t.Local(1)))
+}
+
+// sqlCoerceStr renders a scalar as text: strings pass through, integers
+// print in base 10, other numerics in 'g' form, logicals as "T"/"F".
+func sqlCoerceStr(v hbrt.Value) string {
+	out := "" // NIL and unsupported types keep this empty result
+	switch {
+	case v.IsNil():
+	case v.IsString():
+		out = v.AsString()
+	case v.IsNumeric() && v.IsNumInt():
+		out = strconv.FormatInt(v.AsNumInt(), 10)
+	case v.IsNumeric():
+		out = strconv.FormatFloat(v.AsNumDouble(), 'g', -1, 64)
+	case v.IsLogical() && v.AsBool():
+		out = "T"
+	case v.IsLogical():
+		out = "F"
+	}
+	return out
+}
+
+// SqlCoerceNum(x) → nNumber
+//
+// Numeric coercion of the first argument:
+//   - NIL, .F. and unsupported types → 0
+//   - .T. → 1
+//   - numbers pass through unchanged
+//   - strings → their leading numeric prefix (parseLeadingNumeric)
+func SqlCoerceNum(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	switch arg := t.Local(1); {
+	case arg.IsNil():
+		t.RetInt(0)
+	case arg.IsNumeric():
+		t.RetVal(arg)
+	case arg.IsString():
+		t.RetVal(hbrt.MakeDoubleAuto(parseLeadingNumeric(arg.AsString())))
+	case arg.IsLogical() && arg.AsBool():
+		t.RetInt(1)
+	default:
+		t.RetInt(0)
+	}
+}
+
+// SqlCoerceForCmp(x) → xNormalized
+// Strings come back trimmed and upper-cased so CHAR comparisons are
+// case-insensitive; every other type is returned untouched.
+func SqlCoerceForCmp(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	arg := t.Local(1)
+	if !arg.IsString() {
+		t.RetVal(arg)
+		return
+	}
+	t.RetString(strings.ToUpper(strings.TrimSpace(arg.AsString())))
+}
+
+// SqlIsTrue(x) → lBool: SQL truthiness (NIL, 0, NaN and blank strings are false).
+func SqlIsTrue(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	arg := t.Local(1)
+	t.RetBool(sqlIsTrue(arg))
+}
+
+// sqlIsTrue: logicals as-is, numbers unless 0 or NaN, strings unless blank.
+func sqlIsTrue(v hbrt.Value) bool {
+	if v.IsNil() {
+		return false
+	}
+	if v.IsLogical() {
+		return v.AsBool()
+	}
+	if v.IsNumeric() && v.IsNumInt() {
+		return v.AsNumInt() != 0
+	}
+	if v.IsNumeric() {
+		return v.AsNumDouble() != 0 && !math.IsNaN(v.AsNumDouble())
+	}
+	return v.IsString() && strings.TrimSpace(v.AsString()) != ""
+}
+
+// SqlCmpEq(a, b) → lBool: case-insensitive equality with cross-type N↔C coercion.
+func SqlCmpEq(t *hbrt.Thread) {
+	t.Frame(2, 0)
+	defer t.EndProc()
+	lhs, rhs := t.Local(1), t.Local(2)
+	t.RetBool(sqlCmpEq(lhs, rhs))
+}
+
+// sqlCmpEq implements SQL `=` for two scalars. NIL equals only NIL.
+// Same-type pairs compare natively (strings trimmed and case-folded,
+// dates by Julian value); a number against a string compares the number
+// with the string's leading numeric prefix. Anything else is unequal.
+func sqlCmpEq(a, b hbrt.Value) bool {
+	aNil, bNil := a.IsNil(), b.IsNil()
+	if aNil || bNil {
+		return aNil && bNil
+	}
+	switch {
+	case a.IsNumeric() && b.IsNumeric():
+		// Two integers are compared exactly: going through float64
+		// would make distinct values above 2^53 look equal.
+		if a.IsNumInt() && b.IsNumInt() {
+			return a.AsNumInt() == b.AsNumInt()
+		}
+		return a.AsNumDouble() == b.AsNumDouble()
+	case a.IsString() && b.IsString():
+		return strings.EqualFold(strings.TrimSpace(a.AsString()), strings.TrimSpace(b.AsString()))
+	case a.IsLogical() && b.IsLogical():
+		return a.AsBool() == b.AsBool()
+	case a.IsDate() && b.IsDate():
+		return a.AsJulian() == b.AsJulian()
+	case a.IsNumeric() && b.IsString():
+		return a.AsNumDouble() == parseLeadingNumeric(b.AsString())
+	case a.IsString() && b.IsNumeric():
+		return parseLeadingNumeric(a.AsString()) == b.AsNumDouble()
+	}
+	return false
+}
+
+// SqlCmpLt(a, b) → lBool: case-insensitive less-than with cross-type N↔C coercion.
+func SqlCmpLt(t *hbrt.Thread) {
+	t.Frame(2, 0)
+	defer t.EndProc()
+	lhs, rhs := t.Local(1), t.Local(2)
+	t.RetBool(sqlCmpLt(lhs, rhs))
+}
+
+// sqlCmpLt implements SQL `<`; a NIL operand or a type mix other than N↔C gives false.
+// Two integers compare exactly (float64 would blur values above 2^53).
+func sqlCmpLt(a, b hbrt.Value) bool {
+	if a.IsNil() || b.IsNil() {
+		return false
+	}
+	switch {
+	case a.IsNumeric() && b.IsNumeric():
+		if a.IsNumInt() && b.IsNumInt() {
+			return a.AsNumInt() < b.AsNumInt()
+		}
+		return a.AsNumDouble() < b.AsNumDouble()
+	case a.IsString() && b.IsString():
+		return strings.ToUpper(strings.TrimSpace(a.AsString())) < strings.ToUpper(strings.TrimSpace(b.AsString()))
+	case a.IsDate() && b.IsDate():
+		return a.AsJulian() < b.AsJulian()
+	case a.IsLogical() && b.IsLogical():
+		return !a.AsBool() && b.AsBool()
+	case a.IsNumeric() && b.IsString():
+		return a.AsNumDouble() < parseLeadingNumeric(b.AsString())
+	case a.IsString() && b.IsNumeric():
+		return parseLeadingNumeric(a.AsString()) < b.AsNumDouble()
+	}
+	return false
+}
diff --git a/hbrtl/sqlscan.go b/hbrtl/sqlscan.go
index 2883120..7df28c0 100644
--- a/hbrtl/sqlscan.go
+++ b/hbrtl/sqlscan.go
@@ -30,6 +30,7 @@ import (
 	"five/hbrt"
 	"sort"
 	"strconv"
+	"strings"
 )
 
 // SqlScan(aFieldPositions, pcWhere) → aRows
@@ -624,9 +625,15 @@ func SqlOrderBy(t *hbrt.Thread) {
 	rows := rowsVal.AsArray().Items
 	specs := specVal.AsArray().Items
 
+	// Per-column sort spec. nullsFirst is derived once from direction
+	// and explicit NULLS clause so the hot path is just a bool test.
+	// Default (cNulls == ""): NIL is the largest value — NULLs LAST in
+	// ASC, NULLs FIRST in DESC. Matches the pre-Go PRG SqlRowCompare.
+	// Explicit NULLS FIRST/LAST (SQL:2003) overrides the direction.
 	type sortCol struct {
-		idx  int
-		desc bool
+		idx        int
+		desc       bool
+		nullsFirst bool
 	}
 	cols := make([]sortCol, len(specs))
 	for i, s := range specs {
@@ -634,10 +641,20 @@ func SqlOrderBy(t *hbrt.Thread) {
 		if arr == nil || len(arr.Items) < 2 {
 			continue
 		}
-		cols[i] = sortCol{
+		c := sortCol{
 			idx:  int(arr.Items[0].AsNumInt()) - 1,
 			desc: arr.Items[1].AsBool(),
 		}
+		c.nullsFirst = c.desc
+		if len(arr.Items) >= 3 {
+			switch arr.Items[2].AsString() {
+			case "FIRST":
+				c.nullsFirst = true
+			case "LAST":
+				c.nullsFirst = false
+			}
+		}
+		cols[i] = c
 	}
 
 	sort.SliceStable(rows, func(a, b int) bool {
@@ -653,8 +670,20 @@ func SqlOrderBy(t *hbrt.Thread) {
 			va := ra.Items[c.idx]
 			vb := rb.Items[c.idx]
 
-			// Compare values
-			cmp := compareValues(va, vb)
+			// NULL handling follows nullsFirst independent of direction.
+			aNil, bNil := va.IsNil(), vb.IsNil()
+			if aNil || bNil {
+				if aNil && bNil {
+					continue
+				}
+				// exactly one is NIL
+				if c.nullsFirst {
+					return aNil // NIL side comes first
+				}
+				return !aNil // non-NIL side comes first
+			}
+
+			cmp := compareValuesNonNil(va, vb)
 			if cmp == 0 {
 				continue
 			}
@@ -671,6 +700,11 @@ func SqlOrderBy(t *hbrt.Thread) {
 }
 
 // compareValues returns -1, 0, or 1 for two Five Values.
+//
+// Historical NIL handling (NIL sorts as smallest) is retained here for
+// existing callers that are fine with that. New sort paths should treat
+// NIL specially based on NULLS FIRST/LAST instead — see compareValuesNonNil
+// plus the sortCol.nullsFirst flag in SqlOrderBy.
 func compareValues(a, b hbrt.Value) int {
 	if a.IsNil() && b.IsNil() {
 		return 0
@@ -681,7 +715,12 @@ func compareValues(a, b hbrt.Value) int {
 	if b.IsNil() {
 		return 1
 	}
+	return compareValuesNonNil(a, b)
+}
 
+// compareValuesNonNil compares two non-NIL Values. Callers must check
+// IsNil() first and apply their own NULL-ordering policy.
+func compareValuesNonNil(a, b hbrt.Value) int {
 	// Numeric
 	if a.IsNumeric() && b.IsNumeric() {
 		fa := a.AsNumDouble()
@@ -734,9 +773,73 @@ func compareValues(a, b hbrt.Value) int {
 		return 1
 	}
 
+	// Mixed numeric/string: attempt Harbour-style coercion by reading
+	// the string as a numeric. Mirrors the PRG SqlRowCompare branches
+	// at TSqlSort.prg:145-148 for legacy DBFs that stored numbers in
+	// CHAR columns.
+	if a.IsNumeric() && b.IsString() {
+		fb := parseLeadingNumeric(b.AsString())
+		fa := a.AsNumDouble()
+		if fa < fb {
+			return -1
+		}
+		if fa > fb {
+			return 1
+		}
+		return 0
+	}
+	if a.IsString() && b.IsNumeric() {
+		fa := parseLeadingNumeric(a.AsString())
+		fb := b.AsNumDouble()
+		if fa < fb {
+			return -1
+		}
+		if fa > fb {
+			return 1
+		}
+		return 0
+	}
+
 	return 0
 }
 
+// parseLeadingNumeric mimics Harbour Val(AllTrim(s)): it skips leading
+// blanks and tabs, then converts the longest numeric-looking prefix
+// (optional sign, digits, at most one dot). No digits at all yields 0.
+func parseLeadingNumeric(s string) float64 {
+	pos := 0
+	for pos < len(s) && (s[pos] == ' ' || s[pos] == '\t') {
+		pos++
+	}
+	begin := pos
+	if pos < len(s) && (s[pos] == '+' || s[pos] == '-') {
+		pos++
+	}
+	// Advance over digits and the first dot; anything else ends the prefix.
+	digits, dots := 0, 0
+scan:
+	for ; pos < len(s); pos++ {
+		switch ch := s[pos]; {
+		case ch >= '0' && ch <= '9':
+			digits++
+		case ch == '.' && dots == 0:
+			dots++
+		default:
+			break scan
+		}
+	}
+	// A bare sign or a lone dot is not a number.
+	if digits == 0 {
+		return 0
+	}
+	val, err := strconv.ParseFloat(s[begin:pos], 64)
+	if err != nil {
+		// Conversion failures (e.g. out of range) also count as 0.
+		return 0
+	}
+	return val
+}
+
 // SqlGroupBy(aRows, aGroupColIdx, aAggSpecs) → aResult
 //
 // Go-native GROUP BY. Builds groups by hashing group-key columns,
@@ -909,6 +1012,1257 @@ func SqlGroupBy(t *hbrt.Thread) {
 	t.RetValue()
 }
 
+// appendValueHashKey writes the canonical key form of v into sb.
+// Same mapping as valueHashKey but without the intermediate string
+// allocation; used in tight row-key construction loops.
+func appendValueHashKey(sb *strings.Builder, v hbrt.Value) {
+	switch {
+	case v.IsNil():
+		sb.WriteString("\x00NIL")
+	case v.IsString():
+		s := v.AsString()
+		end := len(s)
+		for end > 0 && s[end-1] == ' ' {
+			end--
+		}
+		sb.WriteString(s[:end])
+	case v.IsNumeric():
+		if v.IsNumInt() {
+			sb.WriteString(strconvItoa(v.AsNumInt()))
+		} else {
+			sb.WriteString(strconvFtoa(v.AsNumDouble()))
+		}
+	case v.IsLogical():
+		if v.AsBool() {
+			sb.WriteByte('T')
+		} else {
+			sb.WriteByte('F')
+		}
+	case v.IsDate():
+		sb.WriteString(strconvItoa(v.AsJulian()))
+	}
+}
+
+// SqlDistinct(aRows) → aRows
+//
+// Go-native replacement for the PRG TSqlSort:Distinct method. Walks
+// aRows once, builds a composite key per row by joining each column's
+// SqlValToStr form with '|', and keeps only the first occurrence of
+// each key. Output preserves input order (SQL DISTINCT semantic).
+//
+// Key construction matches PRG SqlValToStr via appendValueHashKey,
+// so the dedup decision is byte-for-byte identical to the prior PRG
+// hb_HHasKey check — same trailing-space trim on CHAR, same numeric
+// formatting, same NIL marker.
+//
+// Empty / single-row inputs return the input array unchanged.
+func SqlDistinct(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+
+	rowsVal := t.Local(1)
+	if !rowsVal.IsArray() {
+		t.PushValue(hbrt.MakeArray(0))
+		t.RetValue()
+		return
+	}
+
+	rows := rowsVal.AsArray().Items
+	nRows := len(rows)
+	if nRows < 2 {
+		t.PushValue(rowsVal)
+		t.RetValue()
+		return
+	}
+
+	seen := make(map[string]struct{}, nRows)
+	result := make([]hbrt.Value, 0, nRows)
+	var sb strings.Builder
+
+	for i := 0; i < nRows; i++ {
+		ra := rows[i].AsArray()
+		if ra == nil {
+			continue
+		}
+		sb.Reset()
+		for _, item := range ra.Items {
+			appendValueHashKey(&sb, item)
+			sb.WriteByte('|')
+		}
+		if _, dup := seen[sb.String()]; dup {
+			continue
+		}
+		seen[sb.String()] = struct{}{}
+		result = append(result, rows[i])
+	}
+
+	t.PushValue(hbrt.MakeArrayFrom(result))
+	t.RetValue()
+}
+
+// SqlComputeAggSimple(aGR, nCol, cFunc) → xResult
+//
+// Fast path for COUNT / SUM / AVG / MIN / MAX when the argument is a
+// plain column reference (already resolved to a 1-based index by the
+// caller). Replaces the PRG inner loop of TSqlAgg:ComputeAgg, which
+// walks aGR rows and performs type-aware accumulation per iteration.
+//
+// The caller gates on: cFunc ∈ {COUNT,SUM,AVG,MIN,MAX}, argument is
+// ND_COL, column index resolved > 0. Complex-argument aggregates
+// (CASE / BIN / UDF) and GROUP_CONCAT/STRING_AGG stay in PRG.
+//
+// Arguments:
+//   aGR   : array of group rows (each row is an array)
+//   nCol  : 1-based column index (<=0 → COUNT counts every row; others → NIL)
+//   cFunc : uppercase function name
+//
+// Returns:
+//   COUNT: non-NIL count, or Len(aGR) when nCol<=0 (COUNT(*))
+//   SUM  : double sum, NIL if no non-NIL values (SQL NULL-safe)
+//   AVG  : double sum/count, NIL if empty
+//   MIN  : smallest value via type-aware compare, NIL if empty
+//   MAX  : largest value via type-aware compare, NIL if empty
+//   other: NIL (caller falls back to PRG)
+func SqlComputeAggSimple(t *hbrt.Thread) {
+	t.Frame(3, 0)
+	defer t.EndProc()
+
+	grVal := t.Local(1)
+	if !grVal.IsArray() {
+		t.RetNil()
+		return
+	}
+	gr := grVal.AsArray().Items
+	nCol := int(t.Local(2).AsNumInt()) - 1 // 0-based
+	fn := t.Local(3).AsString()
+
+	if fn == "COUNT" && nCol < 0 {
+		t.RetInt(int64(len(gr)))
+		return
+	}
+	if nCol < 0 {
+		t.RetNil()
+		return
+	}
+
+	count := 0
+	sum := 0.0
+	var minV, maxV hbrt.Value
+	haveMin := false
+
+	for i := 0; i < len(gr); i++ {
+		ra := gr[i].AsArray()
+		if ra == nil || nCol >= len(ra.Items) {
+			continue
+		}
+		v := ra.Items[nCol]
+		if v.IsNil() {
+			continue
+		}
+		count++
+		if v.IsNumeric() {
+			sum += v.AsNumDouble()
+		}
+		if !haveMin {
+			minV = v
+			maxV = v
+			haveMin = true
+			continue
+		}
+		if compareValuesNonNil(v, minV) < 0 {
+			minV = v
+		}
+		if compareValuesNonNil(v, maxV) > 0 {
+			maxV = v
+		}
+	}
+
+	switch fn {
+	case "COUNT":
+		t.RetInt(int64(count))
+	case "SUM":
+		if count == 0 {
+			t.RetNil()
+		} else {
+			t.RetVal(hbrt.MakeDoubleAuto(sum))
+		}
+	case "AVG":
+		if count == 0 {
+			t.RetNil()
+		} else {
+			t.RetVal(hbrt.MakeDoubleAuto(sum / float64(count)))
+		}
+	case "MIN":
+		if !haveMin {
+			t.RetNil()
+		} else {
+			t.RetVal(minV)
+		}
+	case "MAX":
+		if !haveMin {
+			t.RetNil()
+		} else {
+			t.RetVal(maxV)
+		}
+	default:
+		t.RetNil()
+	}
+}
+
+// SqlGroupRows(aRows, aGroupColIdx) → aGroupedRows
+//
+// Groups rows (values, not indices) by their GROUP BY column values,
+// preserving first-seen order. Replaces the PRG hot loop in
+// TSqlAgg:GroupBy:
+//
+//    FOR i := 1 TO Len( aRows )
+//       cKey := ""
+//       FOR j := 1 TO Len( aGroupBy )
+//          cKey += SqlValToStr( aRows[ i ][ aGroupIdx[ j ] ] ) + "|"
+//       NEXT
+//       IF ! hb_HHasKey( hGroups, cKey )
+//          hGroups[ cKey ] := {}
+//       ENDIF
+//       AAdd( hGroups[ cKey ], aRows[ i ] )
+//    NEXT
+//
+// Aggregate computation + HAVING evaluation stay in PRG (too many
+// expression kinds to port cleanly); this RTL only collapses the
+// grouping step — the dominant per-row boundary-crossing cost.
+//
+// Returns: array of groups, each group is an array of original rows
+// (by reference — no copy). First-seen group key order.
+func SqlGroupRows(t *hbrt.Thread) {
+	t.Frame(2, 0)
+	defer t.EndProc()
+
+	rowsVal := t.Local(1)
+	colsVal := t.Local(2)
+	if !rowsVal.IsArray() {
+		t.PushValue(hbrt.MakeArray(0))
+		t.RetValue()
+		return
+	}
+
+	rows := rowsVal.AsArray().Items
+	nRows := len(rows)
+
+	var groupCols []int
+	if colsVal.IsArray() {
+		colsArr := colsVal.AsArray().Items
+		groupCols = make([]int, len(colsArr))
+		for i, v := range colsArr {
+			groupCols[i] = int(v.AsNumInt()) - 1
+		}
+	}
+
+	// No GROUP BY columns → single group containing all rows. Matches
+	// PRG semantic where HAVING or aggregate query with no GROUP BY
+	// still aggregates over the whole result.
+	if len(groupCols) == 0 {
+		all := make([]hbrt.Value, nRows)
+		copy(all, rows)
+		t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
+			hbrt.MakeArrayFrom(all),
+		}))
+		t.RetValue()
+		return
+	}
+
+	order := make([]string, 0, 16)
+	groups := make(map[string][]hbrt.Value, 16)
+	var sb strings.Builder
+
+	for i := 0; i < nRows; i++ {
+		ra := rows[i].AsArray()
+		if ra == nil {
+			continue
+		}
+		sb.Reset()
+		for _, c := range groupCols {
+			if c >= 0 && c < len(ra.Items) {
+				appendValueHashKey(&sb, ra.Items[c])
+			}
+			sb.WriteByte('|')
+		}
+		key := sb.String()
+		if _, ok := groups[key]; !ok {
+			groups[key] = make([]hbrt.Value, 0, 8)
+			order = append(order, key)
+		}
+		groups[key] = append(groups[key], rows[i])
+	}
+
+	out := make([]hbrt.Value, len(order))
+	for oi, key := range order {
+		out[oi] = hbrt.MakeArrayFrom(groups[key])
+	}
+	t.PushValue(hbrt.MakeArrayFrom(out))
+	t.RetValue()
+}
+
+// SqlEvalHaving(xHaving, aNewRow, aCols, aGR, aFN, aParams) → {lOk, lPass}
+//
+// Go-native tree walker for HAVING clause evaluation, mirroring
+// PRG TSqlAgg:EvalHavingExpr. Returns a 2-element array:
+//   [1] lOk:   .T. if fully handled in Go, .F. to fall back to PRG
+//   [2] lPass: truthiness when handled
+//
+// Supported nodes: ND_LIT, ND_NIL, ND_COL (lookup in aCols / aFN),
+// ND_FN (COUNT/SUM/AVG/MIN/MAX with plain column args), ND_BIN
+// (AND/OR/comparison), ND_UNI (NOT only; unary minus falls back).
+// Anything unsupported → returns {.F., .F.} so PRG takes over.
+//
+// Aggregates inside HAVING are recomputed per group by evalAgg, whose
+// loop mirrors SqlComputeAggSimple (the SELECT-list path). Redundant vs
+// SELECT-list aggregate compute, but simple and bounded (HAVING is
+// usually a single comparison). aParams is accepted but not read here.
+func SqlEvalHaving(t *hbrt.Thread) {
+	t.Frame(6, 0)
+	defer t.EndProc()
+
+	xE := t.Local(1)
+	aNewRow := t.Local(2)
+	aCols := t.Local(3)
+	aGR := t.Local(4)
+	aFN := t.Local(5)
+
+	ctx := &havingCtx{
+		aNewRow: aNewRow,
+		aCols:   aCols,
+		aGR:     aGR,
+		aFN:     aFN,
+	}
+	ok, v := ctx.eval(xE)
+	result := hbrt.MakeArray(2)
+	arr := result.AsArray()
+	arr.Items[0] = hbrt.MakeBool(ok)
+	if ok {
+		arr.Items[1] = hbrt.MakeBool(havingIsTrue(v))
+	} else {
+		arr.Items[1] = hbrt.MakeBool(false)
+	}
+	t.PushValue(result)
+	t.RetValue()
+}
+
+type havingCtx struct {
+	aNewRow hbrt.Value
+	aCols   hbrt.Value
+	aGR     hbrt.Value
+	aFN     hbrt.Value
+}
+
+// eval walks the HAVING AST. Returns (ok, value). ok=false means
+// "encountered unsupported node, caller must fall back to PRG."
+func (c *havingCtx) eval(xE hbrt.Value) (bool, hbrt.Value) {
+	if xE.IsNil() {
+		return true, hbrt.MakeNil()
+	}
+	arr := xE.AsArray()
+	if arr == nil || len(arr.Items) < 2 {
+		return false, hbrt.MakeNil()
+	}
+	kind := int(arr.Items[0].AsNumInt())
+
+	switch kind {
+	case ndLit:
+		return true, arr.Items[1]
+	case ndNil:
+		return true, hbrt.MakeNil()
+
+	case ndCol:
+		// Look up in aCols by upper-cased name, return aNewRow[i]
+		name := arr.Items[1].AsString()
+		if idx := strings.Index(name, "."); idx >= 0 {
+			name = name[idx+1:]
+		}
+		name = strings.ToUpper(name)
+		colsArr := c.aCols.AsArray()
+		rowArr := c.aNewRow.AsArray()
+		if colsArr != nil && rowArr != nil {
+			for i, col := range colsArr.Items {
+				ca := col.AsArray()
+				if ca == nil || len(ca.Items) < 2 {
+					continue
+				}
+				if strings.EqualFold(ca.Items[1].AsString(), name) && i < len(rowArr.Items) {
+					return true, rowArr.Items[i]
+				}
+			}
+		}
+		// Fallback: lookup in aFN → aGR[0]
+		fnArr := c.aFN.AsArray()
+		if fnArr != nil {
+			for i, n := range fnArr.Items {
+				if strings.EqualFold(n.AsString(), name) {
+					grArr := c.aGR.AsArray()
+					if grArr != nil && len(grArr.Items) > 0 {
+						firstRow := grArr.Items[0].AsArray()
+						if firstRow != nil && i < len(firstRow.Items) {
+							return true, firstRow.Items[i]
+						}
+					}
+				}
+			}
+		}
+		return true, hbrt.MakeNil()
+
+	case ndFn:
+		return c.evalAgg(arr)
+
+	case ndBin:
+		if len(arr.Items) < 4 {
+			return false, hbrt.MakeNil()
+		}
+		op := arr.Items[1].AsString()
+		// Short-circuit for AND/OR
+		if op == "AND" {
+			okL, vL := c.eval(arr.Items[2])
+			if !okL {
+				return false, hbrt.MakeNil()
+			}
+			if !havingIsTrue(vL) {
+				return true, hbrt.MakeBool(false)
+			}
+			okR, vR := c.eval(arr.Items[3])
+			if !okR {
+				return false, hbrt.MakeNil()
+			}
+			return true, hbrt.MakeBool(havingIsTrue(vR))
+		}
+		if op == "OR" {
+			okL, vL := c.eval(arr.Items[2])
+			if !okL {
+				return false, hbrt.MakeNil()
+			}
+			if havingIsTrue(vL) {
+				return true, hbrt.MakeBool(true)
+			}
+			okR, vR := c.eval(arr.Items[3])
+			if !okR {
+				return false, hbrt.MakeNil()
+			}
+			return true, hbrt.MakeBool(havingIsTrue(vR))
+		}
+		okL, vL := c.eval(arr.Items[2])
+		if !okL {
+			return false, hbrt.MakeNil()
+		}
+		okR, vR := c.eval(arr.Items[3])
+		if !okR {
+			return false, hbrt.MakeNil()
+		}
+		switch op {
+		case "=", "==":
+			return true, hbrt.MakeBool(sqlCmpEq(vL, vR))
+		case "<>", "!=":
+			return true, hbrt.MakeBool(!sqlCmpEq(vL, vR))
+		case "<":
+			return true, hbrt.MakeBool(sqlCmpLt(vL, vR))
+		case ">":
+			return true, hbrt.MakeBool(sqlCmpLt(vR, vL))
+		case "<=":
+			return true, hbrt.MakeBool(sqlCmpEq(vL, vR) || sqlCmpLt(vL, vR))
+		case ">=":
+			return true, hbrt.MakeBool(sqlCmpEq(vL, vR) || sqlCmpLt(vR, vL))
+		}
+		return false, hbrt.MakeNil()
+
+	case ndUni:
+		if len(arr.Items) < 3 {
+			return false, hbrt.MakeNil()
+		}
+		op := arr.Items[1].AsString()
+		okX, vX := c.eval(arr.Items[2])
+		if !okX {
+			return false, hbrt.MakeNil()
+		}
+		if op == "NOT" {
+			return true, hbrt.MakeBool(!havingIsTrue(vX))
+		}
+		return false, hbrt.MakeNil()
+	}
+
+	return false, hbrt.MakeNil()
+}
+
+// evalAgg runs a simple aggregate (COUNT/SUM/AVG/MIN/MAX) on aGR when
+// the argument is a plain column (or "*" for COUNT). Anything else
+// triggers a PRG fallback.
+func (c *havingCtx) evalAgg(arr *hbrt.HbArray) (bool, hbrt.Value) {
+	if len(arr.Items) < 3 {
+		return false, hbrt.MakeNil()
+	}
+	name := strings.ToUpper(arr.Items[1].AsString())
+	switch name {
+	case "COUNT", "SUM", "AVG", "MIN", "MAX":
+	default:
+		return false, hbrt.MakeNil()
+	}
+
+	// Parse first arg to find column index (0 → COUNT(*))
+	argsArr := arr.Items[2].AsArray()
+	if argsArr == nil || len(argsArr.Items) == 0 {
+		if name == "COUNT" {
+			grArr := c.aGR.AsArray()
+			if grArr == nil {
+				return true, hbrt.MakeNumInt(0)
+			}
+			return true, hbrt.MakeNumInt(int64(len(grArr.Items)))
+		}
+		return false, hbrt.MakeNil()
+	}
+	firstArg := argsArr.Items[0].AsArray()
+	if firstArg == nil || len(firstArg.Items) < 2 {
+		return false, hbrt.MakeNil()
+	}
+	argKind := int(firstArg.Items[0].AsNumInt())
+	if argKind != ndCol {
+		return false, hbrt.MakeNil()
+	}
+	colName := firstArg.Items[1].AsString()
+	if colName == "*" {
+		if name == "COUNT" {
+			grArr := c.aGR.AsArray()
+			if grArr == nil {
+				return true, hbrt.MakeNumInt(0)
+			}
+			return true, hbrt.MakeNumInt(int64(len(grArr.Items)))
+		}
+		return false, hbrt.MakeNil()
+	}
+
+	// Resolve column name → index in aFN
+	if idx := strings.Index(colName, "."); idx >= 0 {
+		colName = colName[idx+1:]
+	}
+	colName = strings.ToUpper(colName)
+	fnArr := c.aFN.AsArray()
+	nCol := -1
+	if fnArr != nil {
+		for i, n := range fnArr.Items {
+			if strings.EqualFold(n.AsString(), colName) {
+				nCol = i
+				break
+			}
+		}
+	}
+	if nCol < 0 {
+		return false, hbrt.MakeNil()
+	}
+
+	grArr := c.aGR.AsArray()
+	if grArr == nil {
+		return true, hbrt.MakeNumInt(0)
+	}
+
+	// Run the simple aggregate loop (mirrors SqlComputeAggSimple).
+	count := 0
+	sum := 0.0
+	var minV, maxV hbrt.Value
+	haveAny := false
+	for _, rowVal := range grArr.Items {
+		ra := rowVal.AsArray()
+		if ra == nil || nCol >= len(ra.Items) {
+			continue
+		}
+		v := ra.Items[nCol]
+		if v.IsNil() {
+			continue
+		}
+		count++
+		if v.IsNumeric() {
+			sum += v.AsNumDouble()
+		}
+		if !haveAny {
+			minV = v
+			maxV = v
+			haveAny = true
+			continue
+		}
+		if compareValuesNonNil(v, minV) < 0 {
+			minV = v
+		}
+		if compareValuesNonNil(v, maxV) > 0 {
+			maxV = v
+		}
+	}
+
+	switch name {
+	case "COUNT":
+		return true, hbrt.MakeNumInt(int64(count))
+	case "SUM":
+		if count == 0 {
+			return true, hbrt.MakeNil()
+		}
+		return true, hbrt.MakeDoubleAuto(sum)
+	case "AVG":
+		if count == 0 {
+			return true, hbrt.MakeNil()
+		}
+		return true, hbrt.MakeDoubleAuto(sum / float64(count))
+	case "MIN":
+		if !haveAny {
+			return true, hbrt.MakeNil()
+		}
+		return true, minV
+	case "MAX":
+		if !haveAny {
+			return true, hbrt.MakeNil()
+		}
+		return true, maxV
+	}
+	return false, hbrt.MakeNil()
+}
+
+// havingIsTrue mirrors PRG SqlIsTrue — NIL/0/empty-string all false.
+func havingIsTrue(v hbrt.Value) bool {
+	return sqlIsTrue(v)
+}
+
+// SqlWindowPartitions(aRows, aPartColIdx) → aPartitions
+//
+// Groups row indices by their PARTITION BY column values, preserving
+// first-seen order. Replaces the PRG hot loop in
+// TSqlExecutor:ApplyWindowFunctions that per row does:
+//
+//    cPartKey := ""
+//    FOR j := 1 TO Len( aPartBy )
+//       cPartKey += SqlValToStr( aRows[ i ][ aPartCol[ j ] ] ) + "|"
+//    NEXT
+//    IF ! hb_HHasKey( hPartitions, cPartKey )
+//       hPartitions[ cPartKey ] := {}
+//    ENDIF
+//    AAdd( hPartitions[ cPartKey ], i )
+//
+// Key construction goes through appendValueHashKey (same mapping as
+// valueHashKey), so it matches the PRG SqlValToStr equivalence classes
+// byte-for-byte and partition identity is unchanged.
+//
+// Arguments:
+//   aRows:        result rows (array of arrays)
+//   aPartColIdx:  1-based column indices for partition key (empty
+//                 array → single "all rows" partition)
+//
+// Returns:
+//   Array of partitions. Each partition is an array of 1-based
+//   row indices into aRows, in first-seen order inside the partition.
+//   Partitions themselves are also in first-seen order of their key.
+//
+// Called at most once per window column per query — amortizes the
+// Go↔PRG boundary cost across N·M operations.
+func SqlWindowPartitions(t *hbrt.Thread) {
+	t.Frame(2, 0)
+	defer t.EndProc()
+
+	rowsVal := t.Local(1)
+	colsVal := t.Local(2)
+	if !rowsVal.IsArray() {
+		t.PushValue(hbrt.MakeArray(0))
+		t.RetValue()
+		return
+	}
+
+	rows := rowsVal.AsArray().Items
+	nRows := len(rows)
+
+	var partCols []int
+	if colsVal.IsArray() {
+		colsArr := colsVal.AsArray().Items
+		partCols = make([]int, len(colsArr))
+		for i, v := range colsArr {
+			partCols[i] = int(v.AsNumInt()) - 1
+		}
+	}
+
+	// Fast path: no PARTITION BY → one partition holding all row indices.
+	if len(partCols) == 0 {
+		idxs := make([]hbrt.Value, nRows)
+		for i := 0; i < nRows; i++ {
+			idxs[i] = hbrt.MakeInt(i + 1)
+		}
+		t.PushValue(hbrt.MakeArrayFrom([]hbrt.Value{
+			hbrt.MakeArrayFrom(idxs),
+		}))
+		t.RetValue()
+		return
+	}
+
+	// Preserve first-seen order via parallel slice + map.
+	order := make([]string, 0, 16)
+	groups := make(map[string][]int, 16)
+	var sb strings.Builder
+
+	for i := 0; i < nRows; i++ {
+		ra := rows[i].AsArray()
+		if ra == nil {
+			continue
+		}
+		sb.Reset()
+		for _, c := range partCols {
+			if c >= 0 && c < len(ra.Items) {
+				appendValueHashKey(&sb, ra.Items[c])
+			}
+			sb.WriteByte('|')
+		}
+		key := sb.String()
+		if _, ok := groups[key]; !ok {
+			groups[key] = make([]int, 0, 8)
+			order = append(order, key)
+		}
+		groups[key] = append(groups[key], i+1) // 1-based for PRG
+	}
+
+	out := make([]hbrt.Value, len(order))
+	for oi, key := range order {
+		g := groups[key]
+		idxs := make([]hbrt.Value, len(g))
+		for j, n := range g {
+			idxs[j] = hbrt.MakeInt(n)
+		}
+		out[oi] = hbrt.MakeArrayFrom(idxs)
+	}
+	t.PushValue(hbrt.MakeArrayFrom(out))
+	t.RetValue()
+}
+
+// SqlWindowSortPartition(aRows, aPartIdx, aSortSpec) → aPartIdx
+//
+// Sorts a partition (array of 1-based row indices into aRows) by the
+// ORDER BY spec.  aSortSpec is an array of {nColIdx, lDesc} pairs
+// with 1-based column indices. Mutates aPartIdx in place and returns
+// it for chainability.
+//
+// Matches PRG SqlWinRowCmp semantics byte-for-byte:
+//   - NIL sorts as the largest value (NULLs last in ASC, NULLs first
+//     in DESC) — consistent with the #3 migration for ORDER BY.
+//   - Mixed-type comparison: same ValType only; otherwise treated
+//     equal on that column (moves to next sort key).
+//   - Stable sort so the first-seen partition order (from SqlWindow-
+//     Partitions) carries through equal-value ties.
+//
+// Replaces ASort(aPartIdx,,, {|a,b| SqlWinRowCmp(...) < 0}) — the PRG
+// block is invoked O(N log N) times per partition; Go sort skips that
+// bridge and uses pre-resolved column indices.
+func SqlWindowSortPartition(t *hbrt.Thread) {
+	t.Frame(3, 0)
+	defer t.EndProc()
+
+	rowsVal := t.Local(1)
+	idxVal := t.Local(2)
+	specVal := t.Local(3)
+
+	if !rowsVal.IsArray() || !idxVal.IsArray() || !specVal.IsArray() {
+		t.PushValue(idxVal)
+		t.RetValue()
+		return
+	}
+
+	rows := rowsVal.AsArray().Items
+	idxs := idxVal.AsArray().Items
+	specs := specVal.AsArray().Items
+
+	type sortCol struct {
+		idx  int
+		desc bool
+	}
+	cols := make([]sortCol, 0, len(specs))
+	for _, s := range specs {
+		arr := s.AsArray()
+		if arr == nil || len(arr.Items) < 2 {
+			continue
+		}
+		cols = append(cols, sortCol{
+			idx:  int(arr.Items[0].AsNumInt()) - 1,
+			desc: arr.Items[1].AsBool(),
+		})
+	}
+	if len(cols) == 0 || len(idxs) < 2 {
+		t.PushValue(idxVal)
+		t.RetValue()
+		return
+	}
+
+	sort.SliceStable(idxs, func(ai, bi int) bool {
+		ra := rows[int(idxs[ai].AsNumInt())-1].AsArray()
+		rb := rows[int(idxs[bi].AsNumInt())-1].AsArray()
+		if ra == nil || rb == nil {
+			return false
+		}
+		for _, c := range cols {
+			if c.idx < 0 || c.idx >= len(ra.Items) || c.idx >= len(rb.Items) {
+				continue
+			}
+			va := ra.Items[c.idx]
+			vb := rb.Items[c.idx]
+
+			// NIL handling: NIL is the largest value.
+			aNil, bNil := va.IsNil(), vb.IsNil()
+			if aNil && bNil {
+				continue
+			}
+			if aNil {
+				// a > b — in DESC, a comes first (less-than = true)
+				return c.desc
+			}
+			if bNil {
+				// b > a — in ASC, a comes first (less-than = true)
+				return !c.desc
+			}
+
+			// Only compare if same type, otherwise skip (PRG semantic).
+			if va.Type() != vb.Type() {
+				continue
+			}
+			cmp := compareValuesNonNil(va, vb)
+			if cmp == 0 {
+				continue
+			}
+			if c.desc {
+				return cmp > 0
+			}
+			return cmp < 0
+		}
+		return false
+	})
+
+	t.PushValue(idxVal)
+	t.RetValue()
+}
+
+// SqlWindowAssignRank(aRows, aPartIdx, aSortSpec, nColIdx, cFunc) → NIL
+//
+// Assigns ROW_NUMBER / RANK / DENSE_RANK values to each row in a
+// sorted partition. Replaces the PRG loop in ApplyWindowFunctions:
+//
+//    FOR k := 1 TO Len( aPartIdx )
+//       IF ! SqlWinRowsEqual( aRows, aPartIdx[k], aPartIdx[k-1], ... )
+//          nRank := k  (or nRank++)
+//       ENDIF
+//       aRows[ aPartIdx[k] ][ nColIdx ] := nRank
+//    NEXT
+//
+// Collapses the per-row SqlWinRowsEqual + PRG indexing cost. aSortSpec
+// is the same sort spec (array of {nCol, lDesc}) that
+// SqlWindowSortPartition already consumes — caller reuses it without
+// re-resolving column indices.
+//
+// Arguments:
+//   aRows      : full result row set
+//   aPartIdx   : partition (array of 1-based row indices, sorted)
+//   aSortSpec  : ORDER BY spec; only column indices matter for
+//                equality check (direction unused). Empty spec means
+//                no ORDER BY → ROW_NUMBER semantic for RANK/DENSE too.
+//   nColIdx    : 1-based output column index to receive the rank value
+//   cFunc      : "ROW_NUMBER" | "RANK" | "DENSE_RANK"
+//
+// Mutates aRows in place. Returns NIL.
+func SqlWindowAssignRank(t *hbrt.Thread) {
+	t.Frame(5, 0)
+	defer t.EndProc()
+
+	rowsVal := t.Local(1)
+	idxVal := t.Local(2)
+	specVal := t.Local(3)
+	nColIdx := int(t.Local(4).AsNumInt()) - 1
+	fn := t.Local(5).AsString()
+
+	if !rowsVal.IsArray() || !idxVal.IsArray() {
+		t.RetNil()
+		return
+	}
+	rows := rowsVal.AsArray().Items
+	idxs := idxVal.AsArray().Items
+	if len(idxs) == 0 || nColIdx < 0 {
+		t.RetNil()
+		return
+	}
+
+	// Unpack sort spec — we only need column indices for equality check.
+	var sortCols []int
+	if specVal.IsArray() {
+		specs := specVal.AsArray().Items
+		sortCols = make([]int, 0, len(specs))
+		for _, s := range specs {
+			arr := s.AsArray()
+			if arr == nil || len(arr.Items) < 2 {
+				continue
+			}
+			sortCols = append(sortCols, int(arr.Items[0].AsNumInt())-1)
+		}
+	}
+
+	// Helper: does row i equal row j on all sort columns? Reuses the
+	// compareValuesNonNil path; NIL matches NIL, NIL ≠ non-NIL.
+	rowsEqual := func(ri, rj int) bool {
+		ra := rows[ri].AsArray()
+		rb := rows[rj].AsArray()
+		if ra == nil || rb == nil {
+			return false
+		}
+		for _, c := range sortCols {
+			if c < 0 || c >= len(ra.Items) || c >= len(rb.Items) {
+				continue
+			}
+			va := ra.Items[c]
+			vb := rb.Items[c]
+			aNil, bNil := va.IsNil(), vb.IsNil()
+			if aNil != bNil {
+				return false
+			}
+			if aNil && bNil {
+				continue
+			}
+			if compareValuesNonNil(va, vb) != 0 {
+				return false
+			}
+		}
+		return true
+	}
+
+	// Compute rank per row and write to aRows[ idx ][ nColIdx ].
+	writeRank := func(rowIdx, rank int) {
+		if rowIdx < 0 || rowIdx >= len(rows) {
+			return
+		}
+		ra := rows[rowIdx].AsArray()
+		if ra == nil || nColIdx >= len(ra.Items) {
+			return
+		}
+		ra.Items[nColIdx] = hbrt.MakeNumInt(int64(rank))
+	}
+
+	switch fn {
+	case "ROW_NUMBER":
+		for k, ri := range idxs {
+			writeRank(int(ri.AsNumInt())-1, k+1)
+		}
+	case "RANK":
+		// Same value group → same rank, then jump to k+1.
+		rank := 1
+		prevRowIdx := int(idxs[0].AsNumInt()) - 1
+		writeRank(prevRowIdx, rank)
+		for k := 1; k < len(idxs); k++ {
+			curIdx := int(idxs[k].AsNumInt()) - 1
+			if len(sortCols) == 0 || !rowsEqual(curIdx, prevRowIdx) {
+				rank = k + 1
+			}
+			writeRank(curIdx, rank)
+			prevRowIdx = curIdx
+		}
+	case "DENSE_RANK":
+		rank := 1
+		prevRowIdx := int(idxs[0].AsNumInt()) - 1
+		writeRank(prevRowIdx, rank)
+		for k := 1; k < len(idxs); k++ {
+			curIdx := int(idxs[k].AsNumInt()) - 1
+			if len(sortCols) == 0 || !rowsEqual(curIdx, prevRowIdx) {
+				rank++
+			}
+			writeRank(curIdx, rank)
+			prevRowIdx = curIdx
+		}
+	}
+
+	t.RetNil()
+}
+
+// SqlBulkUpdate(aFieldPositions, pcWhere, aValuePcodes) → nAffected
+//
+// Go-native replacement for the PRG UPDATE scan loop:
+//
+//    dbGoTop()
+//    WHILE ! Eof()
+//       IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) )
+//          IF dbRLock( RecNo() )
+//             FOR i := 1 TO Len( aSet )
+//                FieldPut( nFPos[i], ::EvalExpr( aSet[i][2] ) )
+//             NEXT
+//             dbRUnlock( RecNo() )
+//             nAffected++
+//          ENDIF
+//       ENDIF
+//       dbSkip()
+//    ENDDO
+//    dbCommit()
+//
+// Collapses: Eof + Skip + RLock + FieldPut×N + RUnlock cross the Go
+// boundary once per record via the PRG VM. Moving to one RTL call
+// keeps the scan inside Go and uses compiled pcode for both WHERE
+// and every SET value expression.
+//
+// Arguments:
+//   aFieldPositions: 1-based field positions to write (aligned with aValuePcodes)
+//   pcWhere:         compiled WHERE pcode (NIL = unconditional update)
+//   aValuePcodes:    compiled pcode per SET value expression
+//
+// Caller must compile every SET value first: a nil slot in aValuePcodes is
+// skipped (field unchanged). A non-NIL pcWhere that is not pcode returns 0.
+//
+// Txn caveat: does not call ::oTxn:LogRecord per row — caller is
+// responsible for ensuring no active transaction when invoking this
+// fast path, else undo semantics break.
+func SqlBulkUpdate(t *hbrt.Thread) {
+	t.Frame(3, 0)
+	defer t.EndProc()
+
+	fieldsVal := t.Local(1)
+	whereVal := t.Local(2)
+	pcodesVal := t.Local(3)
+
+	if !fieldsVal.IsArray() || !pcodesVal.IsArray() {
+		t.RetInt(0)
+		return
+	}
+	fieldsArr := fieldsVal.AsArray().Items
+	pcodesArr := pcodesVal.AsArray().Items
+	nSets := len(fieldsArr)
+	if nSets != len(pcodesArr) || nSets == 0 {
+		t.RetInt(0)
+		return
+	}
+
+	fieldPos := make([]int, nSets)
+	for i := 0; i < nSets; i++ {
+		fieldPos[i] = int(fieldsArr[i].AsNumInt()) - 1 // 1-based → 0-based
+	}
+	valuePcodes := make([]*hbrt.PcodeFunc, nSets)
+	for i, pv := range pcodesArr {
+		if p := pv.AsPointer(); p != nil {
+			if pc, ok := p.(*hbrt.PcodeFunc); ok {
+				valuePcodes[i] = pc
+			}
+		}
+	}
+
+	var whereFn *hbrt.PcodeFunc
+	if !whereVal.IsNil() {
+		if whereFn, _ = whereVal.AsPointer().(*hbrt.PcodeFunc); whereFn == nil {
+			t.RetInt(0) // WHERE given but unusable: never widen to "all rows"
+			return
+		}
+	}
+
+	wam, ok := t.WA.(*hbrdd.WorkAreaManager)
+	if !ok {
+		t.RetInt(0)
+		return
+	}
+	area := wam.Current()
+	if area == nil {
+		t.RetInt(0)
+		return
+	}
+	dbfArea, _ := area.(*dbf.DBFArea)
+	if dbfArea == nil {
+		// Non-DBF area falls back to the generic Area interface — use
+		// the interface path; still a win over PRG boundary crossings.
+		t.RetInt(sqlBulkUpdateGeneric(t, area, whereFn, fieldPos, valuePcodes))
+		return
+	}
+
+	// Fast field getter — compiled pcode's PcOpFieldGet hits this
+	// closure instead of the generic FieldGet RTL dispatch.
+	prevFG := t.FastFieldGetter
+	t.FastFieldGetter = func(idx int) hbrt.Value {
+		v, _ := dbfArea.GetValue(idx - 1)
+		return v
+	}
+	defer func() { t.FastFieldGetter = prevFG }()
+
+	nAffected := 0
+	shared := dbfArea.IsShared()
+
+	dbfArea.GoTop()
+	for !dbfArea.EOF() {
+		match := true
+		if whereFn != nil {
+			hbrt.ExecPcodeFast(t, whereFn, nil)
+			match = t.GetRetValue().AsBool()
+		}
+		if match {
+			recNo := dbfArea.RecNo()
+			locked := true // exclusive areas need no record lock
+			if shared {
+				locked, _ = dbfArea.LockRecord(recNo)
+			}
+			if locked {
+				for i := 0; i < nSets; i++ {
+					pc := valuePcodes[i]
+					if pc == nil {
+						continue
+					}
+					hbrt.ExecPcodeFast(t, pc, nil)
+					dbfArea.PutValue(fieldPos[i], t.GetRetValue())
+				}
+				if shared {
+					dbfArea.UnlockRecord(recNo)
+				}
+				nAffected++
+			}
+		}
+		dbfArea.Skip(1)
+	}
+	/* Skip fsync when the WA cache is active — caller batches flush
+	 * at SqlWACacheDisable / dbCloseAll. Per-call Flush on macOS APFS
+	 * is ~1-2 ms (fsync), dominating the 100-row scan cost. */
+	if !waCacheEnabledSafe() {
+		dbfArea.Flush()
+	}
+
+	t.RetInt(int64(nAffected))
+}
+
+// waCacheEnabledSafe returns a snapshot of waCacheEnabled taken while
+// holding waCacheMu. Cheap enough for every Bulk path to call, and it
+// avoids the PRG→Go round-trip.
+func waCacheEnabledSafe() bool {
+	waCacheMu.Lock()
+	defer waCacheMu.Unlock()
+	return waCacheEnabled
+}
+
+// sqlBulkUpdateGeneric handles non-DBF workareas via the Area interface.
+func sqlBulkUpdateGeneric(t *hbrt.Thread, area hbrdd.Area, whereFn *hbrt.PcodeFunc, fieldPos []int, valuePcodes []*hbrt.PcodeFunc) int64 {
+	prevFG := t.FastFieldGetter
+	t.FastFieldGetter = func(idx int) hbrt.Value {
+		v, _ := area.GetValue(idx - 1)
+		return v
+	}
+	defer func() { t.FastFieldGetter = prevFG }()
+
+	nAffected := int64(0)
+	area.GoTop()
+	for !area.EOF() {
+		match := true
+		if whereFn != nil {
+			hbrt.ExecPcodeFast(t, whereFn, nil)
+			match = t.GetRetValue().AsBool()
+		}
+		if match {
+			for i := 0; i < len(fieldPos); i++ {
+				pc := valuePcodes[i]
+				if pc == nil {
+					continue
+				}
+				hbrt.ExecPcodeFast(t, pc, nil)
+				area.PutValue(fieldPos[i], t.GetRetValue())
+			}
+			nAffected++
+		}
+		area.Skip(1)
+	}
+	return nAffected
+}
+
+// SqlBulkInsert(aRows) → nInserted
+//
+// Go-native bulk INSERT into the current workarea. Replaces the
+// PRG pattern used by CTE materialization, CREATE TABLE AS SELECT,
+// and subquery-driven temp tables:
+//
+//    FOR j := 1 TO Len( aRows )
+//       dbAppend()
+//       FOR k := 1 TO Min( Len(aStruct), Len(aRows[j]) )
+//          IF aRows[j][k] != NIL
+//             FieldPut( k, aRows[j][k] )
+//          ENDIF
+//       NEXT
+//    NEXT
+//    dbCommit()
+//
+// Collapses ~N·M Go RTL boundary crossings to a single call plus
+// native Append/PutValue/Flush on *DBFArea. Semantics preserved:
+//   - NIL element → field left at its default value
+//   - Row length > field count → extra columns ignored
+//   - Row length < field count → trailing fields left at default
+//   - Flushes once at end (matches PRG dbCommit() after the loop)
+//
+// Returns the number of rows appended. Rows where aRows[i] is not an
+// array are skipped silently (the PRG loop would instead fail on the
+// non-array access), and the loop stops at the first Append error.
+func SqlBulkInsert(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+
+	rowsVal := t.Local(1)
+	if !rowsVal.IsArray() {
+		t.RetInt(0)
+		return
+	}
+
+	wam, ok := t.WA.(*hbrdd.WorkAreaManager)
+	if !ok || wam == nil {
+		t.RetInt(0)
+		return
+	}
+	area := wam.Current()
+	if area == nil {
+		t.RetInt(0)
+		return
+	}
+
+	nFields := area.FieldCount()
+	rows := rowsVal.AsArray().Items
+	inserted := 0
+
+	// Type-assert the concrete DBF type once so the inner loop avoids
+	// interface-dispatch per call. Non-DBF backends (MEMRDD) take the
+	// generic hbrdd.Area path.
+	if dbfArea, isDbf := area.(*dbf.DBFArea); isDbf {
+		for _, rowVal := range rows {
+			ra := rowVal.AsArray()
+			if ra == nil {
+				continue
+			}
+			if err := dbfArea.Append(); err != nil {
+				break
+			}
+			limit := len(ra.Items)
+			if limit > nFields {
+				limit = nFields
+			}
+			for k := 0; k < limit; k++ {
+				v := ra.Items[k]
+				if v.IsNil() {
+					continue
+				}
+				dbfArea.PutValue(k, v)
+			}
+			inserted++
+		}
+		dbfArea.Flush()
+	} else {
+		for _, rowVal := range rows {
+			ra := rowVal.AsArray()
+			if ra == nil {
+				continue
+			}
+			if err := area.Append(); err != nil {
+				break
+			}
+			limit := len(ra.Items)
+			if limit > nFields {
+				limit = nFields
+			}
+			for k := 0; k < limit; k++ {
+				v := ra.Items[k]
+				if v.IsNil() {
+					continue
+				}
+				area.PutValue(k, v)
+			}
+			inserted++
+		}
+		if flusher, ok := area.(interface{ Flush() error }); ok {
+			flusher.Flush()
+		}
+	}
+
+	t.RetInt(int64(inserted))
+}
+
 // SqlEach(aFieldPositions, pcWhere, bBlock) → NIL
 //
 // Streaming variant of SqlScan — instead of materializing all matching
diff --git a/hbrtl/sqlwacache.go b/hbrtl/sqlwacache.go
new file mode 100644
index 0000000..ac8c24c
--- /dev/null
+++ b/hbrtl/sqlwacache.go
@@ -0,0 +1,142 @@
+// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
+// All rights reserved.
+
+// Workarea cache for FiveSql2 DML — opt-in persistent workarea slots
+// keyed by alias. Eliminates per-query dbUseArea + dbCloseArea syscall
+// overhead for repeated INSERT / UPDATE / DELETE against the same table.
+//
+// Semantics:
+//   * Disabled by default. Callers opt in via SqlWACacheEnable(). Tests
+//     and short one-shot scripts can stay on the safe per-query open/
+//     close behavior; long-running bench loops or servers pay the open
+//     cost once.
+//   * Entries map uppercase alias → workarea number. The PRG side is
+//     responsible for the actual dbUseArea / dbSelectArea — this layer
+//     only stores the handle.
+//   * Invalidation is explicit. CREATE TABLE / DROP TABLE in
+//     TSqlDDL.prg call SqlWACacheInvalidate before any filesystem
+//     operation that would otherwise collide with a still-open handle.
+//   * SqlWACacheCloseAll drops every entry; callers then decide how
+//     to actually close the workareas (dbCloseAll, per-alias close, …).
+
+package hbrtl
+
+import (
+	"strings"
+	"sync"
+
+	"five/hbrt"
+)
+
+var (
+	waCacheMu      sync.Mutex         // guards waCacheEntries and waCacheEnabled
+	waCacheEntries = map[string]int{} // uppercase alias → workarea number
+	waCacheEnabled bool               // off until SqlWACacheEnable is called
+)
+
+// SqlWACacheEnable() → NIL
+// Switches the package-level workarea cache on. Workareas opened before
+// this call are not registered; entries only arrive via SqlWACachePut.
+func SqlWACacheEnable(t *hbrt.Thread) {
+	t.Frame(0, 0)
+	defer t.EndProc()
+	waCacheMu.Lock()
+	defer waCacheMu.Unlock()
+	waCacheEnabled = true
+	t.RetNil()
+}
+
+// SqlWACacheDisable() → NIL
+// Switches the cache off and forgets every entry. No workarea is closed
+// here; that is left to the caller (typically dbCloseAll() or a
+// per-table close afterwards).
+func SqlWACacheDisable(t *hbrt.Thread) {
+	t.Frame(0, 0)
+	defer t.EndProc()
+	waCacheMu.Lock()
+	defer waCacheMu.Unlock()
+	waCacheEntries = map[string]int{}
+	waCacheEnabled = false
+	t.RetNil()
+}
+
+// SqlWACacheIsEnabled() → lBool
+func SqlWACacheIsEnabled(t *hbrt.Thread) {
+	t.Frame(0, 0)
+	defer t.EndProc()
+	waCacheMu.Lock()
+	defer waCacheMu.Unlock()
+	// Read the flag under the same mutex Enable/Disable write it under.
+	t.RetBool(waCacheEnabled)
+}
+
+// SqlWACacheGet(cAlias) → nWA | 0
+// Looks up the workarea number cached for cAlias (case-insensitive).
+// Yields 0 when the cache is disabled or holds no entry for the alias.
+// The number is only a hint: the PRG side still checks Used() / Select()
+// because the workarea may have been closed since it was cached.
+func SqlWACacheGet(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	key := strings.ToUpper(t.Local(1).AsString())
+	waCacheMu.Lock()
+	defer waCacheMu.Unlock()
+	if !waCacheEnabled {
+		t.RetInt(0)
+		return
+	}
+	t.RetInt(int64(waCacheEntries[key]))
+}
+
+// SqlWACachePut(cAlias, nWA) → NIL
+// Register (or overwrite) a cache entry. No-op when the cache is disabled,
+// nWA <= 0, or cAlias is empty, so callers can call Put unconditionally.
+func SqlWACachePut(t *hbrt.Thread) {
+	t.Frame(2, 0)
+	defer t.EndProc()
+	alias := strings.ToUpper(t.Local(1).AsString())
+	nWA := int(t.Local(2).AsNumInt())
+	waCacheMu.Lock()
+	if waCacheEnabled && alias != "" && nWA > 0 {
+		waCacheEntries[alias] = nWA
+	}
+	waCacheMu.Unlock()
+	t.RetNil()
+}
+
+// SqlWACacheInvalidate(cAlias) → NIL
+// Forgets the entry for one alias, whether or not the cache is enabled.
+// Meant to run before CREATE TABLE / DROP TABLE / FErase so the PRG side
+// can close and recreate the file without a stale cached open.
+func SqlWACacheInvalidate(t *hbrt.Thread) {
+	t.Frame(1, 0)
+	defer t.EndProc()
+	key := strings.ToUpper(t.Local(1).AsString())
+	waCacheMu.Lock()
+	defer waCacheMu.Unlock()
+	delete(waCacheEntries, key)
+	t.RetNil()
+}
+
+// SqlWACacheCloseAll() → aKeys
+// Empties the cache and returns the aliases it held (in no particular
+// order). Closing the corresponding workareas is left to the caller.
+func SqlWACacheCloseAll(t *hbrt.Thread) {
+	t.Frame(0, 0)
+	defer t.EndProc()
+
+	// Swap in a fresh map under the lock; the detached map is then private
+	// to this call and can be walked without holding the mutex.
+	waCacheMu.Lock()
+	drained := waCacheEntries
+	waCacheEntries = map[string]int{}
+	waCacheMu.Unlock()
+
+	aliases := make([]hbrt.Value, 0, len(drained))
+	// Go map iteration order is unspecified, hence the unordered result.
+	for alias := range drained {
+		aliases = append(aliases, hbrt.MakeString(alias))
+	}
+	t.PushValue(hbrt.MakeArrayFrom(aliases))
+	t.RetValue()
+}
diff --git a/tests/compat_harbour.prg b/tests/compat_harbour.prg
index 5dc2a66..20a660e 100644
--- a/tests/compat_harbour.prg
+++ b/tests/compat_harbour.prg
@@ -330,11 +330,31 @@ STATIC PROCEDURE TestArrayHash()
    ASort(a,,, {|x,y| x > y})
    Assert("9d ASort desc: {3,2,1}", a[1] == 3 .AND. a[2] == 2 .AND. a[3] == 1)
 
+   // ASort dates (default, no block — formerly no-op, now sorts julian)
+   a := { CToD("2026-03-15"), CToD("2024-01-10"), CToD("2025-07-01") }
+   ASort(a)
+   Assert("9c1 ASort dates ascending", ;
+      a[1] == CToD("2024-01-10") .AND. ;
+      a[2] == CToD("2025-07-01") .AND. ;
+      a[3] == CToD("2026-03-15"))
+
+   // ASort logicals (default — .F. < .T.)
+   a := { .T., .F., .T., .F. }
+   ASort(a)
+   Assert("9c2 ASort logicals: F,F,T,T", ;
+      !a[1] .AND. !a[2] .AND. a[3] .AND. a[4])
+
    // AScan
    a := {"alice", "bob", "charlie"}
    Assert("9e AScan: found 'bob' at 2", AScan(a, "bob") == 2)
    Assert("9f AScan: 'dave' not found", AScan(a, "dave") == 0)
 
+   // AScan numeric fast-path
+   a := { 10, 20, 30, 40 }
+   Assert("9e1 AScan int found", AScan(a, 30) == 3)
+   Assert("9e2 AScan int cross-type (double lookup)", AScan(a, 30.0) == 3)
+   Assert("9e3 AScan int not found", AScan(a, 99) == 0)
+
    // AEval with mutable closure capture (Harbour: closures share outer locals)
    nSum := 0
    AEval({10, 20, 30}, {|x| nSum += x})