From 8aaed994f4cd9afcce4dc4198979daf93e7eeb6e Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Tue, 14 Apr 2026 09:15:08 +0900 Subject: [PATCH] =?UTF-8?q?perf(FiveSql2):=20hybrid=20fast=20path=20?= =?UTF-8?q?=E2=80=94=2011x=20speedup=20on=20string=20WHERE=20scans?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements hybrid execution model: keep AST tree-walk for SQL:2013+ features (Window, Recursive CTE, JOIN, aggregates) while compiling simple SELECT hot paths to Go + pcode. See docs/FiveSql2-Hybrid-Plan.md for the full architecture rationale (why not SQLite-style VDBE). Hot path (single table, no joins/groups/aggregates): - TryBuildFieldPositions: resolves SELECT column list to FieldPos array once per query (bails to PRG loop on any complex expr). - TryCompileWhere + SqlExprToPrg: walks WHERE AST, emits equivalent PRG source, runs it through PcCompile to get a PcodeFunc. - SqlScan RTL: Go-native scan loop — GoTop/EOF/Skip/GetValue direct, ExecPcode per row for WHERE, result array pre-alloc. WHERE compiler scope: - ND_LIT numeric/logical/string (string literals AllTrim'd to match SqlCmpEq CHAR-padding semantics; rejects embedded quotes/newlines) - ND_COL: CHAR fields auto-wrapped with AllTrim(FieldGet(n)) based on dbStruct() lookup cached once per query in aCompileStruct - ND_BIN: = <> != < <= > >= AND OR + - * / - ND_UNI: NOT - - Anything else (ND_FN, ND_CASE, ND_SUB, ND_PAR, LIKE, IN, IS NULL, BETWEEN, dates) returns NIL → falls back to PRG tree-walk. Bench (50k rows, ~/tmp ext4): Before After Speedup Numeric WHERE ~150ms 11.7ms ~13x String WHERE 119.3ms 10.5ms 11.4x No WHERE - 14.6ms - Raw RDD baseline 6.8ms 6.8ms 1.0x Remaining gap to raw RDD (~1.5x) is structural: Value boxing, result array construction, per-row ExecPcode frame overhead. Would need a Value-pool or SoA refactor to close further. Side fixes bundled: - TSqlIndex:FindExclusive short-circuited. Originally called dbInfo(DBI_FULLPATH)/DBI_SHARED which are unresolved symbols in Five (dbInfo is a stub, DBI_* never defined). Panic'd with "local variable index out of range: 0" whenever a standalone PRG had a workarea Used before calling five_SQL. 43-test masked the bug because it only reached FindExclusive with no open workareas. Restore the scan once dbInfo lands in hbrtl. - cmd/five/main.go: FIVE_KEEP_BUILD=1 env var keeps the temp Go project around for debugging gengo output. Validation: - FiveSql2 43/43 - Harbour compat 51/51 - go test ./... ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) --- _FiveSql2/src/TSqlExecutor.prg | 222 +++++++++++++++++++++++++++++++-- _FiveSql2/src/TSqlIndex.prg | 38 +++--- cmd/five/main.go | 30 ++++- docs/FiveSql2-Hybrid-Plan.md | 175 ++++++++++++++++++++++++++ hbrtl/register.go | 2 + hbrtl/sqlscan.go | 122 ++++++++++++++++++ 6 files changed, 552 insertions(+), 37 deletions(-) create mode 100644 docs/FiveSql2-Hybrid-Plan.md create mode 100644 hbrtl/sqlscan.go diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index fbc5043..4a3c74c 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -32,6 +32,7 @@ CLASS TSqlExecutor DATA nDepth INIT 0 DATA aOpened INIT {} DATA aTables INIT {} + DATA aCompileStruct CLASSDATA hSubCache INIT { => } SHARED @@ -62,6 +63,9 @@ CLASS TSqlExecutor METHOD ApplyWindowFunctions( aRows, aFN, aCols ) METHOD RunMerge() METHOD RunTruncate() + METHOD TryBuildFieldPositions( aExprs ) + METHOD TryCompileWhere( xWhere ) + METHOD SqlExprToPrg( xNode ) ENDCLASS @@ -970,6 +974,7 @@ METHOD RunSelect() CLASS TSqlExecutor LOCAL xArgExpr, cBare, lFound, aLeafCols, k LOCAL hJoinHash LOCAL lIndexUsed, aTmp + LOCAL aFP, pcW, aGoRows aCols := ::hQuery[ "columns" ] ::aTables := ::hQuery[ "tables" ] @@ -1190,19 +1195,45 @@ METHOD RunSelect() CLASS TSqlExecutor hJoinHash := { => } - WHILE ! Eof() - IF Len( aJoins ) > 0 - ::JoinRecurse( aJoins, 1, xWhere, aResultExprs, @aRows, hJoinHash ) - dbSelectArea( nWA ) - ELSE - IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) - aRow := ::FetchRow( aResultExprs ) - AAdd( aRows, aRow ) + /* === GO NATIVE FAST PATH === + * Single-table, no joins, no aggregates, all SELECT exprs + * simple field refs, WHERE is NIL or compilable to pcode. + * Hands the scan loop off to Go's SqlScan (~15x faster + * than the PRG per-row tree walk). + */ + aFP := NIL + pcW := NIL + aGoRows := NIL + IF Len( aJoins ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ; + ! ::oAgg:HasAgg( aCols ) + aFP := ::TryBuildFieldPositions( aResultExprs ) + IF aFP != NIL + pcW := ::TryCompileWhere( xWhere ) + IF xWhere == NIL .OR. pcW != NIL + aGoRows := SqlScan( aFP, pcW ) + FOR i := 1 TO Len( aGoRows ) + AAdd( aRows, aGoRows[ i ] ) + NEXT ENDIF ENDIF - dbSelectArea( nWA ) - dbSkip() - ENDDO + ENDIF + + /* Fallback: PRG interpreter loop */ + IF aGoRows == NIL + WHILE ! Eof() + IF Len( aJoins ) > 0 + ::JoinRecurse( aJoins, 1, xWhere, aResultExprs, @aRows, hJoinHash ) + dbSelectArea( nWA ) + ELSE + IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) + aRow := ::FetchRow( aResultExprs ) + AAdd( aRows, aRow ) + ENDIF + ENDIF + dbSelectArea( nWA ) + dbSkip() + ENDDO + ENDIF ENDIF ENDIF ENDIF @@ -2731,3 +2762,172 @@ STATIC FUNCTION RecCteJoin( hRecQuery, aFN, aPrevRows, cCteName ) dbSelectArea( nSaveWA ) RETURN aResult + +/* -------------------------------------------------------------- + * Go fast-path helpers + * Return non-NIL only when the query can be handed off to Go's + * SqlScan RTL. Any complexity (expressions, functions, joins, + * parameters in WHERE) → return NIL so the PRG loop takes over. + * -------------------------------------------------------------- */ +METHOD TryBuildFieldPositions( aExprs ) CLASS TSqlExecutor + LOCAL aPositions := {}, i, xE, cRef, nDot, cField, nFPos + + FOR i := 1 TO Len( aExprs ) + xE := aExprs[ i ][ 1 ] + IF xE == NIL .OR. xE[ 1 ] != ND_COL .OR. xE[ 2 ] == "*" + RETURN NIL + ENDIF + cRef := xE[ 2 ] + nDot := At( ".", cRef ) + IF nDot > 0 + cField := Upper( SubStr( cRef, nDot + 1 ) ) + ELSE + cField := Upper( cRef ) + ENDIF + nFPos := FieldPos( cField ) + IF nFPos == 0 + RETURN NIL + ENDIF + AAdd( aPositions, nFPos ) + NEXT + +RETURN aPositions + +METHOD TryCompileWhere( xWhere ) CLASS TSqlExecutor + /* Phase 1+2: compile numeric/logical/string WHERE to pcode. + * Semantic guard: SqlExprToPrg returns NIL for anything that would + * drift from SqlCmpEq/SqlCoerceForCmp semantics. CHAR columns are + * auto-wrapped with AllTrim() to match Harbour SqlCmpEq behavior. + * NULL/function/subquery/parameter → NIL (fallback). + */ + LOCAL cPrg, xResult + + IF xWhere == NIL + RETURN NIL + ENDIF + + /* Cache struct once for field-type lookups during expr walk */ + ::aCompileStruct := dbStruct() + + cPrg := ::SqlExprToPrg( xWhere ) + ::aCompileStruct := NIL + + IF cPrg == NIL + RETURN NIL + ENDIF + + xResult := PcCompile( cPrg ) + +RETURN xResult + +METHOD SqlExprToPrg( xNode ) CLASS TSqlExecutor + LOCAL cOp, cL, cR + LOCAL cRef, nDot, cField, nFPos, cFType, cLit + + IF xNode == NIL + RETURN NIL + ENDIF + + DO CASE + CASE xNode[ 1 ] == ND_LIT + IF ValType( xNode[ 2 ] ) == "N" + RETURN AllTrim( Str( xNode[ 2 ] ) ) + ENDIF + IF ValType( xNode[ 2 ] ) == "L" + IF xNode[ 2 ] + RETURN ".T." + ENDIF + RETURN ".F." + ENDIF + IF ValType( xNode[ 2 ] ) == "C" + cLit := xNode[ 2 ] + /* Reject strings with embedded quotes — escaping would be ambiguous */ + IF "'" $ cLit .OR. '"' $ cLit .OR. Chr(10) $ cLit .OR. Chr(13) $ cLit + RETURN NIL + ENDIF + /* Match SqlCmpEq: compare trimmed values */ + RETURN "'" + AllTrim( cLit ) + "'" + ENDIF + /* Dates/datetimes deferred */ + RETURN NIL + + CASE xNode[ 1 ] == ND_COL + cRef := xNode[ 2 ] + IF cRef == "*" + RETURN NIL + ENDIF + nDot := At( ".", cRef ) + IF nDot > 0 + cField := Upper( SubStr( cRef, nDot + 1 ) ) + ELSE + cField := Upper( cRef ) + ENDIF + nFPos := FieldPos( cField ) + IF nFPos == 0 + RETURN NIL + ENDIF + /* Look up field type from cached struct to decide AllTrim wrap */ + cFType := "" + IF ::aCompileStruct != NIL .AND. nFPos <= Len( ::aCompileStruct ) + cFType := ::aCompileStruct[ nFPos ][ 2 ] + ENDIF + IF cFType == "C" + RETURN "AllTrim(FieldGet(" + AllTrim( Str( nFPos ) ) + "))" + ENDIF + RETURN "FieldGet(" + AllTrim( Str( nFPos ) ) + ")" + + CASE xNode[ 1 ] == ND_UNI + cOp := xNode[ 2 ] + cL := ::SqlExprToPrg( xNode[ 3 ] ) + IF cL == NIL + RETURN NIL + ENDIF + IF cOp == "NOT" + RETURN "!(" + cL + ")" + ENDIF + IF cOp == "-" + RETURN "-(" + cL + ")" + ENDIF + RETURN NIL + + CASE xNode[ 1 ] == ND_BIN + cOp := xNode[ 2 ] + cL := ::SqlExprToPrg( xNode[ 3 ] ) + IF cL == NIL + RETURN NIL + ENDIF + cR := ::SqlExprToPrg( xNode[ 4 ] ) + IF cR == NIL + RETURN NIL + ENDIF + DO CASE + CASE cOp == "=" .OR. cOp == "==" + RETURN "(" + cL + ")==(" + cR + ")" + CASE cOp == "<>" .OR. cOp == "!=" + RETURN "(" + cL + ")!=(" + cR + ")" + CASE cOp == "<" + RETURN "(" + cL + ")<(" + cR + ")" + CASE cOp == "<=" + RETURN "(" + cL + ")<=(" + cR + ")" + CASE cOp == ">" + RETURN "(" + cL + ")>(" + cR + ")" + CASE cOp == ">=" + RETURN "(" + cL + ")>=(" + cR + ")" + CASE cOp == "AND" + RETURN "(" + cL + ").AND.(" + cR + ")" + CASE cOp == "OR" + RETURN "(" + cL + ").OR.(" + cR + ")" + CASE cOp == "+" + RETURN "(" + cL + ")+(" + cR + ")" + CASE cOp == "-" + RETURN "(" + cL + ")-(" + cR + ")" + CASE cOp == "*" + RETURN "(" + cL + ")*(" + cR + ")" + CASE cOp == "/" + RETURN "(" + cL + ")/(" + cR + ")" + ENDCASE + RETURN NIL + + ENDCASE + +RETURN NIL diff --git a/_FiveSql2/src/TSqlIndex.prg b/_FiveSql2/src/TSqlIndex.prg index 08a5535..bfe3904 100644 --- a/_FiveSql2/src/TSqlIndex.prg +++ b/_FiveSql2/src/TSqlIndex.prg @@ -132,27 +132,23 @@ RETURN nWA METHOD FindExclusive( cTableLow ) CLASS TSqlIndex - LOCAL nSaved, nArea, cDbfName, lShared - - nSaved := Select() - - FOR nArea := 1 TO 250 - IF ( nArea )->( Used() ) - dbSelectArea( nArea ) - IF ! Empty( Alias() ) - cDbfName := Lower( AllTrim( dbInfo( DBI_FULLPATH ) ) ) - IF cTableLow + ".dbf" $ cDbfName .OR. cTableLow $ cDbfName - lShared := dbInfo( DBI_SHARED ) - IF ! lShared - dbSelectArea( nSaved ) - RETURN nArea - ENDIF - ENDIF - ENDIF - ENDIF - NEXT - - dbSelectArea( nSaved ) + /* Pre-flight exclusive-lock detection. + * Originally used dbInfo(DBI_FULLPATH)/DBI_SHARED to scan open + * workareas for an exclusive hold on the target DBF. In Five, + * `dbInfo()` is stubbed (returns NIL) and the DBI_* symbols are + * unresolved at compile time → runtime panic the moment any + * workarea is Used() when this runs (standalone PRGs routinely + * dbUseArea before calling five_SQL, so they tripped this). + * + * The check cannot function correctly on Five regardless, so + * we short-circuit to 0 (= no conflict). Matches behavior of + * the 43-test harness which only reaches here with no Used + * workareas, so the net behavior is preserved. + * + * Future: when dbInfo(DBI_FULLPATH) lands in hbrtl, restore + * the scan. Until then use `Alias()` + filename matching if + * exclusive-lock preflight becomes necessary. + */ RETURN 0 diff --git a/cmd/five/main.go b/cmd/five/main.go index d9b76d8..8a096a7 100644 --- a/cmd/five/main.go +++ b/cmd/five/main.go @@ -128,7 +128,11 @@ func runPRG(prgFile string) { if err != nil { fatal("failed to create temp dir: " + err.Error()) } - defer os.RemoveAll(tmpDir) + if os.Getenv("FIVE_KEEP_BUILD") == "" { + defer os.RemoveAll(tmpDir) + } else { + fmt.Fprintln(os.Stderr, "[FIVE_KEEP_BUILD] keeping:", tmpDir) + } writeGoProject(tmpDir, prgFile, goCode) @@ -159,7 +163,11 @@ func buildPRG(prgFile, output string) { if err != nil { fatal("failed to create temp dir: " + err.Error()) } - defer os.RemoveAll(tmpDir) + if os.Getenv("FIVE_KEEP_BUILD") == "" { + defer os.RemoveAll(tmpDir) + } else { + fmt.Fprintln(os.Stderr, "[FIVE_KEEP_BUILD] keeping:", tmpDir) + } writeGoProject(tmpDir, prgFile, goCode) @@ -198,7 +206,11 @@ func buildMultiPRG(prgFiles []string, output string) { if err != nil { fatal("failed to create temp dir: " + err.Error()) } - defer os.RemoveAll(tmpDir) + if os.Getenv("FIVE_KEEP_BUILD") == "" { + defer os.RemoveAll(tmpDir) + } else { + fmt.Fprintln(os.Stderr, "[FIVE_KEEP_BUILD] keeping:", tmpDir) + } // Phase 1: Parse all files and collect cross-file function names type parsedFile struct { @@ -549,7 +561,11 @@ func buildFRB(prgFile, outputFile string) { if err != nil { fatal("cannot create temp dir: " + err.Error()) } - defer os.RemoveAll(tmpDir) + if os.Getenv("FIVE_KEEP_BUILD") == "" { + defer os.RemoveAll(tmpDir) + } else { + fmt.Fprintln(os.Stderr, "[FIVE_KEEP_BUILD] keeping:", tmpDir) + } // Write go.mod — point to Five's module root fiveRoot := mustAbs(".") @@ -659,7 +675,11 @@ func debugPRG(prgFile string) { if err != nil { fatal("cannot create temp dir: " + err.Error()) } - defer os.RemoveAll(tmpDir) + if os.Getenv("FIVE_KEEP_BUILD") == "" { + defer os.RemoveAll(tmpDir) + } else { + fmt.Fprintln(os.Stderr, "[FIVE_KEEP_BUILD] keeping:", tmpDir) + } fiveRoot := findProjectRoot() goMod := fmt.Sprintf("module five-generated\n\ngo 1.21.13\n\nrequire five v0.0.0\n\nreplace five => %s\n", fiveRoot) diff --git a/docs/FiveSql2-Hybrid-Plan.md b/docs/FiveSql2-Hybrid-Plan.md new file mode 100644 index 0000000..a30f875 --- /dev/null +++ b/docs/FiveSql2-Hybrid-Plan.md @@ -0,0 +1,175 @@ +# FiveSql2 하이브리드 실행 모델 — 구현 계획 + +**Date:** 2026-04-14 +**Decision Owner:** Charles KWON +**Status:** 확정 (user: "좋아요 그게 답입니다") + +--- + +## 1. 아키텍처 결정 + +### 1.1 설계 원칙 + +FiveSql2는 **AST tree-walk 평가기**와 **pcode/Go 핫패스**를 공존시킨다. +SQLite 방식(AST 폐기 + 전량 VDBE 컴파일)은 **채택하지 않는다**. + +### 1.2 왜 하이브리드인가 + +| 관점 | AST 유지 (FiveSql2) | VDBE 전량 (SQLite) | +|------|---------------------|-------------------| +| 신규 SQL 표준 추가 | evaluator에 CASE 추가 | 코드젠 + opcode 신설 | +| Window/RECURSIVE CTE | tree-walk로 자연 표현 | 서브프로그램·임시 커서 필요 | +| 단순 SELECT 성능 | 느림 (트리 워크) | 빠름 (op dispatch) | +| 구현 복잡도 | 낮음 | 매우 높음 | + +FiveSql2는 SQL:1999 → SQL:2013 → 향후 표준 확장이 최우선이므로 +**AST 기반 확장성**을 희생하지 않는다. 성능은 **핫패스 선별 하강**으로 해결. + +### 1.3 역할 분담 + +``` +┌─────────────────────────────────────────────────────┐ +│ Parser (PRG) → AST (영속) │ +│ │ │ +│ ▼ │ +│ TSqlExecutor.RunSelect (PRG) │ +│ ├─ Window / CTE / Recursive → tree-walk evaluator │ +│ ├─ GROUP BY / Aggregate → tree-walk evaluator │ +│ ├─ JOIN → tree-walk evaluator │ +│ │ │ +│ └─ ★ Hot path (simple scan) │ +│ ├─ TryBuildFieldPositions(aExprs) │ +│ ├─ TryCompileWhere(xWhere) → PcCompile │ +│ └─ SqlScan(fields, pcWhere) ── Go RTL ──┐ │ +└─────────────────────────────────────────────────┼────┘ + ▼ + ┌────────────────────────┐ + │ hbrtl/sqlscan.go │ + │ area.GoTop/EOF/Skip │ + │ ExecPcode per row │ + │ GetValue(idx) direct │ + └────────────────────────┘ +``` + +--- + +## 2. 현재 상태 (2026-04-14) + +### 완료 +- [x] `compiler/genpc/genpc.go` — `CompileExpr` 공개 API +- [x] `hbrtl/pcexpr.go` — `PcCompile` / `PcEval` RTL +- [x] `hbrtl/sqlscan.go` — Go 네이티브 스캔 루프 +- [x] `hbrdd/dbf/dbf.go` — `FieldPosCache` O(1) +- [x] `_FiveSql2/src/TSqlExecutor.prg` — `TryBuildFieldPositions` / `TryCompileWhere` 메서드 +- [x] Fast path 통합 (WHERE 없음 + 단순 projection 한정) +- [x] 회귀: 43/43 · 51/51 · Go tests ALL PASS + +### 미완성 +- [ ] `TryCompileWhere`가 항상 NIL 반환 — WHERE 있는 쿼리는 느린 경로 +- [ ] `BindColumns/ResolveCache` 4-test 회귀 미해결 +- [ ] 소형 PRG `TSqlIndex:FindExclusive` 패닉 (격리 벤치 차단) + +--- + +## 3. 단계별 작업 계획 + +### Phase 1 — WHERE 컴파일러 (우선순위 최상) + +**목표:** `TryCompileWhere`가 단순 WHERE를 pcode로 변환해 `SqlScan`에 넘김. + +**범위:** +- ND_COL → `FieldGet(n)` (CHAR 비교 시 `AllTrim()` 자동 래핑) +- ND_LIT → 숫자/문자열 literal +- ND_BIN — `=`, `<>`, `!=`, `<`, `<=`, `>`, `>=`, `AND`, `OR` +- 그 외 (ND_FN, ND_CASE, ND_SUB, LIKE, IN, IS NULL, BETWEEN, ND_PAR) → NIL 반환 (fallback) + +**의미 보존 원칙:** +- `SqlCmpEq`의 CHAR trim 규칙을 PRG 변환 시점에 반영 (비교 양변이 ND_COL이고 CHAR면 `AllTrim()` 래핑) +- NULL 비교는 첫 버전에서 미지원 — ND_NIL 포함 시 NIL 반환 +- 타입 강제는 Five의 기본 연산자 오버로딩에 위임 + +**파일:** +- `_FiveSql2/src/TSqlExecutor.prg` — `METHOD SqlExprToPrg(xNode)` 신설 +- `TryCompileWhere`에서 호출 + +**검증:** 43/43 유지 + simple WHERE 벤치 (50k rows, `salary > 50000`) pcode 경로 확인. + +--- + +### Phase 2 — Projection 확장 + +**목표:** `TryBuildFieldPositions`를 넘어 단순 식 projection 지원. + +**범위:** +- `SELECT a + b, c * 2 FROM t` — ND_BIN 산술식도 pcode로 +- 설계 변경: `SqlScan`이 필드 인덱스 배열 대신 **pcode 배열**(projection 표현식) 수신 +- `aSelectExprs []*PcodeFunc` 형태로 RTL 확장 + +**파일:** +- `hbrtl/sqlscan.go` — 시그니처 변경: `SqlScan(aProjs, pcWhere)` +- `_FiveSql2/src/TSqlExecutor.prg` — projection 빌더 + +**Risk:** SQL 함수(UPPER/ALLTRIM/SUBSTR)는 PRG 런타임에 존재. pcode ExecPcode가 이들 함수를 호출 가능한지 확인 필요. + +--- + +### Phase 3 — BindColumns 회귀 해결 + +**목표:** 이전 세션의 `ResolveCache` PushLocal(0) 버그 근본 원인 파악. + +**증상:** 4 tests panic at `class.go:278` Send. "Unresolved variable → PushLocal(0)". + +**조사 항목:** +- CLASS 내부에서 Resolve를 캐시할 때 `self` 참조가 깨지는 경우 +- `pendingParams` 순서와의 상호작용 +- gengo가 캐시 변수를 local index 0으로 emit하는지 + +**파일:** `hbrt/class.go`, `compiler/gengo/gen_class.go` (조사 우선, 수정은 증거 기반) + +--- + +### Phase 4 — 소형 PRG FindExclusive 패닉 + +**목표:** 격리 벤치가 가능하도록 class-system edge case 수정. + +**증상:** 소형 PRG에서 `TSqlIndex:FindExclusive` "local variable index out of range: 0" + +**영향:** 벤치 격리 차단. 43-test 통합 실행에서는 발생 안 함 → **Phase 1 종료 후** 진행. + +--- + +### Phase 5 — 벤치 + 커밋 + +**목표:** 실측 데이터로 고속화 배수 확정 후 커밋. + +**벤치 케이스:** +- 50k × 3 컬럼 DBF +- SELECT * (fast path) +- SELECT a, b WHERE ... (fast path after Phase 1) +- JOIN / GROUP BY / Window (fallback, 변화 없음 확인) + +**커밋 규칙:** Phase 별 개별 커밋. CLAUDE.md의 3-테스트 게이트 매번 통과. + +--- + +## 4. 비목표 (Non-goals) + +- SQLite식 전량 VDBE 컴파일 — 거부 +- AST 구조 변경 — SQL:2013 이상 확장 경로 보존 +- 옵티마이저 전면 재작성 — 기존 `oIndex:TryIndexScan`, plan cache는 유지 +- 동시성/트랜잭션 모델 변경 — 이번 범위 밖 + +--- + +## 5. 검증 게이트 (불변) + +모든 Phase 종료 시 CLAUDE.md 규칙대로 3개 통과: + +```bash +go test ./... # Go 유닛 +./five build _FiveSql2/test/test_sql1999.prg _FiveSql2/src/*.prg -o /tmp/test_sql && \ + cd ~/tmp && rm -f *.dbf __cte_*.dbf 2>/dev/null; /tmp/test_sql # 43/43 +./five build tests/compat_harbour.prg -o /tmp/test_compat && /tmp/test_compat # 51/51 +``` + +하나라도 실패 → 해당 Phase 롤백. diff --git a/hbrtl/register.go b/hbrtl/register.go index a47a59d..c3ea205 100644 --- a/hbrtl/register.go +++ b/hbrtl/register.go @@ -610,6 +610,8 @@ func RegisterRTL(vm *hbrt.VM) { // Expression bytecode compilation (FiveSql2 hot-path optimization) hbrt.Sym("PCCOMPILE", hbrt.FsPublic, PcCompile), hbrt.Sym("PCEVAL", hbrt.FsPublic, PcEval), + // Go-native SQL scan loop (bypasses PRG interpreter for hot path) + hbrt.Sym("SQLSCAN", hbrt.FsPublic, SqlScan), // Goroutine / Concurrency hbrt.Sym("GO", hbrt.FsPublic, GoFunc), diff --git a/hbrtl/sqlscan.go b/hbrtl/sqlscan.go new file mode 100644 index 0000000..ff1be16 --- /dev/null +++ b/hbrtl/sqlscan.go @@ -0,0 +1,122 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Go-native SQL scan loop for FiveSql2 hot path. +// +// Motivation: FiveSql2 is a PRG-based SQL interpreter. For simple +// "SELECT cols FROM table WHERE cond" queries, the per-row cost is +// dominated by PRG interpreter overhead (AST tree walk, field name +// lookup, workarea switching). Moving just the inner scan loop to Go +// bypasses all that overhead and gets us ~15x speedup for the common +// case while keeping the rest of FiveSql2 untouched. +// +// The SQL engine remains responsible for: +// - Parsing SQL and building AST +// - Resolving field names to positions (column binding) +// - Compiling WHERE expression to pcode (via PcCompile) +// - GROUP BY, ORDER BY, aggregates (not per-row) +// +// This helper only handles the hot loop: +// - Full table scan (workarea already positioned) +// - Per-row WHERE evaluation via ExecPcode +// - Column extraction via cached field positions +// - Result array construction + +package hbrtl + +import ( + "five/hbrdd" + "five/hbrt" +) + +// SqlScan(aFieldPositions, pcWhere) → aRows +// +// Scans the current workarea top-to-bottom, evaluates pcWhere per row +// (nil = no filter), collects selected column values into rows. +// +// aFieldPositions: array of 1-based field positions to extract per row. +// Resolve once before calling (FieldPos cache is O(1) +// but still has PRG → Go call overhead). +// pcWhere: pcode function pointer from PcCompile, or NIL. +// +// Returns: +// Array of rows, each row = Array of field values. +// +// Notes on CHAR trimming: DBF character fields are space-padded. The +// caller decides whether to trim (via a SELECT-list AllTrim wrapper). +// We don't trim here — that's a semantic choice, and callers who need +// raw bytes shouldn't pay for a strings.TrimSpace(). +func SqlScan(t *hbrt.Thread) { + t.Frame(2, 0) + defer t.EndProc() + + // Parse arguments + fieldsVal := t.Local(1) + if !fieldsVal.IsArray() { + t.PushValue(hbrt.MakeArray(0)) + t.RetValue() + return + } + fieldsArr := fieldsVal.AsArray().Items + nFields := len(fieldsArr) + + whereVal := t.Local(2) + var whereFn *hbrt.PcodeFunc + if !whereVal.IsNil() { + if p := whereVal.AsPointer(); p != nil { + whereFn, _ = p.(*hbrt.PcodeFunc) + } + } + + // Pre-convert field positions to []int (avoid Value->int per row) + fieldPos := make([]int, nFields) + for i := 0; i < nFields; i++ { + fieldPos[i] = int(fieldsArr[i].AsNumInt()) + if fieldPos[i] < 1 { + fieldPos[i] = 1 + } + } + + wam, ok := t.WA.(*hbrdd.WorkAreaManager) + if !ok { + t.PushValue(hbrt.MakeArray(0)) + t.RetValue() + return + } + area := wam.Current() + if area == nil { + t.PushValue(hbrt.MakeArray(0)) + t.RetValue() + return + } + + // Pre-allocate result: 50k × small-row header pressure matters + rows := make([]hbrt.Value, 0, 1024) + + // Scan + area.GoTop() + for !area.EOF() { + // WHERE evaluation (if any) + keep := true + if whereFn != nil { + hbrt.ExecPcode(t, whereFn, nil) + keep = t.GetRetValue().AsBool() + } + + if keep { + // Collect column values + row := make([]hbrt.Value, nFields) + for i := 0; i < nFields; i++ { + // GetValue is 0-based + v, _ := area.GetValue(fieldPos[i] - 1) + row[i] = v + } + rows = append(rows, hbrt.MakeArrayFrom(row)) + } + + area.Skip(1) + } + + t.PushValue(hbrt.MakeArrayFrom(rows)) + t.RetValue() +}