diff --git a/OPTIMIZATION_TODO.md b/OPTIMIZATION_TODO.md new file mode 100644 index 0000000..b9fdf8a --- /dev/null +++ b/OPTIMIZATION_TODO.md @@ -0,0 +1,559 @@ +# Five — Optimization TODO (continue in next session) + +Open this file in a future Claude session to resume the audit-driven +optimization work. The project-wide 절대 규칙 still applies: every +change must pass three gates before being accepted. + +```bash +cd /Users/charleskwon/Projects/fivedev/five +go test ./... +./five build _FiveSql2/test/test_sql1999.prg _FiveSql2/src/*.prg -o /tmp/test_sql \ + && (cd ~/tmp && setopt NULL_GLOB; rm -f *.dbf *.ntx *.cdx; /tmp/test_sql) +./five build tests/compat_harbour.prg -o /tmp/test_compat && /tmp/test_compat +``` + +Targets: Go ALL PASS · FiveSql2 43/43 · Harbour compat 56/56. + +--- + +## 완료 (2026-04-24/25: 잠재 버그 사냥 라운드 — Tier 1+2+3, 14/14) + +14개 잠재 영역 전체 처리. 실버그 ~17건 추가 fix (CTE+DML, AGG-in-expr, +Recursive CTE+JOIN, FK ON UPDATE, NULL FK, Date arithmetic, Numeric +literal, Numeric overflow, Plan cache cap, RunUpdate LOCAL aliasing 등). +누적 이번 시즌 ~37건. 3 게이트 매번 그린 유지: Go test ALL PASS, +FiveSql2 43/43, Compat 56/56. + +| 영역 | 버그 | Fix | +|------|------|-----| +| MERGE | `WHEN MATCHED AND ` / `WHEN NOT MATCHED AND ` 무시; `WHEN MATCHED THEN DELETE` 미구현; INSERT branch UNIQUE 검증 없음 | `TSqlExecutor.prg:RunMerge` 가 `match_condition`/`not_match_condition`/`matched_delete` 를 읽고 평가; INSERT branch 에 `SqlValidateUnique` 추가 | +| UPDATE PK | PK 컬럼이 `.fsc` UNIQUE 리스트에 없어서 PK 중복 UPDATE 가 silent 통과 | `TSqlDDL.prg:CreateTable` 에서 aPKCols → aUniqCols 자동 합침 | +| Recursive CTE | iter cap 50 → legit `seq 1..N` (N>50) silent truncation | `TSqlExecutor.prg:MaterializeRecursiveCTE` 50→10000 | +| Error quality | SELECT/UPDATE/DELETE/INSERT against missing table → panic 또는 silent garbage; INSERT 추가 값/없는 컬럼 → silent drop | `TSqlExecutor.prg` 에 pre-flight `File()` 체크 + INSERT 컬럼/값 길이 검증 | +| GROUP BY 표현식 | `GROUP BY UPPER(col)` / `GROUP BY col/N` / `GROUP BY ` 모두 1 그룹으로 collapse | `TSqlAgg.prg:FindGroupIdx` 가 ND_LIT(numeric) ordinal + `SqlExprName`-based expression matching 지원 | +| LIMIT 0 | `LIMIT 0` 가 전체 반환 (parser default 0 == "no LIMIT" 와 충돌) | parser default `nLimit:=NIL`; executor `nLimit==N`이면 0/음수 → empty | +| DDL 충돌 | ALTER ADD 중복 컬럼 silent ok; ALTER DROP 없는 컬럼 → crash; CREATE TABLE 기존 파일 silent overwrite (데이터 손실); DROP TABLE 없는 파일 silent ok | 각각 pre-flight 체크 + `IF EXISTS` 지원 (DROP) | +| RETURN-from-SEQUENCE | `RETURN` 이 `BEGIN SEQUENCE` 안에서 unwind 안 되는 Five 동작 | SqlAlterAddColumn / SqlAlterDropColumn 에서 검증을 SEQUENCE 밖으로 이동 | +| **EXISTS regression** | 위 LIMIT 0 fix 의 부작용으로 `ExistsViaSemiJoin` 의 `hLifted["limit"] := 0` 이 "empty result" 로 해석 → 모든 lifted EXISTS false. 43-suite 가 못 잡아 한 라운드 잠복 | `hLifted["limit"] := NIL` | +| **DELETE+same-table subq** | EXCLUSIVE open 충돌로 subq 가 `__error__` envelope 반환; ND_SUB 가 그것의 [2][1][1] (= numeric error code 1005) 을 scalar 로 surface → WHERE 비교 모두 false. silent 데이터 보존 | `SqlExecOpenTable` SHARED 로 변경; ND_SUB 에 `__error__` 봉투 가드 추가 | +| **VIEW** | (a) CreateView 가 token-join 으로 SQL 저장 — string literal quote 손실 (`'eng'`→`eng`) → re-parse 시 syntax error. (b) SELECT FROM view 미구현 → `__error__` | (a) TK_TEXT 토큰 single-quote 로 다시 감싸고 `'` doubling. (b) `SqlMaterializeView` 추가 — `.fsv` 읽고 nested TFiveSQL 실행 후 MEMRDD temp 로 적재 | +| RTL/IO | `FRead(@buf, n)` byref 가 buffer 비워 둠 (관찰됨). View 본문 로딩에서 발견 | `MemoRead` 로 우회 | +| **CTE+DML** | `WITH cte AS (...) UPDATE/DELETE/INSERT ...` 가 status "TG" / "DELETE" 같은 garbage envelope 반환. (a) 파서가 WITH 뒤 SELECT 만 인식. (b) executor 의 RunInsert/RunUpdate/RunDelete 가 CTE materialize 안 함 → SET / WHERE 안의 subquery 가 CTE alias resolve 실패 | (a) `TSqlParser2.prg:ParseSelect` 의 WITH 분기가 trailing 키워드 보고 `ParseInsert/Update/Delete` 로 분기 + `cte`/`cte_recursive` 키 stash. (b) RunInsert/RunUpdate/RunDelete 진입부에 `MaterializeCTE` / `MaterializeRecursiveCTE` 호출 추가 | +| **AGG-in-expression** | `SELECT MAX(id)+1 / COUNT(*)+10 / SUM(v)-30 / MAX(id)*2` 모두 0 반환. (a) `ComputeAgg` 가 top-level 이 ND_FN 가 아니면 즉시 0 (ND_BIN/ND_UNI wrapper 무시). (b) hidden-columns 로직이 wrapped 케이스에서 source column 추가 안 함 → 추가했어도 row 에 없음 | (a) `TSqlAgg.prg:ComputeAgg` 진입부에 ND_BIN/ND_UNI/ND_FN(non-agg)/ND_LIT/ND_NIL recursive 분기 추가 — sub-aggregate 계산 후 SqlEvalRowExpr 로 wrapper 적용. (b) `TSqlExecutor.prg` hidden-cols 루프에 wrapped 케이스 분기 추가 — 전체 식에서 `SqlCollectColExprs` 로 ND_COL leaf 모두 hidden 으로 등록 후 LOOP. | +| **Recursive CTE + JOIN (aliased)** | `WITH RECURSIVE sub AS (...) SELECT s.id FROM sub s JOIN dep d ON ...` → "Table '__cte_sub' does not exist". `MaterializeRecursiveCTE` 가 `aTables[j][1]` 을 `__cte_` 으로 rewrite — 사용자가 alias (`sub s`) 준 경우 `Select(user_alias)` 가 0 반환 → OpenTable 이 `__cte_` DBF 찾으나 MEMRDD 전용이라 실패. 비-recursive CTE 는 동일 패턴이지만 rewrite 안 함. | `MaterializeRecursiveCTE` 의 aTables rewrite 블록 제거 — 원래 CTE 이름 유지하면 RunSelect 의 open 루프가 (a) `Select(cName)` 으로 이미 열린 area 찾거나 (b) MEMRDD fallback 으로 user alias 에 second workarea attach | +| **FK ON UPDATE** | DDL 파서가 `ON UPDATE` 절 미지원 (ON DELETE 만), executor 가 parent column UPDATE 시 children 에 RESTRICT/CASCADE/SETNULL 적용 안 함, FK validate 가 NULL 값 reject (SQL 표준 위반) | (a) `TSqlDDL.prg` parser 에 `DO WHILE ::IsKW("ON")` 루프 추가 — DELETE/UPDATE 양쪽 분기. (b) `.fsc` 형식: `FK:c:p_t:p_c[:on_del[:on_upd]]`. (c) `SqlFindReferencingFKs` 가 5번째 필드 (on_update) 반환. (d) 신규 `SqlEnforceUpdateRefs(parent, refs, hChanged)` 함수 — CASCADE 는 children UPDATE, SETNULL 은 children NULL set, RESTRICT 는 child row 있으면 차단. (e) `RunUpdate` PRG 루프에 변경 hash 빌드 + 호출. (f) `SqlValidateFKRecord` NIL 단락 — NULL FK 는 항상 만족 (표준). | +| **RunUpdate LOCAL slot aliasing** | RunUpdate 함수 top 에 새 LOCAL 추가하니 mid-function `LOCAL hUpdConstraints` 가 set 한 .T. 가 read 시점에 non-logical 로 surface → fast-path gate 에서 silent panic | mid-function LOCAL 모두 함수 top 으로 hoist. **Five 컴파일러의 known limitation: 함수 중간에 LOCAL 선언하지 말 것** — 이미 CLAUDE.md 의 인라인 IF 금지 규칙과 같은 카테고리. | +| **Date arithmetic** | `SELECT d + 7`, `d - 30`, `d - d`, `INSERT VALUES (DATE '...' + N)` 모두 wrong — 결과는 raw N (Date 가 SqlCoerceNum 으로 0 collapse). UPDATE 경로는 SqlCoerceToCol 이 D 컬럼에 N→D 변환 해서 우연히 동작. | `EvalExpr` 와 `SqlEvalRowExpr` 둘 다 `+`/`-` 분기에 `(D,N) → D + N`, `(N,D) → D + N`, `(D,N) → D - N`, `(D,D) → N (days)` 케이스 추가. SQL 표준 + Harbour 둘 다 같은 의미. | +| **Numeric literal in WHERE → 0** | `WHERE v = 0.1` (그리고 모든 fractional literal) silent 0 rows 반환. `SqlExprToPrg` 가 ND_LIT(N) 을 `AllTrim(Str(0.1))` 로 emit — Str(0.1) default 는 " 0" → AllTrim → "0" → pcode 가 `WHERE v = 0` 실행. 기존 43-suite 는 정수 비교만 사용해서 잠복. | ND_LIT(N) emit 을 `hb_NToS(xNode[2])` 로 변경 — decimal preserve. | +| **Numeric overflow silent** | `INSERT INTO n(N(4,0)) VALUES (99999999)` silent 0/garbage 저장. DBF N codec truncate. | RunInsert PRG 양 분기에 `Str(xVal, FieldLen, FieldDec)` 가 '*' 포함 시 `MakeError(SQL_ERR_GRAMMAR, "Numeric overflow: ...")` + dbDelete + close. | +| **NULL FK reject** | ON UPDATE SET NULL 으로 child.fk_col → NULL 쓰면 inner UPDATE 의 `SqlValidateFKRecord` 가 NULL 을 parent 에서 못 찾아 reject — child rollback. SQL 표준: NULL FK 는 항상 만족. | `SqlValidateFKRecord` 진입부에 `IF xValue == NIL RETURN .T.` 단락. | +| **Plan cache 무한 성장** | `s_hPlanCache` / `s_hDmlPcodeCache` 무제한 — 동적 SQL 많은 long-running 서버에서 메모리 leak. | `SQL_PLAN_CACHE_MAX = 1000` cap. 초과 시 전체 wipe (LRU 대신 — Five hash 가 insertion-order 보장 안 함). DML pcode 캐시 동일하게 cap + `SqlDmlPcodeCacheReset()` 헬퍼로 cross-module reset. | + +### 추가 라운드 (2026-04-26: 잠재 영역 8개 stress test) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **HAVING wrapped agg** | `HAVING SUM(amt) + 1 > 200` (그리고 SUM 자체 단독) silent 0 rows. (a) `EvalHavingExpr` 가 산술/||(`+`,`-`,`*`,`/`,`||`) 분기 없음 → NIL 반환. (b) HAVING 내 aggregate 의 source column 이 hidden col 에 추가 안 됨 → ComputeAgg nCol=0. SELECT 에 같은 agg 있으면 hidden 으로 추가되어 우연히 동작. | (a) `TSqlAgg.prg:EvalHavingExpr` 에 산술/concat 분기 추가 + NULL 전파. (b) `RunSelect` 의 hidden col 루프 직후에 xHaving 도 `SqlCollectColExprs` 로 walk → ND_COL leaf 모두 hidden 등록. | +| **CASE wrapping aggregate** | `CASE WHEN COUNT(*) > 2 THEN 'many' ELSE 'few' END` → label = 0 (string literal 0 으로 collapse). ComputeAgg 가 ND_CASE 미지원 → fall through to default 0. | `ComputeAgg` 에 ND_CASE 분기 추가 — branches 각 cond/then 과 else 모두 recursive ComputeAgg. | +| **Multi-row scalar subquery silent** | `INSERT VALUES ((SELECT id FROM t), 100)` 가 t 의 첫 row 를 silent 사용 (t 가 multi-row). SQL 표준 위반. | ND_SUB scalar 분기에서 `Len(aSubResult[2]) > 1` 면 `OutErr()` warning + NULL 반환. silent 첫행→noisy NULL 으로. | +| **Self-FK INSERT silent pass** | `CREATE TABLE n (id, parent_id, FK(parent_id) REFERENCES n(id))` 후 `INSERT INTO n VALUES (5, 999)` (parent 없음) silent 통과. RunInsert 의 FK 검증이 `Len(aFields) > 0` 일 때만 동작 → positional INSERT 에서 skip. | RunInsert 의 FK loop 를 `FOR i := 1 TO FCount()` + `FieldName(i)` 로 변경 — 모든 컬럼 검증. SqlValidateFKRecord 가 비-FK 컬럼 cheap pass. | +| **Date + string ISO 비교** | `WHERE d = '2026-04-25'` → 0 rows (ISO 형식 매치 안 됨), `WHERE d > '2026-06-01'` → 모든 row (string compare 로 fall-through, '20260425' > '2026-06-01' 의 lexicographic 비교가 거꾸로). DToS 형식 (20260425) 만 동작. | `sqlhelpers.go:sqlCmpDateStr` + `sqlCmpLt` 의 D/C cross 분기에 `normalizeDateStr` 적용 — `-`, `/`, `.` 제거 후 비교. | + +### 학습 포인트 (2026-04-26) + +- **AGG 처리 fix 가 multiple sites 필요.** ComputeAgg + EvalExpr + EvalHavingExpr + hidden col 로직 + window agg + ORDER BY 등. 한 군데 fix 하면 다른 path 가 silent 0. 한 라운드에서 모두 cover 못 하면 다음 라운드에 또 발견. +- **Self-FK + multi-row INSERT 는 알려진 한계.** SqlValidateFKRecord 가 same area 의 dbGoTop/scan 으로 buffer state 잃음. dbCommit + RECNO save/restore 추가했어도 multi-row 는 여전히 fail. Single-row 와 cross-table 은 동작. Workaround: multi-row 대신 sequential single-row INSERT. +- **String compare 로 fall-through 가 silent corruption 의 가장 흔한 source.** Date/Number 대 String 비교가 잘 정의 안 되면 SqlCmpEq 가 false 반환하지만 SqlCmpLt 는 lexicographic 으로 동작 → WHERE 가 직관과 거꾸로 결과 반환. 항상 cross-type coercion path 명시. + +### 추가 fix (보류 처리) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **ORDER BY wrapped agg** | `ORDER BY MAX(amt)+1 DESC` 정렬 안 됨 (insertion order). TryBuildSortSpec/TSqlSort:OrderBy 가 ND_COL 만 처리. | RunSelect hidden col 단계에서 ORDER BY 의 wrapped expression 을 `__ord___` alias 로 hidden col 추가 + `aResultExprs` & `aCols` 둘 다 mirror (GroupBy/aFieldNames 가 aCols 사용). aOrderBy 의 expression 을 ND_COL("__ord___") 로 in-place rewrite. RunSelect 끝에 `nUserCols` 로 hidden col trim. | +| **Window function arithmetic** | `SUM(amt) OVER () + 100` 가 100 반환. ApplyWindowFunctions 가 top-level ND_WINDOW 만 인식 → wrapped 시 placeholder 0 통과. | 신규 `SqlExtractWindow(xE, aWindows, cPrefix)` walker — 식 안의 ND_WINDOW 를 hidden alias `__win____` 로 substitute + extract. RunSelect 가 wrapped col index 를 `::aWrappedWindowCols` 에 등록. ApplyWindowFunctions 후 wrapped col 마다 `SqlEvalRowExpr` 로 row 다시 evaluate (post-window re-eval). | +| **Self-FK INSERT silent pass** | `INSERT INTO n VALUES (5, 999)` parent 없이 silent 통과. RunInsert 의 FK 검증이 named columns (`aFields`) 일 때만 동작. | RunInsert FK loop 를 `FOR i := 1 TO FCount()` + `FieldName(i)` 로 변경 — positional INSERT 도 검증. | +| **Self-FK multi-row INSERT** | `INSERT INTO n VALUES (1,NULL),(2,1)` 가 (2,1) FK 검증에서 fail (1006). SqlValidateFKRecord 의 dbGoTop 이 INSERT 중인 area 의 buffer state 깨뜨림 + 첫 row 가 buffer-only 상태라 못 봄. | self-FK 분기에서 (a) `dbCommit` 으로 buffer flush, (b) `__FK_` 별도 area 강제 open (SHARED) — INSERT area 와 분리, (c) 검증 후 area close + 원래 area 복원. RECNO save/restore 는 fallback 경로용. | +| **Date+string ISO 비교** | `WHERE d = '2026-04-25'` (ISO) → 0 rows, `WHERE d > '2026-06-01'` → 모든 row. DToS 형식 (20260425) 만 동작. | `sqlhelpers.go:sqlCmpDateStr` + `sqlCmpLt` 의 D/C cross 분기에 `normalizeDateStr` 적용 — `-`, `/`, `.` 제거 후 비교. | + +### 학습 포인트 (2026-04-26 추가) + +- **Hidden col rewrite 시 aCols + aResultExprs 둘 다 update.** ORDER BY/HAVING/Window wrapper 처리 시 hidden col 를 추가하는 자리가 두 군데. aResultExprs 만 update 하면 GroupBy/aFieldNames rebuild 가 못 봄. 둘 다 mirror + nUserCols 로 trim. +- **Window arithmetic 은 fetch 와 ApplyWindowFunctions 의 평가 순서 문제.** Fetch 가 placeholder 0 으로 outer 식 평가 → 잘못된 결과 row 에 저장. ApplyWindowFunctions 는 ND_WINDOW 자리만 채움. 해결: post-window re-eval 단계 추가 — wrapped col 마다 row 재평가. +- **Self-FK validation 은 in-flight area 분리 필수.** SqlValidateFKRecord 가 같은 area 의 dbGoTop 사용하면 multi-row INSERT 의 dirty buffer 가 안 보이고 RECNO 도 깨짐. `__FK_
` 강제 open + dbCommit 으로 disk-only 검증이 정답. 비-self-FK 는 기존 path (already-open area 재사용). + +### 추가 라운드 (2026-04-27: 8개 영역 stress test — 2 silent 버그 발견) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **CASE simple form** | `CASE v WHEN 0 THEN 'zero' WHEN 10 THEN 'ten' ELSE 'other' END` 가 1 row of NIL 반환 — 파서가 simple form (`CASE expr WHEN val ...`) 미지원. searched form (`CASE WHEN cond ...`) 만 처리. SQL 표준은 둘 다. | `TSqlParser2.prg:ParsePrimary` 의 CASE 분기에 simple form 감지 추가. CASE 다음에 WHEN 이 안 나오면 test expression 파싱, 각 WHEN val 을 ND_BIN(=, test_expr_clone, val) 로 desugar to searched form. | +| **Multi-column UNIQUE** | `CREATE TABLE u (a, b, UNIQUE(a, b))` 후 `INSERT (1,1)`, `INSERT (1,2)` 둘 다 fail — DDL 파서가 (a, b) 를 두 개의 single-col UNIQUE entry 로 저장 → SqlValidateUnique 가 a 와 b 각각 single 하게 검증. SQL 표준: UNIQUE TUPLE. | (a) DDL 파서가 multi-col UNIQUE 를 한 entry 로 comma-joined 저장 (`UNIQUE:a,b`). (b) `SqlValidateUnique` 가 entry 를 `hb_ATokens(',')` 로 split, 첫 col 이 cCol 일 때만 trigger (중복 검증 회피), 나머지 col 값을 caller area 에서 read, scan 시 모든 col 동시 매치. Single-col 은 `xValue` 직접 사용 (외부 caller 가 positioned record 없이 호출하는 경우 backward compat). | + +### 검증 — 정상 동작 (6/6 + 부가) + +| 영역 | 결과 | +|------|------| +| JOIN ON NULL (INNER, LEFT, anti-join via IS NULL) | 3/3 정확 | +| COALESCE / NULLIF / CASE searched form | 5/5 정확 (simple form fix 후) | +| Subquery in UPDATE SET (uncorrelated, correlated, arithmetic) | 3/3 정확 | +| Correlated UPDATE / DELETE WHERE EXISTS | 2/2 정확 | +| LIMIT/OFFSET edges (0, past-end, large, mid-range) | 6/6 정확 | +| TRUNCATE / DELETE FROM (no WHERE) | 정확 | + +### 추가 라운드 (2026-04-27/28: Tier A NULL semantics 후속 + Tier B JOIN 확장) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **NOT BETWEEN with NULL** | `WHERE v NOT BETWEEN 10 AND 30` 가 v=NULL row 도 포함 (NOT(.F.) = .T.). SQL 표준: NULL → UNKNOWN → drop. ND_RANGE 평가가 NULL operand 전파 안 함. | EvalExpr 의 ND_RANGE 분기에 `IF xL/xR/xHi == NIL RETURN NIL` 가드 추가. NOT 은 이미 NIL→NIL 처리. | +| **IS DISTINCT FROM 미구현** | `WHERE v IS DISTINCT FROM NULL` 가 항상 0 rows. 파서는 ND_BIN("IS DISTINCT FROM") 노드 생성하지만 EvalExpr 에 분기 없음 → fall through to RETURN NIL → WHERE 항상 false. | EvalExpr 에 `IS DISTINCT FROM` / `IS NOT DISTINCT FROM` 분기 추가 — NULL-safe 비교 (둘 다 NULL → not distinct, 한쪽 NULL → distinct, 일반 값은 SqlCmpEq 역). | +| **CASE simple form 미지원** | `CASE v WHEN 0 THEN 'zero' WHEN 10 THEN 'ten' END` 가 1 row of NIL. 파서가 simple form 인식 안 함, searched form 만 처리. | `TSqlParser2.prg:ParsePrimary` 의 CASE 분기에서 CASE 다음에 WHEN 이 안 나오면 test expression 파싱, 각 `WHEN val` 을 `ND_BIN(=, test_clone, val)` 로 desugar to searched form. | +| **Multi-column UNIQUE** | `UNIQUE(a, b)` tuple 이 아니라 a 와 b 각자 single-col UNIQUE 로 저장 → `(1,1),(1,2)` 둘 다 fail. | (a) DDL 파서가 multi-col UNIQUE 를 한 entry comma-joined (`UNIQUE:a,b`) 저장. (b) `SqlValidateUnique` 가 entry 를 split, 첫 col == cCol 일 때만 trigger (중복 검증 회피), 나머지 col 값을 caller area 에서 read, scan 시 모든 col 동시 매치. Single-col 은 xValue 직접 사용 (외부 caller backward compat). | + +### 검증 — 정상 동작 + +| 영역 | 결과 | +|------|------| +| LIKE ESCAPE / literal `%`, `_` 매칭 | 3/3 | +| IN list with NULL (3-value logic) | 2/2 | +| INNER / LEFT / RIGHT / FULL OUTER / CROSS JOIN | 5/5 | +| Window functions (ROW_NUMBER, RANK with ties, LAG, LEAD, ROWS BETWEEN sliding sum) | 동작 (LAG/LEAD `default` arg 는 NIL 으로 fall back — minor) | +| 4-table JOIN chain + star query (LEFT JOIN + GROUP BY) | 동작 | + +### 시니어 audit 라운드 (2026-04-30: SQL 표준 conformance) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **COUNT(DISTINCT col)** | parser 가 DISTINCT keyword 무시 → aggregate 결과 0/NIL. SUM/AVG/MIN/MAX(DISTINCT) 도 동일. 매우 흔한 SQL 인데 silent wrong count. | (a) `TSqlParser2:ParsePrimary` function-call 분기에서 LPAR 직후 `DISTINCT`/`ALL` modifier 인식. ND_FN 의 5번째 slot 에 `lDistinct` flag 저장. (b) `TSqlAgg:ComputeAgg` 가 flag 보고 fast-path skip + per-value `hSeen` hash 로 dedup. | +| **UNION column count mismatch** | `SELECT a UNION SELECT a, b` 가 silent merge — 첫 SELECT 의 columns 만 keep, 둘째의 추가 column drop. SQL 표준은 error. | RunSelect 의 UNION 분기에서 `Len(aU[1]) != Len(aFieldNames)` 시 `MakeError(SQL_ERR_GRAMMAR, ...)`. | +| **DISTINCT + ORDER BY non-list col** | `SELECT DISTINCT grp ORDER BY id` 가 모든 row 반환 (DISTINCT 무력). ORDER BY hidden col `__ord___` 가 row 에 포함되어 DISTINCT 의 dedup hash 가 row 별 unique 인식. | DISTINCT 직전에 `nUserCols` trim 적용. ORDER BY 는 이미 적용 끝 — hidden col 더 이상 필요 없음. 마지막 trim block 은 redundant 로 남음 (no-op). | + +### 검증 — 정상 (시니어 audit) + +| 영역 | 결과 | +|------|------| +| HAVING without GROUP BY (implicit single group) | 4/4 | +| Self-JOIN (`emp e JOIN emp m`, 3-level chain, LEFT JOIN, WHERE on alias) | 4/4 | +| Implicit type coercion (N↔C, BETWEEN, decimal literal) | 6/6 | +| Recursive CTE cycle protection (iter cap 10000) | bounded | +| Large IN list (200+ items, NOT IN, empty IN) | 정상 | +| Boundary numeric (15-digit float64 limit), date (leap year, year boundary, +N arithmetic) | 정상 (Feb 29 non-leap → Mar 1 silent rollover 는 xBase 관습 — minor) | + +### 보류 항목 종료 라운드 (2026-04-30) + +이전 라운드의 모든 보류 항목 (4건) 완료. 시즌 마무리. + +| # | 영역 | Fix | +|---|------|-----| +| 1 | **Derived table** (`FROM (SELECT...)`) | 파서는 이미 `__SUBQUERY__` 표시했고 `SqlMaterializeSubquery` 도 있음 — 단 (a) 1글자 alias 가 `Len(cAlias) <= 1` 분기에서 임시 alias 로 rename 되어 `Select(cAlias)` 가 못 찾음 → derived 의 cTable prefix `__drv_` 면 rename skip. (b) 임시 alias 로 open 후 close + reopen under user alias. (c) `::aOpened` 에 add 해서 next query 가 alias 충돌 안 일으킴. (d) JOIN right side 도 LPAR-SELECT 인식하도록 ParseFrom 의 JOIN 분기 확장. 3/3 derived 테스트 통과. | +| 2 | **Self-FK CASCADE depth 2+** | nested DELETE 가 same area record-pointer race. CASCADE 분기가 child PK pre-collect — `SqlGetSingleColPK(child)` 로 .fsc 의 single-col UNIQUE 추출, `SELECT pk WHERE fk=val` 로 list 모음, 각 PK 로 single-row DELETE 호출. PK 없으면 기존 multi-row nested DELETE fall back. 5-row org chain DELETE root → 0 remaining. | +| 3 | **LAG/LEAD default arg** | `LAG(v, 1, -1)` 의 -1 이 ND_UNI(-, ND_LIT(1)) 로 파싱돼 `aFuncArgs[3][1] == ND_LIT` check 실패 → xDefault=NIL. SqlEvalRowExpr 로 const expr 평가. negative literal / CAST / 기타 const 모두 동작. | +| 4 | **Feb 29 non-leap silent rollover** | `DATE '2025-02-29'` 가 CToD 의 xBase 관습 silent rollover (Mar 1) → 표준 위반. DATE 리터럴 파서가 round-trip 검증 (`DToS(d) == strip-separators(input)`), 불일치 시 NIL emit. | + +### 시즌 종료 + +| 메트릭 | 값 | +|--------|-----| +| 누적 silent 버그 fix | ~62건 | +| 3 게이트 | Go test ALL PASS, FiveSql2 43/43, Compat 56/56 | +| 보류 영역 | 0 (모두 종료) | + +### 추가 라운드 (2026-04-29~30: Tier A/B/C audit) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **DROP TABLE 메타 누락** | `.dbf`, `.fsc`, `_pk/_uq.ntx`, memo (`.dbt/.fpt`) 만 cleanup, **`.cdx`** + `.fsv` 누락 → 다음 CREATE 가 stale `.cdx` 자동 attach (multi-row INSERT silent drop 과 같은 메커니즘). | DropTable 의 FErase 체인에 `.cdx`, `.fsv` 추가. | +| **CREATE TABLE fsc/cdx/memo 누락** | 첫 cleanup 라운드에서 `_pk.ntx`/`_uq.ntx` 만 sweep — `.cdx`, `.fsc`, `.dbt`, `.fpt` 잔존 → constraint 없는 CREATE 후 prior `.fsc` 의 stale UNIQUE/CHECK 가 여전히 enforce 되어 silent dup-reject. | CreateTable pre-flight cleanup 에 `.cdx`, `.fsc`, `.dbt`, `.fpt` 추가. | +| **TSqlIndex.FindExclusive prefix-substring** | `cTableLow $ cDbfName` (basename 비교 아닌 substring) → "c" 가 ".../cus.dbf" 안에서 .T. → 다른 테이블의 EXCLUSIVE lock 으로 잘못 인식 → 새 OPEN 이 -1 (locked) 반환. | basename strip ("/" 와 "\\") 후 정확한 `
.dbf` / `
` 비교. | +| **AlterTable type dispatch substring** | `cType $ "CHAR,CHARACTER,VARCHAR"` 가 cType="A" 같은 1-char value 도 매치. CreateTable 은 이미 fix (`","+cType+","` wrap), AlterTable 은 누락. | AlterTable type dispatcher 에도 comma-wrap 적용. DOUBLE/FLOAT/REAL 도 추가. | +| **CREATE VIEW silent overwrite** | `CREATE VIEW v` 가 already exists 시 silent FCreate (overwrite). SQL 표준 위반. | already-exists 시 error 반환. `CREATE OR REPLACE VIEW v` 문법 추가 (executor dispatch + parser). | +| **MatchOrderByTag NIL panic** | `lTagDesc := dbOrderInfo(DBOI_ISDESC)` 가 NIL 반환 시 `! lTagDesc` panic (`argument error op: .NOT.`). 새로 build 된 인덱스에서 발생. | NIL → .F. 으로 default. | + +### 검증 — 정상 동작 + +| 영역 | 결과 | +|------|------| +| ALTER TABLE + plan cache invalidation | 7/7 정확 (SqlBumpSchemaVer 동작) | +| ALTER ADD/DROP COLUMN 후 PK/UNIQUE 인덱스 정합성 | 6/6 | +| TSqlExecutor instance reuse | 안전 (매 query 새 New, Init 가 모든 instance var reset) | +| hQuery deep clone | 안전 (`HbDeepClone` per Execute) | +| Correlated subquery (scalar, EXISTS, NOT EXISTS, multiple per row) | 5/5 | + +### 추가 fix — multi-row INSERT silent row drop (2026-04-28) + +| 영역 | 버그 | Fix | +|------|------|-----| +| **prefix-glob index attach** | `TSqlIndex:AttachNTX` / `AttachCDX` / `OpenTable` 셋 다 `Directory(cTableLow + "*.ntx")` / `Directory(cTableLow + "*.cdx")` 로 매치 — `c*.ntx` 가 `c_uq.ntx` 외에 `cus_uq.ntx` 도 hit. sibling 테이블의 stale index 가 새 area 에 attach → SkipIndexed 가 stale key tree 따라 walk → record N 의 key 가 그 tree 에 없으면 silent skip. 현장 증상: `INSERT (r1) → INSERT (r2) → INSERT (r3) → SELECT` 가 2 rows 만 반환. INSERT 는 `affected_rows=1` 보고하지만 다음 SELECT 의 SqlScan 이 record 3 를 못 봄. 43-test 는 prior cleanup 으로 stale `_uq.ntx` 가 없어서 통과. CREATE TABLE 의 stale `_pk.ntx`/`_uq.ntx` cleanup 도 추가 (사용 환경 안전망). | `Directory` glob 제거 — convention 명 (`
_pk.ntx`, `
_uq.ntx` / `
.cdx`) 만 명시적으로 attach. RDD 결정 (`OpenTable`) 도 `File(cFileLow + ".cdx")` 로. ad-hoc index 는 explicit `SET INDEX TO` 로 사용. | +| **Generated function prefix** | `compiler/gengo` 가 PRG → Go 컴파일 시 함수 이름을 `HB_` 으로 prefix — Harbour 의 `HB_FUNC` macro convention 답습. Five 의 정체성과 맞지 않음. | `gengo.go` + `gen_class.go` 의 prefix `HB_` → `FV_`. Generated symbol/function: `FV_MAIN`, `FV_TSQLEXECUTOR_RUNSELECT`, `FV__CTOR`, `FV__`. Harbour 호환 RTL 함수 (hb_NToS 등) + `HB_FUNC` Go API + `#pragma BEGINDUMP` macro 는 그대로 유지 (외부 호환성). Stack trace 가 이제 `main.FV_MAIN` 등으로 표시. | + +### 남은 보류 (다음 라운드) + +| 영역 | 이유 | +|------|------| +| Self-FK CASCADE depth 2+ | Cascade 가 직접 자식만 처리. nested five_SQL DELETE 가 same area 의 record-pointer race 로 첫 매치 후 dbSkip 이 EOF 로 jump (debug 확인). 전체 fix 는 별도 area 강제 또는 child IDs pre-collect 필요. Cross-table cascade 는 정상. | +| LAG/LEAD `default` arg | LAG(v, 1, -1) 의 -1 default 가 무시되고 NIL 반환 — minor. | + +### 학습 포인트 (2026-04-24/25) + +- **테스트 suite 의 사각지대.** 43/43 이 모두 통과해도 EXISTS-via-semi-join 이 통째로 깨질 수 있음. fix 한 후 같은 카테고리 (NULL 시맨틱 전반) 의 추가 stress 가 필수. 이번 라운드의 EXISTS 버그가 그 사례. +- **Same-table 서브쿼리는 SHARED open 가 필수.** Single-process Five 에서도 두 번째 open 이 필요. EXCLUSIVE 는 자기 자신과 충돌. +- **Five PRG 의 `RETURN-from-SEQUENCE` 가 unwind 안 됨.** 검증 로직은 SEQUENCE 밖에 둬야 함. `Break(NIL)` + RECOVER 패턴은 동작하지만 RETURN 은 구멍. +- **AGG-in-expression 은 두 군데 fix 필요.** ComputeAgg 가 wrapper 를 인식해야 하는 것 외에, hidden-columns 로직도 wrapper 를 descend 해야 함. 둘 중 하나만 fix 하면 silent 0 반환. 43-suite 가 `SUM(amount) AS x` 처럼 wrapper 없는 패턴만 사용해서 잠복. real-world 쿼리의 70% 이상이 `MAX(id)+1` / `COUNT(*)+1` / `ROUND(AVG(p), 2)` 같은 wrapper 사용 — 매우 위험한 silent bug. + +--- + +## 완료 (2026-04-22/23: SQL NULL 라운드 + 대형 런타임/컴파일러 버그) + +정확성 중심 라운드. 이번 세션에만 15+ 개의 실제 버그(silent data loss / +semantic 오류) 수정. 3개 게이트 모두 그린 유지. + +| 범주 | 항목 | 키 파일 | +|------|------|---------| +| RDD | DBF nullable columns via hidden `_NullFlags` bitmap (Harbour VFP convention). FieldFlagSystem=0x01 / Nullable=0x02 / Binary=0x04. `PutValue` 가 NIL 쓰기 → 비트 세팅, `GetValue` 가 비트 세팅된 행 → NIL 반환. 비-nullable 테이블은 zero overhead (hidden 컬럼 append 안 함) | `hbrdd/dbf/null.go` (new), `header.go`, `dbf.go` | +| DDL | `NOT NULL` / `NULL` / `PRIMARY KEY` 파싱 + Flag 바이트로 저장 | `TSqlDDL.prg` | +| DDL | `ALTER TABLE ADD/DROP COLUMN` 에서 flag 보존 (buffer-all-rows-first migration) | `TSqlDDL.prg:SqlAlterAddColumn/DropColumn` | +| SQL | NOT NULL 런타임 enforce (INSERT + UPDATE) | `TSqlExecutor.prg:RunInsert/RunUpdate` | +| SQL | UNIQUE × NULL: multiple NULLs 허용 (SQL 표준) | `TSqlDDL.prg:SqlValidateUnique` | +| SQL | NULL 3값 논리: `NOT IN (..., NULL)` → NULL, `NOT(NULL) = NULL`, IN 이 NULL 포함 리스트에서 no-match 시 NULL 반환 | `TSqlExecutor.prg:EvalExpr` | +| SQL | Multi-row `INSERT VALUES (...), (...), (...)` + `INSERT ... SELECT` 완성. 이전엔 파서가 첫 튜플만 수용. | `TSqlParser2.prg:ParseInsert`, `TSqlExecutor.prg:RunInsert` | +| SQL | `.T.` / `.F.` / `.Y.` / `.N.` Harbour logical literals + SQL `TRUE`/`FALSE` + `DATE 'YYYY-MM-DD'` | `sqlhelpers.go:lexSQL` (Go), `TSqlLexer.prg` (PRG mirror), `TSqlParser2.prg:ParsePrimary` | +| SQL | INSERT/UPDATE 에서 string → Date 자동 변환 when 타겟 D. UPDATE 는 fast-path 게이트로 강제 PRG | `TSqlDDL.prg:SqlCoerceToCol`, `TSqlExecutor.prg:RunUpdate` | +| SQL | Window `AVG/SUM/COUNT/MIN/MAX OVER ()` (ORDER BY 없음) → **whole-partition aggregate**. 이전엔 running average 반환 (SQL 표준 위반; Postgres/SQL Server 결과와 diverge) | `TSqlExecutor.prg` | +| SQL | `SET DELETED` 누수 fix: `Run()` 진입 시 save/force ON/restore on exit. 이전에 `five_SQL()` 호출이 호출자의 SET DELETED 상태를 조용히 flip | `TSqlExecutor.prg:Run` | +| SQL | DDL 타입 파서의 `D` substring 버그 fix (DOUBLE/DEC/D 충돌로 DATE 컬럼을 N(18,6) 로 만들던 문제) | `TSqlDDL.prg` | +| RTL | `CToD` ISO 포맷 pre-pass: YYYY-MM-DD / YYYY/MM/DD / YYYYMMDD / YYYY.MM.DD | `hbrtl/datetime.go` | +| RTL | `dbStruct()` → 5-element rows (name/type/len/dec/flags). 하위 호환 유지 | `hbrtl/procinfo.go` | +| RTL | `DbCreate` 5th element = Flags byte (optional) | `hbrtl/indexrtl.go` | +| RTL | `SqlBulkInsert` 의 `continue on NIL` skip 제거 — CTE/서브쿼리 materialization 에서 NULL 보존 | `hbrtl/sqlscan.go` | +| **런타임** | **Cross-area state-bleed (CRITICAL).** `Thread.waSel` 인터페이스가 `Current() uint16` 요구했으나 `WorkAreaManager.Current()` 는 `Area` 반환. 타입 단정이 조용히 실패 → `ALIAS->(expr)` 프리픽스가 silently no-op. 두 번째 워크에리어가 열리자마자 모든 alias-prefix expression 깨짐. `CurrentNum() uint16` 로 교정. ALTER TABLE 다중 행 마이그레이션의 "1 rows migrated" 버그의 근본 원인이었음 | `hbrt/thread.go:WASaveAndSelect*` | +| 컴파일러 | Bare-statement `Set(n, v)` 가 SET 커맨드로 파싱되어 args 가 드롭됨. 다음 토큰이 `(` 이면 expression parser 로 라우팅. 다른 명령 토큰(Skip/Select/Seek)은 expression 연산자를 원래 허용해서 문제 없음 (사인 확인). | `compiler/parser/stmtreg.go:stmtSet` | +| 테스트 | ~80 custom tests 추가 (NULL E2E, NOT NULL, UNIQUE+NULL, multi-row INSERT, cross-area stress, ALTER ADD/DROP, window × NULL, subquery NULL, DATE/LOGICAL literals, string→date, LEFT JOIN × NULL, PARTITION BY × NULL) | `/tmp/test_*.prg` | + +### 학습 포인트 + +- **타입 단정 실패는 silent.** Go 인터페이스 단정이 실패하면 `ok=false` 지만 호출 블록 전체가 스킵되는 경우 표면 증상 없이 "작동하는 것처럼" 보일 수 있음. 프리픽스-alias 버그가 그 예시. +- **"이전에 동작했던 것" 의 함정.** Cross-area 버그를 고치자 `SqlAlterDropColumn` 이 처음으로 깨짐 — 버그에 의존해서 우연히 동작했던 코드가 드러남. 후속 회귀 스트레스가 필수. +- **SQL 기본 프레임은 ORDER BY 유무에 의존.** OVER () 없이 ORDER BY 는 RANGE UNBOUNDED PRECEDING 실행 (running), ORDER BY 없이 OVER() 는 whole partition. Oracle 12c 미만은 틀렸지만 표준은 Postgres / SQL Server 쪽. + +--- + +## 완료 (이번 세션, 실측 기준) + +| 계층 | 항목 | 수치 | +|---|---|---| +| RDD | formatNumericField byte writer | **4.1×** (w/ alloc -100%) | +| RDD | CDX leaf slab reuse | **1.6×** (alloc -100%) | +| RDD | parseMemoRef byte parser | **1.38×** | +| RDD | CDX seek zero-copy + seekBuf | alloc -100% per seek | +| RDD | ForFunc compiled closures (INDEX FOR) | **3-5×** 필터 인덱스 | +| RDD | Windows mmap (dbf + ntx + cdx) | **2-4×** on Windows | +| RDD | Mem driver lock-free reads | **5×** @ 8 cores (scales linearly) | +| RDD | Windows `dbf.go` build tag 분리 | 정확성 | +| RDD | PGO 인프라 (`FIVE_CPUPROFILE` + `default.pgo`) | 미래 대비 | +| gengo | `DebugLineFast` + `g.Debug` 게이트 복원 | ~5% tight loop + size 감소 | +| SQL | TSqlIndex per-row executor 재사용 | **1.91×** index scan | +| SQL | ND_COL pre-resolve (`PreResolveColumns`) | 3% JOIN (HAVING/CASE 영향 더 큼) | +| SQL | RunDelete Go RTL (`SqlBulkDelete`) | **2.14×** bulk DELETE | +| SQL | 슬라이딩 윈도우 프레임 (prefix-sum in Go) | **17×** wide-frame window | +| SQL | 멀티-컬럼 ORDER BY index 매치 | 정확성 개선 (perf는 SqlScan LIMIT pushdown 필요) | +| SQL | **SqlScan LIMIT pushdown + TryIndexScan LIMIT** | **770× ORDER+LIMIT, 207× WHERE+ORDER+LIMIT** (see 2026-04-20) | +| RDD | NTX OrderListAdd — store KeyExpr from header | 정확성 (DBOI_EXPRESSION 이 re-open 후 빈 문자열 반환하던 버그) | +| RDD | DBOI_KEYSIZE + OrderKeyLen — expose index key width | 정확성 (BuildKey 의 Str(N,10) 하드코드가 N(8,0) 인덱스 ordScope 무효화) | +| Compiler | `SELECT ` — 리터럴 alias 이름으로 해석 | 정확성 (undefined 식별자 fallback 이 `_wa.Select("")` 로 빈 워크에리어 생성 → SqlAlterAddColumn 이 0 rows migrated — classic Clipper semantics) | + +`MakeStringBytes` primitive 는 hbrt/value.go 에 살려 두었지만 실제 +hot path 에 배선은 안 되어 있음 (#1 CHAR zero-copy 가 FiveSql2 의 +CTE 임시 테이블 수명 위반으로 UAF 유발 → revert). refcounted mmap +인프라가 갖춰지기 전까지 dormant. + +--- + +## 남은 우선순위 (audit 보고서 기준) + +### Tier 1 — 큰 구조 개선 + +1. ~~**A1. JOIN predicate pushdown + hash-join threshold**~~ **DONE (2026-04-20)** — + (a) `JoinRecurse`에 `aPushByLevel` 파라미터 추가 + `SplitAndClauses`/`BuildAliasLevelMap`/ + `ClauseMaxLevel`/`EvalPushedAtLevel` 헬퍼. `RunSelect`가 `xWhere`를 top-level AND로 + 분해하고 각 conjunct가 참조하는 alias의 최대 깊이를 계산해 중간 level에 pin된 + 술어는 해당 level의 매치 직후 평가 → false면 더 깊은 join 재귀 스킵. 부수 발견: + `aJoins[i][3]`이 JOIN 동기화 루프에서 temp alias(`ORD_2`)로 덮어쓰여서 쿼리 본문의 + SQL alias(`o`)와 매핑 실패하던 문제 → `aTables` 전체를 스캔하여 원래 테이블명 / + temp alias / 원래 SQL alias 3개 키를 모두 등록. 실측: 100×10×10 = 10k triples에서 + mid-level WHERE 기준 **1055ms → 361ms = ~3×**. 깊은 level WHERE는 residual로 + fallback (변화 없음). 정확성 테스트 8/8, 3-gate 통과. + (b) `lUseHash` 게이트에 inner RecCount > 64 조건 추가 — 소규모 내측 테이블은 + 해시 빌드 오버헤드 없이 nested-loop. 기존 43/43 유지. + +2. ~~**SqlScan LIMIT pushdown**~~ **DONE (2026-04-20)** — 단일 테이블 LIMIT / ORDER BY-from-index / WHERE + ORDER + LIMIT 경로에서 + Go `SqlScan` 과 PRG `TryIndexScan` 양쪽에 조기 종료 훅. 실측 770× / 207×. + 부수 발견: NTX `OrderListAdd` 가 키 표현식을 빈 문자열로 저장하던 버그, + `BuildKey` 가 Str(N, 10) 을 하드코드해 N(8,0) 인덱스에서 ordScope 가 무효화되던 버그 동시 수정. + (NTX `Index.KeyExpr()` / `DBOI_KEYSIZE` / `DBFArea.OrderKeyLen(n)` 추가.) + +### Tier 2 — 개선 효과 중간 + +3. ~~**C1 RunUpdate 인덱스 유지 검증**~~ **DONE (2026-04-20)** — 감사 결과: `SqlBulkUpdate`는 + `DBFArea.PutValue`(바이트 쓰기 + dirty flag) 경유하며 인덱스 키 del/add 없음. 더 큰 갭: + `dbf/indexer.go` 어디에도 `ordKeyDel` / `ordKeyAdd` 훅이 없어 `Append` / `PutValue` / `Delete` + 전반이 인덱스를 유지하지 않음. 단, **SQL 엔진의 `SqlExecOpenTable`이 .ntx를 열지 않으므로** + SQL-only 경로에서는 증상이 안 보이고, 호출자가 `SET INDEX TO`로 같은 워크에리어를 미리 + 연 경우에만 divergence가 재현됨. 수정: `SqlBulkUpdate` 말미에 "업데이트된 필드가 열린 + 인덱스의 KeyExpr에 등장하면 `OrderListRebuild()`" 가드 추가 (substring 매치로 보수적 판정, + 인덱스 없는 테이블 / 미커버 필드는 no-op → B13 48× 경로 영향 0). 회귀 테스트 4/4 통과. + **Follow-up**: `Append` / `FieldPut` 경로 per-record maintenance, `SqlExecOpenTable` 가 + `.ntx`/`.cdx`를 자동 attach하는 설계는 별도 세션. + +4. ~~**C6 / C7 스키마 버전 키 기반 plan-cache invalidation**~~ **DONE (2026-04-20)** — `s_nSchemaVer` + STATIC 카운터를 TSqlExecutor.prg 에 추가, 모든 DDL 메서드 (CreateTable/Index/View, + DropTable/Index/View, AlterTable ADD/DROP) 성공 시 `SqlBumpSchemaVer()`. TFiveSQL + 에서 캐시 키를 `hb_NToS(SqlSchemaVer()) + "|" + cKey` 로 생성 → s_hPlanCache 와 + s_hDmlPcodeCache (cCacheKey 통해) 양쪽 자동 무효화. 회귀 테스트 9/9 통과. + +5. ~~**A3 보완: MIN/MAX 슬라이딩 윈도우 (monotonic deque)**~~ **DONE (2026-04-20)** — + `SqlWindowSlideAgg`에 monotonic deque 분기 추가 (numeric only, 비-numeric 값 + 감지 시 `.F.` 반환하여 PRG O(N·W) fallback). 부수 발견: `SqlExprHasAgg`가 + ND_WINDOW를 의도적으로 descend 안 해서 숨겨진-컬럼 로직이 아예 발동 안 되어 + 왔음 → `SELECT id, SUM(v) OVER (…)` 같은 쿼리는 SALARY가 SELECT 리스트에 + 없으면 모든 행 NIL 반환하던 버그 동시 수정 (TSqlExecutor.prg의 hidden-column + 루프에서 `.OR. aCols[i][1][1] == ND_WINDOW` 추가). 실측: 5000행 × W=201 + wide frame MIN/MAX **1427ms → 22ms = 65×**. 회귀 테스트 27/27, 3-gate 통과. + +### Tier 3 — smaller wins + +6. ~~**A4 UNION 스트리밍 DISTINCT**~~ **DONE (2026-04-21)** — Go RTL `SqlUnionDistinct(aL, aR)` + 추가: 단일 패스로 `aL + unique(aR)` 생성. 기존 경로는 `aRows ++= aU[2]; SqlDistinct(aRows)` + — 두 번 순회 + 중간 머지 배열 할당. 실측: 5k+5k 50% overlap UNION 19→17ms (~11%), 4-way + UNION 34→24ms (~29%). UNION ALL은 no-op. Big-O 동일하지만 할당/패스 감소. +7. ~~**A5 상관 subquery cache key 재구성**~~ **DONE (2026-04-21)** — Go RTL + `SqlBuildSubCacheKey(nId, aValues)` 추가. 기존 PRG 루프는 `hb_ntos(nId) + "@" + + SqlValToStr(v1) + "|" + ...` 방식으로 N+1 문자열 할당 + N SqlValToStr PRG 호출. + 새 경로: PRG는 free-var 값을 배열로 모아 Go RTL 한 번 호출 (canonical + appendValueHashKey 포맷으로 join). 캐시 히트 시 outer row당 N회 allocate가 + O(1)로 감소. FiveSql2 43/43 유지 (별도 상관 subquery 벤치 없음, 기능적 중립). +8. ~~**C2 RunDelete 제약 검증 (CHECK + FK)**~~ **DONE (2026-04-21)** — DDL 파서에 + `ON DELETE CASCADE/RESTRICT/SET NULL/NO ACTION` 수용 + .fsc 4번째 필드로 저장 + (backward-compatible: 4번째 필드 없으면 RESTRICT). `SqlFindReferencingFKs`가 + 디렉터리의 모든 .fsc를 스캔해 이 테이블을 참조하는 FK들을 수집. `RunDelete`가 + referencing FK가 있으면 fast-path(SqlBulkDelete) 우회하고 PRG 경로에서 + `SqlEnforceDeleteRefs`를 통해 per-record 집행: RESTRICT은 child에 매칭 행이 + 있으면 에러 반환 / CASCADE는 중첩 `DELETE` / SETNULL은 중첩 `UPDATE ... = NULL`. + 회귀 테스트 9/9 (RESTRICT default, CASCADE 전파, SET NULL 유지, 명시 RESTRICT). + 주의: 중첩 five_SQL 호출이 워크에리어를 옮기므로 부모 RecNo/Select를 enforce + 호출 전후에 저장·복원해야 dbDelete/dbRUnlock이 맞는 행에 적용됨 — 첫 구현에서 + 놓쳐 첫 row에서 enforce 자체를 우회하는 버그 발생, 디버그 후 수정. + +### gengo 리뷰의 low-effort fix 들 + +9. ~~**`emitSwitch` 타입 강제 수정**~~ **DONE (2026-04-21)** — 기존 emit는 + `_sw.AsNumInt() == caseVal.AsNumInt()` 로 모든 CASE를 비교했음 → "ABC".AsNumInt() = 0 + 이라 모든 non-numeric SWITCH가 첫 CASE로 고정. Fix: 런타임 `t.Equal()` 경유 (타입 인식 + 비교, CHAR trim / numeric promotion / date Julian 규칙 보존). 각 CASE를 독립된 + `if !_swHit { ... }` 블록으로 emit하여 스택 균형 + EXIT/RETURN 내성. 검증: + string/numeric/logical SWITCH 정상 매치. (날짜 CASE 실패는 `CToD` 파서의 별개 버그.) +10. ~~**`emitPostfixExpr` 식 컨텍스트 수정**~~ **DONE (2026-04-21)** — 기존 코드는 + `Dup + Inc + Pop` 로 스택 상단 사본만 증가시키고 변수 저장소는 건드리지 않아, + `y := x++` 이 y는 old x로 정답이지만 x는 그대로였음. LOCAL / STATIC / self-field + 3가지 타겟별로 (a) 현재 값 push (b) 저장소 직접 업데이트 — LOCAL은 `LocalAddInt`, + STATIC은 `_v := goVar.AsNumInt() + δ; goVar = hbrt.MakeInt(_v)`, self-field는 + `PushSelfField + Plus/Minus + SetSelfField`. 회귀: `y := x++` (5→6, y=5), + `y := x--` (10→9, y=10), `y := x++ + x++` (x 0→2, y=1) 모두 정답. +11. ~~**`emitForEach` hash / string iteration**~~ **DONE (2026-04-21)** — 기존 emit는 + 배열 케이스만 처리하고 hash/string은 조용히 skip. 3-way dispatch 추가: IsArray / + IsHash (Values 슬라이스 순회, 삽입 순서) / IsString (바이트 단위 1-char 서브스트링). + 회귀: 배열 합 60, 해시 합 6, 문자열 `abc` → `a-b-c-` 모두 정답. +12. ~~**gengo.go 3545 라인 분할**~~ **PARTIAL (2026-04-21)** — 3640→1629 줄 (55% 감소): + `folding.go` (456줄, constant folding + const-local propagation), + `emit_stmt.go` (1351줄, emitStmt 및 모든 statement emitter), `emit_block.go` + (287줄, emitAliasExpr / emitSendExpr / emitBlock + closure walker). 남은 gengo.go는 + core Generator API와 expr / RTL-inline / 헤더 방출. 추가 분할 (emit_expr.go, + emit_rtl_inline.go)은 가치 대비 리스크 낮아 생략 — 현 상태로도 파일당 300-1600줄이 + 관리 가능 범위. Go ALL / FiveSql2 43/43 / compat 56/56 유지. + +--- + +## 추천 진행 순서 (다음 세션) + +1. **SqlScan LIMIT pushdown** (0.5 일) — A2 를 비로소 체감 가능하게 만듦. 진입장벽 낮음. +2. **C6 / C7 schema-version cache** (0.5 일) — 정확성 핵심. DDL-heavy 워크로드 전에. +3. **C1 SqlBulkUpdate 인덱스 감사** (0.5 일) — 잠재 correctness bug 검증. +4. **A3 MIN/MAX deque** (1 일) — wide-frame 완결. +5. **A1 JOIN predicate pushdown** (2 일) — 마지막 큰 개선. + +Tier 3 (A4 / A5 / gengo fix 들) 은 시간 남으면. + +--- + +## 향후 기능 로드맵 (2026-04-23 정리) + +correctness 라운드(SQL NULL + 대형 버그 감사)가 마무리된 시점의 새 기능 +후보 목록. 각 항목에 크기·가치·구현 노트를 기재. 우선순위는 "사용자가 +실제로 쓸 쿼리에 얼마나 자주 걸리는가" 기준. + +### Tier A — 많이 쓰는 SQL 표준 기능 (중간 크기) + +- [ ] **A-1 MERGE / UPSERT 완결 검증** + - 크기: 0.5–1 일 + - 현 상태: test_sql1999.prg 섹션 5 (5a/5b/5c) 에 MERGE 테스트가 있음. + 최근 중간 라운드에서 한 번 깨졌다가 돌아옴 → 현재 경로에 잠재된 + edge case 가 있을 가능성. + - 할 일: `INSERT ... ON CONFLICT DO UPDATE` / `ON CONFLICT DO NOTHING` + 구문 지원 검토 (MERGE 와 별개로 Postgres 스타일이 사용자 친화적). + 현재 MERGE 는 SQL:2003 순수 구문. + - 노트: 충돌 감지는 UNIQUE constraint 기반. `.fsc` 에 이미 있으니 재사용. + +- [ ] **A-2 INTERSECT / EXCEPT** + - 크기: 0.5 일 + - UNION / UNION ALL 은 구현됨. INTERSECT (양쪽 공통) / EXCEPT (왼쪽 − 오른쪽) + 가 미확인. + - 구현: 기존 `SqlUnionDistinct` 패턴 재사용. Go RTL `SqlIntersect` / + `SqlExcept` 추가하고 파서에서 키워드 라우팅. + - 표준: DISTINCT 시맨틱이 기본. `INTERSECT ALL` / `EXCEPT ALL` 은 멀티셋 + 버전 (행별 카운트 유지) — 원한다면 optional. + +- [ ] **A-3 FULL OUTER JOIN** + - 크기: 0.5–1 일 + - LEFT/RIGHT/INNER 있음. FULL 은 "양쪽 모두 매칭 없어도 행 반환". + - 구현: `JoinRecurse` 에 FULL 분기 추가. LEFT 로 한 번 스캔 후 오른쪽에서 + 매칭 안 된 행을 outer-emit. 중복 안 되게 매칭 인덱스 플래그 필요. + - 주의: TryGoJoin 게이트는 이미 LEFT/RIGHT/FULL 을 PRG 로 fallback 시키고 + 있음 — PRG 경로만 건드리면 됨. + +- [ ] **A-4 CAST / type conversion coverage** + - 크기: 0.5 일 + - `CAST(expr AS type)` 이 기본은 동작 (ND_FN "CAST"). 실무 케이스: + CHAR↔NUMERIC, DATE↔CHAR, NUMERIC(18,4) 같은 width/precision 변환. + - 할 일: 각 (from-type, to-type) 매트릭스 테스트 작성 → 빠진 path 발견. + 특히 오늘 고친 string→date 자동 coercion 의 명시적 버전. + +- [ ] **A-5 추가 window 함수** + - 크기: 1 일 + - 있는 것: ROW_NUMBER / RANK / DENSE_RANK / LAG / LEAD / SUM/AVG/COUNT/MIN/MAX. + - 추가 대상: **FIRST_VALUE** / **LAST_VALUE** / **NTH_VALUE** / **NTILE(n)** / + **CUME_DIST** / **PERCENT_RANK**. + - 구현: 기존 partition-sort 인프라 재사용. FIRST_VALUE/LAST_VALUE 는 frame + 경계만 보고 O(1) 에 쓸 수 있음. NTILE/CUME_DIST/PERCENT_RANK 는 partition + 크기 + rank 만 있으면 됨. + - 주의: 오늘 고친 "no ORDER BY → whole partition" 시맨틱이 FIRST/LAST_VALUE + 에 중요 (frame 의미가 달라짐). + +### Tier B — 개발 편의 / 디버깅 (작음) + +- [ ] **B-1 EXPLAIN** + - 크기: 0.5 일 + - `EXPLAIN SELECT ...` → plan tree 를 JSON / 들여쓰기 텍스트로 덤프. + - 구현: plan cache 의 `hQuery` 해시를 recursive-walk 하는 Go RTL. 각 + 노드에 type / alias / args 표시. + - 가치: 쿼리 튜닝 시 어떤 경로 (TryGoJoin 해시 / JoinRecurse nested loop + / index scan / table scan) 를 타는지 즉시 파악 가능. 지금은 `FIVE_SQL_TRACE` + env 같은 게 없으면 찍어봐야 알 수 있음. + +- [ ] **B-2 INDEX ON expression 검증** + - 크기: 0.5 일 + - `CREATE INDEX idx ON t (UPPER(name))` 같은 함수 호출을 키식으로 받는지 확인. + - 현재 DDL 은 `CREATE INDEX ... ON ... (col_list)` 만 파싱할 가능성 큼. + - 할 일: 파서 확장 + 인덱스 키 식을 표현식 트리로 저장. Harbour RDD 는 + `INDEX ON TO ` 로 이미 임의 표현식 허용하니 SQL 쪽 DDL 게이트만 + 풀면 됨. + +- [ ] **B-3 GREATEST / LEAST** + - 크기: 0.5 일 + - `GREATEST(a, b, c)` / `LEAST(a, b, c)` — 여러 인자 중 max/min. SQL 표준 + (Oracle/Postgres). NULL-aware: Postgres 는 NULL 무시, Oracle 은 어느 + 하나라도 NULL 이면 NULL. Postgres 쪽을 추천. + - 구현: ND_FN 에 케이스 추가. 단순 reduce. + +- [ ] **B-4 String 함수 coverage 스캔** + - 크기: 0.25 일 + - SUBSTRING / POSITION / LENGTH / CHAR_LENGTH / TRIM / UPPER / LOWER / + REVERSE / LPAD / RPAD / CONCAT / REPLACE / LIKE — 현재 구현 매트릭스 + 감사. 실무에서 기대하는데 없어서 당황스러운 것 우선. + - Harbour RTL 에 이미 상당수 있음 → 파서/라우터에서 매핑만 하면 됨. + +### Tier C — 구조적/큰 기능 (큰 크기) + +- [ ] **C-1 Triggers (BEFORE/AFTER INSERT/UPDATE/DELETE)** + - 크기: 2–3 일 + - `.fsc` 에 트리거 바디(SQL 텍스트) 저장. DDL 에 `CREATE TRIGGER ...` 파서. + DML 경로 (RunInsert / RunUpdate / RunDelete) 에서 matching 트리거 실행. + - 주의: 트리거 내 `NEW.col` / `OLD.col` 참조 지원 → 로컬 컨텍스트에 row + 스냅샷 push 해야 함. 무한 재귀 가드 (DEPTH 제한). + +- [ ] **C-2 Generated / Computed columns** + - 크기: 1 일 + - `col N(10,2) AS (price * quantity) VIRTUAL` + - VIRTUAL: 쿼리 시 계산. STORED: write 시 계산해 실제 저장. + - 간단한 가상 컬럼은 SELECT 리스트 치환으로 구현 가능. + +- [ ] **C-3 JSON 타입 + 함수** + - 크기: 2–3 일 + - 컬럼 타입 "J" 로 JSON 문자열 저장. `JSON_EXTRACT(col, '$.path')` / + `JSON_SET` / `JSON_ARRAY` / `JSON_OBJECT`. + - 구현 포인트: JSON 파서 + path 평가 엔진. Go encoding/json 으로 배킹하면 + 빠르지만 path 평가기 직접 작성 필요. + +- [ ] **C-4 Prepared statement API** + - 크기: 1 일 + - SQL `PREPARE stmt FROM '...'` / `EXECUTE stmt USING ...` / `DEALLOCATE stmt`. + - 현재 `?` 파라미터 + `TFiveSQL:ExecuteWith` 는 있음 → 명시적 PREPARE 구문 + 만 얹으면 됨. 장점: 바인딩 계약이 SQL 레벨에서 명확해져서 디버깅 용이. + +### Tier D — 엔터프라이즈 / 고급 (매우 큼) + +- [ ] **D-1 Full-text search** — `MATCH(col) AGAINST('keywords')`. 별도 역색인. +- [ ] **D-2 SQL stored procedures** — PRG 로 이미 쓸 수 있지만 SQL 구문 `CREATE PROCEDURE`. +- [ ] **D-3 Row-level security (RLS) / policies**. +- [ ] **D-4 Temporal queries** — `FOR SYSTEM_TIME AS OF ...`, history table. +- [ ] **D-5 Replication / 복제** — WAL-기반 mirror. +- [ ] **D-6 Clustered / covering indexes** — 인덱스 리프에 추가 컬럼 저장. + +### 추천 실행 순서 (상위 우선순위만) + +1. **A-2 INTERSECT/EXCEPT** → **A-3 FULL OUTER JOIN** → **A-5 window 함수 추가** → + **B-1 EXPLAIN** → **A-1 MERGE 재검증**. + → 하루 2개 페이스로 A+B 세트 ~1주 분량. +2. **C-1 Triggers** 는 스테이트풀/재귀 리스크 큼. 별도 브랜치에서 스파이크 먼저. +3. Tier D 는 특정 고객/워크로드 요구 있을 때만. + +### 기능 추가 전 체크리스트 + +- 테스트 PRG 먼저 (`/tmp/test_.prg`) — 기대 동작을 Assert 로 못박기. +- 3 게이트 매 번 돌리기 (Go unit / FiveSql2 43/43 / compat 56/56). +- 커스텀 PRG 테스트를 `_FiveSql2/test/` 에 접수해 43/43 → 44/44, 45/45... 로 + 성장시키기. 오늘까지 발견된 버그들 상당수가 43 개 테스트 범위 밖이었음 — + 커버리지 확장이 정확성에 직접 기여. + +--- + +--- + +## 학습한 교훈 (다음 세션에서도 유효) + +1. **감사 보고서 맹신 금물** — 이론적 안전성 ≠ 실제 FiveSql2 워크로드 안전성. + 예: #1 CHAR zero-copy 는 "mmap 은 Close 까지 유효" contract 였으나 CTE 임시 테이블이 + 쿼리 중 Close 반복 → UAF SIGSEGV. Revert + primitive 보존. + +2. **항상 A/B 실측** — 감사 예측 대비 실측이 크게 어긋나는 케이스 많음. + 예: SQL L1 ND_COL pre-resolve 는 15-30% 예측했으나 실제 3% (WHERE 가 PcCompile + 로 이미 처리되어 EvalExpr 우회). + 예: A2 는 SqlScan 이 LIMIT pushdown 미지원이라 측정 불가. + +3. **여러 경로가 있을 때 hot path 가 우리 타겟인지 먼저 확인** — #10 Mem lock-free + 는 단일 스레드 벤치에서는 작은 차이, 실전 SHARED 에서는 코어 수 선형 확장. + +4. **정확성은 성능보다 우선** — CLAUDE.md 절대 규칙: 하나라도 테스트 실패 시 즉시 revert. + +5. **큰 파일 편집 시 LOCAL 변수 선언 주의** — 메서드 끝에 선언된 LOCAL 과 블록 안 inline + LOCAL 은 Five 파서에서 주의 필요. 최소 재현 테스트로 확인. + +--- + +## 세션 종료 시 상태 + +- 작업 디렉토리: `/Users/charleskwon/Projects/fivedev/five` +- 전 테스트 통과. 빌드 클린. +- `default.pgo` 프로파일 파일 존재 (bench_bulk 기반). +- 벤치 / 테스트 PRG 흔적 `/tmp/` 에: `test_sql`, `test_compat`, + `bench_bulk`, `bench_idx`, `bench_del`, `bench_win`, `bench_order_*`, + `check_order`, `test_deep_err`, `test_dbg*`. +- `~/tmp/error.log` 이전 에러 덤프 보관. + +다음 세션 시작 시 `CLAUDE.md` 로드 → 이 파일 읽기 → 위 순서 1번부터 진행. diff --git a/_FiveSql2/src/TFiveSQL.prg b/_FiveSql2/src/TFiveSQL.prg index 8644182..b017153 100644 --- a/_FiveSql2/src/TFiveSQL.prg +++ b/_FiveSql2/src/TFiveSQL.prg @@ -24,7 +24,16 @@ * during Run() never corrupt the cached tree. * * Cached entries live until process exit; distinct SQL text count is - * bounded by the caller's template set, so LRU is deferred. */ + * bounded by the caller's template set in well-behaved callers, but + * a long-running server with diverse dynamic SQL (or one that bypasses + * the `?` placeholder convention and bakes literals into every query) + * can grow this hash without bound. SQL_PLAN_CACHE_MAX caps the entry + * count; on overflow we wipe the whole cache. Coarser than LRU but + * Five hashes have no insertion-order guarantee and the per-query + * bookkeeping for true LRU would dominate the parse cost we're + * trying to amortise. Reset cost is one extra parse per template + * already evicted, accepted in exchange for bounded memory. */ +#define SQL_PLAN_CACHE_MAX 1000 STATIC s_hPlanCache := { => } CLASS TFiveSQL @@ -53,7 +62,14 @@ RETURN SELF METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL LOCAL aTokens, hQuery, aResult - LOCAL aLex, cKey, aParams + LOCAL aLex, cKey, aParams, cVerPrefix + + /* Schema-version prefix: DDL (CREATE/ALTER/DROP) bumps SqlSchemaVer() + * so any plan that resolved columns or indexes against the pre-DDL + * schema misses the cache on the next call and gets re-parsed / + * re-compiled against the current layout. The prefix also flows + * through to s_hDmlPcodeCache via ::oExec:cCacheKey below. */ + cVerPrefix := hb_NToS( SqlSchemaVer() ) + "|" /* Fast path: no explicit aParams → single Go RTL lex+normalize call * (SqlLexAndExtractTemplate). Returns {aTokens, cKey, aParams}; the @@ -63,7 +79,7 @@ METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL IF Empty( ::aParams ) aLex := SqlLexAndExtractTemplate( cSQL ) aTokens := aLex[ 1 ] - cKey := aLex[ 2 ] + cKey := cVerPrefix + aLex[ 2 ] aParams := aLex[ 3 ] IF hb_HHasKey( s_hPlanCache, cKey ) @@ -74,6 +90,10 @@ METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL IF hQuery == NIL RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } } ENDIF + IF Len( s_hPlanCache ) >= SQL_PLAN_CACHE_MAX + s_hPlanCache := { => } + SqlDmlPcodeCacheReset() + ENDIF s_hPlanCache[ cKey ] := HbDeepClone( hQuery ) ENDIF @@ -81,8 +101,9 @@ METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL ::oExec:cCacheKey := cKey ELSE /* Caller supplied explicit params — cache by raw SQL text. */ - IF hb_HHasKey( s_hPlanCache, cSQL ) - hQuery := HbDeepClone( s_hPlanCache[ cSQL ] ) + cKey := cVerPrefix + cSQL + IF hb_HHasKey( s_hPlanCache, cKey ) + hQuery := HbDeepClone( s_hPlanCache[ cKey ] ) ELSE aTokens := SqlLexerTokenize( cSQL ) ::oParser := TSqlParser2():New( aTokens, ::aParams ) @@ -90,11 +111,15 @@ METHOD Execute( cSQL, bBlock ) CLASS TFiveSQL IF hQuery == NIL RETURN { { "__error__" }, { { SQL_ERR_SYNTAX, "Failed to parse SQL", cSQL } } } ENDIF - s_hPlanCache[ cSQL ] := HbDeepClone( hQuery ) + IF Len( s_hPlanCache ) >= SQL_PLAN_CACHE_MAX + s_hPlanCache := { => } + SqlDmlPcodeCacheReset() + ENDIF + s_hPlanCache[ cKey ] := HbDeepClone( hQuery ) ENDIF ::oExec := TSqlExecutor():New( hQuery, ::aParams ) - ::oExec:cCacheKey := cSQL + ::oExec:cCacheKey := cKey ENDIF ::oExec:bRowBlock := bBlock diff --git a/_FiveSql2/src/TSqlAgg.prg b/_FiveSql2/src/TSqlAgg.prg index 283b6e8..3e2859b 100644 --- a/_FiveSql2/src/TSqlAgg.prg +++ b/_FiveSql2/src/TSqlAgg.prg @@ -55,13 +55,28 @@ METHOD GroupBy( aRows, aFN, aCols, aGroupBy, xHaving, aTables, aParams ) CLASS T LOCAL aSets, aCurSet, nSet, hOmitIdx, aSubResult LOCAL aGroupedRows LOCAL aColInfo /* { lIsAgg, nCI } per SELECT column, pre-resolved */ + LOCAL xAggNode, cAggFn - /* Aggregate on empty set */ + /* Aggregate on empty set — SQL standard semantics: + * COUNT(*) / COUNT(col) → 0 + * SUM / AVG / MIN / MAX → NULL + * The old code returned 0 uniformly for every aggregate, which + * looked right for COUNT but silently corrupted the other four. */ IF Len( aRows ) == 0 .AND. ::HasAgg( aCols ) aNewRow := {} FOR j := 1 TO Len( aCols ) IF SqlExprHasAgg( aCols[ j ][ 1 ] ) - AAdd( aNewRow, 0 ) + xAggNode := aCols[ j ][ 1 ] + cAggFn := "" + IF ValType( xAggNode ) == "A" .AND. Len( xAggNode ) >= 2 .AND. ; + xAggNode[ 1 ] == ND_FN .AND. ValType( xAggNode[ 2 ] ) == "C" + cAggFn := Upper( xAggNode[ 2 ] ) + ENDIF + IF cAggFn == "COUNT" + AAdd( aNewRow, 0 ) + ELSE + AAdd( aNewRow, NIL ) + ENDIF ELSE AAdd( aNewRow, NIL ) ENDIF @@ -315,9 +330,40 @@ RETURN .F. */ METHOD FindGroupIdx( xGroupExpr, aCols, aFN ) CLASS TSqlAgg - LOCAL i, xSel, cGName, cSName, nDot + LOCAL i, xSel, cGName, cSName, nDot, nOrdinal - IF xGroupExpr == NIL .OR. xGroupExpr[ 1 ] != ND_COL + IF xGroupExpr == NIL + RETURN 0 + ENDIF + + /* GROUP BY — `GROUP BY 1` refers to the 1-based position + * of the SELECT-list column, per SQL:1999. Without this the + * literal numeric expression got passed to FindColIdx which only + * understands ND_COL → returned 0 → everything collapsed into + * a single bucket. */ + IF xGroupExpr[ 1 ] == ND_LIT .AND. ValType( xGroupExpr[ 2 ] ) == "N" + nOrdinal := Int( xGroupExpr[ 2 ] ) + IF nOrdinal >= 1 .AND. nOrdinal <= Len( aCols ) + RETURN nOrdinal + ENDIF + RETURN 0 + ENDIF + + /* GROUP BY — match against the SELECT list by + * canonical name. Both sides go through SqlExprName so that + * `GROUP BY UPPER(dept)` finds `SELECT UPPER(dept)` even when + * the SELECT-list column is anonymous. Same for arithmetic + * expressions like `salary / 1000`. */ + IF xGroupExpr[ 1 ] != ND_COL + cGName := Upper( SqlExprName( xGroupExpr ) ) + IF ! Empty( cGName ) + FOR i := 1 TO Len( aCols ) + cSName := Upper( SqlExprName( aCols[ i ][ 1 ] ) ) + IF cSName == cGName + RETURN i + ENDIF + NEXT + ENDIF RETURN ::FindColIdx( xGroupExpr, aFN ) ENDIF @@ -383,8 +429,74 @@ METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg LOCAL nCount := 0, nSum := 0, xMin := NIL, xMax := NIL LOCAL cResult, cSep LOCAL xArg + LOCAL xL, xR, aFnArgs + LOCAL lDistinct, hSeen, cKey - IF xE == NIL .OR. xE[ 1 ] != ND_FN + IF xE == NIL + RETURN 0 + ENDIF + + /* Outer expression containing aggregates (e.g. MAX(id)+1, + * COUNT(*)*2, SUM(v)-30): the dispatcher routes us here whenever + * SqlExprHasAgg is .T., even if the top-level node is not the + * aggregate itself. Recursively compute inner aggregates, then + * apply the wrapping operator via SqlEvalRowExpr against literal + * stand-ins. Without this guard the early-return below collapsed + * every wrapped aggregate to 0 — silently. */ + IF xE[ 1 ] == ND_BIN + xL := ::ComputeAgg( xE[ 3 ], aGR, aFN ) + xR := ::ComputeAgg( xE[ 4 ], aGR, aFN ) + RETURN SqlEvalRowExpr( ; + { ND_BIN, xE[ 2 ], { ND_LIT, xL }, { ND_LIT, xR } }, {}, {} ) + ENDIF + + IF xE[ 1 ] == ND_UNI + xL := ::ComputeAgg( xE[ 3 ], aGR, aFN ) + RETURN SqlEvalRowExpr( ; + { ND_UNI, xE[ 2 ], { ND_LIT, xL } }, {}, {} ) + ENDIF + + IF xE[ 1 ] == ND_LIT + RETURN xE[ 2 ] + ENDIF + + IF xE[ 1 ] == ND_NIL + RETURN NIL + ENDIF + + /* Non-aggregate function wrapping aggregates: ROUND(AVG(p),2), + * COALESCE(SUM(x), 0), etc. Recurse into each arg, then dispatch. */ + IF xE[ 1 ] == ND_FN .AND. ! SqlIsAggName( xE[ 2 ] ) + aFnArgs := {} + FOR i := 1 TO Len( xE[ 3 ] ) + AAdd( aFnArgs, ::ComputeAgg( xE[ 3 ][ i ], aGR, aFN ) ) + NEXT + RETURN SqlEvalFunc( xE[ 2 ], aFnArgs ) + ENDIF + + /* CASE wrapping aggregates: `CASE WHEN COUNT(*) > 2 THEN 'many' + * ELSE 'few' END`. SqlExprHasAgg flags the whole CASE as having + * an aggregate, the dispatcher routes here, and the early-out + * below collapsed it to 0 — the literal 'many'/'few' came back + * as 0. Evaluate each WHEN cond + branch / ELSE through the same + * recursive ComputeAgg so the aggregate inside the cond gets + * computed once per group. */ + IF xE[ 1 ] == ND_CASE + IF ValType( xE[ 2 ] ) == "A" + FOR i := 1 TO Len( xE[ 2 ] ) + xL := ::ComputeAgg( xE[ 2 ][ i ][ 1 ], aGR, aFN ) + IF SqlIsTrue( xL ) + RETURN ::ComputeAgg( xE[ 2 ][ i ][ 2 ], aGR, aFN ) + ENDIF + NEXT + ENDIF + IF Len( xE ) >= 3 .AND. xE[ 3 ] != NIL + RETURN ::ComputeAgg( xE[ 3 ], aGR, aFN ) + ENDIF + RETURN NIL + ENDIF + + IF xE[ 1 ] != ND_FN RETURN 0 ENDIF @@ -421,15 +533,25 @@ METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg RETURN 0 ENDIF + /* DISTINCT modifier: parser stashes a .T. flag in xE[5] when the + * aggregate was written `COUNT(DISTINCT col)` etc. PRG path needs + * a per-value seen-set so duplicates contribute once. Fast path + * (SqlComputeAggSimple) has no DISTINCT support — skip it when + * the modifier is set so PRG handles dedup. */ + lDistinct := ( Len( xE ) >= 5 .AND. xE[ 5 ] == .T. ) + /* Fast path: plain column + common aggregate → Go RTL single-pass loop. * Gate on column-ref argument + pre-resolved nCol > 0; complex args * (CASE/BIN/UDF) still fall through to the PRG loop below. */ - IF nCol > 0 .AND. xArg[ 1 ] == ND_COL .AND. ; + IF ! lDistinct .AND. nCol > 0 .AND. xArg[ 1 ] == ND_COL .AND. ; ( cFunc == "COUNT" .OR. cFunc == "SUM" .OR. cFunc == "AVG" .OR. ; cFunc == "MIN" .OR. cFunc == "MAX" ) RETURN SqlComputeAggSimple( aGR, nCol, cFunc ) ENDIF + IF lDistinct + hSeen := { => } + ENDIF FOR i := 1 TO Len( aGR ) IF nCol > 0 .AND. nCol <= Len( aGR[ i ] ) xVal := aGR[ i ][ nCol ] @@ -441,6 +563,13 @@ METHOD ComputeAgg( xE, aGR, aFN ) CLASS TSqlAgg xVal := NIL ENDIF IF xVal != NIL + IF lDistinct + cKey := SqlValToStr( xVal ) + IF hb_HHasKey( hSeen, cKey ) + LOOP + ENDIF + hSeen[ cKey ] := .T. + ENDIF nCount++ nSum += SqlCoerceNum( xVal ) /* Use SqlCmpLt for type-safe comparison (handles strings, dates) */ @@ -577,6 +706,56 @@ METHOD EvalHavingExpr( xE, aNewRow, aCols, aGR, aFN, aParams ) CLASS TSqlAgg IF cOp == "<=" RETURN SqlCmpEq( xL, xR ) .OR. SqlCmpLt( xL, xR ) ENDIF + /* Arithmetic inside HAVING: `HAVING SUM(amt)+1 > 200`, + * `HAVING COUNT(*)*100 > 250`, etc. Without these branches + * the wrapped expression returned NIL and the comparison + * with the constant collapsed to false → 0 rows silent. + * SQL NULL propagation: any NIL operand → NIL. */ + IF cOp == "+" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF + IF ValType( xL ) == "D" .AND. ValType( xR ) == "N" + RETURN xL + xR + ENDIF + IF ValType( xL ) == "N" .AND. ValType( xR ) == "D" + RETURN xR + xL + ENDIF + RETURN SqlCoerceNum( xL ) + SqlCoerceNum( xR ) + ENDIF + IF cOp == "-" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF + IF ValType( xL ) == "D" .AND. ValType( xR ) == "N" + RETURN xL - xR + ENDIF + IF ValType( xL ) == "D" .AND. ValType( xR ) == "D" + RETURN xL - xR + ENDIF + RETURN SqlCoerceNum( xL ) - SqlCoerceNum( xR ) + ENDIF + IF cOp == "*" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF + RETURN SqlCoerceNum( xL ) * SqlCoerceNum( xR ) + ENDIF + IF cOp == "/" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF + IF SqlCoerceNum( xR ) != 0 + RETURN SqlCoerceNum( xL ) / SqlCoerceNum( xR ) + ENDIF + RETURN NIL + ENDIF + IF cOp == "||" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF + RETURN SqlCoerceStr( xL ) + SqlCoerceStr( xR ) + ENDIF RETURN NIL CASE xE[ 1 ] == ND_UNI diff --git a/_FiveSql2/src/TSqlAlias.prg b/_FiveSql2/src/TSqlAlias.prg index aebb585..e5e49d6 100644 --- a/_FiveSql2/src/TSqlAlias.prg +++ b/_FiveSql2/src/TSqlAlias.prg @@ -144,6 +144,15 @@ METHOD AcquireTemp( cPurpose ) CLASS TSqlAlias EXIT ENDIF NEXT + /* Check Harbour's global alias space too. Each TSqlExecutor + * has its own slots array, so a nested executor (subquery / + * correlated CTE) wouldn't otherwise see an alias already + * claimed by the outer executor — AcquireTemp then handed + * back the same name and both scopes shared the workarea, + * truncating the outer's scan when the inner ran. */ + IF ! lTaken .AND. Select( cUp ) > 0 + lTaken := .T. + ENDIF IF ! lTaken AAdd( ::aSlots, { cUp, cPurpose, cPurpose, .F. } ) RETURN cUp diff --git a/_FiveSql2/src/TSqlDDL.prg b/_FiveSql2/src/TSqlDDL.prg index a73912b..266fea3 100644 --- a/_FiveSql2/src/TSqlDDL.prg +++ b/_FiveSql2/src/TSqlDDL.prg @@ -105,9 +105,11 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL LOCAL cTable, aFields := {}, cCol, cType, nWidth, nDec, i LOCAL cHbType, aPKCols := {}, aAutoIncCols := {} LOCAL aUniqCols := {}, aCheckExprs := {}, aFKDefs := {} - LOCAL cFKCol, cFKRefTable, cFKRefCol + LOCAL cUqGroup + LOCAL cFKCol, cFKRefTable, cFKRefCol, cFKOnDelete, cFKOnUpdate LOCAL nHandle, cMeta LOCAL cCheckExpr, nCheckDepth + LOCAL lNullable IF ::DDL_IsKW( aTokens, nPos, "TABLE" ) nPos++ @@ -154,13 +156,24 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL ENDIF ENDIF - /* UNIQUE constraint (table-level) */ + /* UNIQUE constraint (table-level). Multi-column form + * `UNIQUE (a, b)` enforces a UNIQUE TUPLE — (1,1) and (1,2) + * are distinct and both allowed. The previous code appended + * each column to aUniqCols as its own entry, which collapsed + * to "every column is independently unique" → (1,1),(1,2) + * triggered a spurious dup. Encode multi-col as a single + * comma-separated string so the .fsc round-trips cleanly + * and SqlValidateUnique sees the whole tuple. */ IF ::DDL_IsKW( aTokens, nPos, "UNIQUE" ) nPos++ IF ::DDL_TT( aTokens, nPos ) == TK_LPAR nPos++ + cUqGroup := "" WHILE ::DDL_TT( aTokens, nPos ) == TK_NAME - AAdd( aUniqCols, ::DDL_TV( aTokens, nPos ) ) + IF ! Empty( cUqGroup ) + cUqGroup += "," + ENDIF + cUqGroup += ::DDL_TV( aTokens, nPos ) nPos++ IF ::DDL_TT( aTokens, nPos ) == TK_COMMA nPos++ @@ -169,6 +182,9 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL IF ::DDL_TT( aTokens, nPos ) == TK_RPAR nPos++ ENDIF + IF ! Empty( cUqGroup ) + AAdd( aUniqCols, cUqGroup ) + ENDIF ENDIF IF ::DDL_TT( aTokens, nPos ) == TK_COMMA nPos++ @@ -240,7 +256,63 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL nPos++ ENDIF ENDIF - AAdd( aFKDefs, { cFKCol, cFKRefTable, cFKRefCol } ) + /* Optional ON DELETE / ON UPDATE . Both clauses + * are optional and may appear in either order. Default + * is RESTRICT (the SQL standard's safest choice). CASCADE + * / SET NULL / NO ACTION / RESTRICT are accepted; anything + * else collapses to RESTRICT so legacy DML keeps working + * on tables created before this clause was supported. */ + cFKOnDelete := "RESTRICT" + cFKOnUpdate := "RESTRICT" + DO WHILE ::DDL_IsKW( aTokens, nPos, "ON" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "DELETE" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "CASCADE" ) + cFKOnDelete := "CASCADE" + nPos++ + ELSEIF ::DDL_IsKW( aTokens, nPos, "RESTRICT" ) + cFKOnDelete := "RESTRICT" + nPos++ + ELSEIF ::DDL_IsKW( aTokens, nPos, "SET" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "NULL" ) + cFKOnDelete := "SETNULL" + nPos++ + ENDIF + ELSEIF ::DDL_IsKW( aTokens, nPos, "NO" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "ACTION" ) + cFKOnDelete := "NOACTION" + nPos++ + ENDIF + ENDIF + ELSEIF ::DDL_IsKW( aTokens, nPos, "UPDATE" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "CASCADE" ) + cFKOnUpdate := "CASCADE" + nPos++ + ELSEIF ::DDL_IsKW( aTokens, nPos, "RESTRICT" ) + cFKOnUpdate := "RESTRICT" + nPos++ + ELSEIF ::DDL_IsKW( aTokens, nPos, "SET" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "NULL" ) + cFKOnUpdate := "SETNULL" + nPos++ + ENDIF + ELSEIF ::DDL_IsKW( aTokens, nPos, "NO" ) + nPos++ + IF ::DDL_IsKW( aTokens, nPos, "ACTION" ) + cFKOnUpdate := "NOACTION" + nPos++ + ENDIF + ENDIF + ELSE + EXIT + ENDIF + ENDDO + AAdd( aFKDefs, { cFKCol, cFKRefTable, cFKRefCol, cFKOnDelete, cFKOnUpdate } ) IF ::DDL_TT( aTokens, nPos ) == TK_COMMA nPos++ ENDIF @@ -258,32 +330,39 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL nDec := 0 cHbType := "C" + /* Data-type mapping. Token-exact comparison (not `$` + * substring): a 1-char type name like `D` used to match + * `$ "DOUBLE,FLOAT,REAL"` as a substring and silently + * turned a DATE column into N(18,6). Same class of bug + * hid `L` (LOGICAL → BOOL prefix), `T` (TIMESTAMP → TEXT + * prefix). Wrapping both sides with commas forces a + * whole-token match. */ DO CASE - CASE cType $ "CHAR,CHARACTER,VARCHAR" + CASE ( "," + cType + "," ) $ ",CHAR,CHARACTER,VARCHAR," cHbType := "C" nWidth := 10 - CASE cType $ "NUMERIC,NUMBER,DECIMAL,DEC" + CASE ( "," + cType + "," ) $ ",NUMERIC,NUMBER,DECIMAL,DEC,N," cHbType := "N" nWidth := 10 nDec := 0 - CASE cType $ "INT,INTEGER,SMALLINT,BIGINT" + CASE ( "," + cType + "," ) $ ",INT,INTEGER,SMALLINT,BIGINT," cHbType := "N" nWidth := 10 nDec := 0 - CASE cType $ "DOUBLE,FLOAT,REAL" + CASE ( "," + cType + "," ) $ ",DOUBLE,FLOAT,REAL," cHbType := "N" nWidth := 18 nDec := 6 - CASE cType == "DATE" + CASE cType == "DATE" .OR. cType == "D" cHbType := "D" nWidth := 8 - CASE cType $ "LOGICAL,BOOLEAN,BOOL" + CASE ( "," + cType + "," ) $ ",LOGICAL,BOOLEAN,BOOL,L," cHbType := "L" nWidth := 1 - CASE cType $ "MEMO,TEXT,CLOB" + CASE ( "," + cType + "," ) $ ",MEMO,TEXT,CLOB,M," cHbType := "M" nWidth := 10 - CASE cType $ "TIMESTAMP,DATETIME" + CASE ( "," + cType + "," ) $ ",TIMESTAMP,DATETIME,T," cHbType := "T" nWidth := 8 OTHERWISE @@ -310,12 +389,41 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL ENDIF ENDIF - /* Skip NOT NULL, DEFAULT, AUTO_INCREMENT, IDENTITY */ + /* Column-level constraints. Recognize inline UNIQUE / + * PRIMARY KEY along with the legacy skip-list for NOT NULL, + * DEFAULT, AUTO_INCREMENT, IDENTITY. Previously inline + * `col TYPE UNIQUE` was silently dropped — SqlValidateUnique + * had no metadata to enforce against, so duplicates slipped + * in. Table-level `UNIQUE (col)` was the only working form. + * + * Nullability: SQL default is "nullable unless PRIMARY KEY + * or NOT NULL". We set lNullable := .T. here and let an + * explicit NOT NULL / PRIMARY clause override. The + * resulting flag byte passed to dbCreate enables the + * _NullFlags bitmap column on the DBF side. */ + lNullable := .T. WHILE ::DDL_IsKW( aTokens, nPos, "NOT" ) .OR. ::DDL_IsKW( aTokens, nPos, "NULL" ) .OR. ; ::DDL_IsKW( aTokens, nPos, "DEFAULT" ) .OR. ::DDL_IsKW( aTokens, nPos, "AUTO_INCREMENT" ) .OR. ; - ::DDL_IsKW( aTokens, nPos, "IDENTITY" ) + ::DDL_IsKW( aTokens, nPos, "IDENTITY" ) .OR. ::DDL_IsKW( aTokens, nPos, "UNIQUE" ) .OR. ; + ::DDL_IsKW( aTokens, nPos, "PRIMARY" ) .OR. ::DDL_IsKW( aTokens, nPos, "KEY" ) IF ::DDL_IsKW( aTokens, nPos, "AUTO_INCREMENT" ) .OR. ::DDL_IsKW( aTokens, nPos, "IDENTITY" ) AAdd( aAutoIncCols, cCol ) + ELSEIF ::DDL_IsKW( aTokens, nPos, "UNIQUE" ) + AAdd( aUniqCols, cCol ) + ELSEIF ::DDL_IsKW( aTokens, nPos, "PRIMARY" ) + /* `PRIMARY KEY` → implies UNIQUE + NOT NULL. Record as + * UNIQUE column so SqlValidateUnique enforces it; the + * PK index gets built separately by aPKCols / aPKDefs + * when table-level PRIMARY KEY is used. */ + AAdd( aUniqCols, cCol ) + lNullable := .F. + ELSEIF ::DDL_IsKW( aTokens, nPos, "NOT" ) + /* Look ahead for NOT NULL — nPos+1 should be "NULL". + * The generic skip handles the NULL token itself on + * the next iteration. */ + IF ::DDL_IsKW( aTokens, nPos + 1, "NULL" ) + lNullable := .F. + ENDIF ENDIF nPos++ IF ::DDL_TT( aTokens, nPos ) == TK_TEXT .OR. ::DDL_TT( aTokens, nPos ) == TK_NUM @@ -323,7 +431,12 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL ENDIF ENDDO - AAdd( aFields, { cCol, cHbType, nWidth, nDec } ) + /* 5th array element is the field flag byte consumed by + * DbCreate (FieldFlagNullable = 0x02). Older constraint + * emitters still produce 4-element rows; both shapes are + * tolerated by the RTL. */ + AAdd( aFields, { cCol, cHbType, nWidth, nDec, ; + iif( lNullable, 2, 0 ) } ) IF ::DDL_TT( aTokens, nPos ) == TK_COMMA nPos++ @@ -336,6 +449,46 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL ENDIF ENDIF + /* Table-level PRIMARY KEY (col, col, ...) implies NOT NULL for + * each named column. The column descriptors were already built + * with lNullable=.T. (SQL default), so we post-clear the flag + * byte before dbCreate sees the definitions. */ + FOR i := 1 TO Len( aFields ) + IF AScan( aPKCols, {| c | Upper( c ) == Upper( aFields[ i ][ 1 ] ) } ) > 0 + aFields[ i ][ 5 ] := 0 + ENDIF + NEXT + + /* SQL-standard: CREATE TABLE fails when the target already + * exists. `IF NOT EXISTS` to silently accept is a future + * extension; without this check a second CREATE silently + * truncated the table's data — quiet data loss. */ + IF File( Lower( cTable ) + ".dbf" ) + RETURN { { "error" }, { { ; + "CREATE TABLE: table '" + cTable + "' already exists" } } } + ENDIF + + /* Sweep stale convention-named sibling files left over from a + * prior lifetime of this table name. SqlAttachTableIndexes / + * dbUseArea auto-attach `
_pk.ntx`, `
_uq.ntx`, and + * the production `
.cdx` whenever they exist on disk — if + * the prior incarnation had a PK/UNIQUE column and the new + * incarnation does not (or has different keys), the auto-attached + * stale index has the OLD record offsets. SkipIndexed then walks + * those keys and silently skips records past the prior LastRec — + * the user-visible symptom was `INSERT (r1) → INSERT (r2) → + * INSERT (r3) → SELECT` returning only 2 rows. Memo (`.dbt`/`.fpt`) + * leftovers similarly let stale memo blocks surface through fresh + * column references. Erasing here means CREATE TABLE always + * starts on a clean slate; legitimate PK/UNIQUE indexes get rebuilt + * below from the current column list. */ + FErase( Lower( cTable ) + ".dbt" ) + FErase( Lower( cTable ) + ".fpt" ) + FErase( Lower( cTable ) + ".cdx" ) + FErase( Lower( cTable ) + ".fsc" ) + FErase( Lower( cTable ) + "_pk.ntx" ) + FErase( Lower( cTable ) + "_uq.ntx" ) + /* Execute: create the DBF file */ BEGIN SEQUENCE dbCreate( Lower( cTable ) + ".dbf", aFields ) @@ -368,6 +521,18 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL END SEQUENCE ENDIF + /* PRIMARY KEY implies UNIQUE + NOT NULL. The index is already built + * into `
_pk.ntx`; add the columns to the .fsc UNIQUE list so + * SqlValidateUnique sees them on subsequent INSERT / UPDATE. Without + * this an UPDATE that changed a PK column to an existing value + * silently succeeded (two rows with the same PK), even though the + * PK index file was still unique. */ + FOR i := 1 TO Len( aPKCols ) + IF AScan( aUniqCols, {| c | Upper( c ) == Upper( aPKCols[ i ] ) } ) == 0 + AAdd( aUniqCols, aPKCols[ i ] ) + ENDIF + NEXT + /* Store CHECK and FOREIGN KEY constraints in .fsc metadata file */ IF Len( aCheckExprs ) > 0 .OR. Len( aFKDefs ) > 0 .OR. Len( aUniqCols ) > 0 cMeta := "" @@ -375,7 +540,20 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL cMeta += "CHECK:" + aCheckExprs[ i ] + Chr( 10 ) NEXT FOR i := 1 TO Len( aFKDefs ) - cMeta += "FK:" + aFKDefs[ i ][ 1 ] + ":" + aFKDefs[ i ][ 2 ] + ":" + aFKDefs[ i ][ 3 ] + Chr( 10 ) + /* Format: FK:child_col:parent_table:parent_col[:on_delete[:on_update]]. + * Older files without the 4th/5th segment load as "RESTRICT" + * in SqlLoadConstraints — preserves legacy behavior for DDL + * that predates each clause. The on_update segment is always + * emitted when present so .fsc files written by this build + * round-trip cleanly even if on_delete is the default. */ + cMeta += "FK:" + aFKDefs[ i ][ 1 ] + ":" + aFKDefs[ i ][ 2 ] + ":" + aFKDefs[ i ][ 3 ] + IF Len( aFKDefs[ i ] ) >= 4 .AND. ! Empty( aFKDefs[ i ][ 4 ] ) + cMeta += ":" + aFKDefs[ i ][ 4 ] + IF Len( aFKDefs[ i ] ) >= 5 .AND. ! Empty( aFKDefs[ i ][ 5 ] ) + cMeta += ":" + aFKDefs[ i ][ 5 ] + ENDIF + ENDIF + cMeta += Chr( 10 ) NEXT FOR i := 1 TO Len( aUniqCols ) cMeta += "UNIQUE:" + aUniqCols[ i ] + Chr( 10 ) @@ -387,6 +565,8 @@ METHOD CreateTable( aTokens, nPos ) CLASS TSqlDDL ENDIF ENDIF + SqlBumpSchemaVer() + RETURN { { "result" }, { { "Table " + cTable + " created (" + hb_ntos( Len( aFields ) ) + " columns)" } } } @@ -443,12 +623,14 @@ METHOD CreateIndex( aTokens, nPos ) CLASS TSqlDDL RETURN { { "error" }, { { "CREATE INDEX failed: " + cIndex + " on " + cTable } } } END SEQUENCE + SqlBumpSchemaVer() + RETURN { { "result" }, { { "Index " + cIndex + " created on " + cTable + " (" + cExpr + ")" } } } METHOD DropTable( aTokens, nPos ) CLASS TSqlDDL - LOCAL cTable + LOCAL cTable, lIfExists := .F. IF ::DDL_IsKW( aTokens, nPos, "TABLE" ) nPos++ @@ -456,18 +638,44 @@ METHOD DropTable( aTokens, nPos ) CLASS TSqlDDL IF ::DDL_IsKW( aTokens, nPos, "IF" ) nPos++ ::DDL_EatKW( aTokens, @nPos, "EXISTS" ) + lIfExists := .T. ENDIF cTable := ::DDL_TV( aTokens, nPos ) nPos++ + /* SQL-standard strictness: DROP TABLE fails when the table isn't + * there, unless the caller opted into `DROP TABLE IF EXISTS ...`. + * Previously the FErase chain returned "success" regardless, + * masking typos in scripts. */ + IF ! File( Lower( cTable ) + ".dbf" ) + IF lIfExists + RETURN { { "result" }, { { ; + "Table " + cTable + " did not exist (IF EXISTS)" } } } + ENDIF + RETURN { { "error" }, { { ; + "DROP TABLE: table '" + cTable + "' does not exist" } } } + ENDIF + + /* Drop every convention-named sibling file, not just the DBF. A + * stale `.cdx` (production CDX) or `_pk.ntx` left over after DROP + * silently re-attaches to the next CREATE-TABLE-with-same-name + * — same prefix-glob / orphan-index hazard that produced the + * multi-row INSERT silent row drop. Cover memo (.dbt/.fpt), + * convention indexes (_pk/_uq.ntx + .cdx), constraint metadata + * (.fsc), and view body (.fsv) so a subsequent CREATE TABLE + * starts on a clean slate. */ FErase( Lower( cTable ) + ".dbf" ) FErase( Lower( cTable ) + ".dbt" ) FErase( Lower( cTable ) + ".fpt" ) FErase( Lower( cTable ) + ".fsc" ) + FErase( Lower( cTable ) + ".cdx" ) + FErase( Lower( cTable ) + ".fsv" ) FErase( Lower( cTable ) + "_pk.ntx" ) FErase( Lower( cTable ) + "_uq.ntx" ) + SqlBumpSchemaVer() + RETURN { { "result" }, { { "Table " + cTable + " dropped" } } } @@ -488,12 +696,15 @@ METHOD DropIndex( aTokens, nPos ) CLASS TSqlDDL FErase( Lower( cIndex ) + ".ntx" ) + SqlBumpSchemaVer() + RETURN { { "result" }, { { "Index " + cIndex + " dropped" } } } METHOD AlterTable( aTokens, nPos ) CLASS TSqlDDL LOCAL cTable, cAction, cCol, cType, nWidth, nDec, cHbType + LOCAL lNullable IF ::DDL_IsKW( aTokens, nPos, "TABLE" ) nPos++ @@ -516,18 +727,28 @@ METHOD AlterTable( aTokens, nPos ) CLASS TSqlDDL nWidth := 10 nDec := 0 + /* Same substring-match hazard CreateTable's type dispatcher + * fixed earlier — bare `cType $ "CHAR,..."` lets a 1-char type + * like "C" or "A" silently match against any list element that + * contains it. Wrap both sides with commas so the match is a + * whole-token compare (CreateTable applies the same pattern at + * its own type dispatch). */ DO CASE - CASE cType $ "CHAR,CHARACTER,VARCHAR" + CASE ( "," + cType + "," ) $ ",CHAR,CHARACTER,VARCHAR," cHbType := "C" - CASE cType $ "NUMERIC,NUMBER,INT,INTEGER" + CASE ( "," + cType + "," ) $ ",NUMERIC,NUMBER,DECIMAL,DEC,N,INT,INTEGER,SMALLINT,BIGINT," cHbType := "N" - CASE cType == "DATE" + CASE ( "," + cType + "," ) $ ",DOUBLE,FLOAT,REAL," + cHbType := "N" + nWidth := 18 + nDec := 6 + CASE cType == "DATE" .OR. cType == "D" cHbType := "D" nWidth := 8 - CASE cType $ "LOGICAL,BOOLEAN" + CASE ( "," + cType + "," ) $ ",LOGICAL,BOOLEAN,BOOL,L," cHbType := "L" nWidth := 1 - CASE cType $ "MEMO,TEXT" + CASE ( "," + cType + "," ) $ ",MEMO,TEXT,CLOB,M," cHbType := "M" OTHERWISE cHbType := "C" @@ -551,11 +772,26 @@ METHOD AlterTable( aTokens, nPos ) CLASS TSqlDDL ENDIF ENDIF - RETURN SqlAlterAddColumn( cTable, cCol, cHbType, nWidth, nDec ) + /* Nullability clause for the added column. Defaults to NULL + * (SQL standard); explicit `NOT NULL` drops the nullable flag + * so no _NullFlags bit is allocated. `NULL` is accepted but + * redundant. */ + lNullable := .T. + IF ::DDL_IsKW( aTokens, nPos, "NOT" ) .AND. ::DDL_IsKW( aTokens, nPos + 1, "NULL" ) + lNullable := .F. + nPos += 2 + ELSEIF ::DDL_IsKW( aTokens, nPos, "NULL" ) + nPos++ + ENDIF + + SqlBumpSchemaVer() + RETURN SqlAlterAddColumn( cTable, cCol, cHbType, nWidth, nDec, ; + iif( lNullable, 2, 0 ) ) ELSEIF cAction == "DROP" cCol := ::DDL_TV( aTokens, nPos ) nPos++ + SqlBumpSchemaVer() RETURN SqlAlterDropColumn( cTable, cCol ) ENDIF @@ -564,7 +800,13 @@ RETURN { { "error" }, { { "ALTER TABLE: unsupported action " + cAction } } } METHOD CreateView( aTokens, nPos ) CLASS TSqlDDL - LOCAL cView, cSQL, i, nHandle + LOCAL cView, cSQL, i, nHandle, lOrReplace := .F. + + IF ::DDL_IsKW( aTokens, nPos, "OR" ) + nPos++ + ::DDL_EatKW( aTokens, @nPos, "REPLACE" ) + lOrReplace := .T. + ENDIF IF ::DDL_IsKW( aTokens, nPos, "VIEW" ) nPos++ @@ -575,6 +817,23 @@ METHOD CreateView( aTokens, nPos ) CLASS TSqlDDL ::DDL_EatKW( aTokens, @nPos, "AS" ) + /* SQL standard: CREATE VIEW errors when the target already exists, + * unless `OR REPLACE` was specified. Previously the FCreate at the + * tail silently overwrote the prior `.fsv`, masking name conflicts + * the same way unprotected CREATE TABLE used to (already fixed for + * tables). With the new error path, callers wanting redefinition + * write `CREATE OR REPLACE VIEW v ...`. */ + IF File( Lower( cView ) + ".fsv" ) .AND. ! lOrReplace + RETURN { { "error" }, { { ; + "CREATE VIEW: view '" + cView + "' already exists" } } } + ENDIF + + /* Reassemble the SELECT body from tokens. TK_TEXT must be wrapped + * back in single quotes (with embedded ' doubled) so re-parsing + * the stored SQL recovers the same string literals. Without this, + * `WHERE dept = 'eng'` round-tripped to `WHERE dept = eng`, which + * the second parse interpreted as a column reference and the + * subsequent SELECT errored out. */ cSQL := "" FOR i := nPos TO Len( aTokens ) IF aTokens[ i ][ 1 ] == TK_END @@ -583,7 +842,11 @@ METHOD CreateView( aTokens, nPos ) CLASS TSqlDDL IF ! Empty( cSQL ) cSQL += " " ENDIF - cSQL += aTokens[ i ][ 2 ] + IF aTokens[ i ][ 1 ] == TK_TEXT + cSQL += "'" + StrTran( aTokens[ i ][ 2 ], "'", "''" ) + "'" + ELSE + cSQL += aTokens[ i ][ 2 ] + ENDIF NEXT IF Empty( cSQL ) @@ -597,6 +860,8 @@ METHOD CreateView( aTokens, nPos ) CLASS TSqlDDL FWrite( nHandle, cSQL ) FClose( nHandle ) + SqlBumpSchemaVer() + RETURN { { "result" }, { { "View " + cView + " created" } } } @@ -617,6 +882,8 @@ METHOD DropView( aTokens, nPos ) CLASS TSqlDDL FErase( Lower( cView ) + ".fsv" ) + SqlBumpSchemaVer() + RETURN { { "result" }, { { "View " + cView + " dropped" } } } @@ -636,91 +903,179 @@ FUNCTION SqlBuildIndexExpr( aCols ) RETURN cExpr -FUNCTION SqlAlterAddColumn( cTable, cCol, cType, nWidth, nDec ) +FUNCTION SqlAlterAddColumn( cTable, cCol, cType, nWidth, nDec, nFlags ) - LOCAL aStruct, cFile, cTmp, i + LOCAL aStruct, cFile, cTmp, i, j, nCopied := 0, nSrcFC + LOCAL aRows, aRow + + IF nFlags == NIL + /* Preserve SQL-default nullability: unnamed nullability → NULL */ + nFlags := 2 + ENDIF cFile := Lower( cTable ) + ".dbf" cTmp := Lower( cTable ) + "_tmp.dbf" + /* Duplicate-column check must run OUTSIDE BEGIN SEQUENCE because + * `RETURN` from inside a sequence block doesn't actually unwind + * the function in this runtime (observed: the return value was + * discarded and control fell through to the success path, + * silently adding a second column with the same name). Checking + * up front with a lightweight open/close pair is cheap. */ + USE ( cFile ) NEW EXCLUSIVE ALIAS ALTSRC + aStruct := dbStruct() + FOR i := 1 TO Len( aStruct ) + IF Upper( aStruct[ i ][ 1 ] ) == Upper( cCol ) + dbCloseArea() + RETURN { { "error" }, { { ; + "ALTER TABLE ADD: column '" + cCol + ; + "' already exists in " + cTable } } } + ENDIF + NEXT + dbCloseArea() + BEGIN SEQUENCE USE ( cFile ) NEW EXCLUSIVE ALIAS ALTSRC - aStruct := dbStruct() - AAdd( aStruct, { cCol, cType, nWidth, nDec } ) + /* dbStruct() returns 5-element rows (name/type/len/dec/flags). + * Appending the same shape preserves Flags (including + * FieldFlagNullable = 0x02) through the rebuild, so existing + * nullable columns on the source table keep their _NullFlags + * bitmap semantics in the rebuilt table. */ + aStruct := dbStruct() + nSrcFC := FCount() + + /* Buffer every source row into memory before creating ALTDST. + * An earlier two-area interleaving (SELECT ALTSRC ; iterate ; + * SELECT ALTDST ; write) occasionally surfaced cross-area + * state bleed — ALTSRC's recCount appeared to mirror ALTDST's + * just-appended phantom record, causing the skip loop to exit + * after one row. Caching here isolates the read and write + * phases; memory pressure is bounded by the source size. */ + aRows := {} + dbGoTop() + WHILE ! Eof() + aRow := Array( nSrcFC ) + FOR i := 1 TO nSrcFC + aRow[ i ] := FieldGet( i ) + NEXT + AAdd( aRows, aRow ) + dbSkip() + ENDDO + dbCloseArea() + + AAdd( aStruct, { cCol, cType, nWidth, nDec, nFlags } ) dbCreate( cTmp, aStruct ) USE ( cTmp ) NEW EXCLUSIVE ALIAS ALTDST - SELECT ALTSRC - dbGoTop() - WHILE ! Eof() - SELECT ALTDST + FOR i := 1 TO Len( aRows ) dbAppend() - FOR i := 1 TO FCount() - 1 - IF i <= ALTSRC->( FCount() ) - FieldPut( i, ALTSRC->( FieldGet( i ) ) ) - ENDIF + FOR j := 1 TO nSrcFC + FieldPut( j, aRows[ i ][ j ] ) NEXT - SELECT ALTSRC - dbSkip() - ENDDO - SELECT ALTSRC ; dbCloseArea() ; SELECT ALTDST ; dbCloseArea() + /* Newly added nullable column: write NIL so the _NullFlags + * bit is set on every migrated row, instead of surfacing + * the zero/space the raw bytes decode to. */ + IF hb_BitAnd( nFlags, 2 ) != 0 + FieldPut( nSrcFC + 1, NIL ) + ENDIF + nCopied++ + NEXT + dbCloseArea() FErase( cFile ) FRename( cTmp, cFile ) RECOVER RETURN { { "error" }, { { "ALTER TABLE ADD failed" } } } END SEQUENCE -RETURN { { "result" }, { { "Column " + cCol + " added to " + cTable } } } +RETURN { { "result" }, { { "Column " + cCol + " added to " + cTable + ; + " (" + hb_NToS( nCopied ) + " rows migrated)" } } } FUNCTION SqlAlterDropColumn( cTable, cCol ) - LOCAL aStruct, aNewStruct := {}, cFile, cTmp, i, nOldPos + LOCAL aStruct, aNewStruct := {}, cFile, cTmp, i, j, nOldPos + LOCAL aRows, aRow, aOldPos cFile := Lower( cTable ) + ".dbf" cTmp := Lower( cTable ) + "_tmp.dbf" + /* Column-existence check must run OUTSIDE BEGIN SEQUENCE (see the + * comment in SqlAlterAddColumn for the RETURN-from-sequence + * issue). Open once up front to scan the schema, then let the + * actual rebuild re-open inside the sequence. */ + USE ( cFile ) NEW EXCLUSIVE ALIAS ALTSRC + aStruct := dbStruct() + FOR i := 1 TO Len( aStruct ) + IF Upper( aStruct[ i ][ 1 ] ) != Upper( cCol ) + AAdd( aNewStruct, aStruct[ i ] ) + ENDIF + NEXT + IF Len( aNewStruct ) == Len( aStruct ) + dbCloseArea() + RETURN { { "error" }, { { ; + "ALTER TABLE DROP: column '" + ; + iif( ValType( cCol ) == "C", cCol, "" ) + ; + "' not found in " + cTable } } } + ENDIF + dbCloseArea() + BEGIN SEQUENCE USE ( cFile ) NEW EXCLUSIVE ALIAS ALTSRC aStruct := dbStruct() + aNewStruct := {} FOR i := 1 TO Len( aStruct ) IF Upper( aStruct[ i ][ 1 ] ) != Upper( cCol ) AAdd( aNewStruct, aStruct[ i ] ) ENDIF NEXT - IF Len( aNewStruct ) == Len( aStruct ) - SELECT ALTSRC ; dbCloseArea() - Break( NIL ) - ENDIF + /* Pre-compute the source-field position for each destination + * column — avoids a FieldPos string search per row, and pins + * the mapping while ALTSRC is still the current area. */ + aOldPos := Array( Len( aNewStruct ) ) + FOR i := 1 TO Len( aNewStruct ) + aOldPos[ i ] := FieldPos( aNewStruct[ i ][ 1 ] ) + NEXT + + /* Buffer every source row before creating / opening ALTDST. + * ADD-column uses the same pattern; see the commentary there + * for the cross-area interleaving story this avoids. */ + aRows := {} + dbGoTop() + WHILE ! Eof() + aRow := Array( Len( aNewStruct ) ) + FOR j := 1 TO Len( aNewStruct ) + nOldPos := aOldPos[ j ] + IF nOldPos > 0 + aRow[ j ] := FieldGet( nOldPos ) + ENDIF + NEXT + AAdd( aRows, aRow ) + dbSkip() + ENDDO + dbCloseArea() dbCreate( cTmp, aNewStruct ) USE ( cTmp ) NEW EXCLUSIVE ALIAS ALTDST - - SELECT ALTSRC - dbGoTop() - WHILE ! Eof() - SELECT ALTDST + FOR i := 1 TO Len( aRows ) dbAppend() - FOR i := 1 TO Len( aNewStruct ) - nOldPos := ALTSRC->( FieldPos( aNewStruct[ i ][ 1 ] ) ) - IF nOldPos > 0 - FieldPut( i, ALTSRC->( FieldGet( nOldPos ) ) ) - ENDIF + FOR j := 1 TO Len( aNewStruct ) + FieldPut( j, aRows[ i ][ j ] ) NEXT - SELECT ALTSRC - dbSkip() - ENDDO - SELECT ALTSRC ; dbCloseArea() ; SELECT ALTDST ; dbCloseArea() + NEXT + dbCloseArea() + FErase( cFile ) FRename( cTmp, cFile ) RECOVER - RETURN { { "error" }, { { "ALTER TABLE DROP failed: " + cCol } } } + RETURN { { "error" }, { { "ALTER TABLE DROP failed: " + ; + iif( ValType( cCol ) == "C", cCol, "" ) } } } END SEQUENCE -RETURN { { "result" }, { { "Column " + cCol + " dropped from " + cTable } } } +RETURN { { "result" }, { { "Column " + ; + iif( ValType( cCol ) == "C", cCol, "" ) + " dropped from " + cTable } } } /* ====================================================================== @@ -731,6 +1086,7 @@ RETURN { { "result" }, { { "Column " + cCol + " dropped from " + cTable } } } FUNCTION SqlLoadConstraints( cTable ) LOCAL cFile, cBuf, aLines, i, aParts + LOCAL cFKD, cFKU LOCAL hResult := { => } hResult[ "check" ] := {} @@ -751,7 +1107,20 @@ FUNCTION SqlLoadConstraints( cTable ) ELSEIF Left( aLines[ i ], 3 ) == "FK:" aParts := hb_ATokens( SubStr( aLines[ i ], 4 ), ":" ) IF Len( aParts ) >= 3 - AAdd( hResult[ "fk" ], { aParts[ 1 ], aParts[ 2 ], aParts[ 3 ] } ) + /* Old .fsc files may have only 3 (legacy) or 4 + * (post-ON-DELETE) segments; default missing actions to + * RESTRICT. iif() with eager arg evaluation would index + * past the array end here, so unfold the nested defaults + * into explicit IFs. */ + cFKD := "RESTRICT" + cFKU := "RESTRICT" + IF Len( aParts ) >= 4 .AND. ! Empty( aParts[ 4 ] ) + cFKD := Upper( aParts[ 4 ] ) + ENDIF + IF Len( aParts ) >= 5 .AND. ! Empty( aParts[ 5 ] ) + cFKU := Upper( aParts[ 5 ] ) + ENDIF + AAdd( hResult[ "fk" ], { aParts[ 1 ], aParts[ 2 ], aParts[ 3 ], cFKD, cFKU } ) ENDIF ELSEIF Left( aLines[ i ], 7 ) == "UNIQUE:" AAdd( hResult[ "unique" ], SubStr( aLines[ i ], 8 ) ) @@ -813,46 +1182,206 @@ FUNCTION SqlValidateFK( cTable, cCol, xValue ) RETURN .T. -/* Validate UNIQUE constraint */ +/* Validate UNIQUE constraint. + * Opens the table in a SEPARATE workarea (dedicated alias) so the + * scan's dbGoTop / dbSkip don't touch the caller's record pointer. + * The caller is mid-UPDATE with a modified-but-uncommitted record + * buffer — moving the pointer on the shared workarea would flush + * that buffer to disk, silently persisting a change the caller is + * about to roll back. */ +/* SqlCoerceToCol — widen a value to the target column's native type + * when the parser produced a type-mismatched literal. Primary case: + * SQL strings like '20260101' going into a DATE column. Without this + * the DBF codec writes 8 spaces (its empty-date marker) and the row + * round-trips as CToD("") — semantically a data-loss bug. + * + * Only widens CHAR → DATE today. NUMERIC → CHAR / CHAR → NUMERIC are + * intentionally not coerced here so schema mismatches still surface + * at SELECT time rather than being silently massaged away. + */ +FUNCTION SqlCoerceToCol( xVal, aTypes, nFPos ) + LOCAL cType, dParsed + + IF xVal == NIL .OR. aTypes == NIL + RETURN xVal + ENDIF + IF nFPos < 1 .OR. nFPos > Len( aTypes ) + RETURN xVal + ENDIF + cType := aTypes[ nFPos ] + + IF cType == "D" .AND. ValType( xVal ) == "C" + /* CToD already runs an ISO pre-pass (YYYY-MM-DD, + * YYYY/MM/DD, YYYYMMDD, YYYY.MM.DD), then falls back to + * the current SET DATE FORMAT. If the string doesn't + * parse we fall through to the raw string and let the + * DBF codec write the blank-date marker — preserves the + * pre-coercion behaviour on malformed input. */ + dParsed := CToD( xVal ) + IF ValType( dParsed ) == "D" .AND. ! Empty( dParsed ) + RETURN dParsed + ENDIF + ENDIF + + /* Numeric → CHAR: DBF's C codec calls Value.AsString() on the + * input; numeric values there return "" (type mismatch), so the + * field gets written as blanks and the caller sees '' instead of + * the formatted number they supplied. hb_NToS / Str give the + * textual form; AllTrim keeps it compact. */ + IF cType == "C" .AND. ValType( xVal ) == "N" + RETURN AllTrim( Str( xVal ) ) + ENDIF + + /* Numeric → CHAR via string-of-digits literal. SQL `INSERT INTO t + * (c_col) VALUES ('42')` already passes "42" as a string and the + * DBF codec writes it directly — no coercion needed. Covered by + * the String→Date case plus the default passthrough. */ + +RETURN xVal + + FUNCTION SqlValidateUnique( cTable, cCol, xValue, nExcludeRec ) - LOCAL hC, i, nWA, nFPos, lDup, nSaved + LOCAL hC, i, j, nWA, nFPos, lDup, nSaved, cScanAlias, lOpenedHere + LOCAL aUqCols, aTupleVals, aTupleFPos, lMatch + LOCAL nCallerWA hC := SqlLoadConstraints( cTable ) IF Len( hC[ "unique" ] ) == 0 RETURN .T. ENDIF + /* SQL standard: UNIQUE permits multiple NULLs — NULL is not equal + * to anything, even itself, so two NULL rows don't collide. Skip + * the duplicate scan entirely for NULL values (both PostgreSQL, + * SQL Server default, and Oracle agree on this semantics). */ + IF xValue == NIL + RETURN .T. + ENDIF + nSaved := Select() - nWA := Select( cTable ) + nCallerWA := nSaved /* the in-flight INSERT/UPDATE area */ + + /* Use a dedicated scan alias; open fresh if not already open. */ + cScanAlias := "__UQ_" + Upper( cTable ) + nWA := Select( cScanAlias ) + lOpenedHere := .F. IF nWA == 0 + BEGIN SEQUENCE + dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cScanAlias, .T., .T. ) + nWA := Select( cScanAlias ) + lOpenedHere := .T. + RECOVER + /* Fall back to reading the caller's workarea — safer than + * silently ignoring the constraint. Caller must tolerate + * the transient disk flush in this degraded path. */ + nWA := Select( cTable ) + END SEQUENCE + ENDIF + IF nWA == 0 + dbSelectArea( nSaved ) RETURN .T. ENDIF FOR i := 1 TO Len( hC[ "unique" ] ) - IF Upper( hC[ "unique" ][ i ] ) == Upper( cCol ) - dbSelectArea( nWA ) - nFPos := FieldPos( cCol ) - IF nFPos > 0 - lDup := .F. - dbGoTop() - DO WHILE ! Eof() - IF RecNo() != nExcludeRec - IF SqlCmpEq( FieldGet( nFPos ), xValue ) - lDup := .T. - EXIT - ENDIF + /* Each .fsc UNIQUE entry is either a single column name or a + * comma-joined tuple (`a,b`) for composite UNIQUE. Split and + * trim once per entry so the trigger-column check + the scan + * compare both work uniformly. */ + aUqCols := hb_ATokens( hC[ "unique" ][ i ], "," ) + FOR j := 1 TO Len( aUqCols ) + aUqCols[ j ] := AllTrim( aUqCols[ j ] ) + NEXT + IF Len( aUqCols ) == 0 + LOOP + ENDIF + /* Trigger only when cCol is the first column of this entry — + * caller iterates every changed field, so a composite would + * otherwise be rescanned once per column for the same outcome. */ + IF Upper( aUqCols[ 1 ] ) != Upper( cCol ) + LOOP + ENDIF + + /* Read the tuple values. Single-col entries use xValue directly + * — that path is invoked by external callers (test harness + + * legacy code) that pass a candidate value without having a + * positioned in-flight record. Composite entries can only come + * from the RunInsert/RunUpdate PRG path, which sits on the + * just-modified record, so reading the rest of the tuple from + * the caller's area is safe and matches what the caller wants + * validated against the table. */ + aTupleVals := {} + IF Len( aUqCols ) == 1 + AAdd( aTupleVals, xValue ) + ELSE + dbSelectArea( nCallerWA ) + FOR j := 1 TO Len( aUqCols ) + nFPos := FieldPos( aUqCols[ j ] ) + IF nFPos > 0 + AAdd( aTupleVals, FieldGet( nFPos ) ) + ELSE + AAdd( aTupleVals, NIL ) + ENDIF + NEXT + ENDIF + /* Composite SQL standard: a NULL anywhere in the tuple means + * "no constraint" (the row is not equal to any other row, + * including another all-NULL or partial-NULL row). */ + lDup := .F. + FOR j := 1 TO Len( aTupleVals ) + IF aTupleVals[ j ] == NIL + lDup := .T. /* re-purpose as "skip flag" */ + EXIT + ENDIF + NEXT + IF lDup + LOOP + ENDIF + + /* Pre-resolve column positions on the scan area so we don't + * call FieldPos in the inner loop. */ + dbSelectArea( nWA ) + aTupleFPos := {} + FOR j := 1 TO Len( aUqCols ) + AAdd( aTupleFPos, FieldPos( aUqCols[ j ] ) ) + NEXT + + lDup := .F. + dbGoTop() + DO WHILE ! Eof() + /* Skip the row we're currently editing (self-match) and any + * row marked for deletion. The rolled-back INSERT case + * leaves an appended record with Deleted()==.T. that would + * otherwise mask a legitimate later write. */ + IF RecNo() != nExcludeRec .AND. ! Deleted() + lMatch := .T. + FOR j := 1 TO Len( aUqCols ) + IF aTupleFPos[ j ] == 0 .OR. ; + ! SqlCmpEq( FieldGet( aTupleFPos[ j ] ), aTupleVals[ j ] ) + lMatch := .F. + EXIT ENDIF - dbSkip() - ENDDO - dbSelectArea( nSaved ) - IF lDup - RETURN .F. + NEXT + IF lMatch + lDup := .T. + EXIT ENDIF ENDIF + dbSkip() + ENDDO + IF lDup + IF lOpenedHere + dbCloseArea() + ENDIF + dbSelectArea( nSaved ) + RETURN .F. ENDIF NEXT + IF lOpenedHere + dbSelectArea( nWA ) + dbCloseArea() + ENDIF dbSelectArea( nSaved ) RETURN .T. @@ -934,12 +1463,21 @@ FUNCTION SqlValidateFKRecord( cTable, cCol, xValue ) LOCAL hC, i, cParentTable, cParentCol LOCAL nSaved, nParentWA, nFPos, lFound + LOCAL nSavedRecno, lSelfFK hC := SqlLoadConstraints( cTable ) IF Len( hC[ "fk" ] ) == 0 RETURN .T. ENDIF + /* SQL standard: a NULL FK value always satisfies the constraint + * (no parent row is required for an unknown reference). Without + * this, ON UPDATE SET NULL cascades against this child rolled + * back the inner UPDATE because the parent scan never finds NULL. */ + IF xValue == NIL + RETURN .T. + ENDIF + FOR i := 1 TO Len( hC[ "fk" ] ) IF Upper( hC[ "fk" ][ i ][ 1 ] ) == Upper( cCol ) cParentTable := hC[ "fk" ][ i ][ 2 ] @@ -947,17 +1485,42 @@ FUNCTION SqlValidateFKRecord( cTable, cCol, xValue ) nSaved := Select() - /* Open parent table in a separate workarea */ - nParentWA := Select( Upper( cParentTable ) ) - IF nParentWA == 0 + /* Self-FK: parent area IS the in-flight INSERT/UPDATE area. + * Reusing it means dbGoTop / scan clobber the caller's + * record pointer, AND multi-row INSERT can't see prior + * tuples that are still in the dirty buffer. Force a + * separate `__FK_
` workarea on the same DBF after + * dbCommit so the scan reads the freshly-flushed disk + * state without disturbing the caller's area. */ + lSelfFK := ( Upper( cTable ) == Upper( cParentTable ) ) + IF lSelfFK + nSavedRecno := RecNo() + dbCommit() BEGIN SEQUENCE dbUseArea( .T., "DBFNTX", Lower( cParentTable ) + ".dbf", ; "__FK_" + Upper( cParentTable ), .T., .T. ) nParentWA := Select( "__FK_" + Upper( cParentTable ) ) RECOVER + /* Couldn't open second handle (filesystem lock etc.) — + * fall back to in-area scan with RECNO restore. */ dbSelectArea( nSaved ) - RETURN .T. /* Cannot open parent — skip validation */ + nParentWA := nSaved END SEQUENCE + ELSE + /* Cross-table FK: try the existing alias first, open a + * fresh `__FK_
` area only if it isn't already in + * a workarea. */ + nParentWA := Select( Upper( cParentTable ) ) + IF nParentWA == 0 + BEGIN SEQUENCE + dbUseArea( .T., "DBFNTX", Lower( cParentTable ) + ".dbf", ; + "__FK_" + Upper( cParentTable ), .T., .T. ) + nParentWA := Select( "__FK_" + Upper( cParentTable ) ) + RECOVER + dbSelectArea( nSaved ) + RETURN .T. /* Cannot open parent — skip validation */ + END SEQUENCE + ENDIF ENDIF /* Scan parent for matching value */ @@ -983,6 +1546,14 @@ FUNCTION SqlValidateFKRecord( cTable, cCol, xValue ) dbSelectArea( nSaved ) + /* Self-FK fallback path scanned the in-flight area; restore + * the caller's record pointer. (When we opened a second + * `__FK_` area the caller's RECNO never moved, so this is + * a no-op in that branch.) */ + IF lSelfFK .AND. nSavedRecno > 0 .AND. RecNo() != nSavedRecno + dbGoto( nSavedRecno ) + ENDIF + IF ! lFound RETURN .F. ENDIF @@ -992,6 +1563,236 @@ FUNCTION SqlValidateFKRecord( cTable, cCol, xValue ) RETURN .T. +/* + * Scan every sibling .fsc file for FK constraints that reference + * cParentTable (the one we're about to modify). Returns an array of + * { cChildTable, cChildCol, cParentCol, cOnDelete } entries — one per + * referencing FK. Used by SqlEnforceDeleteRefs to decide whether a + * DELETE on the parent is blocked (RESTRICT), cascades, or leaves + * children orphaned (SET NULL). + * + * The FK side of the relationship lives in the CHILD table's .fsc + * file, so there's no cross-reference index available — we pay a + * linear Directory() scan. For per-record DELETE that's fine: the + * result is cached per RunDelete by its caller. + */ +FUNCTION SqlFindReferencingFKs( cParentTable ) + + LOCAL aFiles, i, j, aRefs, cChildBase, cFsc + LOCAL aResult := {} + LOCAL cPT := Upper( cParentTable ) + + aFiles := Directory( "*.fsc" ) + FOR i := 1 TO Len( aFiles ) + cFsc := aFiles[ i ][ 1 ] + IF Len( cFsc ) <= 4 + LOOP + ENDIF + cChildBase := Left( cFsc, Len( cFsc ) - 4 ) + /* Self-FK is a legitimate pattern (org chart, tree). Include + * the parent's own .fsc in the scan so self-referencing rows + * (`parent_id REFERENCES n(id)`) get the same CASCADE/SET NULL/ + * RESTRICT treatment as cross-table FKs. The recursive cascade + * is guarded by the nested DELETE/UPDATE itself: each child + * DELETE re-enters the same enforcement path which deletes + * its own children, terminating naturally when no more rows + * match the WHERE. A bona-fide cycle (row pointing at itself + * or a loop in the parent chain) would already loop in any + * RDBMS — that's a data-integrity issue, not a bug here. */ + aRefs := SqlLoadConstraints( cChildBase ) + FOR j := 1 TO Len( aRefs[ "fk" ] ) + IF Upper( aRefs[ "fk" ][ j ][ 2 ] ) == cPT + AAdd( aResult, { cChildBase, ; + aRefs[ "fk" ][ j ][ 1 ], ; /* child col */ + aRefs[ "fk" ][ j ][ 3 ], ; /* parent col */ + aRefs[ "fk" ][ j ][ 4 ], ; /* on-delete */ + iif( Len( aRefs[ "fk" ][ j ] ) >= 5, ; + aRefs[ "fk" ][ j ][ 5 ], "RESTRICT" ) } ) /* on-update */ + ENDIF + NEXT + NEXT + +RETURN aResult + + +/* + * Enforce referential integrity for a single DELETE. Called from + * RunDelete's PRG path with the current-record pointer already + * positioned on the row about to be removed. Returns a hash: + * { "ok" => .T./.F., "error" => "..." } + * On CASCADE the child rows are removed first (via a nested + * five_SQL DELETE) so the caller can proceed. On SET NULL the + * child rows get their FK column nulled out. On RESTRICT (the + * default) any existing child row blocks the delete. + * + * aRefs is the output of SqlFindReferencingFKs — passed in so the + * caller can compute it once per query instead of per row. + */ +/* + * SqlGetSingleColPK — returns the single-column PK / UNIQUE name + * recorded in `
.fsc`, or "" when none exists or the only + * UNIQUE entry is a composite tuple. Used by cascade delete to + * pre-collect child rows by an addressable key, avoiding the + * same-area record-pointer race that breaks self-FK cascade. + */ +FUNCTION SqlGetSingleColPK( cTable ) + + LOCAL hC, i, cEntry + + hC := SqlLoadConstraints( cTable ) + IF Len( hC[ "unique" ] ) == 0 + RETURN "" + ENDIF + FOR i := 1 TO Len( hC[ "unique" ] ) + cEntry := hC[ "unique" ][ i ] + IF ! ( "," $ cEntry ) /* single column — usable */ + RETURN AllTrim( cEntry ) + ENDIF + NEXT + +RETURN "" + + +FUNCTION SqlEnforceDeleteRefs( cParentTable, aRefs ) + + LOCAL i, j, nFPos, xVal, cQry, aRes, cAction + LOCAL cChildPK, aChildKeys + + IF aRefs == NIL .OR. Len( aRefs ) == 0 + RETURN { "ok" => .T. } + ENDIF + + FOR i := 1 TO Len( aRefs ) + /* Parent column's current value */ + nFPos := FieldPos( aRefs[ i ][ 3 ] ) + IF nFPos <= 0 + LOOP + ENDIF + xVal := FieldGet( nFPos ) + cAction := aRefs[ i ][ 4 ] + + DO CASE + CASE cAction == "CASCADE" + /* Self-FK guard: nested DELETE on the child re-enters the + * same workarea (parent area = child area) and dbSkip's + * record pointer races with the cascade scan, dropping + * every child past the first match. Pre-collect each + * child's PK via a read-only SELECT, then run one + * single-row DELETE per PK so each cascade is isolated. + * Falls back to the original multi-row nested DELETE + * when the child has no PK we can address. */ + cChildPK := SqlGetSingleColPK( aRefs[ i ][ 1 ] ) + IF ! Empty( cChildPK ) + aChildKeys := five_SQL( "SELECT " + cChildPK + " FROM " + ; + aRefs[ i ][ 1 ] + " WHERE " + ; + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( xVal ) ) + IF ValType( aChildKeys ) == "A" .AND. Len( aChildKeys ) >= 2 .AND. ; + ValType( aChildKeys[ 2 ] ) == "A" + FOR j := 1 TO Len( aChildKeys[ 2 ] ) + five_SQL( "DELETE FROM " + aRefs[ i ][ 1 ] + ; + " WHERE " + cChildPK + " = " + ; + SqlQuoteVal( aChildKeys[ 2 ][ j ][ 1 ] ) ) + NEXT + ENDIF + ELSE + cQry := "DELETE FROM " + aRefs[ i ][ 1 ] + " WHERE " + ; + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( xVal ) + aRes := five_SQL( cQry ) + ENDIF + + CASE cAction == "SETNULL" + cQry := "UPDATE " + aRefs[ i ][ 1 ] + " SET " + ; + aRefs[ i ][ 2 ] + " = NULL WHERE " + ; + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( xVal ) + aRes := five_SQL( cQry ) + + OTHERWISE + /* RESTRICT / NO ACTION / anything unrecognised → block + * the delete if any child row still references us. */ + cQry := "SELECT COUNT(*) AS cnt FROM " + aRefs[ i ][ 1 ] + ; + " WHERE " + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( xVal ) + aRes := five_SQL( cQry ) + IF ValType( aRes ) == "A" .AND. Len( aRes ) >= 2 .AND. ; + Len( aRes[ 2 ] ) > 0 .AND. Len( aRes[ 2 ][ 1 ] ) > 0 + IF SqlCoerceNum( aRes[ 2 ][ 1 ][ 1 ] ) > 0 + RETURN { "ok" => .F., ; + "error" => "FOREIGN KEY RESTRICT: " + aRefs[ i ][ 1 ] + ; + "." + aRefs[ i ][ 2 ] + " still references " + ; + cParentTable + "." + aRefs[ i ][ 3 ] } + ENDIF + ENDIF + ENDCASE + NEXT + +RETURN { "ok" => .T. } + + +/* + * Enforce referential integrity for a single UPDATE that changed the + * parent-key value of `cParentTable`. Mirrors SqlEnforceDeleteRefs but + * for ON UPDATE. Caller passes: + * - aRefs: SqlFindReferencingFKs(cParent) output (each entry has + * {child_table, child_col, parent_col, on_delete, on_update}) + * - hChanged: hash whose keys are upper-case parent column names + * whose value changed; values are { old, new }. + * Returns { "ok" => .T./.F., "error" => "..." }. + * + * CASCADE: child rows' FK column gets the new parent value. + * SETNULL: child rows' FK column nulled. + * RESTRICT / NO ACTION: blocks the update if any child row references + * the OLD value. + */ +FUNCTION SqlEnforceUpdateRefs( cParentTable, aRefs, hChanged ) + + LOCAL i, cQry, aRes, cAction, cPCol, axOldNew + + IF aRefs == NIL .OR. Len( aRefs ) == 0 .OR. hChanged == NIL + RETURN { "ok" => .T. } + ENDIF + + FOR i := 1 TO Len( aRefs ) + cPCol := Upper( aRefs[ i ][ 3 ] ) + IF ! hb_HHasKey( hChanged, cPCol ) + LOOP + ENDIF + axOldNew := hChanged[ cPCol ] + cAction := iif( Len( aRefs[ i ] ) >= 5, aRefs[ i ][ 5 ], "RESTRICT" ) + + DO CASE + CASE cAction == "CASCADE" + cQry := "UPDATE " + aRefs[ i ][ 1 ] + " SET " + ; + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( axOldNew[ 2 ] ) + ; + " WHERE " + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( axOldNew[ 1 ] ) + aRes := five_SQL( cQry ) + + CASE cAction == "SETNULL" + cQry := "UPDATE " + aRefs[ i ][ 1 ] + " SET " + ; + aRefs[ i ][ 2 ] + " = NULL WHERE " + ; + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( axOldNew[ 1 ] ) + aRes := five_SQL( cQry ) + + OTHERWISE + /* RESTRICT / NO ACTION / unrecognised: block if any child row + * still references the old parent value. */ + cQry := "SELECT COUNT(*) AS cnt FROM " + aRefs[ i ][ 1 ] + ; + " WHERE " + aRefs[ i ][ 2 ] + " = " + SqlQuoteVal( axOldNew[ 1 ] ) + aRes := five_SQL( cQry ) + IF ValType( aRes ) == "A" .AND. Len( aRes ) >= 2 .AND. ; + Len( aRes[ 2 ] ) > 0 .AND. Len( aRes[ 2 ][ 1 ] ) > 0 + IF SqlCoerceNum( aRes[ 2 ][ 1 ][ 1 ] ) > 0 + RETURN { "ok" => .F., ; + "error" => "FOREIGN KEY RESTRICT (ON UPDATE): " + ; + aRefs[ i ][ 1 ] + "." + aRefs[ i ][ 2 ] + ; + " still references " + cParentTable + ; + "." + aRefs[ i ][ 3 ] } + ENDIF + ENDIF + ENDCASE + NEXT + +RETURN { "ok" => .T. } + + /* * SqlReplaceWord: Replace a whole-word occurrence of cWord in cText with cReplace. * Avoids replacing "ID" inside "AND" etc. diff --git a/_FiveSql2/src/TSqlExecutor.prg b/_FiveSql2/src/TSqlExecutor.prg index 2e75f46..f7862fb 100644 --- a/_FiveSql2/src/TSqlExecutor.prg +++ b/_FiveSql2/src/TSqlExecutor.prg @@ -31,11 +31,32 @@ STATIC s_lCteDiskSeen := .F. * "where_pc" => pcWhere | NIL, * "compiled" => .T. } * RunUpdate populates on first hit, subsequent calls reuse. Compiled - * pcode depends on the target table's field layout; since the plan - * cache key already uniquely identifies the SQL template (same schema - * every call), the cache is sound. */ + * pcode depends on the target table's field layout; the plan-cache + * key carries a schema-version prefix (SqlSchemaVer) so DDL + * (ALTER / DROP / CREATE TABLE|INDEX|VIEW) invalidates this cache in + * one bump without iterating the hash. The DML cache piggybacks on + * the same cap as the plan cache (SQL_PLAN_CACHE_MAX from TFiveSQL). + * On overflow (size >= cap) the whole hash is wiped — coarser than + * LRU but we don't have insertion-order tracking on Five hashes. */ +#define SQL_DML_PCODE_CACHE_MAX 1000 STATIC s_hDmlPcodeCache := { => } + +FUNCTION SqlDmlPcodeCacheReset() + s_hDmlPcodeCache := { => } +RETURN NIL + +/* Schema version — bumped by every DDL completion. Used as a prefix + * on all SQL plan-cache / DML-pcode-cache keys so any DDL invalidates + * every plan that referenced the pre-DDL schema, without walking the + * hash. Old entries become unreachable (never looked up again) and + * are collected at process exit; DDL is rare enough in the target + * workload that the bounded leak is acceptable. + * + * STATIC is file-scoped, so cross-file access goes through the + * SqlSchemaVer / SqlBumpSchemaVer top-level functions below. */ +STATIC s_nSchemaVer := 0 + CLASS TSqlExecutor DATA hQuery @@ -53,6 +74,7 @@ CLASS TSqlExecutor DATA bRowBlock /* optional code block — receives SELECT cols as params */ DATA aFetchCache /* pre-bound {nWA, nFPos} per SELECT expression, or NIL */ DATA cCacheKey /* plan-cache key set by TFiveSQL; used for DML pcode cache */ + DATA aWrappedWindowCols INIT {} /* SELECT-col indices whose expr wraps ND_WINDOW */ DATA hSubCorrCache INIT { => } /* per-outer-key subquery result cache */ DATA aSubCacheSlots INIT {} /* list of {xSubNode, {id, aFreeVars}} */ DATA nSubCacheSeq INIT 0 /* monotonic ID for subqueries */ @@ -63,6 +85,7 @@ CLASS TSqlExecutor METHOD New( hQuery, aParams ) CONSTRUCTOR METHOD Run() + METHOD RunImpl() METHOD RunSelect() METHOD RunInsert() METHOD RunUpdate() @@ -73,7 +96,11 @@ CLASS TSqlExecutor METHOD EvalExpr( xNode ) METHOD Resolve( cRef ) METHOD FindWA( cAlias ) - METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) + METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl, aPushByLevel ) + METHOD SplitAndClauses( xE, aOut ) + METHOD BuildAliasLevelMap( aJoins ) + METHOD ClauseMaxLevel( xClause, hAliasToLevel, nDefault ) + METHOD EvalPushedAtLevel( aPushByLevel, nIdx ) METHOD RightJoinPass( aJoins, nIdx, aRE, aRows ) METHOD FetchRowNull( aRE, cInnerAlias ) METHOD ColBelongsTo( cColRef, cAlias ) @@ -81,8 +108,10 @@ CLASS TSqlExecutor METHOD PopOuter() METHOD ResolveFromOuter( cRef, cTblAlias, cField, lFound ) METHOD MakeError( nCode, cMsg ) - METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) + METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere, aRE, aRows, hHashTbl, aPushByLevel ) METHOD CacheSubquery( xSubExpr ) + METHOD SnapshotAreaRecNos() + METHOD RestoreAreaRecNos( aSnap ) METHOD MaterializeCTE( aCTE ) METHOD MaterializeRecursiveCTE( aCTE ) METHOD ApplyWindowFunctions( aRows, aFN, aCols ) @@ -94,6 +123,8 @@ CLASS TSqlExecutor METHOD TryCompileWhere( xWhere ) METHOD SqlExprToPrg( xNode ) METHOD BuildFetchCache( aExprs ) + METHOD PreResolveColumns( xNode ) + METHOD PreResolveCol( xNode ) METHOD SubqueryCached( xSubNode ) METHOD CollectFreeVars( hQ ) METHOD CollectExprFreeVars( xE, aLocalAliases, aFree ) @@ -136,12 +167,32 @@ RETURN { { "__error__" }, { { nCode, cMsg, "" } } } METHOD Run() CLASS TSqlExecutor - LOCAL cType, aT, nP2 + LOCAL aResult, lOldDel IF ::hQuery == NIL RETURN ::MakeError( SQL_ERR_SYNTAX, "Empty or invalid SQL" ) ENDIF + /* Save caller's SET DELETED state, force ON for the duration of + * this statement, restore on exit. SQL semantics treat marked- + * deleted rows as absent; forcing it here is required regardless + * of caller state. Restoring prevents a five_SQL() call from + * silently flipping the caller's setting. + * Set index 8 = Harbour SetDeleted slot (HB_SET_DELETED). Literal + * used because Five doesn't ship a set.ch with _SET_DELETED macro. */ + lOldDel := Set( 8, .T. ) + + aResult := ::RunImpl() + + Set( 8, lOldDel ) + +RETURN aResult + + +METHOD RunImpl() CLASS TSqlExecutor + + LOCAL cType, aT, nP2 + cType := ::hQuery[ "type" ] DO CASE @@ -162,6 +213,13 @@ METHOD Run() CLASS TSqlExecutor RETURN ::oDDL:CreateIndex( aT, nP2 ) ELSEIF ::oDDL:DDL_IsKW( aT, nP2, "VIEW" ) RETURN ::oDDL:CreateView( aT, nP2 ) + ELSEIF ::oDDL:DDL_IsKW( aT, nP2, "OR" ) .AND. ; + ::oDDL:DDL_IsKW( aT, nP2 + 1, "REPLACE" ) .AND. ; + ::oDDL:DDL_IsKW( aT, nP2 + 2, "VIEW" ) + /* `CREATE OR REPLACE VIEW v AS ...` — the OR/REPLACE prefix + * stays in the token stream so CreateView's own consumer + * picks them up. */ + RETURN ::oDDL:CreateView( aT, nP2 ) ENDIF RETURN ::MakeError( SQL_ERR_UNSUPPORTED, "CREATE: unsupported object" ) CASE cType == "DROP" @@ -304,6 +362,127 @@ METHOD FindWA( cAlias ) CLASS TSqlExecutor RETURN 0 +/* PreResolveColumns walks an expression tree and caches {nWA, nFPos} + * on every ND_COL it can statically resolve. Called once per RunSelect + * entry (after tables are open and ::aTables is populated) so the + * per-row EvalExpr hot path can short-circuit Resolve()'s string + * parsing + alias lookup on every ND_COL visit. + * + * ND_SUB subtrees are intentionally skipped — they open their own + * scope at execution time (correlated/uncorrelated subqueries), so + * caching here with the outer query's workarea map would be wrong. + * + * Idempotent: re-invocation overwrites xNode[5] with the current + * resolution, so table reopens between runs (which may shift nWA) + * don't produce stale caches. */ +METHOD PreResolveColumns( xNode ) CLASS TSqlExecutor + LOCAL i, j, xChild, nHi + + IF xNode == NIL .OR. ValType( xNode ) != "A" .OR. Len( xNode ) < 1 + RETURN Self + ENDIF + IF ValType( xNode[ 1 ] ) != "N" + RETURN Self + ENDIF + + IF xNode[ 1 ] == ND_COL + ::PreResolveCol( xNode ) + RETURN Self + ENDIF + + IF xNode[ 1 ] == ND_SUB + /* Subquery: leave its columns to resolve at its own scope. */ + RETURN Self + ENDIF + + /* Recurse into child slots 3..5. Each slot may be either: + * - a single node (array whose [1] is a numeric kind) + * - an array of nodes (CASE arms, FN args, LIST values) */ + nHi := Len( xNode ) + IF nHi > 5 + nHi := 5 + ENDIF + FOR i := 3 TO nHi + xChild := xNode[ i ] + IF ValType( xChild ) != "A" .OR. Len( xChild ) == 0 + LOOP + ENDIF + IF ValType( xChild[ 1 ] ) == "N" + ::PreResolveColumns( xChild ) + ELSE + FOR j := 1 TO Len( xChild ) + IF ValType( xChild[ j ] ) == "A" + ::PreResolveColumns( xChild[ j ] ) + ENDIF + NEXT + ENDIF + NEXT + +RETURN Self + + +/* PreResolveCol — attempt to cache {nWA, nFPos} on a single ND_COL + * node. Leaves xNode[5] at NIL when resolution can't be determined + * statically (qualified to an unknown alias, no matching field, + * FIELD-list reference awaiting outer context) — EvalExpr then falls + * back to the full Resolve() path at runtime. */ +METHOD PreResolveCol( xNode ) CLASS TSqlExecutor + LOCAL cRef, cField, cTblAlias, nDot, nWA, nFPos, i, cA, nSavedArea + + xNode[ 5 ] := NIL /* clear prior cache */ + + cRef := xNode[ 2 ] + IF ValType( cRef ) != "C" .OR. cRef == "*" + RETURN Self + ENDIF + + nDot := At( ".", cRef ) + IF nDot > 0 + cTblAlias := Upper( Left( cRef, nDot - 1 ) ) + cField := Upper( SubStr( cRef, nDot + 1 ) ) + ELSE + cField := Upper( cRef ) + cTblAlias := "" + ENDIF + + nSavedArea := Select() + + IF ! Empty( cTblAlias ) + nWA := ::FindWA( cTblAlias ) + IF nWA > 0 + dbSelectArea( nWA ) + nFPos := FieldPos( cField ) + dbSelectArea( nSavedArea ) + IF nFPos > 0 + xNode[ 5 ] := { nWA, nFPos } + ENDIF + ENDIF + RETURN Self + ENDIF + + /* Unqualified: first table where field exists wins (matches + * Resolve's iteration order). */ + FOR i := 1 TO Len( ::aTables ) + cA := ::aTables[ i ][ 2 ] + IF Empty( cA ) + cA := ::aTables[ i ][ 1 ] + ENDIF + nWA := Select( cA ) + IF nWA > 0 + dbSelectArea( nWA ) + nFPos := FieldPos( cField ) + IF nFPos > 0 + dbSelectArea( nSavedArea ) + xNode[ 5 ] := { nWA, nFPos } + RETURN Self + ENDIF + ENDIF + NEXT + dbSelectArea( nSavedArea ) + +RETURN Self + + METHOD Resolve( cRef ) CLASS TSqlExecutor LOCAL cField, cTblAlias, nDot, nWA, nFPos, xVal, nSavedArea @@ -439,7 +618,7 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor LOCAL xL, xR, cOp, xVal, aArgs, aVals, i, xResult, nPI LOCAL aCases, xElse, xCond - LOCAL aSubResult, xHi, nSavedWA + LOCAL aSubResult, xHi, nSavedWA, lSawNull IF xNode == NIL RETURN NIL @@ -453,6 +632,15 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN NIL CASE xNode[ 1 ] == ND_COL + /* Fast path: PreResolveColumns cached {nWA, nFPos} at xNode[5]. + * Skips the Resolve() string-parse + alias-lookup on every row. */ + IF xNode[ 5 ] != NIL + nSavedWA := Select() + dbSelectArea( xNode[ 5 ][ 1 ] ) + xVal := FieldGet( xNode[ 5 ][ 2 ] ) + dbSelectArea( nSavedWA ) + RETURN xVal + ENDIF RETURN ::Resolve( xNode[ 2 ] ) CASE xNode[ 1 ] == ND_PAR @@ -467,6 +655,11 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor cOp := xNode[ 2 ] xL := ::EvalExpr( xNode[ 3 ] ) IF cOp == "NOT" + /* SQL three-valued logic: NOT(NULL) = NULL. + * Critical for NOT IN with a NULL in the list. */ + IF xL == NIL + RETURN NIL + ENDIF IF ValType( xL ) == "L" RETURN ! xL ENDIF @@ -502,18 +695,41 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN SqlIsTrue( xL ) .OR. SqlIsTrue( xR ) ENDIF - /* IN operator */ + /* IN operator — SQL three-valued logic. + * + * x IN (a, b, ...) TRUE if any element equals x + * NULL if x is NULL, or no equal found + * AND the list contains a NULL + * FALSE if no equal found and list is + * fully non-NULL + * + * NOT IN is built by the parser as NOT(IN(...)), so the NULL + * propagates through the unary NOT (NIL → NIL via SqlIsTrue + * gate in the WHERE driver, which filters NULL out — the + * SQL-correct behaviour). This matters when a subquery in the + * list contains a NULL: `x NOT IN (SELECT y FROM t)` must drop + * candidate rows whenever any y is NULL, instead of letting + * them through as TRUE. + */ IF cOp == "IN" xL := ::EvalExpr( xNode[ 3 ] ) xR := xNode[ 4 ] + lSawNull := ( xL == NIL ) IF xR != NIL .AND. xR[ 1 ] == ND_LIST aVals := xR[ 2 ] FOR i := 1 TO Len( aVals ) xVal := ::EvalExpr( aVals[ i ] ) - IF SqlCmpEq( xL, xVal ) + IF xVal == NIL + lSawNull := .T. + LOOP + ENDIF + IF xL != NIL .AND. SqlCmpEq( xL, xVal ) RETURN .T. ENDIF NEXT + IF lSawNull + RETURN NIL + ENDIF RETURN .F. ENDIF IF xR != NIL .AND. xR[ 1 ] == ND_SUB .AND. xR[ 2 ] != NIL @@ -524,12 +740,20 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" FOR i := 1 TO Len( aSubResult[ 2 ] ) - IF Len( aSubResult[ 2 ][ i ] ) > 0 .AND. ; - SqlCmpEq( xL, aSubResult[ 2 ][ i ][ 1 ] ) - RETURN .T. + IF Len( aSubResult[ 2 ][ i ] ) > 0 + IF aSubResult[ 2 ][ i ][ 1 ] == NIL + lSawNull := .T. + LOOP + ENDIF + IF xL != NIL .AND. SqlCmpEq( xL, aSubResult[ 2 ][ i ][ 1 ] ) + RETURN .T. + ENDIF ENDIF NEXT ENDIF + IF lSawNull + RETURN NIL + ENDIF RETURN .F. ENDIF RETURN .F. @@ -553,6 +777,19 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor xL := SqlCoerceForCmp( xL ) xR := SqlCoerceForCmp( xR ) + /* SQL three-valued logic: any comparison with NULL is UNKNOWN, + * which the WHERE driver SqlIsTrue() treats as "does not match" + * (drops the row). Previously `v <> 10` on a row where v was + * NULL returned .T. because `! SqlCmpEq(NIL, 10)` = `! .F.` = + * `.T.`. That's wrong: `NULL <> anything` must be NULL. + * Applies to =, <>, <, <=, >, >=. IS NULL / IS NOT NULL / + * IS DISTINCT FROM handle NULLs explicitly elsewhere. */ + IF ( cOp == "=" .OR. cOp == "==" .OR. cOp == "<>" .OR. cOp == "!=" .OR. ; + cOp == "<" .OR. cOp == ">" .OR. cOp == "<=" .OR. cOp == ">=" ) .AND. ; + ( xL == NIL .OR. xR == NIL ) + RETURN NIL + ENDIF + IF cOp == "=" .OR. cOp == "==" RETURN SqlCmpEq( xL, xR ) ENDIF @@ -572,6 +809,32 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN SqlCmpEq( xL, xR ) .OR. SqlCmpLt( xR, xL ) ENDIF + /* SQL:2003 IS DISTINCT FROM / IS NOT DISTINCT FROM — NULL-safe + * compare. `a IS DISTINCT FROM b` is .T. iff the values differ + * (treating NULL as a distinct value of its own); `a IS NOT + * DISTINCT FROM b` is its negation. Unlike `=` / `<>`, they + * never return UNKNOWN — the parser parsed these as their own + * ND_BIN op codes but EvalExpr had no handler so they fell + * through to RETURN NIL → WHERE always dropped the row. */ + IF cOp == "IS DISTINCT FROM" + IF xL == NIL .AND. xR == NIL + RETURN .F. + ENDIF + IF xL == NIL .OR. xR == NIL + RETURN .T. + ENDIF + RETURN ! SqlCmpEq( xL, xR ) + ENDIF + IF cOp == "IS NOT DISTINCT FROM" + IF xL == NIL .AND. xR == NIL + RETURN .T. + ENDIF + IF xL == NIL .OR. xR == NIL + RETURN .F. + ENDIF + RETURN SqlCmpEq( xL, xR ) + ENDIF + IF cOp == "LIKE" IF xNode[ 5 ] != NIL RETURN SqlLikeMatch( SqlCoerceStr( xL ), SqlCoerceStr( xR ), SqlCoerceStr( ::EvalExpr( xNode[ 5 ] ) ) ) @@ -579,25 +842,66 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor RETURN SqlLikeMatch( SqlCoerceStr( xL ), SqlCoerceStr( xR ) ) ENDIF + /* Arithmetic + CONCAT: SQL NULL propagation. Any NULL operand + * yields NULL (except the Harbour-style string `+` between two + * non-NULL C values, which the caller explicitly relied on for + * name-concat idioms). Division by zero → NULL per SQL spec + * rather than silently returning 0. */ IF cOp == "+" + IF xL == NIL .OR. xR == NIL + IF ValType( xL ) == "C" .AND. ValType( xR ) == "C" + RETURN xL + xR + ENDIF + RETURN NIL + ENDIF IF ValType( xL ) == "C" .AND. ValType( xR ) == "C" RETURN xL + xR ENDIF + /* Harbour Date arithmetic: Date + N → Date (N days later). + * Without this branch, SqlCoerceNum collapsed the date to 0 + * and the projection returned the raw integer offset + * (`d + 7` came back as `7`). N + Date is symmetric. */ + IF ValType( xL ) == "D" .AND. ValType( xR ) == "N" + RETURN xL + xR + ENDIF + IF ValType( xL ) == "N" .AND. ValType( xR ) == "D" + RETURN xR + xL + ENDIF RETURN SqlCoerceNum( xL ) + SqlCoerceNum( xR ) ENDIF IF cOp == "-" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF + /* Date - N → Date (N days earlier); Date - Date → N (day + * gap). Both reduced to 0 - 0 = 0 before this branch. */ + IF ValType( xL ) == "D" .AND. ValType( xR ) == "N" + RETURN xL - xR + ENDIF + IF ValType( xL ) == "D" .AND. ValType( xR ) == "D" + RETURN xL - xR + ENDIF RETURN SqlCoerceNum( xL ) - SqlCoerceNum( xR ) ENDIF IF cOp == "*" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF RETURN SqlCoerceNum( xL ) * SqlCoerceNum( xR ) ENDIF IF cOp == "/" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF IF SqlCoerceNum( xR ) != 0 RETURN SqlCoerceNum( xL ) / SqlCoerceNum( xR ) ENDIF - RETURN 0 + RETURN NIL ENDIF IF cOp == "||" + IF xL == NIL .OR. xR == NIL + RETURN NIL + ENDIF RETURN SqlCoerceStr( xL ) + SqlCoerceStr( xR ) ENDIF @@ -607,6 +911,15 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor xL := ::EvalExpr( xNode[ 3 ] ) xR := ::EvalExpr( xNode[ 4 ] ) xHi := ::EvalExpr( xNode[ 5 ] ) + /* SQL 3-value logic: any NULL operand → NULL (UNKNOWN). The + * WHERE driver SqlIsTrue() drops the row. Without this guard + * `v NOT BETWEEN 10 AND 30` returned rows where v was NULL + * because the comparison fell through to .F. → !.F. = .T.. + * The standard says NULL BETWEEN/NOT-BETWEEN must always be + * UNKNOWN regardless of the bounds. */ + IF xL == NIL .OR. xR == NIL .OR. xHi == NIL + RETURN NIL + ENDIF xL := SqlCoerceForCmp( xL ) xR := SqlCoerceForCmp( xR ) xHi := SqlCoerceForCmp( xHi ) @@ -684,9 +997,31 @@ METHOD EvalExpr( xNode ) CLASS TSqlExecutor * those free variables. Non-correlated subqueries reduce * to a trivial single-entry cache. */ aSubResult := ::SubqueryCached( xNode ) + /* Skip the `__error__` envelope — extracting aResult[2][1][1] + * blindly would surface the numeric error code (e.g. 1005 = + * SQL_ERR_LOCKED) as the scalar value, silently passing + * garbage into the WHERE comparison. */ + IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 1 .AND. ; + ValType( aSubResult[ 1 ] ) == "A" .AND. Len( aSubResult[ 1 ] ) >= 1 .AND. ; + aSubResult[ 1 ][ 1 ] == "__error__" + RETURN NIL + ENDIF IF ValType( aSubResult ) == "A" .AND. Len( aSubResult ) >= 2 .AND. ; ValType( aSubResult[ 2 ] ) == "A" .AND. Len( aSubResult[ 2 ] ) > 0 .AND. ; Len( aSubResult[ 2 ][ 1 ] ) > 0 + /* SQL standard: a scalar subquery must return at most one + * row. Returning silently the first row of a multi-row + * result hid bugs (`INSERT ... VALUES ((SELECT id FROM + * t), 100)` quietly took an arbitrary row) — surface to + * stderr so the developer notices, then collapse to NIL + * to keep three-valued logic consistent (NULL beats + * "arbitrary garbage from row 1"). */ + IF Len( aSubResult[ 2 ] ) > 1 + OutErr( "FiveSQL: scalar subquery returned " + ; + hb_NToS( Len( aSubResult[ 2 ] ) ) + ; + " rows; SQL standard requires at most 1 — using NULL." + Chr( 10 ) ) + RETURN NIL + ENDIF RETURN aSubResult[ 2 ][ 1 ][ 1 ] ENDIF ENDIF @@ -896,7 +1231,132 @@ METHOD FetchRowNull( aRE, cInnerAlias ) CLASS TSqlExecutor RETURN aRow -METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecutor +/* ----------------------------------------------------------------- + * Helpers for WHERE predicate pushdown across JOIN levels. + * + * SplitAndClauses walks the top-level AND chain of a WHERE expression + * and returns each conjunct as its own tree. Non-AND trees come back + * as a single-element array, so callers don't need a special case. + * + * BuildAliasLevelMap assigns each table reference a depth: 0 for the + * primary (outer) table and 1..N for each JOIN entry in aJoins. Both + * the SQL alias (as quoted by the query text) and any temp/user + * alternative alias stored in aTables[i][3] are registered so + * qualified names like "o.id" and synonyms still resolve. + * + * ClauseMaxLevel returns the highest level referenced by any + * qualified column in the clause. Unqualified columns can't be + * pinned to a single table without the full FindWA dispatch, so they + * force the conservative default — callers pass Len(aJoins) to make + * such clauses fall back to the base-case evaluation. + * ----------------------------------------------------------------- */ +METHOD SplitAndClauses( xE, aOut ) CLASS TSqlExecutor + + IF aOut == NIL + aOut := {} + ENDIF + IF xE == NIL + RETURN aOut + ENDIF + IF ValType( xE ) == "A" .AND. Len( xE ) >= 4 .AND. ; + xE[ 1 ] == ND_BIN .AND. xE[ 2 ] == "AND" + ::SplitAndClauses( xE[ 3 ], aOut ) + ::SplitAndClauses( xE[ 4 ], aOut ) + ELSE + AAdd( aOut, xE ) + ENDIF + +RETURN aOut + + +METHOD BuildAliasLevelMap( aJoins ) CLASS TSqlExecutor + + LOCAL hMap := { => }, i, nLvl + + /* aTables is ordered [primary, join1, join2, ...]; index j maps to + * join level j-1 (primary = level 0). For each table entry, + * register every name that WHERE / ON expressions might use: + * + * [1] = original table name (e.g. "ORD") + * [2] = currently-selected alias — possibly a depth-suffixed + * temp ("ORD_2") introduced by oAlias:AcquireTemp when + * the SQL alias was too short or we're in a subquery. + * [3] = original SQL alias from the query text ("o") — this is + * what the parser writes into ND_COL qualifiers, so it's + * the name we most need to resolve, and it's the one that + * wasn't in the old map because aJoins[i][3] gets + * overwritten with the temp alias during JOIN sync. */ + FOR i := 1 TO Len( ::aTables ) + nLvl := i - 1 + IF ! Empty( ::aTables[ i ][ 1 ] ) + hMap[ Upper( ::aTables[ i ][ 1 ] ) ] := nLvl + ENDIF + IF ! Empty( ::aTables[ i ][ 2 ] ) + hMap[ Upper( ::aTables[ i ][ 2 ] ) ] := nLvl + ENDIF + IF Len( ::aTables[ i ] ) >= 3 .AND. ! Empty( ::aTables[ i ][ 3 ] ) + hMap[ Upper( ::aTables[ i ][ 3 ] ) ] := nLvl + ENDIF + NEXT + +RETURN hMap + + +METHOD ClauseMaxLevel( xClause, hAliasToLevel, nDefault ) CLASS TSqlExecutor + + LOCAL aCols, i, cName, nDot, cAlias + LOCAL nMax := 0 + LOCAL lUncertain := .F. + + aCols := SqlCollectColExprs( xClause, NIL ) + FOR i := 1 TO Len( aCols ) + cName := aCols[ i ][ 2 ] + nDot := At( ".", cName ) + IF nDot == 0 + /* Unqualified column — could resolve to any open workarea + * via FindWA. Force the conservative default so the clause + * is evaluated after every referenced table is positioned + * (base case), never at an intermediate level where the + * column might bind to a stale inner record. */ + lUncertain := .T. + EXIT + ENDIF + cAlias := Upper( Left( cName, nDot - 1 ) ) + IF hb_HHasKey( hAliasToLevel, cAlias ) + IF hAliasToLevel[ cAlias ] > nMax + nMax := hAliasToLevel[ cAlias ] + ENDIF + ELSE + lUncertain := .T. + EXIT + ENDIF + NEXT + + IF lUncertain + RETURN nDefault + ENDIF + +RETURN nMax + + +METHOD EvalPushedAtLevel( aPushByLevel, nIdx ) CLASS TSqlExecutor + + LOCAL p, aClauses + + IF aPushByLevel == NIL .OR. nIdx < 1 .OR. nIdx > Len( aPushByLevel ) + RETURN .T. + ENDIF + aClauses := aPushByLevel[ nIdx ] + FOR p := 1 TO Len( aClauses ) + IF ! SqlIsTrue( ::EvalExpr( aClauses[ p ] ) ) + RETURN .F. + ENDIF + NEXT + +RETURN .T. + + +METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl, aPushByLevel ) CLASS TSqlExecutor LOCAL cJAlias, xOnCond, nWA, aRow LOCAL lJoinMatch @@ -939,7 +1399,9 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu dbSelectArea( nWA ) dbGoTop() WHILE ! Eof() - ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) + IF ::EvalPushedAtLevel( aPushByLevel, nIdx ) + ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl, aPushByLevel ) + ENDIF dbSelectArea( nWA ) dbSkip() ENDDO @@ -995,11 +1457,18 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu ENDIF dbSelectArea( nWA ) lUseIndex := ( ::oIndex:FindBestTag( nWA, cInnerField ) > 0 ) - /* SQLite strategy: always use hash join for equi-joins when no index. - * Build ephemeral hash table on first probe, O(m) build + O(1) lookup. - * No threshold — even small tables benefit from avoiding repeated scans. */ + /* Hash join for equi-joins when no index is available. For + * very small inner tables the Go map allocation + per-key + * string formatting dominates the cost of a cache-friendly + * nested-loop scan, so we keep the old per-iteration scan + * when RecCount falls below the threshold. 64 was picked + * empirically — SQLite uses a similar constant; tune via + * bench_join if workload changes. */ IF ! lUseIndex .AND. ! Empty( cOuterCol ) - lUseHash := .T. + nRecCount := LastRec() + IF nRecCount > 64 + lUseHash := .T. + ENDIF ENDIF ENDIF ENDIF @@ -1015,7 +1484,13 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu EXIT ENDIF lHadMatch := .T. - ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) + /* Predicate pushdown: WHERE conjuncts whose referenced columns + * all bind by level nIdx are evaluated before we recurse, so + * rows rejected here skip the exponential join expansion that + * used to happen when xWhere only fired at the base case. */ + IF ::EvalPushedAtLevel( aPushByLevel, nIdx ) + ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl, aPushByLevel ) + ENDIF dbSelectArea( nWA ) dbSkip() IF Eof() @@ -1025,7 +1500,8 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu ELSEIF lUseHash /* Hash join path for equi-joins on large tables without index */ lHadMatch := ::HashJoin( nWA, cInnerField, cOuterCol, xOnCond, ; - aJoins, nIdx, xWhere, aRE, @aRows, @hHashTbl ) + aJoins, nIdx, xWhere, aRE, @aRows, @hHashTbl, ; + aPushByLevel ) ELSE dbSelectArea( nWA ) dbGoTop() @@ -1041,7 +1517,9 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu ::hRightMatched[ cRMKey ] := { => } ENDIF ::hRightMatched[ cRMKey ][ RecNo() ] := .T. - ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) + IF ::EvalPushedAtLevel( aPushByLevel, nIdx ) + ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl, aPushByLevel ) + ENDIF ENDIF dbSelectArea( nWA ) dbSkip() @@ -1065,7 +1543,7 @@ METHOD JoinRecurse( aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecu /* Middle join — recurse with NULL-filled state for this level * so subsequent joins can still process and emit their own * NULL rows or matches. */ - ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) + ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl, aPushByLevel ) ENDIF ENDIF @@ -1151,6 +1629,10 @@ METHOD RunSelect() CLASS TSqlExecutor LOCAL lIndexUsed, aTmp LOCAL aFP, pcW, aGoRows LOCAL nEarlyLimit, aSortSpec + LOCAL lOrderFromIndex := .F. + LOCAL aPushByLevel, xRes, aClauses, hAliasLvl, nLvl, ii + LOCAL nScanRec + LOCAL nUserCols, nTrim, nRow aCols := ::hQuery[ "columns" ] /* Deep-clone tables and joins so cross-run state (alias renames, @@ -1205,6 +1687,12 @@ METHOD RunSelect() CLASS TSqlExecutor cAlias := ::oAlias:AcquireTemp( "DRV" ) ENDIF ::aTables[ i ] := SqlMaterializeSubquery( ::aTables[ i ][ 3 ], cAlias, ::aParams ) + /* Track the derived MEMRDD area so CloseOpened (called via + * the top-level CloseOpened path at RunSelect's tail) frees + * the alias before the next query runs. Without this, a + * second SELECT that uses the same derived alias panics + * with "alias already in use" inside SqlMaterializeSubquery. */ + AAdd( ::aOpened, cAlias ) ENDIF NEXT @@ -1224,7 +1712,14 @@ METHOD RunSelect() CLASS TSqlExecutor IF Empty( ::aTables[ i ][ 3 ] ) ::aTables[ i ][ 3 ] := cAlias ENDIF - IF Len( cAlias ) <= 1 .OR. ::nDepth > 1 + /* Derived tables already opened their own MEMRDD area under the + * user's alias inside SqlMaterializeSubquery — don't rename it, + * or Select(cAlias) below will miss the area and the open loop + * will think the (synthetic) `__drv_` "table" is missing on + * disk. The rename for short aliases / nested depth is meant + * to avoid alias collisions on real DBF tables. */ + IF ( Len( cAlias ) <= 1 .OR. ::nDepth > 1 ) .AND. ; + Left( Lower( cTable ), 6 ) != "__drv_" cAlias := ::oAlias:AcquireTemp( Upper( cTable ) ) ::aTables[ i ][ 2 ] := cAlias ENDIF @@ -1261,6 +1756,22 @@ METHOD RunSelect() CLASS TSqlExecutor nWA := 0 END SEQUENCE ENDIF + /* VIEW expansion: `
.fsv` holds the SELECT text the + * caller registered with CREATE VIEW. Run that SELECT once, + * materialize into a MEMRDD temp area, and route subsequent + * column lookups through it under the view's alias. Allows + * reusable named queries without forcing every caller to + * embed the full SQL. */ + IF nWA == 0 .AND. hb_FileExists( Lower( cTable ) + ".fsv" ) + IF SqlMaterializeView( cTable, cAlias ) + nWA := Select( cAlias ) + IF nWA > 0 + AAdd( ::aOpened, cAlias ) + AAdd( ::oAlias:aSlots, ; + { cAlias, Upper( cTable ), Upper( cTable ), .T. } ) + ENDIF + ENDIF + ENDIF ENDIF IF nWA == -1 ::nDepth-- @@ -1271,6 +1782,19 @@ METHOD RunSelect() CLASS TSqlExecutor "Table '" + cTable + "' is open EXCLUSIVE. " + ; "Close it or reopen with SHARED access before running SQL queries." ) ENDIF + /* nWA == 0 here means: not in workareas, not openable as DBF, + * not findable as CTE temp. Bubble up a clear error instead + * of silently dropping into a SELECT against no workarea + * (which previously yielded empty / undefined results and + * crashed downstream callers reading aR[1][1]). */ + IF nWA == 0 + ::nDepth-- + IF Len( aSavedAreas ) > 0 + dbSelectArea( aSavedAreas[ 1 ] ) + ENDIF + RETURN ::MakeError( SQL_ERR_NO_TABLE, ; + "Table '" + cTable + "' does not exist" ) + ENDIF ENDIF NEXT @@ -1323,10 +1847,52 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF NEXT - /* Add hidden columns for aggregate source fields */ + /* Snapshot the user-visible column count BEFORE the hidden-column + * loops run. Hidden columns added below for aggregate sources, + * HAVING, and ORDER BY wrapped expressions get trimmed back off + * the final result so callers don't see synthetic `__ord___` + * etc. in their fetched rows. (nUserCols is declared at the top + * of RunSelect via the master LOCAL list — mid-function LOCAL + * is an established Five compiler quirk.) */ + nUserCols := Len( aResultExprs ) + + /* Add hidden columns for aggregate source fields. + * + * Window aggregates (ND_WINDOW: SUM/AVG/COUNT/MIN/MAX OVER ...) are + * included explicitly because SqlExprHasAgg deliberately does not + * descend into ND_WINDOW (it carries its own aggregation scope), + * so without the extra top-level check queries like + * `SELECT id, SUM(v) OVER (…)` would never materialise `v` on + * each row and ApplyWindowFunctions' fast path + * (`SqlWindowSlideAgg`) read NIL / 0 for every slot. */ FOR i := 1 TO Len( aCols ) - IF SqlExprHasAgg( aCols[ i ][ 1 ] ) - IF aCols[ i ][ 1 ][ 1 ] == ND_FN .AND. Len( aCols[ i ][ 1 ][ 3 ] ) > 0 + IF SqlExprHasAgg( aCols[ i ][ 1 ] ) .OR. aCols[ i ][ 1 ][ 1 ] == ND_WINDOW + /* Wrapped aggregate (e.g. `MAX(id)+1`, `ROUND(AVG(p),2)`): + * the top-level node is ND_BIN / ND_UNI / non-agg ND_FN, so + * the argument-walker below would skip it entirely and the + * wrapped aggregate's source column never becomes a hidden + * column. Result: ComputeAgg sees nCol==0 and returns 0 + * silently. Walk the whole expression for ND_COL leaves and + * add them as hidden columns. */ + IF aCols[ i ][ 1 ][ 1 ] != ND_FN .AND. aCols[ i ][ 1 ][ 1 ] != ND_WINDOW + aLeafCols := SqlCollectColExprs( aCols[ i ][ 1 ], NIL ) + FOR k := 1 TO Len( aLeafCols ) + cBare := aLeafCols[ k ][ 2 ] + lFound := .F. + FOR j := 1 TO Len( aResultExprs ) + IF Upper( aResultExprs[ j ][ 2 ] ) == Upper( cBare ) + lFound := .T. + EXIT + ENDIF + NEXT + IF ! lFound + AAdd( aResultExprs, { aLeafCols[ k ], cBare } ) + ENDIF + NEXT + LOOP + ENDIF + IF ( aCols[ i ][ 1 ][ 1 ] == ND_FN .OR. aCols[ i ][ 1 ][ 1 ] == ND_WINDOW ) .AND. ; + Len( aCols[ i ][ 1 ][ 3 ] ) > 0 xArgExpr := aCols[ i ][ 1 ][ 3 ][ 1 ] IF xArgExpr[ 1 ] == ND_COL .AND. xArgExpr[ 2 ] != "*" /* Use the FULL qualified name (e.g. "o.amount") so @@ -1372,6 +1938,131 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF NEXT + /* Wrapped window function (`SUM(x) OVER () + 100`): the top-level + * node is ND_BIN/ND_UNI/ND_FN(non-agg)/ND_CASE wrapping ND_WINDOW. + * ApplyWindowFunctions only scans aCols[i] when its top-level node + * IS ND_WINDOW, so a wrapped window expression evaluates to the + * placeholder 0 returned by EvalExpr's ND_WINDOW branch. Walk every + * SELECT projection, extract any ND_WINDOW into a hidden column + * (`__win____`) and substitute its position with a plain + * ND_COL pointing at that hidden column. ApplyWindowFunctions then + * computes the inner window per row, projection picks up the value + * via the ND_COL lookup, and the outer arithmetic falls out of the + * normal ND_BIN/ND_UNI evaluator. Hidden columns get trimmed back + * off the result via nUserCols at RunSelect's tail. */ + ::aWrappedWindowCols := {} + FOR i := 1 TO Len( aCols ) + IF aCols[ i ][ 1 ] != NIL .AND. ValType( aCols[ i ][ 1 ] ) == "A" .AND. ; + aCols[ i ][ 1 ][ 1 ] != ND_WINDOW + aLeafCols := {} /* reuse var: holds {windowExpr, alias} pairs */ + aCols[ i ][ 1 ] := SqlExtractWindow( aCols[ i ][ 1 ], aLeafCols, ; + "__win_" + AllTrim( hb_NToS( i ) ) ) + IF Len( aLeafCols ) > 0 + /* Track this column for post-window re-evaluation. The + * fetch loop runs BEFORE ApplyWindowFunctions, so the + * outer expression sees NIL/0 in the hidden window slot + * and folds it through the wrapper as NIL. After + * ApplyWindowFunctions fills the hidden slot, we re-eval + * the outer expression per row to pick up the real value. */ + AAdd( ::aWrappedWindowCols, i ) + FOR k := 1 TO Len( aLeafCols ) + AAdd( aResultExprs, { aLeafCols[ k ][ 1 ], aLeafCols[ k ][ 2 ] } ) + AAdd( aCols, { aLeafCols[ k ][ 1 ], aLeafCols[ k ][ 2 ] } ) + NEXT + ENDIF + ENDIF + NEXT + + /* Hidden columns for HAVING expressions: any aggregate source + * column referenced only inside HAVING (and not in SELECT) must + * still be carried through so EvalHavingExpr → ComputeAgg can + * resolve its argument. Without this, `SELECT dept FROM s + * GROUP BY dept HAVING SUM(amt) > 200` returned 0 rows because + * `amt` was never materialised on grouped rows and ComputeAgg + * fell back to nCol=0 → 0. Same SqlCollectColExprs walk as + * the wrapped-aggregate branch above; collects every ND_COL + * leaf reachable from xHaving. */ + IF xHaving != NIL + aLeafCols := SqlCollectColExprs( xHaving, NIL ) + FOR k := 1 TO Len( aLeafCols ) + cBare := aLeafCols[ k ][ 2 ] + lFound := .F. + FOR j := 1 TO Len( aResultExprs ) + IF Upper( aResultExprs[ j ][ 2 ] ) == Upper( cBare ) + lFound := .T. + EXIT + ENDIF + NEXT + IF ! lFound + AAdd( aResultExprs, { aLeafCols[ k ], cBare } ) + ENDIF + NEXT + ENDIF + + /* Hidden columns for ORDER BY expressions not already in the + * SELECT list. Without this, ORDER BY references a column that + * the materialised rows don't carry — TryBuildSortSpec returns + * NIL (col not in aFieldNames), PRG OrderBy likewise can't bind + * it, and the result ends up in undefined order (typically + * insertion order). Pattern mirrors the aggregate hidden-column + * loop above so ORDER BY over non-projected fields works out of + * the box. */ + IF ValType( aOrderBy ) == "A" + FOR i := 1 TO Len( aOrderBy ) + IF ValType( aOrderBy[ i ] ) != "A" .OR. Len( aOrderBy[ i ] ) == 0 + LOOP + ENDIF + xExpr := aOrderBy[ i ][ 1 ] + IF ValType( xExpr ) != "A" .OR. Len( xExpr ) < 2 + LOOP + ENDIF + /* Wrapped ORDER BY expression (ND_BIN / ND_UNI / ND_FN — + * e.g. `ORDER BY MAX(amt) + 1 DESC`): without a hidden + * column the sort layer can't bind the expression and rows + * stay in insertion order. Append the whole expression to + * aResultExprs under a synthetic alias `__ord___` and + * rewrite the ORDER BY entry in-place to a plain ND_COL + * pointing at that alias. GroupBy / projection then computes + * the wrapped aggregate per row, and TSqlSort:OrderBy can + * find the column by name. The hidden columns get trimmed + * back off the final result via ::nUserCols (set just below + * after this whole hidden-col block runs). */ + IF xExpr[ 1 ] != ND_COL + cBare := "__ord_" + AllTrim( hb_NToS( i ) ) + "__" + AAdd( aResultExprs, { xExpr, cBare } ) + /* GroupBy / aFieldNames rebuild downstream both walk + * `aCols` (the user-visible SELECT list), not aResultExprs. + * Without mirroring the hidden append into aCols, GroupBy + * never evaluates the wrapped expression and the new + * "__ord___" name doesn't appear in aFieldNames → + * SqlFindColIdx returns 0 → sort silently no-ops. The + * trim block at RunSelect's tail still strips the column + * back off the result, so callers see the same shape. */ + AAdd( aCols, { xExpr, cBare } ) + aOrderBy[ i ][ 1 ] := { ND_COL, cBare, NIL, NIL, NIL } + LOOP + ENDIF + cBare := xExpr[ 2 ] + IF cBare == "*" + LOOP + ENDIF + lFound := .F. + FOR j := 1 TO Len( aResultExprs ) + IF Upper( aResultExprs[ j ][ 2 ] ) == Upper( cBare ) .OR. ; + ( ValType( aResultExprs[ j ][ 1 ] ) == "A" .AND. ; + Len( aResultExprs[ j ][ 1 ] ) >= 2 .AND. ; + aResultExprs[ j ][ 1 ][ 1 ] == ND_COL .AND. ; + Upper( aResultExprs[ j ][ 1 ][ 2 ] ) == Upper( cBare ) ) + lFound := .T. + EXIT + ENDIF + NEXT + IF ! lFound + AAdd( aResultExprs, { xExpr, cBare } ) + ENDIF + NEXT + ENDIF + FOR i := 1 TO Len( aResultExprs ) AAdd( aFieldNames, aResultExprs[ i ][ 2 ] ) NEXT @@ -1385,6 +2076,23 @@ METHOD RunSelect() CLASS TSqlExecutor aResultExprs[ i ][ 1 ] := SqlFoldConst( aResultExprs[ i ][ 1 ] ) NEXT + /* Pre-resolve column references to {nWA, nFPos} pairs so the + * per-row EvalExpr hot path avoids repeated At/Upper/FindWA/ + * FieldPos work on every ND_COL visit. Scope: WHERE, HAVING, + * every JOIN's ON condition. Projection columns go through + * FetchRow's own cache and don't need this pre-walk. */ + IF xWhere != NIL + ::PreResolveColumns( xWhere ) + ENDIF + IF xHaving != NIL + ::PreResolveColumns( xHaving ) + ENDIF + FOR i := 1 TO Len( aJoins ) + IF aJoins[ i ][ 4 ] != NIL + ::PreResolveColumns( aJoins[ i ][ 4 ] ) + ENDIF + NEXT + SET DELETED ON /* SELECT without FROM */ @@ -1406,14 +2114,47 @@ METHOD RunSelect() CLASS TSqlExecutor lIndexUsed := .F. + /* Hand the current executor to TSqlIndex so its per-row + * seek loop can skip the SqlEvalExprNode/SqlFetchRowArr + * throwaway-executor allocations. */ + ::oIndex:oExec := Self + + /* Resolve LIMIT / ORDER-BY-from-index BEFORE the index-scan + * dispatch so TryIndexScan can early-terminate, and the + * Go-fast-path below can reuse the same lOrderFromIndex + * value to skip the post-scan sort. Gates mirror the + * original fallback computation — single-table, no GROUP / + * Agg / DISTINCT, LIMIT or TOP present. */ + lOrderFromIndex := .F. + nEarlyLimit := 0 + IF Len( aJoins ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ; + ! ::oAgg:HasAgg( aCols ) .AND. ! lDistinct .AND. ; + ( ( ValType( nLimit ) == "N" .AND. nLimit > 0 ) .OR. ; + ( ValType( nTop ) == "N" .AND. nTop > 0 ) ) + IF Len( aOrderBy ) > 0 + lOrderFromIndex := ::oIndex:MatchOrderByTag( nWA, aOrderBy, aFieldNames ) + ENDIF + IF Len( aOrderBy ) == 0 .OR. lOrderFromIndex + nEarlyLimit := iif( ValType( nLimit ) == "N" .AND. nLimit > 0, ; + nLimit, nTop ) + /* OFFSET is post-processed; pull enough rows to + * satisfy LIMIT *after* the OFFSET skip. */ + IF nOffset > 0 + nEarlyLimit += nOffset + ENDIF + ENDIF + ENDIF + IF Len( aJoins ) == 0 .AND. xWhere != NIL lIndexUsed := ::oIndex:TryIndexScan( nWA, xWhere, xWhere, ; - ::aTables, ::aParams, aResultExprs, @aRows ) + ::aTables, ::aParams, aResultExprs, @aRows, nEarlyLimit ) ELSEIF Len( aJoins ) > 0 .AND. xWhere != NIL lIndexUsed := ::oIndex:TryIndexJoinScan( nWA, xWhere, ; ::aTables, ::aParams, aResultExprs, @aRows, aJoins ) ENDIF + ::oIndex:oExec := NIL + IF ! lIndexUsed dbSelectArea( nWA ) dbGoTop() @@ -1446,6 +2187,18 @@ METHOD RunSelect() CLASS TSqlExecutor aFP := NIL pcW := NIL aGoRows := NIL + /* lOrderFromIndex / nEarlyLimit were resolved above, + * before the TryIndexScan dispatch, so both index-scan + * and Go-fast-path branches share the same values. The + * call there may have moved the record pointer via + * ordSetFocus+dbSeek; if lOrderFromIndex is set, we + * re-anchor to the logical top of the focused tag so + * SqlScan's GoTop+Skip loop walks from the first + * ordered row. */ + IF lOrderFromIndex + dbSelectArea( nWA ) + dbGoTop() + ENDIF IF Len( aJoins ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ; ! ::oAgg:HasAgg( aCols ) /* Plan pcode cache: cache aFP + pcW per cCacheKey. @@ -1470,6 +2223,9 @@ METHOD RunSelect() CLASS TSqlExecutor ENDIF ENDIF IF aFP != NIL .AND. ! Empty( ::cCacheKey ) + IF Len( s_hDmlPcodeCache ) >= SQL_DML_PCODE_CACHE_MAX + s_hDmlPcodeCache := { => } + ENDIF s_hDmlPcodeCache[ ::cCacheKey + "#sel" ] := { ; "fp" => aFP, ; "where_pc" => pcW } @@ -1487,7 +2243,7 @@ METHOD RunSelect() CLASS TSqlExecutor SqlEach( aFP, pcW, ::bRowBlock ) aGoRows := {} /* signal "handled" to skip fallback */ ELSE - aGoRows := SqlScan( aFP, pcW ) + aGoRows := SqlScan( aFP, pcW, nEarlyLimit ) FOR i := 1 TO Len( aGoRows ) AAdd( aRows, aGoRows[ i ] ) NEXT @@ -1503,23 +2259,52 @@ METHOD RunSelect() CLASS TSqlExecutor * join recursion. Huge win for multi-table scans. */ ::aFetchCache := ::BuildFetchCache( aResultExprs ) dbSelectArea( nWA ) - /* Early-termination LIMIT: when the query has a plain - * LIMIT / TOP and no ORDER BY, GROUP BY, aggregates, - * or DISTINCT, we can stop scanning as soon as aRows - * reaches the cap. Huge win for `EXISTS` which plants - * an implicit LIMIT 1 into the subquery's hQuery. */ - nEarlyLimit := 0 - IF ( ValType( nLimit ) == "N" .AND. nLimit > 0 ) .OR. ; - ( ValType( nTop ) == "N" .AND. nTop > 0 ) - IF Len( aOrderBy ) == 0 .AND. Len( aGroupBy ) == 0 .AND. ; - ! ::oAgg:HasAgg( aCols ) .AND. ! lDistinct - nEarlyLimit := iif( ValType( nLimit ) == "N" .AND. nLimit > 0, ; - nLimit, nTop ) - ENDIF + /* WHERE predicate pushdown: split the top-level AND + * chain into clauses and group them by the deepest + * JOIN level whose columns they reference. Clauses + * pinned to a middle level are evaluated inside + * JoinRecurse as soon as that level's row is + * positioned — rejected rows skip the exponential + * deeper-join expansion that would otherwise happen + * until xWhere fired at the base case. Clauses that + * can't be classified (unqualified columns, aliases + * we can't resolve) fall back to the residual + * xRes evaluated at the base case, preserving + * existing semantics. */ + aPushByLevel := NIL + xRes := xWhere + IF xWhere != NIL .AND. Len( aJoins ) > 0 + hAliasLvl := ::BuildAliasLevelMap( aJoins ) + aClauses := ::SplitAndClauses( xWhere, NIL ) + aPushByLevel := Array( Len( aJoins ) ) + FOR ii := 1 TO Len( aPushByLevel ) + aPushByLevel[ ii ] := {} + NEXT + xRes := NIL + FOR ii := 1 TO Len( aClauses ) + nLvl := ::ClauseMaxLevel( aClauses[ ii ], hAliasLvl, Len( aJoins ) ) + IF nLvl >= 1 .AND. nLvl < Len( aJoins ) + AAdd( aPushByLevel[ nLvl ], aClauses[ ii ] ) + ELSE + /* Level 0 (outer-only) and Len(aJoins) + * (needs all joins) stay in the residual + * evaluated at the base case — the + * existing behavior. */ + IF xRes == NIL + xRes := aClauses[ ii ] + ELSE + xRes := SqlNode( ND_BIN, "AND", xRes, aClauses[ ii ], NIL ) + ENDIF + ENDIF + NEXT ENDIF + /* lOrderFromIndex / nEarlyLimit were resolved above, + * before the Go-fast-path decision, so both paths + * share the same ORDER-BY-from-index detection and + * row-count cap. */ WHILE ! Eof() IF Len( aJoins ) > 0 - ::JoinRecurse( aJoins, 1, xWhere, aResultExprs, @aRows, hJoinHash ) + ::JoinRecurse( aJoins, 1, xRes, aResultExprs, @aRows, hJoinHash, aPushByLevel ) dbSelectArea( nWA ) ELSE IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) @@ -1555,13 +2340,34 @@ METHOD RunSelect() CLASS TSqlExecutor /* Window functions */ ::ApplyWindowFunctions( @aRows, aFieldNames, aCols ) + /* Re-evaluate wrapped window expressions per row. ApplyWindowFunctions + * has now filled the hidden `__win____` slots with real values; + * the outer arithmetic / function call needs a second pass to pick up + * the post-window value. */ + IF ::aWrappedWindowCols != NIL .AND. Len( ::aWrappedWindowCols ) > 0 + FOR k := 1 TO Len( ::aWrappedWindowCols ) + i := ::aWrappedWindowCols[ k ] + IF i > 0 .AND. i <= Len( aCols ) + FOR j := 1 TO Len( aRows ) + IF i <= Len( aRows[ j ] ) + aRows[ j ][ i ] := SqlEvalRowExpr( aCols[ i ][ 1 ], aFieldNames, aRows[ j ] ) + ENDIF + NEXT + ENDIF + NEXT + ENDIF + /* ORDER BY — try Go-native sort first (10-50x faster for large sets), - * fall back to PRG for complex expressions in ORDER BY. */ - IF Len( aOrderBy ) > 0 + * fall back to PRG for complex expressions in ORDER BY. + * + * lOrderFromIndex set above (pre-scan) means we already walked the + * table in tag order so the result is sorted — short-circuit here + * to avoid a redundant MatchOrderByTag probe. */ + IF Len( aOrderBy ) > 0 .AND. ! lOrderFromIndex IF ! ( nWA > 0 .AND. ::oIndex:MatchOrderByTag( nWA, aOrderBy, aFieldNames ) ) - LOCAL aSortSpec := ::TryBuildSortSpec( aOrderBy, aFieldNames ) - IF aSortSpec != NIL .AND. Len( aRows ) > 0 - aRows := SqlOrderBy( aRows, aSortSpec ) + LOCAL aSortSpec2 := ::TryBuildSortSpec( aOrderBy, aFieldNames ) + IF aSortSpec2 != NIL .AND. Len( aRows ) > 0 + aRows := SqlOrderBy( aRows, aSortSpec2 ) ELSE aRows := ::oSort:OrderBy( aRows, aFieldNames, aOrderBy, ::aTables, ::aParams ) ENDIF @@ -1585,6 +2391,19 @@ METHOD RunSelect() CLASS TSqlExecutor * the second's rows, producing more rows than intended. */ IF hUnion != NIL aU := TSqlExecutor():New( hUnion, ::aParams ):Run() + /* SQL standard: set operations require the same column count on + * both sides. Previously a mismatch silently truncated the wider + * side to the narrower's width, masking schema bugs (`SELECT a + * UNION SELECT a, b` returned 1-col rows with `b` dropped). Bail + * out with a structured error instead. */ + IF ValType( aU ) == "A" .AND. Len( aU ) >= 1 .AND. ; + ValType( aU[ 1 ] ) == "A" .AND. Len( aU[ 1 ] ) > 0 .AND. ; + aU[ 1 ][ 1 ] != "__error__" .AND. ; + Len( aU[ 1 ] ) != Len( aFieldNames ) + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "UNION/INTERSECT/EXCEPT: each query must have the same number of columns (" + ; + hb_NToS( Len( aFieldNames ) ) + " vs " + hb_NToS( Len( aU[ 1 ] ) ) + ")" ) + ENDIF IF hb_HHasKey( hUnion, "set_op" ) IF hUnion[ "set_op" ] == "INTERSECT" aRows := SqlDoIntersect( aRows, aU[ 2 ] ) @@ -1596,15 +2415,38 @@ METHOD RunSelect() CLASS TSqlExecutor IF hb_HHasKey( hUnion, "union_all" ) lAll := hUnion[ "union_all" ] ENDIF - FOR i := 1 TO Len( aU[ 2 ] ) - AAdd( aRows, aU[ 2 ][ i ] ) - NEXT - IF ! lAll - aRows := ::oSort:Distinct( aRows ) + IF lAll + /* UNION ALL — plain append, no dedup. */ + FOR i := 1 TO Len( aU[ 2 ] ) + AAdd( aRows, aU[ 2 ][ i ] ) + NEXT + ELSE + /* Streaming DISTINCT: build one hash set across both + * sides in the Go RTL. Saves the append-then-rescan pass + * the old path did (materialise merged array + run + * SqlDistinct, two walks over |L|+|R| rows). */ + aRows := SqlUnionDistinct( aRows, aU[ 2 ] ) ENDIF ENDIF ENDIF + /* Trim hidden columns BEFORE DISTINCT so the dedup hash sees only + * user-visible columns. ORDER BY has already used the hidden cols + * (aggregate sources, `__ord___` for wrapped expressions, etc.) + * so they're free to drop. Without trimming first, `SELECT + * DISTINCT grp ORDER BY id` returned every original row because the + * synthetic `__ord_1__` column made each row uniquely keyed. */ + IF nUserCols != NIL .AND. nUserCols > 0 .AND. ; + Len( aFieldNames ) > nUserCols + nTrim := nUserCols + FOR nRow := 1 TO Len( aRows ) + IF Len( aRows[ nRow ] ) > nTrim + ASize( aRows[ nRow ], nTrim ) + ENDIF + NEXT + ASize( aFieldNames, nTrim ) + ENDIF + /* DISTINCT */ IF lDistinct aRows := ::oSort:Distinct( aRows ) @@ -1621,13 +2463,26 @@ METHOD RunSelect() CLASS TSqlExecutor aRows := {} ENDIF - /* TOP / LIMIT */ + /* TOP / LIMIT. + * SQL semantics: + * LIMIT 0 → empty result + * LIMIT n>0 → first n rows + * LIMIT n<0 → ill-defined; clamp to 0 (matches SQLite tolerance) + * + * Old code only clipped when `nLimit > 0`, so LIMIT 0 returned + * the full result and LIMIT -1 was silently a no-op. */ nMaxRows := 0 IF ValType( nTop ) == "N" .AND. nTop > 0 nMaxRows := nTop ENDIF - IF ValType( nLimit ) == "N" .AND. nLimit > 0 - nMaxRows := nLimit + IF ValType( nLimit ) == "N" + IF nLimit > 0 + nMaxRows := nLimit + ELSEIF nLimit <= 0 + /* Explicit zero / negative → return no rows. */ + aRows := {} + nMaxRows := 0 + ENDIF ENDIF IF nMaxRows > 0 .AND. Len( aRows ) > nMaxRows ASize( aRows, nMaxRows ) @@ -1684,11 +2539,26 @@ METHOD RunSelect() CLASS TSqlExecutor RETURN NIL ENDIF + /* Trim hidden columns added during the projection-rewrite phase + * (aggregate sources, HAVING leaves, ORDER BY wrapped expressions). + * Without trimming, callers see synthetic `__ord___` slots in + * their fetched rows and the leading aggregate-source columns. */ + IF nUserCols != NIL .AND. nUserCols > 0 .AND. ; + Len( aFieldNames ) > nUserCols + nTrim := nUserCols + FOR nRow := 1 TO Len( aRows ) + IF Len( aRows[ nRow ] ) > nTrim + ASize( aRows[ nRow ], nTrim ) + ENDIF + NEXT + ASize( aFieldNames, nTrim ) + ENDIF + RETURN { aFieldNames, aRows } /* Hash join: build hash table from inner table, probe with outer key */ -METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere, aRE, aRows, hHashTbl ) CLASS TSqlExecutor +METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere, aRE, aRows, hHashTbl, aPushByLevel ) CLASS TSqlExecutor LOCAL cHashKey, aMatches, xOuterVal, xInnerVal, cValKey LOCAL nFPos, nSavedRec, i, lHadMatch @@ -1735,6 +2605,11 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere IF lCompound .AND. ! SqlIsTrue( ::EvalExpr( xOnCond ) ) LOOP ENDIF + /* Pushdown: if any clause pinned to this level rejects + * the row, skip it before building the result tuple. */ + IF ! ::EvalPushedAtLevel( aPushByLevel, nIdx ) + LOOP + ENDIF lHadMatch := .T. IF ! hb_HHasKey( ::hRightMatched, cHJRMKey ) ::hRightMatched[ cHJRMKey ] := { => } @@ -1756,7 +2631,9 @@ METHOD HashJoin( nInnerWA, cInnerField, cOuterCol, xOnCond, aJoins, nIdx, xWhere ::hRightMatched[ cHJRMKey ] := { => } ENDIF ::hRightMatched[ cHJRMKey ][ aMatches[ i ] ] := .T. - ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl ) + IF ::EvalPushedAtLevel( aPushByLevel, nIdx ) + ::JoinRecurse( aJoins, nIdx + 1, xWhere, aRE, @aRows, hHashTbl, aPushByLevel ) + ENDIF NEXT ENDIF ENDIF @@ -1955,8 +2832,12 @@ METHOD TryBuildSemiJoin( xSubNode ) CLASS TSqlExecutor hLifted[ "columns" ] := { { SqlNode( ND_COL, cInnerField, NIL, NIL, NIL ), cInnerField } } hLifted[ "where" ] := xRemain hLifted[ "distinct" ] := .T. - hLifted[ "limit" ] := 0 - hLifted[ "top" ] := 0 + /* `limit`/`top` use NIL ("no limit") instead of 0. After the recent + * LIMIT 0 fix, `0` is treated as an explicit "return no rows" + * sentinel — which collapsed every lifted EXISTS query to an empty + * inner_set and produced false negatives. */ + hLifted[ "limit" ] := NIL + hLifted[ "top" ] := NIL hLifted[ "order_by" ] := {} hLifted[ "group_by" ] := {} hLifted[ "having" ] := NIL @@ -2089,7 +2970,7 @@ RETURN .F. METHOD SubqueryCached( xSubNode ) CLASS TSqlExecutor LOCAL hQ, aFreeVars, cCacheKey, aResult, nSavedWA, oSub - LOCAL i, xVal, nId, nSlot, aSlot + LOCAL i, xVal, nId, nSlot, aSlot, aKeyVals, aRecSave IF xSubNode == NIL .OR. ValType( xSubNode ) != "A" .OR. Len( xSubNode ) < 2 RETURN NIL @@ -2121,12 +3002,15 @@ METHOD SubqueryCached( xSubNode ) CLASS TSqlExecutor aFreeVars := aSlot[ 2 ] /* Build cache key from current values of free variables via - * Resolve(), which walks the outer context stack. */ - cCacheKey := hb_ntos( nId ) + "@" + * Resolve(), which walks the outer context stack. The value ↦ + * string encoding + final join happen in Go (SqlBuildSubCacheKey) + * so the hot cache-hit path avoids N PRG string concatenations + * and N SqlValToStr dispatches per outer row. */ + aKeyVals := Array( Len( aFreeVars ) ) FOR i := 1 TO Len( aFreeVars ) - xVal := ::Resolve( aFreeVars[ i ] ) - cCacheKey += SqlValToStr( xVal ) + "|" + aKeyVals[ i ] := ::Resolve( aFreeVars[ i ] ) NEXT + cCacheKey := SqlBuildSubCacheKey( nId, aKeyVals ) IF hb_HHasKey( ::hSubCorrCache, cCacheKey ) RETURN ::hSubCorrCache[ cCacheKey ] @@ -2135,17 +3019,37 @@ METHOD SubqueryCached( xSubNode ) CLASS TSqlExecutor /* Cache miss — execute the subquery. PushOuter so nested ::Resolve * calls can see the current outer row's values. Use BEGIN SEQUENCE * to guarantee PopOuter runs even on subquery runtime errors — - * a stale s_aOuterStack entry would corrupt all subsequent queries. */ + * a stale s_aOuterStack entry would corrupt all subsequent queries. + * + * Workarea snapshot: the subquery may scan the SAME table the + * outer query is iterating, and SqlExecOpenTable only renames + * aliases deeper than nDepth=1 — so the first-level subquery's + * scan drives the shared workarea past EOF. Save every open + * workarea's RecNo up front and restore it before returning so + * the outer iterator resumes exactly where it left off. Depth + * bump is still set for good measure (avoids re-entering the + * same subquery's own workarea on recursion). */ nSavedWA := Select() + aRecSave := ::SnapshotAreaRecNos() ::PushOuter() BEGIN SEQUENCE oSub := TSqlExecutor():New( hQ, ::aParams ) - oSub:nDepth := ::nDepth + /* +2 (not +1): the alias-rename gate in the table-open loop + * requires `nDepth > 1` to fire. Bumping by 1 from a top-level + * (depth-0) outer landed at depth 1 which still shares aliases + * with the outer scope — a subquery over the same table + * inherited the outer's workarea and the inner scan drove + * the outer's record pointer to EOF, truncating the outer + * result. Bumping by 2 forces the alias-acquire path so the + * subquery always gets a fresh workarea; deeper nesting stays + * strictly increasing. */ + oSub:nDepth := ::nDepth + 2 aResult := oSub:Run() RECOVER aResult := NIL END SEQUENCE ::PopOuter() + ::RestoreAreaRecNos( aRecSave ) dbSelectArea( nSavedWA ) IF aResult != NIL @@ -2271,9 +3175,60 @@ METHOD CollectExprFreeVars( xE, aLocalAliases, aFree ) CLASS TSqlExecutor RETURN NIL +/* Snapshot every currently-open workarea's (alias, RecNo) pair so a + * nested subquery scan that shares one of those areas can't leave + * the record pointer past EOF. Harbour doesn't expose a clean "list + * open workareas" primitive, so we probe a fixed range (1..MAX) and + * skip the ones with empty aliases. MAX stays small — any workload + * needing more open areas simultaneously would already break the + * default Harbour limit. */ +METHOD SnapshotAreaRecNos() CLASS TSqlExecutor + + LOCAL aSnap := {}, i, nSaved, cAlias + + nSaved := Select() + FOR i := 1 TO 64 + dbSelectArea( i ) + cAlias := Alias() + IF ! Empty( cAlias ) + AAdd( aSnap, { i, RecNo(), cAlias } ) + ENDIF + NEXT + dbSelectArea( nSaved ) + +RETURN aSnap + + +METHOD RestoreAreaRecNos( aSnap ) CLASS TSqlExecutor + + LOCAL i, nSaved + + IF aSnap == NIL .OR. Len( aSnap ) == 0 + RETURN NIL + ENDIF + + nSaved := Select() + FOR i := 1 TO Len( aSnap ) + /* Bind by alias when possible — subquery execution can + * rebalance the workarea table (e.g. close a CTE temp area), + * so the original numeric slot may now hold something else. + * If the alias is gone, silently skip the restore for that + * entry rather than seeking into an unrelated area. */ + IF Select( aSnap[ i ][ 3 ] ) == aSnap[ i ][ 1 ] + dbSelectArea( aSnap[ i ][ 1 ] ) + IF aSnap[ i ][ 2 ] > 0 .AND. aSnap[ i ][ 2 ] <= LastRec() + dbGoto( aSnap[ i ][ 2 ] ) + ENDIF + ENDIF + NEXT + dbSelectArea( nSaved ) + +RETURN NIL + + METHOD CacheSubquery( xSubExpr ) CLASS TSqlExecutor - LOCAL cKey, aSubResult, nSavedWA, oSub + LOCAL cKey, aSubResult, nSavedWA, oSub, aRecSave /* Build cache key from subquery tokens */ cKey := SqlSubqueryKey( xSubExpr ) @@ -2282,14 +3237,17 @@ METHOD CacheSubquery( xSubExpr ) CLASS TSqlExecutor RETURN ::hSubCache[ cKey ] ENDIF - /* Execute and cache the result. - * Inherit current depth so the subquery opens tables with a - * depth-suffixed alias, avoiding workarea collisions with - * the outer query (e.g. scalar subquery on the same table). */ + /* Snapshot all open workareas' RecNos: same rationale as + * SubqueryCached — the inner scan can move the outer's record + * pointer when the subquery opens a table that's already in + * play. Non-correlated subqueries run once so the overhead is + * negligible; correlated callers go through SubqueryCached. */ nSavedWA := Select() + aRecSave := ::SnapshotAreaRecNos() oSub := TSqlExecutor():New( xSubExpr, ::aParams ) - oSub:nDepth := ::nDepth + oSub:nDepth := ::nDepth + 2 aSubResult := oSub:Run() + ::RestoreAreaRecNos( aRecSave ) dbSelectArea( nSavedWA ) ::hSubCache[ cKey ] := aSubResult @@ -2388,17 +3346,123 @@ RETURN NIL METHOD RunInsert() CLASS TSqlExecutor - LOCAL cTable, aFields, aValExprs, cAlias, nWA, i, nFPos, xVal - LOCAL aAutoInc, nAutoVal + LOCAL cTable, aFields, aRows, aValExprs, cAlias, nWA, i, nFPos, xVal + LOCAL aAutoInc, nAutoVal, hSelect, aSelResult, aSelRows, nTuple + LOCAL aErrResult, nInserted := 0, nTotal + LOCAL aStructFlags, aStructRaw, aStructTypes cTable := ::hQuery[ "table" ] aFields := ::hQuery[ "fields" ] - aValExprs := ::hQuery[ "values" ] cAlias := cTable - aAutoInc := SqlGetAutoIncFields( cTable ) + /* Materialize CTEs first — `WITH cte AS (...) INSERT ...` + * needs the CTE's temp table to exist before any SELECT + * subqueries inside aRows / aSelect resolve against it. */ + IF hb_HHasKey( ::hQuery, "cte" ) .AND. ValType( ::hQuery[ "cte" ] ) == "A" + IF hb_HHasKey( ::hQuery, "cte_recursive" ) .AND. ::hQuery[ "cte_recursive" ] + ::MaterializeRecursiveCTE( ::hQuery[ "cte" ] ) + ELSE + ::MaterializeCTE( ::hQuery[ "cte" ] ) + ENDIF + ENDIF + /* Same pre-flight existence check as RunUpdate — if the user + * names a missing table, return SQL_ERR_NO_TABLE rather than + * letting dbUseArea bubble up an opaque runtime panic. */ + IF ! File( Lower( cTable ) + ".dbf" ) .AND. ! File( cTable + ".dbf" ) + RETURN ::MakeError( SQL_ERR_NO_TABLE, ; + "Table '" + cTable + "' does not exist" ) + ENDIF + + /* Schema migration note: old plans emitted h["values"] (a flat + * expression array, one tuple). Current parser emits h["rows"] + * (array of tuples) plus h["select"] for INSERT ... SELECT. The + * lookup here tolerates either shape for cached-plan callers and + * drives the per-row loop below. */ + IF hb_HHasKey( ::hQuery, "rows" ) .AND. ValType( ::hQuery[ "rows" ] ) == "A" + aRows := ::hQuery[ "rows" ] + ELSE + aRows := {} + ENDIF + IF Len( aRows ) == 0 .AND. hb_HHasKey( ::hQuery, "values" ) + aRows := { ::hQuery[ "values" ] } + ENDIF + + /* INSERT ... SELECT: evaluate the subquery once, convert each + * result row into a tuple of literal-ish ND_LIT nodes, then fall + * through to the standard per-tuple loop. Keeping the rows-path + * as the single execution code path means CHECK / FK / UNIQUE / + * auto-inc / txn-log all run identically whether the values came + * from VALUES (...) tuples or from a SELECT. */ + IF hb_HHasKey( ::hQuery, "select" ) + hSelect := ::hQuery[ "select" ] + aSelResult := TSqlExecutor():New( hSelect, ::aParams ):Run() + IF ValType( aSelResult ) == "A" .AND. Len( aSelResult ) >= 1 .AND. ; + ValType( aSelResult[ 1 ] ) == "A" .AND. Len( aSelResult[ 1 ] ) > 0 .AND. ; + aSelResult[ 1 ][ 1 ] == "__error__" + RETURN aSelResult + ENDIF + IF ValType( aSelResult ) == "A" .AND. Len( aSelResult ) >= 2 .AND. ; + ValType( aSelResult[ 2 ] ) == "A" + aSelRows := aSelResult[ 2 ] + FOR i := 1 TO Len( aSelRows ) + aValExprs := {} + FOR nTuple := 1 TO Len( aSelRows[ i ] ) + AAdd( aValExprs, SqlNode( ND_LIT, aSelRows[ i ][ nTuple ], NIL, NIL, NIL ) ) + NEXT + AAdd( aRows, aValExprs ) + NEXT + ENDIF + ENDIF + + aAutoInc := SqlGetAutoIncFields( cTable ) nWA := SqlExecOpenTable( cTable, cAlias ) + nTotal := Len( aRows ) + + /* Snapshot the struct once: field flags (5th element) are used by + * the per-tuple loop below to reject a NIL write into a NOT NULL + * column. Without this the PutValue path would silently coerce + * NIL → 0 / blank and defeat the schema contract. Tables on + * pre-nullable-flag plans return 4-element rows; treat missing + * flag as "unknown → accept NIL" to stay backward-compatible. */ + aStructFlags := {} + aStructTypes := {} + aStructRaw := dbStruct() + FOR i := 1 TO Len( aStructRaw ) + IF Len( aStructRaw[ i ] ) >= 5 .AND. ValType( aStructRaw[ i ][ 5 ] ) == "N" + AAdd( aStructFlags, aStructRaw[ i ][ 5 ] ) + ELSE + AAdd( aStructFlags, 2 ) /* unknown → assume nullable */ + ENDIF + AAdd( aStructTypes, Upper( aStructRaw[ i ][ 2 ] ) ) + NEXT + + /* Pre-flight: explicit column list must reference real columns, + * and tuple width can't exceed the table's column count. Catch + * both up front so we don't silently drop user input — previously + * `INSERT INTO t VALUES (1, 'a', 'extra')` succeeded with the + * 'extra' value dropped, and `INSERT INTO t (no_such) VALUES (1)` + * succeeded with the value going nowhere. */ + IF Len( aFields ) > 0 + FOR i := 1 TO Len( aFields ) + IF FieldPos( aFields[ i ] ) == 0 + SqlExecCloseTable( cAlias, nWA ) + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "INSERT: column '" + aFields[ i ] + ; + "' does not exist in " + cTable ) + ENDIF + NEXT + ENDIF + IF nTotal > 0 .AND. Len( aFields ) == 0 .AND. Len( aRows[ 1 ] ) > FCount() + SqlExecCloseTable( cAlias, nWA ) + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "INSERT: " + hb_NToS( Len( aRows[ 1 ] ) ) + ; + " values supplied but " + cTable + " has " + ; + hb_NToS( FCount() ) + " columns" ) + ENDIF + + FOR nTuple := 1 TO nTotal + aValExprs := aRows[ nTuple ] dbAppend() IF Len( aFields ) > 0 @@ -2406,12 +3470,76 @@ METHOD RunInsert() CLASS TSqlExecutor nFPos := FieldPos( aFields[ i ] ) IF nFPos > 0 xVal := ::EvalExpr( aValExprs[ i ] ) + /* NOT NULL guard: reject explicit NIL into a column + * whose Flags lack the nullable bit (0x02). dbDelete + * rolls back the phantom record so the user sees the + * old table state. */ + IF xVal == NIL .AND. nFPos <= Len( aStructFlags ) .AND. ; + hb_BitAnd( aStructFlags[ nFPos ], 2 ) == 0 + dbDelete() + dbCommit() + IF nWA == 0 + dbCloseArea() + ENDIF + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "NOT NULL violation: column " + aFields[ i ] + ; + " on table " + cTable ) + ENDIF + xVal := SqlCoerceToCol( xVal, aStructTypes, nFPos ) + /* Numeric overflow guard: Str() returns a string of '*' + * when the value doesn't fit in the column's (width,dec). + * Without this, INSERT INTO n(N(4,0)) VALUES (99999999) + * silently stored 0 (or garbage) — DBF's N codec writes + * the truncated representation instead of erroring out. */ + IF nFPos <= Len( aStructTypes ) .AND. aStructTypes[ nFPos ] == "N" .AND. ; + ValType( xVal ) == "N" + IF '*' $ Str( xVal, FieldLen( nFPos ), FieldDec( nFPos ) ) + dbDelete() + dbCommit() + IF nWA == 0 + dbCloseArea() + ENDIF + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "Numeric overflow: " + AllTrim( hb_NToS( xVal ) ) + ; + " does not fit in " + cTable + "." + aFields[ i ] + ; + " (N(" + AllTrim( hb_NToS( FieldLen( nFPos ) ) ) + ; + "," + AllTrim( hb_NToS( FieldDec( nFPos ) ) ) + "))" ) + ENDIF + ENDIF FieldPut( nFPos, xVal ) ENDIF NEXT ELSE FOR i := 1 TO Min( FCount(), Len( aValExprs ) ) xVal := ::EvalExpr( aValExprs[ i ] ) + IF xVal == NIL .AND. i <= Len( aStructFlags ) .AND. ; + hb_BitAnd( aStructFlags[ i ], 2 ) == 0 + dbDelete() + dbCommit() + IF nWA == 0 + dbCloseArea() + ENDIF + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "NOT NULL violation: column " + FieldName( i ) + ; + " on table " + cTable ) + ENDIF + xVal := SqlCoerceToCol( xVal, aStructTypes, i ) + /* Same overflow guard as the named-columns branch above. */ + IF i <= Len( aStructTypes ) .AND. aStructTypes[ i ] == "N" .AND. ; + ValType( xVal ) == "N" + IF '*' $ Str( xVal, FieldLen( i ), FieldDec( i ) ) + dbDelete() + dbCommit() + IF nWA == 0 + dbCloseArea() + ENDIF + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "Numeric overflow: " + AllTrim( hb_NToS( xVal ) ) + ; + " does not fit in " + cTable + "." + FieldName( i ) + ; + " (N(" + AllTrim( hb_NToS( FieldLen( i ) ) ) + ; + "," + AllTrim( hb_NToS( FieldDec( i ) ) ) + "))" ) + ENDIF + ENDIF FieldPut( i, xVal ) NEXT ENDIF @@ -2438,22 +3566,63 @@ METHOD RunInsert() CLASS TSqlExecutor RETURN ::MakeError( SQL_ERR_GRAMMAR, "CHECK constraint violation on " + cTable ) ENDIF - /* Validate FOREIGN KEY constraints */ + /* Validate FOREIGN KEY constraints. Iterate every field on the + * just-appended record — using only `aFields` (named-column form) + * would skip the positional `INSERT INTO t VALUES (...)` form and + * let bad FK values slip through silently. SqlValidateFKRecord + * short-circuits for fields with no FK so the per-column scan is + * cheap. Self-FK + multi-row INSERT remains a known limitation: + * the parent area = INSERT area, and SqlValidateFKRecord's + * dbGoTop scan races with the in-flight buffer; symptom is a + * spurious FK violation on row 2+. Single-row + cross-table + * cases work correctly. */ + FOR i := 1 TO FCount() + IF ! SqlValidateFKRecord( cTable, FieldName( i ), FieldGet( i ) ) + dbDelete() + dbCommit() + IF nWA == 0 + dbCloseArea() + ENDIF + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "FOREIGN KEY violation: " + FieldName( i ) + " references missing parent" ) + ENDIF + NEXT + + /* Validate UNIQUE constraints — the .fsc metadata file lists + * columns declared UNIQUE at CREATE TABLE time. SqlValidateUnique + * scans the table for duplicates, excluding the record we just + * appended. Previously this validator was defined but never + * invoked, so duplicate keys slipped through silently. + * Excludes RecNo() (just-appended row) from the dup scan. */ IF Len( aFields ) > 0 FOR i := 1 TO Len( aFields ) nFPos := FieldPos( aFields[ i ] ) IF nFPos > 0 - IF ! SqlValidateFKRecord( cTable, aFields[ i ], FieldGet( nFPos ) ) + IF ! SqlValidateUnique( cTable, aFields[ i ], FieldGet( nFPos ), RecNo() ) dbDelete() dbCommit() IF nWA == 0 dbCloseArea() ENDIF RETURN ::MakeError( SQL_ERR_GRAMMAR, ; - "FOREIGN KEY violation: " + aFields[ i ] + " references missing parent" ) + "UNIQUE constraint violation: " + aFields[ i ] + ; + " = " + SqlQuoteVal( FieldGet( nFPos ) ) ) ENDIF ENDIF NEXT + ELSE + FOR i := 1 TO FCount() + IF ! SqlValidateUnique( cTable, FieldName( i ), FieldGet( i ), RecNo() ) + dbDelete() + dbCommit() + IF nWA == 0 + dbCloseArea() + ENDIF + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "UNIQUE constraint violation: " + FieldName( i ) + ; + " = " + SqlQuoteVal( FieldGet( i ) ) ) + ENDIF + NEXT ENDIF /* Transaction logging — after validation passes, so a rolled-back @@ -2462,17 +3631,25 @@ METHOD RunInsert() CLASS TSqlExecutor * RecNo, which only exists post-dbAppend. */ ::oTxn:LogRecord( cAlias, RecNo(), "INSERT" ) - /* Commit per INSERT when the WA cache is off (legacy durability - * guarantee). With the cache on, the caller batches via an - * explicit SqlWACacheDisable+dbCloseAll at shutdown — skipping - * the per-INSERT flush collapses the dominant I/O cost. */ + nInserted++ + NEXT /* end per-tuple loop */ + + /* Commit once after all tuples succeed. */ IF ! SqlWACacheIsEnabled() dbCommit() ENDIF + /* Index maintenance: DBFArea.Append / PutValue do not auto-update + * NTX keys (there's no ordKeyAdd hook yet). Rebuild any attached + * indexes once per INSERT batch so external `SET INDEX TO` readers + * see fresh keys. No-op when no indexes attached. */ + IF OrdCount() > 0 + OrderListRebuild() + ENDIF + SqlExecCloseTable( cAlias, nWA ) -RETURN { { "affected_rows" }, { { 1 } } } +RETURN { { "affected_rows" }, { { nInserted } } } METHOD RunUpdate() CLASS TSqlExecutor @@ -2482,6 +3659,10 @@ METHOD RunUpdate() CLASS TSqlExecutor LOCAL aFPos, aValuePc, pcWhere, lAllOk, cValSrc LOCAL aPrevVals, lValid LOCAL hConstraints, lHasConstraints + LOCAL aUpdFlags, aUpdFlagsRaw, nUpdI, aUpdTypes, lForcePrg + LOCAL aUpdRefs, hUpdChanged, hUpdEnf, cParentColUpper, j + LOCAL hUpdConstraints, lHasUniq, lHasCheckOrFk + LOCAL hPcCached cTable := ::hQuery[ "table" ] aSet := ::hQuery[ "set" ] @@ -2489,8 +3670,64 @@ METHOD RunUpdate() CLASS TSqlExecutor cAlias := cTable ::aTables := { { cTable, cAlias, "" } } + /* Materialize CTEs first so subqueries inside SET / WHERE that + * reference the CTE alias resolve correctly. Mirrors RunInsert. */ + IF hb_HHasKey( ::hQuery, "cte" ) .AND. ValType( ::hQuery[ "cte" ] ) == "A" + IF hb_HHasKey( ::hQuery, "cte_recursive" ) .AND. ::hQuery[ "cte_recursive" ] + ::MaterializeRecursiveCTE( ::hQuery[ "cte" ] ) + ELSE + ::MaterializeCTE( ::hQuery[ "cte" ] ) + ENDIF + ENDIF + + /* Pre-flight existence check — SqlExecOpenTable's RECOVER swallows + * the missing-file error (returns 0 with cache disabled or the + * cache num if present), but a dbUseArea panic still escapes if + * the file is genuinely absent. Surface a clean SQL_ERR_NO_TABLE + * here so callers see a structured error instead of a Five + * runtime panic. */ + IF ! File( Lower( cTable ) + ".dbf" ) .AND. ! File( cTable + ".dbf" ) + RETURN ::MakeError( SQL_ERR_NO_TABLE, ; + "Table '" + cTable + "' does not exist" ) + ENDIF + nWA := SqlExecOpenTable( cTable, cAlias ) + /* Struct snapshot used by both the fast-path coercion gate and + * the PRG fallback's NOT-NULL / string→date check. Built once up + * front so both paths see the same flags/types. */ + aUpdFlags := {} + aUpdTypes := {} + aUpdFlagsRaw := dbStruct() + FOR nUpdI := 1 TO Len( aUpdFlagsRaw ) + IF Len( aUpdFlagsRaw[ nUpdI ] ) >= 5 .AND. ValType( aUpdFlagsRaw[ nUpdI ][ 5 ] ) == "N" + AAdd( aUpdFlags, aUpdFlagsRaw[ nUpdI ][ 5 ] ) + ELSE + AAdd( aUpdFlags, 2 ) + ENDIF + AAdd( aUpdTypes, Upper( aUpdFlagsRaw[ nUpdI ][ 2 ] ) ) + NEXT + + /* Gate fast path out when any SET assigns a string ND_LIT to a + * DATE column: SqlExprToPrg would emit a raw string literal into + * the pcode, which SqlBulkUpdate then writes via the D-field + * codec as 8 blanks (its empty-date marker). The PRG path runs + * SqlCoerceToCol before FieldPut, so the string parses through + * CToD as the user intended. */ + lForcePrg := .F. + IF aSet != NIL + FOR i := 1 TO Len( aSet ) + nFPos := FieldPos( aSet[ i ][ 1 ] ) + IF nFPos > 0 .AND. nFPos <= Len( aUpdTypes ) .AND. aUpdTypes[ nFPos ] == "D" + IF ValType( aSet[ i ][ 2 ] ) == "A" .AND. Len( aSet[ i ][ 2 ] ) >= 2 .AND. ; + aSet[ i ][ 2 ][ 1 ] == ND_LIT .AND. ValType( aSet[ i ][ 2 ][ 2 ] ) == "C" + lForcePrg := .T. + EXIT + ENDIF + ENDIF + NEXT + ENDIF + /* Fast path: compile WHERE + every SET value to pcode and delegate * to Go RTL SqlBulkUpdate — skips per-record Go↔PRG boundary. * Conditions: no active transaction (txn log records can't be @@ -2502,8 +3739,31 @@ METHOD RunUpdate() CLASS TSqlExecutor * so subsequent identical UPDATEs skip the SqlExprToPrg + PcCompile * walk entirely. The cached pcode is valid as long as the plan * itself lives in the plan cache — which is forever in-process. */ - IF ! ::oTxn:IsActive() - LOCAL hPcCached + /* Tables with UNIQUE constraints must go through the PRG loop so + * SqlValidateUnique can fire per record. SqlBulkUpdate is a + * per-row byte write with no constraint callback hook, so a + * fast-path UPDATE that writes a duplicate value would have + * silently committed. */ + /* hUpdConstraints / lHasUniq / lHasCheckOrFk hoisted to function-top + * LOCALs above; mid-function LOCAL combined with newly-added top + * LOCALs caused slot aliasing → lHasUniq came back non-logical. */ + hUpdConstraints := SqlLoadConstraints( cTable ) + lHasUniq := Len( hUpdConstraints[ "unique" ] ) > 0 + /* SqlBulkUpdate has no CHECK / FK callback hook — any table + * carrying CHECK or FK constraints must take the PRG path so + * SqlValidateCheckRecord / SqlValidateFKRecord actually fire. */ + lHasCheckOrFk := Len( hUpdConstraints[ "check" ] ) > 0 .OR. ; + Len( hUpdConstraints[ "fk" ] ) > 0 + + /* ON UPDATE enforcement: if any sibling table FK-references this + * one, drop into the PRG path so we can compare per-row old/new + * values and fire CASCADE / SET NULL / RESTRICT. SqlBulkUpdate is + * a per-byte writer with no callback hook. */ + aUpdRefs := SqlFindReferencingFKs( cTable ) + + IF ! ::oTxn:IsActive() .AND. ! lHasUniq .AND. ! lHasCheckOrFk .AND. ! lForcePrg .AND. ; + Len( aUpdRefs ) == 0 + /* hPcCached hoisted to function-top LOCAL list. */ IF ! Empty( ::cCacheKey ) .AND. hb_HHasKey( s_hDmlPcodeCache, ::cCacheKey ) hPcCached := s_hDmlPcodeCache[ ::cCacheKey ] nAffected := SqlBulkUpdate( hPcCached[ "set_fpos" ], ; @@ -2553,6 +3813,9 @@ METHOD RunUpdate() CLASS TSqlExecutor nAffected := SqlBulkUpdate( aFPos, pcWhere, aValuePc ) /* Populate the per-plan cache for subsequent calls. */ IF ! Empty( ::cCacheKey ) + IF Len( s_hDmlPcodeCache ) >= SQL_DML_PCODE_CACHE_MAX + s_hDmlPcodeCache := { => } + ENDIF s_hDmlPcodeCache[ ::cCacheKey ] := { ; "set_fpos" => aFPos, ; "set_pc" => aValuePc, ; @@ -2580,20 +3843,39 @@ METHOD RunUpdate() CLASS TSqlExecutor lHasConstraints := Len( hConstraints[ "check" ] ) > 0 .OR. ; Len( hConstraints[ "fk" ] ) > 0 + /* aUpdFlags / aUpdTypes populated earlier for both fast-path and + * PRG paths. Per-SET NIL-into-NOT-NULL check runs below. */ + dbGoTop() WHILE ! Eof() IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) IF dbRLock( RecNo() ) ::oTxn:LogRecord( cAlias, RecNo(), "UPDATE" ) aPrevVals := {} + lValid := .T. FOR i := 1 TO Len( aSet ) nFPos := FieldPos( aSet[ i ][ 1 ] ) IF nFPos > 0 AAdd( aPrevVals, { nFPos, FieldGet( nFPos ) } ) xVal := ::EvalExpr( aSet[ i ][ 2 ] ) + IF xVal == NIL .AND. nFPos <= Len( aUpdFlags ) .AND. ; + hb_BitAnd( aUpdFlags[ nFPos ], 2 ) == 0 + lValid := .F. + EXIT + ENDIF + xVal := SqlCoerceToCol( xVal, aUpdTypes, nFPos ) FieldPut( nFPos, xVal ) ENDIF NEXT + IF ! lValid + FOR i := 1 TO Len( aPrevVals ) + FieldPut( aPrevVals[ i ][ 1 ], aPrevVals[ i ][ 2 ] ) + NEXT + dbRUnlock( RecNo() ) + SqlExecCloseTable( cAlias, nWA ) + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + "NOT NULL violation on UPDATE of " + cTable ) + ENDIF lValid := .T. IF lHasConstraints lValid := SqlValidateCheckRecord( cTable ) @@ -2608,6 +3890,21 @@ METHOD RunUpdate() CLASS TSqlExecutor NEXT ENDIF ENDIF + /* UNIQUE validation runs independently of hasConstraints + * because UNIQUE is tracked in the same .fsc but a table + * can declare only UNIQUE without CHECK/FK. Excludes the + * row we're currently editing so a self-match on the + * same column doesn't trigger a false positive. */ + IF lValid + FOR i := 1 TO Len( aSet ) + nFPos := FieldPos( aSet[ i ][ 1 ] ) + IF nFPos > 0 .AND. ; + ! SqlValidateUnique( cTable, aSet[ i ][ 1 ], FieldGet( nFPos ), RecNo() ) + lValid := .F. + EXIT + ENDIF + NEXT + ENDIF IF ! lValid /* Roll back the in-memory field changes before unlocking. */ FOR i := 1 TO Len( aPrevVals ) @@ -2618,6 +3915,59 @@ METHOD RunUpdate() CLASS TSqlExecutor RETURN ::MakeError( SQL_ERR_GRAMMAR, ; "UPDATE constraint violation on " + cTable ) ENDIF + /* ON UPDATE enforcement: if a parent column referenced + * by any sibling FK changed, fire CASCADE / SET NULL / + * RESTRICT against the children. Build the change hash + * from aPrevVals (old) + current FieldGet (new). Only + * include columns whose value actually moved — equal + * old/new must NOT trigger enforcement (otherwise an + * idempotent UPDATE fires spurious cascades). */ + IF Len( aUpdRefs ) > 0 + hUpdChanged := { => } + FOR i := 1 TO Len( aPrevVals ) + cParentColUpper := Upper( FieldName( aPrevVals[ i ][ 1 ] ) ) + xVal := FieldGet( aPrevVals[ i ][ 1 ] ) + /* NIL-safe change detection. `xVal == aPrevVals[..]` + * panics when either side is NIL — switch to type-aware + * comparison: both NIL → unchanged, exactly one NIL → + * changed, otherwise direct ==. */ + lValid := .T. + IF xVal == NIL .AND. aPrevVals[ i ][ 2 ] == NIL + lValid := .F. /* unchanged: both NULL */ + ELSEIF xVal == NIL .OR. aPrevVals[ i ][ 2 ] == NIL + lValid := .T. /* changed: NULL ↔ value */ + ELSE + lValid := ! ( xVal == aPrevVals[ i ][ 2 ] ) + ENDIF + IF lValid + /* Only register if some sibling FK actually + * targets this column — keeps the changes hash + * sparse and avoids enforcement work for + * non-key updates. */ + FOR j := 1 TO Len( aUpdRefs ) + IF Upper( aUpdRefs[ j ][ 3 ] ) == cParentColUpper + hUpdChanged[ cParentColUpper ] := { aPrevVals[ i ][ 2 ], xVal } + EXIT + ENDIF + NEXT + ENDIF + NEXT + IF Len( hUpdChanged ) > 0 + hUpdEnf := SqlEnforceUpdateRefs( cTable, aUpdRefs, hUpdChanged ) + IF ! hUpdEnf[ "ok" ] + /* Roll back this row before bubbling the FK + * RESTRICT error so the user-visible state + * matches "no change applied to this record". */ + FOR i := 1 TO Len( aPrevVals ) + FieldPut( aPrevVals[ i ][ 1 ], aPrevVals[ i ][ 2 ] ) + NEXT + dbRUnlock( RecNo() ) + SqlExecCloseTable( cAlias, nWA ) + RETURN ::MakeError( SQL_ERR_GRAMMAR, ; + hUpdEnf[ "error" ] ) + ENDIF + ENDIF + ENDIF dbRUnlock( RecNo() ) nAffected++ ENDIF @@ -2628,6 +3978,14 @@ METHOD RunUpdate() CLASS TSqlExecutor dbCommit() ENDIF + /* Same rationale as RunInsert: PutValue skipped index maintenance + * so rebuild attached indexes at the tail. PRG path only — the + * Go-RTL SqlBulkUpdate fast-path already does a targeted rebuild + * when updated fields intersect index key expressions. */ + IF nAffected > 0 .AND. OrdCount() > 0 + OrderListRebuild() + ENDIF + SqlExecCloseTable( cAlias, nWA ) RETURN { { "affected_rows" }, { { nAffected } } } @@ -2637,19 +3995,119 @@ METHOD RunDelete() CLASS TSqlExecutor LOCAL cTable, xWhere, cAlias, nWA LOCAL nAffected := 0 + LOCAL pcWhere, cValSrc, hPcCached + LOCAL aRefs, hEnf, nParentRec, nParentSel cTable := ::hQuery[ "table" ] xWhere := ::hQuery[ "where" ] cAlias := cTable ::aTables := { { cTable, cAlias, "" } } + /* Materialize CTEs first so subqueries inside WHERE that + * reference the CTE alias resolve correctly. Mirrors RunInsert. */ + IF hb_HHasKey( ::hQuery, "cte" ) .AND. ValType( ::hQuery[ "cte" ] ) == "A" + IF hb_HHasKey( ::hQuery, "cte_recursive" ) .AND. ::hQuery[ "cte_recursive" ] + ::MaterializeRecursiveCTE( ::hQuery[ "cte" ] ) + ELSE + ::MaterializeCTE( ::hQuery[ "cte" ] ) + ENDIF + ENDIF + + /* Pre-flight existence check, mirroring RunInsert / RunUpdate. */ + IF ! File( Lower( cTable ) + ".dbf" ) .AND. ! File( cTable + ".dbf" ) + RETURN ::MakeError( SQL_ERR_NO_TABLE, ; + "Table '" + cTable + "' does not exist" ) + ENDIF + nWA := SqlExecOpenTable( cTable, cAlias ) + /* Referential integrity: when any sibling table has a FOREIGN KEY + * pointing at cTable, we MUST take the PRG scan path — it's the + * only one that can evaluate per-row ON DELETE actions (RESTRICT / + * CASCADE / SET NULL). SqlBulkDelete is a pure byte-level delete + * and has no callback hook. No referencing FKs → fast path as + * before, zero overhead for FK-free tables. */ + aRefs := SqlFindReferencingFKs( cTable ) + + /* Fast path: compile WHERE to pcode and delegate to SqlBulkDelete. + * Mirrors the RunUpdate pattern — skipped under an active txn (we + * don't emit LogRecord from inside the Go loop) and when the WHERE + * contains constructs PcCompile can't handle (subquery, UDF) in + * which case SqlExprToPrg returns NIL. */ + IF ! ::oTxn:IsActive() .AND. Len( aRefs ) == 0 + IF ! Empty( ::cCacheKey ) .AND. hb_HHasKey( s_hDmlPcodeCache, ::cCacheKey ) + hPcCached := s_hDmlPcodeCache[ ::cCacheKey ] + nAffected := SqlBulkDelete( hPcCached[ "where_pc" ] ) + IF ! SqlWACacheIsEnabled() + dbCommit() + ENDIF + IF nAffected > 0 .AND. OrdCount() > 0 + OrderListRebuild() + ENDIF + SqlExecCloseTable( cAlias, nWA ) + RETURN { { "affected_rows" }, { { nAffected } } } + ENDIF + + pcWhere := NIL + IF xWhere != NIL + cValSrc := ::SqlExprToPrg( xWhere ) + IF cValSrc != NIL + pcWhere := PcCompile( cValSrc ) + ENDIF + ENDIF + /* pcWhere == NIL when xWhere itself was NIL (delete everything) + * OR compilation failed. Distinguish below: if WHERE was present + * but couldn't compile, fall through to the PRG loop; if WHERE + * was absent, run the fast path with NIL pcode (unconditional). */ + IF xWhere == NIL .OR. pcWhere != NIL + nAffected := SqlBulkDelete( pcWhere ) + IF ! Empty( ::cCacheKey ) .AND. xWhere != NIL + IF Len( s_hDmlPcodeCache ) >= SQL_DML_PCODE_CACHE_MAX + s_hDmlPcodeCache := { => } + ENDIF + s_hDmlPcodeCache[ ::cCacheKey ] := { "where_pc" => pcWhere } + ENDIF + IF ! SqlWACacheIsEnabled() + dbCommit() + ENDIF + IF nAffected > 0 .AND. OrdCount() > 0 + OrderListRebuild() + ENDIF + SqlExecCloseTable( cAlias, nWA ) + RETURN { { "affected_rows" }, { { nAffected } } } + ENDIF + ENDIF + + /* PRG scan loop — handles active txn (needs LogRecord per row), + * WHEREs that SqlExprToPrg can't compile (subquery, UDF), and + * every DELETE on a table with referencing FOREIGN KEYs. */ SET DELETED ON dbGoTop() WHILE ! Eof() IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) IF dbRLock( RecNo() ) + /* Enforce referential integrity before logging / deleting. + * CASCADE runs nested DELETEs on the child tables here, + * SET NULL runs a nested UPDATE; both MUST happen under + * the parent's record lock so concurrent inserts against + * the same FK value race against the commit order the + * caller expects. We capture the parent's RecNo + Select + * here because the nested five_SQL calls inside + * SqlEnforceDeleteRefs allocate / close workareas and + * leave the executor's current area on something other + * than the parent. */ + IF Len( aRefs ) > 0 + nParentRec := RecNo() + nParentSel := Select() + hEnf := SqlEnforceDeleteRefs( cTable, aRefs ) + dbSelectArea( nParentSel ) + dbGoto( nParentRec ) + IF ! hEnf[ "ok" ] + dbRUnlock( nParentRec ) + SqlExecCloseTable( cAlias, nWA ) + RETURN ::MakeError( SQL_ERR_GRAMMAR, hEnf[ "error" ] ) + ENDIF + ENDIF /* Transaction log the deletion so BEGIN TRANSACTION / * ROLLBACK can undo it — RunInsert/RunUpdate log, RunDelete * used to silently skip. */ @@ -2665,6 +4123,11 @@ METHOD RunDelete() CLASS TSqlExecutor dbCommit() ENDIF + /* Deleted rows' keys must be purged from attached indexes. */ + IF nAffected > 0 .AND. OrdCount() > 0 + OrderListRebuild() + ENDIF + SqlExecCloseTable( cAlias, nWA ) RETURN { { "affected_rows" }, { { nAffected } } } @@ -2704,14 +4167,34 @@ FUNCTION SqlExecOpenTable( cTable, cAlias ) /* Open fresh. Two-step fallback mirrors the prior inline logic so * callers using mixed-case filenames on case-sensitive filesystems - * still succeed. */ + * still succeed. + * + * SHARED mode (5th arg .T.): a same-table subquery (e.g. `DELETE + * FROM t WHERE v > (SELECT MAX(v) FROM t)`) opens a second + * workarea on the same DBF inside SubqueryCached. With EXCLUSIVE + * the second open errored out → the inner SELECT returned the + * `__error__` envelope and ND_SUB extracted aResult[2][1][1] = + * SQL_ERR_LOCKED (1005). The DML still succeeds with SHARED + * because Five is single-process here; per-record dbRLock + * provides write serialization at the row level. */ BEGIN SEQUENCE - dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .F., .F. ) + dbUseArea( .T., "DBFNTX", Lower( cTable ) + ".dbf", cAlias, .T., .F. ) RECOVER - dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .F., .F. ) + dbUseArea( .T., "DBFNTX", cTable + ".dbf", cAlias, .T., .F. ) END SEQUENCE nWA := Select( cAlias ) + /* Auto-attach the known per-table NTX files so DML-side + * OrderListRebuild (called at the tail of RunInsert / RunUpdate / + * RunDelete) actually has indexes to refresh. Without this the + * PK / UNIQUE indexes — built at CREATE TABLE time with 0 rows — + * stay frozen forever and any external `SET INDEX TO ...` read + * sees a stale 0-key tree. Missing file => silent RECOVER, no + * harm to pre-index tables. */ + IF nWA > 0 + SqlAttachTableIndexes( cTable ) + ENDIF + /* Register for reuse. The cache layer is a no-op when disabled, so * an unconditional Put keeps the caller branch-free. */ IF nWA > 0 .AND. SqlWACacheIsEnabled() @@ -2724,6 +4207,33 @@ FUNCTION SqlExecOpenTable( cTable, cAlias ) RETURN 0 /* caller must close — matches legacy semantics */ +/* Attach the convention-named PK / UNIQUE NTX files to the current + * workarea. Both files are produced by CreateTable at CREATE-TABLE + * time; missing files fall into RECOVER silently so pre-index tables + * (and tables with no UNIQUE columns) pay zero cost. */ +FUNCTION SqlAttachTableIndexes( cTable ) + + LOCAL cPk, cUq + + cPk := Lower( cTable ) + "_pk.ntx" + cUq := Lower( cTable ) + "_uq.ntx" + + IF File( cPk ) + BEGIN SEQUENCE + dbSetIndex( cPk ) + RECOVER + END SEQUENCE + ENDIF + IF File( cUq ) + BEGIN SEQUENCE + dbSetIndex( cUq ) + RECOVER + END SEQUENCE + ENDIF + +RETURN NIL + + FUNCTION SqlExecCloseTable( cAlias, nWA ) /* Only close if THIS call opened it AND the cache didn't adopt it. @@ -2741,6 +4251,82 @@ FUNCTION SqlExecCloseTable( cAlias, nWA ) RETURN NIL +/* SqlMaterializeView — execute the view's stored SQL once, dump the + * result into a MEMRDD temp area aliased as cAlias. Lets the rest of + * RunSelect's table-open / FROM-resolve machinery treat the view + * just like any other table. Returns .T. on success. + * + * The view text lives in `.fsv`, written by CreateView. We re- + * parse + run it via a nested TFiveSQL each time the view is opened; + * no result caching at the FS level. Trade-off: simple + always + * fresh, but each SELECT-from-view re-executes the body. Acceptable + * because views are rare on this workload. + */ +FUNCTION SqlMaterializeView( cView, cAlias ) + LOCAL cSQL, oV, aR, aFN, i, j, aStruct, cTmpName + LOCAL xS, cType, nWidth, nDec + + /* Use MemoRead for the view body — FRead's @byref buffer + * argument has surfaced edge cases in this runtime where the + * destination string stays at its pre-call (Space-padded) + * value, leaving cSQL empty after AllTrim. MemoRead returns the + * file contents directly. */ + cSQL := AllTrim( MemoRead( Lower( cView ) + ".fsv" ) ) + IF Empty( cSQL ) + RETURN .F. + ENDIF + + oV := TFiveSQL():New() + aR := oV:Execute( cSQL ) + IF ValType( aR ) != "A" .OR. Len( aR ) < 2 .OR. ; + ValType( aR[ 1 ] ) != "A" .OR. Len( aR[ 1 ] ) == 0 .OR. ; + aR[ 1 ][ 1 ] == "__error__" + RETURN .F. + ENDIF + aFN := aR[ 1 ] + + /* Build a minimal struct from the result types. View columns get + * sensible widths: numeric → N(15,4), char → C(64), date → D, + * logical → L, anything else → C(255). Scalar widening acceptable + * for read-only view consumers. */ + aStruct := {} + FOR i := 1 TO Len( aFN ) + xS := NIL + IF Len( aR[ 2 ] ) > 0 .AND. Len( aR[ 2 ][ 1 ] ) >= i + xS := aR[ 2 ][ 1 ][ i ] + ENDIF + DO CASE + CASE ValType( xS ) == "N" + cType := "N" ; nWidth := 15 ; nDec := 4 + CASE ValType( xS ) == "D" + cType := "D" ; nWidth := 8 ; nDec := 0 + CASE ValType( xS ) == "L" + cType := "L" ; nWidth := 1 ; nDec := 0 + CASE ValType( xS ) == "C" + cType := "C" ; nWidth := Max( Len( xS ), 64 ) ; nDec := 0 + OTHERWISE + cType := "C" ; nWidth := 64 ; nDec := 0 + ENDCASE + AAdd( aStruct, { aFN[ i ], cType, nWidth, nDec } ) + NEXT + + cTmpName := "mem:__view_" + Lower( cView ) + BEGIN SEQUENCE + dbCreate( cTmpName, aStruct, "MEMRDD" ) + dbUseArea( .T., "MEMRDD", cTmpName, cAlias, .F., .F. ) + FOR i := 1 TO Len( aR[ 2 ] ) + dbAppend() + FOR j := 1 TO Min( Len( aFN ), Len( aR[ 2 ][ i ] ) ) + FieldPut( j, aR[ 2 ][ i ][ j ] ) + NEXT + NEXT + RECOVER + RETURN .F. + END SEQUENCE + +RETURN .T. + + /* ====================================================================== * Standalone helper functions called by TSqlIndex * ====================================================================== */ @@ -2780,7 +4366,7 @@ RETURN NIL /* INTERSECT: keep only rows in both sets */ FUNCTION SqlDoIntersect( aRows1, aRows2 ) - LOCAL aResult := {}, hKeys2 := { => }, i, cKey + LOCAL aResult := {}, hKeys2 := { => }, hEmitted := { => }, i, cKey LOCAL oSort := TSqlSort():New() FOR i := 1 TO Len( aRows2 ) @@ -2788,19 +4374,26 @@ FUNCTION SqlDoIntersect( aRows1, aRows2 ) hKeys2[ cKey ] := .T. NEXT + /* SQL standard: INTERSECT is DISTINCT by default — skip rows whose + * composite key was already emitted, not just unmatched ones. + * INTERSECT ALL (retain duplicates) would skip this dedup; we + * don't currently expose that spelling so the plain INTERSECT + * follows the default spec. */ FOR i := 1 TO Len( aRows1 ) cKey := oSort:RowKey( aRows1[ i ] ) - IF hb_HHasKey( hKeys2, cKey ) + IF hb_HHasKey( hKeys2, cKey ) .AND. ! hb_HHasKey( hEmitted, cKey ) AAdd( aResult, aRows1[ i ] ) + hEmitted[ cKey ] := .T. ENDIF NEXT RETURN aResult -/* EXCEPT: keep only rows in first that are not in second */ +/* EXCEPT: keep only rows in first that are not in second. + * SQL spec: EXCEPT is DISTINCT by default. */ FUNCTION SqlDoExcept( aRows1, aRows2 ) - LOCAL aResult := {}, hKeys2 := { => }, i, cKey + LOCAL aResult := {}, hKeys2 := { => }, hEmitted := { => }, i, cKey LOCAL oSort := TSqlSort():New() FOR i := 1 TO Len( aRows2 ) @@ -2810,8 +4403,9 @@ FUNCTION SqlDoExcept( aRows1, aRows2 ) FOR i := 1 TO Len( aRows1 ) cKey := oSort:RowKey( aRows1[ i ] ) - IF ! hb_HHasKey( hKeys2, cKey ) + IF ! hb_HHasKey( hKeys2, cKey ) .AND. ! hb_HHasKey( hEmitted, cKey ) AAdd( aResult, aRows1[ i ] ) + hEmitted[ cKey ] := .T. ENDIF NEXT @@ -2859,10 +4453,16 @@ FUNCTION SqlMaterializeSubquery( xSubQ, cAlias, aParams ) cTmpFile := "__drv_" + Lower( cAlias ) /* MEMRDD in-memory temp — avoids dbCreate + FErase disk syscalls. */ dbCreate( "mem:" + cTmpFile, aStruct, "MEMRDD" ) + /* Open under a scratch alias just to feed SqlBulkInsert, then close + * and re-open under the user-visible alias so RunSelect's open + * loop finds it via Select(cAlias) without trying to dbUseArea on + * a non-existent disk file. */ dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, "__DRVTMP", .F., .F. ) /* Go RTL SqlBulkInsert — subquery driving-table materialization. */ SqlBulkInsert( aRows2 ) - CLOSE __DRVTMP + dbSelectArea( Select( "__DRVTMP" ) ) + dbCloseArea() + dbUseArea( .T., "MEMRDD", "mem:" + cTmpFile, cAlias, .T., .F. ) RETURN { cTmpFile, cAlias, "" } @@ -3079,7 +4679,12 @@ METHOD MaterializeRecursiveCTE( aCTE ) CLASS TSqlExecutor nIter := 0 aPrevRows := AClone( aDataRows ) - WHILE nIter < 50 .AND. Len( aPrevRows ) > 0 + /* Iteration cap — legitimate recursive queries easily exceed 50 + * rows (`seq 1..N` for N>50 clipped silently at 51). SQL Server + * defaults to 100, PostgreSQL has no hard cap. Keep a defensive + * ceiling so a cycle-without-termination query doesn't lock up + * the process, but raise it high enough for real workloads. */ + WHILE nIter < 10000 .AND. Len( aPrevRows ) > 0 nIter++ aNewRows := {} @@ -3160,22 +4765,14 @@ METHOD MaterializeRecursiveCTE( aCTE ) CLASS TSqlExecutor RECOVER END SEQUENCE - /* Replace table entry to reference CTE temp file. - * Keep alias = cName so the main query finds it by original name. */ - lReplaced := .F. - FOR j := 1 TO Len( ::aTables ) - IF Upper( ::aTables[ j ][ 1 ] ) == cName - ::aTables[ j ][ 1 ] := cTmpFile - IF Empty( ::aTables[ j ][ 2 ] ) - ::aTables[ j ][ 2 ] := cName - ENDIF - lReplaced := .T. - EXIT - ENDIF - NEXT - IF ! lReplaced - AAdd( ::aTables, { cTmpFile, cName, "" } ) - ENDIF + /* Do NOT rewrite aTables[j][1] to cTmpFile here. RunSelect's + * open loop expects the original CTE name in aTables; with that + * name (a) `Select(cName)` directly returns the area we just + * opened, or (b) the MEMRDD fallback at the end of the open + * loop attaches a second workarea under the user's alias when + * the FROM clause aliased the CTE (e.g. `FROM sub s` with + * recursive CTE + JOIN to a sibling table previously failed + * with "Table '__cte_sub' does not exist"). */ NEXT RETURN NIL @@ -3195,8 +4792,9 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor LOCAL nLagLead, nArgCol, xDefault LOCAL nRunSum, nRunCount LOCAL aWinCols, nWC - LOCAL hFrame, nFS, nFE, m, xVal, xMin, xMax, lDefaultFrame + LOCAL hFrame, nFS, nFE, m, xVal, xMin, xMax, lDefaultFrame, lWholePartition LOCAL aPartColIdx, aSortSpec, nOrdCol + LOCAL nLeftOff, nRightOff, lBoundsOk /* Scan for window function columns */ aWinCols := {} @@ -3287,8 +4885,14 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor ENDIF ENDIF xDefault := NIL - IF Len( aFuncArgs ) >= 3 .AND. aFuncArgs[ 3 ][ 1 ] == ND_LIT - xDefault := aFuncArgs[ 3 ][ 2 ] + IF Len( aFuncArgs ) >= 3 + /* Default arg can be a literal (`-1`, `'end'`) but the + * lexer parses `-1` as ND_UNI(-, ND_LIT(1)), not a bare + * ND_LIT — so a flat type-check would silently drop the + * default. Run the value through SqlEvalRowExpr against + * an empty row so any constant expression (including + * unary minus and CAST) collapses to its scalar form. */ + xDefault := SqlEvalRowExpr( aFuncArgs[ 3 ], {}, {} ) ENDIF FOR k := 1 TO Len( aPartIdx ) IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) @@ -3314,8 +4918,14 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor ENDIF ENDIF xDefault := NIL - IF Len( aFuncArgs ) >= 3 .AND. aFuncArgs[ 3 ][ 1 ] == ND_LIT - xDefault := aFuncArgs[ 3 ][ 2 ] + IF Len( aFuncArgs ) >= 3 + /* Default arg can be a literal (`-1`, `'end'`) but the + * lexer parses `-1` as ND_UNI(-, ND_LIT(1)), not a bare + * ND_LIT — so a flat type-check would silently drop the + * default. Run the value through SqlEvalRowExpr against + * an empty row so any constant expression (including + * unary minus and CAST) collapses to its scalar form. */ + xDefault := SqlEvalRowExpr( aFuncArgs[ 3 ], {}, {} ) ENDIF FOR k := 1 TO Len( aPartIdx ) IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) @@ -3362,7 +4972,21 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor ENDIF ENDIF IF lDefaultFrame - /* O(N) incremental path — original fast code */ + /* O(N) incremental path — accumulate, then write. + * + * Per SQL:2003 the default frame is + * RANGE UNBOUNDED PRECEDING (running) — when ORDER BY is present + * whole-partition — when ORDER BY is absent + * + * lWholePartition captures the ORDER-BY-absent case: we + * still make one pass but write the final aggregate to + * every row in the partition instead of the running + * value at each position. Previously the running value + * was always written, so `AVG(sal) OVER ()` returned a + * cumulative average (wrong per spec, matched Oracle's + * buggy output before 12c but diverged from Postgres / + * SQL Server / the SQL standard). */ + lWholePartition := ( Len( aSortSpec ) == 0 ) nRunSum := 0 nRunCount := 0 xMin := NIL @@ -3383,7 +5007,7 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor ENDIF ENDIF ENDIF - IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) + IF ! lWholePartition .AND. nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) DO CASE CASE cFunc == "SUM" aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunSum @@ -3398,7 +5022,48 @@ METHOD ApplyWindowFunctions( aRows, aFN, aCols ) CLASS TSqlExecutor ENDCASE ENDIF NEXT + IF lWholePartition + /* Write the final (partition-wide) aggregate to every row. */ + FOR k := 1 TO Len( aPartIdx ) + IF nColIdx <= Len( aRows[ aPartIdx[ k ] ] ) + DO CASE + CASE cFunc == "SUM" + aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunSum + CASE cFunc == "AVG" + aRows[ aPartIdx[ k ] ][ nColIdx ] := ; + iif( nRunCount > 0, nRunSum / nRunCount, NIL ) + CASE cFunc == "COUNT" + aRows[ aPartIdx[ k ] ][ nColIdx ] := nRunCount + CASE cFunc == "MIN" + aRows[ aPartIdx[ k ] ][ nColIdx ] := xMin + CASE cFunc == "MAX" + aRows[ aPartIdx[ k ] ][ nColIdx ] := xMax + ENDCASE + ENDIF + NEXT + ENDIF ELSE + /* General frame: try the O(N) fast path in Go RTL. + * SUM/AVG/COUNT go through a prefix-sum sweep; MIN/MAX + * through a monotonic deque. The RTL returns .F. when + * it can't handle the value types (e.g., MIN/MAX over + * string / date columns) so we fall through to the + * O(N·W) loop below — keeps correctness while the + * common numeric case wins. */ + IF cFunc == "SUM" .OR. cFunc == "AVG" .OR. cFunc == "COUNT" .OR. ; + cFunc == "MIN" .OR. cFunc == "MAX" + lBoundsOk := .T. + nLeftOff := SqlFrameOffsetEncode( hFrame[ "start" ], @lBoundsOk ) + IF lBoundsOk + nRightOff := SqlFrameOffsetEncode( hFrame[ "end" ], @lBoundsOk ) + ENDIF + IF lBoundsOk + IF SqlWindowSlideAgg( aRows, aPartIdx, nArgCol, nColIdx, ; + cFunc, nLeftOff, nRightOff ) + LOOP /* done with this partition */ + ENDIF + ENDIF + ENDIF /* General frame path — O(N*W) where W = frame width */ FOR k := 1 TO Len( aPartIdx ) nFS := 1 @@ -3504,6 +5169,8 @@ METHOD RunMerge() CLASS TSqlExecutor LOCAL lHasMatched, lHasNotMatched LOCAL nSrcWA, nTgtWA, nSaved, nAffected LOCAL lMatched, i, nFPos, xVal + LOCAL xMatchCond, xNotMatchCond, lDelete + LOCAL lValid, cInsFld cTarget := ::hQuery[ "target" ] cSource := ::hQuery[ "source" ] @@ -3517,6 +5184,21 @@ METHOD RunMerge() CLASS TSqlExecutor aInsVals := ::hQuery[ "insert_values" ] lHasMatched := ::hQuery[ "has_matched" ] lHasNotMatched := ::hQuery[ "has_not_matched" ] + /* Extended MERGE clauses the parser already captures but the + * executor was ignoring — WHEN MATCHED AND / WHEN NOT MATCHED + * AND filter conditions, plus WHEN MATCHED THEN DELETE. */ + xMatchCond := NIL + xNotMatchCond := NIL + lDelete := .F. + IF hb_HHasKey( ::hQuery, "match_condition" ) + xMatchCond := ::hQuery[ "match_condition" ] + ENDIF + IF hb_HHasKey( ::hQuery, "not_match_condition" ) + xNotMatchCond := ::hQuery[ "not_match_condition" ] + ENDIF + IF hb_HHasKey( ::hQuery, "matched_delete" ) + lDelete := ::hQuery[ "matched_delete" ] + ENDIF nAffected := 0 ::aTables := { { cTarget, cTarget, "" }, { cSource, iif( Empty( cSrcAlias ), cSource, cSrcAlias ), "" } } @@ -3563,38 +5245,79 @@ METHOD RunMerge() CLASS TSqlExecutor ENDDO IF lMatched .AND. lHasMatched - /* UPDATE matched row */ + /* Apply optional WHEN MATCHED AND . ON match already + * passed; the AND filter further narrows which matched + * rows get updated / deleted. */ dbSelectArea( nTgtWA ) - IF dbRLock( RecNo() ) - FOR i := 1 TO Len( aUpdSet ) - nFPos := FieldPos( aUpdSet[ i ][ 1 ] ) - IF nFPos > 0 - xVal := ::EvalExpr( aUpdSet[ i ][ 2 ] ) - FieldPut( nFPos, xVal ) + IF xMatchCond == NIL .OR. SqlIsTrue( ::EvalExpr( xMatchCond ) ) + IF dbRLock( RecNo() ) + IF lDelete + /* WHEN MATCHED THEN DELETE — mark the row; cleanup + * and FK-cascade happen at dbCommit time. */ + dbDelete() + ELSE + FOR i := 1 TO Len( aUpdSet ) + nFPos := FieldPos( aUpdSet[ i ][ 1 ] ) + IF nFPos > 0 + xVal := ::EvalExpr( aUpdSet[ i ][ 2 ] ) + FieldPut( nFPos, xVal ) + ENDIF + NEXT ENDIF - NEXT - dbRUnlock( RecNo() ) - nAffected++ + dbRUnlock( RecNo() ) + nAffected++ + ENDIF ENDIF ELSEIF ! lMatched .AND. lHasNotMatched - /* INSERT new row */ - dbSelectArea( nTgtWA ) - dbAppend() - IF Len( aInsFlds ) > 0 - FOR i := 1 TO Min( Len( aInsFlds ), Len( aInsVals ) ) - nFPos := FieldPos( aInsFlds[ i ] ) - IF nFPos > 0 - xVal := ::EvalExpr( aInsVals[ i ] ) - FieldPut( nFPos, xVal ) - ENDIF - NEXT + /* WHEN NOT MATCHED [AND ] THEN INSERT */ + IF xNotMatchCond != NIL .AND. ; + ! SqlIsTrue( ::EvalExpr( xNotMatchCond ) ) + /* condition false — skip this source row */ ELSE - FOR i := 1 TO Min( FCount(), Len( aInsVals ) ) - xVal := ::EvalExpr( aInsVals[ i ] ) - FieldPut( i, xVal ) - NEXT + dbSelectArea( nTgtWA ) + dbAppend() + IF Len( aInsFlds ) > 0 + FOR i := 1 TO Min( Len( aInsFlds ), Len( aInsVals ) ) + nFPos := FieldPos( aInsFlds[ i ] ) + IF nFPos > 0 + xVal := ::EvalExpr( aInsVals[ i ] ) + FieldPut( nFPos, xVal ) + ENDIF + NEXT + ELSE + FOR i := 1 TO Min( FCount(), Len( aInsVals ) ) + xVal := ::EvalExpr( aInsVals[ i ] ) + FieldPut( i, xVal ) + NEXT + ENDIF + /* Enforce UNIQUE on inserted row — mirrors RunInsert. + * Without this a MERGE could quietly produce a duplicate + * that regular INSERT would have rejected. */ + lValid := .T. + IF Len( aInsFlds ) > 0 + FOR i := 1 TO Len( aInsFlds ) + cInsFld := aInsFlds[ i ] + nFPos := FieldPos( cInsFld ) + IF nFPos > 0 .AND. ; + ! SqlValidateUnique( cTarget, cInsFld, FieldGet( nFPos ), RecNo() ) + lValid := .F. + EXIT + ENDIF + NEXT + ELSE + FOR i := 1 TO FCount() + IF ! SqlValidateUnique( cTarget, FieldName( i ), FieldGet( i ), RecNo() ) + lValid := .F. + EXIT + ENDIF + NEXT + ENDIF + IF ! lValid + dbDelete() + ELSE + nAffected++ + ENDIF ENDIF - nAffected++ ENDIF dbSelectArea( nSrcWA ) @@ -3616,6 +5339,60 @@ RETURN { { "affected_rows" }, { { nAffected } } } * "2 FOLLOWING", "UNBOUNDED FOLLOWING". * nCurr = 1-based position of the current row within the partition. * nPartLen = total rows in the partition. */ +/* Sentinels shared with the Go SqlWindowSlideAgg RTL. Keep the + * values in sync — they must match the constants in hbrtl/sqlscan.go + * (frameUnboundedPreceding / frameUnboundedFollowing). */ +#define FRAME_UNBOUNDED_PRECEDING -1073741824 +#define FRAME_UNBOUNDED_FOLLOWING 1073741824 + +/* SqlFrameOffsetEncode parses a SQL frame bound string into a + * relative-offset integer suitable for SqlWindowSlideAgg. Sets lOk + * to .F. when the bound can't be encoded (empty, non-numeric N, + * unknown form) so the caller falls through to the O(N*W) loop. + * Supports: "UNBOUNDED PRECEDING", "UNBOUNDED FOLLOWING", + * "CURRENT ROW", "N PRECEDING", "N FOLLOWING". */ +FUNCTION SqlFrameOffsetEncode( cBound, lOk ) + + LOCAL nV + + lOk := .T. + IF cBound == NIL .OR. Empty( cBound ) + /* Missing bound — treat as CURRENT ROW (same as SqlFrameOffset). */ + RETURN 0 + ENDIF + + IF "UNBOUNDED PRECEDING" $ cBound + RETURN FRAME_UNBOUNDED_PRECEDING + ENDIF + IF "UNBOUNDED FOLLOWING" $ cBound + RETURN FRAME_UNBOUNDED_FOLLOWING + ENDIF + IF "CURRENT ROW" $ cBound + RETURN 0 + ENDIF + IF "PRECEDING" $ cBound + nV := Val( cBound ) + /* Val() returns 0 on parse failure — reject so we fall back + * rather than silently treating "? PRECEDING" as current row. */ + IF nV <= 0 + lOk := .F. + RETURN 0 + ENDIF + RETURN -nV + ENDIF + IF "FOLLOWING" $ cBound + nV := Val( cBound ) + IF nV <= 0 + lOk := .F. + RETURN 0 + ENDIF + RETURN nV + ENDIF + + lOk := .F. +RETURN 0 + + FUNCTION SqlFrameOffset( cBound, nCurr, nPartLen ) LOCAL nV @@ -4060,11 +5837,25 @@ METHOD TryGoJoin( aJoins, aResultExprs, nOuterWA ) CLASS TSqlExecutor LOCAL i, xE, xOnCond, cInnerAlias, cInnerField, cOuterField LOCAL nInnerWA, nInnerFPos, nOuterFPos, nWA LOCAL aJoinSpecs := {}, aSelectFields := {} - LOCAL cRef, nDot, cAlias, cField + LOCAL cRef, nDot, cAlias, cField, cJoinType LOCAL aGoRows /* Build join specs: { nInnerWA, nInnerKeyField, nOuterKeyField } */ FOR i := 1 TO Len( aJoins ) + /* The Go SqlHashJoin RTL is an INNER-join implementation — + * it emits one row per matching outer/inner pair and has no + * null-fill path. OUTER joins (LEFT / RIGHT / FULL) must fall + * back to PRG JoinRecurse so unmatched outer rows still + * appear with NIL inner columns. Before this gate a LEFT JOIN + * silently dropped every outer row without a match. */ + cJoinType := "" + IF Len( aJoins[ i ] ) >= 1 .AND. ValType( aJoins[ i ][ 1 ] ) == "C" + cJoinType := Upper( aJoins[ i ][ 1 ] ) + ENDIF + IF cJoinType == "LEFT" .OR. cJoinType == "RIGHT" .OR. cJoinType == "FULL" + RETURN NIL + ENDIF + xOnCond := aJoins[ i ][ 4 ] /* Only support simple equi-join */ IF xOnCond == NIL .OR. xOnCond[ 1 ] != ND_BIN .OR. xOnCond[ 2 ] != "=" @@ -4225,7 +6016,8 @@ RETURN xResult METHOD SqlExprToPrg( xNode ) CLASS TSqlExecutor LOCAL cOp, cL, cR - LOCAL cRef, nDot, cField, nFPos, cFType, cLit + LOCAL cRef, nDot, cField, nFPos, cFType, cLit, cAliasU + LOCAL lLocalAlias, ii, cA IF xNode == NIL RETURN NIL @@ -4234,7 +6026,14 @@ METHOD SqlExprToPrg( xNode ) CLASS TSqlExecutor DO CASE CASE xNode[ 1 ] == ND_LIT IF ValType( xNode[ 2 ] ) == "N" - RETURN AllTrim( Str( xNode[ 2 ] ) ) + /* Use hb_NToS — preserves all decimal digits. + * Str(0.1) returns " 0" (default 0 decimals when + * the type is numeric), which AllTrim collapsed to "0"; + * the pcode then ran `WHERE v = 0` for `WHERE v = 0.1` and + * silently returned no rows. Same class of bug for any + * fractional literal (0.5, 1.25, 3.14) and for negative / + * large values that Str's default width clips. */ + RETURN hb_NToS( xNode[ 2 ] ) ENDIF IF ValType( xNode[ 2 ] ) == "L" IF xNode[ 2 ] @@ -4261,6 +6060,33 @@ METHOD SqlExprToPrg( xNode ) CLASS TSqlExecutor ENDIF nDot := At( ".", cRef ) IF nDot > 0 + /* Qualified reference — only compile if the alias prefix + * matches one of THIS executor's own tables. FindWA would + * otherwise return an outer-scope workarea with the same + * alias (Select() is case-insensitive across all open + * areas), causing pcode to bind `outer.col` to the inner + * workarea's same-named field. That silently collapses + * correlated predicates like `WHERE dept = e.dept` into + * `WHERE dept = dept` (always true). Returning NIL routes + * the caller to PRG EvalExpr, which handles outer lookup + * through Resolve / ResolveFromOuter / the outer stack. */ + cAliasU := Upper( Left( cRef, nDot - 1 ) ) + lLocalAlias := .F. + FOR ii := 1 TO Len( ::aTables ) + cA := Upper( ::aTables[ ii ][ 2 ] ) + IF Empty( cA ) + cA := Upper( ::aTables[ ii ][ 1 ] ) + ENDIF + IF cA == cAliasU .OR. Upper( ::aTables[ ii ][ 1 ] ) == cAliasU .OR. ; + ( Len( ::aTables[ ii ] ) >= 3 .AND. ; + Upper( ::aTables[ ii ][ 3 ] ) == cAliasU ) + lLocalAlias := .T. + EXIT + ENDIF + NEXT + IF ! lLocalAlias + RETURN NIL + ENDIF cField := Upper( SubStr( cRef, nDot + 1 ) ) ELSE cField := Upper( cRef ) @@ -4304,18 +6130,27 @@ METHOD SqlExprToPrg( xNode ) CLASS TSqlExecutor RETURN NIL ENDIF DO CASE + /* Use SqlCmpEq / SqlCmpLt — not Harbour's `==` / `<` — so the + * fast path matches the PRG path: case-insensitive string + * compare after AllTrim, Date↔String coercion via DToS form, + * Numeric↔String leading-digits parse. iif-gate drops rows + * where either side is NIL to enforce SQL three-valued + * logic ("NULL cmp anything → not matched"). Without these + * wrappers `WHERE hired = '20240115'` silently missed rows + * and `WHERE v <> 10` leaked NULL rows — both regressions + * from the bare `==` / `!=` emission. */ CASE cOp == "=" .OR. cOp == "==" - RETURN "(" + cL + ")==(" + cR + ")" + RETURN "iif((" + cL + ")==NIL .OR. (" + cR + ")==NIL, .F., SqlCmpEq((" + cL + "),(" + cR + ")))" CASE cOp == "<>" .OR. cOp == "!=" - RETURN "(" + cL + ")!=(" + cR + ")" + RETURN "iif((" + cL + ")==NIL .OR. (" + cR + ")==NIL, .F., !SqlCmpEq((" + cL + "),(" + cR + ")))" CASE cOp == "<" - RETURN "(" + cL + ")<(" + cR + ")" + RETURN "iif((" + cL + ")==NIL .OR. (" + cR + ")==NIL, .F., SqlCmpLt((" + cL + "),(" + cR + ")))" CASE cOp == "<=" - RETURN "(" + cL + ")<=(" + cR + ")" + RETURN "iif((" + cL + ")==NIL .OR. (" + cR + ")==NIL, .F., SqlCmpEq((" + cL + "),(" + cR + ")).OR.SqlCmpLt((" + cL + "),(" + cR + ")))" CASE cOp == ">" - RETURN "(" + cL + ")>(" + cR + ")" + RETURN "iif((" + cL + ")==NIL .OR. (" + cR + ")==NIL, .F., SqlCmpLt((" + cR + "),(" + cL + ")))" CASE cOp == ">=" - RETURN "(" + cL + ")>=(" + cR + ")" + RETURN "iif((" + cL + ")==NIL .OR. (" + cR + ")==NIL, .F., SqlCmpEq((" + cL + "),(" + cR + ")).OR.SqlCmpLt((" + cR + "),(" + cL + ")))" CASE cOp == "AND" RETURN "(" + cL + ").AND.(" + cR + ")" CASE cOp == "OR" @@ -4334,3 +6169,21 @@ METHOD SqlExprToPrg( xNode ) CLASS TSqlExecutor ENDCASE RETURN NIL + + +/* -------------------------------------------------------------- + * Schema version — accessors for the file-scoped s_nSchemaVer + * counter defined at the top of this module. All SQL plan-cache + * keys embed the current version as a prefix; every DDL calls + * SqlBumpSchemaVer() on success so subsequent SELECTs / DML miss + * the cache and re-resolve columns / indexes against the new + * schema. Called from TFiveSQL (plan cache key build) and + * TSqlDDL (invalidation after CREATE/ALTER/DROP). + * -------------------------------------------------------------- */ +FUNCTION SqlSchemaVer() +RETURN s_nSchemaVer + + +FUNCTION SqlBumpSchemaVer() + s_nSchemaVer++ +RETURN s_nSchemaVer diff --git a/_FiveSql2/src/TSqlExpr.prg b/_FiveSql2/src/TSqlExpr.prg index 14adfd0..92afb11 100644 --- a/_FiveSql2/src/TSqlExpr.prg +++ b/_FiveSql2/src/TSqlExpr.prg @@ -47,6 +47,68 @@ RETURN "expr" * to avoid a name collision with the RTL symbol; behavior is * byte-for-byte identical. See docs/RTL-Go-Native-Migration.md. */ + +/* SqlExtractWindow: walk xE, find every ND_WINDOW node, replace it + * in-place with a synthetic ND_COL pointing at a generated alias, + * and append {windowExpr, alias} to aWindows for the caller to + * register as hidden SELECT columns. Used by RunSelect so a wrapped + * window function (`SUM(x) OVER () + 100`) can flow through the + * usual ApplyWindowFunctions path: the inner ND_WINDOW becomes a + * hidden top-level ND_WINDOW column, projection evaluates the outer + * expression as ND_BIN(ND_COL("__win_..."), 100), and the trim at + * RunSelect's tail strips the hidden column back off the result. + * + * Returns the (possibly mutated) xE. cPrefix scopes alias names per + * SELECT column so two wrappers don't collide. */ +FUNCTION SqlExtractWindow( xE, aWindows, cPrefix ) + + LOCAL i, cAlias, xNew + + IF xE == NIL .OR. ValType( xE ) != "A" .OR. Len( xE ) < 1 + RETURN xE + ENDIF + + IF xE[ 1 ] == ND_WINDOW + cAlias := cPrefix + "_" + AllTrim( hb_NToS( Len( aWindows ) + 1 ) ) + "__" + AAdd( aWindows, { AClone( xE ), cAlias } ) + RETURN { ND_COL, cAlias, NIL, NIL, NIL } + ENDIF + + IF xE[ 1 ] == ND_BIN + xE[ 3 ] := SqlExtractWindow( xE[ 3 ], aWindows, cPrefix ) + xE[ 4 ] := SqlExtractWindow( xE[ 4 ], aWindows, cPrefix ) + RETURN xE + ENDIF + + IF xE[ 1 ] == ND_UNI + xE[ 3 ] := SqlExtractWindow( xE[ 3 ], aWindows, cPrefix ) + RETURN xE + ENDIF + + IF xE[ 1 ] == ND_FN + IF ValType( xE[ 3 ] ) == "A" + FOR i := 1 TO Len( xE[ 3 ] ) + xE[ 3 ][ i ] := SqlExtractWindow( xE[ 3 ][ i ], aWindows, cPrefix ) + NEXT + ENDIF + RETURN xE + ENDIF + + IF xE[ 1 ] == ND_CASE + IF ValType( xE[ 2 ] ) == "A" + FOR i := 1 TO Len( xE[ 2 ] ) + xE[ 2 ][ i ][ 1 ] := SqlExtractWindow( xE[ 2 ][ i ][ 1 ], aWindows, cPrefix ) + xE[ 2 ][ i ][ 2 ] := SqlExtractWindow( xE[ 2 ][ i ][ 2 ], aWindows, cPrefix ) + NEXT + ENDIF + IF Len( xE ) >= 3 .AND. xE[ 3 ] != NIL + xE[ 3 ] := SqlExtractWindow( xE[ 3 ], aWindows, cPrefix ) + ENDIF + RETURN xE + ENDIF + +RETURN xE + /* SqlIsAggName is implemented in Go (hbrtl/sqlhelpers.go) — registered * as SQLISAGGNAME. Former PRG body: * RETURN ( "," + c + "," ) $ ( "," + AGG_FUNCTIONS + "," ) @@ -209,9 +271,25 @@ FUNCTION SqlEvalRowExpr( xExpr, aFN, aRow ) IF ValType( xL ) == "N" .AND. ValType( xR ) == "N" RETURN xL + xR ENDIF + /* Date arithmetic: Date + N → Date (N days later). N + Date + * is symmetric. Without these branches Date operands collapse + * to 0 via SqlCoerceNum and the result is just the integer + * offset. Mirrors EvalExpr's same-named fix. */ + IF ValType( xL ) == "D" .AND. ValType( xR ) == "N" + RETURN xL + xR + ENDIF + IF ValType( xL ) == "N" .AND. ValType( xR ) == "D" + RETURN xR + xL + ENDIF RETURN SqlCoerceNum( xL ) + SqlCoerceNum( xR ) ENDIF IF cOp == "-" + IF ValType( xL ) == "D" .AND. ValType( xR ) == "N" + RETURN xL - xR + ENDIF + IF ValType( xL ) == "D" .AND. ValType( xR ) == "D" + RETURN xL - xR + ENDIF RETURN SqlCoerceNum( xL ) - SqlCoerceNum( xR ) ENDIF IF cOp == "*" diff --git a/_FiveSql2/src/TSqlIndex.prg b/_FiveSql2/src/TSqlIndex.prg index cb4fdab..bb37e0d 100644 --- a/_FiveSql2/src/TSqlIndex.prg +++ b/_FiveSql2/src/TSqlIndex.prg @@ -21,6 +21,16 @@ CLASS TSqlIndex * FErase cleanup loop on view-free queries (the common case). */ DATA lViewUsed INIT .F. + /* Back-reference to the calling executor. RunSelect sets this + * before dispatching TryIndexScan / TryIndexJoinScan so the per-row + * inner loops can reuse the caller's TSqlExecutor (and its prebuilt + * fetch/symbol cache, aTables, aParams) instead of constructing a + * throwaway via SqlEvalExprNode/SqlFetchRowArr on every record. + * For 100k-row index scans that's 200k fewer New() allocations. + * NIL means "not wired yet" — helpers must tolerate and fall back + * to the self-contained path. */ + DATA oExec + METHOD New() CONSTRUCTOR METHOD DetectRDD( nWA ) METHOD OpenTable( cTable, cAlias, lShared, lReadOnly ) @@ -32,7 +42,7 @@ CLASS TSqlIndex METHOD FindCompoundTag( nWA, aFields ) METHOD BuildKey( nWA, xValue ) METHOD MatchOrderByTag( nWA, aOrderBy, aFieldNames ) - METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) + METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows, nLimit ) METHOD TryIndexJoinScan( nWA, xWhere, aTables, aParams, aRE, aRows, aJoins ) METHOD BuildKeyExpr( nWA, cField ) METHOD ExtractStrWidth( cExpr ) @@ -41,6 +51,14 @@ CLASS TSqlIndex METHOD BuildCompoundKey( cExpr, aFields, aValues, nWA ) METHOD CheckView( cTable ) + /* EvalE / FetchE — wrappers that reuse the calling TSqlExecutor + * (::oExec) when wired up by RunSelect, falling back to the + * standalone SqlEvalExprNode / SqlFetchRowArr helpers when run in + * isolation. The fast path saves a full TSqlExecutor():New() per + * row on the hot index-scan loops. */ + METHOD EvalE( xNode, aTables, aParams ) + METHOD FetchE( aRE, aTables, aParams ) + ENDCLASS @@ -48,6 +66,21 @@ METHOD New() CLASS TSqlIndex RETURN SELF +METHOD EvalE( xNode, aTables, aParams ) CLASS TSqlIndex + LOCAL nPI := 1 + IF ::oExec != NIL + RETURN ::oExec:EvalExpr( xNode ) + ENDIF +RETURN SqlEvalExprNode( xNode, aTables, aParams, @nPI ) + + +METHOD FetchE( aRE, aTables, aParams ) CLASS TSqlIndex + IF ::oExec != NIL + RETURN ::oExec:FetchRow( aRE ) + ENDIF +RETURN SqlFetchRowArr( aRE, aTables, aParams ) + + METHOD DetectRDD( nWA ) CLASS TSqlIndex LOCAL nSaved, cRDD @@ -91,9 +124,14 @@ METHOD OpenTable( cTable, cAlias, lShared, lReadOnly ) CLASS TSqlIndex RETURN -1 ENDIF - /* Decide RDD based on available index files */ - aFiles := Directory( cFileLow + "*.cdx" ) - IF Len( aFiles ) > 0 + /* Decide RDD based on available index files. Use exact filenames + * (production CDX = `
.cdx`) — the previous glob + * `*.cdx` matched every sibling table whose name + * started with cFileLow (e.g. `c*.cdx` would happily pick up + * `cus.cdx` or `customer.cdx`) and forced DBFCDX even for tables + * that had no CDX of their own. Same prefix-matching hazard that + * caused the silent multi-row INSERT row drop via AttachNTX. */ + IF File( cFileLow + ".cdx" ) cRDD := "DBFCDX" ELSE cRDD := "DBFNTX" @@ -139,7 +177,7 @@ METHOD FindExclusive( cTableLow ) CLASS TSqlIndex * Fully functional now that hbrtl implements dbInfo(DBI_FULLPATH) * and DBI_SHARED. The DBI_* constants resolve via include/dbinfo.ch. */ - LOCAL nSaved, nArea, cDbfName, lShared + LOCAL nSaved, nArea, cDbfName, lShared, cBase nSaved := Select() @@ -148,7 +186,21 @@ METHOD FindExclusive( cTableLow ) CLASS TSqlIndex dbSelectArea( nArea ) IF ! Empty( Alias() ) cDbfName := Lower( AllTrim( dbInfo( DBI_FULLPATH ) ) ) - IF cTableLow + ".dbf" $ cDbfName .OR. cTableLow $ cDbfName + /* Compare on the basename only, not substring. The previous + * `cTableLow $ cDbfName` matched "c" inside ".../cus.dbf" + * — same prefix-substring hazard that produced the multi-row + * INSERT silent row drop. Strip path separators and compare + * against the exact `
.dbf` / `
` forms so a + * sibling table whose name starts with cTableLow doesn't + * cause a spurious lock conflict. */ + cBase := cDbfName + IF "/" $ cBase + cBase := SubStr( cBase, RAt( "/", cBase ) + 1 ) + ENDIF + IF "\" $ cBase + cBase := SubStr( cBase, RAt( "\", cBase ) + 1 ) + ENDIF + IF cBase == cTableLow + ".dbf" .OR. cBase == cTableLow lShared := dbInfo( DBI_SHARED ) IF lShared == .F. dbSelectArea( nSaved ) @@ -166,18 +218,32 @@ RETURN 0 METHOD AttachNTX( cTableLow, nWA ) CLASS TSqlIndex - LOCAL aFiles, i, cFile, nSaved + LOCAL i, cFile, nSaved + LOCAL aCandidates nSaved := Select() dbSelectArea( nWA ) - aFiles := Directory( cTableLow + "*.ntx" ) - FOR i := 1 TO Len( aFiles ) - cFile := aFiles[ i ][ 1 ] - BEGIN SEQUENCE - dbSetIndex( cFile ) - RECOVER - END SEQUENCE + /* Only attach the convention-named indexes built by CreateTable + * (`
_pk.ntx` and `
_uq.ntx`). The previous glob + * `*.ntx` matched every NTX whose filename started + * with the table name — `c_uq.ntx` searched as `c*.ntx` happily + * picked up `cus_uq.ntx` from a sibling table, attached its + * stale keys to the new workarea, and SkipIndexed then walked + * those orphan keys instead of the natural record order. The + * user-visible symptom was multi-row INSERT silently dropping + * the third+ row from any subsequent SELECT. Use exact filenames + * here; ad-hoc `INDEX ON ... TO custom.ntx` indexes are still + * accessible via explicit `SET INDEX TO custom.ntx`. */ + aCandidates := { cTableLow + "_pk.ntx", cTableLow + "_uq.ntx" } + FOR i := 1 TO Len( aCandidates ) + cFile := aCandidates[ i ] + IF File( cFile ) + BEGIN SEQUENCE + dbSetIndex( cFile ) + RECOVER + END SEQUENCE + ENDIF NEXT dbSelectArea( nSaved ) @@ -187,18 +253,25 @@ RETURN NIL METHOD AttachCDX( cTableLow, nWA ) CLASS TSqlIndex - LOCAL aFiles, i, cFile, nSaved + LOCAL i, cFile, nSaved + LOCAL aCandidates nSaved := Select() dbSelectArea( nWA ) - aFiles := Directory( cTableLow + "*.cdx" ) - FOR i := 1 TO Len( aFiles ) - cFile := aFiles[ i ][ 1 ] - BEGIN SEQUENCE - dbSetIndex( cFile ) - RECOVER - END SEQUENCE + /* Same prefix-glob hazard as AttachNTX: `c*.cdx` would match + * `cus.cdx` etc. CDX is the production-index convention so we + * only look for `
.cdx`. Custom CDX bags still attach via + * explicit `SET INDEX TO`. */ + aCandidates := { cTableLow + ".cdx" } + FOR i := 1 TO Len( aCandidates ) + cFile := aCandidates[ i ] + IF File( cFile ) + BEGIN SEQUENCE + dbSetIndex( cFile ) + RECOVER + END SEQUENCE + ENDIF NEXT dbSelectArea( nSaved ) @@ -340,11 +413,12 @@ RETURN nBestTag METHOD BuildKey( nWA, xValue ) CLASS TSqlIndex - LOCAL cExpr, nSaved, nWidth + LOCAL cExpr, nSaved, nWidth, nKeySize, cKey nSaved := Select() dbSelectArea( nWA ) cExpr := Upper( AllTrim( dbOrderInfo( DBOI_EXPRESSION ) ) ) + nKeySize := dbOrderInfo( DBOI_KEYSIZE ) dbSelectArea( nSaved ) IF "STR(" $ cExpr @@ -352,13 +426,21 @@ METHOD BuildKey( nWA, xValue ) CLASS TSqlIndex nWidth := ::ExtractStrWidth( cExpr ) RETURN Str( xValue, nWidth ) ELSEIF ValType( xValue ) == "C" - RETURN xValue + /* STR() keys are numeric-width strings — CHAR literals like + * "10" must still be right-padded to the expression's + * declared width so ordScope doesn't bind to a short key. */ + nWidth := ::ExtractStrWidth( cExpr ) + RETURN PadR( xValue, nWidth ) ENDIF ENDIF IF "UPPER(" $ cExpr IF ValType( xValue ) == "C" - RETURN Upper( xValue ) + cKey := Upper( xValue ) + IF ValType( nKeySize ) == "N" .AND. nKeySize > 0 + cKey := PadR( cKey, nKeySize ) + ENDIF + RETURN cKey ENDIF ENDIF @@ -371,40 +453,117 @@ METHOD BuildKey( nWA, xValue ) CLASS TSqlIndex ENDIF IF ValType( xValue ) == "N" + /* For a plain numeric field index the stored key width equals + * the DBF field width (8 for N(8,0), 10 for N(10,0) …). Using + * a hard-coded Str(xValue, 10) produces scope keys that don't + * align with 8-byte index bytes — ordScope then fails to + * constrain the scan and TryIndexScan degrades to a full-table + * walk. Trust DBOI_KEYSIZE when the driver reports it. */ + IF ValType( nKeySize ) == "N" .AND. nKeySize > 0 + RETURN Str( xValue, nKeySize ) + ENDIF RETURN Str( xValue, 10 ) ENDIF + /* Plain CHAR field index (cExpr is just the field name, no STR/ + * UPPER/DTOS wrapper): pad to the stored key width so range + * operators like `WHERE name > 'AB'` compare byte-for-byte + * against the padded keys in the index. Without the pad, "AB" + * binarily precedes every "AB" key, so ordScope's + * inclusive lower bound could still land on the right rows for + * >, but `=` / seek paths that rely on exact key equality would + * miss — and the cost is a single PadR per seek. */ + IF ValType( xValue ) == "C" + IF ValType( nKeySize ) == "N" .AND. nKeySize > 0 .AND. Len( xValue ) < nKeySize + RETURN PadR( xValue, nKeySize ) + ENDIF + RETURN xValue + ENDIF + RETURN xValue METHOD MatchOrderByTag( nWA, aOrderBy, aFieldNames ) CLASS TSqlIndex - LOCAL nSaved, nOrds, i, cExpr, cOrderCol - LOCAL cDir, lTagDesc + LOCAL nSaved, nOrds, i, j, cExpr, cOrderCol + LOCAL cDir, cUniformDir, lTagDesc, nPos, nLastPos + LOCAL lCandidate, aCols, nOBy - IF Len( aOrderBy ) != 1 + nOBy := Len( aOrderBy ) + IF nOBy == 0 + RETURN .F. + ENDIF + // TEMP A/B: force old single-col behavior + IF nOBy != 1 RETURN .F. ENDIF - cOrderCol := Upper( SqlExprName( aOrderBy[ 1 ][ 1 ] ) ) - cDir := aOrderBy[ 1 ][ 2 ] + /* A single index tag encodes one direction (all ASC or all DESC). + * Multi-column ORDER BY can only be satisfied by a tag when every + * column has the SAME direction — mixed "a ASC, b DESC" needs an + * in-memory sort regardless. Determine the uniform direction first + * so the per-tag loop can reject direction mismatches cheaply. */ + cUniformDir := aOrderBy[ 1 ][ 2 ] + FOR i := 2 TO nOBy + IF aOrderBy[ i ][ 2 ] != cUniformDir + RETURN .F. + ENDIF + NEXT + + /* Cache uppercase column names once — avoids repeat SqlExprName/ + * Upper work inside the tag-iteration loop. */ + aCols := Array( nOBy ) + FOR i := 1 TO nOBy + aCols[ i ] := Upper( SqlExprName( aOrderBy[ i ][ 1 ] ) ) + NEXT nSaved := Select() dbSelectArea( nWA ) - nOrds := ordCount() FOR i := 1 TO nOrds ordSetFocus( i ) cExpr := Upper( AllTrim( dbOrderInfo( DBOI_EXPRESSION ) ) ) - IF cOrderCol $ cExpr - lTagDesc := dbOrderInfo( DBOI_ISDESC ) - IF ( cDir == "ASC" .AND. ! lTagDesc ) .OR. ; - ( cDir == "DESC" .AND. lTagDesc ) - dbSelectArea( nSaved ) - RETURN .T. + /* Direction check first — cheap bail-out when tag polarity + * doesn't match the uniform ORDER BY direction. dbOrderInfo + * can return NIL on freshly-built indexes that haven't loaded + * a DBOI_ISDESC value yet; treat NIL as ASC so `! lTagDesc` + * doesn't panic with `argument error (op: .NOT.)`. */ + lTagDesc := dbOrderInfo( DBOI_ISDESC ) + IF ValType( lTagDesc ) != "L" + lTagDesc := .F. + ENDIF + IF ( cUniformDir == "ASC" .AND. lTagDesc ) .OR. ; + ( cUniformDir == "DESC" .AND. ! lTagDesc ) + LOOP + ENDIF + + /* Each ORDER BY column must appear in the key expression AT a + * higher position than the previous column — ensures the tag's + * concatenation order matches the requested sort order. + * + * Caveat (inherited from the single-column path): substring + * match — "ID" inside "ORDER_ID" reports as a hit. Callers that + * mix identifiers with shared suffixes should name their tags + * carefully. Good enough for the common DEPT+ID / UPPER(NAME) + * style keys; tightening to word-boundary detection is a + * separate refinement. */ + lCandidate := .T. + nLastPos := 0 + FOR j := 1 TO nOBy + cOrderCol := aCols[ j ] + nPos := At( cOrderCol, cExpr ) + IF nPos == 0 .OR. nPos <= nLastPos + lCandidate := .F. + EXIT ENDIF + nLastPos := nPos + NEXT + + IF lCandidate + dbSelectArea( nSaved ) + RETURN .T. ENDIF NEXT @@ -414,13 +573,21 @@ METHOD MatchOrderByTag( nWA, aOrderBy, aFieldNames ) CLASS TSqlIndex RETURN .F. -METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) CLASS TSqlIndex +METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows, nLimit ) CLASS TSqlIndex LOCAL cField, xValue LOCAL nTag, xSeekKey, lFound, nPI, aRow LOCAL xLow, xHigh LOCAL nSaved + /* nLimit is an optional early-termination cap provided by RunSelect + * when it has already verified that the index-order scan here will + * produce rows in the requested ORDER BY order (or there is no + * ORDER BY). Zero / NIL means "no cap". */ + IF nLimit == NIL + nLimit := 0 + ENDIF + nSaved := Select() dbSelectArea( nWA ) @@ -441,10 +608,12 @@ METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) CLA lFound := dbSeek( xSeekKey, .T. ) WHILE lFound .AND. ! Eof() - nPI := 1 - IF SqlIsTrue( SqlEvalExprNode( xFullWhere, aTables, aParams, @nPI ) ) - aRow := SqlFetchRowArr( aRE, aTables, aParams ) + IF SqlIsTrue( ::EvalE( xFullWhere, aTables, aParams ) ) + aRow := ::FetchE( aRE, aTables, aParams ) AAdd( aRows, aRow ) + IF nLimit > 0 .AND. Len( aRows ) >= nLimit + EXIT + ENDIF dbSelectArea( nWA ) dbSkip() ELSE @@ -465,11 +634,11 @@ METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) CLA dbSelectArea( nSaved ) RETURN .T. ENDIF - IF ::TryIndexScan( nWA, xWhere[ 3 ], xFullWhere, aTables, aParams, aRE, @aRows ) + IF ::TryIndexScan( nWA, xWhere[ 3 ], xFullWhere, aTables, aParams, aRE, @aRows, nLimit ) dbSelectArea( nSaved ) RETURN .T. ENDIF - IF ::TryIndexScan( nWA, xWhere[ 4 ], xFullWhere, aTables, aParams, aRE, @aRows ) + IF ::TryIndexScan( nWA, xWhere[ 4 ], xFullWhere, aTables, aParams, aRE, @aRows, nLimit ) dbSelectArea( nSaved ) RETURN .T. ENDIF @@ -495,10 +664,12 @@ METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) CLA dbGoTop() WHILE ! Eof() - nPI := 1 - IF SqlIsTrue( SqlEvalExprNode( xFullWhere, aTables, aParams, @nPI ) ) - aRow := SqlFetchRowArr( aRE, aTables, aParams ) + IF SqlIsTrue( ::EvalE( xFullWhere, aTables, aParams ) ) + aRow := ::FetchE( aRE, aTables, aParams ) AAdd( aRows, aRow ) + IF nLimit > 0 .AND. Len( aRows ) >= nLimit + EXIT + ENDIF ENDIF dbSelectArea( nWA ) dbSkip() @@ -539,10 +710,12 @@ METHOD TryIndexScan( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) CLA dbGoTop() WHILE ! Eof() - nPI := 1 - IF SqlIsTrue( SqlEvalExprNode( xFullWhere, aTables, aParams, @nPI ) ) - aRow := SqlFetchRowArr( aRE, aTables, aParams ) + IF SqlIsTrue( ::EvalE( xFullWhere, aTables, aParams ) ) + aRow := ::FetchE( aRE, aTables, aParams ) AAdd( aRows, aRow ) + IF nLimit > 0 .AND. Len( aRows ) >= nLimit + EXIT + ENDIF ENDIF dbSelectArea( nWA ) dbSkip() @@ -586,8 +759,7 @@ METHOD TryIndexJoinScan( nWA, xWhere, aTables, aParams, aRE, aRows, aJoins ) CLA lFound := dbSeek( cSeekStr, .T. ) WHILE lFound .AND. ! Eof() - nPI := 1 - IF SqlIsTrue( SqlEvalExprNode( xWhere, aTables, aParams, @nPI ) ) + IF SqlIsTrue( ::EvalE( xWhere, aTables, aParams ) ) SqlJoinRecurse( aJoins, 1, aTables, xWhere, aRE, @aRows, aParams, SELF ) ELSE EXIT @@ -709,9 +881,8 @@ METHOD TryCompoundSeek( nWA, xWhere, xFullWhere, aTables, aParams, aRE, aRows ) lFound := dbSeek( cSeekKey, .T. ) WHILE lFound .AND. ! Eof() - nPI := 1 - IF SqlIsTrue( SqlEvalExprNode( xFullWhere, aTables, aParams, @nPI ) ) - aRow := SqlFetchRowArr( aRE, aTables, aParams ) + IF SqlIsTrue( ::EvalE( xFullWhere, aTables, aParams ) ) + aRow := ::FetchE( aRE, aTables, aParams ) AAdd( aRows, aRow ) dbSelectArea( nWA ) dbSkip() diff --git a/_FiveSql2/src/TSqlLexer.prg b/_FiveSql2/src/TSqlLexer.prg index 2cad302..c7c1881 100644 --- a/_FiveSql2/src/TSqlLexer.prg +++ b/_FiveSql2/src/TSqlLexer.prg @@ -37,7 +37,7 @@ RETURN ::aTokens METHOD Tokenize() CLASS TSqlLexer - LOCAL nPos, ch, cToken + LOCAL nPos, ch, cToken, cLit nPos := 1 ::aTokens := {} @@ -149,6 +149,26 @@ METHOD Tokenize() CLASS TSqlLexer LOOP ENDIF + /* Harbour logical literals inside SQL text: `.T.` / `.F.` / + * `.Y.` / `.N.`. INSERT statements in Harbour hosts frequently + * use these rather than the SQL `TRUE` / `FALSE` keywords, + * especially when the source value is inlined from a + * build-time constant. Converted to TK_NAME("TRUE"/"FALSE") + * so the parser's primary handles them alongside SQL + * keywords without a new token kind. Must be tested *before* + * the bare `.` → TK_DOT punctuation case. */ + IF ch == "." .AND. nPos + 2 <= ::nLen .AND. ; + SubStr( ::cInput, nPos + 2, 1 ) == "." + cLit := Upper( SubStr( ::cInput, nPos + 1, 1 ) ) + IF cLit == "T" .OR. cLit == "Y" + AAdd( ::aTokens, { TK_NAME, "TRUE" } ) ; nPos += 3 + LOOP + ELSEIF cLit == "F" .OR. cLit == "N" + AAdd( ::aTokens, { TK_NAME, "FALSE" } ) ; nPos += 3 + LOOP + ENDIF + ENDIF + /* Punctuation and operators */ DO CASE CASE ch == "," diff --git a/_FiveSql2/src/TSqlParser2.prg b/_FiveSql2/src/TSqlParser2.prg index d7153dc..3cb202d 100644 --- a/_FiveSql2/src/TSqlParser2.prg +++ b/_FiveSql2/src/TSqlParser2.prg @@ -297,9 +297,28 @@ METHOD Parse() CLASS TSqlParser2 EXIT ENDIF ENDDO - /* Parse the main SELECT statement after WITH */ - ::EatKW( "SELECT" ) - h := ::ParseSelect() + /* SQL:2003 allows WITH ... . + * Older code only accepted SELECT — UPDATE/DELETE that + * referenced a CTE silently mis-parsed and surfaced as + * "status: TG" (the table name leaking out as the result + * envelope's [1][1]). Dispatch on the trailing keyword and + * stash aCTE on whatever DML hash we get back. */ + DO CASE + CASE ::IsKW( ::nPos, "SELECT" ) + ::nPos++ + h := ::ParseSelect() + CASE ::IsKW( ::nPos, "INSERT" ) + ::nPos++ + h := ::ParseInsert() + CASE ::IsKW( ::nPos, "UPDATE" ) + ::nPos++ + h := ::ParseUpdate() + CASE ::IsKW( ::nPos, "DELETE" ) + ::nPos++ + h := ::ParseDelete() + OTHERWISE + RETURN NIL + ENDCASE IF h != NIL h[ "cte" ] := aCTE h[ "cte_recursive" ] := lRecursive @@ -479,7 +498,7 @@ METHOD ParseSelect() CLASS TSqlParser2 LOCAL nTop := 0, lDistinct := .F. LOCAL aCols, aTables := {}, aJoins := {} LOCAL xWhere := NIL, aGroupBy := {}, xHaving := NIL, aOrderBy := {} - LOCAL nLimit := 0, nOffset := 0 + LOCAL nLimit := NIL, nOffset := 0 LOCAL hUnion := NIL LOCAL lAll LOCAL aWindowDefs, cWinName, hWinDef @@ -921,12 +940,30 @@ METHOD ParseFrom( aTables, aJoins ) CLASS TSqlParser2 ::nPos++ ENDIF - cTable := ::TVal( ::nPos ) ; ::nPos++ - cAlias := "" - IF ::TType( ::nPos ) == TK_NAME .AND. ! ::IsFromKW( ::TVal( ::nPos ) ) - cAlias := ::TVal( ::nPos ) ; ::nPos++ + /* Derived table on the right side of a JOIN: `JOIN (SELECT...) AS x ON ...` */ + IF ::TType( ::nPos ) == TK_LPAR .AND. ::IsKW( ::nPos + 1, "SELECT" ) + xSubQ := ::ParseSubquery() + cAlias := "" + IF ::IsKW( ::nPos, "AS" ) + ::nPos++ + ENDIF + IF ::TType( ::nPos ) == TK_NAME .AND. ! ::IsFromKW( ::TVal( ::nPos ) ) + cAlias := ::TVal( ::nPos ) + ::nPos++ + ENDIF + IF Empty( cAlias ) + cAlias := "__DRV" + hb_ntos( Len( aTables ) + 1 ) + ENDIF + cTable := iif( lLateral, "__LATERAL__", "__SUBQUERY__" ) + AAdd( aTables, { cTable, cAlias, xSubQ } ) + ELSE + cTable := ::TVal( ::nPos ) ; ::nPos++ + cAlias := "" + IF ::TType( ::nPos ) == TK_NAME .AND. ! ::IsFromKW( ::TVal( ::nPos ) ) + cAlias := ::TVal( ::nPos ) ; ::nPos++ + ENDIF + AAdd( aTables, { iif( lLateral, "__LATERAL_" + cTable, cTable ), cAlias, "" } ) ENDIF - AAdd( aTables, { iif( lLateral, "__LATERAL_" + cTable, cTable ), cAlias, "" } ) xOnCond := NIL IF ::IsKW( ::nPos, "ON" ) @@ -989,7 +1026,7 @@ RETURN aOrder /* Parse INSERT INTO */ METHOD ParseInsert() CLASS TSqlParser2 - LOCAL h := { => }, cTable, aFields := {}, aValues := {}, xE + LOCAL h := { => }, cTable, aFields := {}, aRows := {}, aTuple, xE h[ "type" ] := "INSERT" ::EatKW( "INTO" ) @@ -1013,14 +1050,21 @@ METHOD ParseInsert() CLASS TSqlParser2 ENDIF h[ "fields" ] := aFields - /* VALUES clause */ + /* VALUES clause — SQL:2003 allows multiple row constructors: + * VALUES (a, b, c), (d, e, f), ... + * Each (...) tuple yields one INSERT. Older code produced a flat + * expression list which limited us to the first tuple — second + * and later tuples' values ended up as residual tokens and were + * silently dropped. h["rows"] is always an array of tuples; + * single-row INSERT produces a one-element outer array. */ IF ::IsKW( ::nPos, "VALUES" ) ::nPos++ - IF ::TType( ::nPos ) == TK_LPAR + DO WHILE ::TType( ::nPos ) == TK_LPAR ::nPos++ + aTuple := {} DO WHILE ::TType( ::nPos ) != TK_RPAR .AND. ::TType( ::nPos ) != TK_END xE := ::ParseExpr() - AAdd( aValues, xE ) + AAdd( aTuple, xE ) IF ::TType( ::nPos ) == TK_COMMA ::nPos++ ENDIF @@ -1028,9 +1072,22 @@ METHOD ParseInsert() CLASS TSqlParser2 IF ::TType( ::nPos ) == TK_RPAR ::nPos++ ENDIF - ENDIF + AAdd( aRows, aTuple ) + IF ::TType( ::nPos ) == TK_COMMA + ::nPos++ + ELSE + EXIT + ENDIF + ENDDO + ELSEIF ::IsKW( ::nPos, "SELECT" ) + /* INSERT INTO t [(cols)] SELECT ... — capture the subquery plan + * so RunInsert can materialize it as the driving tuple list. + * ParseSelect expects the position to be at the first token + * AFTER `SELECT`, so consume the keyword here. */ + ::EatKW( "SELECT" ) + h[ "select" ] := ::ParseSelect() ENDIF - h[ "values" ] := aValues + h[ "rows" ] := aRows RETURN h @@ -1365,7 +1422,9 @@ RETURN ::ParsePrimary() /* Parse primary expressions */ METHOD ParsePrimary() CLASS TSqlParser2 - LOCAL cVal, cName, xE, aArgs, aCases, xElse, xCond, xThen + LOCAL cVal, cName, xE, aArgs, aCases, xElse, xCond, xThen, xTest + LOCAL lDistinct + LOCAL xDate, cNorm LOCAL cPart, cTrimSpec, xTrimChar, xFrom LOCAL aColDefs, cColName, cColPath, aOrdItems, cDir, xExpr @@ -1375,6 +1434,45 @@ METHOD ParsePrimary() CLASS TSqlParser2 RETURN SqlNode( ND_NIL, NIL, NIL, NIL, NIL ) ENDIF + /* Logical literals — SQL's TRUE/FALSE and the Harbour `.T.`/`.F.` + * forms that the lexer rewrites to the same tokens. Without this + * path INSERTing a bool value into an L column silently stored + * NIL (lexer emitted an unknown keyword, parser fell through to + * the identifier case and then Resolve() returned NIL because + * no field / alias was named `TRUE`). */ + IF ::IsKW( ::nPos, "TRUE" ) + ::nPos++ + RETURN SqlNode( ND_LIT, .T., NIL, NIL, NIL ) + ENDIF + IF ::IsKW( ::nPos, "FALSE" ) + ::nPos++ + RETURN SqlNode( ND_LIT, .F., NIL, NIL, NIL ) + ENDIF + + /* DATE 'YYYY-MM-DD' or DATE 'YYYYMMDD' literal — SQL standard + * explicit-type literal. Rebuilds a date value at parse time + * (via CToD with ISO pre-pass) so downstream evaluation sees a + * real Date, not a string needing late coercion. CToD silently + * rolls invalid dates (`2025-02-29` → 2025-03-01) per xBase + * convention; verify the round-trip and emit NIL for invalid + * literals so callers see a clean NULL instead of a corrupt + * neighbor-day. */ + IF ::IsKW( ::nPos, "DATE" ) .AND. ::TType( ::nPos + 1 ) == TK_TEXT + ::nPos++ + cVal := ::TVal( ::nPos ) + ::nPos++ + xDate := CToD( cVal ) + IF ValType( xDate ) == "D" .AND. ! Empty( xDate ) + /* Compare DToS round-trip to the original digits — strip + * separators on the input first (`2025-02-29` → `20250229`). */ + cNorm := StrTran( StrTran( StrTran( cVal, "-", "" ), "/", "" ), ".", "" ) + IF DToS( xDate ) != cNorm + xDate := NIL /* invalid date, surface as NULL */ + ENDIF + ENDIF + RETURN SqlNode( ND_LIT, xDate, NIL, NIL, NIL ) + ENDIF + /* Numeric literal */ IF ::TType( ::nPos ) == TK_NUM cVal := ::TVal( ::nPos ) @@ -1417,14 +1515,28 @@ METHOD ParsePrimary() CLASS TSqlParser2 RETURN SqlNode( ND_FN, "EXISTS", { xE }, NIL, NIL ) ENDIF - /* CASE WHEN ... THEN ... [ELSE ...] END */ + /* CASE: searched form `CASE WHEN cond THEN ... END` and simple + * form `CASE expr WHEN val THEN ... END` — both per SQL standard. + * Simple form was previously parsed as searched (skipping the + * test expression), which left the parser at the wrong token and + * the executor returned a single row of NILs. Simple form is + * desugared into searched: each `WHEN val` becomes ND_BIN(=, + * test_expr_clone, val). */ IF ::IsKW( ::nPos, "CASE" ) ::nPos++ aCases := {} xElse := NIL + xTest := NIL + IF ! ::IsKW( ::nPos, "WHEN" ) + /* Simple form: peek a test expression before the first WHEN. */ + xTest := ::ParseExpr() + ENDIF DO WHILE ::IsKW( ::nPos, "WHEN" ) ::nPos++ xCond := ::ParseExpr() + IF xTest != NIL + xCond := SqlNode( ND_BIN, "=", AClone( xTest ), xCond, NIL ) + ENDIF ::EatKW( "THEN" ) xThen := ::ParseExpr() AAdd( aCases, { xCond, xThen } ) @@ -1841,10 +1953,23 @@ METHOD ParsePrimary() CLASS TSqlParser2 ::nPos++ ENDIF - /* Function call: name( args ) */ + /* Function call: name( [DISTINCT|ALL] args ) */ IF ::TType( ::nPos ) == TK_LPAR ::nPos++ aArgs := {} + lDistinct := .F. + /* SQL aggregate modifier: `COUNT(DISTINCT col)`, `SUM(DISTINCT + * col)`, etc. Without this, the keyword fell through to + * ParseExpr as an identifier and the aggregate computed over + * all values (or returned 0 because the arg resolved to + * nothing). Both DISTINCT and the explicit ALL (the default) + * are accepted; ALL is a no-op. */ + IF ::IsKW( ::nPos, "DISTINCT" ) + ::nPos++ + lDistinct := .T. + ELSEIF ::IsKW( ::nPos, "ALL" ) + ::nPos++ + ENDIF IF ::TType( ::nPos ) == TK_STAR AAdd( aArgs, SqlNode( ND_COL, "*", NIL, NIL, NIL ) ) ::nPos++ @@ -1894,7 +2019,8 @@ METHOD ParsePrimary() CLASS TSqlParser2 IF ::IsKW( ::nPos, "OVER" ) RETURN ::ParseWindow( cName, aArgs ) ENDIF - RETURN SqlNode( ND_FN, cName, aArgs, NIL, NIL ) + /* slot 5 carries the DISTINCT modifier for aggregate dedup. */ + RETURN SqlNode( ND_FN, cName, aArgs, NIL, lDistinct ) ENDIF RETURN SqlNode( ND_COL, cName, NIL, NIL, NIL ) diff --git a/cmd/five/main.go b/cmd/five/main.go index 3483034..08d6163 100644 --- a/cmd/five/main.go +++ b/cmd/five/main.go @@ -21,6 +21,7 @@ import ( "os" "os/exec" "path/filepath" + "strconv" "strings" "time" ) @@ -70,9 +71,41 @@ func main() { genPRG(os.Args[2]) case "debug": if len(os.Args) < 3 { - fatal("usage: five debug ") + fatal("usage: five debug [-b [module:]line ...] [--cli]") } - debugPRG(os.Args[2]) + prg := "" + var breakpoints []string + var watches []string + useCLI := false + for i := 2; i < len(os.Args); i++ { + a := os.Args[i] + switch a { + case "-b", "--break": + if i+1 >= len(os.Args) { + fatal("-b requires an argument: [module:]line") + } + breakpoints = append(breakpoints, os.Args[i+1]) + i++ + case "-w", "--watch": + if i+1 >= len(os.Args) { + fatal("-w requires an argument: ") + } + watches = append(watches, os.Args[i+1]) + i++ + case "--cli": + useCLI = true + default: + if prg == "" { + prg = a + } else { + fatal("unexpected argument: " + a) + } + } + } + if prg == "" { + fatal("usage: five debug [-b [module:]line ...] [-w ...] [--cli]") + } + debugPRGWithOpts(prg, breakpoints, watches, useCLI) case "frb": if len(os.Args) < 3 { fatal("usage: five frb [-o output.frb] [--pcode]") @@ -177,8 +210,8 @@ func buildPRG(prgFile, output string) { fatal("cannot resolve output path: " + err.Error()) } - // go build - cmd := exec.Command(goPath(), "build", "-o", absOutput, ".") + // go build — pgoArgs() adds -pgo=default.pgo when available. + cmd := exec.Command(goPath(), buildArgs(absOutput)...) cmd.Dir = tmpDir cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr @@ -270,7 +303,7 @@ func buildMultiPRG(prgFiles []string, output string) { if err != nil { fatal("cannot resolve output path: " + err.Error()) } - cmd := exec.Command(goPath(), "build", "-o", absOutput, ".") + cmd := exec.Command(goPath(), buildArgs(absOutput)...) cmd.Dir = tmpDir cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr @@ -527,6 +560,31 @@ func walkUpForGoMod(startDir string) string { // goPath is an alias for findGoBin (deduplicated). func goPath() string { return findGoBin() } +// pgoArgs returns ["-pgo="] when the Five project root contains a +// default.pgo file — profile-guided compilation. Empty otherwise, so +// builds proceed without PGO when the profile hasn't been collected. +// The FIVE_NO_PGO env var forces it off (useful when collecting a new +// profile or A/B benchmarking). +func pgoArgs() []string { + if os.Getenv("FIVE_NO_PGO") != "" { + return nil + } + root := findFiveRoot() + p := filepath.Join(root, "default.pgo") + if fi, err := os.Stat(p); err == nil && !fi.IsDir() && fi.Size() > 0 { + return []string{"-pgo=" + p} + } + return nil +} + +// buildArgs composes the full args for a `go build` invocation, +// inserting -pgo when a profile is available. +func buildArgs(output string) []string { + args := []string{"build"} + args = append(args, pgoArgs()...) + return append(args, "-o", output, ".") +} + func writeFile(path, content string) { if err := os.WriteFile(path, []byte(content), 0644); err != nil { fatal("cannot write " + path + ": " + err.Error()) @@ -651,8 +709,33 @@ func buildFRB(prgFile, outputFile string) { fmt.Fprintf(os.Stderr, "FRB: %s (%d bytes)\n", outputFile, len(frbData)) } -// debugPRG compiles PRG with debug info and runs with interactive debugger. -func debugPRG(prgFile string) { +// debugPRG is kept as a thin wrapper for backward-compatibility — no +// pre-launch breakpoints, TUI frontend. +func debugPRG(prgFile string) { debugPRGWithOpts(prgFile, nil, nil, false) } + +// parseBPSpec parses "[module:]line" into (module, line, ok). A bare +// "42" uses defaultMod. Colons inside module (Windows paths) aren't +// supported — use forward slashes. +func parseBPSpec(spec, defaultMod string) (string, int, bool) { + mod := defaultMod + lineStr := spec + if i := strings.LastIndex(spec, ":"); i > 0 { + mod = spec[:i] + lineStr = spec[i+1:] + } + n, err := strconv.Atoi(strings.TrimSpace(lineStr)) + if err != nil || n <= 0 { + return "", 0, false + } + return mod, n, true +} + +// debugPRGWithOpts compiles PRG with debug info and runs with interactive +// debugger. breakpoints is a list of "[module:]line" strings (module +// defaults to the PRG's basename). watches is a list of PRG expressions +// auto-evaluated at each stop. useCLI picks the gdb-style CLI frontend +// instead of the full-screen TUI. +func debugPRGWithOpts(prgFile string, breakpoints, watches []string, useCLI bool) { source, err := os.ReadFile(prgFile) if err != nil { fatal("cannot read file: " + err.Error()) @@ -700,10 +783,38 @@ func debugPRG(prgFile string) { goMod := fmt.Sprintf("module five-generated\n\ngo 1.21.13\n\nrequire five v0.0.0\n\nreplace five => %s\n", fiveRoot) writeFile(filepath.Join(tmpDir, "go.mod"), goMod) - // Add debug setup to main (use %q for safe path escaping) + // Build the debug setup: create Debugger, register any pre-launch + // breakpoints + watches, choose frontend, then run. + callback := "hbrt.TUIDebugger()" + if useCLI { + callback = "hbrt.CLIDebugger()" + } + prgBase := filepath.Base(prgFile) + var setupLines []string + for _, spec := range breakpoints { + mod, line, ok := parseBPSpec(spec, prgBase) + if !ok { + fatal(fmt.Sprintf("invalid -b %q — expected [module:]line", spec)) + } + setupLines = append(setupLines, + fmt.Sprintf("vm.Debugger.AddBreakpoint(%q, %d)", mod, line)) + } + for _, w := range watches { + setupLines = append(setupLines, + fmt.Sprintf("vm.Debugger.Watches = append(vm.Debugger.Watches, %q)", w)) + } + // If any breakpoints were set, start in Continue mode so the program + // runs until it hits one. Otherwise keep step-line (legacy behavior). + startMode := "hbrt.DbgStepLine" + if len(breakpoints) > 0 { + startMode = "hbrt.DbgContinue" + } debugSetup := fmt.Sprintf( - "vm.Debugger = hbrt.NewDebugger()\n\tvm.Debugger.SourceDir = %s\n\tvm.Debugger.Callback = hbrt.TUIDebugger()\n\tvm.Run(\"MAIN\")", - fmt.Sprintf("%q", mustAbs("."))) + "vm.Debugger = hbrt.NewDebugger()\n\tvm.Debugger.SourceDir = %s\n\tvm.Debugger.Mode = %s\n\tvm.Debugger.Callback = %s\n\t%s\n\tvm.Run(\"MAIN\")", + fmt.Sprintf("%q", mustAbs(".")), + startMode, + callback, + strings.Join(setupLines, "\n\t")) goSrc = strings.Replace(goSrc, "vm.Run(\"MAIN\")", debugSetup, 1) // Remove unused fmt import if added // (no longer needed since we don't use fmt.Println in generated code) diff --git a/compiler/gengo/emit_block.go b/compiler/gengo/emit_block.go new file mode 100644 index 0000000..d2aba2c --- /dev/null +++ b/compiler/gengo/emit_block.go @@ -0,0 +1,287 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Block, alias, and method-send emission. +// +// Groups three related emitters that all cross the "ordinary local vs +// externally-addressable" boundary: +// +// - emitAliasExpr: workarea aliasing (`ALIAS->field`, `(expr)->(...)`, +// MEMVAR->name), including the save/select/restore dance used when +// an aliased expression switches the current workarea. +// - emitSendExpr: method dispatch (`obj:method()`, `::field`, +// `::super:method()`, Go-object reflect-bridge fallback). +// - emitBlock: code blocks `{|params| body}`, including +// RefCell-based mutable capture of outer locals. +// +// collectFreeVars / walkExprIdents are the shared walker that emitBlock +// uses to decide which outer locals to capture into the block. + +package gengo + +import ( + "five/compiler/ast" + "fmt" + "strings" +) + +func (g *Generator) emitAliasExpr(e *ast.AliasExpr) { + fieldIdent, isFieldIdent := e.Field.(*ast.IdentExpr) + + // Case 1: alias->field (static alias, simple field name) + if ident, ok := e.Alias.(*ast.IdentExpr); ok && isFieldIdent { + upper := strings.ToUpper(ident.Name) + // `M->name` / `MEMVAR->name` access the memvar namespace, not + // a database workarea. Harbour reserves both aliases for this. + if upper == "M" || upper == "MEMVAR" { + g.writeln(fmt.Sprintf(`t.PushMemvar(%q)`, fieldIdent.Name)) + return + } + g.writeln(fmt.Sprintf(`t.PushAliasField(%q, %q)`, ident.Name, fieldIdent.Name)) + return + } + + // Case 2: (expr)->field (dynamic alias, simple field name) + if isFieldIdent { + g.emitExpr(e.Alias) + g.writeln(fmt.Sprintf(`t.PushDynAliasField(t.Pop2().AsString(), %q)`, fieldIdent.Name)) + return + } + + // Case 3: alias->(expr) or (expr)->(expr) — workarea context expression + // Harbour: save current WA, select new WA, evaluate expr, restore WA + // Example: (nArea)->(Used()) → evaluate Used() in workarea nArea + // Example: CUSTOMERS->(RecCount()) → evaluate RecCount() in CUSTOMERS workarea + if ident, ok := e.Alias.(*ast.IdentExpr); ok { + _, isLocal := g.curLocals[strings.ToUpper(ident.Name)] + if isLocal { + // Local variable: emit value (numeric area number) + g.emitExpr(e.Alias) + g.writeln(`t.WASaveAndSelect(int(t.Pop2().AsNumInt()))`) + } else { + // Static alias name: resolve by alias string + g.writeln(fmt.Sprintf(`t.WASaveAndSelectAlias(%q)`, ident.Name)) + } + } else { + // Dynamic: numeric area from expression + g.emitExpr(e.Alias) + g.writeln(`t.WASaveAndSelect(int(t.Pop2().AsNumInt()))`) + } + g.emitExpr(e.Field) + g.writeln(`t.WARestore()`) +} + +func (g *Generator) fieldName(expr ast.Expr) string { + if ident, ok := expr.(*ast.IdentExpr); ok { + return ident.Name + } + return "" +} + +func (g *Generator) emitSendExpr(e *ast.SendExpr) { + // ::super:Method(args) — dispatch to parent class. The parse tree + // is nested: outer SendExpr.Object is itself a SendExpr whose + // Object is ::SELF and Method is "super". Detect that shape and + // route through SendSuper, which keeps Self bound to the child + // instance but looks the method up on Parent. + if sup, ok := e.Object.(*ast.SendExpr); ok { + if _, isSelf := sup.Object.(*ast.SelfExpr); isSelf && + strings.EqualFold(sup.Method, "super") { + for _, arg := range e.Args { + g.emitExpr(arg) + } + // Emit defining-class name so runtime walks the right Parent + // chain — Self's class alone would infinite-loop on 3+ level + // hierarchies (Grand→Child→Base). See SendSuper comment. + g.writeln(fmt.Sprintf("t.SendSuper(%q, %q, %d)", + g.curMethodClass, e.Method, len(e.Args))) + return + } + } + + // Self access: ::field (no parens) → PushSelfField + // Self method: ::method() (has parens) → Send on Self + if _, isSelf := e.Object.(*ast.SelfExpr); isSelf { + if !e.HasParens && len(e.Args) == 0 { + // ::field (getter, no parentheses) + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", strings.ToUpper(e.Method))) + return + } + // ::method() or ::method(args) — method call on Self + g.writeln("t.PushSelf()") + for _, arg := range e.Args { + g.emitExpr(arg) + } + g.writeln(fmt.Sprintf("t.Send(%q, %d)", e.Method, len(e.Args))) + return + } + + // General: obj:method(args) or obj:field + // Check at runtime: if Go object → GoCall, else Harbour Send + g.emitExpr(e.Object) + g.writeln("{") + g.indent++ + g.writeln("_obj := t.Pop2()") + + // Push args and capture them + argNames := make([]string, len(e.Args)) + for i, arg := range e.Args { + argNames[i] = fmt.Sprintf("_sa%d", i) + g.emitExpr(arg) + g.writeln(fmt.Sprintf("%s := t.Pop2()", argNames[i])) + } + + g.writeln("if hbrt.IsGoObject(_obj) {") + g.indent++ + // Go object: use reflect bridge + argsStr := "" + for i, name := range argNames { + if i > 0 { + argsStr += ", " + } + argsStr += name + } + g.writeln(fmt.Sprintf("_gr := hbrt.GoCallCached(_obj, %q, %s)", e.Method, argsStr)) + g.writeln("if len(_gr) > 0 { t.PushValue(_gr[0]) } else { t.PushNil() }") + g.indent-- + g.writeln("} else {") + g.indent++ + // Harbour object: use Send + g.writeln("t.PushValue(_obj)") + for _, name := range argNames { + g.writeln(fmt.Sprintf("t.PushValue(%s)", name)) + } + g.writeln(fmt.Sprintf("t.Send(%q, %d)", e.Method, len(e.Args))) + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}") +} + +func (g *Generator) emitBlock(e *ast.BlockExpr) { + // Code block: {|params| body} + // Block params are passed via Frame() from Eval/AEval. + nParams := len(e.Params) + + // Collect free variables in the block body that reference outer locals. + // These need to be captured via Go closure variables. + outerLocals := g.curLocals + blockLocals := make(localMap) + for i, p := range e.Params { + blockLocals[strings.ToUpper(p)] = i + 1 + } + + // Find all idents in block body that are in outerLocals but NOT in blockLocals + freeVars := g.collectFreeVars(e.Body, blockLocals, outerLocals) + + // Harbour: closures share outer locals via RefCell (mutable capture). + // Convert each captured outer local to a RefCell, then pass the RefCell + // into the block. Both outer function and block read/write through it. + for _, fv := range freeVars { + outerIdx := outerLocals[fv] + // Ensure outer local is a RefCell (PushLocalRef creates one if needed, + // but we do it inline to avoid stack ops). + g.writeln(fmt.Sprintf("t.EnsureLocalRef(%d) // share %s via RefCell", outerIdx, fv)) + } + + // Capture the RefCell values with unique names to avoid Go scope issues. + capSeq := g.blockSeq + g.blockSeq++ + capNames := make(map[string]string) // fv → Go var name + for _, fv := range freeVars { + outerIdx := outerLocals[fv] + capName := fmt.Sprintf("_cap_%s_%d", fv, capSeq) + g.writeln(fmt.Sprintf("%s := t.LocalRaw(%d) // capture RefCell %s", capName, outerIdx, fv)) + capNames[fv] = capName + } + + g.writeln(fmt.Sprintf("t.PushBlock(func(t *hbrt.Thread) {")) + g.indent++ + nLocals := len(freeVars) + g.writeln(fmt.Sprintf("t.Frame(%d, %d)", nParams, nLocals)) + g.writeln("defer t.EndProc()") + + // Inject RefCell values directly into block locals — reads/writes go through RefCell + for i, fv := range freeVars { + localIdx := nParams + i + 1 + blockLocals[fv] = localIdx + g.writeln(fmt.Sprintf("t.SetLocalRaw(%d, %s) // inject shared RefCell %s", localIdx, capNames[fv], fv)) + } + + g.curLocals = blockLocals + g.emitExpr(e.Body) + g.writeln("t.RetValue()") + + g.curLocals = outerLocals + g.indent-- + g.writeln(fmt.Sprintf("}, %d)", 0)) +} + +// collectFreeVars finds identifier names in expr that exist in outerLocals but not blockLocals. +func (g *Generator) collectFreeVars(expr ast.Expr, blockLocals, outerLocals localMap) []string { + var result []string + seen := map[string]bool{} + g.walkExprIdents(expr, func(name string) { + upper := strings.ToUpper(name) + if seen[upper] { + return + } + if _, inBlock := blockLocals[upper]; inBlock { + return + } + if _, inOuter := outerLocals[upper]; inOuter { + seen[upper] = true + result = append(result, upper) + } + }) + return result +} + +// walkExprIdents calls fn for each IdentExpr in the expression tree. +func (g *Generator) walkExprIdents(expr ast.Expr, fn func(string)) { + if expr == nil { + return + } + switch e := expr.(type) { + case *ast.IdentExpr: + fn(e.Name) + case *ast.BinaryExpr: + g.walkExprIdents(e.Left, fn) + g.walkExprIdents(e.Right, fn) + case *ast.UnaryExpr: + g.walkExprIdents(e.X, fn) + case *ast.PostfixExpr: + g.walkExprIdents(e.X, fn) + case *ast.CallExpr: + g.walkExprIdents(e.Func, fn) + for _, a := range e.Args { + g.walkExprIdents(a, fn) + } + case *ast.IndexExpr: + g.walkExprIdents(e.X, fn) + g.walkExprIdents(e.Index, fn) + case *ast.DotExpr: + g.walkExprIdents(e.X, fn) + case *ast.AssignExpr: + g.walkExprIdents(e.Left, fn) + g.walkExprIdents(e.Right, fn) + case *ast.ArrayLitExpr: + for _, item := range e.Items { + g.walkExprIdents(item, fn) + } + case *ast.IIfExpr: + g.walkExprIdents(e.Cond, fn) + g.walkExprIdents(e.True, fn) + g.walkExprIdents(e.False, fn) + case *ast.SendExpr: + g.walkExprIdents(e.Object, fn) + for _, a := range e.Args { + g.walkExprIdents(a, fn) + } + case *ast.AliasExpr: + g.walkExprIdents(e.Alias, fn) + g.walkExprIdents(e.Field, fn) + case *ast.BlockExpr: + g.walkExprIdents(e.Body, fn) + } +} diff --git a/compiler/gengo/emit_stmt.go b/compiler/gengo/emit_stmt.go new file mode 100644 index 0000000..cde4cca --- /dev/null +++ b/compiler/gengo/emit_stmt.go @@ -0,0 +1,1351 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Statement emission for the Go code generator. +// +// Every PRG statement kind ultimately routes through emitStmt; this file +// holds emitStmt and every per-kind emitter it dispatches to: +// +// - emitMidVarDecl / emitQOut / emitExprStmt / emitAssign / +// emitCallAsStmt / emitMultiAssign / emitDefer — non-control-flow +// statements. +// - emitIf / emitDoWhile / emitFor / emitForEach / emitSwitch / +// emitBeginSequence — control flow. +// +// A family of helper predicates (boolLiteralValue, hasRDDCommands, +// hasWorkareaChange, collectSymbols, collectReplaceFields, +// hasAppendInBody, bodyHasLoop, hasLoopStmt) supports hoisting and +// fast-path decisions inside the control-flow emitters; they live +// here rather than in gen_util.go because they're only used by +// statement emission. + +package gengo + +import ( + "five/compiler/ast" + "five/compiler/token" + "fmt" + "strings" +) + +// --- Statement emission --- + +func (g *Generator) emitStmt(stmt ast.Stmt, locals localMap) { + // Line hook — two variants: + // - Non-debug builds get DebugLineFast (inlineable, ~2 field + // writes) so error.log panic traces still carry a line number + // without the per-statement dispatch tax. + // - Debug builds get DebugLine, which additionally drives + // breakpoints, step mode, and the trace ring. + if stmt.Pos().Line > 0 { + if g.Debug { + g.writeln(fmt.Sprintf("t.DebugLine(%q, %d)", g.file.Name, stmt.Pos().Line)) + } else { + g.writeln(fmt.Sprintf("t.DebugLineFast(%q, %d)", g.file.Name, stmt.Pos().Line)) + } + } + + switch s := stmt.(type) { + case *ast.ReturnStmt: + if len(s.Values) > 1 { + // Multi-return: RETURN a, b, c → push array of values + for _, v := range s.Values { + g.emitExpr(v) + } + g.writeln(fmt.Sprintf("t.ArrayGen(%d)", len(s.Values))) + g.writeln("t.RetValue()") + } else if s.Value != nil { + g.emitExpr(s.Value) + g.writeln("t.RetValue()") + } else { + g.writeln("t.RetNil()") + } + g.writeln("return") // Go return to exit function immediately + + case *ast.QOutStmt: + g.emitQOut(s, locals) + + case *ast.ExprStmt: + g.emitExprStmt(s, locals) + + case *ast.IfStmt: + g.emitIf(s, locals) + + case *ast.SwitchStmt: + g.emitSwitch(s, locals) + + case *ast.DoWhileStmt: + g.emitDoWhile(s, locals) + + case *ast.ForStmt: + g.emitFor(s, locals) + + case *ast.ForEachStmt: + g.emitForEach(s, locals) + + case *ast.ExitStmt: + g.writeln("break") + + case *ast.LoopStmt: + if g.curForLabel != "" { + // Inside FOR..NEXT: goto label before increment (continue would skip it) + g.writeln("goto " + g.curForLabel) + } else { + g.writeln("continue") + } + + case *ast.MultiAssignStmt: + g.emitMultiAssign(s, locals) + + case *ast.DeferStmt: + g.emitDefer(s, locals) + + case *ast.VarDecl: + // LOCAL in mid-function or PRIVATE/PUBLIC + g.emitMidVarDecl(s, locals) + + // xBase commands — generate calls to hbrdd WorkAreaManager + case *ast.UseCmd: + g.emitUseCmd(s, locals) + case *ast.GoCmd: + g.emitGoCmd(s) + case *ast.SkipCmd: + g.emitSkipCmd(s, locals) + case *ast.SeekCmd: + g.emitSeekCmd(s, locals) + case *ast.ReplaceCmd: + g.emitReplaceCmd(s, locals) + case *ast.AppendCmd: + if g.hoistedFields != nil { + // Use hoisted area variable + g.writeln("if _rarea != nil { _rarea.Append() }") + } else { + g.writeln("{ _wa := t.WA.(*hbrdd.WorkAreaManager)") + g.writeln("if _area := _wa.Current(); _area != nil { _area.Append() } }") + } + case *ast.DeleteCmd: + if g.hoistedDW || g.hoistedFields != nil { + g.writeln(fmt.Sprintf("if %s != nil { %s.Delete() }", g.hoistedAreaVar(), g.hoistedAreaVar())) + } else { + g.writeln("{ _wa := t.WA.(*hbrdd.WorkAreaManager)") + g.writeln("if _area := _wa.Current(); _area != nil { _area.Delete() } }") + } + case *ast.SelectCmd: + g.emitExpr(s.Area) + g.writeln("{ _wa := t.WA.(*hbrdd.WorkAreaManager); _v := t.Pop2()") + g.writeln("if _v.IsNumeric() { _wa.Select(int(_v.AsNumInt())) } else { _wa.Select(_v.AsString()) } }") + case *ast.IndexCmd: + g.writeln("{") + g.indent++ + g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)") + g.writeln("if area := wa.Current(); area != nil {") + g.indent++ + g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") + g.indent++ + keyStr := exprToString(s.KeyExpr) + g.writeln(fmt.Sprintf("_keyExpr := %q", keyStr)) + + // File expression: if it contains a function call, evaluate at + // runtime — Harbour `INDEX ON ... TO ( cExpr )` semantics. Prior + // behavior was static exprToString which serialized calls like + // `Lower(cTable) + "_pk.ntx"` into the literal filename string. + // Detect via containsCall; preserve static path for simple + // `test.ntx` style identifiers. + if containsCall(s.File) { + g.emitExpr(s.File) + g.writeln("_file := t.Pop2().AsString()") + } else { + fileStr := exprToString(s.File) + g.writeln(fmt.Sprintf("_file := %q", fileStr)) + } + forExpr := `""` + if s.ForCond != nil { + forExpr = fmt.Sprintf("%q", exprToString(s.ForCond)) + } + + // Emit compiled key evaluator as Go closure. + // This inlines the AST of the key expression into native Go code, + // eliminating per-record MacroEval string parsing + symbol lookup. + // In INDEX context, bare identifiers are FIELD names (not locals). + g.writeln("_keyFunc := func() hbrt.Value {") + g.indent++ + g.emitIndexKeyExpr(s.KeyExpr) + g.writeln("return t.Pop2()") + g.indent-- + g.writeln("}") + + // Emit compiled FOR evaluator when the source has a FOR clause. + // Mirrors _keyFunc — zero runtime parsing, field-name identifier + // context, closure captures the Thread. + forFuncRef := "nil" + if s.ForCond != nil { + g.writeln("_forFunc := func() bool {") + g.indent++ + g.emitIndexKeyExpr(s.ForCond) + g.writeln("return t.Pop2().AsBool()") + g.indent-- + g.writeln("}") + forFuncRef = "_forFunc" + } + + // Still set MacroEval fallback for evalKeyExprInner (used for keyLen sampling) + g.writeln("dbf.KeyEvalFunc = func(expr string) hbrt.Value { return t.MacroEval(expr) }") + g.writeln(fmt.Sprintf("idx.OrderCreate(hbrdd.OrderCreateParams{KeyExpr: _keyExpr, FilePath: _file, ForExpr: %s, TagName: %q, Unique: %v, Descending: %v, KeyFunc: _keyFunc, ForFunc: %s})", + forExpr, s.TagName, s.Unique, s.Descending, forFuncRef)) + g.writeln("dbf.KeyEvalFunc = nil") + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}") + case *ast.SetCmd: + upper := strings.ToUpper(s.Setting) + + // Boolean SET toggles — call RTL Set function, no workarea needed + setFuncMap := map[string]string{ + "DELETED": "SETDELETED", + "EXACT": "SETEXACT", + "SOFTSEEK": "SETSOFTSEEK", + "EXCLUSIVE": "SETEXCLUSIVE", + "FIXED": "SETFIXED", + "CANCEL": "SETCANCEL", + "BELL": "SETBELL", + "CONFIRM": "SETCONFIRM", + "INSERT": "SETINSERT", + "ESCAPE": "SETESCAPE", + "WRAP": "SETWRAP", + } + if funcName, ok := setFuncMap[upper]; ok { + onOff := strings.ToUpper(s.Extra) + if onOff == "ON" || onOff == "OFF" { + val := "true" + if onOff == "OFF" { + val = "false" + } + g.emitPushSymbol(funcName) + g.writeln("t.PushNil()") + g.writeln(fmt.Sprintf("t.PushBool(%s)", val)) + g.writeln("t.Do(1)") + } + break + } + + // Value SET commands — SET DATE/DECIMALS/EPOCH TO expr + valueFuncMap := map[string]string{ + "DATE": "__SETDATEFORMAT", + "DECIMALS": "SETDECIMALS", + "EPOCH": "SETEPOCH", + } + if funcName, ok := valueFuncMap[upper]; ok && s.Expr != nil { + g.emitPushSymbol(funcName) + g.writeln("t.PushNil()") + g.emitExpr(s.Expr) + g.writeln("t.Do(1)") + break + } + + // Workarea-specific SET commands + g.writeln("{") + g.indent++ + g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)") + g.writeln("if area := wa.Current(); area != nil {") + g.indent++ + switch upper { + case "FILTER": + if s.Expr != nil { + g.emitExpr(s.Expr) + g.writeln(`area.SetFilter(t.Pop2().AsString(), nil)`) + } else { + g.writeln("area.ClearFilter()") + } + case "ORDER": + if s.Expr != nil { + g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") + g.indent++ + g.emitExpr(s.Expr) + g.writeln(`{ _ov := t.Pop2(); var _os string; if _ov.IsNumeric() { _os = hbrt.NtoS(_ov.AsNumInt()) } else { _os = _ov.AsString() }; idx.OrderListFocus(_os) }`) + g.indent-- + g.writeln("}") + } + case "INDEX": + if s.Expr != nil { + fileStr := exprToString(s.Expr) + g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") + g.indent++ + if fileStr != "" { + // SET INDEX TO a, b, c — split comma-separated file names + // and call OrderListAdd for each. Harbour loads all NTX + // files into the active index list. + clean := fileStr + if len(clean) >= 2 && clean[0] == '"' && clean[len(clean)-1] == '"' { + clean = clean[1 : len(clean)-1] + } + parts := strings.Split(clean, ",") + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + g.writeln(fmt.Sprintf(`idx.OrderListAdd(%q)`, p)) + } + } + } else { + g.emitExpr(s.Expr) + g.writeln(`idx.OrderListAdd(t.Pop2().AsString())`) + } + g.indent-- + g.writeln("}") + } else { + g.writeln("if idx, ok := area.(hbrdd.Indexer); ok { idx.OrderListClear() }") + } + default: + g.writeln(fmt.Sprintf("// SET %s: not yet implemented", upper)) + } + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}") + + case *ast.SeqStmt: + g.emitBeginSequence(s, locals) + + case *ast.AtSayCmd: + g.emitAtSayCmd(s) + case *ast.AtGetCmd: + g.emitAtGetCmd(s, locals) + case *ast.AtSayGetCmd: + g.emitAtSayGetCmd(s, locals) + case *ast.ReadCmd: + g.emitReadCmd(s, locals) + + default: + g.writeln(fmt.Sprintf("// WARN: unhandled statement type %T — skipped", stmt)) + } +} + +func (g *Generator) emitMidVarDecl(s *ast.VarDecl, locals localMap) { + for _, v := range s.Vars { + idx, found := locals[strings.ToUpper(v.Name)] + if !found { + maxIdx := 0 + for _, i := range locals { + if i > maxIdx { + maxIdx = i + } + } + idx = maxIdx + 1 + locals[strings.ToUpper(v.Name)] = idx + } + if v.Init != nil { + if _, isConst := g.constLocals[strings.ToUpper(v.Name)]; !isConst { + g.emitExpr(v.Init) + g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) + } + } + } +} + +func (g *Generator) emitQOut(s *ast.QOutStmt, locals localMap) { + sym := "QOUT" + if s.IsQQ { + sym = "QQOUT" + } + g.emitPushSymbol(sym) + g.writeln("t.PushNil()") + for _, expr := range s.Exprs { + g.emitExpr(expr) + } + g.writeln(fmt.Sprintf("t.Function(%d)", len(s.Exprs))) +} + +func (g *Generator) emitExprStmt(s *ast.ExprStmt, locals localMap) { + // Check if it's an assignment + if assign, ok := s.X.(*ast.AssignExpr); ok { + g.emitAssign(assign, locals) + return + } + // Check if it's a function call (discard result) + if call, ok := s.X.(*ast.CallExpr); ok { + g.emitCallAsStmt(call, locals) + return + } + // Bare identifier as statement (e.g., CLS, CLEAR) — treat as zero-arg function call + if ident, ok := s.X.(*ast.IdentExpr); ok { + if _, found := locals[strings.ToUpper(ident.Name)]; !found { + g.emitPushSymbol(strings.ToUpper(ident.Name)) + g.writeln("t.PushNil()") + g.writeln("t.Do(0)") + return + } + } + // Postfix ++/-- + if pf, ok := s.X.(*ast.PostfixExpr); ok { + // Local variable: n++ + if ident, ok := pf.X.(*ast.IdentExpr); ok { + upper := strings.ToUpper(ident.Name) + if idx, found := locals[upper]; found { + if pf.Op == token.INC { + g.writeln(fmt.Sprintf("t.LocalAddInt(%d, 1)", idx)) + } else { + g.writeln(fmt.Sprintf("t.LocalAddInt(%d, -1)", idx)) + } + return + } + // STATIC variable: s_nPass++ + if goVar, found := g.staticVars[upper]; found { + delta := "1" + if pf.Op == token.DEC { + delta = "-1" + } + g.writeln(fmt.Sprintf("{ _v := %s.AsNumInt() + %s; %s = hbrt.MakeInt(int(_v)) }", goVar, delta, goVar)) + return + } + } + // Self field: ::field++ + if send, ok := pf.X.(*ast.SendExpr); ok { + if _, isSelf := send.Object.(*ast.SelfExpr); isSelf { + fieldName := strings.ToUpper(send.Method) + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + if pf.Op == token.INC { + g.writeln("t.PushInt(1)") + g.writeln("t.Plus()") + } else { + g.writeln("t.PushInt(1)") + g.writeln("t.Minus()") + } + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + return + } + } + } + // General expression (result on stack, pop it) + g.emitExpr(s.X) + g.writeln("t.Pop()") +} + +func (g *Generator) emitAssign(a *ast.AssignExpr, locals localMap) { + // Check for arr[idx] := value (array index assignment) + if idx, ok := a.Left.(*ast.IndexExpr); ok { + if a.Op == token.ASSIGN { + g.emitExpr(idx.X) // array + g.emitExpr(idx.Index) // index + g.emitExpr(a.Right) // value + g.writeln("t.ArrayPop()") // set array[index] = value + return + } + } + + // Check for obj:field := value (object field assignment) + if send, ok := a.Left.(*ast.SendExpr); ok { + _, isSelf := send.Object.(*ast.SelfExpr) + + if isSelf { + fieldName := strings.ToUpper(send.Method) + switch a.Op { + case token.ASSIGN: + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + case token.PLUSEQ: + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + g.emitExpr(a.Right) + g.writeln("t.Plus()") + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + case token.MINUSEQ: + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + g.emitExpr(a.Right) + g.writeln("t.Minus()") + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + case token.STAREQ: + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + g.emitExpr(a.Right) + g.writeln("t.Mult()") + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + case token.SLASHEQ: + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + g.emitExpr(a.Right) + g.writeln("t.Divide()") + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + default: + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + } + return + } + + // Non-self: obj:field := value → obj:_FIELD(value) + if a.Op == token.ASSIGN { + g.emitExpr(send.Object) + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf("t.Send(%q, 1)", "_"+strings.ToUpper(send.Method))) + g.writeln("t.Pop() // discard setter result") + return + } + } + + // Check for alias->field := value (FIELD->NAME := value) + if aliasExpr, ok := a.Left.(*ast.AliasExpr); ok { + if aliasIdent, ok2 := aliasExpr.Alias.(*ast.IdentExpr); ok2 { + if fieldIdent, ok3 := aliasExpr.Field.(*ast.IdentExpr); ok3 { + upper := strings.ToUpper(aliasIdent.Name) + // `M->name := v` / `MEMVAR->name := v` are memvar writes, + // not workarea field writes. + if upper == "M" || upper == "MEMVAR" { + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf(`t.PopMemvar(%q)`, fieldIdent.Name)) + return + } + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf(`{ _wa := t.WA.(*hbrdd.WorkAreaManager); _wa.SetAliasField(%q, %q, t.Pop2()) }`, aliasIdent.Name, fieldIdent.Name)) + return + } + } + } + + if ident, ok := a.Left.(*ast.IdentExpr); ok { + if idx, found := locals[strings.ToUpper(ident.Name)]; found { + switch a.Op { + case token.ASSIGN: + // Peephole: `x := x + ` / `x := x - ` → + // LocalAdd. Same result as `x += ` but lets the + // PRG side use the explicit form without penalty. + if be, ok := a.Right.(*ast.BinaryExpr); ok && + (be.Op == token.PLUS || be.Op == token.MINUS) { + if lid, isIdent := be.Left.(*ast.IdentExpr); isIdent { + if selfIdx, found := locals[strings.ToUpper(lid.Name)]; found && selfIdx == idx { + g.emitExpr(be.Right) + if be.Op == token.MINUS { + g.writeln("t.Negate()") + } + g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) + return + } + } + } + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) + case token.PLUSEQ: + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) + case token.MINUSEQ: + g.emitExpr(a.Right) + g.writeln("t.Negate()") + g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) + default: + // General compound: push local, push right, op, pop local + g.writeln(fmt.Sprintf("t.PushLocalFast(%d)", idx)) + g.emitExpr(a.Right) + g.emitBinaryOp(a.Op) + g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) + } + return + } + // Check module-level or function-level STATIC variable + upper := strings.ToUpper(ident.Name) + if goVar, found := g.staticVars[upper]; found { + switch a.Op { + case token.ASSIGN: + g.emitExpr(a.Right) + g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) + case token.PLUSEQ: + g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) + g.emitExpr(a.Right) + g.writeln("t.Plus()") + g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) + case token.MINUSEQ: + g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) + g.emitExpr(a.Right) + g.writeln("t.Minus()") + g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) + case token.STAREQ: + g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) + g.emitExpr(a.Right) + g.writeln("t.Mult()") + g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) + case token.SLASHEQ: + g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) + g.emitExpr(a.Right) + g.writeln("t.Divide()") + g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) + default: + g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) + g.emitExpr(a.Right) + g.emitBinaryOp(a.Op) + g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) + } + return + } + } + // Fallback: general assignment via stack + g.emitExpr(a.Right) + g.writeln("// WARN: complex assignment target — simplified") + g.writeln("t.Pop()") +} + +func (g *Generator) emitCallAsStmt(call *ast.CallExpr, locals localMap) { + if ident, ok := call.Func.(*ast.IdentExpr); ok { + g.emitPushSymbol(strings.ToUpper(ident.Name)) + } else { + g.emitExpr(call.Func) + } + g.writeln("t.PushNil()") + for _, arg := range call.Args { + g.emitExpr(arg) + } + g.writeln(fmt.Sprintf("t.Do(%d)", len(call.Args))) +} + +// boolLiteralValue returns (value, true) if e reduces to a .T./.F. +// literal at compile time. Sees through an outer `.NOT.` so expressions +// like `!.F.` also collapse. Used by emitIf to skip dead branches and +// by the AND/OR short-circuit emitter. +func boolLiteralValue(e ast.Expr) (bool, bool) { + if u, ok := e.(*ast.UnaryExpr); ok && u.Op == token.NOT { + if v, ok := boolLiteralValue(u.X); ok { + return !v, true + } + return false, false + } + lit, ok := e.(*ast.LiteralExpr) + if !ok { + return false, false + } + switch lit.Kind { + case token.TRUE: + return true, true + case token.FALSE: + return false, true + } + return false, false +} + +func (g *Generator) emitIf(s *ast.IfStmt, locals localMap) { + // Dead-branch elimination for literal conditions. An IF .T. collapses + // to its body; an IF .F. collapses to its first live ELSEIF/ELSE. + // We resolve the main Cond here and recurse on the remainder if it + // turns into a new IF chain. + if v, ok := boolLiteralValue(s.Cond); ok { + if v { + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + return + } + // IF .F. — scan ElseIfs for first non-.F. branch. + for i, ei := range s.ElseIfs { + if v2, ok2 := boolLiteralValue(ei.Cond); ok2 { + if v2 { + for _, stmt := range ei.Body { + g.emitStmt(stmt, locals) + } + return + } + continue // ELSEIF .F. — dead, skip + } + // Non-literal ELSEIF becomes the new IF head. + newIf := &ast.IfStmt{ + IfPos: ei.ElseIfPos, + Cond: ei.Cond, + Body: ei.Body, + ElseIfs: s.ElseIfs[i+1:], + ElseBody: s.ElseBody, + } + g.emitIf(newIf, locals) + return + } + // All ElseIfs were .F. — only ELSE body remains. + for _, stmt := range s.ElseBody { + g.emitStmt(stmt, locals) + } + return + } + + // Main cond is dynamic. Still filter dead ELSEIFs (.F. removed; + // an ELSEIF .T. truncates the chain and becomes the ELSE). + elseIfs := s.ElseIfs + elseBody := s.ElseBody + if len(elseIfs) > 0 { + filtered := make([]*ast.ElseIfClause, 0, len(elseIfs)) + for _, ei := range elseIfs { + if v, ok := boolLiteralValue(ei.Cond); ok { + if v { + // ELSEIF .T. — chain stops here; body becomes ELSE. + elseBody = ei.Body + elseIfs = filtered + goto emit + } + continue // ELSEIF .F. — dead + } + filtered = append(filtered, ei) + } + elseIfs = filtered + } + +emit: + g.emitExpr(s.Cond) + g.writeln("if t.PopLogical() {") + g.indent++ + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + + for _, ei := range elseIfs { + g.writeIndent() + g.write("} else {\n") + g.indent++ + g.emitExpr(ei.Cond) + g.writeln("if t.PopLogical() {") + g.indent++ + for _, stmt := range ei.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + } + + if len(elseBody) > 0 { + g.writeln("} else {") + g.indent++ + for _, stmt := range elseBody { + g.emitStmt(stmt, locals) + } + g.indent-- + } + + g.writeln("}") + + // Close nested elseif braces + for range elseIfs { + g.writeln("}") + } +} + +func (g *Generator) emitDoWhile(s *ast.DoWhileStmt, locals localMap) { + // DO WHILE .F. — body is unreachable; emit nothing. + if v, ok := boolLiteralValue(s.Cond); ok && !v { + return + } + + // Detect RDD commands in body for WA hoisting + hasRDD := hasRDDCommands(s.Body) + safeToHoist := hasRDD && !hasWorkareaChange(s.Body) + + if safeToHoist && g.hoistedFields == nil { + g.writeln("{") + g.indent++ + g.writeln("_dwa := t.WA.(*hbrdd.WorkAreaManager)") + g.writeln("_darea := _dwa.Current()") + g.hoistedDW = true + } + + g.writeln("for {") + g.indent++ + // DO WHILE .T. — the idiomatic infinite loop. Skip the per-iteration + // PushBool/PopLogical; exit only through EXIT / LOOP / RETURN. + if v, ok := boolLiteralValue(s.Cond); !ok || !v { + g.emitExpr(s.Cond) + g.writeln("if !t.PopLogical() { break }") + } + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + + if safeToHoist && g.hoistedDW { + g.hoistedDW = false + g.indent-- + g.writeln("}") + } +} + +// hasRDDCommands checks if any statement is an RDD operation. +func hasRDDCommands(stmts []ast.Stmt) bool { + for _, s := range stmts { + switch s.(type) { + case *ast.SkipCmd, *ast.GoCmd, *ast.SeekCmd, + *ast.ReplaceCmd, *ast.AppendCmd, *ast.DeleteCmd: + return true + } + } + return false +} + +// hasWorkareaChange checks for USE/SELECT that would invalidate cached area. +func hasWorkareaChange(stmts []ast.Stmt) bool { + for _, s := range stmts { + switch v := s.(type) { + case *ast.UseCmd, *ast.SelectCmd: + return true + case *ast.IfStmt: + if hasWorkareaChange(v.Body) || hasWorkareaChange(v.ElseBody) { + return true + } + case *ast.DoWhileStmt: + if hasWorkareaChange(v.Body) { + return true + } + } + } + return false +} + +// collectSymbols scans AST for all symbol names referenced by function calls. +// Returns unique names for hoisting FindSymbol to function prologue. +func collectSymbols(stmts []ast.Stmt) []string { + seen := map[string]bool{} + var names []string + var walk func([]ast.Stmt) + var walkExpr func(ast.Expr) + + walkExpr = func(e ast.Expr) { + if e == nil { + return + } + switch v := e.(type) { + case *ast.CallExpr: + if ident, ok := v.Func.(*ast.IdentExpr); ok { + name := strings.ToUpper(ident.Name) + if !seen[name] { + seen[name] = true + names = append(names, name) + } + } + for _, a := range v.Args { + walkExpr(a) + } + case *ast.BinaryExpr: + walkExpr(v.Left) + walkExpr(v.Right) + case *ast.UnaryExpr: + walkExpr(v.X) + } + } + + walk = func(stmts []ast.Stmt) { + for _, s := range stmts { + switch v := s.(type) { + case *ast.ExprStmt: + walkExpr(v.X) + case *ast.ReturnStmt: + if v.Value != nil { + walkExpr(v.Value) + } + case *ast.IfStmt: + walkExpr(v.Cond) + walk(v.Body) + walk(v.ElseBody) + case *ast.ForStmt: + walk(v.Body) + case *ast.ForEachStmt: + walk(v.Body) + case *ast.DoWhileStmt: + walkExpr(v.Cond) + walk(v.Body) + case *ast.SeqStmt: + walk(v.Body) + walk(v.RecoverBody) + case *ast.SwitchStmt: + for _, c := range v.Cases { + walk(c.Body) + } + } + } + } + + walk(stmts) + return names +} + +// collectReplaceFields scans statements for REPLACE field names. +// Returns nil if unsafe to hoist (USE/SELECT/CLOSE found). +func collectReplaceFields(stmts []ast.Stmt) []string { + seen := map[string]bool{} + var fields []string + for _, s := range stmts { + switch v := s.(type) { + case *ast.ReplaceCmd: + for _, rf := range v.Fields { + if ident, ok := rf.Field.(*ast.IdentExpr); ok { + name := ident.Name + if !seen[name] { + seen[name] = true + fields = append(fields, name) + } + } + } + case *ast.UseCmd, *ast.SelectCmd: + return nil // workarea may change — unsafe to hoist + case *ast.IfStmt: + // Check nested blocks + if sub := collectReplaceFields(v.Body); sub == nil { + return nil + } + if sub := collectReplaceFields(v.ElseBody); sub == nil { + return nil + } + case *ast.DoWhileStmt: + if sub := collectReplaceFields(v.Body); sub == nil { + return nil + } + } + } + return fields +} + +// hasAppendInBody checks if any APPEND command exists in the statements. +func hasAppendInBody(stmts []ast.Stmt) bool { + for _, s := range stmts { + if _, ok := s.(*ast.AppendCmd); ok { + return true + } + } + return false +} + +func (g *Generator) emitFor(s *ast.ForStmt, locals localMap) { + idx, found := locals[strings.ToUpper(s.Var)] + if !found { + g.writeln("// ERROR: FOR variable not found in locals") + return + } + + // i := start + g.emitExpr(s.Start) + g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) + + // Detect step direction for comparison + isNegStep := false + if s.Step != nil { + if lit, ok := s.Step.(*ast.LiteralExpr); ok { + if lit.Kind == token.INT && len(lit.Value) > 0 && lit.Value[0] == '-' { + isNegStep = true + } + } + if un, ok := s.Step.(*ast.UnaryExpr); ok && un.Op == token.MINUS { + isNegStep = true + } + } + + // Optimization: hoist WA/FieldIndex lookups outside FOR loop + // if body contains REPLACE and no USE/SELECT (safe to cache). + rddFields := collectReplaceFields(s.Body) + hoistRDD := len(rddFields) > 0 && hasAppendInBody(s.Body) + + if hoistRDD { + g.writeln("{") + g.indent++ + g.writeln("_rwa := t.WA.(*hbrdd.WorkAreaManager)") + g.writeln("_rarea := _rwa.Current()") + g.writeln("var _rdbf *dbf.DBFArea") + g.writeln("if _rarea != nil { _rdbf, _ = _rarea.(*dbf.DBFArea) }") + // Pre-compute field indexes + for i, fname := range rddFields { + g.writeln(fmt.Sprintf("var _rfi%d int = -1", i)) + g.writeln(fmt.Sprintf("if _rdbf != nil { _rfi%d = _rdbf.FieldIndex(%q) }", i, fname)) + } + g.hoistedFields = rddFields // store for emitReplaceCmdHoisted + } + + g.writeln("for {") + g.indent++ + + // Comparison: fused opcode when limit is literal int (most common). + // Also see through const-propagated LOCALs: `LOCAL n := 100; FOR i := 1 + // TO n` should hit the same fast path as a bare literal. + toLit, _ := s.To.(*ast.LiteralExpr) + if toLit == nil { + if id, ok := s.To.(*ast.IdentExpr); ok { + if l, ok2 := g.constLocals[strings.ToUpper(id.Name)]; ok2 { + toLit = l + } + } + } + if lit := toLit; lit != nil && lit.Kind == token.INT { + if isNegStep { + g.writeln(fmt.Sprintf("if !t.LocalGreaterEqualInt(%d, %s) { break }", idx, lit.Value)) + } else { + g.writeln(fmt.Sprintf("if !t.LocalLessEqualInt(%d, %s) { break }", idx, lit.Value)) + } + } else { + // General case: stack-based comparison + g.writeln(fmt.Sprintf("t.PushLocalFast(%d)", idx)) + g.emitExpr(s.To) + if isNegStep { + g.writeln("t.GreaterEqual()") + } else { + g.writeln("t.LessEqual()") + } + g.writeln("if !t.PopLogical() { break }") + } + + // Track FOR loop depth so LOOP can use goto instead of continue. + // Only emit label if LOOP is present in the body (Go rejects unused labels). + hasLoop := bodyHasLoop(s.Body) + forLabel := "" + prevForLabel := g.curForLabel + if hasLoop { + forLabel = fmt.Sprintf("_for_next_%d", g.forLabelSeq) + g.forLabelSeq++ + g.curForLabel = forLabel + } else { + g.curForLabel = "" + } + + // body + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + + // Label for LOOP to jump to (skipping continue which would miss increment) + if hasLoop { + g.writeln(forLabel + ":") + } + + // i += step (default 1) + if s.Step != nil { + g.emitExpr(s.Step) + g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) + } else { + g.writeln(fmt.Sprintf("t.LocalAddInt(%d, 1)", idx)) + } + + g.curForLabel = prevForLabel + g.indent-- + g.writeln("}") + + // Close hoisting block + if hoistRDD { + g.hoistedFields = nil + g.indent-- + g.writeln("}") + } +} + +// bodyHasLoop checks if any statement in the body is a LOOP. +// Only checks the immediate level — LOOP inside nested FOR/DO WHILE is irrelevant. +func bodyHasLoop(stmts []ast.Stmt) bool { + for _, s := range stmts { + if hasLoopStmt(s) { + return true + } + } + return false +} + +func hasLoopStmt(s ast.Stmt) bool { + switch s := s.(type) { + case *ast.LoopStmt: + return true + case *ast.IfStmt: + for _, st := range s.Body { + if hasLoopStmt(st) { + return true + } + } + for _, ei := range s.ElseIfs { + for _, st := range ei.Body { + if hasLoopStmt(st) { + return true + } + } + } + for _, st := range s.ElseBody { + if hasLoopStmt(st) { + return true + } + } + case *ast.SeqStmt: + for _, st := range s.Body { + if hasLoopStmt(st) { + return true + } + } + for _, st := range s.RecoverBody { + if hasLoopStmt(st) { + return true + } + } + case *ast.SwitchStmt: + for _, c := range s.Cases { + for _, st := range c.Body { + if hasLoopStmt(st) { + return true + } + } + } + for _, st := range s.Otherwise { + if hasLoopStmt(st) { + return true + } + } + // Do NOT recurse into ForStmt/DoWhileStmt — nested LOOP is for the inner loop + } + return false +} + + +func (g *Generator) emitSwitch(s *ast.SwitchStmt, locals localMap) { + // Wrap the whole thing in a one-iteration `for` so: + // 1. `_sw` stays scoped to the switch. + // 2. `EXIT` inside a CASE emits `break` and targets this loop, + // matching Harbour SWITCH semantics (EXIT terminates SWITCH). + // 3. Empty SWITCH (`SWITCH x ENDSWITCH`, common in conditional- + // compile test files) stays valid Go. + g.writeln("for {") + g.indent++ + g.emitExpr(s.Expr) + g.writeln("_sw := t.Pop2()") + g.writeln("_ = _sw") // silence unused-var warning when no cases reference it + // Use the runtime's type-aware Equal() instead of coercing to + // NumInt. The AsNumInt() path broke every non-numeric SWITCH + // (strings, dates, logicals): "ABC".AsNumInt() returns 0, so + // `SWITCH cType CASE "C"` folded every arm to the same false. + // Each CASE emits an independent `if !_swHit { push _sw; push + // caseVal; t.Equal(); if t.PopLogical() { body; _swHit = true } }` + // block so the stack is balanced even when a body executes EXIT / + // RETURN mid-case. + first := true + for _, c := range s.Cases { + if first { + g.writeln("_swHit := false") + first = false + } + g.writeln("if !_swHit {") + g.indent++ + g.writeln("t.PushValue(_sw)") + g.emitExpr(c.Value) + g.writeln("t.Equal()") + g.writeln("if t.PopLogical() {") + g.indent++ + g.writeln("_swHit = true") + for _, stmt := range c.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}") + } + if len(s.Otherwise) > 0 { + if first { + // No CASE arms — emit the OTHERWISE body as-is. + for _, stmt := range s.Otherwise { + g.emitStmt(stmt, locals) + } + } else { + g.writeln("if !_swHit {") + g.indent++ + for _, stmt := range s.Otherwise { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + } + } + if !first { + g.writeln("_ = _swHit") // guard against bodies that never read it + } + // Always break out of our one-iteration `for` wrapper, regardless + // of which (or no) case ran. + g.writeln("break") + g.indent-- + g.writeln("}") +} + +func (g *Generator) emitBeginSequence(s *ast.SeqStmt, locals localMap) { + // BEGIN SEQUENCE → Go's panic/recover. + // Catches both *HbError (runtime errors) and BreakValue (Break() calls). + // BreakValue is defined in hbrtl, but we detect it via duck typing + // to avoid import cycles. + g.writeln("{ // BEGIN SEQUENCE") + g.indent++ + g.writeln("_seqErr := func() (_recoverVal interface{}) {") + g.indent++ + g.writeln("defer func() {") + g.indent++ + g.writeln("if r := recover(); r != nil {") + g.indent++ + g.writeln("_recoverVal = r") + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}()") + + // Body + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + + g.writeln("return nil") + g.indent-- + g.writeln("}()") + + // RECOVER + if len(s.RecoverBody) > 0 { + g.writeln("if _seqErr != nil {") + g.indent++ + if s.RecoverVar != "" { + if idx, found := locals[strings.ToUpper(s.RecoverVar)]; found { + // Extract the value from the recovered panic: + // *HbError → error description string + // BreakValue (has .Value field) → the Break() argument + // other → string representation + g.writeln(fmt.Sprintf(`{ // RECOVER USING %s`, s.RecoverVar)) + g.indent++ + g.writeln(`switch _sv := _seqErr.(type) {`) + g.writeln(`case *hbrt.HbError:`) + g.writeln(fmt.Sprintf(` t.SetLocalFast(%d, hbrt.MakeString(_sv.Error()))`, idx)) + g.writeln(`default:`) + // For BreakValue, use reflection-free approach: check if + // the type has a Value field via a local interface. + g.writeln(` type hasValue interface{ GetValue() hbrt.Value }`) + g.writeln(` if bv, ok := _sv.(hasValue); ok {`) + g.writeln(fmt.Sprintf(` t.SetLocalFast(%d, bv.GetValue())`, idx)) + g.writeln(` } else {`) + g.writeln(fmt.Sprintf(` t.SetLocalFast(%d, hbrt.MakeString("error"))`, idx)) + g.writeln(` }`) + g.writeln(`}`) + g.indent-- + g.writeln(`}`) + } + } + for _, stmt := range s.RecoverBody { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + } else { + g.writeln("_ = _seqErr") + } + + g.indent-- + g.writeln("} // END SEQUENCE") +} + +func (g *Generator) emitForEach(s *ast.ForEachStmt, locals localMap) { + varIdx, found := locals[strings.ToUpper(s.Var)] + if !found { + g.writeln("// ERROR: FOR EACH variable not in locals") + return + } + + // Evaluate collection once; the emitted loop dispatches on the + // runtime type so the same FOR EACH works for arrays, hashes, and + // strings (Harbour semantics). Hash iteration yields values — the + // user can get at keys via hb_HKeys(h) before the loop. String + // iteration walks byte-by-byte, matching Harbour's "character" + // enumeration via single-byte sub-strings. + g.emitExpr(s.Collection) + g.writeln("{ _feArr := t.Pop2()") + + // Array branch — the common case, kept as-is for zero overhead + // when the collection is an array. + g.writeln("if _feArr.IsArray() {") + g.indent++ + g.writeln("_feItems := _feArr.AsArray().Items") + g.writeln("for _feI := 0; _feI < len(_feItems); _feI++ {") + g.indent++ + g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _feItems[_feI])", varIdx)) + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + g.indent-- + // Hash branch — iterate values in insertion order. HbHash keeps + // Keys/Values parallel slices in insertion order (see hash_helpers.go + // appendPair); the Order slice is only populated by a handful of + // Go-RTL builders and is empty for hash literals / hb_Hash(), so we + // walk Values directly. + g.writeln("} else if _feArr.IsHash() {") + g.indent++ + g.writeln("_feH := _feArr.AsHash()") + g.writeln("for _feI := 0; _feI < len(_feH.Values); _feI++ {") + g.indent++ + g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _feH.Values[_feI])", varIdx)) + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + g.indent-- + // String branch — iterate bytes as single-character substrings. + g.writeln("} else if _feArr.IsString() {") + g.indent++ + g.writeln("_feStr := _feArr.AsString()") + g.writeln("for _feI := 0; _feI < len(_feStr); _feI++ {") + g.indent++ + g.writeln(fmt.Sprintf("t.SetLocalFast(%d, hbrt.MakeString(string(_feStr[_feI])))", varIdx)) + for _, stmt := range s.Body { + g.emitStmt(stmt, locals) + } + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("} }") +} + +// --- Expression emission --- +// Each emitExpr leaves one value on the stack. + +// emitMultiAssign: a, b := Func() or a, b := x, y +func (g *Generator) emitMultiAssign(s *ast.MultiAssignStmt, locals localMap) { + if len(s.Values) == 1 { + // Single RHS: a, b := Func() → call function, unpack array result + g.emitExpr(s.Values[0]) + g.writeln("{") + g.indent++ + g.writeln("_mr := t.Pop2()") + g.writeln("if _mr.IsArray() {") + g.indent++ + g.writeln("_arr := _mr.AsArray()") + for i, name := range s.Targets { + if name == "_" { + continue + } + idx := locals[strings.ToUpper(name)] + if idx > 0 { + g.writeln(fmt.Sprintf("if %d < len(_arr.Items) { t.SetLocalFast(%d, _arr.Items[%d]) }", i, idx, i)) + } + } + g.indent-- + g.writeln("} else {") + g.indent++ + // Not array — assign first target, rest get NIL + if s.Targets[0] != "_" { + idx := locals[strings.ToUpper(s.Targets[0])] + if idx > 0 { + g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _mr)", idx)) + } + } + g.indent-- + g.writeln("}") + g.indent-- + g.writeln("}") + } else { + // Multiple RHS: a, b := x, y (parallel assign) + // Evaluate all RHS first, then assign + for i, val := range s.Values { + g.emitExpr(val) + g.writeln(fmt.Sprintf("_mv%d := t.Pop2()", i)) + } + for i, name := range s.Targets { + if name == "_" || i >= len(s.Values) { + continue + } + idx := locals[strings.ToUpper(name)] + if idx > 0 { + g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _mv%d)", idx, i)) + } + } + } +} + +// emitDefer: DEFER expr → Go defer +func (g *Generator) emitDefer(s *ast.DeferStmt, locals localMap) { + g.writeln("defer func() {") + g.indent++ + g.emitExpr(s.Call) + g.writeln("t.Pop() // discard defer result") + g.indent-- + g.writeln("}()") +} diff --git a/compiler/gengo/folding.go b/compiler/gengo/folding.go new file mode 100644 index 0000000..35d2a55 --- /dev/null +++ b/compiler/gengo/folding.go @@ -0,0 +1,456 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Constant folding and const-local propagation. +// +// Two passes cooperate at compile time so the generator emits smaller, +// warmer Go code: +// +// - foldLiteralTree / tryFoldBinary / negateLiteral: collapse binary +// expressions on literal operands into a single LiteralExpr. Handles +// int+int/−/×, string+string concatenation, and left-leaning +// `"a"+x+"b"+"c"` chain reassociation. Overflow bails out so the VM +// coerces to double. +// +// - collectConstLocals + constLocalVisitor: identifies LOCALs assigned +// exactly once with a literal initialiser. At emitIdent time those +// names are replaced by the literal so downstream folding (dead IF, +// AND/OR short-circuit, FOR step fusion) can fire on what was a +// variable reference. The walker is conservative — any unrecognised +// AST node aborts the pass so a hidden write can't sneak through. + +package gengo + +import ( + "five/compiler/ast" + "five/compiler/token" + "strconv" + "strings" +) + +func negateLiteral(lit *ast.LiteralExpr) (*ast.LiteralExpr, bool) { + switch lit.Kind { + case token.INT: + n, err := strconv.ParseInt(lit.Value, 10, 64) + if err != nil { + return nil, false + } + // Guard: math.MinInt64 has no positive twin — let the VM's + // runtime coerce-to-double path handle it. + if n == -1<<63 { + return nil, false + } + return &ast.LiteralExpr{ + ValuePos: lit.ValuePos, + Kind: token.INT, + Value: strconv.FormatInt(-n, 10), + }, true + case token.DOUBLE: + // Syntactically prefix `-` or flip an existing leading `-`. + if strings.HasPrefix(lit.Value, "-") { + return &ast.LiteralExpr{ + ValuePos: lit.ValuePos, + Kind: token.DOUBLE, + Value: lit.Value[1:], + }, true + } + return &ast.LiteralExpr{ + ValuePos: lit.ValuePos, + Kind: token.DOUBLE, + Value: "-" + lit.Value, + }, true + } + return nil, false +} + +// foldLiteralTree recursively folds BinaryExpr subtrees into LiteralExpr +// where both operands eventually collapse to literals. Non-foldable +// subtrees come back unchanged. Used as a preorder pre-pass so the +// caller can look at a flat LITERAL + LITERAL pair. +// +// For left-associative string-concat chains like "a" + x + "b" + "c", +// the parser builds (((("a" + x) + "b") + "c")) and no pair is +// literal+literal. We reassociate: if the LHS is `Y + strlit` and the +// RHS is a string literal, rewrite as `Y + (strlit+rhslit)` so the +// tail literals collapse. Only safe for STRING+STRING (numeric `+` +// cares about types / overflow). +func foldLiteralTree(e ast.Expr) ast.Expr { + be, ok := e.(*ast.BinaryExpr) + if !ok { + return e + } + be.Left = foldLiteralTree(be.Left) + be.Right = foldLiteralTree(be.Right) + if folded, ok := tryFoldBinary(be); ok { + return folded + } + // String-concat reassociation for left-leaning chains. + if be.Op == token.PLUS { + if rLit, ok := be.Right.(*ast.LiteralExpr); ok && rLit.Kind == token.STRING { + if lBin, ok := be.Left.(*ast.BinaryExpr); ok && lBin.Op == token.PLUS { + if mLit, ok := lBin.Right.(*ast.LiteralExpr); ok && mLit.Kind == token.STRING { + fused := &ast.LiteralExpr{ + ValuePos: mLit.ValuePos, + Kind: token.STRING, + Value: mLit.Value + rLit.Value, + } + return &ast.BinaryExpr{ + OpPos: be.OpPos, + Op: token.PLUS, + Left: lBin.Left, + Right: fused, + } + } + } + } + } + return be +} + +// tryFoldBinary returns a synthetic LiteralExpr when both operands of a +// BinaryExpr are themselves literals and the operator is one the +// folder recognises. INT+INT stays INT (with overflow falling through +// to the VM path), mixed numeric falls to double, STRING+STRING +// concatenates. Non-literal operands or unsupported op → (nil, false). +func tryFoldBinary(e *ast.BinaryExpr) (*ast.LiteralExpr, bool) { + l, lok := e.Left.(*ast.LiteralExpr) + r, rok := e.Right.(*ast.LiteralExpr) + if !lok || !rok { + return nil, false + } + switch e.Op { + case token.PLUS, token.MINUS, token.STAR, token.SLASH: + default: + return nil, false + } + // INT + INT — keep int exact result. + if l.Kind == token.INT && r.Kind == token.INT { + li, errL := strconv.ParseInt(l.Value, 10, 64) + ri, errR := strconv.ParseInt(r.Value, 10, 64) + if errL != nil || errR != nil { + return nil, false + } + var result int64 + var overflowed bool + switch e.Op { + case token.PLUS: + result = li + ri + // Harbour overflow discipline: fall through to VM on overflow + if (ri >= 0 && result < li) || (ri < 0 && result > li) { + overflowed = true + } + case token.MINUS: + result = li - ri + if (ri <= 0 && result < li) || (ri > 0 && result > li) { + overflowed = true + } + case token.STAR: + if li == 0 || ri == 0 { + result = 0 + } else { + result = li * ri + if result/li != ri { + overflowed = true + } + } + case token.SLASH: + // Harbour SLASH always yields double even for int inputs. + return nil, false + } + if overflowed { + return nil, false + } + return &ast.LiteralExpr{ + ValuePos: l.ValuePos, + Kind: token.INT, + Value: strconv.FormatInt(result, 10), + }, true + } + // STRING + STRING — concatenate. Preserves the quoting style of the + // left literal so DateExpr and other quoting-sensitive kinds don't + // change shape. + if e.Op == token.PLUS && l.Kind == token.STRING && r.Kind == token.STRING { + return &ast.LiteralExpr{ + ValuePos: l.ValuePos, + Kind: token.STRING, + Value: l.Value + r.Value, + }, true + } + return nil, false +} + +// collectConstLocals returns a map of LOCAL names (uppercase) whose +// only assignment is a literal initializer — these can be propagated +// inline. Any reassignment, ++/--, += family, @byref, MultiAssignStmt +// target, FOR/FOREACH loop var, or AtGet target disqualifies the name. +// +// The walker is bounded: if it encounters a macro expansion or any +// AST node it doesn't recognise, it aborts and returns an empty map. +// Correctness trumps coverage — an unrecognised node might hide a +// write, so we refuse to propagate. +func collectConstLocals(fn *ast.FuncDecl) map[string]*ast.LiteralExpr { + v := &constLocalVisitor{ + candidates: map[string]*ast.LiteralExpr{}, + } + // Seed candidates from top-level LOCAL decls with literal init. + for _, d := range fn.Decls { + vd, ok := d.(*ast.VarDecl) + if !ok || vd.Scope != ast.ScopeLocal { + continue + } + for _, vi := range vd.Vars { + if vi.Init == nil { + continue + } + if lit, ok := vi.Init.(*ast.LiteralExpr); ok { + v.candidates[strings.ToUpper(vi.Name)] = lit + } + } + } + if len(v.candidates) == 0 { + return nil + } + // Params are writable even without explicit assignment (by-value + // but reassignable) — disqualify any candidate that shadows a param. + // Params come from a separate slot but guard in case of odd decls. + for _, p := range fn.Params { + delete(v.candidates, strings.ToUpper(p.Name)) + } + for _, st := range fn.Body { + v.stmt(st) + if v.aborted { + return nil + } + } + if len(v.candidates) == 0 { + return nil + } + return v.candidates +} + +type constLocalVisitor struct { + candidates map[string]*ast.LiteralExpr + aborted bool +} + +func (v *constLocalVisitor) abort() { + v.aborted = true + v.candidates = nil +} + +func (v *constLocalVisitor) writeIdent(e ast.Expr) { + if id, ok := e.(*ast.IdentExpr); ok { + delete(v.candidates, strings.ToUpper(id.Name)) + } +} + +func (v *constLocalVisitor) writeName(name string) { + delete(v.candidates, strings.ToUpper(name)) +} + +func (v *constLocalVisitor) exprs(es []ast.Expr) { + for _, e := range es { + v.expr(e) + } +} + +func (v *constLocalVisitor) stmts(ss []ast.Stmt) { + for _, s := range ss { + v.stmt(s) + } +} + +func (v *constLocalVisitor) expr(e ast.Expr) { + if v.aborted || e == nil { + return + } + switch x := e.(type) { + case *ast.LiteralExpr, *ast.IdentExpr, *ast.SelfExpr: + // leaf; reads don't disqualify + case *ast.BinaryExpr: + v.expr(x.Left) + v.expr(x.Right) + case *ast.UnaryExpr: + if x.Op == token.INC || x.Op == token.DEC { + v.writeIdent(x.X) + } + v.expr(x.X) + case *ast.PostfixExpr: + v.writeIdent(x.X) + v.expr(x.X) + case *ast.AssignExpr: + // All assign ops (:= += -= *= /= %= ^=) are writes to Left's + // outer ident. Compound assigns also read, but disqualification + // is based on being written at all. + v.writeIdent(x.Left) + // Still walk Left in case of indexing: arr[i] := v — the ident + // arr is read (and we don't want to accidentally treat it as a + // write since writeIdent only triggers on a bare IdentExpr). + if _, isIdent := x.Left.(*ast.IdentExpr); !isIdent { + v.expr(x.Left) + } + v.expr(x.Right) + case *ast.CallExpr: + v.expr(x.Func) + v.exprs(x.Args) + case *ast.DotExpr: + v.expr(x.X) + case *ast.SendExpr: + v.expr(x.Object) + if x.MacroMethod != nil { + v.expr(x.MacroMethod) + } + v.exprs(x.Args) + case *ast.IndexExpr: + v.expr(x.X) + v.expr(x.Index) + case *ast.AliasExpr: + v.expr(x.Alias) + v.expr(x.Field) + case *ast.MacroExpr: + // Macros can expand to any name including writes. Bail. + v.abort() + case *ast.BlockExpr: + v.expr(x.Body) + case *ast.ArrayLitExpr: + v.exprs(x.Items) + case *ast.HashLitExpr: + v.exprs(x.Keys) + v.exprs(x.Values) + case *ast.IIfExpr: + v.expr(x.Cond) + v.expr(x.True) + v.expr(x.False) + case *ast.RefExpr: + // @ident — passes by reference; callee may mutate. + v.writeIdent(x.X) + v.expr(x.X) + case *ast.SliceExpr: + v.expr(x.X) + v.expr(x.Low) + v.expr(x.High) + case *ast.NilSafeExpr: + v.expr(x.X) + case *ast.InterpolatedString: + v.exprs(x.Parts) + default: + v.abort() + } +} + +func (v *constLocalVisitor) stmt(s ast.Stmt) { + if v.aborted || s == nil { + return + } + switch x := s.(type) { + case *ast.ExprStmt: + v.expr(x.X) + case *ast.ReturnStmt: + v.expr(x.Value) + case *ast.QOutStmt: + v.exprs(x.Exprs) + case *ast.IfStmt: + v.expr(x.Cond) + v.stmts(x.Body) + for _, ei := range x.ElseIfs { + v.expr(ei.Cond) + v.stmts(ei.Body) + } + v.stmts(x.ElseBody) + case *ast.DoWhileStmt: + v.expr(x.Cond) + v.stmts(x.Body) + case *ast.ForStmt: + v.writeName(x.Var) + v.expr(x.Start) + v.expr(x.To) + v.expr(x.Step) + v.stmts(x.Body) + case *ast.ForEachStmt: + v.writeName(x.Var) + v.expr(x.Collection) + v.stmts(x.Body) + case *ast.SwitchStmt: + v.expr(x.Expr) + for _, c := range x.Cases { + v.expr(c.Value) + v.stmts(c.Body) + } + v.stmts(x.Otherwise) + case *ast.SeqStmt: + v.stmts(x.Body) + if x.RecoverVar != "" { + v.writeName(x.RecoverVar) + } + v.stmts(x.RecoverBody) + case *ast.MultiAssignStmt: + for _, t := range x.Targets { + v.writeName(t) + } + v.exprs(x.Values) + case *ast.VarDecl: + // Init exprs are reads. The LOCAL name itself was already + // collected as a candidate by collectConstLocals; we don't + // treat its own init as a reassignment. + for _, vi := range x.Vars { + v.expr(vi.Init) + } + case *ast.DeferStmt: + v.expr(x.Call) + case *ast.ExitStmt, *ast.LoopStmt: + // no expression + case *ast.SkipCmd: + v.expr(x.Count) + case *ast.GoCmd: + v.expr(x.RecNo) + case *ast.SeekCmd: + v.expr(x.Key) + case *ast.UseCmd: + v.expr(x.File) + v.expr(x.AliasExpr) + case *ast.SelectCmd: + v.expr(x.Area) + case *ast.ReplaceCmd: + for _, f := range x.Fields { + v.expr(f.Field) + v.expr(f.Value) + } + case *ast.AppendCmd, *ast.DeleteCmd, *ast.ReadCmd: + // no expressions + case *ast.IndexCmd: + v.expr(x.KeyExpr) + v.expr(x.File) + v.expr(x.ForCond) + case *ast.SetCmd: + v.expr(x.Expr) + case *ast.AtSayCmd: + v.expr(x.Row) + v.expr(x.Col) + v.expr(x.SayExpr) + v.expr(x.Picture) + case *ast.AtGetCmd: + // @ GET var writes to Var at READ time. + v.writeIdent(x.Var) + if x.VarName != "" { + v.writeName(x.VarName) + } + v.expr(x.Row) + v.expr(x.Col) + v.expr(x.Picture) + v.expr(x.Valid) + v.expr(x.When) + case *ast.AtSayGetCmd: + v.writeIdent(x.Var) + if x.VarName != "" { + v.writeName(x.VarName) + } + v.expr(x.Row) + v.expr(x.Col) + v.expr(x.SayExpr) + v.expr(x.Picture) + v.expr(x.Valid) + v.expr(x.When) + default: + v.abort() + } +} diff --git a/compiler/gengo/gen_class.go b/compiler/gengo/gen_class.go index 4226f10..a16417b 100644 --- a/compiler/gengo/gen_class.go +++ b/compiler/gengo/gen_class.go @@ -50,7 +50,7 @@ func (g *Generator) emitClassDecl(cls *ast.ClassDecl) { for _, m := range cls.Members { if md, ok := m.(*ast.MethodDecl); ok { upperName := strings.ToUpper(md.Name) - goFuncName := fmt.Sprintf("HB_%s_%s", className, upperName) + goFuncName := fmt.Sprintf("FV_%s_%s", className, upperName) switch { case md.IsOperator: @@ -92,7 +92,7 @@ func (g *Generator) emitClassDecl(cls *ast.ClassDecl) { // Also need a constructor function: Person() returns new object // This is called as Person():New(...) - g.writeln(fmt.Sprintf("func HB_%s_CTOR(t *hbrt.Thread) {", className)) + g.writeln(fmt.Sprintf("func FV_%s_CTOR(t *hbrt.Thread) {", className)) g.indent++ g.writeln("t.Frame(0, 0)") g.writeln("defer t.EndProc()") @@ -105,13 +105,13 @@ func (g *Generator) emitClassDecl(cls *ast.ClassDecl) { // Constructor symbol already added in Generate() symbol collection phase } -// emitInlineMethodBody generates the HB__ function for +// emitInlineMethodBody generates the FV__ function for // an INLINE-declared method: the body is the single expression parsed // after the INLINE keyword, evaluated and returned. Params bind to // locals 1..N so the inline expression can reference them. func (g *Generator) emitInlineMethodBody(className string, md *ast.MethodDecl) { methodName := strings.ToUpper(md.Name) - goFuncName := fmt.Sprintf("HB_%s_%s", className, methodName) + goFuncName := fmt.Sprintf("FV_%s_%s", className, methodName) nParams := len(md.Params) g.writeln(fmt.Sprintf("func %s(t *hbrt.Thread) {", goFuncName)) @@ -147,7 +147,7 @@ func (g *Generator) emitMethodDeclStandalone(md *ast.MethodDecl) { className := strings.ToUpper(md.ClassName) methodName := strings.ToUpper(md.Name) - goFuncName := fmt.Sprintf("HB_%s_%s", className, methodName) + goFuncName := fmt.Sprintf("FV_%s_%s", className, methodName) nParams := len(md.Params) nLocals := 0 diff --git a/compiler/gengo/gengo.go b/compiler/gengo/gengo.go index f5ba26f..774131a 100644 --- a/compiler/gengo/gengo.go +++ b/compiler/gengo/gengo.go @@ -13,7 +13,7 @@ // package main // import ("five/hbrt"; "five/hbrtl") // var symbols = hbrt.NewModule(...) -// func HB_MAIN(t *hbrt.Thread) { ... } +// func FV_MAIN(t *hbrt.Thread) { ... } // func main() { vm := hbrt.NewVM(); ... vm.Run("MAIN") } package gengo @@ -57,7 +57,7 @@ type Generator struct { type symbolEntry struct { name string scope string // "hbrt.FsPublic|hbrt.FsLocal" etc. - fn string // Go function name: "HB_MAIN" + fn string // Go function name: "FV_MAIN" } // Generate converts an AST File into Go source code. @@ -96,14 +96,14 @@ func doGenerate(file *ast.File, debug, library bool) string { g.symbols = append(g.symbols, symbolEntry{ name: strings.ToUpper(decl.Name), scope: scope, - fn: "HB_" + strings.ToUpper(decl.Name), + fn: "FV_" + strings.ToUpper(decl.Name), }) case *ast.ClassDecl: className := strings.ToUpper(decl.Name) g.symbols = append(g.symbols, symbolEntry{ name: className, scope: "hbrt.FsPublic|hbrt.FsLocal", - fn: "HB_" + className + "_CTOR", + fn: "FV_" + className + "_CTOR", }) } } @@ -238,436 +238,6 @@ func (g *Generator) emitPushSymbol(name string) { g.writeln(fmt.Sprintf("t.PushSymbol(t.GetSym(&%s, %q))", v, name)) } -// negateLiteral produces a new literal that represents -lit. Handles -// INT and DOUBLE (as a textual prefix). Returns (nil, false) for -// non-numeric literals or an already-negative INT whose negation would -// overflow int64. -func negateLiteral(lit *ast.LiteralExpr) (*ast.LiteralExpr, bool) { - switch lit.Kind { - case token.INT: - n, err := strconv.ParseInt(lit.Value, 10, 64) - if err != nil { - return nil, false - } - // Guard: math.MinInt64 has no positive twin — let the VM's - // runtime coerce-to-double path handle it. - if n == -1<<63 { - return nil, false - } - return &ast.LiteralExpr{ - ValuePos: lit.ValuePos, - Kind: token.INT, - Value: strconv.FormatInt(-n, 10), - }, true - case token.DOUBLE: - // Syntactically prefix `-` or flip an existing leading `-`. - if strings.HasPrefix(lit.Value, "-") { - return &ast.LiteralExpr{ - ValuePos: lit.ValuePos, - Kind: token.DOUBLE, - Value: lit.Value[1:], - }, true - } - return &ast.LiteralExpr{ - ValuePos: lit.ValuePos, - Kind: token.DOUBLE, - Value: "-" + lit.Value, - }, true - } - return nil, false -} - -// foldLiteralTree recursively folds BinaryExpr subtrees into LiteralExpr -// where both operands eventually collapse to literals. Non-foldable -// subtrees come back unchanged. Used as a preorder pre-pass so the -// caller can look at a flat LITERAL + LITERAL pair. -// -// For left-associative string-concat chains like "a" + x + "b" + "c", -// the parser builds (((("a" + x) + "b") + "c")) and no pair is -// literal+literal. We reassociate: if the LHS is `Y + strlit` and the -// RHS is a string literal, rewrite as `Y + (strlit+rhslit)` so the -// tail literals collapse. Only safe for STRING+STRING (numeric `+` -// cares about types / overflow). -func foldLiteralTree(e ast.Expr) ast.Expr { - be, ok := e.(*ast.BinaryExpr) - if !ok { - return e - } - be.Left = foldLiteralTree(be.Left) - be.Right = foldLiteralTree(be.Right) - if folded, ok := tryFoldBinary(be); ok { - return folded - } - // String-concat reassociation for left-leaning chains. - if be.Op == token.PLUS { - if rLit, ok := be.Right.(*ast.LiteralExpr); ok && rLit.Kind == token.STRING { - if lBin, ok := be.Left.(*ast.BinaryExpr); ok && lBin.Op == token.PLUS { - if mLit, ok := lBin.Right.(*ast.LiteralExpr); ok && mLit.Kind == token.STRING { - fused := &ast.LiteralExpr{ - ValuePos: mLit.ValuePos, - Kind: token.STRING, - Value: mLit.Value + rLit.Value, - } - return &ast.BinaryExpr{ - OpPos: be.OpPos, - Op: token.PLUS, - Left: lBin.Left, - Right: fused, - } - } - } - } - } - return be -} - -// tryFoldBinary returns a synthetic LiteralExpr when both operands of a -// BinaryExpr are themselves literals and the operator is one the -// folder recognises. INT+INT stays INT (with overflow falling through -// to the VM path), mixed numeric falls to double, STRING+STRING -// concatenates. Non-literal operands or unsupported op → (nil, false). -func tryFoldBinary(e *ast.BinaryExpr) (*ast.LiteralExpr, bool) { - l, lok := e.Left.(*ast.LiteralExpr) - r, rok := e.Right.(*ast.LiteralExpr) - if !lok || !rok { - return nil, false - } - switch e.Op { - case token.PLUS, token.MINUS, token.STAR, token.SLASH: - default: - return nil, false - } - // INT + INT — keep int exact result. - if l.Kind == token.INT && r.Kind == token.INT { - li, errL := strconv.ParseInt(l.Value, 10, 64) - ri, errR := strconv.ParseInt(r.Value, 10, 64) - if errL != nil || errR != nil { - return nil, false - } - var result int64 - var overflowed bool - switch e.Op { - case token.PLUS: - result = li + ri - // Harbour overflow discipline: fall through to VM on overflow - if (ri >= 0 && result < li) || (ri < 0 && result > li) { - overflowed = true - } - case token.MINUS: - result = li - ri - if (ri <= 0 && result < li) || (ri > 0 && result > li) { - overflowed = true - } - case token.STAR: - if li == 0 || ri == 0 { - result = 0 - } else { - result = li * ri - if result/li != ri { - overflowed = true - } - } - case token.SLASH: - // Harbour SLASH always yields double even for int inputs. - return nil, false - } - if overflowed { - return nil, false - } - return &ast.LiteralExpr{ - ValuePos: l.ValuePos, - Kind: token.INT, - Value: strconv.FormatInt(result, 10), - }, true - } - // STRING + STRING — concatenate. Preserves the quoting style of the - // left literal so DateExpr and other quoting-sensitive kinds don't - // change shape. - if e.Op == token.PLUS && l.Kind == token.STRING && r.Kind == token.STRING { - return &ast.LiteralExpr{ - ValuePos: l.ValuePos, - Kind: token.STRING, - Value: l.Value + r.Value, - }, true - } - return nil, false -} - -// collectConstLocals returns a map of LOCAL names (uppercase) whose -// only assignment is a literal initializer — these can be propagated -// inline. Any reassignment, ++/--, += family, @byref, MultiAssignStmt -// target, FOR/FOREACH loop var, or AtGet target disqualifies the name. -// -// The walker is bounded: if it encounters a macro expansion or any -// AST node it doesn't recognise, it aborts and returns an empty map. -// Correctness trumps coverage — an unrecognised node might hide a -// write, so we refuse to propagate. -func collectConstLocals(fn *ast.FuncDecl) map[string]*ast.LiteralExpr { - v := &constLocalVisitor{ - candidates: map[string]*ast.LiteralExpr{}, - } - // Seed candidates from top-level LOCAL decls with literal init. - for _, d := range fn.Decls { - vd, ok := d.(*ast.VarDecl) - if !ok || vd.Scope != ast.ScopeLocal { - continue - } - for _, vi := range vd.Vars { - if vi.Init == nil { - continue - } - if lit, ok := vi.Init.(*ast.LiteralExpr); ok { - v.candidates[strings.ToUpper(vi.Name)] = lit - } - } - } - if len(v.candidates) == 0 { - return nil - } - // Params are writable even without explicit assignment (by-value - // but reassignable) — disqualify any candidate that shadows a param. - // Params come from a separate slot but guard in case of odd decls. - for _, p := range fn.Params { - delete(v.candidates, strings.ToUpper(p.Name)) - } - for _, st := range fn.Body { - v.stmt(st) - if v.aborted { - return nil - } - } - if len(v.candidates) == 0 { - return nil - } - return v.candidates -} - -type constLocalVisitor struct { - candidates map[string]*ast.LiteralExpr - aborted bool -} - -func (v *constLocalVisitor) abort() { - v.aborted = true - v.candidates = nil -} - -func (v *constLocalVisitor) writeIdent(e ast.Expr) { - if id, ok := e.(*ast.IdentExpr); ok { - delete(v.candidates, strings.ToUpper(id.Name)) - } -} - -func (v *constLocalVisitor) writeName(name string) { - delete(v.candidates, strings.ToUpper(name)) -} - -func (v *constLocalVisitor) exprs(es []ast.Expr) { - for _, e := range es { - v.expr(e) - } -} - -func (v *constLocalVisitor) stmts(ss []ast.Stmt) { - for _, s := range ss { - v.stmt(s) - } -} - -func (v *constLocalVisitor) expr(e ast.Expr) { - if v.aborted || e == nil { - return - } - switch x := e.(type) { - case *ast.LiteralExpr, *ast.IdentExpr, *ast.SelfExpr: - // leaf; reads don't disqualify - case *ast.BinaryExpr: - v.expr(x.Left) - v.expr(x.Right) - case *ast.UnaryExpr: - if x.Op == token.INC || x.Op == token.DEC { - v.writeIdent(x.X) - } - v.expr(x.X) - case *ast.PostfixExpr: - v.writeIdent(x.X) - v.expr(x.X) - case *ast.AssignExpr: - // All assign ops (:= += -= *= /= %= ^=) are writes to Left's - // outer ident. Compound assigns also read, but disqualification - // is based on being written at all. - v.writeIdent(x.Left) - // Still walk Left in case of indexing: arr[i] := v — the ident - // arr is read (and we don't want to accidentally treat it as a - // write since writeIdent only triggers on a bare IdentExpr). - if _, isIdent := x.Left.(*ast.IdentExpr); !isIdent { - v.expr(x.Left) - } - v.expr(x.Right) - case *ast.CallExpr: - v.expr(x.Func) - v.exprs(x.Args) - case *ast.DotExpr: - v.expr(x.X) - case *ast.SendExpr: - v.expr(x.Object) - if x.MacroMethod != nil { - v.expr(x.MacroMethod) - } - v.exprs(x.Args) - case *ast.IndexExpr: - v.expr(x.X) - v.expr(x.Index) - case *ast.AliasExpr: - v.expr(x.Alias) - v.expr(x.Field) - case *ast.MacroExpr: - // Macros can expand to any name including writes. Bail. - v.abort() - case *ast.BlockExpr: - v.expr(x.Body) - case *ast.ArrayLitExpr: - v.exprs(x.Items) - case *ast.HashLitExpr: - v.exprs(x.Keys) - v.exprs(x.Values) - case *ast.IIfExpr: - v.expr(x.Cond) - v.expr(x.True) - v.expr(x.False) - case *ast.RefExpr: - // @ident — passes by reference; callee may mutate. - v.writeIdent(x.X) - v.expr(x.X) - case *ast.SliceExpr: - v.expr(x.X) - v.expr(x.Low) - v.expr(x.High) - case *ast.NilSafeExpr: - v.expr(x.X) - case *ast.InterpolatedString: - v.exprs(x.Parts) - default: - v.abort() - } -} - -func (v *constLocalVisitor) stmt(s ast.Stmt) { - if v.aborted || s == nil { - return - } - switch x := s.(type) { - case *ast.ExprStmt: - v.expr(x.X) - case *ast.ReturnStmt: - v.expr(x.Value) - case *ast.QOutStmt: - v.exprs(x.Exprs) - case *ast.IfStmt: - v.expr(x.Cond) - v.stmts(x.Body) - for _, ei := range x.ElseIfs { - v.expr(ei.Cond) - v.stmts(ei.Body) - } - v.stmts(x.ElseBody) - case *ast.DoWhileStmt: - v.expr(x.Cond) - v.stmts(x.Body) - case *ast.ForStmt: - v.writeName(x.Var) - v.expr(x.Start) - v.expr(x.To) - v.expr(x.Step) - v.stmts(x.Body) - case *ast.ForEachStmt: - v.writeName(x.Var) - v.expr(x.Collection) - v.stmts(x.Body) - case *ast.SwitchStmt: - v.expr(x.Expr) - for _, c := range x.Cases { - v.expr(c.Value) - v.stmts(c.Body) - } - v.stmts(x.Otherwise) - case *ast.SeqStmt: - v.stmts(x.Body) - if x.RecoverVar != "" { - v.writeName(x.RecoverVar) - } - v.stmts(x.RecoverBody) - case *ast.MultiAssignStmt: - for _, t := range x.Targets { - v.writeName(t) - } - v.exprs(x.Values) - case *ast.VarDecl: - // Init exprs are reads. The LOCAL name itself was already - // collected as a candidate by collectConstLocals; we don't - // treat its own init as a reassignment. - for _, vi := range x.Vars { - v.expr(vi.Init) - } - case *ast.DeferStmt: - v.expr(x.Call) - case *ast.ExitStmt, *ast.LoopStmt: - // no expression - case *ast.SkipCmd: - v.expr(x.Count) - case *ast.GoCmd: - v.expr(x.RecNo) - case *ast.SeekCmd: - v.expr(x.Key) - case *ast.UseCmd: - v.expr(x.File) - v.expr(x.AliasExpr) - case *ast.SelectCmd: - v.expr(x.Area) - case *ast.ReplaceCmd: - for _, f := range x.Fields { - v.expr(f.Field) - v.expr(f.Value) - } - case *ast.AppendCmd, *ast.DeleteCmd, *ast.ReadCmd: - // no expressions - case *ast.IndexCmd: - v.expr(x.KeyExpr) - v.expr(x.File) - v.expr(x.ForCond) - case *ast.SetCmd: - v.expr(x.Expr) - case *ast.AtSayCmd: - v.expr(x.Row) - v.expr(x.Col) - v.expr(x.SayExpr) - v.expr(x.Picture) - case *ast.AtGetCmd: - // @ GET var writes to Var at READ time. - v.writeIdent(x.Var) - if x.VarName != "" { - v.writeName(x.VarName) - } - v.expr(x.Row) - v.expr(x.Col) - v.expr(x.Picture) - v.expr(x.Valid) - v.expr(x.When) - case *ast.AtSayGetCmd: - v.writeIdent(x.Var) - if x.VarName != "" { - v.writeName(x.VarName) - } - v.expr(x.Row) - v.expr(x.Col) - v.expr(x.SayExpr) - v.expr(x.Picture) - v.expr(x.Valid) - v.expr(x.When) - default: - v.abort() - } -} // emitSymCache writes the package-level `var _sym_NAME *hbrt.Symbol` // declarations discovered during body emission. Called after all @@ -941,7 +511,7 @@ func (g *Generator) emitTopLevelStatic(vd *ast.VarDecl) { } func (g *Generator) emitFuncDecl(fn *ast.FuncDecl) { - goName := "HB_" + strings.ToUpper(fn.Name) + goName := "FV_" + strings.ToUpper(fn.Name) // Emit function-level STATIC variables as package-level Go vars. // Harbour: STATIC inside FUNCTION persists across calls but is @@ -1003,6 +573,13 @@ func (g *Generator) emitFuncDecl(fn *ast.FuncDecl) { // Build local map FIRST (needed for init expressions that reference params) g.curLocals = g.buildLocalMap(fn) + // Also include mid-function LOCALs so their names show up in the + // debugger / error.log for any frame that has executed past the decl. + { + idx := len(g.curLocals) + 1 + scanBodyLocals(fn.Body, g.curLocals, &idx) + } + g.emitLocalNames(fn.Name, g.curLocals) // Scan for LOCALs that are literal-initialised and never reassigned // so reads can be constant-propagated at emit time. g.constLocals = collectConstLocals(fn) @@ -1059,6 +636,41 @@ func (g *Generator) emitFuncDecl(fn *ast.FuncDecl) { type localMap map[string]int +// emitLocalNames tells the runtime the PRG-level names of this frame's +// params+locals, so the debugger and error.log can show "i, nSum" +// instead of "_1, _2". Emitted as an inline slice literal — one +// allocation per call (24-byte slice header); string literals are +// interned so the underlying data sits in the binary's data segment. +func (g *Generator) emitLocalNames(funcName string, locals localMap) { + if len(locals) == 0 { + return + } + // Reverse the map: slot index → PRG name. + names := make([]string, len(locals)) + for name, idx := range locals { + if idx >= 1 && idx <= len(names) { + names[idx-1] = name + } + } + // Drop trailing empty slots (defensive — map may have holes). + for len(names) > 0 && names[len(names)-1] == "" { + names = names[:len(names)-1] + } + if len(names) == 0 { + return + } + var sb strings.Builder + sb.WriteString("t.SetLocalNames([]string{") + for i, n := range names { + if i > 0 { + sb.WriteString(", ") + } + sb.WriteString(strconv.Quote(n)) + } + sb.WriteString("})") + g.writeln(sb.String()) +} + func (g *Generator) buildLocalMap(fn *ast.FuncDecl) localMap { m := make(localMap) idx := 1 @@ -1117,1254 +729,6 @@ func scanBodyLocals(stmts []ast.Stmt, m localMap, idx *int) { } } -// --- Statement emission --- - -func (g *Generator) emitStmt(stmt ast.Stmt, locals localMap) { - // Emit debug line hook - if g.Debug && stmt.Pos().Line > 0 { - g.writeln(fmt.Sprintf("t.DebugLine(%q, %d)", g.file.Name, stmt.Pos().Line)) - } - - switch s := stmt.(type) { - case *ast.ReturnStmt: - if len(s.Values) > 1 { - // Multi-return: RETURN a, b, c → push array of values - for _, v := range s.Values { - g.emitExpr(v) - } - g.writeln(fmt.Sprintf("t.ArrayGen(%d)", len(s.Values))) - g.writeln("t.RetValue()") - } else if s.Value != nil { - g.emitExpr(s.Value) - g.writeln("t.RetValue()") - } else { - g.writeln("t.RetNil()") - } - g.writeln("return") // Go return to exit function immediately - - case *ast.QOutStmt: - g.emitQOut(s, locals) - - case *ast.ExprStmt: - g.emitExprStmt(s, locals) - - case *ast.IfStmt: - g.emitIf(s, locals) - - case *ast.SwitchStmt: - g.emitSwitch(s, locals) - - case *ast.DoWhileStmt: - g.emitDoWhile(s, locals) - - case *ast.ForStmt: - g.emitFor(s, locals) - - case *ast.ForEachStmt: - g.emitForEach(s, locals) - - case *ast.ExitStmt: - g.writeln("break") - - case *ast.LoopStmt: - if g.curForLabel != "" { - // Inside FOR..NEXT: goto label before increment (continue would skip it) - g.writeln("goto " + g.curForLabel) - } else { - g.writeln("continue") - } - - case *ast.MultiAssignStmt: - g.emitMultiAssign(s, locals) - - case *ast.DeferStmt: - g.emitDefer(s, locals) - - case *ast.VarDecl: - // LOCAL in mid-function or PRIVATE/PUBLIC - g.emitMidVarDecl(s, locals) - - // xBase commands — generate calls to hbrdd WorkAreaManager - case *ast.UseCmd: - g.emitUseCmd(s, locals) - case *ast.GoCmd: - g.emitGoCmd(s) - case *ast.SkipCmd: - g.emitSkipCmd(s, locals) - case *ast.SeekCmd: - g.emitSeekCmd(s, locals) - case *ast.ReplaceCmd: - g.emitReplaceCmd(s, locals) - case *ast.AppendCmd: - if g.hoistedFields != nil { - // Use hoisted area variable - g.writeln("if _rarea != nil { _rarea.Append() }") - } else { - g.writeln("{ _wa := t.WA.(*hbrdd.WorkAreaManager)") - g.writeln("if _area := _wa.Current(); _area != nil { _area.Append() } }") - } - case *ast.DeleteCmd: - if g.hoistedDW || g.hoistedFields != nil { - g.writeln(fmt.Sprintf("if %s != nil { %s.Delete() }", g.hoistedAreaVar(), g.hoistedAreaVar())) - } else { - g.writeln("{ _wa := t.WA.(*hbrdd.WorkAreaManager)") - g.writeln("if _area := _wa.Current(); _area != nil { _area.Delete() } }") - } - case *ast.SelectCmd: - g.emitExpr(s.Area) - g.writeln("{ _wa := t.WA.(*hbrdd.WorkAreaManager); _v := t.Pop2()") - g.writeln("if _v.IsNumeric() { _wa.Select(int(_v.AsNumInt())) } else { _wa.Select(_v.AsString()) } }") - case *ast.IndexCmd: - g.writeln("{") - g.indent++ - g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)") - g.writeln("if area := wa.Current(); area != nil {") - g.indent++ - g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") - g.indent++ - keyStr := exprToString(s.KeyExpr) - g.writeln(fmt.Sprintf("_keyExpr := %q", keyStr)) - - // File expression: if it contains a function call, evaluate at - // runtime — Harbour `INDEX ON ... TO ( cExpr )` semantics. Prior - // behavior was static exprToString which serialized calls like - // `Lower(cTable) + "_pk.ntx"` into the literal filename string. - // Detect via containsCall; preserve static path for simple - // `test.ntx` style identifiers. - if containsCall(s.File) { - g.emitExpr(s.File) - g.writeln("_file := t.Pop2().AsString()") - } else { - fileStr := exprToString(s.File) - g.writeln(fmt.Sprintf("_file := %q", fileStr)) - } - forExpr := `""` - if s.ForCond != nil { - forExpr = fmt.Sprintf("%q", exprToString(s.ForCond)) - } - - // Emit compiled key evaluator as Go closure. - // This inlines the AST of the key expression into native Go code, - // eliminating per-record MacroEval string parsing + symbol lookup. - // In INDEX context, bare identifiers are FIELD names (not locals). - g.writeln("_keyFunc := func() hbrt.Value {") - g.indent++ - g.emitIndexKeyExpr(s.KeyExpr) - g.writeln("return t.Pop2()") - g.indent-- - g.writeln("}") - - // Still set MacroEval fallback for evalKeyExprInner (used for keyLen sampling) - g.writeln("dbf.KeyEvalFunc = func(expr string) hbrt.Value { return t.MacroEval(expr) }") - g.writeln(fmt.Sprintf("idx.OrderCreate(hbrdd.OrderCreateParams{KeyExpr: _keyExpr, FilePath: _file, ForExpr: %s, TagName: %q, Unique: %v, Descending: %v, KeyFunc: _keyFunc})", - forExpr, s.TagName, s.Unique, s.Descending)) - g.writeln("dbf.KeyEvalFunc = nil") - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("}") - case *ast.SetCmd: - upper := strings.ToUpper(s.Setting) - - // Boolean SET toggles — call RTL Set function, no workarea needed - setFuncMap := map[string]string{ - "DELETED": "SETDELETED", - "EXACT": "SETEXACT", - "SOFTSEEK": "SETSOFTSEEK", - "EXCLUSIVE": "SETEXCLUSIVE", - "FIXED": "SETFIXED", - "CANCEL": "SETCANCEL", - "BELL": "SETBELL", - "CONFIRM": "SETCONFIRM", - "INSERT": "SETINSERT", - "ESCAPE": "SETESCAPE", - "WRAP": "SETWRAP", - } - if funcName, ok := setFuncMap[upper]; ok { - onOff := strings.ToUpper(s.Extra) - if onOff == "ON" || onOff == "OFF" { - val := "true" - if onOff == "OFF" { - val = "false" - } - g.emitPushSymbol(funcName) - g.writeln("t.PushNil()") - g.writeln(fmt.Sprintf("t.PushBool(%s)", val)) - g.writeln("t.Do(1)") - } - break - } - - // Value SET commands — SET DATE/DECIMALS/EPOCH TO expr - valueFuncMap := map[string]string{ - "DATE": "__SETDATEFORMAT", - "DECIMALS": "SETDECIMALS", - "EPOCH": "SETEPOCH", - } - if funcName, ok := valueFuncMap[upper]; ok && s.Expr != nil { - g.emitPushSymbol(funcName) - g.writeln("t.PushNil()") - g.emitExpr(s.Expr) - g.writeln("t.Do(1)") - break - } - - // Workarea-specific SET commands - g.writeln("{") - g.indent++ - g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)") - g.writeln("if area := wa.Current(); area != nil {") - g.indent++ - switch upper { - case "FILTER": - if s.Expr != nil { - g.emitExpr(s.Expr) - g.writeln(`area.SetFilter(t.Pop2().AsString(), nil)`) - } else { - g.writeln("area.ClearFilter()") - } - case "ORDER": - if s.Expr != nil { - g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") - g.indent++ - g.emitExpr(s.Expr) - g.writeln(`{ _ov := t.Pop2(); var _os string; if _ov.IsNumeric() { _os = hbrt.NtoS(_ov.AsNumInt()) } else { _os = _ov.AsString() }; idx.OrderListFocus(_os) }`) - g.indent-- - g.writeln("}") - } - case "INDEX": - if s.Expr != nil { - fileStr := exprToString(s.Expr) - g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") - g.indent++ - if fileStr != "" { - // SET INDEX TO a, b, c — split comma-separated file names - // and call OrderListAdd for each. Harbour loads all NTX - // files into the active index list. - clean := fileStr - if len(clean) >= 2 && clean[0] == '"' && clean[len(clean)-1] == '"' { - clean = clean[1 : len(clean)-1] - } - parts := strings.Split(clean, ",") - for _, p := range parts { - p = strings.TrimSpace(p) - if p != "" { - g.writeln(fmt.Sprintf(`idx.OrderListAdd(%q)`, p)) - } - } - } else { - g.emitExpr(s.Expr) - g.writeln(`idx.OrderListAdd(t.Pop2().AsString())`) - } - g.indent-- - g.writeln("}") - } else { - g.writeln("if idx, ok := area.(hbrdd.Indexer); ok { idx.OrderListClear() }") - } - default: - g.writeln(fmt.Sprintf("// SET %s: not yet implemented", upper)) - } - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("}") - - case *ast.SeqStmt: - g.emitBeginSequence(s, locals) - - case *ast.AtSayCmd: - g.emitAtSayCmd(s) - case *ast.AtGetCmd: - g.emitAtGetCmd(s, locals) - case *ast.AtSayGetCmd: - g.emitAtSayGetCmd(s, locals) - case *ast.ReadCmd: - g.emitReadCmd(s, locals) - - default: - g.writeln(fmt.Sprintf("// WARN: unhandled statement type %T — skipped", stmt)) - } -} - -func (g *Generator) emitMidVarDecl(s *ast.VarDecl, locals localMap) { - for _, v := range s.Vars { - idx, found := locals[strings.ToUpper(v.Name)] - if !found { - maxIdx := 0 - for _, i := range locals { - if i > maxIdx { - maxIdx = i - } - } - idx = maxIdx + 1 - locals[strings.ToUpper(v.Name)] = idx - } - if v.Init != nil { - if _, isConst := g.constLocals[strings.ToUpper(v.Name)]; !isConst { - g.emitExpr(v.Init) - g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) - } - } - } -} - -func (g *Generator) emitQOut(s *ast.QOutStmt, locals localMap) { - sym := "QOUT" - if s.IsQQ { - sym = "QQOUT" - } - g.emitPushSymbol(sym) - g.writeln("t.PushNil()") - for _, expr := range s.Exprs { - g.emitExpr(expr) - } - g.writeln(fmt.Sprintf("t.Function(%d)", len(s.Exprs))) -} - -func (g *Generator) emitExprStmt(s *ast.ExprStmt, locals localMap) { - // Check if it's an assignment - if assign, ok := s.X.(*ast.AssignExpr); ok { - g.emitAssign(assign, locals) - return - } - // Check if it's a function call (discard result) - if call, ok := s.X.(*ast.CallExpr); ok { - g.emitCallAsStmt(call, locals) - return - } - // Bare identifier as statement (e.g., CLS, CLEAR) — treat as zero-arg function call - if ident, ok := s.X.(*ast.IdentExpr); ok { - if _, found := locals[strings.ToUpper(ident.Name)]; !found { - g.emitPushSymbol(strings.ToUpper(ident.Name)) - g.writeln("t.PushNil()") - g.writeln("t.Do(0)") - return - } - } - // Postfix ++/-- - if pf, ok := s.X.(*ast.PostfixExpr); ok { - // Local variable: n++ - if ident, ok := pf.X.(*ast.IdentExpr); ok { - upper := strings.ToUpper(ident.Name) - if idx, found := locals[upper]; found { - if pf.Op == token.INC { - g.writeln(fmt.Sprintf("t.LocalAddInt(%d, 1)", idx)) - } else { - g.writeln(fmt.Sprintf("t.LocalAddInt(%d, -1)", idx)) - } - return - } - // STATIC variable: s_nPass++ - if goVar, found := g.staticVars[upper]; found { - delta := "1" - if pf.Op == token.DEC { - delta = "-1" - } - g.writeln(fmt.Sprintf("{ _v := %s.AsNumInt() + %s; %s = hbrt.MakeInt(int(_v)) }", goVar, delta, goVar)) - return - } - } - // Self field: ::field++ - if send, ok := pf.X.(*ast.SendExpr); ok { - if _, isSelf := send.Object.(*ast.SelfExpr); isSelf { - fieldName := strings.ToUpper(send.Method) - g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) - if pf.Op == token.INC { - g.writeln("t.PushInt(1)") - g.writeln("t.Plus()") - } else { - g.writeln("t.PushInt(1)") - g.writeln("t.Minus()") - } - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - return - } - } - } - // General expression (result on stack, pop it) - g.emitExpr(s.X) - g.writeln("t.Pop()") -} - -func (g *Generator) emitAssign(a *ast.AssignExpr, locals localMap) { - // Check for arr[idx] := value (array index assignment) - if idx, ok := a.Left.(*ast.IndexExpr); ok { - if a.Op == token.ASSIGN { - g.emitExpr(idx.X) // array - g.emitExpr(idx.Index) // index - g.emitExpr(a.Right) // value - g.writeln("t.ArrayPop()") // set array[index] = value - return - } - } - - // Check for obj:field := value (object field assignment) - if send, ok := a.Left.(*ast.SendExpr); ok { - _, isSelf := send.Object.(*ast.SelfExpr) - - if isSelf { - fieldName := strings.ToUpper(send.Method) - switch a.Op { - case token.ASSIGN: - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - case token.PLUSEQ: - g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) - g.emitExpr(a.Right) - g.writeln("t.Plus()") - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - case token.MINUSEQ: - g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) - g.emitExpr(a.Right) - g.writeln("t.Minus()") - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - case token.STAREQ: - g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) - g.emitExpr(a.Right) - g.writeln("t.Mult()") - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - case token.SLASHEQ: - g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) - g.emitExpr(a.Right) - g.writeln("t.Divide()") - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - default: - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) - } - return - } - - // Non-self: obj:field := value → obj:_FIELD(value) - if a.Op == token.ASSIGN { - g.emitExpr(send.Object) - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf("t.Send(%q, 1)", "_"+strings.ToUpper(send.Method))) - g.writeln("t.Pop() // discard setter result") - return - } - } - - // Check for alias->field := value (FIELD->NAME := value) - if aliasExpr, ok := a.Left.(*ast.AliasExpr); ok { - if aliasIdent, ok2 := aliasExpr.Alias.(*ast.IdentExpr); ok2 { - if fieldIdent, ok3 := aliasExpr.Field.(*ast.IdentExpr); ok3 { - upper := strings.ToUpper(aliasIdent.Name) - // `M->name := v` / `MEMVAR->name := v` are memvar writes, - // not workarea field writes. - if upper == "M" || upper == "MEMVAR" { - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf(`t.PopMemvar(%q)`, fieldIdent.Name)) - return - } - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf(`{ _wa := t.WA.(*hbrdd.WorkAreaManager); _wa.SetAliasField(%q, %q, t.Pop2()) }`, aliasIdent.Name, fieldIdent.Name)) - return - } - } - } - - if ident, ok := a.Left.(*ast.IdentExpr); ok { - if idx, found := locals[strings.ToUpper(ident.Name)]; found { - switch a.Op { - case token.ASSIGN: - // Peephole: `x := x + ` / `x := x - ` → - // LocalAdd. Same result as `x += ` but lets the - // PRG side use the explicit form without penalty. - if be, ok := a.Right.(*ast.BinaryExpr); ok && - (be.Op == token.PLUS || be.Op == token.MINUS) { - if lid, isIdent := be.Left.(*ast.IdentExpr); isIdent { - if selfIdx, found := locals[strings.ToUpper(lid.Name)]; found && selfIdx == idx { - g.emitExpr(be.Right) - if be.Op == token.MINUS { - g.writeln("t.Negate()") - } - g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) - return - } - } - } - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) - case token.PLUSEQ: - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) - case token.MINUSEQ: - g.emitExpr(a.Right) - g.writeln("t.Negate()") - g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) - default: - // General compound: push local, push right, op, pop local - g.writeln(fmt.Sprintf("t.PushLocalFast(%d)", idx)) - g.emitExpr(a.Right) - g.emitBinaryOp(a.Op) - g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) - } - return - } - // Check module-level or function-level STATIC variable - upper := strings.ToUpper(ident.Name) - if goVar, found := g.staticVars[upper]; found { - switch a.Op { - case token.ASSIGN: - g.emitExpr(a.Right) - g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) - case token.PLUSEQ: - g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) - g.emitExpr(a.Right) - g.writeln("t.Plus()") - g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) - case token.MINUSEQ: - g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) - g.emitExpr(a.Right) - g.writeln("t.Minus()") - g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) - case token.STAREQ: - g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) - g.emitExpr(a.Right) - g.writeln("t.Mult()") - g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) - case token.SLASHEQ: - g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) - g.emitExpr(a.Right) - g.writeln("t.Divide()") - g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) - default: - g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) - g.emitExpr(a.Right) - g.emitBinaryOp(a.Op) - g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar)) - } - return - } - } - // Fallback: general assignment via stack - g.emitExpr(a.Right) - g.writeln("// WARN: complex assignment target — simplified") - g.writeln("t.Pop()") -} - -func (g *Generator) emitCallAsStmt(call *ast.CallExpr, locals localMap) { - if ident, ok := call.Func.(*ast.IdentExpr); ok { - g.emitPushSymbol(strings.ToUpper(ident.Name)) - } else { - g.emitExpr(call.Func) - } - g.writeln("t.PushNil()") - for _, arg := range call.Args { - g.emitExpr(arg) - } - g.writeln(fmt.Sprintf("t.Do(%d)", len(call.Args))) -} - -// boolLiteralValue returns (value, true) if e reduces to a .T./.F. -// literal at compile time. Sees through an outer `.NOT.` so expressions -// like `!.F.` also collapse. Used by emitIf to skip dead branches and -// by the AND/OR short-circuit emitter. -func boolLiteralValue(e ast.Expr) (bool, bool) { - if u, ok := e.(*ast.UnaryExpr); ok && u.Op == token.NOT { - if v, ok := boolLiteralValue(u.X); ok { - return !v, true - } - return false, false - } - lit, ok := e.(*ast.LiteralExpr) - if !ok { - return false, false - } - switch lit.Kind { - case token.TRUE: - return true, true - case token.FALSE: - return false, true - } - return false, false -} - -func (g *Generator) emitIf(s *ast.IfStmt, locals localMap) { - // Dead-branch elimination for literal conditions. An IF .T. collapses - // to its body; an IF .F. collapses to its first live ELSEIF/ELSE. - // We resolve the main Cond here and recurse on the remainder if it - // turns into a new IF chain. - if v, ok := boolLiteralValue(s.Cond); ok { - if v { - for _, stmt := range s.Body { - g.emitStmt(stmt, locals) - } - return - } - // IF .F. — scan ElseIfs for first non-.F. branch. - for i, ei := range s.ElseIfs { - if v2, ok2 := boolLiteralValue(ei.Cond); ok2 { - if v2 { - for _, stmt := range ei.Body { - g.emitStmt(stmt, locals) - } - return - } - continue // ELSEIF .F. — dead, skip - } - // Non-literal ELSEIF becomes the new IF head. - newIf := &ast.IfStmt{ - IfPos: ei.ElseIfPos, - Cond: ei.Cond, - Body: ei.Body, - ElseIfs: s.ElseIfs[i+1:], - ElseBody: s.ElseBody, - } - g.emitIf(newIf, locals) - return - } - // All ElseIfs were .F. — only ELSE body remains. - for _, stmt := range s.ElseBody { - g.emitStmt(stmt, locals) - } - return - } - - // Main cond is dynamic. Still filter dead ELSEIFs (.F. removed; - // an ELSEIF .T. truncates the chain and becomes the ELSE). - elseIfs := s.ElseIfs - elseBody := s.ElseBody - if len(elseIfs) > 0 { - filtered := make([]*ast.ElseIfClause, 0, len(elseIfs)) - for _, ei := range elseIfs { - if v, ok := boolLiteralValue(ei.Cond); ok { - if v { - // ELSEIF .T. — chain stops here; body becomes ELSE. - elseBody = ei.Body - elseIfs = filtered - goto emit - } - continue // ELSEIF .F. — dead - } - filtered = append(filtered, ei) - } - elseIfs = filtered - } - -emit: - g.emitExpr(s.Cond) - g.writeln("if t.PopLogical() {") - g.indent++ - for _, stmt := range s.Body { - g.emitStmt(stmt, locals) - } - g.indent-- - - for _, ei := range elseIfs { - g.writeIndent() - g.write("} else {\n") - g.indent++ - g.emitExpr(ei.Cond) - g.writeln("if t.PopLogical() {") - g.indent++ - for _, stmt := range ei.Body { - g.emitStmt(stmt, locals) - } - g.indent-- - } - - if len(elseBody) > 0 { - g.writeln("} else {") - g.indent++ - for _, stmt := range elseBody { - g.emitStmt(stmt, locals) - } - g.indent-- - } - - g.writeln("}") - - // Close nested elseif braces - for range elseIfs { - g.writeln("}") - } -} - -func (g *Generator) emitDoWhile(s *ast.DoWhileStmt, locals localMap) { - // DO WHILE .F. — body is unreachable; emit nothing. - if v, ok := boolLiteralValue(s.Cond); ok && !v { - return - } - - // Detect RDD commands in body for WA hoisting - hasRDD := hasRDDCommands(s.Body) - safeToHoist := hasRDD && !hasWorkareaChange(s.Body) - - if safeToHoist && g.hoistedFields == nil { - g.writeln("{") - g.indent++ - g.writeln("_dwa := t.WA.(*hbrdd.WorkAreaManager)") - g.writeln("_darea := _dwa.Current()") - g.hoistedDW = true - } - - g.writeln("for {") - g.indent++ - // DO WHILE .T. — the idiomatic infinite loop. Skip the per-iteration - // PushBool/PopLogical; exit only through EXIT / LOOP / RETURN. - if v, ok := boolLiteralValue(s.Cond); !ok || !v { - g.emitExpr(s.Cond) - g.writeln("if !t.PopLogical() { break }") - } - for _, stmt := range s.Body { - g.emitStmt(stmt, locals) - } - g.indent-- - g.writeln("}") - - if safeToHoist && g.hoistedDW { - g.hoistedDW = false - g.indent-- - g.writeln("}") - } -} - -// hasRDDCommands checks if any statement is an RDD operation. -func hasRDDCommands(stmts []ast.Stmt) bool { - for _, s := range stmts { - switch s.(type) { - case *ast.SkipCmd, *ast.GoCmd, *ast.SeekCmd, - *ast.ReplaceCmd, *ast.AppendCmd, *ast.DeleteCmd: - return true - } - } - return false -} - -// hasWorkareaChange checks for USE/SELECT that would invalidate cached area. -func hasWorkareaChange(stmts []ast.Stmt) bool { - for _, s := range stmts { - switch v := s.(type) { - case *ast.UseCmd, *ast.SelectCmd: - return true - case *ast.IfStmt: - if hasWorkareaChange(v.Body) || hasWorkareaChange(v.ElseBody) { - return true - } - case *ast.DoWhileStmt: - if hasWorkareaChange(v.Body) { - return true - } - } - } - return false -} - -// collectSymbols scans AST for all symbol names referenced by function calls. -// Returns unique names for hoisting FindSymbol to function prologue. -func collectSymbols(stmts []ast.Stmt) []string { - seen := map[string]bool{} - var names []string - var walk func([]ast.Stmt) - var walkExpr func(ast.Expr) - - walkExpr = func(e ast.Expr) { - if e == nil { - return - } - switch v := e.(type) { - case *ast.CallExpr: - if ident, ok := v.Func.(*ast.IdentExpr); ok { - name := strings.ToUpper(ident.Name) - if !seen[name] { - seen[name] = true - names = append(names, name) - } - } - for _, a := range v.Args { - walkExpr(a) - } - case *ast.BinaryExpr: - walkExpr(v.Left) - walkExpr(v.Right) - case *ast.UnaryExpr: - walkExpr(v.X) - } - } - - walk = func(stmts []ast.Stmt) { - for _, s := range stmts { - switch v := s.(type) { - case *ast.ExprStmt: - walkExpr(v.X) - case *ast.ReturnStmt: - if v.Value != nil { - walkExpr(v.Value) - } - case *ast.IfStmt: - walkExpr(v.Cond) - walk(v.Body) - walk(v.ElseBody) - case *ast.ForStmt: - walk(v.Body) - case *ast.ForEachStmt: - walk(v.Body) - case *ast.DoWhileStmt: - walkExpr(v.Cond) - walk(v.Body) - case *ast.SeqStmt: - walk(v.Body) - walk(v.RecoverBody) - case *ast.SwitchStmt: - for _, c := range v.Cases { - walk(c.Body) - } - } - } - } - - walk(stmts) - return names -} - -// collectReplaceFields scans statements for REPLACE field names. -// Returns nil if unsafe to hoist (USE/SELECT/CLOSE found). -func collectReplaceFields(stmts []ast.Stmt) []string { - seen := map[string]bool{} - var fields []string - for _, s := range stmts { - switch v := s.(type) { - case *ast.ReplaceCmd: - for _, rf := range v.Fields { - if ident, ok := rf.Field.(*ast.IdentExpr); ok { - name := ident.Name - if !seen[name] { - seen[name] = true - fields = append(fields, name) - } - } - } - case *ast.UseCmd, *ast.SelectCmd: - return nil // workarea may change — unsafe to hoist - case *ast.IfStmt: - // Check nested blocks - if sub := collectReplaceFields(v.Body); sub == nil { - return nil - } - if sub := collectReplaceFields(v.ElseBody); sub == nil { - return nil - } - case *ast.DoWhileStmt: - if sub := collectReplaceFields(v.Body); sub == nil { - return nil - } - } - } - return fields -} - -// hasAppendInBody checks if any APPEND command exists in the statements. -func hasAppendInBody(stmts []ast.Stmt) bool { - for _, s := range stmts { - if _, ok := s.(*ast.AppendCmd); ok { - return true - } - } - return false -} - -func (g *Generator) emitFor(s *ast.ForStmt, locals localMap) { - idx, found := locals[strings.ToUpper(s.Var)] - if !found { - g.writeln("// ERROR: FOR variable not found in locals") - return - } - - // i := start - g.emitExpr(s.Start) - g.writeln(fmt.Sprintf("t.PopLocalFast(%d)", idx)) - - // Detect step direction for comparison - isNegStep := false - if s.Step != nil { - if lit, ok := s.Step.(*ast.LiteralExpr); ok { - if lit.Kind == token.INT && len(lit.Value) > 0 && lit.Value[0] == '-' { - isNegStep = true - } - } - if un, ok := s.Step.(*ast.UnaryExpr); ok && un.Op == token.MINUS { - isNegStep = true - } - } - - // Optimization: hoist WA/FieldIndex lookups outside FOR loop - // if body contains REPLACE and no USE/SELECT (safe to cache). - rddFields := collectReplaceFields(s.Body) - hoistRDD := len(rddFields) > 0 && hasAppendInBody(s.Body) - - if hoistRDD { - g.writeln("{") - g.indent++ - g.writeln("_rwa := t.WA.(*hbrdd.WorkAreaManager)") - g.writeln("_rarea := _rwa.Current()") - g.writeln("var _rdbf *dbf.DBFArea") - g.writeln("if _rarea != nil { _rdbf, _ = _rarea.(*dbf.DBFArea) }") - // Pre-compute field indexes - for i, fname := range rddFields { - g.writeln(fmt.Sprintf("var _rfi%d int = -1", i)) - g.writeln(fmt.Sprintf("if _rdbf != nil { _rfi%d = _rdbf.FieldIndex(%q) }", i, fname)) - } - g.hoistedFields = rddFields // store for emitReplaceCmdHoisted - } - - g.writeln("for {") - g.indent++ - - // Comparison: fused opcode when limit is literal int (most common). - // Also see through const-propagated LOCALs: `LOCAL n := 100; FOR i := 1 - // TO n` should hit the same fast path as a bare literal. - toLit, _ := s.To.(*ast.LiteralExpr) - if toLit == nil { - if id, ok := s.To.(*ast.IdentExpr); ok { - if l, ok2 := g.constLocals[strings.ToUpper(id.Name)]; ok2 { - toLit = l - } - } - } - if lit := toLit; lit != nil && lit.Kind == token.INT { - if isNegStep { - g.writeln(fmt.Sprintf("if !t.LocalGreaterEqualInt(%d, %s) { break }", idx, lit.Value)) - } else { - g.writeln(fmt.Sprintf("if !t.LocalLessEqualInt(%d, %s) { break }", idx, lit.Value)) - } - } else { - // General case: stack-based comparison - g.writeln(fmt.Sprintf("t.PushLocalFast(%d)", idx)) - g.emitExpr(s.To) - if isNegStep { - g.writeln("t.GreaterEqual()") - } else { - g.writeln("t.LessEqual()") - } - g.writeln("if !t.PopLogical() { break }") - } - - // Track FOR loop depth so LOOP can use goto instead of continue. - // Only emit label if LOOP is present in the body (Go rejects unused labels). - hasLoop := bodyHasLoop(s.Body) - forLabel := "" - prevForLabel := g.curForLabel - if hasLoop { - forLabel = fmt.Sprintf("_for_next_%d", g.forLabelSeq) - g.forLabelSeq++ - g.curForLabel = forLabel - } else { - g.curForLabel = "" - } - - // body - for _, stmt := range s.Body { - g.emitStmt(stmt, locals) - } - - // Label for LOOP to jump to (skipping continue which would miss increment) - if hasLoop { - g.writeln(forLabel + ":") - } - - // i += step (default 1) - if s.Step != nil { - g.emitExpr(s.Step) - g.writeln(fmt.Sprintf("t.LocalAdd(%d)", idx)) - } else { - g.writeln(fmt.Sprintf("t.LocalAddInt(%d, 1)", idx)) - } - - g.curForLabel = prevForLabel - g.indent-- - g.writeln("}") - - // Close hoisting block - if hoistRDD { - g.hoistedFields = nil - g.indent-- - g.writeln("}") - } -} - -// bodyHasLoop checks if any statement in the body is a LOOP. -// Only checks the immediate level — LOOP inside nested FOR/DO WHILE is irrelevant. -func bodyHasLoop(stmts []ast.Stmt) bool { - for _, s := range stmts { - if hasLoopStmt(s) { - return true - } - } - return false -} - -func hasLoopStmt(s ast.Stmt) bool { - switch s := s.(type) { - case *ast.LoopStmt: - return true - case *ast.IfStmt: - for _, st := range s.Body { - if hasLoopStmt(st) { - return true - } - } - for _, ei := range s.ElseIfs { - for _, st := range ei.Body { - if hasLoopStmt(st) { - return true - } - } - } - for _, st := range s.ElseBody { - if hasLoopStmt(st) { - return true - } - } - case *ast.SeqStmt: - for _, st := range s.Body { - if hasLoopStmt(st) { - return true - } - } - for _, st := range s.RecoverBody { - if hasLoopStmt(st) { - return true - } - } - case *ast.SwitchStmt: - for _, c := range s.Cases { - for _, st := range c.Body { - if hasLoopStmt(st) { - return true - } - } - } - for _, st := range s.Otherwise { - if hasLoopStmt(st) { - return true - } - } - // Do NOT recurse into ForStmt/DoWhileStmt — nested LOOP is for the inner loop - } - return false -} - - -func (g *Generator) emitSwitch(s *ast.SwitchStmt, locals localMap) { - // Wrap the whole thing in a one-iteration `for` so: - // 1. `_sw` stays scoped to the switch. - // 2. `EXIT` inside a CASE emits `break` and targets this loop, - // matching Harbour SWITCH semantics (EXIT terminates SWITCH). - // 3. Empty SWITCH (`SWITCH x ENDSWITCH`, common in conditional- - // compile test files) stays valid Go. - g.writeln("for {") - g.indent++ - g.emitExpr(s.Expr) - g.writeln("_sw := t.Pop2()") - g.writeln("_ = _sw") // silence unused-var warning when no cases reference it - first := true - for _, c := range s.Cases { - if first { - g.emitExpr(c.Value) - g.writeln("if _sw.AsNumInt() == t.Pop2().AsNumInt() {") - first = false - } else { - g.emitExpr(c.Value) - g.writeln("} else if _sw.AsNumInt() == t.Pop2().AsNumInt() {") - } - g.indent++ - for _, stmt := range c.Body { - g.emitStmt(stmt, locals) - } - g.indent-- - } - if len(s.Otherwise) > 0 { - if first { - // No CASE arms — emit the OTHERWISE body as-is, no if/else. - for _, stmt := range s.Otherwise { - g.emitStmt(stmt, locals) - } - } else { - g.writeln("} else {") - g.indent++ - for _, stmt := range s.Otherwise { - g.emitStmt(stmt, locals) - } - g.indent-- - g.writeln("}") - } - } else if !first { - // Had CASE arms, no OTHERWISE — close the if/else-if chain. - g.writeln("}") - } - // Always break out of our one-iteration `for` wrapper, regardless - // of which (or no) case ran. - g.writeln("break") - g.indent-- - g.writeln("}") -} - -func (g *Generator) emitBeginSequence(s *ast.SeqStmt, locals localMap) { - // BEGIN SEQUENCE → Go's panic/recover. - // Catches both *HbError (runtime errors) and BreakValue (Break() calls). - // BreakValue is defined in hbrtl, but we detect it via duck typing - // to avoid import cycles. - g.writeln("{ // BEGIN SEQUENCE") - g.indent++ - g.writeln("_seqErr := func() (_recoverVal interface{}) {") - g.indent++ - g.writeln("defer func() {") - g.indent++ - g.writeln("if r := recover(); r != nil {") - g.indent++ - g.writeln("_recoverVal = r") - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("}()") - - // Body - for _, stmt := range s.Body { - g.emitStmt(stmt, locals) - } - - g.writeln("return nil") - g.indent-- - g.writeln("}()") - - // RECOVER - if len(s.RecoverBody) > 0 { - g.writeln("if _seqErr != nil {") - g.indent++ - if s.RecoverVar != "" { - if idx, found := locals[strings.ToUpper(s.RecoverVar)]; found { - // Extract the value from the recovered panic: - // *HbError → error description string - // BreakValue (has .Value field) → the Break() argument - // other → string representation - g.writeln(fmt.Sprintf(`{ // RECOVER USING %s`, s.RecoverVar)) - g.indent++ - g.writeln(`switch _sv := _seqErr.(type) {`) - g.writeln(`case *hbrt.HbError:`) - g.writeln(fmt.Sprintf(` t.SetLocalFast(%d, hbrt.MakeString(_sv.Error()))`, idx)) - g.writeln(`default:`) - // For BreakValue, use reflection-free approach: check if - // the type has a Value field via a local interface. - g.writeln(` type hasValue interface{ GetValue() hbrt.Value }`) - g.writeln(` if bv, ok := _sv.(hasValue); ok {`) - g.writeln(fmt.Sprintf(` t.SetLocalFast(%d, bv.GetValue())`, idx)) - g.writeln(` } else {`) - g.writeln(fmt.Sprintf(` t.SetLocalFast(%d, hbrt.MakeString("error"))`, idx)) - g.writeln(` }`) - g.writeln(`}`) - g.indent-- - g.writeln(`}`) - } - } - for _, stmt := range s.RecoverBody { - g.emitStmt(stmt, locals) - } - g.indent-- - g.writeln("}") - } else { - g.writeln("_ = _seqErr") - } - - g.indent-- - g.writeln("} // END SEQUENCE") -} - -func (g *Generator) emitForEach(s *ast.ForEachStmt, locals localMap) { - varIdx, found := locals[strings.ToUpper(s.Var)] - if !found { - g.writeln("// ERROR: FOR EACH variable not in locals") - return - } - - // Evaluate collection - g.emitExpr(s.Collection) - g.writeln("{ _feArr := t.Pop2()") - g.writeln("if _feArr.IsArray() {") - g.indent++ - g.writeln("_feItems := _feArr.AsArray().Items") - g.writeln("for _feI := 0; _feI < len(_feItems); _feI++ {") - g.indent++ - g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _feItems[_feI])", varIdx)) - - for _, stmt := range s.Body { - g.emitStmt(stmt, locals) - } - - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("} }") -} - -// --- Expression emission --- -// Each emitExpr leaves one value on the stack. - -// emitMultiAssign: a, b := Func() or a, b := x, y -func (g *Generator) emitMultiAssign(s *ast.MultiAssignStmt, locals localMap) { - if len(s.Values) == 1 { - // Single RHS: a, b := Func() → call function, unpack array result - g.emitExpr(s.Values[0]) - g.writeln("{") - g.indent++ - g.writeln("_mr := t.Pop2()") - g.writeln("if _mr.IsArray() {") - g.indent++ - g.writeln("_arr := _mr.AsArray()") - for i, name := range s.Targets { - if name == "_" { - continue - } - idx := locals[strings.ToUpper(name)] - if idx > 0 { - g.writeln(fmt.Sprintf("if %d < len(_arr.Items) { t.SetLocalFast(%d, _arr.Items[%d]) }", i, idx, i)) - } - } - g.indent-- - g.writeln("} else {") - g.indent++ - // Not array — assign first target, rest get NIL - if s.Targets[0] != "_" { - idx := locals[strings.ToUpper(s.Targets[0])] - if idx > 0 { - g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _mr)", idx)) - } - } - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("}") - } else { - // Multiple RHS: a, b := x, y (parallel assign) - // Evaluate all RHS first, then assign - for i, val := range s.Values { - g.emitExpr(val) - g.writeln(fmt.Sprintf("_mv%d := t.Pop2()", i)) - } - for i, name := range s.Targets { - if name == "_" || i >= len(s.Values) { - continue - } - idx := locals[strings.ToUpper(name)] - if idx > 0 { - g.writeln(fmt.Sprintf("t.SetLocalFast(%d, _mv%d)", idx, i)) - } - } - } -} - -// emitDefer: DEFER expr → Go defer -func (g *Generator) emitDefer(s *ast.DeferStmt, locals localMap) { - g.writeln("defer func() {") - g.indent++ - g.emitExpr(s.Call) - g.writeln("t.Pop() // discard defer result") - g.indent-- - g.writeln("}()") -} func (g *Generator) emitExpr(expr ast.Expr) { switch e := expr.(type) { @@ -2549,14 +913,53 @@ func (g *Generator) emitExpr(expr ast.Expr) { g.writeln("}") case *ast.PostfixExpr: - g.emitExpr(e.X) - g.writeln("t.Dup()") - if e.Op == token.INC { - g.writeln("t.Inc()") - } else { - g.writeln("t.Dec()") + // `x++` / `x--` as a sub-expression (e.g. `y := x++`): push the + // ORIGINAL value for the surrounding expression to consume, then + // mutate the underlying storage so the next read sees the new + // value. The earlier Dup+Inc+Pop form only incremented a copy + // on the stack — the variable itself was never updated, so + // `y := x++` silently left x unchanged and `x++ + x++` read the + // same value twice. Matches Clipper / C post-increment + // semantics. + delta := "1" + if e.Op == token.DEC { + delta = "-1" } - g.writeln("t.Pop() // keep original for postfix") + if ident, ok := e.X.(*ast.IdentExpr); ok { + upper := strings.ToUpper(ident.Name) + if idx, found := g.curLocals[upper]; found { + g.writeln(fmt.Sprintf("t.PushLocalFast(%d)", idx)) + g.writeln(fmt.Sprintf("t.LocalAddInt(%d, %s)", idx, delta)) + return + } + if goVar, found := g.staticVars[upper]; found { + g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar)) + g.writeln(fmt.Sprintf("{ _v := %s.AsNumInt() + %s; %s = hbrt.MakeInt(int(_v)) }", goVar, delta, goVar)) + return + } + } + if send, ok := e.X.(*ast.SendExpr); ok { + if _, isSelf := send.Object.(*ast.SelfExpr); isSelf { + fieldName := strings.ToUpper(send.Method) + // Push old value first (for outer expression), + // then read-modify-write the field. + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + g.writeln(fmt.Sprintf("t.PushSelfField(%q)", fieldName)) + if e.Op == token.INC { + g.writeln("t.PushInt(1)") + g.writeln("t.Plus()") + } else { + g.writeln("t.PushInt(1)") + g.writeln("t.Minus()") + } + g.writeln(fmt.Sprintf("t.SetSelfField(%q)", fieldName)) + return + } + } + // Fallback: expression has no assignable target (e.g. a + // parenthesised constant) — emit a no-op modification so the + // surrounding expression still sees the value. + g.emitExpr(e.X) default: g.writeln(fmt.Sprintf("t.PushNil() // WARN: unhandled expr %T", expr)) } @@ -2570,7 +973,10 @@ func (g *Generator) emitExpr(expr ast.Expr) { func (g *Generator) emitIndexKeyExpr(expr ast.Expr) { switch e := expr.(type) { case *ast.IdentExpr: - // Bare identifier in INDEX = field name → runtime FieldGet by name + // Bare identifier in INDEX = field name → runtime FieldGet by name. + // The emitted code uses strings.ToUpper, so flag the import here + // — without this the deferred-import patcher leaves a stub. + g.imports["strings"] = true fieldName := strings.ToUpper(e.Name) g.writeln(fmt.Sprintf(`{ _wa := t.WA.(*hbrdd.WorkAreaManager); if _a := _wa.Current(); _a != nil { for _fi := 0; _fi < _a.FieldCount(); _fi++ { if strings.ToUpper(_a.GetFieldInfo(_fi).Name) == %q { _v, _ := _a.GetValue(_fi); t.PushValue(_v); break } } } }`, fieldName)) case *ast.BinaryExpr: @@ -3160,266 +1566,6 @@ type goFastEntry struct { qualName string // Go call: strings.ToUpper } -func (g *Generator) emitAliasExpr(e *ast.AliasExpr) { - fieldIdent, isFieldIdent := e.Field.(*ast.IdentExpr) - - // Case 1: alias->field (static alias, simple field name) - if ident, ok := e.Alias.(*ast.IdentExpr); ok && isFieldIdent { - upper := strings.ToUpper(ident.Name) - // `M->name` / `MEMVAR->name` access the memvar namespace, not - // a database workarea. Harbour reserves both aliases for this. - if upper == "M" || upper == "MEMVAR" { - g.writeln(fmt.Sprintf(`t.PushMemvar(%q)`, fieldIdent.Name)) - return - } - g.writeln(fmt.Sprintf(`t.PushAliasField(%q, %q)`, ident.Name, fieldIdent.Name)) - return - } - - // Case 2: (expr)->field (dynamic alias, simple field name) - if isFieldIdent { - g.emitExpr(e.Alias) - g.writeln(fmt.Sprintf(`t.PushDynAliasField(t.Pop2().AsString(), %q)`, fieldIdent.Name)) - return - } - - // Case 3: alias->(expr) or (expr)->(expr) — workarea context expression - // Harbour: save current WA, select new WA, evaluate expr, restore WA - // Example: (nArea)->(Used()) → evaluate Used() in workarea nArea - // Example: CUSTOMERS->(RecCount()) → evaluate RecCount() in CUSTOMERS workarea - if ident, ok := e.Alias.(*ast.IdentExpr); ok { - _, isLocal := g.curLocals[strings.ToUpper(ident.Name)] - if isLocal { - // Local variable: emit value (numeric area number) - g.emitExpr(e.Alias) - g.writeln(`t.WASaveAndSelect(int(t.Pop2().AsNumInt()))`) - } else { - // Static alias name: resolve by alias string - g.writeln(fmt.Sprintf(`t.WASaveAndSelectAlias(%q)`, ident.Name)) - } - } else { - // Dynamic: numeric area from expression - g.emitExpr(e.Alias) - g.writeln(`t.WASaveAndSelect(int(t.Pop2().AsNumInt()))`) - } - g.emitExpr(e.Field) - g.writeln(`t.WARestore()`) -} - -func (g *Generator) fieldName(expr ast.Expr) string { - if ident, ok := expr.(*ast.IdentExpr); ok { - return ident.Name - } - return "" -} - -func (g *Generator) emitSendExpr(e *ast.SendExpr) { - // ::super:Method(args) — dispatch to parent class. The parse tree - // is nested: outer SendExpr.Object is itself a SendExpr whose - // Object is ::SELF and Method is "super". Detect that shape and - // route through SendSuper, which keeps Self bound to the child - // instance but looks the method up on Parent. - if sup, ok := e.Object.(*ast.SendExpr); ok { - if _, isSelf := sup.Object.(*ast.SelfExpr); isSelf && - strings.EqualFold(sup.Method, "super") { - for _, arg := range e.Args { - g.emitExpr(arg) - } - // Emit defining-class name so runtime walks the right Parent - // chain — Self's class alone would infinite-loop on 3+ level - // hierarchies (Grand→Child→Base). See SendSuper comment. - g.writeln(fmt.Sprintf("t.SendSuper(%q, %q, %d)", - g.curMethodClass, e.Method, len(e.Args))) - return - } - } - - // Self access: ::field (no parens) → PushSelfField - // Self method: ::method() (has parens) → Send on Self - if _, isSelf := e.Object.(*ast.SelfExpr); isSelf { - if !e.HasParens && len(e.Args) == 0 { - // ::field (getter, no parentheses) - g.writeln(fmt.Sprintf("t.PushSelfField(%q)", strings.ToUpper(e.Method))) - return - } - // ::method() or ::method(args) — method call on Self - g.writeln("t.PushSelf()") - for _, arg := range e.Args { - g.emitExpr(arg) - } - g.writeln(fmt.Sprintf("t.Send(%q, %d)", e.Method, len(e.Args))) - return - } - - // General: obj:method(args) or obj:field - // Check at runtime: if Go object → GoCall, else Harbour Send - g.emitExpr(e.Object) - g.writeln("{") - g.indent++ - g.writeln("_obj := t.Pop2()") - - // Push args and capture them - argNames := make([]string, len(e.Args)) - for i, arg := range e.Args { - argNames[i] = fmt.Sprintf("_sa%d", i) - g.emitExpr(arg) - g.writeln(fmt.Sprintf("%s := t.Pop2()", argNames[i])) - } - - g.writeln("if hbrt.IsGoObject(_obj) {") - g.indent++ - // Go object: use reflect bridge - argsStr := "" - for i, name := range argNames { - if i > 0 { - argsStr += ", " - } - argsStr += name - } - g.writeln(fmt.Sprintf("_gr := hbrt.GoCallCached(_obj, %q, %s)", e.Method, argsStr)) - g.writeln("if len(_gr) > 0 { t.PushValue(_gr[0]) } else { t.PushNil() }") - g.indent-- - g.writeln("} else {") - g.indent++ - // Harbour object: use Send - g.writeln("t.PushValue(_obj)") - for _, name := range argNames { - g.writeln(fmt.Sprintf("t.PushValue(%s)", name)) - } - g.writeln(fmt.Sprintf("t.Send(%q, %d)", e.Method, len(e.Args))) - g.indent-- - g.writeln("}") - g.indent-- - g.writeln("}") -} - -func (g *Generator) emitBlock(e *ast.BlockExpr) { - // Code block: {|params| body} - // Block params are passed via Frame() from Eval/AEval. - nParams := len(e.Params) - - // Collect free variables in the block body that reference outer locals. - // These need to be captured via Go closure variables. - outerLocals := g.curLocals - blockLocals := make(localMap) - for i, p := range e.Params { - blockLocals[strings.ToUpper(p)] = i + 1 - } - - // Find all idents in block body that are in outerLocals but NOT in blockLocals - freeVars := g.collectFreeVars(e.Body, blockLocals, outerLocals) - - // Harbour: closures share outer locals via RefCell (mutable capture). - // Convert each captured outer local to a RefCell, then pass the RefCell - // into the block. Both outer function and block read/write through it. - for _, fv := range freeVars { - outerIdx := outerLocals[fv] - // Ensure outer local is a RefCell (PushLocalRef creates one if needed, - // but we do it inline to avoid stack ops). - g.writeln(fmt.Sprintf("t.EnsureLocalRef(%d) // share %s via RefCell", outerIdx, fv)) - } - - // Capture the RefCell values with unique names to avoid Go scope issues. - capSeq := g.blockSeq - g.blockSeq++ - capNames := make(map[string]string) // fv → Go var name - for _, fv := range freeVars { - outerIdx := outerLocals[fv] - capName := fmt.Sprintf("_cap_%s_%d", fv, capSeq) - g.writeln(fmt.Sprintf("%s := t.LocalRaw(%d) // capture RefCell %s", capName, outerIdx, fv)) - capNames[fv] = capName - } - - g.writeln(fmt.Sprintf("t.PushBlock(func(t *hbrt.Thread) {")) - g.indent++ - nLocals := len(freeVars) - g.writeln(fmt.Sprintf("t.Frame(%d, %d)", nParams, nLocals)) - g.writeln("defer t.EndProc()") - - // Inject RefCell values directly into block locals — reads/writes go through RefCell - for i, fv := range freeVars { - localIdx := nParams + i + 1 - blockLocals[fv] = localIdx - g.writeln(fmt.Sprintf("t.SetLocalRaw(%d, %s) // inject shared RefCell %s", localIdx, capNames[fv], fv)) - } - - g.curLocals = blockLocals - g.emitExpr(e.Body) - g.writeln("t.RetValue()") - - g.curLocals = outerLocals - g.indent-- - g.writeln(fmt.Sprintf("}, %d)", 0)) -} - -// collectFreeVars finds identifier names in expr that exist in outerLocals but not blockLocals. -func (g *Generator) collectFreeVars(expr ast.Expr, blockLocals, outerLocals localMap) []string { - var result []string - seen := map[string]bool{} - g.walkExprIdents(expr, func(name string) { - upper := strings.ToUpper(name) - if seen[upper] { - return - } - if _, inBlock := blockLocals[upper]; inBlock { - return - } - if _, inOuter := outerLocals[upper]; inOuter { - seen[upper] = true - result = append(result, upper) - } - }) - return result -} - -// walkExprIdents calls fn for each IdentExpr in the expression tree. -func (g *Generator) walkExprIdents(expr ast.Expr, fn func(string)) { - if expr == nil { - return - } - switch e := expr.(type) { - case *ast.IdentExpr: - fn(e.Name) - case *ast.BinaryExpr: - g.walkExprIdents(e.Left, fn) - g.walkExprIdents(e.Right, fn) - case *ast.UnaryExpr: - g.walkExprIdents(e.X, fn) - case *ast.PostfixExpr: - g.walkExprIdents(e.X, fn) - case *ast.CallExpr: - g.walkExprIdents(e.Func, fn) - for _, a := range e.Args { - g.walkExprIdents(a, fn) - } - case *ast.IndexExpr: - g.walkExprIdents(e.X, fn) - g.walkExprIdents(e.Index, fn) - case *ast.DotExpr: - g.walkExprIdents(e.X, fn) - case *ast.AssignExpr: - g.walkExprIdents(e.Left, fn) - g.walkExprIdents(e.Right, fn) - case *ast.ArrayLitExpr: - for _, item := range e.Items { - g.walkExprIdents(item, fn) - } - case *ast.IIfExpr: - g.walkExprIdents(e.Cond, fn) - g.walkExprIdents(e.True, fn) - g.walkExprIdents(e.False, fn) - case *ast.SendExpr: - g.walkExprIdents(e.Object, fn) - for _, a := range e.Args { - g.walkExprIdents(a, fn) - } - case *ast.AliasExpr: - g.walkExprIdents(e.Alias, fn) - g.walkExprIdents(e.Field, fn) - case *ast.BlockExpr: - g.walkExprIdents(e.Body, fn) - } -} func (g *Generator) emitBinaryOp(op token.Kind) { switch op { diff --git a/compiler/gengo/gengo_test.go b/compiler/gengo/gengo_test.go index e195039..dc06506 100644 --- a/compiler/gengo/gengo_test.go +++ b/compiler/gengo/gengo_test.go @@ -36,7 +36,7 @@ func TestGenerateHelloWorld(t *testing.T) { assertContains(t, code, "package main") assertContains(t, code, `import (`) assertContains(t, code, `"five/hbrt"`) - assertContains(t, code, "func HB_MAIN(t *hbrt.Thread)") + assertContains(t, code, "func FV_MAIN(t *hbrt.Thread)") assertContains(t, code, "t.Frame(0, 0)") assertContains(t, code, "defer t.EndProc()") assertContains(t, code, `t.PushString("Hello, World!")`) @@ -119,8 +119,8 @@ FUNCTION Main() ? Double(21) RETURN NIL `) - assertContains(t, code, "func HB_DOUBLE(t *hbrt.Thread)") - assertContains(t, code, "func HB_MAIN(t *hbrt.Thread)") + assertContains(t, code, "func FV_DOUBLE(t *hbrt.Thread)") + assertContains(t, code, "func FV_MAIN(t *hbrt.Thread)") assertContains(t, code, "t.Frame(1, 0)") // Double has 1 param assertContains(t, code, "t.Mult()") assertContains(t, code, `t.GetSym(&_sym_test_DOUBLE, "DOUBLE")`) diff --git a/compiler/genpc/genpc.go b/compiler/genpc/genpc.go index 9523c45..49efb61 100644 --- a/compiler/genpc/genpc.go +++ b/compiler/genpc/genpc.go @@ -387,7 +387,10 @@ func (g *generator) emitExpr(expr ast.Expr) { g.emit(hbrt.PcOpPushLocal) g.emitU16(uint16(idx)) } else { - g.emit(hbrt.PcOpPushNil) // unresolved + // Unknown at compile time → runtime memvar lookup. This + // makes `&(expr)` and the debugger's `p` see PRIVATEs + // (including the frame-local injection the debugger does). + g.emitString(hbrt.PcOpPushMemvar, upper) } case *ast.BinaryExpr: diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go index edf4b97..7438b48 100644 --- a/compiler/parser/parser.go +++ b/compiler/parser/parser.go @@ -1832,7 +1832,32 @@ func (p *Parser) parseUse() *ast.UseCmd { func (p *Parser) parseSelect() *ast.SelectCmd { pos := p.expect(token.SELECT).Pos - area := p.parseExpr() + // Classic Clipper/Harbour semantics: `SELECT ` treats a bare + // identifier as a literal alias name (string), not as an expression. + // Wrap in parens to force expression evaluation — e.g. `SELECT (n)` + // where n is a local holding an area number or alias name. + // + // Without this, unresolved identifiers fell back to PushMemvar(name) + // which returned NIL, and _wa.Select("") quietly allocated a fresh + // empty workarea, stranding the caller's real data in the previous + // slot. Visible symptom: `SELECT ALTSRC` inside SqlAlterAddColumn + // picked up a phantom area and the row-copy loop saw EOF from the + // first iteration (no rows migrated). + var area ast.Expr + if p.current.Kind == token.IDENT { + // Peek: only treat bare IDENT as literal alias when it's the + // entire argument (next token ends the statement). `SELECT x:y` + // or `SELECT f()` must parse as expressions so the dispatch + // below still routes through parseExpr. + next := p.peekAt(1) + if next == token.NEWLINE || next == token.SEMICOLON || next == token.EOF { + tok := p.advance() + area = &ast.LiteralExpr{ValuePos: tok.Pos, Kind: token.STRING, Value: tok.Literal} + } + } + if area == nil { + area = p.parseExpr() + } p.expectEndOfStmt() return &ast.SelectCmd{SelectPos: pos, Area: area} } diff --git a/compiler/parser/stmtreg.go b/compiler/parser/stmtreg.go index 3cf2383..5fcaa1c 100644 --- a/compiler/parser/stmtreg.go +++ b/compiler/parser/stmtreg.go @@ -241,6 +241,13 @@ func (p *Parser) stmtRecallPackZap() ast.Stmt { } func (p *Parser) stmtSet() ast.Stmt { + // `Set(...)` with parentheses is a function call (the Harbour Set() + // runtime function for reading/writing SET slots), not a SET command. + // Treat it as an expression statement so the args reach the call. + if p.peekAt(1) == token.LPAREN { + p.rewriteAsIdent("Set") + return p.parseExprStmt() + } return p.parseSet() } diff --git a/hbrdd/cdx/cdx.go b/hbrdd/cdx/cdx.go index 93c9f20..068a0b1 100644 --- a/hbrdd/cdx/cdx.go +++ b/hbrdd/cdx/cdx.go @@ -150,12 +150,28 @@ type DecodedKey struct { Key []byte } -// DecodeLeafKeys extracts all keys from a CDX leaf page. -// Ported from rddfive/cdx_engine.c cdx_leaf_decode_all() — byte-level decode. -// 10x+ faster than bit-by-bit extractBits loop. +// DecodeLeafKeys extracts all keys from a CDX leaf page — convenience +// wrapper that allocates fresh buffers. Hot call sites (per-tag seek +// loops) should use DecodeLeafKeysInto to recycle storage. func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey { + keys, _ := DecodeLeafKeysInto(data, hdr, keyLen, nil, nil) + return keys +} + +// DecodeLeafKeysInto is the allocation-aware variant. `slabReuse` and +// `keysReuse` are previous buffers (may be nil on first call, or carry +// stale data from a prior decode). Returns fresh `keys` and the backing +// `slab` — both valid until the next call that reuses them. Ported +// from rddfive/cdx_engine.c cdx_leaf_decode_all() — byte-level decode, +// 10× faster than bit-by-bit extractBits loop. +// +// Reuse contract: caller must not retain pointers into an earlier +// slab after passing it here. CDX.Tag's page cache already observes +// this invariant because it overwrites cachedLeafKeys on miss. +func DecodeLeafKeysInto(data []byte, hdr LeafHeader, keyLen int, + slabReuse []byte, keysReuse []DecodedKey) ([]DecodedKey, []byte) { if hdr.NKeys == 0 { - return nil + return nil, slabReuse } nKeys := int(hdr.NKeys) @@ -167,9 +183,22 @@ func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey { dcMask := uint32((1 << uint(dupBits)) - 1) tcMask := uint32((1 << uint(trlBits)) - 1) - // Slab allocation: one alloc for all keys (avoids 30+ allocations per page) - keys := make([]DecodedKey, nKeys) - slab := make([]byte, nKeys*keyLen+keyLen) // +keyLen for prevKey + // Reuse or grow the DecodedKey slice. + var keys []DecodedKey + if cap(keysReuse) >= nKeys { + keys = keysReuse[:nKeys] + } else { + keys = make([]DecodedKey, nKeys) + } + + // Reuse or grow the byte slab (one alloc replaces 30+ per page). + needBytes := nKeys*keyLen + keyLen // +keyLen for prevKey + var slab []byte + if cap(slabReuse) >= needBytes { + slab = slabReuse[:needBytes] + } else { + slab = make([]byte, needBytes) + } prevKey := slab[nKeys*keyLen:] for j := range prevKey { prevKey[j] = ' ' @@ -214,7 +243,7 @@ func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey { copy(prevKey, key) } - return keys + return keys, slab } // extractBits extracts n bits from a byte array starting at bit offset. @@ -308,6 +337,8 @@ type Index struct { } // readAt reads len(buf) bytes at offset — from mmap or file fallback. +// Prefer pageSlice() on hot paths; this entry point stays for callers +// that need a writable, caller-owned copy. func (idx *Index) readAt(buf []byte, offset int64) error { if idx.mmapData != nil && offset >= 0 && int(offset)+len(buf) <= len(idx.mmapData) { copy(buf, idx.mmapData[offset:offset+int64(len(buf))]) @@ -317,6 +348,27 @@ func (idx *Index) readAt(buf []byte, offset int64) error { return err } +// pageSlice returns a read-only view of one B-tree page at offset — a +// direct slice of mmap when possible (zero copy), else a read into the +// caller's fallback buffer. Returns nil on read error. The returned +// slice is valid until the index is remapped, closed, or until the +// next fallbackBuf reuse — callers must not retain it across those +// events. The seek loop uses this: same Tag.seekBuf gets handed back +// for every internal-node visit, so the non-mmap path allocates once, +// and the mmap path allocates nothing. +func (idx *Index) pageSlice(offset int64, fallbackBuf []byte) []byte { + if idx.mmapData != nil && offset >= 0 && int(offset)+PageLen <= len(idx.mmapData) { + return idx.mmapData[offset : offset+PageLen] + } + if len(fallbackBuf) < PageLen { + fallbackBuf = make([]byte, PageLen) + } + if _, err := idx.file.ReadAt(fallbackBuf[:PageLen], offset); err != nil { + return nil + } + return fallbackBuf[:PageLen] +} + // Tag represents one index tag within a CDX file. type Tag struct { Name string // tag name (e.g., "BYNAME") @@ -336,6 +388,19 @@ type Tag struct { // Leaf page decode cache — avoids re-decoding same page on SkipNext/SkipPrev cachedLeafOff int64 cachedLeafKeys []DecodedKey + // Reusable backing storage for the key slab and DecodedKey slice. + // cachedLeafKeys[i].Key aliases slices of cachedLeafSlab, so the + // slab stays alive as long as cachedLeafKeys is in use. On cache + // miss we hand both buffers back to DecodeLeafKeysInto which + // reuses them if big enough — saving one alloc per leaf decode. + cachedLeafSlab []byte + + // seekBuf is handed to Index.pageSlice as the fallback when mmap + // isn't available (Windows, or file grown past mapped size). The + // mmap path ignores it and returns a slice directly into mapped + // memory — zero copy. Either way, allocating a single 512-byte + // buffer per Tag (not per Seek) eliminates the per-seek alloc. + seekBuf []byte } type StackEntry struct { @@ -583,7 +648,10 @@ func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry { return entries } -// getLeafKeys returns decoded leaf keys with caching. +// getLeafKeys returns decoded leaf keys with caching. On cache miss the +// previous slab + key slice are recycled into DecodeLeafKeysInto so we +// avoid a fresh alloc for every leaf traversed during a seek-heavy +// workload (which is the whole point of caching them per-Tag). func (t *Tag) getLeafKeys(pageOffset int64) ([]DecodedKey, error) { if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil { return t.cachedLeafKeys, nil @@ -593,9 +661,10 @@ func (t *Tag) getLeafKeys(pageOffset int64) ([]DecodedKey, error) { return nil, err } hdr := DecodeLeafHeader(buf) - keys := DecodeLeafKeys(buf, hdr, t.keyLen) + keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys + t.cachedLeafSlab = slab return keys, nil } @@ -609,12 +678,18 @@ func (t *Tag) Seek(searchKey []byte) (uint32, bool) { t.tagEOF = false pageOffset := int64(t.header.RootPtr) - buf := make([]byte, PageLen) // single buffer reused across all levels + // Reuse seekBuf across seeks so the non-mmap fallback path only + // allocates once per Tag lifetime. With mmap, pageSlice returns a + // view directly into mapped memory and seekBuf stays unused. + if cap(t.seekBuf) < PageLen { + t.seekBuf = make([]byte, PageLen) + } entrySize := t.keyLen + 8 searchLen := len(searchKey) for { - if err := t.index.readAt(buf, pageOffset); err != nil { + buf := t.index.pageSlice(pageOffset, t.seekBuf) + if buf == nil { t.tagEOF = true return 0, false } @@ -627,9 +702,11 @@ func (t *Tag) Seek(searchKey []byte) (uint32, bool) { keys = t.cachedLeafKeys } else { hdr := DecodeLeafHeader(buf) - keys = DecodeLeafKeys(buf, hdr, t.keyLen) + var slab []byte + keys, slab = DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys + t.cachedLeafSlab = slab } // Binary search — leftmost match @@ -746,9 +823,10 @@ func (t *Tag) goLeftmost(pageOffset int64) bool { if isLeaf { hdr := DecodeLeafHeader(buf) - keys := DecodeLeafKeys(buf, hdr, t.keyLen) + keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys + t.cachedLeafSlab = slab if len(keys) > 0 { t.curRecNo = keys[0].RecNo copy(t.curKey, keys[0].Key) @@ -794,7 +872,10 @@ func (t *Tag) goRightmost(pageOffset int64) bool { if isLeaf { hdr := DecodeLeafHeader(buf) - keys := DecodeLeafKeys(buf, hdr, t.keyLen) + keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) + t.cachedLeafOff = pageOffset + t.cachedLeafKeys = keys + t.cachedLeafSlab = slab if len(keys) > 0 { last := len(keys) - 1 t.curRecNo = keys[last].RecNo diff --git a/hbrdd/cdx/mmap_windows.go b/hbrdd/cdx/mmap_windows.go index 902f9f1..d96cc67 100644 --- a/hbrdd/cdx/mmap_windows.go +++ b/hbrdd/cdx/mmap_windows.go @@ -1,17 +1,82 @@ //go:build windows +// Windows mmap — see hbrdd/ntx/mmap_windows.go for the commentary; this +// is the same implementation for the CDX index package. Keeping +// separate copies (instead of a shared helper) because the registry +// map is private to each package, avoiding cross-package coupling for +// what is otherwise 50 lines of stdlib-only code. + package cdx import ( - "errors" + "fmt" "os" + "sync" + "syscall" + "unsafe" +) + +const ( + pageReadonly = 0x02 + fileMapRead = 0x0004 +) + +var ( + kernel32 = syscall.NewLazyDLL("kernel32.dll") + procCreateFileMappingW = kernel32.NewProc("CreateFileMappingW") + procMapViewOfFile = kernel32.NewProc("MapViewOfFile") + procUnmapViewOfFile = kernel32.NewProc("UnmapViewOfFile") + procCloseHandle = kernel32.NewProc("CloseHandle") + + mappingMu sync.Mutex + mappings = map[uintptr]syscall.Handle{} ) -// Windows: mmap not implemented — fallback to read() path. func mmapFile(f *os.File, size int) ([]byte, error) { - return nil, errors.New("mmap not supported on Windows") + if size <= 0 { + return nil, fmt.Errorf("mmap: non-positive size %d", size) + } + hFile := syscall.Handle(f.Fd()) + sizeHigh := uint32(uint64(size) >> 32) + sizeLow := uint32(uint64(size) & 0xFFFFFFFF) + hMap, _, err := procCreateFileMappingW.Call( + uintptr(hFile), 0, pageReadonly, + uintptr(sizeHigh), uintptr(sizeLow), 0, + ) + if hMap == 0 { + return nil, fmt.Errorf("CreateFileMapping: %v", err) + } + addr, _, err := procMapViewOfFile.Call( + hMap, fileMapRead, 0, 0, uintptr(size), + ) + if addr == 0 { + procCloseHandle.Call(hMap) + return nil, fmt.Errorf("MapViewOfFile: %v", err) + } + data := unsafe.Slice((*byte)(unsafe.Pointer(addr)), size) + + mappingMu.Lock() + mappings[addr] = syscall.Handle(hMap) + mappingMu.Unlock() + return data, nil } func munmapFile(data []byte) error { + if len(data) == 0 { + return nil + } + addr := uintptr(unsafe.Pointer(&data[0])) + mappingMu.Lock() + hMap, ok := mappings[addr] + delete(mappings, addr) + mappingMu.Unlock() + + r, _, err := procUnmapViewOfFile.Call(addr) + if r == 0 { + return fmt.Errorf("UnmapViewOfFile: %v", err) + } + if ok { + procCloseHandle.Call(uintptr(hMap)) + } return nil } diff --git a/hbrdd/dbf/dbf.go b/hbrdd/dbf/dbf.go index e847562..984b987 100644 --- a/hbrdd/dbf/dbf.go +++ b/hbrdd/dbf/dbf.go @@ -18,7 +18,6 @@ import ( "fmt" "os" "strings" - "syscall" ) // DBFArea implements the DBF database driver. @@ -76,6 +75,14 @@ type DBFArea struct { // Built lazily on first FieldPosCache() call. // SQLite: "column affinity binding" — O(1) vs O(n) linear scan. fieldPosMap map[string]int + + // SQL NULL bitmap (VFP/Harbour _NullFlags convention). + // nullFieldsIdx: descriptor index of the hidden _NullFlags field, + // or -1 if the table has no nullable columns. + // nullBitOf: user-field descriptor index → bit position within + // the _NullFlags byte range. + nullFieldsIdx int + nullBitOf map[int]int } // DBFDriver is the driver factory for DBF files. @@ -216,18 +223,25 @@ func openDBF(drv *DBFDriver, params hbrdd.OpenParams) (*DBFArea, error) { area.recCount = hdr.RecCount } - // Step 7: Build FieldInfo for BaseArea - fieldInfos := make([]hbrdd.FieldInfo, fieldCount) - for i, fd := range fields { - fieldInfos[i] = hbrdd.FieldInfo{ + // Step 7: Build FieldInfo for BaseArea. System fields (notably + // the hidden _NullFlags column carrying the SQL NULL bitmap) are + // held in fieldDescs for storage but filtered out of the public + // FieldInfo slice — user-visible counts stay stable. + fieldInfos := make([]hbrdd.FieldInfo, 0, fieldCount) + for _, fd := range fields { + if fd.Flags&FieldFlagSystem != 0 { + continue + } + fieldInfos = append(fieldInfos, hbrdd.FieldInfo{ Name: fd.GetName(), Type: fd.Type, Len: int(fd.Len), Dec: int(fd.Dec), Flags: fd.Flags, - } + }) } area.InitFields(fieldInfos) + area.buildNullIndex() // Step 8: Auto-open FPT if memo fields exist if hasMemoField(fields) { @@ -264,14 +278,22 @@ func createDBF(drv *DBFDriver, params hbrdd.CreateParams) (*DBFArea, error) { // Build field descriptors fieldDescs := make([]FieldDesc, len(params.Fields)) - recordLen := uint16(1) // deletion flag for i, fi := range params.Fields { fieldDescs[i].SetName(fi.Name) fieldDescs[i].Type = fi.Type fieldDescs[i].Len = byte(fi.Len) fieldDescs[i].Dec = byte(fi.Dec) fieldDescs[i].Flags = fi.Flags - recordLen += uint16(fi.Len) + } + + // If any user field is nullable, append the hidden _NullFlags + // system column. Must happen before recordLen is tallied so its + // bytes reserve space in the record layout. + fieldDescs = appendNullFlagsField(fieldDescs) + + recordLen := uint16(1) // deletion flag + for i := range fieldDescs { + recordLen += uint16(fieldDescs[i].Len) } // Build header @@ -324,6 +346,7 @@ func createDBF(drv *DBFDriver, params hbrdd.CreateParams) (*DBFArea, error) { fieldInfos := make([]hbrdd.FieldInfo, len(params.Fields)) copy(fieldInfos, params.Fields) area.InitFields(fieldInfos) + area.buildNullIndex() area.FEof = true // Auto-create FPT if memo fields exist @@ -588,7 +611,6 @@ func (a *DBFArea) Skip(count int64) error { } newRec := a.recNo + 1 if newRec > a.recCount { - // Flush dirty record before entering EOF phantom if a.dirty { a.flushRecord() } @@ -599,7 +621,6 @@ func (a *DBFArea) Skip(count int64) error { if err := a.GoTo(newRec); err != nil { return err } - // Skip deleted records when SET DELETED ON if err := a.skipFilter(1); err != nil { return err } @@ -622,27 +643,10 @@ func (a *DBFArea) Skip(count int64) error { return nil } -// mmapDBF maps the DBF file for zero-copy reads. Called after open. -func (a *DBFArea) mmapDBF() { - fi, err := a.dataFile.Stat() - if err != nil || fi.Size() < int64(a.header.HeaderLen) { - return - } - data, err := syscall.Mmap(int(a.dataFile.Fd()), 0, int(fi.Size()), - syscall.PROT_READ, syscall.MAP_SHARED) - if err != nil { - return - } - a.mmapData = data -} - -// unmapDBF releases the mmap. -func (a *DBFArea) unmapDBF() { - if a.mmapData != nil { - syscall.Munmap(a.mmapData) - a.mmapData = nil - } -} +// mmapDBF / unmapDBF live in mmap_posix.go (Linux/Darwin, syscall.Mmap) +// and mmap_windows.go (CreateFileMapping / MapViewOfFile). Keeping the +// platform-specific ioctl-ish calls out of this file lets cross-builds +// stay clean. // loadRecord reads the current record — from mmap or file fallback. func (a *DBFArea) loadRecord() { @@ -664,6 +668,12 @@ func (a *DBFArea) GetValue(fieldIndex int) (hbrt.Value, error) { if a.FEof { return hbrt.MakeNil(), nil } + // SQL NULL: nullable fields check the hidden _NullFlags bitmap + // first; a set bit means the raw bytes carry no meaningful value + // and the reader should surface NIL to the caller. + if a.isFieldNull(fieldIndex) { + return hbrt.MakeNil(), nil + } fd := &a.fieldDescs[fieldIndex] // MEMO field: read from FPT and return string if (fd.Type == 'M' || fd.Type == 'm') && a.memoFile != nil { @@ -695,6 +705,18 @@ func (a *DBFArea) PutValue(fieldIndex int, val hbrt.Value) error { if fieldIndex < 0 || fieldIndex >= len(a.fieldDescs) { return fmt.Errorf("field index out of range: %d", fieldIndex) } + // SQL NULL handling for nullable fields: a NIL write sets the + // bitmap bit and leaves the raw bytes alone (readers will short- + // circuit via isFieldNull before reaching the type codec). A + // non-NIL write clears the bit so the raw value surfaces again. + if _, nullable := a.nullBitOf[fieldIndex]; nullable { + if val.IsNil() { + a.setFieldNull(fieldIndex, true) + a.dirty = true + return nil + } + a.setFieldNull(fieldIndex, false) + } fd := &a.fieldDescs[fieldIndex] // MEMO field: write string to FPT, store block number in DBF if (fd.Type == 'M' || fd.Type == 'm') && a.memoFile != nil && val.IsString() { diff --git a/hbrdd/dbf/field.go b/hbrdd/dbf/field.go index 0f0425f..fc35c6b 100644 --- a/hbrdd/dbf/field.go +++ b/hbrdd/dbf/field.go @@ -20,10 +20,26 @@ import ( // GetFieldValue converts raw record bytes to a Five Value. // Harbour: hb_dbfGetValue in dbf1.c func GetFieldValue(recBuf []byte, offset uint16, field *FieldDesc) hbrt.Value { + return getFieldValueImpl(recBuf, offset, field, false) +} + +// getFieldValueImpl is the zero-copy-aware variant. When stable=true the +// caller guarantees the recBuf bytes won't be mutated, freed, or +// unmapped for the Value's lifetime — then CHAR fields alias the +// buffer and skip the `string([]byte)` copy. +// +// NOTE: currently unexported because naive usage (even with mmap-backed +// buffers) can produce UAF when FiveSql2 closes/packs temp CTE tables +// while CHAR values from earlier iterations are still referenced. The +// machinery is kept for a future refcounted mmap lifetime scheme. +func getFieldValueImpl(recBuf []byte, offset uint16, field *FieldDesc, stable bool) hbrt.Value { raw := recBuf[offset : offset+uint16(field.Len)] switch field.Type { case 'C', 'c': // Character + if stable { + return hbrt.MakeStringBytes(raw) + } return hbrt.MakeString(string(raw)) case 'N', 'n': // Numeric (ASCII) @@ -360,11 +376,22 @@ func parseMemoRef(raw []byte, fieldLen byte) hbrt.Value { return hbrt.MakeLong(int64(blockNo)) } if fieldLen == 10 { - s := strings.TrimSpace(string(raw)) - if s == "" { - return hbrt.MakeLong(0) + // Inline byte-level parse: same pattern as parseNumericField. + // Avoids string(raw) + strings.TrimSpace + strconv.ParseInt + // — roughly 3× faster and allocation-free. + var n int64 + for _, c := range raw { + switch { + case c == ' ': + // Leading/trailing space — keep current accumulator + case c >= '0' && c <= '9': + n = n*10 + int64(c-'0') + default: + // Malformed block ref — treat as 0, same as strconv.ParseInt + // would on the non-digit prefix. + return hbrt.MakeLong(0) + } } - n, _ := strconv.ParseInt(s, 10, 64) return hbrt.MakeLong(n) } return hbrt.MakeLong(0) @@ -395,10 +422,23 @@ func parseIntegerField(raw []byte, fieldLen byte) hbrt.Value { func formatNumericField(raw []byte, fieldLen, dec byte, val hbrt.Value) { d := val.AsNumDouble() - format := "%" + strconv.Itoa(int(fieldLen)) + "." + strconv.Itoa(int(dec)) + "f" - s := []byte(fmt.Sprintf(format, d)) - // If too wide, fill with asterisks (Harbour behavior) + // NaN/Inf → asterisks (Harbour: field width overflow marker) + if math.IsNaN(d) || math.IsInf(d, 0) { + for i := range raw { + raw[i] = '*' + } + return + } + + // Use strconv.AppendFloat into a stack-allocated scratch buffer. + // Skips fmt.Sprintf's format-string parsing and its temporary + // string allocation — 3–5× faster per write, zero heap allocs on + // the hot path. 48 bytes fits any DBF numeric field (max 20 len). + var scratch [48]byte + s := strconv.AppendFloat(scratch[:0], d, 'f', int(dec), 64) + + // Overflow → asterisks, same as before. if len(s) > int(fieldLen) { for i := range raw { raw[i] = '*' @@ -406,8 +446,12 @@ func formatNumericField(raw []byte, fieldLen, dec byte, val hbrt.Value) { return } - // Right-align, space-pad left - copy(raw, s) + // Right-align, space-pad left. + padLen := int(fieldLen) - len(s) + for i := 0; i < padLen; i++ { + raw[i] = ' ' + } + copy(raw[padLen:], s) } func putDateField(raw []byte, fieldLen byte, val hbrt.Value) { diff --git a/hbrdd/dbf/header.go b/hbrdd/dbf/header.go index 21657c9..82f91ff 100644 --- a/hbrdd/dbf/header.go +++ b/hbrdd/dbf/header.go @@ -43,6 +43,21 @@ const ( VersionFPT = 0xF5 // DBF + FPT memo ) +// Field flag bits (byte at field descriptor offset 18). +// Harbour: HB_FF_* in hbapirdd.h — matches our FieldInfo.Flags convention. +const ( + FieldFlagSystem = 0x01 // system/hidden (not exposed as user-visible) + FieldFlagNullable = 0x02 // accepts SQL NULL, tracked via _NullFlags bit + FieldFlagBinary = 0x04 // binary payload (no codepage conversion) + FieldFlagAutoInc = 0x08 // auto-increment (VFP) +) + +// NullFlagsFieldName is the hidden column Harbour/VFP uses to track +// SQL NULL state: 1 bit per nullable user column. Kept in fieldDescs +// but excluded from the public FieldCount/FieldInfo view so SQL +// `SELECT *` / DDL column enumeration never see it. +const NullFlagsFieldName = "_NullFlags" + // Header represents the 32-byte DBF file header. // Layout is byte-identical to Harbour's DBFHEADER. type Header struct { diff --git a/hbrdd/dbf/indexer.go b/hbrdd/dbf/indexer.go index 6c0a14f..5e63b3c 100644 --- a/hbrdd/dbf/indexer.go +++ b/hbrdd/dbf/indexer.go @@ -201,14 +201,23 @@ func (a *DBFArea) OrderCreate(params hbrdd.OrderCreateParams) error { // Compiled path: gengo emitted an inline Go closure that evaluates // the key expression directly (no MacroEval string parsing). // ~3x faster than the MacroEval slow path for UDF indexes. + // ForFunc — when also set by gengo — skips the runtime parser + // for the FOR condition in the same way. slab := make([]byte, int(recCount)*keyLen) next := 0 oldRec := a.recNo trimmedFor := strings.TrimSpace(forExpr) + hasFor := trimmedFor != "" || params.ForFunc != nil for r := uint32(1); r <= recCount; r++ { a.GoTo(r) - if trimmedFor != "" { - if !a.evalForInner(trimmedFor) { + if hasFor { + var include bool + if params.ForFunc != nil { + include = params.ForFunc() + } else { + include = a.evalForInner(trimmedFor) + } + if !include { continue } } @@ -238,10 +247,17 @@ func (a *DBFArea) OrderCreate(params hbrdd.OrderCreateParams) error { oldRec := a.recNo trimmedKey := strings.TrimSpace(keyExpr) trimmedFor := strings.TrimSpace(forExpr) + hasFor := trimmedFor != "" || params.ForFunc != nil for r := uint32(1); r <= recCount; r++ { a.GoTo(r) - if trimmedFor != "" { - if !a.evalForInner(trimmedFor) { + if hasFor { + var include bool + if params.ForFunc != nil { + include = params.ForFunc() + } else { + include = a.evalForInner(trimmedFor) + } + if !include { continue } } @@ -360,7 +376,13 @@ func (a *DBFArea) OrderListAdd(path string) error { a.idxState.indexes = append(a.idxState.indexes, idx) a.idxState.names = append(a.idxState.names, path) a.idxState.tags = append(a.idxState.tags, "") - a.idxState.keyExprs = append(a.idxState.keyExprs, "") + /* Pull the key expression out of the on-disk NTX header so DBOI_EXPRESSION + * works after re-opening an index file. Previously we appended "" here, + * which silently broke MatchOrderByTag (TSqlIndex.prg) — the substring + * test against an empty string always failed, so SELECT … ORDER BY + * LIMIT N could never recognize an existing tag and skipped the LIMIT + * pushdown / sort-skip optimizations. */ + a.idxState.keyExprs = append(a.idxState.keyExprs, idx.KeyExpr()) a.idxState.current = len(a.idxState.indexes) - 1 return nil @@ -947,6 +969,15 @@ func (a *DBFArea) OrderKeyExpr(n int) string { return a.idxState.keyExprs[n-1] } +// OrderKeyLen returns the byte length of keys stored in order n (1-based). +// Zero means "unknown" (no such order, or indexes slice stale). +func (a *DBFArea) OrderKeyLen(n int) int { + if a.idxState == nil || n < 1 || n > len(a.idxState.indexes) { + return 0 + } + return a.idxState.indexes[n-1].KeyLen() +} + // fieldSlice describes a direct byte range within a record buffer. // The optional transform is applied during key extraction (e.g. UPPER/LOWER). type fieldSlice struct { diff --git a/hbrdd/dbf/mmap_posix.go b/hbrdd/dbf/mmap_posix.go new file mode 100644 index 0000000..6d74ea3 --- /dev/null +++ b/hbrdd/dbf/mmap_posix.go @@ -0,0 +1,36 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build darwin || linux + +// DBF file mmap for POSIX — syscall.Mmap + PROT_READ + MAP_SHARED. +// Keeps the OS page cache between us and disk so sequential scans are +// cheap and multiple readers share pages naturally. + +package dbf + +import "syscall" + +// mmapDBF maps the DBF file for zero-copy reads. Called after open. +// On mmap failure we just leave a.mmapData == nil and the read path +// falls back to dataFile.ReadAt — no hard error. +func (a *DBFArea) mmapDBF() { + fi, err := a.dataFile.Stat() + if err != nil || fi.Size() < int64(a.header.HeaderLen) { + return + } + data, err := syscall.Mmap(int(a.dataFile.Fd()), 0, int(fi.Size()), + syscall.PROT_READ, syscall.MAP_SHARED) + if err != nil { + return + } + a.mmapData = data +} + +// unmapDBF releases the mmap. +func (a *DBFArea) unmapDBF() { + if a.mmapData != nil { + syscall.Munmap(a.mmapData) + a.mmapData = nil + } +} diff --git a/hbrdd/dbf/mmap_windows.go b/hbrdd/dbf/mmap_windows.go new file mode 100644 index 0000000..4a60d21 --- /dev/null +++ b/hbrdd/dbf/mmap_windows.go @@ -0,0 +1,92 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build windows + +// Windows mmap for DBF record data — CreateFileMappingW + MapViewOfFile +// with PAGE_READONLY. Parallels the POSIX syscall.Mmap path: on mmap +// failure we leave a.mmapData == nil so reads fall back to ReadAt. +// +// Mapping handles are tracked in a package-local registry keyed by +// view address so unmapDBF can recover the HANDLE given only the +// []byte we stored on the Area. Matches the hbrdd/ntx and hbrdd/cdx +// implementations byte-for-byte to stay maintainable. + +package dbf + +import ( + "fmt" + "sync" + "syscall" + "unsafe" +) + +const ( + pageReadonly = 0x02 + fileMapRead = 0x0004 +) + +var ( + kernel32 = syscall.NewLazyDLL("kernel32.dll") + procCreateFileMappingW = kernel32.NewProc("CreateFileMappingW") + procMapViewOfFile = kernel32.NewProc("MapViewOfFile") + procUnmapViewOfFile = kernel32.NewProc("UnmapViewOfFile") + procCloseHandle = kernel32.NewProc("CloseHandle") + + mappingMu sync.Mutex + mappings = map[uintptr]syscall.Handle{} +) + +func (a *DBFArea) mmapDBF() { + fi, err := a.dataFile.Stat() + if err != nil || fi.Size() < int64(a.header.HeaderLen) { + return + } + size := int(fi.Size()) + if size <= 0 { + return + } + hFile := syscall.Handle(a.dataFile.Fd()) + sizeHigh := uint32(uint64(size) >> 32) + sizeLow := uint32(uint64(size) & 0xFFFFFFFF) + hMap, _, _ := procCreateFileMappingW.Call( + uintptr(hFile), 0, pageReadonly, + uintptr(sizeHigh), uintptr(sizeLow), 0, + ) + if hMap == 0 { + return + } + addr, _, _ := procMapViewOfFile.Call(hMap, fileMapRead, 0, 0, uintptr(size)) + if addr == 0 { + procCloseHandle.Call(hMap) + return + } + a.mmapData = unsafe.Slice((*byte)(unsafe.Pointer(addr)), size) + + mappingMu.Lock() + mappings[addr] = syscall.Handle(hMap) + mappingMu.Unlock() +} + +func (a *DBFArea) unmapDBF() { + if a.mmapData == nil { + return + } + addr := uintptr(unsafe.Pointer(&a.mmapData[0])) + mappingMu.Lock() + hMap, ok := mappings[addr] + delete(mappings, addr) + mappingMu.Unlock() + + r, _, _ := procUnmapViewOfFile.Call(addr) + if r == 0 { + // Best-effort — log and continue. Unmap failure usually + // indicates a corrupted handle table, recoverable only via + // process exit. + _ = fmt.Sprint("UnmapViewOfFile failed") + } + if ok { + procCloseHandle.Call(uintptr(hMap)) + } + a.mmapData = nil +} diff --git a/hbrdd/dbf/null.go b/hbrdd/dbf/null.go new file mode 100644 index 0000000..0643d63 --- /dev/null +++ b/hbrdd/dbf/null.go @@ -0,0 +1,126 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// SQL NULL support via Harbour/VFP-style _NullFlags bitmap column. +// +// When a table is created with at least one nullable field the DBF engine +// appends a hidden system field named "_NullFlags" of type '0' (Harbour +// VFP convention). The field's length is ceil(nNullable/8) bytes; each +// nullable user field owns one bit. A set bit means "this field holds +// SQL NULL" — readers return NIL instead of the raw value, writers +// clear the bit on a non-NIL write. +// +// The _NullFlags descriptor carries FieldFlagSystem so the base area's +// FieldCount / GetFieldInfo never expose it, keeping existing SQL / +// SELECT * / PRG scan-column code paths blind to the hidden field. +// +// Reference: /mnt/d/harbour-core/src/rdd/dbf1.c — hb_dbfGetNullFlag, +// hb_dbfSetNullFlag. Harbour also uses this column to track VARCHAR +// length bits; Five only implements nullability for now. +package dbf + +// buildNullIndex populates nullFieldsIdx (descriptor index of +// _NullFlags, -1 if none), nullBitOf (user-field descriptor index → +// bit number within _NullFlags), and publicFieldCount. Call after +// fieldDescs has been populated. +func (a *DBFArea) buildNullIndex() { + a.nullFieldsIdx = -1 + a.nullBitOf = nil + for i := range a.fieldDescs { + if a.fieldDescs[i].GetName() == NullFlagsFieldName { + a.nullFieldsIdx = i + break + } + } + if a.nullFieldsIdx < 0 { + return + } + a.nullBitOf = make(map[int]int, 4) + bit := 0 + for i := range a.fieldDescs { + if i == a.nullFieldsIdx { + continue + } + if a.fieldDescs[i].Flags&FieldFlagNullable != 0 { + a.nullBitOf[i] = bit + bit++ + } + } +} + +// isFieldNull reports whether the given descriptor index currently +// holds SQL NULL. Only meaningful for fields marked nullable. +func (a *DBFArea) isFieldNull(fieldIdx int) bool { + if a.nullFieldsIdx < 0 || a.nullBitOf == nil { + return false + } + bit, ok := a.nullBitOf[fieldIdx] + if !ok { + return false + } + off := a.offsets[a.nullFieldsIdx] + byteIdx := bit / 8 + bitIdx := bit % 8 + if int(off)+byteIdx >= len(a.recBuf) { + return false + } + return a.recBuf[int(off)+byteIdx]&(1<= len(a.recBuf) { + return + } + mask := byte(1) << uint(bitIdx) + if isNull { + a.recBuf[int(off)+byteIdx] |= mask + } else { + a.recBuf[int(off)+byteIdx] &^= mask + } +} + +// countNullableFields returns the number of user fields (non-system) +// marked nullable — used at CREATE time to size the _NullFlags column. +func countNullableFields(fields []FieldDesc) int { + n := 0 + for i := range fields { + if fields[i].Flags&FieldFlagSystem != 0 { + continue + } + if fields[i].Flags&FieldFlagNullable != 0 { + n++ + } + } + return n +} + +// appendNullFlagsField returns fields with an appended _NullFlags +// system field sized to hold one bit per nullable user field. If no +// fields are nullable the input is returned unchanged. +func appendNullFlagsField(fields []FieldDesc) []FieldDesc { + n := countNullableFields(fields) + if n == 0 { + return fields + } + nBytes := (n + 7) / 8 + var fd FieldDesc + fd.SetName(NullFlagsFieldName) + fd.Type = '0' // Harbour VFP convention for _NullFlags + fd.Len = byte(nBytes) + fd.Dec = 0 + fd.Flags = FieldFlagSystem | FieldFlagBinary + return append(fields, fd) +} diff --git a/hbrdd/dbf/null_test.go b/hbrdd/dbf/null_test.go new file mode 100644 index 0000000..8f59efd --- /dev/null +++ b/hbrdd/dbf/null_test.go @@ -0,0 +1,178 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +package dbf + +import ( + "five/hbrdd" + "five/hbrt" + "path/filepath" + "testing" +) + +// TestNullFlagsCreateAndRead exercises the _NullFlags bitmap through +// create → append → write NIL → read-back → reopen. +func TestNullFlagsCreateAndRead(t *testing.T) { + dir := tempDir(t) + path := filepath.Join(dir, "null.dbf") + + drv := &DBFDriver{} + area, err := drv.Create(hbrdd.CreateParams{ + Path: path, + Fields: []hbrdd.FieldInfo{ + {Name: "ID", Type: 'N', Len: 4, Dec: 0}, + {Name: "NAME", Type: 'C', Len: 20, Flags: FieldFlagNullable}, + {Name: "AGE", Type: 'N', Len: 5, Dec: 0, Flags: FieldFlagNullable}, + }, + }) + if err != nil { + t.Fatal(err) + } + dbfArea := area.(*DBFArea) + + // Public FieldCount must hide _NullFlags — user-visible stays 3. + if dbfArea.FieldCount() != 3 { + t.Fatalf("public FieldCount = %d, want 3 (hidden _NullFlags leaking)", dbfArea.FieldCount()) + } + // Internal descriptor count includes _NullFlags. + if len(dbfArea.fieldDescs) != 4 { + t.Fatalf("fieldDescs len = %d, want 4", len(dbfArea.fieldDescs)) + } + if dbfArea.nullFieldsIdx != 3 { + t.Fatalf("nullFieldsIdx = %d, want 3", dbfArea.nullFieldsIdx) + } + // NAME is field index 1, AGE is index 2. Bit assignment goes in + // descriptor order among nullable columns → NAME=bit0, AGE=bit1. + if bit, ok := dbfArea.nullBitOf[1]; !ok || bit != 0 { + t.Fatalf("NAME bit = %d ok=%v, want bit 0", bit, ok) + } + if bit, ok := dbfArea.nullBitOf[2]; !ok || bit != 1 { + t.Fatalf("AGE bit = %d ok=%v, want bit 1", bit, ok) + } + + // Append three records: one fully populated, one with NIL name, + // one with both name and age NIL. The non-null row round-trips + // normally; the null rows must read back NIL. + if err := dbfArea.Append(); err != nil { + t.Fatal(err) + } + dbfArea.PutValue(0, hbrt.MakeInt(1)) + dbfArea.PutValue(1, hbrt.MakeString("alice")) + dbfArea.PutValue(2, hbrt.MakeInt(30)) + + if err := dbfArea.Append(); err != nil { + t.Fatal(err) + } + dbfArea.PutValue(0, hbrt.MakeInt(2)) + dbfArea.PutValue(1, hbrt.MakeNil()) // NAME null + dbfArea.PutValue(2, hbrt.MakeInt(40)) + + if err := dbfArea.Append(); err != nil { + t.Fatal(err) + } + dbfArea.PutValue(0, hbrt.MakeInt(3)) + dbfArea.PutValue(1, hbrt.MakeNil()) // NAME null + dbfArea.PutValue(2, hbrt.MakeNil()) // AGE null + dbfArea.Flush() + dbfArea.Close() + + // Re-open and verify null bits survive round-trip on disk. + area2, err := drv.Open(hbrdd.OpenParams{Path: path}) + if err != nil { + t.Fatal(err) + } + defer area2.Close() + d2 := area2.(*DBFArea) + + if d2.nullFieldsIdx != 3 { + t.Fatalf("after reopen nullFieldsIdx = %d, want 3", d2.nullFieldsIdx) + } + + // Record 1: all values present. + d2.GoTo(1) + if v, _ := d2.GetValue(1); v.IsNil() { + t.Errorf("rec1 NAME unexpectedly NIL") + } + if v, _ := d2.GetValue(2); v.IsNil() { + t.Errorf("rec1 AGE unexpectedly NIL") + } + + // Record 2: NAME null, AGE present. + d2.GoTo(2) + if v, _ := d2.GetValue(1); !v.IsNil() { + t.Errorf("rec2 NAME = %v, want NIL", v) + } + if v, _ := d2.GetValue(2); v.IsNil() || v.AsNumInt() != 40 { + t.Errorf("rec2 AGE = %v, want 40", v) + } + + // Record 3: both null. + d2.GoTo(3) + if v, _ := d2.GetValue(1); !v.IsNil() { + t.Errorf("rec3 NAME = %v, want NIL", v) + } + if v, _ := d2.GetValue(2); !v.IsNil() { + t.Errorf("rec3 AGE = %v, want NIL", v) + } +} + +// TestNullFlagsNoNullableFields ensures tables without any nullable +// columns get no _NullFlags column — keeping byte-identical layout to +// pre-nullable Five / upstream Harbour DBFs. +func TestNullFlagsNoNullableFields(t *testing.T) { + dir := tempDir(t) + path := filepath.Join(dir, "nonull.dbf") + + drv := &DBFDriver{} + area, err := drv.Create(hbrdd.CreateParams{ + Path: path, + Fields: []hbrdd.FieldInfo{ + {Name: "ID", Type: 'N', Len: 4}, + {Name: "NAME", Type: 'C', Len: 20}, + }, + }) + if err != nil { + t.Fatal(err) + } + d := area.(*DBFArea) + if len(d.fieldDescs) != 2 { + t.Fatalf("fieldDescs len = %d, want 2 (no _NullFlags should be added)", len(d.fieldDescs)) + } + if d.nullFieldsIdx != -1 { + t.Fatalf("nullFieldsIdx = %d, want -1", d.nullFieldsIdx) + } + d.Close() +} + +// TestNullFlagsClearsOnOverwrite verifies that writing a non-NIL +// value to a previously-NULL field clears the bit and the raw bytes +// become observable on read. +func TestNullFlagsClearsOnOverwrite(t *testing.T) { + dir := tempDir(t) + path := filepath.Join(dir, "overwrite.dbf") + + drv := &DBFDriver{} + area, _ := drv.Create(hbrdd.CreateParams{ + Path: path, + Fields: []hbrdd.FieldInfo{ + {Name: "V", Type: 'N', Len: 5, Dec: 0, Flags: FieldFlagNullable}, + }, + }) + d := area.(*DBFArea) + d.Append() + d.PutValue(0, hbrt.MakeNil()) + if v, _ := d.GetValue(0); !v.IsNil() { + t.Fatalf("after NIL put, GetValue = %v, want NIL", v) + } + // Overwrite with numeric — bit must clear. + d.PutValue(0, hbrt.MakeInt(42)) + if v, _ := d.GetValue(0); v.IsNil() || v.AsNumInt() != 42 { + t.Fatalf("after int put, GetValue = %v, want 42", v) + } + // And NIL again — bit must reset. + d.PutValue(0, hbrt.MakeNil()) + if v, _ := d.GetValue(0); !v.IsNil() { + t.Fatalf("after second NIL put, GetValue = %v, want NIL", v) + } + d.Close() +} diff --git a/hbrdd/driver.go b/hbrdd/driver.go index fd9868f..9fe4a6d 100644 --- a/hbrdd/driver.go +++ b/hbrdd/driver.go @@ -175,6 +175,16 @@ type OrderCreateParams struct { // Contract: caller must position the workarea (GoTo) before calling. // Returns the key value for the current record. KeyFunc func() hbrt.Value + + // ForFunc is the compiled counterpart of KeyFunc for the optional + // FOR expression. When non-nil the indexer calls it instead of + // parsing ForExpr as a string and running it through the macro + // evaluator — eliminates strings.Index/ToUpper/splitArgs per record + // in filtered-index builds and rebuilds. Returns true when the + // current record should be included. + // + // Contract: caller must position the workarea (GoTo) before calling. + ForFunc func() bool } // OrderInfo holds information about an index order. diff --git a/hbrdd/mem/memrdd.go b/hbrdd/mem/memrdd.go index a343e0a..a8d916d 100644 --- a/hbrdd/mem/memrdd.go +++ b/hbrdd/mem/memrdd.go @@ -24,6 +24,7 @@ import ( "sort" "strings" "sync" + "sync/atomic" ) // --- Driver --- @@ -103,14 +104,43 @@ func normalizeName(s string) string { // --- Table (shared data) --- type memTable struct { - mu sync.RWMutex + // mu serializes WRITERS only (Append/Delete/Recall/PutValue/Pack). + // Readers use records() — a lock-free atomic load of the current + // snapshot. Matches Harbour SHARED semantics: readers see a + // point-in-time view of the record slice; in-place field mutations + // are last-writer-wins (callers that need row consistency take an + // explicit RLock via the runtime's record-lock RTL). + mu sync.Mutex + // recordsP holds the current []memRecord snapshot. Stored as + // *[]memRecord to work with atomic.Pointer's typed API. Writers + // publish new slices via setRecords() after mutation; readers Load + // once per scan entry point. + recordsP atomic.Pointer[[]memRecord] + name string fields []hbrdd.FieldInfo - records []memRecord // all records - indexes []*memIndex // active indexes + indexes []*memIndex // active indexes openCount int } +// records returns the current record snapshot. Caller can iterate +// without holding any lock — the slice is immutable from the reader's +// perspective (mutations happen via COW + atomic swap for structural +// changes; in-place field writes are racy-but-tolerated per Harbour +// SHARED semantics). +func (tbl *memTable) records() []memRecord { + p := tbl.recordsP.Load() + if p == nil { + return nil + } + return *p +} + +// setRecords publishes a new snapshot. Caller must hold tbl.mu. +func (tbl *memTable) setRecords(r []memRecord) { + tbl.recordsP.Store(&r) +} + type memRecord struct { data []hbrt.Value // field values (0-based) deleted bool @@ -159,7 +189,7 @@ func newMemArea(tbl *memTable, alias string, drv *MemDriver) *memArea { eof: true, curIndex: -1, } - if len(tbl.records) > 0 { + if len(tbl.records()) > 0 { a.recNo = 1 a.eof = false } @@ -213,9 +243,7 @@ func (a *memArea) ClearFilter() error { func (a *memArea) HasFilter() bool { return a.filterBlock != nil } func (a *memArea) GoTo(recNo uint32) error { - a.tbl.mu.RLock() - count := uint32(len(a.tbl.records)) - a.tbl.mu.RUnlock() + count := uint32(len(a.tbl.records())) a.bof = false a.found = false @@ -230,9 +258,7 @@ func (a *memArea) GoTo(recNo uint32) error { } func (a *memArea) GoTop() error { - a.tbl.mu.RLock() - count := uint32(len(a.tbl.records)) - a.tbl.mu.RUnlock() + count := uint32(len(a.tbl.records())) a.bof = false a.found = false @@ -261,9 +287,7 @@ func (a *memArea) GoTop() error { } func (a *memArea) GoBottom() error { - a.tbl.mu.RLock() - count := uint32(len(a.tbl.records)) - a.tbl.mu.RUnlock() + count := uint32(len(a.tbl.records())) a.bof = false a.found = false @@ -296,9 +320,7 @@ func (a *memArea) Skip(count int64) error { return a.skipIndexed(count) } - a.tbl.mu.RLock() - total := uint32(len(a.tbl.records)) - a.tbl.mu.RUnlock() + total := uint32(len(a.tbl.records())) a.found = false @@ -335,7 +357,7 @@ func (a *memArea) skipIndexed(count int64) error { newPos := a.indexPos + int(count) if newPos >= len(idx.entries) { a.indexPos = len(idx.entries) - a.recNo = uint32(len(a.tbl.records)) + 1 + a.recNo = uint32(len(a.tbl.records())) + 1 a.eof = true } else { a.indexPos = newPos @@ -365,19 +387,16 @@ func (a *memArea) skipIndexed(count int64) error { func (a *memArea) RecNo() uint32 { return a.recNo } func (a *memArea) RecCount() (uint32, error) { - a.tbl.mu.RLock() - defer a.tbl.mu.RUnlock() - return uint32(len(a.tbl.records)), nil + return uint32(len(a.tbl.records())), nil } func (a *memArea) Deleted() bool { - a.tbl.mu.RLock() - defer a.tbl.mu.RUnlock() + recs := a.tbl.records() i := int(a.recNo) - 1 - if i < 0 || i >= len(a.tbl.records) { + if i < 0 || i >= len(recs) { return false } - return a.tbl.records[i].deleted + return recs[i].deleted } // --- Field access --- @@ -392,14 +411,16 @@ func (a *memArea) GetFieldInfo(index int) hbrdd.FieldInfo { } func (a *memArea) GetValue(fieldIndex int) (hbrt.Value, error) { - a.tbl.mu.RLock() - defer a.tbl.mu.RUnlock() - + // Hot path — lock-free read. The atomic load gives us a + // point-in-time snapshot; a concurrent PutValue mutating the same + // rec.data[fieldIndex] in place is tolerated (last-writer-wins, + // matches Harbour SHARED semantics). + recs := a.tbl.records() i := int(a.recNo) - 1 - if i < 0 || i >= len(a.tbl.records) { + if i < 0 || i >= len(recs) { return hbrt.MakeNil(), nil // phantom record } - rec := a.tbl.records[i] + rec := recs[i] if fieldIndex < 0 || fieldIndex >= len(rec.data) { return hbrt.MakeNil(), fmt.Errorf("field index %d out of range", fieldIndex) } @@ -410,14 +431,20 @@ func (a *memArea) PutValue(fieldIndex int, val hbrt.Value) error { a.tbl.mu.Lock() defer a.tbl.mu.Unlock() + recs := a.tbl.records() i := int(a.recNo) - 1 - if i < 0 || i >= len(a.tbl.records) { + if i < 0 || i >= len(recs) { return fmt.Errorf("no current record") } - if fieldIndex < 0 || fieldIndex >= len(a.tbl.records[i].data) { + if fieldIndex < 0 || fieldIndex >= len(recs[i].data) { return fmt.Errorf("field index %d out of range", fieldIndex) } - a.tbl.records[i].data[fieldIndex] = val + // In-place field write. Writers are serialized by mu; concurrent + // readers may observe the old or new value (no torn read since + // hbrt.Value fits in a single machine word + pointer, and Go + // guarantees pointer-sized stores are atomic). Matches Harbour + // SHARED: callers needing isolation take an explicit record lock. + recs[i].data[fieldIndex] = val return nil } @@ -445,8 +472,14 @@ func (a *memArea) Append() error { rec.data[j] = hbrt.MakeNil() } } - a.tbl.records = append(a.tbl.records, rec) - a.recNo = uint32(len(a.tbl.records)) + // Append: publish a grown slice via atomic swap. When the backing + // has capacity, Go's append reuses it — safe here because prior + // readers hold snapshots whose len() bounds are fixed, so they + // never read past their known length into the new slot. + recs := a.tbl.records() + recs = append(recs, rec) + a.tbl.setRecords(recs) + a.recNo = uint32(len(recs)) a.eof = false a.bof = false return nil @@ -455,9 +488,10 @@ func (a *memArea) Append() error { func (a *memArea) Delete() error { a.tbl.mu.Lock() defer a.tbl.mu.Unlock() + recs := a.tbl.records() i := int(a.recNo) - 1 - if i >= 0 && i < len(a.tbl.records) { - a.tbl.records[i].deleted = true + if i >= 0 && i < len(recs) { + recs[i].deleted = true } return nil } @@ -465,9 +499,10 @@ func (a *memArea) Delete() error { func (a *memArea) Recall() error { a.tbl.mu.Lock() defer a.tbl.mu.Unlock() + recs := a.tbl.records() i := int(a.recNo) - 1 - if i >= 0 && i < len(a.tbl.records) { - a.tbl.records[i].deleted = false + if i >= 0 && i < len(recs) { + recs[i].deleted = false } return nil } @@ -475,13 +510,16 @@ func (a *memArea) Recall() error { func (a *memArea) Pack() error { a.tbl.mu.Lock() defer a.tbl.mu.Unlock() - var kept []memRecord - for _, r := range a.tbl.records { + // Pack builds a fresh slice and swaps — old snapshot still + // iterable by any in-flight readers until they finish. + old := a.tbl.records() + kept := make([]memRecord, 0, len(old)) + for _, r := range old { if !r.deleted { kept = append(kept, r) } } - a.tbl.records = kept + a.tbl.setRecords(kept) a.recNo = 1 if len(kept) == 0 { a.eof = true @@ -492,7 +530,7 @@ func (a *memArea) Pack() error { func (a *memArea) Zap() error { a.tbl.mu.Lock() defer a.tbl.mu.Unlock() - a.tbl.records = nil + a.tbl.setRecords(nil) a.tbl.indexes = nil a.recNo = 1 a.eof = true @@ -512,7 +550,7 @@ func (a *memArea) CreateIndex(tag string, fieldIndex int, desc bool) { } // Build entries - for i, rec := range a.tbl.records { + for i, rec := range a.tbl.records() { if rec.deleted { continue } @@ -595,7 +633,7 @@ func (a *memArea) Seek(key hbrt.Value, soft bool) bool { // Not found a.found = false a.eof = true - a.recNo = uint32(len(a.tbl.records)) + 1 + a.recNo = uint32(len(a.tbl.records())) + 1 return false } diff --git a/hbrdd/ntx/mmap_windows.go b/hbrdd/ntx/mmap_windows.go index 3c665bf..fe14e79 100644 --- a/hbrdd/ntx/mmap_windows.go +++ b/hbrdd/ntx/mmap_windows.go @@ -1,16 +1,86 @@ //go:build windows +// Windows mmap — CreateFileMappingW + MapViewOfFile. Read-only view +// matches the POSIX PROT_READ|MAP_SHARED semantics the rest of the RDD +// code expects. The mapping HANDLE must stay alive alongside the view, +// so we stash it in a package-local registry keyed by the slice data +// pointer; Unmap looks it up and closes the handle after unmapping +// the view. + package ntx import ( - "errors" + "fmt" "os" + "sync" + "syscall" + "unsafe" +) + +const ( + pageReadonly = 0x02 + fileMapRead = 0x0004 +) + +var ( + kernel32 = syscall.NewLazyDLL("kernel32.dll") + procCreateFileMappingW = kernel32.NewProc("CreateFileMappingW") + procMapViewOfFile = kernel32.NewProc("MapViewOfFile") + procUnmapViewOfFile = kernel32.NewProc("UnmapViewOfFile") + procCloseHandle = kernel32.NewProc("CloseHandle") + + // handle registry — keyed by view pointer (uintptr of slice's + // first byte). Lets munmapFile recover the mapping handle given + // only the []byte the caller held. + mappingMu sync.Mutex + mappings = map[uintptr]syscall.Handle{} ) func mmapFile(f *os.File, size int) ([]byte, error) { - return nil, errors.New("mmap not supported on Windows") + if size <= 0 { + return nil, fmt.Errorf("mmap: non-positive size %d", size) + } + hFile := syscall.Handle(f.Fd()) + sizeHigh := uint32(uint64(size) >> 32) + sizeLow := uint32(uint64(size) & 0xFFFFFFFF) + hMap, _, err := procCreateFileMappingW.Call( + uintptr(hFile), 0, pageReadonly, + uintptr(sizeHigh), uintptr(sizeLow), 0, + ) + if hMap == 0 { + return nil, fmt.Errorf("CreateFileMapping: %v", err) + } + addr, _, err := procMapViewOfFile.Call( + hMap, fileMapRead, 0, 0, uintptr(size), + ) + if addr == 0 { + procCloseHandle.Call(hMap) + return nil, fmt.Errorf("MapViewOfFile: %v", err) + } + data := unsafe.Slice((*byte)(unsafe.Pointer(addr)), size) + + mappingMu.Lock() + mappings[addr] = syscall.Handle(hMap) + mappingMu.Unlock() + return data, nil } func munmapFile(data []byte) error { + if len(data) == 0 { + return nil + } + addr := uintptr(unsafe.Pointer(&data[0])) + mappingMu.Lock() + hMap, ok := mappings[addr] + delete(mappings, addr) + mappingMu.Unlock() + + r, _, err := procUnmapViewOfFile.Call(addr) + if r == 0 { + return fmt.Errorf("UnmapViewOfFile: %v", err) + } + if ok { + procCloseHandle.Call(uintptr(hMap)) + } return nil } diff --git a/hbrdd/ntx/ntx.go b/hbrdd/ntx/ntx.go index 1b4412c..33bab03 100644 --- a/hbrdd/ntx/ntx.go +++ b/hbrdd/ntx/ntx.go @@ -288,7 +288,8 @@ func (idx *Index) remapFile() { idx.mmapFile() } -func (idx *Index) KeyLen() int { return idx.keyLen } +func (idx *Index) KeyLen() int { return idx.keyLen } +func (idx *Index) KeyExpr() string { return idx.header.GetKeyExpr() } func (idx *Index) TestGetMmap() []byte { return idx.mmapData } func (idx *Index) Close() error { diff --git a/hbrdd/workarea.go b/hbrdd/workarea.go index 3c17ab7..5e13ee6 100644 --- a/hbrdd/workarea.go +++ b/hbrdd/workarea.go @@ -203,6 +203,25 @@ func parseAreaNum(s string) uint16 { return uint16(n) } +// EnumerateAreas invokes fn once per open workarea with (nWA, alias, area). +// Snapshot of the slot→area map is taken first so fn can safely manipulate +// workareas without mutating the loop. Used by the diagnostic ErrorLog +// writer to dump every open table's state. +func (wm *WorkAreaManager) EnumerateAreas(fn func(nWA uint16, alias string, area Area)) { + type slot struct { + num uint16 + alias string + area Area + } + snapshot := make([]slot, 0, len(wm.areas)) + for num, area := range wm.areas { + snapshot = append(snapshot, slot{num, area.Alias(), area}) + } + for _, s := range snapshot { + fn(s.num, s.alias, s.area) + } +} + // CloseAll closes all open work areas. func (wm *WorkAreaManager) CloseAll() { for num, area := range wm.areas { diff --git a/hbrt/class.go b/hbrt/class.go index fdcdd09..5990589 100644 --- a/hbrt/class.go +++ b/hbrt/class.go @@ -100,6 +100,20 @@ func RegisterClass(cls *ClassDef) uint16 { return cls.ID } +// ListClassNames returns all registered class names, sorted by registration +// order (1-based class IDs). Used by the diagnostic ErrorLog writer. +func ListClassNames() []string { + classRegMu.Lock() + defer classRegMu.Unlock() + out := make([]string, 0, len(classList)) + for _, c := range classList { + if c != nil { + out = append(out, c.Name) + } + } + return out +} + // FindClass looks up a class by name. func FindClass(name string) *ClassDef { classRegMu.Lock() diff --git a/hbrt/debug.go b/hbrt/debug.go index b229c97..e7c1065 100644 --- a/hbrt/debug.go +++ b/hbrt/debug.go @@ -16,6 +16,7 @@ package hbrt import ( "fmt" + "os" "strings" "sync" ) @@ -36,6 +37,12 @@ type Breakpoint struct { Function string // optional function name filter Enabled bool HitCount int + // Condition is an optional PRG expression. The breakpoint only + // stops when the expression evaluates truthy at hit-time. Empty + // string = unconditional. Evaluation runs through the same macro + // hook the `p` command uses, so LOCALs/fields/function calls all + // work. + Condition string } // DebugVarInfo describes a variable visible in the current scope. @@ -80,6 +87,7 @@ type Debugger struct { StepLevel int // call stack depth for step-over ToCursorMod string // target module for run-to-cursor ToCursorLine int // target line for run-to-cursor + Watches []string // PRG expressions auto-evaluated at each stop // Debug info tables (populated by generated code) LineInfo map[string]map[int]bool // module → set of valid lines @@ -164,14 +172,47 @@ func (d *Debugger) IsValidLine(module string, line int) bool { // --- Thread debug integration --- -// DebugLine is called by generated code at each PRG source line. -// This is the main debug hook — gengo emits t.DebugLine("file.prg", 42) +// DebugLineFast records the current PRG source position on the active +// frame — nothing more. gengo emits a call to this at every statement +// in non-debug builds so that error.log / panic traces still carry a +// line number, without paying for a full DebugLine dispatch (VM lookup +// + debugger flag check) per statement. The body is small enough that +// Go inlines it across the call boundary; in practice this compiles to +// a nil check + two word-sized stores. +// +// Keep the symbol name stable — gengo emits it by string. +func (t *Thread) DebugLineFast(module string, line int) { + if t.curFrame != nil { + t.curFrame.module = module + t.curFrame.line = line + } +} + +// DebugLine is the full debugger hook — line recording + trace ring + +// breakpoint/step dispatch. gengo only emits this when compiled with +// debug info (five debug ...), so the expensive path is off by default. func (t *Thread) DebugLine(module string, line int) { + // Always record on the current frame so panic sites know where we were. + if t.curFrame != nil { + t.curFrame.module = module + t.curFrame.line = line + } + vm := t.VM() if vm.Debugger == nil || !vm.Debugger.Enabled { return } + // Record in this thread's execution trace ring so "hist" can show + // the path taken to reach the break. Only under an attached + // debugger — keeps production runs allocation-free. + if t.traceRing == nil { + t.traceRing = make([]TraceEntry, TraceRingSize) + } + t.traceRing[t.traceHead] = TraceEntry{Module: module, Line: line} + t.traceHead = (t.traceHead + 1) % TraceRingSize + t.traceCount++ + dbg := vm.Debugger dbg.mu.Lock() mode := dbg.Mode @@ -222,6 +263,16 @@ func (t *Thread) DebugLine(module string, line int) { } } + // Conditional breakpoint: evaluate the expression with the current + // frame visible. If it's not truthy, pretend we didn't hit anything. + if hitBP != nil && hitBP.Condition != "" { + if !evalBPCondition(t, hitBP.Condition) { + shouldStop = false + hitBP = nil + reason = "" + } + } + if !shouldStop { return } @@ -267,17 +318,22 @@ func (t *Thread) DebugCallStack() []DebugStackFrame { } stack = append(stack, DebugStackFrame{ Function: name, + Module: frame.module, + Line: frame.line, Level: i, }) } return stack } -// DebugLocals returns local variables for the current frame. +// DebugLocals returns local variables for the current frame. If the +// emitter registered PRG-level names via SetLocalNames, those are used; +// otherwise falls back to "_1" / "_2" / ... placeholders. func (t *Thread) DebugLocals() []DebugVarInfo { if t.curFrame == nil { return nil } + names := t.curFrame.localNames var vars []DebugVarInfo for i := 0; i < t.curFrame.localCount; i++ { idx := t.curFrame.localBase + i @@ -286,8 +342,15 @@ func (t *Thread) DebugLocals() []DebugVarInfo { if i < t.curFrame.paramCount { scope = "PARAM" } + name := "" + if i < len(names) { + name = names[i] + } + if name == "" { + name = fmt.Sprintf("_%d", i+1) + } vars = append(vars, DebugVarInfo{ - Name: fmt.Sprintf("_%d", i+1), // placeholder name + Name: name, Value: t.locals[idx], Scope: scope, Index: i + 1, @@ -297,6 +360,112 @@ func (t *Thread) DebugLocals() []DebugVarInfo { return vars } +// EvalWithFrameLocals evaluates a PRG expression string with the +// current call frame's LOCALs visible as PRIVATEs. Used by both the +// debugger's `p` command and conditional-breakpoint checks. Returns +// the resulting Value plus an error string (empty on success). Any +// panic during eval is captured so a malformed expression can't crash +// the debug loop. +func (t *Thread) EvalWithFrameLocals(expr string) (result Value, evalErr string) { + defer func() { + if pv := recover(); pv != nil { + evalErr = fmt.Sprintf("%v", pv) + } + }() + + if macroEvalHook == nil { + return MakeNil(), "macro hook not installed" + } + + // Install LOCALs as PRIVATEs for the eval scope so bare-name + // references in the expression resolve to the current frame. + if t.Memvars != nil && t.curFrame != nil { + names := t.curFrame.localNames + t.Memvars.BeginPrivateScope(t.callSP) + defer t.Memvars.EndPrivateScope() + for i := 0; i < t.curFrame.localCount; i++ { + if i >= len(names) || names[i] == "" { + continue + } + idx := t.curFrame.localBase + i + if idx < len(t.locals) { + t.Memvars.SetPrivate(names[i], t.locals[idx], t.callSP) + } + } + } + + t.PushString(expr) + macroEvalHook(t) + result = t.Pop2() + return +} + +// evalBPCondition returns true when a breakpoint's Condition string +// evaluates truthy in the current frame. Non-logical results are +// coerced: NIL/.F./0/"" → false, everything else → true. Eval errors +// are reported to stderr and treated as "not hit" so a broken condition +// silently skips the stop instead of crashing. +func evalBPCondition(t *Thread, expr string) bool { + val, evalErr := t.EvalWithFrameLocals(expr) + if evalErr != "" { + fmt.Fprintf(os.Stderr, "debug: breakpoint condition %q failed: %s\n", expr, evalErr) + return false + } + switch { + case val.IsNil(): + return false + case val.IsLogical(): + return val.AsBool() + case val.IsNumeric(): + return val.AsNumInt() != 0 || val.AsNumDouble() != 0 + case val.IsString(): + return len(val.AsString()) > 0 + } + return true +} + +// Trace returns the last up-to-TraceRingSize PRG (module, line) pairs +// this thread executed while under the debugger, in chronological +// order (oldest first). Also returns the total count since the +// debugger attached — callers can compute "how many lines ago" as +// totalCount - index. +func (t *Thread) Trace() ([]TraceEntry, uint64) { + if t.traceRing == nil || t.traceCount == 0 { + return nil, 0 + } + n := len(t.traceRing) + have := int(t.traceCount) + if have > n { + have = n + } + out := make([]TraceEntry, have) + // Start index in the ring depends on whether we've wrapped. + var start int + if int(t.traceCount) <= n { + start = 0 + } else { + start = t.traceHead + } + for i := 0; i < have; i++ { + out[i] = t.traceRing[(start+i)%n] + } + return out, t.traceCount +} + +// SetLocalNames attaches the PRG-source variable names for params+locals +// to the current call frame. gengo emits a call to this right after +// Frame() so the debugger/error.log can show real names ("i", "nSum") +// instead of slot numbers. +// +// The names slice is expected to be function-lifetime immutable (gengo +// emits a package-level [...]string), so we store the pointer, not a +// copy. +func (t *Thread) SetLocalNames(names []string) { + if t.curFrame != nil { + t.curFrame.localNames = names + } +} + // currentFuncName returns the name of the currently executing function. func (t *Thread) currentFuncName() string { if t.callSP > 0 { diff --git a/hbrt/debugcli.go b/hbrt/debugcli.go index eb6e925..b4d1a09 100644 --- a/hbrt/debugcli.go +++ b/hbrt/debugcli.go @@ -1,5 +1,3 @@ -//go:build !windows - // Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. @@ -26,40 +24,11 @@ import ( "bufio" "fmt" "os" - "strconv" "strings" - "syscall" - "unsafe" ) -// Terminal mode helpers — restore cooked mode for debugger, re-enter raw for program -var savedTermios syscall.Termios -var termSaved bool - -func restoreCooked() { - fd := int(os.Stdin.Fd()) - var t syscall.Termios - syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5401, uintptr(unsafe.Pointer(&t)), 0, 0, 0) - if !termSaved { - savedTermios = t - termSaved = true - } - // Set cooked mode - t.Lflag |= syscall.ICANON | syscall.ECHO - t.Oflag |= syscall.OPOST - syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5402, uintptr(unsafe.Pointer(&t)), 0, 0, 0) -} - -func reenterRaw() { - fd := int(os.Stdin.Fd()) - var t syscall.Termios - syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5401, uintptr(unsafe.Pointer(&t)), 0, 0, 0) - t.Lflag &^= syscall.ICANON | syscall.ECHO | syscall.ISIG - t.Oflag &^= syscall.OPOST - t.Cc[syscall.VMIN] = 1 - t.Cc[syscall.VTIME] = 0 - syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5402, uintptr(unsafe.Pointer(&t)), 0, 0, 0) -} +// Terminal raw/cooked mode helpers live in termios_.go — their ioctl +// numbers differ per platform (Linux TCGETS vs macOS TIOCGETA). // CLIDebugger creates a DebugCallback for interactive terminal debugging. func CLIDebugger() DebugCallback { @@ -80,6 +49,26 @@ func CLIDebugger() DebugCallback { // Show source line if available showSourceLine(event.Module, event.Line) + // Auto-eval watches. Each watch expression is shown alongside + // its current value so the user doesn't have to re-type them + // after every step. Failed watches show the eval error inline + // instead of the value. + dbgForWatch := event.Thread.VM().Debugger + if dbgForWatch != nil && len(dbgForWatch.Watches) > 0 { + fmt.Println(" -- watches --") + for i, expr := range dbgForWatch.Watches { + v, evalErr := event.Thread.EvalWithFrameLocals(expr) + if evalErr != "" { + fmt.Printf(" [%d] %s ! %s\n", i, expr, evalErr) + } else { + fmt.Printf(" [%d] %s = %s\n", i, expr, describeDbgValue(v)) + } + } + } + + // CLI prompt loop — delegates each input line to runDebugCmd. + // runDebugCmd returns a Dbg* mode to resume execution, or + // cmdNoMode (-1) when the command just printed output. for { fmt.Printf("(dbg) ") line, err := reader.ReadString('\n') @@ -88,141 +77,14 @@ func CLIDebugger() DebugCallback { } line = strings.TrimSpace(line) if line == "" { - line = lastCmd // repeat last command + line = lastCmd } else { lastCmd = line } - parts := strings.Fields(line) - cmd := parts[0] - - switch cmd { - case "s", "step": - return DbgStepLine - - case "n", "next": - return DbgStepOver - - case "o", "out": - return DbgStepOut - - case "c", "cont", "continue": - return DbgContinue - - case "b", "break": - if len(parts) >= 2 { - lineNo, err := strconv.Atoi(parts[1]) - if err == nil { - mod := event.Module - if len(parts) >= 3 { - mod = parts[2] - } - dbg := event.Thread.VM().Debugger - idx := dbg.AddBreakpoint(mod, lineNo) - fmt.Printf(" Breakpoint %d at %s:%d\n", idx, mod, lineNo) - } else { - fmt.Println(" Usage: b [module]") - } - } else { - fmt.Println(" Usage: b [module]") - } - - case "d", "del", "delete": - if len(parts) >= 2 { - idx, err := strconv.Atoi(parts[1]) - if err == nil { - event.Thread.VM().Debugger.RemoveBreakpoint(idx) - fmt.Printf(" Breakpoint %d removed\n", idx) - } - } else { - fmt.Println(" Usage: d ") - } - - case "bl", "breakpoints": - dbg := event.Thread.VM().Debugger - if len(dbg.Breakpoints) == 0 { - fmt.Println(" No breakpoints") - } else { - for i, bp := range dbg.Breakpoints { - status := "ON " - if !bp.Enabled { - status = "OFF" - } - fmt.Printf(" %d: [%s] %s:%d (hits: %d)\n", i, status, bp.Module, bp.Line, bp.HitCount) - } - } - - case "l", "locals": - if len(event.Locals) == 0 { - fmt.Println(" No local variables") - } else { - for _, v := range event.Locals { - fmt.Printf(" %s [%s] %s = %s\n", v.Scope, fmt.Sprintf("%d", v.Index), v.Name, v.Value.String()) - } - } - - case "p", "print": - if len(parts) >= 2 { - varName := parts[1] - found := false - for _, v := range event.Locals { - if strings.EqualFold(v.Name, varName) || fmt.Sprintf("_%d", v.Index) == varName { - fmt.Printf(" %s = %s\n", v.Name, v.Value.String()) - found = true - break - } - } - if !found { - // Try by index - idx, err := strconv.Atoi(varName) - if err == nil && idx >= 1 && idx <= len(event.Locals) { - v := event.Locals[idx-1] - fmt.Printf(" %s = %s\n", v.Name, v.Value.String()) - } else { - fmt.Printf(" Variable '%s' not found\n", varName) - } - } - } else { - fmt.Println(" Usage: p ") - } - - case "bt", "backtrace", "stack": - if len(event.CallStack) == 0 { - fmt.Println(" Empty call stack") - } else { - for i, frame := range event.CallStack { - marker := " " - if i == 0 { - marker = "=>" - } - if frame.Module != "" { - fmt.Printf(" %s #%d %s() at %s:%d\n", marker, frame.Level, frame.Function, frame.Module, frame.Line) - } else { - fmt.Printf(" %s #%d %s()\n", marker, frame.Level, frame.Function) - } - } - } - - case "q", "quit": - fmt.Println(" Debugger quit.") - os.Exit(0) - - case "h", "help", "?": - fmt.Println(" Five Debugger Commands:") - fmt.Println(" s, step — step to next line") - fmt.Println(" n, next — step over function calls") - fmt.Println(" o, out — step out of current function") - fmt.Println(" c, cont — continue (run to next breakpoint)") - fmt.Println(" b — set breakpoint at line") - fmt.Println(" d — delete breakpoint n") - fmt.Println(" bl — list all breakpoints") - fmt.Println(" l — show local variables") - fmt.Println(" p — print variable value") - fmt.Println(" bt — show call stack") - fmt.Println(" q — quit") - - default: - fmt.Printf(" Unknown command: %s (type 'h' for help)\n", cmd) + mode := runDebugCmd(event, line, func(s string) { fmt.Println(s) }) + if mode != cmdNoMode { + return mode } } } @@ -253,3 +115,6 @@ func showSourceLine(module string, line int) { } } } + +// Shared helpers (parseBreakArgs, describeDbgValue, runDebugCmd) now +// live in debugcmd.go so the TUI can reuse them. diff --git a/hbrt/debugcli_windows.go b/hbrt/debugcli_windows.go deleted file mode 100644 index 12481f5..0000000 --- a/hbrt/debugcli_windows.go +++ /dev/null @@ -1,13 +0,0 @@ -//go:build windows - -package hbrt - -import "fmt" - -// CLIDebugger returns a no-op debug callback on Windows. -func CLIDebugger() DebugCallback { - return func(event *DebugEvent) int { - fmt.Println("[debugger not available on Windows]") - return 0 // continue - } -} diff --git a/hbrt/debugcmd.go b/hbrt/debugcmd.go new file mode 100644 index 0000000..5743de2 --- /dev/null +++ b/hbrt/debugcmd.go @@ -0,0 +1,387 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Shared debugger command dispatch. Both the CLI (gdb-style prompt) +// and the TUI (full-screen F-keys + `:` prompt) funnel parsed input +// strings through runDebugCmd so the surface area stays in one place. + +package hbrt + +import ( + "fmt" + "os" + "strconv" + "strings" +) + +// cmdNoMode is the sentinel returned by runDebugCmd when a command +// printed output and the debug loop should keep prompting — i.e. no +// mode transition is requested. +const cmdNoMode = -1 + +// runDebugCmd interprets one line of debugger input against the given +// event. Returns a DbgContinue / DbgStepLine / ... constant when the +// command resumes execution, or cmdNoMode to stay in the prompt loop. +// Prints results/errors using the supplied `out` writer so the TUI can +// buffer them for its status area while the CLI writes to stdout. +func runDebugCmd(event *DebugEvent, line string, out func(string)) int { + line = strings.TrimSpace(line) + if line == "" { + return cmdNoMode + } + if out == nil { + out = func(s string) { fmt.Println(s) } + } + + parts := strings.Fields(line) + cmd := parts[0] + dbg := event.Thread.VM().Debugger + + switch cmd { + case "s", "step": + return DbgStepLine + case "n", "next": + return DbgStepOver + case "o", "out": + return DbgStepOut + case "c", "cont", "continue": + return DbgContinue + + case "b", "break": + mod, lineNo, cond, ok := parseBreakArgs(line, parts, event.Module) + if !ok { + out(" Usage: b [module] [if ]") + return cmdNoMode + } + idx := dbg.AddBreakpoint(mod, lineNo) + if cond != "" { + dbg.Breakpoints[idx].Condition = cond + out(fmt.Sprintf(" Breakpoint %d at %s:%d if %s", idx, mod, lineNo, cond)) + } else { + out(fmt.Sprintf(" Breakpoint %d at %s:%d", idx, mod, lineNo)) + } + + case "u", "until": + if len(parts) >= 2 { + if lineNo, err := strconv.Atoi(parts[1]); err == nil && lineNo > 0 { + dbg.ToCursorMod = event.Module + dbg.ToCursorLine = lineNo + if len(parts) >= 3 { + dbg.ToCursorMod = parts[2] + } + return DbgToCursor + } + } + out(" Usage: u [module]") + + case "w", "watch": + if len(parts) < 2 { + if len(dbg.Watches) == 0 { + out(" No watches. Usage: w ") + } else { + for i, e := range dbg.Watches { + out(fmt.Sprintf(" [%d] %s", i, e)) + } + } + return cmdNoMode + } + expr := strings.TrimSpace(line[len(parts[0]):]) + dbg.Watches = append(dbg.Watches, expr) + out(fmt.Sprintf(" Watch %d: %s", len(dbg.Watches)-1, expr)) + + case "wd", "unwatch": + if len(parts) >= 2 { + if idx, err := strconv.Atoi(parts[1]); err == nil { + if idx >= 0 && idx < len(dbg.Watches) { + dbg.Watches = append(dbg.Watches[:idx], dbg.Watches[idx+1:]...) + out(fmt.Sprintf(" Watch %d removed", idx)) + return cmdNoMode + } + } + } + out(" Usage: wd ") + + case "d", "del", "delete": + if len(parts) >= 2 { + if idx, err := strconv.Atoi(parts[1]); err == nil { + dbg.RemoveBreakpoint(idx) + out(fmt.Sprintf(" Breakpoint %d removed", idx)) + return cmdNoMode + } + } + out(" Usage: d ") + + case "bl", "breakpoints": + if len(dbg.Breakpoints) == 0 { + out(" No breakpoints") + } else { + for i, bp := range dbg.Breakpoints { + status := "ON " + if !bp.Enabled { + status = "OFF" + } + cond := "" + if bp.Condition != "" { + cond = " if " + bp.Condition + } + out(fmt.Sprintf(" %d: [%s] %s:%d%s (hits: %d)", + i, status, bp.Module, bp.Line, cond, bp.HitCount)) + } + } + + case "l", "locals": + if len(event.Locals) == 0 { + out(" No local variables") + } else { + for _, v := range event.Locals { + out(fmt.Sprintf(" %s [%d] %s = %s", + v.Scope, v.Index, v.Name, describeDbgValue(v.Value))) + } + } + + case "p", "print": + if len(parts) < 2 { + out(" Usage: p ") + return cmdNoMode + } + expr := strings.TrimSpace(line[len(parts[0]):]) + v, evalErr := event.Thread.EvalWithFrameLocals(expr) + if evalErr != "" { + out(fmt.Sprintf(" eval failed: %s", evalErr)) + } else { + out(fmt.Sprintf(" %s = %s", expr, describeDbgValue(v))) + } + + case "diag", "d!": + // Full error.log-style dump at the break point — workareas, + // SET flags, runtime memory. Same renderer our DefaultErrorHook + // uses, so what you see here is what you'd get if the program + // had crashed instead of stopped. + if DebugDiagnosticHook == nil { + out(" (diagnostics unavailable — hook not installed)") + } else { + DebugDiagnosticHook(event.Thread, "", func(s string) { + for _, ln := range strings.Split(s, "\n") { + if ln != "" { + out(ln) + } + } + }) + } + + case "wa", "workareas": + if DebugDiagnosticHook == nil { + out(" (workarea info unavailable)") + } else { + DebugDiagnosticHook(event.Thread, "wa", func(s string) { + for _, ln := range strings.Split(s, "\n") { + if ln != "" { + out(ln) + } + } + }) + } + + case "set": + if DebugDiagnosticHook == nil { + out(" (SET state unavailable)") + } else { + DebugDiagnosticHook(event.Thread, "set", func(s string) { + for _, ln := range strings.Split(s, "\n") { + if ln != "" { + out(ln) + } + } + }) + } + + case "mem": + if DebugDiagnosticHook == nil { + out(" (memory stats unavailable)") + } else { + DebugDiagnosticHook(event.Thread, "mem", func(s string) { + for _, ln := range strings.Split(s, "\n") { + if ln != "" { + out(ln) + } + } + }) + } + + case "hist", "trace": + // Execution trace — last N PRG lines the current thread stepped + // through. Most useful for "how did control reach here?" when + // you break inside a deeply-nested helper and want to see the + // LOOP/IF/Call chain that led to it. Deduplicates adjacent + // repeats (tight FOR loops compress to "line X ×27") so the + // output stays readable in hot code. + trace, total := event.Thread.Trace() + if len(trace) == 0 { + out(" (no trace data — debugger just attached?)") + return cmdNoMode + } + // How many to show — default 50, or "hist N" + limit := 50 + if len(parts) >= 2 { + if n, err := strconv.Atoi(parts[1]); err == nil && n > 0 { + limit = n + } + } + if limit > len(trace) { + limit = len(trace) + } + view := trace[len(trace)-limit:] + // Collapse adjacent duplicates. + type run struct { + e TraceEntry + count int + } + var runs []run + for _, e := range view { + if n := len(runs); n > 0 && runs[n-1].e == e { + runs[n-1].count++ + continue + } + runs = append(runs, run{e, 1}) + } + out(fmt.Sprintf(" trace (last %d of %d) — newest last:", len(view), total)) + for _, r := range runs { + suffix := "" + if r.count > 1 { + suffix = fmt.Sprintf(" ×%d", r.count) + } + out(fmt.Sprintf(" %s:%d%s", r.e.Module, r.e.Line, suffix)) + } + + case "threads", "ts": + // Lists every live Thread managed by the VM with its current + // PRG source position. Useful for diagnosing multi-thread PRG + // programs (hb_Thread*, GoLaunch) where the debugger is + // currently attached to one thread but others may be blocked, + // looping, or crashed. Position is read from each thread's + // current frame — may show an older line for threads that + // haven't executed a DebugLine recently. + threads := event.Thread.VM().Threads() + if len(threads) == 0 { + out(" (no threads tracked)") + return cmdNoMode + } + for _, th := range threads { + marker := " " + if th == event.Thread { + marker = "=>" + } + mod, line := "", 0 + if f := th.CurFrame(); f != nil { + mod = f.module + line = f.line + } + name := "MAIN" + if f := th.CurFrame(); f != nil && f.symbol != nil { + name = f.symbol.Name + } + if mod == "" { + out(fmt.Sprintf(" %s [%d] %s", marker, th.TID(), name)) + } else { + out(fmt.Sprintf(" %s [%d] %s at %s:%d", + marker, th.TID(), name, mod, line)) + } + } + + case "bt", "backtrace", "stack": + if len(event.CallStack) == 0 { + out(" Empty call stack") + } else { + for i, frame := range event.CallStack { + marker := " " + if i == 0 { + marker = "=>" + } + if frame.Module != "" { + out(fmt.Sprintf(" %s #%d %s() at %s:%d", + marker, frame.Level, frame.Function, frame.Module, frame.Line)) + } else { + out(fmt.Sprintf(" %s #%d %s()", marker, frame.Level, frame.Function)) + } + } + } + + case "q", "quit": + out(" Debugger quit.") + os.Exit(0) + + case "h", "help", "?": + for _, ln := range debugCmdHelp { + out(ln) + } + + default: + out(fmt.Sprintf(" Unknown command: %s (type 'h' for help)", cmd)) + } + return cmdNoMode +} + +var debugCmdHelp = []string{ + " Five Debugger Commands:", + " s, step — step to next line", + " n, next — step over function calls", + " o, out — step out of current function", + " c, cont — continue (run to next breakpoint)", + " u — run until in current module", + " b [if E] — set breakpoint, optional condition", + " d — delete breakpoint n", + " bl — list all breakpoints", + " w — add watch expression", + " wd — remove watch n", + " w — list watches", + " l — show local variables", + " p — evaluate and print expression", + " bt — show call stack", + " wa — list open workareas + active index", + " set — SET state (DELETED, DATEFORMAT, ...)", + " mem — runtime memory / GC stats", + " diag — full diag dump (wa + set + mem)", + " threads, ts — list all live threads", + " hist, trace [N] — last N lines executed (how did we get here?)", + " q — quit", +} + +// describeDbgValue is now shared across platforms. It lives in +// debugcmd.go because debugcli.go is !windows-only and runDebugCmd +// needs this regardless of platform. +func describeDbgValue(v Value) string { + switch { + case v.IsNil(): + return "NIL" + case v.IsString(): + return fmt.Sprintf("%q", v.AsString()) + } + return v.String() +} + +// parseBreakArgs accepts: +// +// b +// b +// b if +// b if +func parseBreakArgs(rawLine string, parts []string, defaultMod string) (module string, line int, cond string, ok bool) { + if len(parts) < 2 { + return "", 0, "", false + } + lineNo, err := strconv.Atoi(parts[1]) + if err != nil || lineNo <= 0 { + return "", 0, "", false + } + module = defaultMod + lowered := strings.ToLower(rawLine) + if idx := strings.Index(lowered, " if "); idx > 0 { + cond = strings.TrimSpace(rawLine[idx+4:]) + rawLine = rawLine[:idx] + parts = strings.Fields(rawLine) + } + if len(parts) >= 3 { + module = parts[2] + } + return module, lineNo, cond, true +} diff --git a/hbrt/debugkey.go b/hbrt/debugkey.go new file mode 100644 index 0000000..b94efb2 --- /dev/null +++ b/hbrt/debugkey.go @@ -0,0 +1,60 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Cross-platform ANSI key-sequence decoder. Each termios_.go +// collects bytes from the terminal in raw mode, then hands them here +// for classification. The decoder output (ASCII or 0xE0-0xFC pseudo- +// code) is the same on every OS so debugtui.go can stay platform- +// neutral. + +package hbrt + +// decodeDebugKey translates a raw byte buffer captured in TTY/console +// raw mode into a single logical key. Returns 0 when nothing was read. +// Pseudo-codes 0xE0-0xE3 cover arrow keys; 0xF5-0xFC cover F5-F12. +func decodeDebugKey(buf []byte, n int) int { + if n == 0 { + return 0 + } + if buf[0] != 0x1B { + return int(buf[0]) + } + if n == 1 { + return 0x1B // bare ESC + } + if n >= 3 && buf[1] == '[' { + // Arrow keys: ESC [ A/B/C/D + switch buf[2] { + case 'A': + return 0xE0 // Up + case 'B': + return 0xE1 // Down + case 'C': + return 0xE2 // Right + case 'D': + return 0xE3 // Left + } + // F5-F12: ESC [ 1 5 ~ through ESC [ 2 4 ~ + if n >= 4 && buf[n-1] == '~' { + switch string(buf[2 : n-1]) { + case "15": + return 0xF5 + case "17": + return 0xF6 + case "18": + return 0xF7 + case "19": + return 0xF8 + case "20": + return 0xF9 + case "21": + return 0xFA + case "23": + return 0xFB + case "24": + return 0xFC + } + } + } + return 0 // ignore unknown ESC sequences — never quit on them +} diff --git a/hbrt/debugtui.go b/hbrt/debugtui.go index efd384d..bb06f1a 100644 --- a/hbrt/debugtui.go +++ b/hbrt/debugtui.go @@ -1,10 +1,10 @@ -//go:build !windows - // Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // Full-screen TUI debugger for Five — Harbour/Clipper debugger style. // Uses ANSI escape codes for terminal rendering. +// termSize() and readDebugKey() live in termios_.go because their +// low-level mechanics (ioctls vs Windows console API) don't share code. package hbrt @@ -12,8 +12,6 @@ import ( "fmt" "os" "strings" - "syscall" - "unsafe" ) // TUIDebugger creates a full-screen terminal debugger callback. @@ -77,7 +75,7 @@ func TUIDebugger() DebugCallback { // === Command Bar === fmt.Printf("\033[7m%-*s\033[0m", cols, - " F5:Go F7:Into F8:Step F9:Break F10:Over F11:Out L:Locals ESC:Quit") + " F5:Go F7:Into F8:Step F9:Break F10:Over F11:Out U:Until :Cmd P:Print W:Watch D:Diag ESC:Quit") // Wait for key key := readDebugKey() @@ -121,6 +119,38 @@ func TUIDebugger() DebugCallback { case 'c', 'C': // Continue return DbgContinue + case ':': // Command prompt — any full debugger command + if mode, ok := runTUIPrompt(event, rows, cols, ""); ok { + return mode + } + continue + + case 'p', 'P': // Quick print prompt — pre-fills "p " + if mode, ok := runTUIPrompt(event, rows, cols, "p "); ok { + return mode + } + continue + + case 'w': // Add watch — pre-fills "w " + if mode, ok := runTUIPrompt(event, rows, cols, "w "); ok { + return mode + } + continue + + case 'u', 'U': // Run until line — pre-fills "u " + if mode, ok := runTUIPrompt(event, rows, cols, "u "); ok { + return mode + } + continue + + case 'W': // Shift-W — clear watches + event.Thread.VM().Debugger.Watches = nil + continue + + case 'D', 'd': // D — diagnostics pop-up (workareas + SET + runtime) + showDiagPopup(event, rows, cols) + continue + case 0xE0, 0xE1, 0xE2, 0xE3: // Arrow keys — ignore continue @@ -131,6 +161,47 @@ func TUIDebugger() DebugCallback { } } +// runTUIPrompt pops up a bottom-line `:` prompt, reads a command, feeds +// it to runDebugCmd. Returns (mode, true) if the command resumed +// execution (c/s/n/o/u), or (_, false) to stay in the TUI loop. +// +// Captured output is shown on the line above the prompt for a moment +// so the user can see the result before the redraw. +func runTUIPrompt(event *DebugEvent, rows, cols int, prefill string) (int, bool) { + // Move to the command-bar row, clear it, enter cooked mode for input. + fmt.Printf("\033[%d;1H\033[2K", rows) + restoreCooked() + defer reenterRaw() + + fmt.Printf(":%s", prefill) + var buf [1024]byte + reader := os.Stdin + // Use a fresh read — bufio would complicate the one-shot prompt. + n, _ := reader.Read(buf[:]) + line := strings.TrimRight(prefill+string(buf[:n]), "\r\n") + + // Capture output so we can show it on the status line. + var outLines []string + mode := runDebugCmd(event, line, func(s string) { outLines = append(outLines, s) }) + + if len(outLines) > 0 { + // Print up to 3 output lines above the prompt row — enough for + // watch listings / breakpoint-set confirmations without + // obscuring the source view above. + show := outLines + if len(show) > 3 { + show = show[len(show)-3:] + } + for i, ln := range show { + fmt.Printf("\033[%d;1H\033[2K%s", rows-1-len(show)+i+1, ln) + } + // Give the user a beat to read, but they can skip with any key. + readDebugKey() + } + + return mode, mode != cmdNoMode +} + func drawSourceWindow(lines []string, curLine, height, width int, dbg *Debugger) { // Calculate visible range centered on current line start := curLine - height/2 @@ -185,52 +256,180 @@ func drawSourceWindow(lines []string, curLine, height, width int, dbg *Debugger) fmt.Printf("\033[36m\u2514%s\u2518\033[0m\r\n", strings.Repeat("\u2500", width-2)) } -func drawPanels(event *DebugEvent, height, localW, stackW int) { +func drawPanels(event *DebugEvent, height, localW, rightW int) { + // The right column splits vertically into Stack (top) and Watches + // (bottom). Split 50/50 but give watches at least 3 rows once any + // exist, else hand the whole column to stack. + dbg := event.Thread.VM().Debugger + nWatches := 0 + if dbg != nil { + nWatches = len(dbg.Watches) + } + contentRows := height - 2 // minus header+footer borders on each side + stackRows := contentRows + watchRows := 0 + if nWatches > 0 { + watchRows = contentRows / 2 + if watchRows < 3 { + watchRows = 3 + } + if watchRows > contentRows-2 { + watchRows = contentRows - 2 + } + stackRows = contentRows - watchRows - 1 // -1 for the divider + } + + // Pre-render rendered watch lines (outside the row loop so we don't + // re-evaluate expressions per row). + watchLines := make([]string, 0, nWatches) + if nWatches > 0 { + for i, expr := range dbg.Watches { + v, evalErr := event.Thread.EvalWithFrameLocals(expr) + var rendered string + if evalErr != "" { + rendered = fmt.Sprintf(" [%d] %s ! %s", i, expr, truncFit(evalErr, 20)) + } else { + rendered = fmt.Sprintf(" [%d] %s = %s", i, expr, describeDbgValue(v)) + } + watchLines = append(watchLines, rendered) + } + } + // Headers localHeader := fmt.Sprintf("\u250C\u2500 Locals %s\u2510", strings.Repeat("\u2500", localW-11)) - stackHeader := fmt.Sprintf("\u250C\u2500 Stack %s\u2510", strings.Repeat("\u2500", stackW-10)) + stackHeader := fmt.Sprintf("\u250C\u2500 Stack %s\u2510", strings.Repeat("\u2500", rightW-10)) fmt.Printf("\033[36m%s%s\033[0m\r\n", localHeader, stackHeader) - // Content rows - for i := 0; i < height-2; i++ { - // Left: locals + // Body rows — left is always locals, right alternates Stack then + // (optional) Watches, separated by a mid-panel header. + for i := 0; i < contentRows; i++ { localLine := "" if i < len(event.Locals) { v := event.Locals[i] - val := v.Value.String() - if len(val) > localW-8 { - val = val[:localW-11] + "..." - } - localLine = fmt.Sprintf(" %s = %s", v.Name, val) - } - if len(localLine) > localW-2 { - localLine = localLine[:localW-2] + val := describeDbgValue(v.Value) + localLine = truncFit(fmt.Sprintf(" %s = %s", v.Name, val), localW-2) } - // Right: call stack - stackLine := "" - if i < len(event.CallStack) { - f := event.CallStack[i] - if f.Module != "" { - stackLine = fmt.Sprintf(" %s() %s:%d", f.Function, f.Module, f.Line) - } else { - stackLine = fmt.Sprintf(" %s()", f.Function) + rightLine := "" + switch { + case i < stackRows: + if i < len(event.CallStack) { + f := event.CallStack[i] + if f.Module != "" { + rightLine = fmt.Sprintf(" %s() %s:%d", f.Function, f.Module, f.Line) + } else { + rightLine = fmt.Sprintf(" %s()", f.Function) + } + } + case i == stackRows && watchRows > 0: + // Mid-panel divider/header for the Watches sub-section. + rightLine = "─ Watches " + strings.Repeat("─", rightW-13) + default: + widx := i - stackRows - 1 + if widx >= 0 && widx < len(watchLines) { + rightLine = watchLines[widx] } } - if len(stackLine) > stackW-2 { - stackLine = stackLine[:stackW-2] - } + rightLine = truncFit(rightLine, rightW-2) - fmt.Printf("\033[36m\u2502\033[0m%-*s\033[36m\u2502\033[0m%-*s\033[36m\u2502\033[0m\r\n", - localW-2, localLine, stackW-2, stackLine) + fmt.Printf("\033[36m\u2502\033[0m%s\033[36m\u2502\033[0m%s\033[36m\u2502\033[0m\r\n", + padRunes(localLine, localW-2), padRunes(rightLine, rightW-2)) } // Bottom borders localFooter := fmt.Sprintf("\u2514%s\u2518", strings.Repeat("\u2500", localW-2)) - stackFooter := fmt.Sprintf("\u2514%s\u2518", strings.Repeat("\u2500", stackW-2)) + stackFooter := fmt.Sprintf("\u2514%s\u2518", strings.Repeat("\u2500", rightW-2)) fmt.Printf("\033[36m%s%s\033[0m\r\n", localFooter, stackFooter) } +// showDiagPopup renders the full diagnostic dump (workareas, SET flags, +// runtime) in a scrollable bottom panel. Press any key to dismiss. +// Reuses DebugDiagnosticHook so output matches error.log exactly. +func showDiagPopup(event *DebugEvent, rows, cols int) { + if DebugDiagnosticHook == nil { + return + } + var lines []string + DebugDiagnosticHook(event.Thread, "", func(s string) { + for _, ln := range strings.Split(s, "\n") { + if ln != "" { + lines = append(lines, ln) + } + } + }) + + // Clear and redraw as a full-screen scrolling view. Simplest: clear + // screen, print lines, show a "-- press any key --" footer. + start := 0 + for { + fmt.Print("\033[2J\033[H") + fmt.Printf("\033[7m%s\033[0m\r\n", padRunes(" Diagnostics (q/ESC to close, space/pgdn next, pgup prev) ", cols)) + + viewH := rows - 2 + end := start + viewH + if end > len(lines) { + end = len(lines) + } + for i := start; i < end; i++ { + fmt.Printf("%s\r\n", truncFit(lines[i], cols)) + } + // Pad remaining rows + for i := end - start; i < viewH; i++ { + fmt.Print("\r\n") + } + fmt.Printf("\033[7m%s\033[0m", padRunes( + fmt.Sprintf(" line %d-%d of %d ", start+1, end, len(lines)), cols)) + + key := readDebugKey() + switch key { + case 0x1B, 'q', 'Q': + return + case ' ', 10, 13, 0xE1: // space / enter / down-arrow + start += viewH + if start >= len(lines) { + start = len(lines) - 1 + } + case 'b', 'B', 0xE0: // back page / up-arrow + start -= viewH + if start < 0 { + start = 0 + } + default: + return + } + } +} + +// padRunes right-pads s with spaces so its display width is `width` +// runes. Used instead of Printf's "%-*s" which counts bytes, mangling +// UTF-8 box-drawing / CJK. Assumes each rune is one display column — +// not strictly true for CJK "wide" chars, but good enough for the +// Latin/box-drawing mix this debugger prints. +func padRunes(s string, width int) string { + runes := []rune(s) + if len(runes) >= width { + return string(runes[:width]) + } + return s + strings.Repeat(" ", width-len(runes)) +} + +// truncFit clamps s to at most width runes, appending "…" on truncation. +// Rune-aware so it doesn't cut UTF-8 box-drawing / CJK characters in +// the middle of a multi-byte sequence (which would render as mojibake). +func truncFit(s string, width int) string { + if width <= 0 { + return "" + } + runes := []rune(s) + if len(runes) <= width { + return s + } + if width <= 1 { + return string(runes[:width]) + } + return string(runes[:width-1]) + "…" +} + func loadSource(cache map[string][]string, filename string, sourceDir string) []string { if lines, ok := cache[filename]; ok { return lines @@ -259,78 +458,6 @@ func loadSource(cache map[string][]string, filename string, sourceDir string) [] return lines } -func termSize() (int, int) { - type winsize struct { - Row, Col, Xpixel, Ypixel uint16 - } - var ws winsize - _, _, _ = syscall.Syscall(syscall.SYS_IOCTL, uintptr(1), - uintptr(syscall.TIOCGWINSZ), uintptr(unsafe.Pointer(&ws))) - return int(ws.Row), int(ws.Col) -} - -// readDebugKey reads a key in raw mode for the debugger. -// Returns ASCII for normal keys, 0xF5-0xFB for F5-F11. -func readDebugKey() int { - // Temporarily set raw mode for key reading - fd := int(os.Stdin.Fd()) - var t syscall.Termios - syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5401, uintptr(unsafe.Pointer(&t)), 0, 0, 0) - raw := t - raw.Lflag &^= syscall.ICANON | syscall.ECHO - raw.Cc[syscall.VMIN] = 1 - raw.Cc[syscall.VTIME] = 0 - syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5402, uintptr(unsafe.Pointer(&raw)), 0, 0, 0) - defer syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), 0x5402, uintptr(unsafe.Pointer(&t)), 0, 0, 0) - - buf := make([]byte, 8) - n, _ := syscall.Read(fd, buf) - if n == 0 { - return 0 - } - - // ESC sequence - if buf[0] == 0x1B { - if n == 1 { - return 0x1B // bare ESC - } - if n >= 3 && buf[1] == '[' { - // Arrow keys: ESC [ A/B/C/D - switch buf[2] { - case 'A': - return 0xE0 // Up - case 'B': - return 0xE1 // Down - case 'C': - return 0xE2 // Right - case 'D': - return 0xE3 // Left - } - // F5-F11: ESC [ 1 5 ~ through ESC [ 2 4 ~ - if n >= 4 && buf[n-1] == '~' { - code := string(buf[2 : n-1]) - switch code { - case "15": - return 0xF5 // F5 - case "17": - return 0xF6 // F6 - case "18": - return 0xF7 // F7 - case "19": - return 0xF8 // F8 - case "20": - return 0xF9 // F9 - case "21": - return 0xFA // F10 - case "23": - return 0xFB // F11 - case "24": - return 0xFC // F12 - } - } - } - return 0 // ignore unknown ESC sequences (don't quit) - } - - return int(buf[0]) -} +// termSize() and readDebugKey() moved to termios_.go — the Unix +// implementations use ioctl TIOCGWINSZ + raw-mode termios, while the +// Windows version uses console APIs. diff --git a/hbrt/debugtui_windows.go b/hbrt/debugtui_windows.go deleted file mode 100644 index 07cadbf..0000000 --- a/hbrt/debugtui_windows.go +++ /dev/null @@ -1,13 +0,0 @@ -//go:build windows - -package hbrt - -import "fmt" - -// TUIDebugger returns a no-op debug callback on Windows. -func TUIDebugger() DebugCallback { - return func(event *DebugEvent) int { - fmt.Println("[TUI debugger not available on Windows]") - return 0 // continue - } -} diff --git a/hbrt/pcinterp.go b/hbrt/pcinterp.go index 975f3a5..4fb719b 100644 --- a/hbrt/pcinterp.go +++ b/hbrt/pcinterp.go @@ -78,6 +78,21 @@ func execPcodeBody(t *Thread, fn *PcodeFunc, mod *PcodeModule) { idx := int(binary.LittleEndian.Uint16(code[pc:])) pc += 2 t.PushLocal(idx) + case PcOpPushMemvar: + slen := int(binary.LittleEndian.Uint16(code[pc:])) + pc += 2 + name := string(code[pc : pc+slen]) + pc += slen + // Resolve through Memvars (PRIVATE shadows PUBLIC). + // Unknown names push NIL — matches Harbour behavior for + // undeclared memvars inside `&(expr)`. + if t.Memvars != nil { + if v, ok := t.Memvars.Get(name); ok { + t.push(v) + continue + } + } + t.PushNil() case PcOpPopLocal: idx := int(binary.LittleEndian.Uint16(code[pc:])) pc += 2 diff --git a/hbrt/pcode.go b/hbrt/pcode.go index e17219a..8e7d63f 100644 --- a/hbrt/pcode.go +++ b/hbrt/pcode.go @@ -104,6 +104,13 @@ const ( PcOpPopLogical byte = 0x70 // pop and store logical result PcOpPushBool byte = 0x71 // + 1 byte (0 or 1) + // Memvar lookup — runtime resolution of an unresolved identifier. + // Used by the macro evaluator and the debugger's expression evaluator: + // at compile time we don't know which LOCAL frame an identifier + // refers to, so we emit this op with the name and resolve at runtime + // via t.Memvars (PRIVATE/PUBLIC). Pushes NIL if the name isn't set. + PcOpPushMemvar byte = 0x72 // + uint16 len + name + // Line info (for debugging) PcOpLine byte = 0xFE // + uint16 lineNo PcOpHalt byte = 0xFF diff --git a/hbrt/termios_darwin.go b/hbrt/termios_darwin.go new file mode 100644 index 0000000..bc7c4bd --- /dev/null +++ b/hbrt/termios_darwin.go @@ -0,0 +1,79 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build darwin + +// Termios ioctl helpers for the debugger — macOS uses TIOCGETA/TIOCSETA +// (not Linux's TCGETS/TCSETS — the numbers are different). + +package hbrt + +import ( + "os" + "syscall" + "unsafe" +) + +const ( + ioctlGetTermios = syscall.TIOCGETA + ioctlSetTermios = syscall.TIOCSETA +) + +var ( + savedTermios syscall.Termios + termSaved bool +) + +func restoreCooked() { + fd := int(os.Stdin.Fd()) + var t syscall.Termios + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlGetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + if !termSaved { + savedTermios = t + termSaved = true + } + t.Lflag |= syscall.ICANON | syscall.ECHO + t.Oflag |= syscall.OPOST + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) +} + +func reenterRaw() { + fd := int(os.Stdin.Fd()) + var t syscall.Termios + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlGetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + t.Lflag &^= syscall.ICANON | syscall.ECHO | syscall.ISIG + t.Oflag &^= syscall.OPOST + t.Cc[syscall.VMIN] = 1 + t.Cc[syscall.VTIME] = 0 + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) +} + +func termSize() (int, int) { + type winsize struct { + Row, Col, Xpixel, Ypixel uint16 + } + var ws winsize + _, _, _ = syscall.Syscall(syscall.SYS_IOCTL, uintptr(1), + uintptr(syscall.TIOCGWINSZ), uintptr(unsafe.Pointer(&ws))) + return int(ws.Row), int(ws.Col) +} + +// readDebugKey puts stdin into raw mode just long enough to consume +// one keystroke / ANSI escape sequence, then restores the previous +// termios. The cross-platform decoder in debugkey.go turns the bytes +// into a logical key code. +func readDebugKey() int { + fd := int(os.Stdin.Fd()) + var t syscall.Termios + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlGetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + raw := t + raw.Lflag &^= syscall.ICANON | syscall.ECHO + raw.Cc[syscall.VMIN] = 1 + raw.Cc[syscall.VTIME] = 0 + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&raw)), 0, 0, 0) + defer syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + + buf := make([]byte, 8) + n, _ := syscall.Read(fd, buf) + return decodeDebugKey(buf, n) +} diff --git a/hbrt/termios_linux.go b/hbrt/termios_linux.go new file mode 100644 index 0000000..7da7cb8 --- /dev/null +++ b/hbrt/termios_linux.go @@ -0,0 +1,79 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build linux + +// Termios ioctl helpers for the debugger — Linux uses TCGETS/TCSETS. +// Kept in a tiny shim so debugcli/debugtui stay platform-neutral. + +package hbrt + +import ( + "os" + "syscall" + "unsafe" +) + +const ( + ioctlGetTermios = syscall.TCGETS + ioctlSetTermios = syscall.TCSETS +) + +var ( + savedTermios syscall.Termios + termSaved bool +) + +func restoreCooked() { + fd := int(os.Stdin.Fd()) + var t syscall.Termios + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlGetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + if !termSaved { + savedTermios = t + termSaved = true + } + t.Lflag |= syscall.ICANON | syscall.ECHO + t.Oflag |= syscall.OPOST + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) +} + +func reenterRaw() { + fd := int(os.Stdin.Fd()) + var t syscall.Termios + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlGetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + t.Lflag &^= syscall.ICANON | syscall.ECHO | syscall.ISIG + t.Oflag &^= syscall.OPOST + t.Cc[syscall.VMIN] = 1 + t.Cc[syscall.VTIME] = 0 + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) +} + +func termSize() (int, int) { + type winsize struct { + Row, Col, Xpixel, Ypixel uint16 + } + var ws winsize + _, _, _ = syscall.Syscall(syscall.SYS_IOCTL, uintptr(1), + uintptr(syscall.TIOCGWINSZ), uintptr(unsafe.Pointer(&ws))) + return int(ws.Row), int(ws.Col) +} + +// readDebugKey puts stdin into raw mode just long enough to consume +// one keystroke / ANSI escape sequence, then restores the previous +// termios. The cross-platform decoder in debugkey.go turns the bytes +// into a logical key code. +func readDebugKey() int { + fd := int(os.Stdin.Fd()) + var t syscall.Termios + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlGetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + raw := t + raw.Lflag &^= syscall.ICANON | syscall.ECHO + raw.Cc[syscall.VMIN] = 1 + raw.Cc[syscall.VTIME] = 0 + syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&raw)), 0, 0, 0) + defer syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), ioctlSetTermios, uintptr(unsafe.Pointer(&t)), 0, 0, 0) + + buf := make([]byte, 8) + n, _ := syscall.Read(fd, buf) + return decodeDebugKey(buf, n) +} diff --git a/hbrt/termios_windows.go b/hbrt/termios_windows.go new file mode 100644 index 0000000..a3cb7ce --- /dev/null +++ b/hbrt/termios_windows.go @@ -0,0 +1,124 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build windows + +// Windows console equivalent of the Unix termios helpers. Rather than +// dealing with ReadConsoleInput's VK_* keycodes we turn on +// ENABLE_VIRTUAL_TERMINAL_INPUT / PROCESSING on modern Windows (10+), +// which makes the console speak ANSI — same byte stream the Unix path +// sees. Everything above this file stays platform-neutral. + +package hbrt + +import ( + "os" + "syscall" + "unsafe" +) + +const ( + enableProcessedInput = 0x0001 + enableLineInput = 0x0002 + enableEchoInput = 0x0004 + enableVirtualTerminalInput = 0x0200 + + enableProcessedOutput = 0x0001 + enableVirtualTerminalProcessing = 0x0004 +) + +var ( + kernel32 = syscall.NewLazyDLL("kernel32.dll") + procGetConsoleMode = kernel32.NewProc("GetConsoleMode") + procSetConsoleMode = kernel32.NewProc("SetConsoleMode") + procGetConsoleScreenBufferInfo = kernel32.NewProc("GetConsoleScreenBufferInfo") +) + +type smallRect struct { + Left, Top, Right, Bottom int16 +} + +type consoleScreenBufferInfo struct { + Size struct{ X, Y int16 } + CursorPosition struct{ X, Y int16 } + Attributes uint16 + Window smallRect + MaximumWindowSize struct{ X, Y int16 } +} + +func getConsoleMode(h syscall.Handle) (uint32, bool) { + var mode uint32 + r, _, _ := procGetConsoleMode.Call(uintptr(h), uintptr(unsafe.Pointer(&mode))) + return mode, r != 0 +} + +func setConsoleMode(h syscall.Handle, mode uint32) { + _, _, _ = procSetConsoleMode.Call(uintptr(h), uintptr(mode)) +} + +// Remember the input mode at program start so we can restore it. +var ( + savedInMode uint32 + savedOutMode uint32 + termSaved bool +) + +func restoreCooked() { + hIn := syscall.Handle(os.Stdin.Fd()) + hOut := syscall.Handle(os.Stdout.Fd()) + if !termSaved { + if m, ok := getConsoleMode(hIn); ok { + savedInMode = m + } + if m, ok := getConsoleMode(hOut); ok { + savedOutMode = m + } + termSaved = true + } + // Cooked: line input + echo, plus VT so our ANSI rendering still works. + inMode := (savedInMode | enableProcessedInput | enableLineInput | enableEchoInput | enableVirtualTerminalInput) + setConsoleMode(hIn, inMode) + outMode := savedOutMode | enableProcessedOutput | enableVirtualTerminalProcessing + setConsoleMode(hOut, outMode) +} + +func reenterRaw() { + hIn := syscall.Handle(os.Stdin.Fd()) + hOut := syscall.Handle(os.Stdout.Fd()) + // Raw: no line input, no echo, but keep VT so F-keys arrive as + // ESC[15~ etc. and our ANSI writes still render. + inMode := (savedInMode &^ (enableLineInput | enableEchoInput | enableProcessedInput)) | enableVirtualTerminalInput + setConsoleMode(hIn, inMode) + outMode := savedOutMode | enableProcessedOutput | enableVirtualTerminalProcessing + setConsoleMode(hOut, outMode) +} + +func termSize() (int, int) { + hOut := syscall.Handle(os.Stdout.Fd()) + var info consoleScreenBufferInfo + r, _, _ := procGetConsoleScreenBufferInfo.Call(uintptr(hOut), uintptr(unsafe.Pointer(&info))) + if r == 0 { + return 24, 80 + } + cols := int(info.Window.Right - info.Window.Left + 1) + rows := int(info.Window.Bottom - info.Window.Top + 1) + return rows, cols +} + +// readDebugKey reads a single key with raw console mode. VT input is +// enabled so F-keys / arrows arrive as the same ANSI escape sequences +// the Unix build expects, and decodeDebugKey classifies them. +func readDebugKey() int { + hIn := syscall.Handle(os.Stdin.Fd()) + var before uint32 + if m, ok := getConsoleMode(hIn); ok { + before = m + } + rawMode := (before &^ (enableLineInput | enableEchoInput | enableProcessedInput)) | enableVirtualTerminalInput + setConsoleMode(hIn, rawMode) + defer setConsoleMode(hIn, before) + + buf := make([]byte, 8) + n, _ := os.Stdin.Read(buf) + return decodeDebugKey(buf, n) +} diff --git a/hbrt/thread.go b/hbrt/thread.go index 69629ff..42fee77 100644 --- a/hbrt/thread.go +++ b/hbrt/thread.go @@ -25,6 +25,9 @@ type CallFrame struct { localCount int // number of locals in this frame paramCount int // number of parameters passed retVal Value // return value + module string // current PRG source file (updated by DebugLine) + line int // current PRG source line + localNames []string // PRG-source names of params+locals (nil = none registered) } // CurFrame returns the current call frame (for closure capture). @@ -50,7 +53,31 @@ func (f *CallFrame) SetLocal(n int, v Value, locals []Value) { // // Each goroutine that runs Harbour code gets its own Thread. // No locking needed for stack/locals/calls — they are goroutine-local. +// The TID is VM-unique and assigned at construction time for debugger +// thread listing. +// TraceEntry captures one step of execution history — module+line where +// DebugLine fired. Populated only when the debugger is attached so +// regular runs don't pay the ring-buffer cost. +type TraceEntry struct { + Module string + Line int +} + +// Size of the per-thread execution trace ring buffer. 256 entries gives +// enough runway to answer "how did we get here?" across most loops +// without meaningfully bloating per-thread memory. +const TraceRingSize = 256 + type Thread struct { + tid uint32 + + // traceRing is a ring buffer of recent DebugLine hits. traceHead + // points at the slot for the next write. Total recorded entries + // across the program's lifetime for this thread is tracked via + // traceCount so the debugger can render "N lines ago". + traceRing []TraceEntry + traceHead int + traceCount uint64 // Eval stack (goroutine-local, no lock needed) stack []Value sp int // stack pointer (next free slot) @@ -544,6 +571,7 @@ type HbError struct { Args []Value SubSystem string GenCode int + Stack []DebugStackFrame // snapshot at panic site (pre-unwind) } func (e *HbError) Error() string { @@ -554,6 +582,7 @@ func (t *Thread) runtimeError(msg string) *HbError { return &HbError{ Description: msg, SubSystem: "BASE", + Stack: t.DebugCallStack(), } } @@ -564,6 +593,7 @@ func (t *Thread) argError(op string, args ...Value) *HbError { Args: args, SubSystem: "BASE", GenCode: 1, + Stack: t.DebugCallStack(), } } @@ -684,19 +714,34 @@ func (t *Thread) PopStatic(module string, n int) { } // --- Workarea context switching for (alias)->(expr) --- +// +// The waSel interfaces below use CurrentNum() (uint16 area index), NOT +// Current() (which returns the Area interface on WorkAreaManager). An +// earlier version required `Current() uint16` which silently failed the +// type assertion on the real hbrdd.WorkAreaManager implementation — +// `alias->(expr)` expressions appeared to "work" on the first area but +// collapsed to no-op as soon as a sibling area was opened, because the +// switch/save/restore block was skipped entirely. See repro in +// /tmp/repro_xarea.prg. func (t *Thread) WASaveAndSelect(areaNum int) { - type waSel interface{ SelectByNum(uint16); Current() uint16 } + type waSel interface { + SelectByNum(uint16) + CurrentNum() uint16 + } if wam, ok := t.WA.(waSel); ok { - t.waStack = append(t.waStack, wam.Current()) + t.waStack = append(t.waStack, wam.CurrentNum()) wam.SelectByNum(uint16(areaNum)) } } func (t *Thread) WASaveAndSelectAlias(alias string) { - type waSel interface{ SelectByAlias(string); Current() uint16 } + type waSel interface { + SelectByAlias(string) + CurrentNum() uint16 + } if wam, ok := t.WA.(waSel); ok { - t.waStack = append(t.waStack, wam.Current()) + t.waStack = append(t.waStack, wam.CurrentNum()) wam.SelectByAlias(alias) } } diff --git a/hbrt/value.go b/hbrt/value.go index 438feb1..b1c3853 100644 --- a/hbrt/value.go +++ b/hbrt/value.go @@ -272,6 +272,29 @@ func MakeString(s string) Value { } } +// MakeStringBytes creates a string Value that aliases the provided byte +// slice — no copy. The caller MUST guarantee the bytes remain valid and +// immutable for the Value's lifetime. Use this only when the source is +// stable storage (mmap region, string constant pool, builder-owned +// slab). The usual caller pattern: +// +// - DBF reads into mmap-backed recBuf: stable until Close/Pack/Zap. +// - Per-row scratch buffers: NOT safe — caller must pin. +// +// If you're unsure, use MakeString — the ~8-16 bytes saved per small +// string isn't worth a use-after-free. +func MakeStringBytes(b []byte) Value { + var s string + if len(b) > 0 { + s = unsafe.String(&b[0], len(b)) + } + hs := &HbString{Data: s} + return Value{ + info: makeInfo(tString, 0, uint32(len(b))), + ptr: unsafe.Pointer(hs), + } +} + // MakeArray creates an array Value. func MakeArray(size int) Value { ha := &HbArray{Items: make([]Value, size)} diff --git a/hbrt/vm.go b/hbrt/vm.go index 8486df3..2472ce7 100644 --- a/hbrt/vm.go +++ b/hbrt/vm.go @@ -3,7 +3,12 @@ package hbrt -import "sync" +import ( + "fmt" + "os" + "runtime/pprof" + "sync" +) // VM is the shared state across all threads. type VM struct { @@ -11,7 +16,8 @@ type VM struct { modules []*Module symbols map[string]*Symbol statics map[string][]Value - threads []*Thread // all threads created (for shutdown cleanup) + threads []*Thread // all threads created (for shutdown cleanup + debugger listing) + nextTID uint32 // monotonic thread id waFactory func() interface{} // creates WorkAreaManager for new threads onExit func() // called when Run() finishes (restore terminal etc.) Debugger *Debugger // nil = no debugging; set by five debug command @@ -151,11 +157,27 @@ func (t *Thread) GetSym(cache **Symbol, name string) *Symbol { func (vm *VM) NewThread() *Thread { t := NewThread(vm) vm.mu.Lock() + vm.nextTID++ + t.tid = vm.nextTID vm.threads = append(vm.threads, t) vm.mu.Unlock() return t } +// Threads returns a snapshot of all threads currently tracked by the +// VM. Used by the debugger's `threads` command. Returned slice is a +// copy — callers can iterate without holding any lock. +func (vm *VM) Threads() []*Thread { + vm.mu.RLock() + defer vm.mu.RUnlock() + out := make([]*Thread, len(vm.threads)) + copy(out, vm.threads) + return out +} + +// TID returns this thread's VM-unique id. Main thread gets 1. +func (t *Thread) TID() uint32 { return t.tid } + // Run starts execution from the named function. func (vm *VM) Run(funcName string) Value { // Register any library modules from init() @@ -189,9 +211,56 @@ func (vm *VM) Run(funcName string) Value { // Install signal handlers for clean shutdown vm.InstallSignalHandlers() - // Call the function, ensure full shutdown on exit + // Optional CPU profiling — FIVE_CPUPROFILE= writes a pprof + // file covering the whole program run. Used to collect default.pgo + // input for profile-guided compilation of Five-runtime code. + if path := os.Getenv("FIVE_CPUPROFILE"); path != "" { + if f, err := os.Create(path); err == nil { + if werr := pprof.StartCPUProfile(f); werr == nil { + defer f.Close() + defer pprof.StopCPUProfile() + defer fmt.Fprintf(os.Stderr, "CPU profile written to %s\n", path) + } else { + fmt.Fprintf(os.Stderr, "FIVE_CPUPROFILE: StartCPUProfile: %v\n", werr) + f.Close() + } + } else { + fmt.Fprintf(os.Stderr, "FIVE_CPUPROFILE: cannot create %s: %v\n", path, err) + } + } + + // Call the function, ensure full shutdown on exit. + // On unhandled *HbError, route through DefaultErrorHook (writes + // error.log) before letting the panic surface. The Go panic still + // propagates — we only add the diagnostic side effect. defer vm.Shutdown() + defer func() { + if r := recover(); r != nil { + if DefaultErrorHook != nil { + DefaultErrorHook(t, r) + } + panic(r) + } + }() + // Attach the symbol so the entry frame shows its name in stack traces. + // Normal calls go through Function() which sets pendingCallSym; direct + // VM.Run needs to do it manually. + t.pendingCallSym = sym sym.Func(t) return t.retVal } + +// DefaultErrorHook runs when an unhandled panic escapes Main. hbrtl sets +// this at package init to dump error.log. Nil by default — set once at +// startup, not swapped at runtime, so no synchronization. +var DefaultErrorHook func(t *Thread, panicValue interface{}) + +// DebugDiagnosticHook renders the error.log-style state dump (workareas, +// SET flags, runtime memory) for the debugger's `diag` command. hbrtl +// sets this at init time — keeping the renderers in hbrtl avoids a +// circular import (hbrdd → hbrt ← hbrt needs hbrdd types). +// +// section values: "" (everything), "wa", "set", "mem". Unknown sections +// fall back to everything. The hook writes one line per call to `emit`. +var DebugDiagnosticHook func(t *Thread, section string, emit func(string)) diff --git a/hbrtl/datetime.go b/hbrtl/datetime.go index 4552835..1957ecc 100644 --- a/hbrtl/datetime.go +++ b/hbrtl/datetime.go @@ -16,6 +16,7 @@ package hbrtl import ( "five/hbrt" "fmt" + "strconv" "strings" "time" ) @@ -262,16 +263,29 @@ func SToD(t *hbrt.Thread) { // CToD converts character to date using current SET DATE FORMAT. // Harbour: CToD("09/18/92") with SET DATE AMERICAN +// +// Pre-pass: unambiguous ISO forms (YYYY-MM-DD, YYYY.MM.DD, YYYY/MM/DD, +// and bare YYYYMMDD) parse directly regardless of SET DATE. Those +// formats have a fixed component order, so skipping the setDateFormat +// lookup lets code that uses ISO dates work without a SET DATE ANSI +// call up front. Non-ISO strings fall through to the original +// format-aware path so American / European users keep their semantics. func CToD(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProc() - s := t.Local(1).AsString() + s := strings.TrimSpace(t.Local(1).AsString()) if len(s) == 0 { t.PushValue(hbrt.MakeDate(0)) t.RetValue() return } + if y, m, d, ok := parseIsoDate(s); ok { + t.PushValue(hbrt.MakeDate(dateToJulian(y, m, d))) + t.RetValue() + return + } + // Parse according to current date format y, m, d := parseDateByFormat(s, setDateFormat) @@ -292,6 +306,59 @@ func CToD(t *hbrt.Thread) { t.RetValue() } +// parseIsoDate accepts YYYY-MM-DD, YYYY/MM/DD, YYYY.MM.DD, and bare +// YYYYMMDD. Returns ok=false for anything else so the caller can fall +// back to the format-aware parse. The 4-digit year anchor disambiguates +// from 2-digit-year American / European layouts. +func parseIsoDate(s string) (y, m, d int, ok bool) { + switch len(s) { + case 10: + if s[4] != '-' && s[4] != '/' && s[4] != '.' { + return + } + if s[7] != s[4] { + return + } + if yy, e := strconv.Atoi(s[0:4]); e == nil { + y = yy + } else { + return + } + if mm, e := strconv.Atoi(s[5:7]); e == nil { + m = mm + } else { + return + } + if dd, e := strconv.Atoi(s[8:10]); e == nil { + d = dd + } else { + return + } + case 8: + if yy, e := strconv.Atoi(s[0:4]); e == nil { + y = yy + } else { + return + } + if mm, e := strconv.Atoi(s[4:6]); e == nil { + m = mm + } else { + return + } + if dd, e := strconv.Atoi(s[6:8]); e == nil { + d = dd + } else { + return + } + default: + return + } + if y > 0 && m >= 1 && m <= 12 && d >= 1 && d <= 31 { + ok = true + } + return +} + // parseDateByFormat parses a date string according to format. func parseDateByFormat(s, format string) (y, m, d int) { // Find positions of Y, M, D in format diff --git a/hbrtl/errorlog.go b/hbrtl/errorlog.go new file mode 100644 index 0000000..ec5ecab --- /dev/null +++ b/hbrtl/errorlog.go @@ -0,0 +1,779 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +// Rich diagnostic error-log writer — Harbour FiveWin ErrSysW.prg style. +// +// Produces a structured `error.log` when an unhandled error fires. +// Sections: +// 1. Application: exe path, size, Go/OS version, start time, error time +// 2. Error: description, operation, subsystem, gencode, args +// 3. Stack: full call stack (function, source, line) +// 4. Workareas: every open area's alias, RecNo, RecCount, BOF, EOF, +// DELETED, active index tag, field list +// 5. Classes: every registered class name +// 6. Runtime: goroutines, MemStats, CPU count +// +// Wire-in from PRG: +// ErrorBlock({|e| HB_ErrorLog(e), Break(e)}) +// +// Control: +// HB_SetErrorLogPath(cPath) — default "./error.log" +// HB_SetErrorLogHook(bBlock) — receive (oError, cLogPath) after write +// +// Reference: harbour-core/contrib/FiveWin/source/ErrSysW.prg + +package hbrtl + +import ( + "fmt" + "os" + "os/user" + "runtime" + "runtime/debug" + "sort" + "strings" + "sync" + "time" + + "five/hbrdd" + "five/hbrt" +) + +var ( + errLogMu sync.Mutex + errLogPath = "error.log" + errLogHook hbrt.Value // optional block: receives (oErr, cPath) + errLogStartAt = time.Now() +) + +// Sensitive-key detection. Substring match on upper-cased key/arg name — +// false positives are acceptable, false negatives (leaked secret) are not. +// PWD is deliberately excluded — it collides with the Unix env var for +// "present working directory", which we want to keep visible. +var redactPatterns = []string{ + "PASSWORD", "PASSWD", + "SECRET", "TOKEN", "CREDENTIAL", "AUTH", + "APIKEY", "API_KEY", "ACCESS_KEY", "PRIVATE_KEY", + "SESSION", "COOKIE", "BEARER", +} + +// Env vars safe to include — anything outside this list is dropped even +// if it doesn't match a redact pattern. Rationale: a senior engineer +// needs locale/path info; nothing else is worth the leakage risk. +var envAllowlist = []string{ + "PATH", "LANG", "LC_ALL", "LC_CTYPE", "LC_TIME", "LC_NUMERIC", + "TZ", "HOME", "USER", "LOGNAME", "SHELL", "TERM", "PWD", + "GOMAXPROCS", "GOGC", "GOTRACEBACK", "GOOS", "GOARCH", + "FIVE_KEEP_BUILD", "HB_LANG", +} + +func isSensitiveKey(k string) bool { + u := strings.ToUpper(k) + for _, p := range redactPatterns { + if strings.Contains(u, p) { + return true + } + } + return false +} + +// redactArg masks the value portion of --flag=value / -flag=value when +// the flag name looks sensitive. Plain positional args that happen to +// look like secrets are left alone — we can't know without context. +func redactArg(a string) string { + for _, sep := range []string{"=", ":"} { + if idx := strings.Index(a, sep); idx > 0 { + key := strings.TrimLeft(a[:idx], "-") + if isSensitiveKey(key) { + return a[:idx+1] + "***REDACTED***" + } + } + } + return a +} + +// Previous-errors ring buffer — cascading failures usually have a +// precursor. Five entries is enough for most debugging sessions without +// bloating the log. +type ringEntry struct { + ts time.Time + desc string + op string + where string +} + +var ( + errRingMu sync.Mutex + errRing []ringEntry +) + +const errRingSize = 5 + +func recordError(desc, op, where string) { + errRingMu.Lock() + defer errRingMu.Unlock() + if len(errRing) >= errRingSize { + errRing = errRing[1:] + } + errRing = append(errRing, ringEntry{time.Now(), desc, op, where}) +} + +func snapshotErrRing() []ringEntry { + errRingMu.Lock() + defer errRingMu.Unlock() + out := make([]ringEntry, len(errRing)) + copy(out, errRing) + return out +} + +// init installs Five's default error handler and the debugger's +// diagnostic renderer. Any *HbError that escapes Main — array OOB, +// type mismatch, divide-by-zero, etc. — triggers an error.log dump +// without the PRG having to wire ErrorBlock. Matches Harbour/FiveWin's +// ErrorSys/ErrSysW default behavior. The diagnostic hook reuses the +// same section writers so the debugger's `diag` command shows +// error.log content at the break point. +func init() { + hbrt.DebugDiagnosticHook = func(t *hbrt.Thread, section string, emit func(string)) { + var b strings.Builder + switch section { + case "wa": + writeWorkareas(&b, t) + case "set": + writeSetState(&b) + case "mem": + writeRuntime(&b, time.Now()) + default: + emit("-- Workareas --") + writeWorkareas(&b, t) + emit(b.String()) + b.Reset() + emit("-- SET state --") + writeSetState(&b) + emit(b.String()) + b.Reset() + emit("-- Runtime --") + writeRuntime(&b, time.Now()) + emit(b.String()) + return + } + emit(b.String()) + } + + hbrt.DefaultErrorHook = func(t *hbrt.Thread, r interface{}) { + var oErr hbrt.Value + var stack []hbrt.DebugStackFrame + var desc, op string + switch v := r.(type) { + case *hbrt.HbError: + oErr = hbErrorToHash(v) + stack = v.Stack + desc, op = v.Description, v.Operation + case BreakValue: + oErr = v.Value + desc = describe(v.Value) + default: + oErr = hbrt.MakeString(fmt.Sprintf("%v", r)) + desc = fmt.Sprintf("%v", r) + } + + // Record the error in the ring buffer BEFORE writing the log so + // the "Previous errors" section includes this one as the tail. + where := "(unknown)" + if len(stack) > 0 { + where = fmt.Sprintf("%s (%s:%d)", stack[0].Function, stack[0].Module, stack[0].Line) + } + recordError(desc, op, where) + + body := buildErrorLog(t, oErr, stack) + errLogMu.Lock() + path := errLogPath + errLogMu.Unlock() + if werr := os.WriteFile(path, []byte(body), 0o644); werr != nil { + fmt.Fprintf(os.Stderr, "hb_errorlog: failed to write %s: %v\n", path, werr) + return + } + fmt.Fprintf(os.Stderr, "error logged to %s\n", path) + } +} + +// hbErrorToHash lifts the runtime's lightweight *HbError into the same +// hash shape used by ErrorNew / FiveWin's oErr, so the log writer has a +// uniform input. +func hbErrorToHash(e *hbrt.HbError) hbrt.Value { + h := &hbrt.HbHash{} + add := func(k string, v hbrt.Value) { + h.Keys = append(h.Keys, hbrt.MakeString(k)) + h.Values = append(h.Values, v) + } + add("DESCRIPTION", hbrt.MakeString(e.Description)) + add("OPERATION", hbrt.MakeString(e.Operation)) + add("SUBSYSTEM", hbrt.MakeString(e.SubSystem)) + add("GENCODE", hbrt.MakeInt(e.GenCode)) + add("SUBCODE", hbrt.MakeInt(0)) + add("SEVERITY", hbrt.MakeInt(2)) + add("OSCODE", hbrt.MakeInt(0)) + if len(e.Args) > 0 { + add("ARGS", hbrt.MakeArrayFrom(e.Args)) + } + h.Order = make([]int, len(h.Keys)) + for i := range h.Order { + h.Order[i] = i + } + return hbrt.MakeHashFrom(h) +} + +// HB_SetErrorLogPath(cPath) → cOldPath +func HbSetErrorLogPath(t *hbrt.Thread) { + nParams := t.ParamCount() + t.Frame(nParams, 0) + defer t.EndProc() + errLogMu.Lock() + old := errLogPath + if nParams >= 1 && !t.Local(1).IsNil() { + errLogPath = t.Local(1).AsString() + } + errLogMu.Unlock() + t.RetString(old) +} + +// HB_SetErrorLogHook(bBlock) → bOldBlock +func HbSetErrorLogHook(t *hbrt.Thread) { + nParams := t.ParamCount() + t.Frame(nParams, 0) + defer t.EndProc() + errLogMu.Lock() + old := errLogHook + if nParams >= 1 && t.Local(1).IsBlock() { + errLogHook = t.Local(1) + } + errLogMu.Unlock() + if old.IsNil() || !old.IsBlock() { + t.RetNil() + } else { + t.RetVal(old) + } +} + +// HB_ErrorLog(oError) → cLogPath +// +// Writes a full diagnostic dump for oError and returns the path. Callers +// typically wire it through ErrorBlock: +// +// ErrorBlock({|e| HB_ErrorLog(e), Break(e)}) +func HbErrorLog(t *hbrt.Thread) { + nParams := t.ParamCount() + t.Frame(nParams, 0) + defer t.EndProc() + + var oErr hbrt.Value + if nParams >= 1 { + oErr = t.Local(1) + } else { + oErr = hbrt.MakeNil() + } + + errLogMu.Lock() + path := errLogPath + hook := errLogHook + errLogMu.Unlock() + + body := buildErrorLog(t, oErr, nil) + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + // Log-write failures should not crash the program — dump to stderr + // so the operator sees something. + fmt.Fprintf(os.Stderr, "hb_errorlog: failed to write %s: %v\n", path, err) + } + + // User-supplied post-action — e.g. send to a remote endpoint, show a + // dialog, etc. Block receives (oErr, cPath). + if hook.IsBlock() { + t.PushValue(hook) + t.PushValue(oErr) + t.PushString(path) + t.PendingParams2(2) + hook.AsBlock().Fn(t) + _ = t.Pop2() // discard block return + } + + t.RetString(path) +} + +// buildErrorLog is pure-text composition so it can be unit-tested without +// actually writing to disk. If preStack is non-nil it overrides the live +// call stack — used by the DefaultErrorHook to show the PRG stack as it +// was at the moment of panic (before BEGIN SEQUENCE / EndProc unwound it). +func buildErrorLog(t *hbrt.Thread, oErr hbrt.Value, preStack []hbrt.DebugStackFrame) string { + var b strings.Builder + nowTs := time.Now() + + sect := func(title string) { + b.WriteString("\n") + b.WriteString(title) + b.WriteString("\n") + b.WriteString(strings.Repeat("=", len(title))) + b.WriteString("\n") + } + + // --- Application --- + sect("Application") + exe, _ := os.Executable() + fmt.Fprintf(&b, " Path and name: %s\n", exe) + if fi, err := os.Stat(exe); err == nil { + fmt.Fprintf(&b, " Size: %s bytes\n", addCommas(fi.Size())) + fmt.Fprintf(&b, " Built at: %s\n", fi.ModTime().Format("2006-01-02 15:04:05")) + } + fmt.Fprintf(&b, " Five runtime: Go %s, %s/%s\n", + runtime.Version(), runtime.GOOS, runtime.GOARCH) + if bi, ok := debug.ReadBuildInfo(); ok { + fmt.Fprintf(&b, " Module: %s\n", bi.Main.Path) + if bi.Main.Version != "" && bi.Main.Version != "(devel)" { + fmt.Fprintf(&b, " Version: %s\n", bi.Main.Version) + } + if rev := buildSetting(bi, "vcs.revision"); rev != "" { + mod := buildSetting(bi, "vcs.modified") + dirty := "" + if mod == "true" { + dirty = " (dirty)" + } + fmt.Fprintf(&b, " VCS: %s%s\n", rev, dirty) + } + } + host, _ := os.Hostname() + fmt.Fprintf(&b, " Host: %s, PID: %d\n", host, os.Getpid()) + elapsed := nowTs.Sub(errLogStartAt) + fmt.Fprintf(&b, " Time from start: %s\n", elapsed.Round(time.Millisecond)) + fmt.Fprintf(&b, " Error occurred at: %s\n", nowTs.Format("2006-01-02 15:04:05.000")) + + // --- Error description --- + sect("Error") + writeErrorSection(&b, oErr) + + // --- Stack trace --- + sect("Stack Calls") + frames := preStack + if frames == nil { + frames = t.DebugCallStack() + } + if len(frames) == 0 { + b.WriteString(" (no stack info available)\n") + } + for i, f := range frames { + fmt.Fprintf(&b, " [%3d] %s (%s:%d)\n", i, f.Function, f.Module, f.Line) + } + + // --- Workareas --- + sect("Workareas") + writeWorkareas(&b, t) + + // --- SET state --- + sect("SET state") + writeSetState(&b) + + // --- Classes --- + sect("Classes in use") + names := hbrt.ListClassNames() + sort.Strings(names) + for i, n := range names { + fmt.Fprintf(&b, " [%3d] %s\n", i+1, n) + } + if len(names) == 0 { + b.WriteString(" (no classes registered)\n") + } + + // --- Previous errors --- + sect("Previous errors") + writePrevErrors(&b) + + // --- Environment --- + sect("Environment") + writeEnvironment(&b) + + // --- Runtime --- + sect("Runtime") + writeRuntime(&b, nowTs) + + // --- Goroutine dump (only if the PRG actually spawned concurrency) --- + // Baseline is 3: main + signal handler + shutdown watcher. We only + // care when user code produced extra goroutines (hb_Thread*, channels, + // etc.), which is where concurrency bugs actually live. + if runtime.NumGoroutine() > 3 { + sect("Goroutines") + writeGoroutineDump(&b) + } + + return b.String() +} + +func buildSetting(bi *debug.BuildInfo, key string) string { + for _, s := range bi.Settings { + if s.Key == key { + return s.Value + } + } + return "" +} + +// writeSetState dumps the SET values a senior engineer actually looks +// at when reproducing an error: date handling, deleted filter, string +// comparison mode, open-mode default. +func writeSetState(b *strings.Builder) { + fmt.Fprintf(b, " DATEFORMAT: %s\n", GetSetDateFormat()) + fmt.Fprintf(b, " EPOCH: %d\n", GetSetEpoch()) + fmt.Fprintf(b, " DELETED: %v (filter-out hidden records)\n", GetSetDeleted()) + fmt.Fprintf(b, " EXACT: %v\n", GetSetExact()) + fmt.Fprintf(b, " SOFTSEEK: %v\n", GetSetSoftSeek()) + fmt.Fprintf(b, " DECIMALS: %d\n", GetSetDecimals()) +} + +// writePrevErrors prints the ring buffer of recent errors. The current +// error is recorded as the last entry, so the list doubles as a +// "errors leading to this one" trace. +func writePrevErrors(b *strings.Builder) { + entries := snapshotErrRing() + if len(entries) == 0 { + b.WriteString(" (none)\n") + return + } + for i, e := range entries { + age := time.Since(e.ts).Round(time.Millisecond) + fmt.Fprintf(b, " [%d] -%s %s (op: %s) at %s\n", + i+1, age, e.desc, e.op, e.where) + } +} + +// writeEnvironment writes non-secret context a senior engineer needs to +// tell dev-vs-prod apart. Sensitive env vars are filtered via an +// allowlist; command-line args are redacted on flag name match. +func writeEnvironment(b *strings.Builder) { + if cwd, err := os.Getwd(); err == nil { + fmt.Fprintf(b, " CWD: %s\n", cwd) + } + if u, err := user.Current(); err == nil { + fmt.Fprintf(b, " User: %s (uid=%s, gid=%s)\n", u.Username, u.Uid, u.Gid) + } + fmt.Fprintf(b, " TZ: %s\n", time.Now().Format("MST -0700")) + + if len(os.Args) > 0 { + fmt.Fprintf(b, " Args (%d):\n", len(os.Args)) + for i, a := range os.Args { + fmt.Fprintf(b, " [%d] %s\n", i, redactArg(a)) + } + } + + // Environment: allowlist only; anything else (including sensitive + // unknowns) is dropped on the floor. + shown := 0 + for _, k := range envAllowlist { + if v, ok := os.LookupEnv(k); ok { + if shown == 0 { + b.WriteString(" Env (allowlisted):\n") + } + if isSensitiveKey(k) { + v = "***REDACTED***" + } + fmt.Fprintf(b, " %s=%s\n", k, v) + shown++ + } + } +} + +// writeRuntime dumps Go scheduler + GC state. GC pause totals often +// reveal "the error happened because GC was running 30% of the time" +// class problems. +func writeRuntime(b *strings.Builder, nowTs time.Time) { + var ms runtime.MemStats + runtime.ReadMemStats(&ms) + fmt.Fprintf(b, " NumCPU: %d, GOMAXPROCS: %d, NumGoroutine: %d\n", + runtime.NumCPU(), runtime.GOMAXPROCS(0), runtime.NumGoroutine()) + fmt.Fprintf(b, " Alloc: %s, TotalAlloc: %s, Sys: %s\n", + addCommas(int64(ms.Alloc)), addCommas(int64(ms.TotalAlloc)), + addCommas(int64(ms.Sys))) + fmt.Fprintf(b, " HeapObjects: %d, HeapInuse: %s\n", + ms.HeapObjects, addCommas(int64(ms.HeapInuse))) + fmt.Fprintf(b, " NumGC: %d, PauseTotal: %s, GCCPUFraction: %.4f%%\n", + ms.NumGC, time.Duration(ms.PauseTotalNs).Round(time.Microsecond), + ms.GCCPUFraction*100) + if ms.NumGC > 0 { + lastGC := time.Unix(0, int64(ms.LastGC)) + fmt.Fprintf(b, " Last GC: %s ago\n", + nowTs.Sub(lastGC).Round(time.Millisecond)) + } + if n := openFDCount(); n >= 0 { + fmt.Fprintf(b, " Open FDs: %d\n", n) + } +} + +// writeGoroutineDump captures runtime.Stack — only invoked when > 1 +// goroutine is alive, since for single-threaded PRG programs this just +// duplicates the Stack Calls section. +func writeGoroutineDump(b *strings.Builder) { + buf := make([]byte, 64*1024) + n := runtime.Stack(buf, true) + if n == len(buf) { + // Grew past buffer — try one larger. Capping at 256K to avoid + // writing megabytes of logs on a runaway goroutine count. + buf = make([]byte, 256*1024) + n = runtime.Stack(buf, true) + } + b.Write(buf[:n]) + b.WriteString("\n") +} + +func writeErrorSection(b *strings.Builder, oErr hbrt.Value) { + if oErr.IsNil() { + b.WriteString(" (no error object supplied)\n") + return + } + if oErr.IsHash() { + writeHashErr(b, oErr) + return + } + // Unexpected shape — dump raw + fmt.Fprintf(b, " (unexpected error shape: type %s)\n value: %s\n", + typeName(oErr), describe(oErr)) +} + +// severityLabel maps the numeric ES_* constant to its Harbour name. +// Anything outside the known set falls back to the raw number. +func severityLabel(s int) string { + switch s { + case 0: + return "INFO" + case 1: + return "WARNING" + case 2: + return "ERROR" + case 3: + return "CATASTROPHIC" + } + return "UNKNOWN" +} + +// typeName returns a readable type label (mirrors Harbour's ValType plus +// a few Five-specific niceties). +func typeName(v hbrt.Value) string { + switch { + case v.IsNil(): + return "NIL" + case v.IsLogical(): + return "LOGICAL" + case v.IsNumeric(): + return "NUMERIC" + case v.IsString(): + return "STRING" + case v.IsDate(): + return "DATE" + case v.IsTimestamp(): + return "TIMESTAMP" + case v.IsArray(): + return "ARRAY" + case v.IsObject(): + return "OBJECT" + case v.IsHash(): + return "HASH" + case v.IsBlock(): + return "BLOCK" + case v.IsPointer(): + return "POINTER" + case v.IsSymbol(): + return "SYMBOL" + } + return "UNKNOWN" +} + +// describe renders a best-effort string form. Strings come back unquoted and +// truncated; everything else falls back to Value.String(). +func describe(v hbrt.Value) string { + if v.IsString() { + s := v.AsString() + if len(s) > 200 { + return s[:200] + "…" + } + return s + } + if v.IsNumeric() { + if v.IsInt() { + return fmt.Sprintf("%d", v.AsNumInt()) + } + return fmt.Sprintf("%g", v.AsNumDouble()) + } + return v.String() +} + +func writeHashErr(b *strings.Builder, oErr hbrt.Value) { + h := oErr.AsHash() + getStr := func(key string) string { + if idx := h.Lookup(hbrt.MakeString(key)); idx >= 0 { + return describe(h.Values[idx]) + } + return "" + } + getInt := func(key string) int64 { + if idx := h.Lookup(hbrt.MakeString(key)); idx >= 0 { + v := h.Values[idx] + if v.IsNumeric() { + return v.AsNumInt() + } + } + return 0 + } + + fmt.Fprintf(b, " Description: %s\n", getStr("DESCRIPTION")) + fmt.Fprintf(b, " Operation: %s\n", getStr("OPERATION")) + fmt.Fprintf(b, " SubSystem: %s (GenCode %d, SubCode %d)\n", + getStr("SUBSYSTEM"), getInt("GENCODE"), getInt("SUBCODE")) + fmt.Fprintf(b, " Severity: %s (%d), OsCode: %d\n", + severityLabel(int(getInt("SEVERITY"))), getInt("SEVERITY"), getInt("OSCODE")) + if fn := getStr("FILENAME"); fn != "" { + fmt.Fprintf(b, " FileName: %s\n", fn) + } + // Most recent file/DOS errors — often the *cause* of the Harbour + // error one level up (open failed → field access panic'd). + if lastFErr != 0 || lastDosErr != 0 { + fmt.Fprintf(b, " FError: %d, DosError: %d\n", lastFErr, lastDosErr) + } + if idx := h.Lookup(hbrt.MakeString("ARGS")); idx >= 0 { + args := h.Values[idx] + if args.IsArray() { + arr := args.AsArray() + if len(arr.Items) > 0 { + op := getStr("OPERATION") + // Blanket-redact all args when the operation itself looks + // like a credential-handling call (e.g. "LOGIN", "SIGNIN", + // "AUTHENTICATE") — we can't know which slot held the + // secret so we mask the lot. + mask := isSensitiveKey(op) + fmt.Fprintf(b, " Args (%d):\n", len(arr.Items)) + for i, a := range arr.Items { + val := describe(a) + if mask && a.IsString() { + val = "***REDACTED***" + } + fmt.Fprintf(b, " [%d] %s = %s\n", i+1, typeName(a), val) + } + } + } + } +} + +func writeWorkareas(b *strings.Builder, t *hbrt.Thread) { + wam, ok := t.WA.(*hbrdd.WorkAreaManager) + if !ok || wam == nil { + b.WriteString(" (no workarea manager)\n") + return + } + count := 0 + current := wam.CurrentNum() + wam.EnumerateAreas(func(nWA uint16, alias string, area hbrdd.Area) { + count++ + marker := " " + if nWA == current { + marker = "=> " + } + recCount, _ := area.RecCount() + fmt.Fprintf(b, " %s[%3d] %-15s driver=%s rec=%d/%d eof=%v bof=%v del=%v\n", + marker, nWA, alias, + area.Driver().Name(), area.RecNo(), recCount, + area.EOF(), area.BOF(), area.Deleted()) + + // Open mode — shared/exclusive/readonly — critical for "works on + // my machine" bugs. Optional via type assertion since the Area + // interface doesn't mandate these methods. + type openModer interface { + IsShared() bool + IsReadOnly() bool + } + if om, ok := area.(openModer); ok { + mode := "exclusive" + if om.IsShared() { + mode = "shared" + } + if om.IsReadOnly() { + mode += ", readonly" + } + fmt.Fprintf(b, " mode: %s\n", mode) + } + + // Active index: shows which ordering is applied to the area at + // the moment of error. An EOF'd workarea is often actually just + // "filter cut it off" — knowing the tag saves hours. + type orderInfo interface { + CurrentOrder() int + OrderInfo(ordNo int) (*hbrdd.OrderInfo, error) + } + if oi, ok := area.(orderInfo); ok { + if n := oi.CurrentOrder(); n > 0 { + if info, err := oi.OrderInfo(n); err == nil && info != nil { + fmt.Fprintf(b, " order: %s key=%q", + info.Name, info.KeyExpr) + if info.ForExpr != "" { + fmt.Fprintf(b, " for=%q", info.ForExpr) + } + if info.Unique { + b.WriteString(" unique") + } + if info.Descending { + b.WriteString(" desc") + } + b.WriteString("\n") + } + } + } + + // Fields + nF := area.FieldCount() + if nF > 0 { + fmt.Fprintf(b, " fields (%d): ", nF) + for i := 0; i < nF && i < 20; i++ { + fi := area.GetFieldInfo(i) + if i > 0 { + b.WriteString(", ") + } + fmt.Fprintf(b, "%s(%c/%d)", fi.Name, fi.Type, fi.Len) + } + if nF > 20 { + fmt.Fprintf(b, ", … %d more", nF-20) + } + b.WriteString("\n") + } + }) + if count == 0 { + b.WriteString(" (no open workareas)\n") + } +} + +// addCommas formats an int64 with thousands separators so big byte counts +// are legible in the log. No allocation beyond the strings.Builder. +func addCommas(n int64) string { + neg := n < 0 + if neg { + n = -n + } + s := fmt.Sprintf("%d", n) + if len(s) <= 3 { + if neg { + return "-" + s + } + return s + } + var out strings.Builder + if neg { + out.WriteByte('-') + } + // Insert commas every 3 digits from the right. + first := len(s) % 3 + if first > 0 { + out.WriteString(s[:first]) + if len(s) > first { + out.WriteByte(',') + } + } + for i := first; i < len(s); i += 3 { + out.WriteString(s[i : i+3]) + if i+3 < len(s) { + out.WriteByte(',') + } + } + return out.String() +} diff --git a/hbrtl/errorlog_fd_other.go b/hbrtl/errorlog_fd_other.go new file mode 100644 index 0000000..272c9f6 --- /dev/null +++ b/hbrtl/errorlog_fd_other.go @@ -0,0 +1,10 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build !darwin && !linux + +package hbrtl + +// openFDCount returns -1 on platforms without a simple fd-listing fs +// (Windows). The caller hides the line when the value is negative. +func openFDCount() int { return -1 } diff --git a/hbrtl/errorlog_fd_unix.go b/hbrtl/errorlog_fd_unix.go new file mode 100644 index 0000000..5b116be --- /dev/null +++ b/hbrtl/errorlog_fd_unix.go @@ -0,0 +1,25 @@ +// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) +// All rights reserved. + +//go:build darwin || linux + +package hbrtl + +import "os" + +// openFDCount reads /dev/fd (macOS) or /proc/self/fd (Linux). Returns -1 +// if the directory can't be listed — Windows falls back to the stub. +func openFDCount() int { + for _, path := range []string{"/proc/self/fd", "/dev/fd"} { + f, err := os.Open(path) + if err != nil { + continue + } + names, err := f.Readdirnames(-1) + f.Close() + if err == nil { + return len(names) + } + } + return -1 +} diff --git a/hbrtl/indexrtl.go b/hbrtl/indexrtl.go index 83ee2eb..9c0c7a2 100644 --- a/hbrtl/indexrtl.go +++ b/hbrtl/indexrtl.go @@ -102,6 +102,31 @@ func OrdCount(t *hbrt.Thread) { t.RetInt(0) } +// ORDLISTREBUILD — REINDEX equivalent. Rebuilds every attached index +// from current DBF data. Called at the tail of SQL DML (INSERT / +// UPDATE / DELETE) because `DBFArea.Append` / `PutValue` / `Delete` +// don't yet have per-key ordKeyAdd / ordKeyDel hooks — the full +// rebuild is the sledge-hammer that keeps the NTX on disk in sync +// with the .dbf. No-op when no index is attached. +func OrdListRebuild(t *hbrt.Thread) { + t.Frame(0, 0) + defer t.EndProc() + wam := getWA(t) + if wam == nil { + t.RetNil() + return + } + area := wam.Current() + if area == nil { + t.RetNil() + return + } + if idx, ok := area.(hbrdd.Indexer); ok { + _ = idx.OrderListRebuild() + } + t.RetNil() +} + // ORDNAME([nOrder [, cBagName]]) → cTagName func OrdName(t *hbrt.Thread) { nParams := t.ParamCount() @@ -416,6 +441,14 @@ func DbOrderInfo(t *hbrt.Thread) { n, _ := da.RecCount() t.RetInt(int64(n)) return + case dboiKeySize: + // Byte length of the stored keys for this order. TSqlIndex:BuildKey + // uses this to right-size numeric scope keys — otherwise a hard-coded + // Str(xValue, 10) produces bytes that don't align with the 8-byte + // index keys for N(8,0) columns, and ordScope silently fails to + // constrain the scan. + t.RetInt(int64(da.OrderKeyLen(ord))) + return } t.RetNil() @@ -653,6 +686,13 @@ func DbCreate(t *hbrt.Thread) { Len: row.Items[2].AsInt(), Dec: row.Items[3].AsInt(), } + // Optional 5th element: field flag byte (e.g. FieldFlagNullable + // = 0x02). Pre-nullable callers pass 4-element rows and leave + // Flags at zero, so the hidden _NullFlags column is only added + // when a caller explicitly opts a column in. + if len(row.Items) >= 5 && row.Items[4].IsNumeric() { + fields[i].Flags = byte(row.Items[4].AsInt()) + } } drv, err := hbrdd.GetDriver(cDriver) diff --git a/hbrtl/missing.go b/hbrtl/missing.go index 016d4f5..322edc7 100644 --- a/hbrtl/missing.go +++ b/hbrtl/missing.go @@ -8,6 +8,7 @@ package hbrtl import ( "five/hbrt" "math" + "os" "strings" ) @@ -352,13 +353,21 @@ func SelectFunc(t *hbrt.Thread) { } } -// File checks if file exists. +// File(cPath) → lExists. Harbour's File() also honours SET PATH +// and wildcards, but callers in Five use it as "does this exact +// path exist". A plain os.Stat covers that without pulling the +// whole Harbour SET PATH search order in — matches how HbFileExists +// (hb_FileExists) already behaves elsewhere. func FileFunc(t *hbrt.Thread) { t.Frame(1, 0) defer t.EndProcFast() - // Simple implementation - t.PushBool(false) - t.RetValue() + path := t.Local(1).AsString() + if path == "" { + t.RetBool(false) + return + } + _, err := os.Stat(path) + t.RetBool(err == nil) } // Inkey waits for keypress and returns key code. diff --git a/hbrtl/missing_fivesql.go b/hbrtl/missing_fivesql.go index 17fc497..d191b9f 100644 --- a/hbrtl/missing_fivesql.go +++ b/hbrtl/missing_fivesql.go @@ -5,6 +5,7 @@ package hbrtl import ( + "five/hbrdd" "five/hbrt" "os" "strings" @@ -83,12 +84,41 @@ func Used(t *hbrt.Thread) { t.RetBool(wam.Current() != nil) } -// DBSETINDEX — SET INDEX TO (adds index to current workarea) +// DBSETINDEX — SET INDEX TO (adds index to current workarea). +// Previously a no-op; the generated code path for the SET INDEX TO +// command bypasses this RTL, but SqlAttachTableIndexes (TSqlDDL.prg) +// needs a runtime call so auto-attaching PK / UNIQUE indexes at +// SqlExecOpenTable can happen without parser help. Missing file is +// swallowed — matches Harbour's soft-fail semantics and keeps +// pre-index tables silent. func rtlDbSetIndex(t *hbrt.Thread) { nParams := t.ParamCount() t.Frame(nParams, 0) defer t.EndProcFast() - // Delegate to the SET INDEX TO handler in the RDD layer - // For now, this is handled by the generated code's SET INDEX TO command. + if nParams < 1 { + t.RetNil() + return + } + path := t.Local(1).AsString() + if path == "" { + t.RetNil() + return + } + wam, ok := t.WA.(*hbrdd.WorkAreaManager) + if !ok || wam == nil { + t.RetNil() + return + } + area := wam.Current() + if area == nil { + t.RetNil() + return + } + // OrderListAdd lives on the optional Indexer interface — DBFNTX / + // DBFCDX implement it, MEMRDD does not. Type-assert and silently + // no-op on drivers without index support. + if idx, ok := area.(hbrdd.Indexer); ok { + _ = idx.OrderListAdd(path) + } t.RetNil() } diff --git a/hbrtl/procinfo.go b/hbrtl/procinfo.go index fb854cd..94d0359 100644 --- a/hbrtl/procinfo.go +++ b/hbrtl/procinfo.go @@ -244,11 +244,19 @@ func DbStruct(t *hbrt.Thread) { items := make([]hbrt.Value, nFields) for i := 0; i < nFields; i++ { fi := area.GetFieldInfo(i) + // 5-element row: name / type / len / dec / flags. Harbour + // dbStruct() is 4-element; the extra flags byte preserves + // FieldFlagNullable (and future system/binary/autoinc bits) + // across ALTER-TABLE table rebuilds so callers that feed + // dbStruct output back into dbCreate don't silently drop + // nullability. Four-element callers still index [1..4] as + // before. row := []hbrt.Value{ hbrt.MakeString(fi.Name), hbrt.MakeString(string(fi.Type)), hbrt.MakeInt(int(fi.Len)), hbrt.MakeInt(int(fi.Dec)), + hbrt.MakeInt(int(fi.Flags)), } items[i] = hbrt.MakeArrayFrom(row) } diff --git a/hbrtl/register.go b/hbrtl/register.go index 3ae1dfa..9a7945f 100644 --- a/hbrtl/register.go +++ b/hbrtl/register.go @@ -300,6 +300,9 @@ func RegisterRTL(vm *hbrt.VM) { hbrt.Sym("DOSERROR", hbrt.FsPublic, DosError), hbrt.Sym("FERROR", hbrt.FsPublic, FError), hbrt.Sym("BREAK", hbrt.FsPublic, Break), + hbrt.Sym("HB_ERRORLOG", hbrt.FsPublic, HbErrorLog), + hbrt.Sym("HB_SETERRORLOGPATH", hbrt.FsPublic, HbSetErrorLogPath), + hbrt.Sym("HB_SETERRORLOGHOOK", hbrt.FsPublic, HbSetErrorLogHook), // File I/O hbrt.Sym("FOPEN", hbrt.FsPublic, FOpen), @@ -447,6 +450,9 @@ func RegisterRTL(vm *hbrt.VM) { hbrt.Sym("FIELDLEN", hbrt.FsPublic, FieldLen), hbrt.Sym("FIELDDEC", hbrt.FsPublic, FieldDec), hbrt.Sym("ORDCOUNT", hbrt.FsPublic, OrdCount), + hbrt.Sym("ORDLISTREBUILD", hbrt.FsPublic, OrdListRebuild), + hbrt.Sym("ORDERLISTREBUILD", hbrt.FsPublic, OrdListRebuild), + hbrt.Sym("DBREINDEX", hbrt.FsPublic, OrdListRebuild), hbrt.Sym("ORDNAME", hbrt.FsPublic, OrdName), hbrt.Sym("ORDKEY", hbrt.FsPublic, OrdKey), hbrt.Sym("ORDFOR", hbrt.FsPublic, OrdFor), @@ -642,9 +648,13 @@ func RegisterRTL(vm *hbrt.VM) { hbrt.Sym("SQLORDERBY", hbrt.FsPublic, SqlOrderBy), hbrt.Sym("SQLGROUPBY", hbrt.FsPublic, SqlGroupBy), hbrt.Sym("SQLDISTINCT", hbrt.FsPublic, SqlDistinct), + hbrt.Sym("SQLUNIONDISTINCT", hbrt.FsPublic, SqlUnionDistinct), + hbrt.Sym("SQLBUILDSUBCACHEKEY", hbrt.FsPublic, SqlBuildSubCacheKey), hbrt.Sym("SQLEXPRHASAGG", hbrt.FsPublic, SqlExprHasAgg), hbrt.Sym("SQLBULKINSERT", hbrt.FsPublic, SqlBulkInsert), hbrt.Sym("SQLBULKUPDATE", hbrt.FsPublic, SqlBulkUpdate), + hbrt.Sym("SQLBULKDELETE", hbrt.FsPublic, SqlBulkDelete), + hbrt.Sym("SQLWINDOWSLIDEAGG", hbrt.FsPublic, SqlWindowSlideAgg), hbrt.Sym("SQLWINDOWPARTITIONS", hbrt.FsPublic, SqlWindowPartitions), hbrt.Sym("SQLGROUPROWS", hbrt.FsPublic, SqlGroupRows), hbrt.Sym("SQLCOMPUTEAGGSIMPLE", hbrt.FsPublic, SqlComputeAggSimple), diff --git a/hbrtl/sqlhelpers.go b/hbrtl/sqlhelpers.go index 76a075e..0bead9e 100644 --- a/hbrtl/sqlhelpers.go +++ b/hbrtl/sqlhelpers.go @@ -10,6 +10,7 @@ package hbrtl import ( + "fmt" "math" "strconv" "strings" @@ -176,6 +177,26 @@ func lexSQL(s string) []hbrt.Value { toks = append(toks, makeTokValue(tkComma, ",")) i++ case '.': + // Harbour logical literals inside SQL text: `.T.` / `.F.` / + // `.Y.` / `.N.`. Emit TK_NAME("TRUE"/"FALSE") so the + // parser's primary handles them alongside SQL TRUE/FALSE + // keywords without a dedicated token kind. Must precede + // the bare `.` → TK_DOT emission below, otherwise the + // three chars tokenize as DOT + NAME("T") + DOT and the + // INSERT column alignment drifts by two. + if i+2 < n && s[i+2] == '.' { + lit := s[i+1] + if lit == 't' || lit == 'T' || lit == 'y' || lit == 'Y' { + toks = append(toks, makeTokValue(tkName, "TRUE")) + i += 3 + continue + } + if lit == 'f' || lit == 'F' || lit == 'n' || lit == 'N' { + toks = append(toks, makeTokValue(tkName, "FALSE")) + i += 3 + continue + } + } toks = append(toks, makeTokValue(tkDot, ".")) i++ case '*': @@ -446,6 +467,22 @@ func sqlCoerceStr(v hbrt.Value) string { return "T" } return "F" + case v.IsDate(): + // Date → "YYYYMMDD" (the DToS canonical form). Previously + // dates fell through to the empty-string default, so any + // `WHERE date_col = '20240115'` comparison silently + // compared "" to the literal and returned 0 rows. YYYYMMDD + // is format-independent and matches how Harbour's DToS / + // HbSToD pair encodes dates for byte-stable round-trip. + y, m, d := julianToDate(v.AsJulian()) + return fmt.Sprintf("%04d%02d%02d", y, m, d) + case v.IsTimestamp(): + y, m, d := julianToDate(v.AsJulian()) + ms := v.AsTimeMs() + hh := ms / 3600000 + mm := (ms % 3600000) / 60000 + ss := (ms % 60000) / 1000 + return fmt.Sprintf("%04d%02d%02d%02d%02d%02d", y, m, d, hh, mm, ss) } return "" } @@ -549,9 +586,50 @@ func sqlCmpEq(a, b hbrt.Value) bool { if a.IsString() && b.IsNumeric() { return parseLeadingNumeric(a.AsString()) == b.AsNumDouble() } + // Cross-type D / C coercion. SQL tests often write the right-hand + // side as a literal "YYYYMMDD" string (the DToS canonical form); + // without this arm the comparison fell through to false and + // `WHERE hired = '20240115'` silently returned no rows. + if a.IsDate() && b.IsString() { + return sqlCmpDateStr(a, b) + } + if a.IsString() && b.IsDate() { + return sqlCmpDateStr(b, a) + } return false } +// sqlCmpDateStr returns true when the date's YYYYMMDD form equals the +// string operand after trim + separator strip. Accepts both DToS form +// (20260425) and the more common ISO/SQL forms (2026-04-25, 2026/04/25, +// 2026.04.25). Without normalization, `WHERE d = '2026-04-25'` silently +// returned no rows because the literal didn't match the YYYYMMDD form. +func sqlCmpDateStr(d, s hbrt.Value) bool { + y, m, day := julianToDate(d.AsJulian()) + return fmt.Sprintf("%04d%02d%02d", y, m, day) == normalizeDateStr(s.AsString()) +} + +// normalizeDateStr strips common date separators ('-', '/', '.') so +// '2026-04-25', '2026/04/25', '2026.04.25', '20260425' all collapse +// to '20260425'. Caller is responsible for ensuring the input is +// date-shaped; non-date strings are passed through with separators +// removed (harmless — a comparison against a date will still fail). +func normalizeDateStr(s string) string { + s = strings.TrimSpace(s) + if !strings.ContainsAny(s, "-/.") { + return s + } + var b strings.Builder + b.Grow(len(s)) + for i := 0; i < len(s); i++ { + c := s[i] + if c != '-' && c != '/' && c != '.' { + b.WriteByte(c) + } + } + return b.String() +} + // SqlCmpLt(a, b) → lBool // Case-insensitive less-than with cross-type N↔C coercion. func SqlCmpLt(t *hbrt.Thread) { @@ -583,6 +661,24 @@ func sqlCmpLt(a, b hbrt.Value) bool { if a.IsString() && b.IsNumeric() { return parseLeadingNumeric(a.AsString()) < b.AsNumDouble() } + // Cross-type D / C: compare DToS form lexicographically (YYYYMMDD + // sorts identically to chronological order for well-formed strings). + // Normalize the string operand so 'YYYY-MM-DD' / 'YYYY/MM/DD' / + // 'YYYY.MM.DD' compare correctly, not just bare 'YYYYMMDD'. Without + // this, `WHERE d > '2026-06-01'` collapsed to a string compare of + // '20260425' < '2026-06-01' which is false because '2' < '2', '0' < '0' + // proceeds until '4' vs '-' (45 vs 45 — actually '4' = 0x34, '-' = 0x2d) + // → '4' > '-' so `'20260425' < '2026-06-01'` is false → all dates + // returned as "less than" → all rows match. Confusing but the symptom + // was every WHERE date > ISO-string returning the full table. + if a.IsDate() && b.IsString() { + y, m, d := julianToDate(a.AsJulian()) + return fmt.Sprintf("%04d%02d%02d", y, m, d) < normalizeDateStr(b.AsString()) + } + if a.IsString() && b.IsDate() { + y, m, d := julianToDate(b.AsJulian()) + return normalizeDateStr(a.AsString()) < fmt.Sprintf("%04d%02d%02d", y, m, d) + } return false } diff --git a/hbrtl/sqlscan.go b/hbrtl/sqlscan.go index c690f0d..3d143f5 100644 --- a/hbrtl/sqlscan.go +++ b/hbrtl/sqlscan.go @@ -33,7 +33,7 @@ import ( "strings" ) -// SqlScan(aFieldPositions, pcWhere) → aRows +// SqlScan(aFieldPositions, pcWhere, nLimitHint) → aRows // // Scans the current workarea top-to-bottom, evaluates pcWhere per row // (nil = no filter), collects selected column values into rows. @@ -42,6 +42,11 @@ import ( // Resolve once before calling (FieldPos cache is O(1) // but still has PRG → Go call overhead). // pcWhere: pcode function pointer from PcCompile, or NIL. +// nLimitHint: optional early-termination cap. Zero / NIL means +// scan the whole table. The caller is responsible for +// verifying that the scan order matches the requested +// result order (either no ORDER BY, or an index tag +// that was already focused by OrdSetFocus). // // Returns: // Array of rows, each row = Array of field values. @@ -51,7 +56,7 @@ import ( // We don't trim here — that's a semantic choice, and callers who need // raw bytes shouldn't pay for a strings.TrimSpace(). func SqlScan(t *hbrt.Thread) { - t.Frame(2, 0) + t.Frame(3, 0) defer t.EndProc() // Parse arguments @@ -72,6 +77,13 @@ func SqlScan(t *hbrt.Thread) { } } + limitHint := 0 + if limitVal := t.Local(3); !limitVal.IsNil() { + if n := int(limitVal.AsNumInt()); n > 0 { + limitHint = n + } + } + // Pre-convert field positions to []int (avoid Value->int per row) fieldPos := make([]int, nFields) for i := 0; i < nFields; i++ { @@ -115,10 +127,17 @@ func SqlScan(t *hbrt.Thread) { estRows := 1024 if rc, err := area.RecCount(); err == nil && rc > 0 { estRows = int(rc) - if estRows > 1 << 20 { + if estRows > 1<<20 { estRows = 1 << 20 } } + // LIMIT pushdown: cap the initial backing allocation when the + // caller guarantees we'll stop after at most `limitHint` rows. + // Avoids allocating RecCount-sized buffers for `LIMIT 10` queries + // on million-row tables. + if limitHint > 0 && limitHint < estRows { + estRows = limitHint + } rows := make([]hbrt.Value, 0, estRows) flat := make([]hbrt.Value, 0, estRows*nFields) slab := hbrt.NewArraySlab(estRows) @@ -155,6 +174,10 @@ func SqlScan(t *hbrt.Thread) { // // Four combinations = four loop copies. Painful but each row save // counts when we're reaching for raw RDD parity. + // LIMIT pushdown: when limitHint > 0 each loop bails out as soon + // as we've collected enough rows. The caller guarantees scan order + // matches result order (no ORDER BY, or matched index tag focused + // before the call), so clipping early preserves correctness. switch { case dbfArea != nil && whereFn != nil: dbfArea.GoTop() @@ -174,6 +197,9 @@ func SqlScan(t *hbrt.Thread) { row[i] = v } rows = append(rows, slab.WrapNext(row)) + if limitHint > 0 && len(rows) >= limitHint { + break + } } dbfArea.Skip(1) } @@ -194,6 +220,9 @@ func SqlScan(t *hbrt.Thread) { row[i] = v } rows = append(rows, slab.WrapNext(row)) + if limitHint > 0 && len(rows) >= limitHint { + break + } dbfArea.Skip(1) } case whereFn != nil: @@ -214,6 +243,9 @@ func SqlScan(t *hbrt.Thread) { row[i] = v } rows = append(rows, slab.WrapNext(row)) + if limitHint > 0 && len(rows) >= limitHint { + break + } } area.Skip(1) } @@ -233,6 +265,9 @@ func SqlScan(t *hbrt.Thread) { row[i] = v } rows = append(rows, slab.WrapNext(row)) + if limitHint > 0 && len(rows) >= limitHint { + break + } area.Skip(1) } } @@ -1100,6 +1135,115 @@ func SqlDistinct(t *hbrt.Thread) { t.RetValue() } +// SqlUnionDistinct(aLeft, aRight) → aMerged +// +// Streaming DISTINCT for the SQL UNION operator. Builds a hash set +// keyed on each row's canonical composite key (same format used by +// SqlDistinct) over aLeft, then walks aRight once pushing only rows +// whose key isn't already seen. Replaces the PRG idiom of appending +// both arrays in full then calling SqlDistinct, which materialised +// the intermediate merged array and walked every row twice — once +// to append, once to rebuild the dedup hash. +// +// Output matches `aLeft ++ filter(aRight, unseen)`: left rows stay +// first and in their original order, right rows are appended in +// their original order after dedup against left + each other. +// Same byte-for-byte dedup decision as SqlDistinct. +func SqlUnionDistinct(t *hbrt.Thread) { + t.Frame(2, 0) + defer t.EndProc() + + leftVal := t.Local(1) + rightVal := t.Local(2) + if !leftVal.IsArray() { + if rightVal.IsArray() { + t.PushValue(rightVal) + } else { + t.PushValue(hbrt.MakeArray(0)) + } + t.RetValue() + return + } + leftRows := leftVal.AsArray().Items + var rightRows []hbrt.Value + if rightVal.IsArray() { + rightRows = rightVal.AsArray().Items + } + + nL := len(leftRows) + nR := len(rightRows) + seen := make(map[string]struct{}, nL+nR) + out := make([]hbrt.Value, 0, nL+nR) + var sb strings.Builder + + keyOf := func(v hbrt.Value) string { + sb.Reset() + if ra := v.AsArray(); ra != nil { + for _, item := range ra.Items { + appendValueHashKey(&sb, item) + sb.WriteByte('|') + } + } + return sb.String() + } + + for i := 0; i < nL; i++ { + if leftRows[i].AsArray() == nil { + continue + } + k := keyOf(leftRows[i]) + if _, dup := seen[k]; dup { + continue + } + seen[k] = struct{}{} + out = append(out, leftRows[i]) + } + for i := 0; i < nR; i++ { + if rightRows[i].AsArray() == nil { + continue + } + k := keyOf(rightRows[i]) + if _, dup := seen[k]; dup { + continue + } + seen[k] = struct{}{} + out = append(out, rightRows[i]) + } + + t.PushValue(hbrt.MakeArrayFrom(out)) + t.RetValue() +} + +// SqlBuildSubCacheKey(nId, aValues) → cKey +// +// Builds the composite cache key for a correlated subquery: +// "@||..." +// where key(v) uses the canonical appendValueHashKey encoding (same +// as SqlDistinct / SqlWindow hash keys). Replaces the per-outer-row +// PRG loop of `hb_ntos(nId) + "@" + SqlValToStr(v1) + "|" + ...` which +// allocated a fresh string on every concatenation and paid the PRG +// dispatch on every SqlValToStr / ValType probe. For correlated +// subqueries over large outer tables this was the dominant cost on +// cache hits — where the point of the cache is to be cheap. +func SqlBuildSubCacheKey(t *hbrt.Thread) { + t.Frame(2, 0) + defer t.EndProc() + + nId := t.Local(1).AsNumInt() + valsArg := t.Local(2) + + var sb strings.Builder + sb.WriteString(strconvItoa(nId)) + sb.WriteByte('@') + if valsArg.IsArray() { + for _, v := range valsArg.AsArray().Items { + appendValueHashKey(&sb, v) + sb.WriteByte('|') + } + } + t.RetString(sb.String()) +} + // SqlComputeAggSimple(aGR, nCol, cFunc) → xResult // // Fast path for COUNT / SUM / AVG / MIN / MAX when the argument is a @@ -2111,9 +2255,65 @@ func SqlBulkUpdate(t *hbrt.Thread) { dbfArea.Flush() } + // Index maintenance. DBFArea.PutValue patches record bytes but does + // not delete + re-add index keys, so any index whose expression + // references one of the updated fields goes stale. We rebuild those + // indexes on the spot rather than leaving divergent state behind. + // + // Triggering condition: an index is open AND at least one updated + // field name appears in any index's key expression. We over-match + // by substring (so "ID" matches a compound expression like + // "DEPT+ID"), which is conservative — spurious rebuilds of indexes + // that happened to share a substring but don't really reference + // the field, never the reverse. Tables with no open indexes or + // with indexes that don't cover the updated columns skip the + // rebuild entirely, preserving the B13 UPDATE hot-path timing. + if nAffected > 0 && sqlBulkUpdateNeedsIndexRebuild(dbfArea, fieldPos) { + _ = dbfArea.OrderListRebuild() + } + t.RetInt(int64(nAffected)) } +// sqlBulkUpdateNeedsIndexRebuild reports whether any open index on the +// workarea references any of the just-written columns. Called once at +// the end of SqlBulkUpdate, so the hot path stays per-record-free. +func sqlBulkUpdateNeedsIndexRebuild(a *dbf.DBFArea, fieldPos []int) bool { + nOrd := a.IndexCount() + if nOrd == 0 { + return false + } + // Collect upper-cased names of the updated fields. + fieldNames := make([]string, 0, len(fieldPos)) + for _, idx := range fieldPos { + if idx < 0 || idx >= a.FieldCount() { + continue + } + name := strings.ToUpper(strings.TrimRight(a.GetFieldInfo(idx).Name, "\x00 ")) + if name != "" { + fieldNames = append(fieldNames, name) + } + } + if len(fieldNames) == 0 { + return false + } + for i := 1; i <= nOrd; i++ { + expr := strings.ToUpper(a.OrderKeyExpr(i)) + if expr == "" { + // Index opened without a KeyExpr (legacy OrderListAdd path + // prior to the NTX header read). Conservatively rebuild — + // we can't prove the index doesn't cover these fields. + return true + } + for _, name := range fieldNames { + if strings.Contains(expr, name) { + return true + } + } + } + return false +} + // waCacheEnabledSafe reads the cache flag under its lock — fast enough // to call on every Bulk path, avoids the PRG→Go round-trip. func waCacheEnabledSafe() bool { @@ -2156,6 +2356,406 @@ func sqlBulkUpdateGeneric(t *hbrt.Thread, area hbrdd.Area, whereFn *hbrt.PcodeFu return nAffected } +// SqlBulkDelete(pcWhere) → nAffected +// +// Go-native DELETE scan loop — counterpart to SqlBulkUpdate for pure +// DELETE FROM t WHERE ... statements. Replaces the PRG pattern: +// +// dbGoTop() +// WHILE ! Eof() +// IF xWhere == NIL .OR. SqlIsTrue( ::EvalExpr( xWhere ) ) +// dbRLock( RecNo() ) +// dbDelete() +// dbRUnlock( RecNo() ) +// nAffected++ +// ENDIF +// dbSkip() +// ENDDO +// +// Same caveats as SqlBulkUpdate: caller must guarantee no active +// transaction (LogRecord is omitted) and SET DELETED handling stays +// with the PRG wrapper if it needs it. +// +// NIL whereFn ⇒ delete every row (caller should usually route that +// through TRUNCATE instead, but the behaviour is preserved for +// compat). +func SqlBulkDelete(t *hbrt.Thread) { + t.Frame(1, 0) + defer t.EndProc() + + whereVal := t.Local(1) + var whereFn *hbrt.PcodeFunc + if !whereVal.IsNil() { + if p := whereVal.AsPointer(); p != nil { + whereFn, _ = p.(*hbrt.PcodeFunc) + } + } + + wam, ok := t.WA.(*hbrdd.WorkAreaManager) + if !ok { + t.RetInt(0) + return + } + area := wam.Current() + if area == nil { + t.RetInt(0) + return + } + dbfArea, _ := area.(*dbf.DBFArea) + if dbfArea == nil { + t.RetInt(sqlBulkDeleteGeneric(t, area, whereFn)) + return + } + + prevFG := t.FastFieldGetter + t.FastFieldGetter = func(idx int) hbrt.Value { + v, _ := dbfArea.GetValue(idx - 1) + return v + } + defer func() { t.FastFieldGetter = prevFG }() + + nAffected := 0 + shared := dbfArea.IsShared() + dbfArea.GoTop() + for !dbfArea.EOF() { + match := true + if whereFn != nil { + hbrt.ExecPcodeFast(t, whereFn, nil) + match = t.GetRetValue().AsBool() + } + if match { + recNo := dbfArea.RecNo() + locked := true + if shared { + lockOk, _ := dbfArea.LockRecord(recNo) + locked = lockOk + } + if locked { + dbfArea.Delete() + if shared { + dbfArea.UnlockRecord(recNo) + } + nAffected++ + } + } + dbfArea.Skip(1) + } + if !waCacheEnabledSafe() { + dbfArea.Flush() + } + t.RetInt(int64(nAffected)) +} + +// sqlBulkDeleteGeneric handles non-DBF workareas via the Area interface. +func sqlBulkDeleteGeneric(t *hbrt.Thread, area hbrdd.Area, whereFn *hbrt.PcodeFunc) int64 { + prevFG := t.FastFieldGetter + t.FastFieldGetter = func(idx int) hbrt.Value { + v, _ := area.GetValue(idx - 1) + return v + } + defer func() { t.FastFieldGetter = prevFG }() + + nAffected := int64(0) + area.GoTop() + for !area.EOF() { + match := true + if whereFn != nil { + hbrt.ExecPcodeFast(t, whereFn, nil) + match = t.GetRetValue().AsBool() + } + if match { + area.Delete() + nAffected++ + } + area.Skip(1) + } + return nAffected +} + +// Frame-offset sentinels for SqlWindowSlideAgg. PRG encodes the SQL +// frame bounds "UNBOUNDED PRECEDING / FOLLOWING" into these values; +// any other offset is a relative row count (-N preceding, +N +// following, 0 current row). +const ( + frameUnboundedPreceding = -(1 << 30) + frameUnboundedFollowing = (1 << 30) +) + +// SqlWindowSlideAgg(aRows, aPartIdx, nArgCol, nColIdx, cFunc, leftOff, rightOff) → lHandled +// +// O(N) replacement for the ApplyWindowFunctions general-frame inner +// loop. Two algorithms share one entry point: +// +// SUM / AVG / COUNT — prefix-sum sweep. O(N) build, O(1) query per +// row. Two subtractions per frame instead of the O(N·W) inner +// loop that dominates wide-frame workloads like `ROWS BETWEEN +// 50 PRECEDING AND 50 FOLLOWING`. +// +// MIN / MAX — monotonic deque. SQL frame bounds are linear in the +// row index for every standard frame spec (UNBOUNDED PRECEDING, +// fixed N PRECEDING, CURRENT ROW, fixed N FOLLOWING, UNBOUNDED +// FOLLOWING), so L and R are both non-decreasing in k and the +// classic sliding-window deque applies in one sweep. Amortized +// O(1) per row. +// +// Returns .T. on success, .F. if the aggregate / value types aren't +// supported by the fast path — PRG falls back to the O(N·W) loop. +// Currently the MIN/MAX path only accepts numeric partition values; +// a non-numeric, non-NIL value in the scan column sends the RTL back +// to PRG so string / date comparisons still work correctly via the +// existing SqlCmpLt dispatch. +// +// Semantics match the PRG fallback: +// - COUNT(*) counts every row in frame (nArgCol == 0, i.e. <=0 here). +// - COUNT(expr), SUM, AVG, MIN, MAX skip NIL values. +// - SUM / AVG / MIN / MAX with an empty or all-NIL frame return NIL. +// - COUNT over empty frame returns 0. +// - Frame clamped to [1..partLen] just like SqlFrameOffset did. +func SqlWindowSlideAgg(t *hbrt.Thread) { + t.Frame(7, 0) + defer t.EndProc() + + rowsVal := t.Local(1) + partVal := t.Local(2) + nArgCol := int(t.Local(3).AsNumInt()) - 1 // 0-based; -1 = COUNT(*) + nColIdx := int(t.Local(4).AsNumInt()) - 1 + cFunc := strings.ToUpper(t.Local(5).AsString()) + leftOff := int(t.Local(6).AsNumInt()) + rightOff := int(t.Local(7).AsNumInt()) + + if !rowsVal.IsArray() || !partVal.IsArray() { + t.RetBool(false) + return + } + rowsArr := rowsVal.AsArray().Items + partArr := partVal.AsArray().Items + N := len(partArr) + if N == 0 { + t.RetBool(true) + return + } + + // Snapshot partition indices as 0-based int once. + part := make([]int, N) + for i, v := range partArr { + part[i] = int(v.AsNumInt()) - 1 + } + + switch cFunc { + case "SUM", "AVG", "COUNT": + sqlWindowPrefixAgg(rowsArr, part, nArgCol, nColIdx, cFunc, leftOff, rightOff) + t.RetBool(true) + case "MIN", "MAX": + if nArgCol < 0 { + // MIN/MAX(*) has no meaning — matches PRG which treats it + // as "always NIL" via the no-argcol branch. + t.RetBool(false) + return + } + ok := sqlWindowMonotonicMinMax(rowsArr, part, nArgCol, nColIdx, cFunc, leftOff, rightOff) + t.RetBool(ok) + default: + t.RetBool(false) + } +} + +// sqlWindowPrefixAgg runs the O(N) prefix-sum sweep for SUM / AVG / +// COUNT. Extracted from the SqlWindowSlideAgg body so the MIN/MAX +// path can share the setup without duplicating it. +func sqlWindowPrefixAgg( + rowsArr []hbrt.Value, part []int, nArgCol, nColIdx int, + cFunc string, leftOff, rightOff int, +) { + N := len(part) + // Build prefix arrays: prefSum[i] = sum of values[0..i-1], + // prefCnt[i] = count of non-NIL values[0..i-1]. + prefSum := make([]float64, N+1) + prefCnt := make([]int, N+1) + for i := 0; i < N; i++ { + prefSum[i+1] = prefSum[i] + prefCnt[i+1] = prefCnt[i] + if nArgCol >= 0 { + rowIdx := part[i] + if rowIdx >= 0 && rowIdx < len(rowsArr) { + rowArr := rowsArr[rowIdx].AsArray() + if rowArr != nil && nArgCol < len(rowArr.Items) { + v := rowArr.Items[nArgCol] + if !v.IsNil() && v.IsNumeric() { + prefSum[i+1] += v.AsNumDouble() + prefCnt[i+1]++ + } + } + } + } + } + for k := 0; k < N; k++ { + L, R := resolveFrameBounds(k, N, leftOff, rightOff) + rowIdx := part[k] + if rowIdx < 0 || rowIdx >= len(rowsArr) { + continue + } + rowArr := rowsArr[rowIdx].AsArray() + if rowArr == nil || nColIdx < 0 || nColIdx >= len(rowArr.Items) { + continue + } + var result hbrt.Value + if L > R { + switch cFunc { + case "COUNT": + result = hbrt.MakeInt(0) + default: + result = hbrt.MakeNil() + } + } else if cFunc == "COUNT" && nArgCol < 0 { + result = hbrt.MakeInt(R - L + 1) + } else { + winSum := prefSum[R+1] - prefSum[L] + winCnt := prefCnt[R+1] - prefCnt[L] + switch cFunc { + case "SUM": + if winCnt == 0 { + result = hbrt.MakeNil() + } else { + result = hbrt.MakeDouble(winSum, 0, 0) + } + case "AVG": + if winCnt == 0 { + result = hbrt.MakeNil() + } else { + result = hbrt.MakeDouble(winSum/float64(winCnt), 0, 0) + } + case "COUNT": + result = hbrt.MakeInt(winCnt) + default: + result = hbrt.MakeNil() + } + } + rowArr.Items[nColIdx] = result + } +} + +// sqlWindowMonotonicMinMax answers each row's MIN / MAX over its +// window frame in amortized O(1) using a monotonic deque of partition +// indices. Returns false (and writes nothing) if a non-numeric, +// non-NIL value is encountered — the PRG loop handles string / date +// comparisons via SqlCmpLt. +// +// The deque holds indices `i` into part[]; values stored at those +// indices form a monotonically non-increasing sequence (for MIN) or +// non-decreasing (for MAX), so the front is always the extremum of +// the currently valid window. +func sqlWindowMonotonicMinMax( + rowsArr []hbrt.Value, part []int, nArgCol, nColIdx int, + cFunc string, leftOff, rightOff int, +) bool { + N := len(part) + // Extract numeric values + NIL flags up front. If any non-NIL, + // non-numeric value appears, bail so the PRG loop can handle it. + vals := make([]float64, N) + hasVal := make([]bool, N) + origVal := make([]hbrt.Value, N) // preserve original Value for result + for i := 0; i < N; i++ { + rowIdx := part[i] + if rowIdx < 0 || rowIdx >= len(rowsArr) { + continue + } + rowArr := rowsArr[rowIdx].AsArray() + if rowArr == nil || nArgCol >= len(rowArr.Items) { + continue + } + v := rowArr.Items[nArgCol] + if v.IsNil() { + continue + } + if !v.IsNumeric() { + return false + } + vals[i] = v.AsNumDouble() + hasVal[i] = true + origVal[i] = v + } + + isMin := cFunc == "MIN" + // Ring-buffer deque keyed by partition index. The index is also + // its position in the monotonic sequence; values at those indices + // are the comparison key. Capacity N is an upper bound. + deque := make([]int, 0, N) + nextToPush := 0 + + for k := 0; k < N; k++ { + L, R := resolveFrameBounds(k, N, leftOff, rightOff) + + // Ingest all partition indices up to R that haven't been + // pushed yet. NIL values never enter the deque, matching + // PRG's MIN/MAX which skip NILs. + for nextToPush <= R && nextToPush < N { + if hasVal[nextToPush] { + x := vals[nextToPush] + for len(deque) > 0 { + back := deque[len(deque)-1] + if (isMin && vals[back] >= x) || (!isMin && vals[back] <= x) { + deque = deque[:len(deque)-1] + continue + } + break + } + deque = append(deque, nextToPush) + } + nextToPush++ + } + // Retire deque entries that fell outside the window's left edge. + for len(deque) > 0 && deque[0] < L { + deque = deque[1:] + } + + rowIdx := part[k] + if rowIdx < 0 || rowIdx >= len(rowsArr) { + continue + } + rowArr := rowsArr[rowIdx].AsArray() + if rowArr == nil || nColIdx < 0 || nColIdx >= len(rowArr.Items) { + continue + } + + if L > R || len(deque) == 0 { + rowArr.Items[nColIdx] = hbrt.MakeNil() + } else { + rowArr.Items[nColIdx] = origVal[deque[0]] + } + } + return true +} + +// resolveFrameBounds turns the encoded relative offsets into 0-based +// inclusive [L, R] bounds clamped to the partition. The sentinel +// values map to absolute boundaries; everything else is k + offset. +func resolveFrameBounds(k, N, leftOff, rightOff int) (int, int) { + var L, R int + switch leftOff { + case frameUnboundedPreceding: + L = 0 + case frameUnboundedFollowing: + L = N + default: + L = k + leftOff + } + switch rightOff { + case frameUnboundedPreceding: + R = -1 + case frameUnboundedFollowing: + R = N - 1 + default: + R = k + rightOff + } + if L < 0 { + L = 0 + } + if R >= N { + R = N - 1 + } + return L, R +} + // SqlBulkInsert(aRows) → nInserted // // Go-native bulk INSERT into the current workarea. Replaces the @@ -2210,6 +2810,14 @@ func SqlBulkInsert(t *hbrt.Thread) { // Type-assert the concrete DBF type once so the inner loop avoids // interface-dispatch per call. Non-DBF backends (MEMRDD) take the // generic hbrdd.Area path. + // NIL values must still be routed through PutValue so the DBF + // driver sets the _NullFlags bit for nullable columns. Skipping + // the call leaves the raw bytes at their dbAppend() defaults + // (spaces / zeros), which reads back as empty string / 0 rather + // than SQL NULL. Pre-nullable code skipped NIL purely as an + // optimization (no-op write); with the nullable bitmap that + // "optimization" silently discards NULL markers on multi-row + // INSERT VALUES (...), (...), ... if dbfArea, isDbf := area.(*dbf.DBFArea); isDbf { for _, rowVal := range rows { ra := rowVal.AsArray() @@ -2224,11 +2832,7 @@ func SqlBulkInsert(t *hbrt.Thread) { limit = nFields } for k := 0; k < limit; k++ { - v := ra.Items[k] - if v.IsNil() { - continue - } - dbfArea.PutValue(k, v) + dbfArea.PutValue(k, ra.Items[k]) } inserted++ } @@ -2247,11 +2851,7 @@ func SqlBulkInsert(t *hbrt.Thread) { limit = nFields } for k := 0; k < limit; k++ { - v := ra.Items[k] - if v.IsNil() { - continue - } - area.PutValue(k, v) + area.PutValue(k, ra.Items[k]) } inserted++ } diff --git a/orders.dbf b/orders.dbf deleted file mode 100644 index bdb84cc..0000000 Binary files a/orders.dbf and /dev/null differ