From 000500e034e007b83a55b43d280c210ba02e3fcd Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Fri, 1 May 2026 07:45:20 +0900 Subject: [PATCH] fix(pp,parser,gengo): pre-release blocker round (Wave 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six audit-driven blockers landed together because they're tangled: * MENU TO removed from std.ch — the rule expanded to a call to a nonexistent __MenuTo() RTL symbol, so any user code with `MENU TO choice` compiled clean and panicked at runtime. Behavior pre-this-round was a parser silent no-op, which is at least consistent. Restore that until @ PROMPT (the companion command) actually lands. * COUNT now requires `TO `. The earlier `[TO ]` optional bracket was a Harbour-pattern transcription error: the result template references `` unconditionally, so a bare `COUNT` expanded to ungrammatical ` := 0 ; dbEval(...)` and the PRG parser rejected it. Match Harbour's std.ch which makes TO mandatory. * UPDATE FROM ... REPLACE now requires `FROM`/`ON`/`REPLACE` all three. Same root cause as COUNT: the result template uses ``, ``, `` unconditionally; missing any of them produced broken syntax. Tightened to fail loudly rather than silently mis-expand. * CLOSE no longer closes the *current* workarea. SelectByAlias was a silent no-op when the alias was missing, leaving WASaveAndSelectAlias to evaluate the inner DbCloseArea() against the originally-selected WA — a real data-loss footgun. SelectByAlias now returns bool; WASaveAndSelectAlias switches to the no-area sentinel (0) on miss so the inner expression's Current() returns nil and short-circuits. * SUM , TO , — multi-pair form supported. Required two pieces: 1. matchSegment's regular-marker stop-boundary now combines outerTail literals AND the segment's repeat boundary so `[, ]` doesn't let `` swallow past the next ','. 2. **Five parser miscompiled comma-separated expressions in code blocks.** `{|| e1, e2, e3 }` kept only the last expr and threw away earlier ones at *AST level*, so all their side effects vanished. New SeqExpr AST node + emitter (emit each, pop intermediate results) + folding/walk updates fix the underlying bug, which also unbreaks any other block that relied on comma sequencing. * pp.go's `;` continuation joiner now strips exactly one trailing `;` per iteration, preserving Harbour's `;;` convention (literal `;` followed by a continuation marker). Without this the SUM rule's chained ` :=[ :=] 0 ; ; dbEval(...)` collapsed to a missing statement separator. * parseExprStmt's xBase fallback switch is back in sync with parseIdentStmt — COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/ DISPLAY/LIST removed (std.ch handles all of them now). Leaving them in the fallback masked typos as silent no-ops. Gates green: go test ./... : PASS FiveSql2 SQL:1999 : 43/43 Harbour compat : 56/56 Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/ast/ast.go | 16 +++++++++++++++ compiler/gengo/emit_block.go | 8 ++++++++ compiler/gengo/folding.go | 2 ++ compiler/gengo/gengo.go | 10 +++++++++ compiler/parser/expr.go | 19 ++++++++++++++---- compiler/parser/parser.go | 13 ++++++++---- compiler/pp/command.go | 30 +++++++++++++++++---------- compiler/pp/pp.go | 16 ++++++++++----- compiler/pp/std.ch | 39 ++++++++++++++++++++++++++---------- hbrdd/workarea.go | 15 +++++++++++--- hbrt/thread.go | 13 ++++++++++-- 11 files changed, 141 insertions(+), 40 deletions(-) diff --git a/compiler/ast/ast.go b/compiler/ast/ast.go index 38d0493..291abe9 100644 --- a/compiler/ast/ast.go +++ b/compiler/ast/ast.go @@ -393,6 +393,22 @@ func (e *ArrayLitExpr) Pos() token.Position { return e.LBrace } func (e *ArrayLitExpr) End() token.Position { return e.RBrace } func (e *ArrayLitExpr) exprNode() {} +// SeqExpr is a comma-separated expression list used inside code +// blocks: `{|p| e1, e2, e3 }`. All sub-expressions are evaluated in +// order, the last value is the block's return. Without this node the +// parser kept only the last expr and silently dropped the side +// effects of every preceding one — a real miscompile that bit +// `SUM x, y, z TO sx, sy, sz` (only sz accumulated). +type SeqExpr struct { + Items []Expr + StartAt token.Position + EndAt token.Position +} + +func (e *SeqExpr) Pos() token.Position { return e.StartAt } +func (e *SeqExpr) End() token.Position { return e.EndAt } +func (e *SeqExpr) exprNode() {} + // HashLitExpr represents a literal hash: {"a" => 1, "b" => 2} // Harbour: HB_ET_HASH type HashLitExpr struct { diff --git a/compiler/gengo/emit_block.go b/compiler/gengo/emit_block.go index d2aba2c..ab0cb6d 100644 --- a/compiler/gengo/emit_block.go +++ b/compiler/gengo/emit_block.go @@ -283,5 +283,13 @@ func (g *Generator) walkExprIdents(expr ast.Expr, fn func(string)) { g.walkExprIdents(e.Field, fn) case *ast.BlockExpr: g.walkExprIdents(e.Body, fn) + case *ast.SeqExpr: + // Comma-separated expressions inside a code block — recurse so + // every sub-expr's free variables are picked up for closure + // capture. Otherwise the second/third comma-statements would + // see uncaptured outer locals. + for _, item := range e.Items { + g.walkExprIdents(item, fn) + } } } diff --git a/compiler/gengo/folding.go b/compiler/gengo/folding.go index 35d2a55..aa0d925 100644 --- a/compiler/gengo/folding.go +++ b/compiler/gengo/folding.go @@ -312,6 +312,8 @@ func (v *constLocalVisitor) expr(e ast.Expr) { v.abort() case *ast.BlockExpr: v.expr(x.Body) + case *ast.SeqExpr: + v.exprs(x.Items) case *ast.ArrayLitExpr: v.exprs(x.Items) case *ast.HashLitExpr: diff --git a/compiler/gengo/gengo.go b/compiler/gengo/gengo.go index 774131a..bc61389 100644 --- a/compiler/gengo/gengo.go +++ b/compiler/gengo/gengo.go @@ -842,6 +842,16 @@ func (g *Generator) emitExpr(expr ast.Expr) { g.writeln(fmt.Sprintf("t.HashGen(%d)", len(e.Keys))) case *ast.BlockExpr: g.emitBlock(e) + case *ast.SeqExpr: + // Comma-separated expr list (`{|| e1, e2, e3 }`): emit each in + // order; pop the result of every expr except the last so only + // the final value remains on the stack as the seq's value. + for i, item := range e.Items { + g.emitExpr(item) + if i < len(e.Items)-1 { + g.writeln("t.Pop2()") + } + } case *ast.SliceExpr: // a[low:high] → hbrt.ArraySlice(array, low, high) g.emitExpr(e.X) diff --git a/compiler/parser/expr.go b/compiler/parser/expr.go index bc6afbc..98993ce 100644 --- a/compiler/parser/expr.go +++ b/compiler/parser/expr.go @@ -426,13 +426,24 @@ func (p *Parser) parseArrayOrBlock() ast.Expr { } // Parse block body — may have comma-separated expressions - // {|x| expr1, expr2} → comma = sequence, returns last value - body := p.parseExpr() + // {|x| expr1, expr2, expr3} → all evaluated in order, last is + // the return value. Earlier impl dropped intermediate exprs by + // overwriting `body`, which was a silent miscompile (any + // non-trailing side effect — e.g. ` := + ` in a + // multi-pair SUM block — vanished). + first := p.parseExpr() + var seq []ast.Expr for p.match(token.COMMA) { - // Comma-separated: wrap as sequence, keep last - body = p.parseExpr() + if seq == nil { + seq = []ast.Expr{first} + } + seq = append(seq, p.parseExpr()) } rbrace := p.expect(token.RBRACE).Pos + var body ast.Expr = first + if seq != nil { + body = &ast.SeqExpr{Items: seq, StartAt: first.Pos(), EndAt: rbrace} + } return &ast.BlockExpr{LBrace: lbrace, Params: params, Body: body, RBrace: rbrace} } diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go index 4ffdb86..582b5e2 100644 --- a/compiler/parser/parser.go +++ b/compiler/parser/parser.go @@ -1218,12 +1218,17 @@ func (p *Parser) parseExprStmt() ast.Stmt { p.peekAt(1) == token.TIMEOUT_KW { return p.parseWithTimeout() } + // Keep this list IN SYNC with parseIdentStmt's switch above. + // COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/DISPLAY/LIST + // are no longer here — std.ch rewrites them to function calls + // before the parser sees them. Leaving them in the fallback + // would silently no-op a typo'd version (e.g. `COPYY TO ...`) + // against the user's expectation. switch p.currentUpper() { - case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE", - "LABEL", "REPORT", "ACCEPT", "INPUT", - "JOIN", "RELEASE", "SAVE", "RESTORE", + case "LABEL", "REPORT", "ACCEPT", "INPUT", + "RELEASE", "SAVE", "RESTORE", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", - "WITH", "CLEAR", "DISPLAY", "LIST": + "WITH", "CLEAR": // Consume entire line — these are complex multi-word commands p.advance() for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF { diff --git a/compiler/pp/command.go b/compiler/pp/command.go index 33da1d7..5862a1a 100644 --- a/compiler/pp/command.go +++ b/compiler/pp/command.go @@ -540,21 +540,29 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer default: return nil, startLi, false } - // Build a pseudo-pattern tail so captureExpression picks the - // right delimiters. Priority: + // Build a pseudo-pattern tail so captureExpression picks + // the right delimiters. Priority order (each level is + // merged, then captureExpression stops at *whichever* + // delimiter shows up first in the input): // 1. Next literals inside the same segment. - // 2. Every literal in the outer-pattern tail — this is - // what stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from - // letting `<(f)>` swallow a trailing FOR/WHILE/NEXT - // clause that happened to be present. - // 3. Repeat boundary (the segment's leading literal) so a - // multi-iteration capture stops before the next iter. + // 2. Every literal in the outer-pattern tail — what + // stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from + // letting `<(f)>` swallow a trailing FOR/WHILE/... + // 3. Repeat boundary (the segment's leading literal) + // — needed for multi-iter `[, ]` so each + // iteration's `` stops at the next ',' before + // the outer-tail's TO/FOR/etc. catches it. tail := segment[pi+1:] if !hasLiteralAfter(tail) { + combined := []string{} if hasLiteralAfter(outerTail) { - tail = outerTail - } else if repeatBoundary != "" { - tail = []string{repeatBoundary} + combined = append(combined, outerTail...) + } + if repeatBoundary != "" { + combined = append(combined, repeatBoundary) + } + if len(combined) > 0 { + tail = combined } } captured := captureExpression(lineWords, &li, tail, 0, caseSens) diff --git a/compiler/pp/pp.go b/compiler/pp/pp.go index 767f2fa..a805bef 100644 --- a/compiler/pp/pp.go +++ b/compiler/pp/pp.go @@ -101,12 +101,18 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string line := lines[i] // `#command`/`#translate` directives that end with a trailing `;` // continue on the next physical line — this is how harbour-core - // formats its std.ch rules. Join the continuation here so the - // directive parser sees one logical line. Only `#`-directives - // participate; user code uses `;` differently. + // formats its std.ch rules. Strip exactly one trailing `;` per + // iteration so Harbour's `;;` convention ("literal `;` plus + // continuation") survives: the inner `;` ends up as part of the + // joined directive, the outer one drives the continuation. + // Only `#`-directives participate; user code uses `;` differently. if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") { - for strings.HasSuffix(strings.TrimRight(line, " \t"), ";") && i+1 < len(lines) { - line = strings.TrimRight(line, " \t;") + " " + strings.TrimSpace(lines[i+1]) + for i+1 < len(lines) { + trimmed := strings.TrimRight(line, " \t") + if !strings.HasSuffix(trimmed, ";") { + break + } + line = strings.TrimSuffix(trimmed, ";") + " " + strings.TrimSpace(lines[i+1]) i++ } } diff --git a/compiler/pp/std.ch b/compiler/pp/std.ch index bb54a21..dcb5f4f 100644 --- a/compiler/pp/std.ch +++ b/compiler/pp/std.ch @@ -45,16 +45,25 @@ expression SUM/AVERAGE (`SUM x, y TO sx, sy`) use optional-repeat syntax in Harbour and can be added here once a real test exercises the more elaborate form. */ -#command COUNT [TO ] [FOR ] [WHILE ] ; +/* COUNT/SUM/AVERAGE require TO — without it the rewrite + would produce naked assignment with no LHS. Match Harbour + std.ch which also makes TO non-optional. */ +#command COUNT TO [FOR ] [WHILE ] ; [NEXT ] [RECORD ] [] [ALL] => ; := 0 ; dbEval( {|| := + 1 }, ; <{for}>, <{while}>, , , <.rest.> ) -#command SUM TO ; +/* SUM and AVERAGE accept multiple paired expressions/destinations: + `SUM x, y, z TO sx, sy, sz`. The optional `[, ]` and + `[, ]` repeats are matched pairwise; the result template's + chained ` :=[ :=] 0` and comma-list inside the dbEval + block expand once per extra pair. Single-pair usage is unchanged. */ +#command SUM [, ] TO [, ] ; [FOR ] [WHILE ] [NEXT ] ; [RECORD ] [] [ALL] => ; - := 0 ; dbEval( {|| := + }, ; - <{for}>, <{while}>, , , <.rest.> ) + :=[ :=] 0 ; ; + dbEval( {|| := + [, := + ] }, ; + <{for}>, <{while}>, , , <.rest.> ) #command AVERAGE TO ; [FOR ] [WHILE ] [NEXT ] ; @@ -126,13 +135,17 @@ Both areas should be sorted on the key for the default forward- walk; pass RANDOM to scan master from top for each detail key. - Note: ON is wrapped as `_FIELD->` rather than the bare + Note 1: ON is wrapped as `_FIELD->` rather than the bare `<{key}>` Harbour uses, because the same block must evaluate against both master and detail. Bare identifiers don't auto-bind - to fields under Five — `_FIELD->` makes the dispatch explicit. */ -#command UPDATE [FROM <(alias)>] [ON ] [] ; - [REPLACE WITH ; - [, WITH ]] => ; + to fields under Five — `_FIELD->` makes the dispatch explicit. + + Note 2: FROM/ON/REPLACE are all required (Harbour technically allows + them in any order but every real call site provides all three). The + former optional brackets allowed compile-clean garbage like a bare + `UPDATE` to expand to a broken-syntax call. Keep them mandatory. */ +#command UPDATE FROM <(alias)> ON [] ; + REPLACE WITH [, WITH ] => ; __dbUpdate( <(alias)>, {|| _FIELD-> }, <.rand.>, ; {|| _FIELD-> := [, _FIELD-> := ] } ) @@ -145,6 +158,10 @@ #command KEYBOARD => Keyboard() #command RUN <*cmd*> => hb_Run(<(cmd)>) -/* --- legacy GET system --- */ -#command MENU TO => := __MenuTo() +/* --- legacy GET system --- + MENU TO is intentionally absent: it requires the @ PROMPT statement + companion which Five doesn't implement. Adding the rule would let + user code compile and then panic at runtime on the missing + __MenuTo() symbol. Keep the parser's silent no-op for MENU TO until + @ PROMPT lands. */ #command CLEAR GETS => GetList := {} diff --git a/hbrdd/workarea.go b/hbrdd/workarea.go index 5e13ee6..cb2d394 100644 --- a/hbrdd/workarea.go +++ b/hbrdd/workarea.go @@ -167,13 +167,22 @@ func (wm *WorkAreaManager) FindByAlias(alias string) uint16 { return num } -// SelectByAlias switches to a work area by alias name. -// Used by static alias context expressions: CUSTOMERS->(RecCount()) -func (wm *WorkAreaManager) SelectByAlias(alias string) { +// SelectByAlias switches to a work area by alias name. Returns true +// when the alias resolved, false when nothing matched. Callers that +// intend to evaluate an expression in the named workarea (alias +// context: `FOO->(...)`) must check the return value — a missed +// alias used to silently leave the original WA selected, so a +// subsequent DbCloseArea/RecCount/etc. ran against the *current* +// workarea instead of the named one. That's the exact data-loss +// foot-gun that bit `CLOSE bad_alias`. WASaveAndSelectAlias now +// switches to the no-area sentinel (0) on miss so the inner +// expression sees Current() == nil and short-circuits cleanly. +func (wm *WorkAreaManager) SelectByAlias(alias string) bool { num, ok := wm.aliases[strings.ToUpper(alias)] if ok { wm.current = num } + return ok } // SelectByNum switches to a work area by number, silently allowing unused areas. diff --git a/hbrt/thread.go b/hbrt/thread.go index 42fee77..386e8d5 100644 --- a/hbrt/thread.go +++ b/hbrt/thread.go @@ -737,12 +737,21 @@ func (t *Thread) WASaveAndSelect(areaNum int) { func (t *Thread) WASaveAndSelectAlias(alias string) { type waSel interface { - SelectByAlias(string) + SelectByAlias(string) bool + SelectByNum(uint16) CurrentNum() uint16 } if wam, ok := t.WA.(waSel); ok { t.waStack = append(t.waStack, wam.CurrentNum()) - wam.SelectByAlias(alias) + if !wam.SelectByAlias(alias) { + // Alias not open: switch to the no-area sentinel so the + // inner expression's Current() returns nil and ops like + // DbCloseArea/FieldGet/RecCount short-circuit. Without + // this, the inner expression silently runs against the + // originally-selected WA — which led to `CLOSE bad_alias` + // closing the *current* area. + wam.SelectByNum(0) + } } }