fix(pp,parser,gengo): pre-release blocker round (Wave 1)

Six audit-driven blockers landed together because they're tangled:

  * MENU TO removed from std.ch — the rule expanded to a call to a
    nonexistent __MenuTo() RTL symbol, so any user code with `MENU
    TO choice` compiled clean and panicked at runtime. Behavior
    pre-this-round was a parser silent no-op, which is at least
    consistent. Restore that until @ PROMPT (the companion command)
    actually lands.

  * COUNT now requires `TO <var>`. The earlier `[TO <v>]` optional
    bracket was a Harbour-pattern transcription error: the result
    template references `<v>` unconditionally, so a bare `COUNT`
    expanded to ungrammatical ` := 0 ; dbEval(...)` and the
    PRG parser rejected it. Match Harbour's std.ch which makes TO
    mandatory.

  * UPDATE FROM ... REPLACE now requires `FROM`/`ON`/`REPLACE` all
    three. Same root cause as COUNT: the result template uses
    `<key>`, `<f1>`, `<x1>` unconditionally; missing any of them
    produced broken syntax. Tightened to fail loudly rather than
    silently mis-expand.

  * CLOSE <unknown_alias> no longer closes the *current* workarea.
    SelectByAlias was a silent no-op when the alias was missing,
    leaving WASaveAndSelectAlias to evaluate the inner DbCloseArea()
    against the originally-selected WA — a real data-loss footgun.
    SelectByAlias now returns bool; WASaveAndSelectAlias switches to
    the no-area sentinel (0) on miss so the inner expression's
    Current() returns nil and short-circuits.

  * SUM <x1>, <xN> TO <v1>, <vN> — multi-pair form supported.
    Required two pieces:

       1. matchSegment's regular-marker stop-boundary now combines
          outerTail literals AND the segment's repeat boundary so
          `[, <xN>]` doesn't let `<xN>` swallow past the next ','.

       2. **Five parser miscompiled comma-separated expressions in
          code blocks.** `{|| e1, e2, e3 }` kept only the last expr
          and threw away earlier ones at *AST level*, so all their
          side effects vanished. New SeqExpr AST node + emitter
          (emit each, pop intermediate results) + folding/walk
          updates fix the underlying bug, which also unbreaks any
          other block that relied on comma sequencing.

  * pp.go's `;` continuation joiner now strips exactly one trailing
    `;` per iteration, preserving Harbour's `;;` convention (literal
    `;` followed by a continuation marker). Without this the SUM
    rule's chained `<v1> :=[ <vN> :=] 0 ; ; dbEval(...)` collapsed
    to a missing statement separator.

  * parseExprStmt's xBase fallback switch is back in sync with
    parseIdentStmt — COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/
    DISPLAY/LIST removed (std.ch handles all of them now). Leaving
    them in the fallback masked typos as silent no-ops.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 07:45:20 +09:00
parent e79ced2e0c
commit 000500e034
11 changed files with 141 additions and 40 deletions

View File

@@ -393,6 +393,22 @@ func (e *ArrayLitExpr) Pos() token.Position { return e.LBrace }
func (e *ArrayLitExpr) End() token.Position { return e.RBrace }
func (e *ArrayLitExpr) exprNode() {}
// SeqExpr is a comma-separated expression list used inside code
// blocks: `{|p| e1, e2, e3 }`. All sub-expressions are evaluated in
// order, the last value is the block's return. Without this node the
// parser kept only the last expr and silently dropped the side
// effects of every preceding one — a real miscompile that bit
// `SUM x, y, z TO sx, sy, sz` (only sz accumulated).
type SeqExpr struct {
Items []Expr
StartAt token.Position
EndAt token.Position
}
func (e *SeqExpr) Pos() token.Position { return e.StartAt }
func (e *SeqExpr) End() token.Position { return e.EndAt }
func (e *SeqExpr) exprNode() {}
// HashLitExpr represents a literal hash: {"a" => 1, "b" => 2}
// Harbour: HB_ET_HASH
type HashLitExpr struct {

View File

@@ -283,5 +283,13 @@ func (g *Generator) walkExprIdents(expr ast.Expr, fn func(string)) {
g.walkExprIdents(e.Field, fn)
case *ast.BlockExpr:
g.walkExprIdents(e.Body, fn)
case *ast.SeqExpr:
// Comma-separated expressions inside a code block — recurse so
// every sub-expr's free variables are picked up for closure
// capture. Otherwise the second/third comma-statements would
// see uncaptured outer locals.
for _, item := range e.Items {
g.walkExprIdents(item, fn)
}
}
}

View File

@@ -312,6 +312,8 @@ func (v *constLocalVisitor) expr(e ast.Expr) {
v.abort()
case *ast.BlockExpr:
v.expr(x.Body)
case *ast.SeqExpr:
v.exprs(x.Items)
case *ast.ArrayLitExpr:
v.exprs(x.Items)
case *ast.HashLitExpr:

View File

@@ -842,6 +842,16 @@ func (g *Generator) emitExpr(expr ast.Expr) {
g.writeln(fmt.Sprintf("t.HashGen(%d)", len(e.Keys)))
case *ast.BlockExpr:
g.emitBlock(e)
case *ast.SeqExpr:
// Comma-separated expr list (`{|| e1, e2, e3 }`): emit each in
// order; pop the result of every expr except the last so only
// the final value remains on the stack as the seq's value.
for i, item := range e.Items {
g.emitExpr(item)
if i < len(e.Items)-1 {
g.writeln("t.Pop2()")
}
}
case *ast.SliceExpr:
// a[low:high] → hbrt.ArraySlice(array, low, high)
g.emitExpr(e.X)

View File

@@ -426,13 +426,24 @@ func (p *Parser) parseArrayOrBlock() ast.Expr {
}
// Parse block body — may have comma-separated expressions
// {|x| expr1, expr2} → comma = sequence, returns last value
body := p.parseExpr()
// {|x| expr1, expr2, expr3} → all evaluated in order, last is
// the return value. Earlier impl dropped intermediate exprs by
// overwriting `body`, which was a silent miscompile (any
// non-trailing side effect — e.g. `<v> := <v> + <x>` in a
// multi-pair SUM block — vanished).
first := p.parseExpr()
var seq []ast.Expr
for p.match(token.COMMA) {
// Comma-separated: wrap as sequence, keep last
body = p.parseExpr()
if seq == nil {
seq = []ast.Expr{first}
}
seq = append(seq, p.parseExpr())
}
rbrace := p.expect(token.RBRACE).Pos
var body ast.Expr = first
if seq != nil {
body = &ast.SeqExpr{Items: seq, StartAt: first.Pos(), EndAt: rbrace}
}
return &ast.BlockExpr{LBrace: lbrace, Params: params, Body: body, RBrace: rbrace}
}

View File

@@ -1218,12 +1218,17 @@ func (p *Parser) parseExprStmt() ast.Stmt {
p.peekAt(1) == token.TIMEOUT_KW {
return p.parseWithTimeout()
}
// Keep this list IN SYNC with parseIdentStmt's switch above.
// COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/DISPLAY/LIST
// are no longer here — std.ch rewrites them to function calls
// before the parser sees them. Leaving them in the fallback
// would silently no-op a typo'd version (e.g. `COPYY TO ...`)
// against the user's expectation.
switch p.currentUpper() {
case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE",
"LABEL", "REPORT", "ACCEPT", "INPUT",
"JOIN", "RELEASE", "SAVE", "RESTORE",
case "LABEL", "REPORT", "ACCEPT", "INPUT",
"RELEASE", "SAVE", "RESTORE",
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
"WITH", "CLEAR", "DISPLAY", "LIST":
"WITH", "CLEAR":
// Consume entire line — these are complex multi-word commands
p.advance()
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {

View File

@@ -540,21 +540,29 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
default:
return nil, startLi, false
}
// Build a pseudo-pattern tail so captureExpression picks the
// right delimiters. Priority:
// Build a pseudo-pattern tail so captureExpression picks
// the right delimiters. Priority order (each level is
// merged, then captureExpression stops at *whichever*
// delimiter shows up first in the input):
// 1. Next literals inside the same segment.
// 2. Every literal in the outer-pattern tail — this is
// what stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from
// letting `<(f)>` swallow a trailing FOR/WHILE/NEXT
// clause that happened to be present.
// 3. Repeat boundary (the segment's leading literal) so a
// multi-iteration capture stops before the next iter.
// 2. Every literal in the outer-pattern tail — what
// stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from
// letting `<(f)>` swallow a trailing FOR/WHILE/...
// 3. Repeat boundary (the segment's leading literal)
// — needed for multi-iter `[, <xN>]` so each
// iteration's `<xN>` stops at the next ',' before
// the outer-tail's TO/FOR/etc. catches it.
tail := segment[pi+1:]
if !hasLiteralAfter(tail) {
combined := []string{}
if hasLiteralAfter(outerTail) {
tail = outerTail
} else if repeatBoundary != "" {
tail = []string{repeatBoundary}
combined = append(combined, outerTail...)
}
if repeatBoundary != "" {
combined = append(combined, repeatBoundary)
}
if len(combined) > 0 {
tail = combined
}
}
captured := captureExpression(lineWords, &li, tail, 0, caseSens)

View File

@@ -101,12 +101,18 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
line := lines[i]
// `#command`/`#translate` directives that end with a trailing `;`
// continue on the next physical line — this is how harbour-core
// formats its std.ch rules. Join the continuation here so the
// directive parser sees one logical line. Only `#`-directives
// participate; user code uses `;` differently.
// formats its std.ch rules. Strip exactly one trailing `;` per
// iteration so Harbour's `;;` convention ("literal `;` plus
// continuation") survives: the inner `;` ends up as part of the
// joined directive, the outer one drives the continuation.
// Only `#`-directives participate; user code uses `;` differently.
if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") {
for strings.HasSuffix(strings.TrimRight(line, " \t"), ";") && i+1 < len(lines) {
line = strings.TrimRight(line, " \t;") + " " + strings.TrimSpace(lines[i+1])
for i+1 < len(lines) {
trimmed := strings.TrimRight(line, " \t")
if !strings.HasSuffix(trimmed, ";") {
break
}
line = strings.TrimSuffix(trimmed, ";") + " " + strings.TrimSpace(lines[i+1])
i++
}
}

View File

@@ -45,16 +45,25 @@
expression SUM/AVERAGE (`SUM x, y TO sx, sy`) use optional-repeat
syntax in Harbour and can be added here once a real test exercises
the more elaborate form. */
#command COUNT [TO <v>] [FOR <for>] [WHILE <while>] ;
/* COUNT/SUM/AVERAGE require TO <var> — without it the rewrite
would produce naked assignment with no LHS. Match Harbour
std.ch which also makes TO non-optional. */
#command COUNT TO <v> [FOR <for>] [WHILE <while>] ;
[NEXT <next>] [RECORD <rec>] [<rest:REST>] [ALL] => ;
<v> := 0 ; dbEval( {|| <v> := <v> + 1 }, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
#command SUM <x> TO <v> ;
/* SUM and AVERAGE accept multiple paired expressions/destinations:
`SUM x, y, z TO sx, sy, sz`. The optional `[, <xN>]` and
`[, <vN>]` repeats are matched pairwise; the result template's
chained `<v1> :=[ <vN> :=] 0` and comma-list inside the dbEval
block expand once per extra pair. Single-pair usage is unchanged. */
#command SUM <x1> [, <xN>] TO <v1> [, <vN>] ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
[RECORD <rec>] [<rest:REST>] [ALL] => ;
<v> := 0 ; dbEval( {|| <v> := <v> + <x> }, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
<v1> :=[ <vN> :=] 0 ; ;
dbEval( {|| <v1> := <v1> + <x1>[, <vN> := <vN> + <xN>] }, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
#command AVERAGE <x> TO <v> ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
@@ -126,13 +135,17 @@
Both areas should be sorted on the key for the default forward-
walk; pass RANDOM to scan master from top for each detail key.
Note: ON <key> is wrapped as `_FIELD-><key>` rather than the bare
Note 1: ON <key> is wrapped as `_FIELD-><key>` rather than the bare
`<{key}>` Harbour uses, because the same block must evaluate
against both master and detail. Bare identifiers don't auto-bind
to fields under Five — `_FIELD->` makes the dispatch explicit. */
#command UPDATE [FROM <(alias)>] [ON <key>] [<rand:RANDOM>] ;
[REPLACE <f1> WITH <x1> ;
[, <fN> WITH <xN>]] => ;
to fields under Five — `_FIELD->` makes the dispatch explicit.
Note 2: FROM/ON/REPLACE are all required (Harbour technically allows
them in any order but every real call site provides all three). The
former optional brackets allowed compile-clean garbage like a bare
`UPDATE` to expand to a broken-syntax call. Keep them mandatory. */
#command UPDATE FROM <(alias)> ON <key> [<rand:RANDOM>] ;
REPLACE <f1> WITH <x1> [, <fN> WITH <xN>] => ;
__dbUpdate( <(alias)>, {|| _FIELD-><key> }, <.rand.>, ;
{|| _FIELD-><f1> := <x1>[, _FIELD-><fN> := <xN>] } )
@@ -145,6 +158,10 @@
#command KEYBOARD <text> => Keyboard(<text>)
#command RUN <*cmd*> => hb_Run(<(cmd)>)
/* --- legacy GET system --- */
#command MENU TO <var> => <var> := __MenuTo(<var>)
/* --- legacy GET system ---
MENU TO is intentionally absent: it requires the @ PROMPT statement
companion which Five doesn't implement. Adding the rule would let
user code compile and then panic at runtime on the missing
__MenuTo() symbol. Keep the parser's silent no-op for MENU TO until
@ PROMPT lands. */
#command CLEAR GETS => GetList := {}

View File

@@ -167,13 +167,22 @@ func (wm *WorkAreaManager) FindByAlias(alias string) uint16 {
return num
}
// SelectByAlias switches to a work area by alias name.
// Used by static alias context expressions: CUSTOMERS->(RecCount())
func (wm *WorkAreaManager) SelectByAlias(alias string) {
// SelectByAlias switches to a work area by alias name. Returns true
// when the alias resolved, false when nothing matched. Callers that
// intend to evaluate an expression in the named workarea (alias
// context: `FOO->(...)`) must check the return value — a missed
// alias used to silently leave the original WA selected, so a
// subsequent DbCloseArea/RecCount/etc. ran against the *current*
// workarea instead of the named one. That's the exact data-loss
// foot-gun that bit `CLOSE bad_alias`. WASaveAndSelectAlias now
// switches to the no-area sentinel (0) on miss so the inner
// expression sees Current() == nil and short-circuits cleanly.
func (wm *WorkAreaManager) SelectByAlias(alias string) bool {
num, ok := wm.aliases[strings.ToUpper(alias)]
if ok {
wm.current = num
}
return ok
}
// SelectByNum switches to a work area by number, silently allowing unused areas.

View File

@@ -737,12 +737,21 @@ func (t *Thread) WASaveAndSelect(areaNum int) {
func (t *Thread) WASaveAndSelectAlias(alias string) {
type waSel interface {
SelectByAlias(string)
SelectByAlias(string) bool
SelectByNum(uint16)
CurrentNum() uint16
}
if wam, ok := t.WA.(waSel); ok {
t.waStack = append(t.waStack, wam.CurrentNum())
wam.SelectByAlias(alias)
if !wam.SelectByAlias(alias) {
// Alias not open: switch to the no-area sentinel so the
// inner expression's Current() returns nil and ops like
// DbCloseArea/FieldGet/RecCount short-circuit. Without
// this, the inner expression silently runs against the
// originally-selected WA — which led to `CLOSE bad_alias`
// closing the *current* area.
wam.SelectByNum(0)
}
}
}