Six audit-driven blockers landed together because they're tangled:
* MENU TO removed from std.ch — the rule expanded to a call to a
nonexistent __MenuTo() RTL symbol, so any user code with `MENU
TO choice` compiled clean and panicked at runtime. Behavior
pre-this-round was a parser silent no-op, which is at least
consistent. Restore that until @ PROMPT (the companion command)
actually lands.
* COUNT now requires `TO <var>`. The earlier `[TO <v>]` optional
bracket was a Harbour-pattern transcription error: the result
template references `<v>` unconditionally, so a bare `COUNT`
expanded to ungrammatical ` := 0 ; dbEval(...)` and the
PRG parser rejected it. Match Harbour's std.ch which makes TO
mandatory.
* UPDATE FROM ... REPLACE now requires `FROM`/`ON`/`REPLACE` all
three. Same root cause as COUNT: the result template uses
`<key>`, `<f1>`, `<x1>` unconditionally; missing any of them
produced broken syntax. Tightened to fail loudly rather than
silently mis-expand.
* CLOSE <unknown_alias> no longer closes the *current* workarea.
SelectByAlias was a silent no-op when the alias was missing,
leaving WASaveAndSelectAlias to evaluate the inner DbCloseArea()
against the originally-selected WA — a real data-loss footgun.
SelectByAlias now returns bool; WASaveAndSelectAlias switches to
the no-area sentinel (0) on miss so the inner expression's
Current() returns nil and short-circuits.
* SUM <x1>, <xN> TO <v1>, <vN> — multi-pair form supported.
Required two pieces:
1. matchSegment's regular-marker stop-boundary now combines
outerTail literals AND the segment's repeat boundary so
`[, <xN>]` doesn't let `<xN>` swallow past the next ','.
2. **The Five parser miscompiled comma-separated expressions in
code blocks.** `{|| e1, e2, e3 }` kept only the last expr
and threw away earlier ones at *AST level*, so all their
side effects vanished. New SeqExpr AST node + emitter
(emit each, pop intermediate results) + folding/walk
updates fix the underlying bug, which also unbreaks any
other block that relied on comma sequencing.
* pp.go's `;` continuation joiner now strips exactly one trailing
`;` per iteration, preserving Harbour's `;;` convention (literal
`;` followed by a continuation marker). Without this the SUM
rule's chained `<v1> :=[ <vN> :=] 0 ; ; dbEval(...)` collapsed
to a missing statement separator.
* parseExprStmt's xBase fallback switch is back in sync with
parseIdentStmt — COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/
DISPLAY/LIST removed (std.ch handles all of them now). Leaving
them in the fallback masked typos as silent no-ops.
Gates green:
go test ./... : PASS
FiveSql2 SQL:1999 : 43/43
Harbour compat : 56/56
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
459 lines
12 KiB
Go
459 lines
12 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||
// All rights reserved.
|
||
|
||
// Constant folding and const-local propagation.
|
||
//
|
||
// Two passes cooperate at compile time so the generator emits smaller,
|
||
// warmer Go code:
|
||
//
|
||
// - foldLiteralTree / tryFoldBinary / negateLiteral: collapse binary
|
||
// expressions on literal operands into a single LiteralExpr. Handles
|
||
// int+int/−/×, string+string concatenation, and left-leaning
|
||
// `"a"+x+"b"+"c"` chain reassociation. Overflow bails out so the VM
|
||
// coerces to double.
|
||
//
|
||
// - collectConstLocals + constLocalVisitor: identifies LOCALs assigned
|
||
// exactly once with a literal initialiser. At emitIdent time those
|
||
// names are replaced by the literal so downstream folding (dead IF,
|
||
// AND/OR short-circuit, FOR step fusion) can fire on what was a
|
||
// variable reference. The walker is conservative — any unrecognised
|
||
// AST node aborts the pass so a hidden write can't sneak through.
|
||
|
||
package gengo
|
||
|
||
import (
|
||
"five/compiler/ast"
|
||
"five/compiler/token"
|
||
"strconv"
|
||
"strings"
|
||
)
|
||
|
||
func negateLiteral(lit *ast.LiteralExpr) (*ast.LiteralExpr, bool) {
|
||
switch lit.Kind {
|
||
case token.INT:
|
||
n, err := strconv.ParseInt(lit.Value, 10, 64)
|
||
if err != nil {
|
||
return nil, false
|
||
}
|
||
// Guard: math.MinInt64 has no positive twin — let the VM's
|
||
// runtime coerce-to-double path handle it.
|
||
if n == -1<<63 {
|
||
return nil, false
|
||
}
|
||
return &ast.LiteralExpr{
|
||
ValuePos: lit.ValuePos,
|
||
Kind: token.INT,
|
||
Value: strconv.FormatInt(-n, 10),
|
||
}, true
|
||
case token.DOUBLE:
|
||
// Syntactically prefix `-` or flip an existing leading `-`.
|
||
if strings.HasPrefix(lit.Value, "-") {
|
||
return &ast.LiteralExpr{
|
||
ValuePos: lit.ValuePos,
|
||
Kind: token.DOUBLE,
|
||
Value: lit.Value[1:],
|
||
}, true
|
||
}
|
||
return &ast.LiteralExpr{
|
||
ValuePos: lit.ValuePos,
|
||
Kind: token.DOUBLE,
|
||
Value: "-" + lit.Value,
|
||
}, true
|
||
}
|
||
return nil, false
|
||
}
|
||
|
||
// foldLiteralTree recursively folds BinaryExpr subtrees into LiteralExpr
|
||
// where both operands eventually collapse to literals. Non-foldable
|
||
// subtrees come back unchanged. Used as a preorder pre-pass so the
|
||
// caller can look at a flat LITERAL + LITERAL pair.
|
||
//
|
||
// For left-associative string-concat chains like "a" + x + "b" + "c",
|
||
// the parser builds (((("a" + x) + "b") + "c")) and no pair is
|
||
// literal+literal. We reassociate: if the LHS is `Y + strlit` and the
|
||
// RHS is a string literal, rewrite as `Y + (strlit+rhslit)` so the
|
||
// tail literals collapse. Only safe for STRING+STRING (numeric `+`
|
||
// cares about types / overflow).
|
||
func foldLiteralTree(e ast.Expr) ast.Expr {
|
||
be, ok := e.(*ast.BinaryExpr)
|
||
if !ok {
|
||
return e
|
||
}
|
||
be.Left = foldLiteralTree(be.Left)
|
||
be.Right = foldLiteralTree(be.Right)
|
||
if folded, ok := tryFoldBinary(be); ok {
|
||
return folded
|
||
}
|
||
// String-concat reassociation for left-leaning chains.
|
||
if be.Op == token.PLUS {
|
||
if rLit, ok := be.Right.(*ast.LiteralExpr); ok && rLit.Kind == token.STRING {
|
||
if lBin, ok := be.Left.(*ast.BinaryExpr); ok && lBin.Op == token.PLUS {
|
||
if mLit, ok := lBin.Right.(*ast.LiteralExpr); ok && mLit.Kind == token.STRING {
|
||
fused := &ast.LiteralExpr{
|
||
ValuePos: mLit.ValuePos,
|
||
Kind: token.STRING,
|
||
Value: mLit.Value + rLit.Value,
|
||
}
|
||
return &ast.BinaryExpr{
|
||
OpPos: be.OpPos,
|
||
Op: token.PLUS,
|
||
Left: lBin.Left,
|
||
Right: fused,
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return be
|
||
}
|
||
|
||
// tryFoldBinary returns a synthetic LiteralExpr when both operands of a
|
||
// BinaryExpr are themselves literals and the operator is one the
|
||
// folder recognises. INT+INT stays INT (with overflow falling through
|
||
// to the VM path), mixed numeric falls to double, STRING+STRING
|
||
// concatenates. Non-literal operands or unsupported op → (nil, false).
|
||
func tryFoldBinary(e *ast.BinaryExpr) (*ast.LiteralExpr, bool) {
|
||
l, lok := e.Left.(*ast.LiteralExpr)
|
||
r, rok := e.Right.(*ast.LiteralExpr)
|
||
if !lok || !rok {
|
||
return nil, false
|
||
}
|
||
switch e.Op {
|
||
case token.PLUS, token.MINUS, token.STAR, token.SLASH:
|
||
default:
|
||
return nil, false
|
||
}
|
||
// INT + INT — keep int exact result.
|
||
if l.Kind == token.INT && r.Kind == token.INT {
|
||
li, errL := strconv.ParseInt(l.Value, 10, 64)
|
||
ri, errR := strconv.ParseInt(r.Value, 10, 64)
|
||
if errL != nil || errR != nil {
|
||
return nil, false
|
||
}
|
||
var result int64
|
||
var overflowed bool
|
||
switch e.Op {
|
||
case token.PLUS:
|
||
result = li + ri
|
||
// Harbour overflow discipline: fall through to VM on overflow
|
||
if (ri >= 0 && result < li) || (ri < 0 && result > li) {
|
||
overflowed = true
|
||
}
|
||
case token.MINUS:
|
||
result = li - ri
|
||
if (ri <= 0 && result < li) || (ri > 0 && result > li) {
|
||
overflowed = true
|
||
}
|
||
case token.STAR:
|
||
if li == 0 || ri == 0 {
|
||
result = 0
|
||
} else {
|
||
result = li * ri
|
||
if result/li != ri {
|
||
overflowed = true
|
||
}
|
||
}
|
||
case token.SLASH:
|
||
// Harbour SLASH always yields double even for int inputs.
|
||
return nil, false
|
||
}
|
||
if overflowed {
|
||
return nil, false
|
||
}
|
||
return &ast.LiteralExpr{
|
||
ValuePos: l.ValuePos,
|
||
Kind: token.INT,
|
||
Value: strconv.FormatInt(result, 10),
|
||
}, true
|
||
}
|
||
// STRING + STRING — concatenate. Preserves the quoting style of the
|
||
// left literal so DateExpr and other quoting-sensitive kinds don't
|
||
// change shape.
|
||
if e.Op == token.PLUS && l.Kind == token.STRING && r.Kind == token.STRING {
|
||
return &ast.LiteralExpr{
|
||
ValuePos: l.ValuePos,
|
||
Kind: token.STRING,
|
||
Value: l.Value + r.Value,
|
||
}, true
|
||
}
|
||
return nil, false
|
||
}
|
||
|
||
// collectConstLocals returns a map of LOCAL names (uppercase) whose
|
||
// only assignment is a literal initializer — these can be propagated
|
||
// inline. Any reassignment, ++/--, += family, @byref, MultiAssignStmt
|
||
// target, FOR/FOREACH loop var, or AtGet target disqualifies the name.
|
||
//
|
||
// The walker is bounded: if it encounters a macro expansion or any
|
||
// AST node it doesn't recognise, it aborts and returns an empty map.
|
||
// Correctness trumps coverage — an unrecognised node might hide a
|
||
// write, so we refuse to propagate.
|
||
func collectConstLocals(fn *ast.FuncDecl) map[string]*ast.LiteralExpr {
|
||
v := &constLocalVisitor{
|
||
candidates: map[string]*ast.LiteralExpr{},
|
||
}
|
||
// Seed candidates from top-level LOCAL decls with literal init.
|
||
for _, d := range fn.Decls {
|
||
vd, ok := d.(*ast.VarDecl)
|
||
if !ok || vd.Scope != ast.ScopeLocal {
|
||
continue
|
||
}
|
||
for _, vi := range vd.Vars {
|
||
if vi.Init == nil {
|
||
continue
|
||
}
|
||
if lit, ok := vi.Init.(*ast.LiteralExpr); ok {
|
||
v.candidates[strings.ToUpper(vi.Name)] = lit
|
||
}
|
||
}
|
||
}
|
||
if len(v.candidates) == 0 {
|
||
return nil
|
||
}
|
||
// Params are writable even without explicit assignment (by-value
|
||
// but reassignable) — disqualify any candidate that shadows a param.
|
||
// Params come from a separate slot but guard in case of odd decls.
|
||
for _, p := range fn.Params {
|
||
delete(v.candidates, strings.ToUpper(p.Name))
|
||
}
|
||
for _, st := range fn.Body {
|
||
v.stmt(st)
|
||
if v.aborted {
|
||
return nil
|
||
}
|
||
}
|
||
if len(v.candidates) == 0 {
|
||
return nil
|
||
}
|
||
return v.candidates
|
||
}
|
||
|
||
type constLocalVisitor struct {
|
||
candidates map[string]*ast.LiteralExpr
|
||
aborted bool
|
||
}
|
||
|
||
func (v *constLocalVisitor) abort() {
|
||
v.aborted = true
|
||
v.candidates = nil
|
||
}
|
||
|
||
func (v *constLocalVisitor) writeIdent(e ast.Expr) {
|
||
if id, ok := e.(*ast.IdentExpr); ok {
|
||
delete(v.candidates, strings.ToUpper(id.Name))
|
||
}
|
||
}
|
||
|
||
func (v *constLocalVisitor) writeName(name string) {
|
||
delete(v.candidates, strings.ToUpper(name))
|
||
}
|
||
|
||
func (v *constLocalVisitor) exprs(es []ast.Expr) {
|
||
for _, e := range es {
|
||
v.expr(e)
|
||
}
|
||
}
|
||
|
||
func (v *constLocalVisitor) stmts(ss []ast.Stmt) {
|
||
for _, s := range ss {
|
||
v.stmt(s)
|
||
}
|
||
}
|
||
|
||
func (v *constLocalVisitor) expr(e ast.Expr) {
|
||
if v.aborted || e == nil {
|
||
return
|
||
}
|
||
switch x := e.(type) {
|
||
case *ast.LiteralExpr, *ast.IdentExpr, *ast.SelfExpr:
|
||
// leaf; reads don't disqualify
|
||
case *ast.BinaryExpr:
|
||
v.expr(x.Left)
|
||
v.expr(x.Right)
|
||
case *ast.UnaryExpr:
|
||
if x.Op == token.INC || x.Op == token.DEC {
|
||
v.writeIdent(x.X)
|
||
}
|
||
v.expr(x.X)
|
||
case *ast.PostfixExpr:
|
||
v.writeIdent(x.X)
|
||
v.expr(x.X)
|
||
case *ast.AssignExpr:
|
||
// All assign ops (:= += -= *= /= %= ^=) are writes to Left's
|
||
// outer ident. Compound assigns also read, but disqualification
|
||
// is based on being written at all.
|
||
v.writeIdent(x.Left)
|
||
// Still walk Left in case of indexing: arr[i] := v — the ident
|
||
// arr is read (and we don't want to accidentally treat it as a
|
||
// write since writeIdent only triggers on a bare IdentExpr).
|
||
if _, isIdent := x.Left.(*ast.IdentExpr); !isIdent {
|
||
v.expr(x.Left)
|
||
}
|
||
v.expr(x.Right)
|
||
case *ast.CallExpr:
|
||
v.expr(x.Func)
|
||
v.exprs(x.Args)
|
||
case *ast.DotExpr:
|
||
v.expr(x.X)
|
||
case *ast.SendExpr:
|
||
v.expr(x.Object)
|
||
if x.MacroMethod != nil {
|
||
v.expr(x.MacroMethod)
|
||
}
|
||
v.exprs(x.Args)
|
||
case *ast.IndexExpr:
|
||
v.expr(x.X)
|
||
v.expr(x.Index)
|
||
case *ast.AliasExpr:
|
||
v.expr(x.Alias)
|
||
v.expr(x.Field)
|
||
case *ast.MacroExpr:
|
||
// Macros can expand to any name including writes. Bail.
|
||
v.abort()
|
||
case *ast.BlockExpr:
|
||
v.expr(x.Body)
|
||
case *ast.SeqExpr:
|
||
v.exprs(x.Items)
|
||
case *ast.ArrayLitExpr:
|
||
v.exprs(x.Items)
|
||
case *ast.HashLitExpr:
|
||
v.exprs(x.Keys)
|
||
v.exprs(x.Values)
|
||
case *ast.IIfExpr:
|
||
v.expr(x.Cond)
|
||
v.expr(x.True)
|
||
v.expr(x.False)
|
||
case *ast.RefExpr:
|
||
// @ident — passes by reference; callee may mutate.
|
||
v.writeIdent(x.X)
|
||
v.expr(x.X)
|
||
case *ast.SliceExpr:
|
||
v.expr(x.X)
|
||
v.expr(x.Low)
|
||
v.expr(x.High)
|
||
case *ast.NilSafeExpr:
|
||
v.expr(x.X)
|
||
case *ast.InterpolatedString:
|
||
v.exprs(x.Parts)
|
||
default:
|
||
v.abort()
|
||
}
|
||
}
|
||
|
||
func (v *constLocalVisitor) stmt(s ast.Stmt) {
|
||
if v.aborted || s == nil {
|
||
return
|
||
}
|
||
switch x := s.(type) {
|
||
case *ast.ExprStmt:
|
||
v.expr(x.X)
|
||
case *ast.ReturnStmt:
|
||
v.expr(x.Value)
|
||
case *ast.QOutStmt:
|
||
v.exprs(x.Exprs)
|
||
case *ast.IfStmt:
|
||
v.expr(x.Cond)
|
||
v.stmts(x.Body)
|
||
for _, ei := range x.ElseIfs {
|
||
v.expr(ei.Cond)
|
||
v.stmts(ei.Body)
|
||
}
|
||
v.stmts(x.ElseBody)
|
||
case *ast.DoWhileStmt:
|
||
v.expr(x.Cond)
|
||
v.stmts(x.Body)
|
||
case *ast.ForStmt:
|
||
v.writeName(x.Var)
|
||
v.expr(x.Start)
|
||
v.expr(x.To)
|
||
v.expr(x.Step)
|
||
v.stmts(x.Body)
|
||
case *ast.ForEachStmt:
|
||
v.writeName(x.Var)
|
||
v.expr(x.Collection)
|
||
v.stmts(x.Body)
|
||
case *ast.SwitchStmt:
|
||
v.expr(x.Expr)
|
||
for _, c := range x.Cases {
|
||
v.expr(c.Value)
|
||
v.stmts(c.Body)
|
||
}
|
||
v.stmts(x.Otherwise)
|
||
case *ast.SeqStmt:
|
||
v.stmts(x.Body)
|
||
if x.RecoverVar != "" {
|
||
v.writeName(x.RecoverVar)
|
||
}
|
||
v.stmts(x.RecoverBody)
|
||
case *ast.MultiAssignStmt:
|
||
for _, t := range x.Targets {
|
||
v.writeName(t)
|
||
}
|
||
v.exprs(x.Values)
|
||
case *ast.VarDecl:
|
||
// Init exprs are reads. The LOCAL name itself was already
|
||
// collected as a candidate by collectConstLocals; we don't
|
||
// treat its own init as a reassignment.
|
||
for _, vi := range x.Vars {
|
||
v.expr(vi.Init)
|
||
}
|
||
case *ast.DeferStmt:
|
||
v.expr(x.Call)
|
||
case *ast.ExitStmt, *ast.LoopStmt:
|
||
// no expression
|
||
case *ast.SkipCmd:
|
||
v.expr(x.Count)
|
||
case *ast.GoCmd:
|
||
v.expr(x.RecNo)
|
||
case *ast.SeekCmd:
|
||
v.expr(x.Key)
|
||
case *ast.UseCmd:
|
||
v.expr(x.File)
|
||
v.expr(x.AliasExpr)
|
||
case *ast.SelectCmd:
|
||
v.expr(x.Area)
|
||
case *ast.ReplaceCmd:
|
||
for _, f := range x.Fields {
|
||
v.expr(f.Field)
|
||
v.expr(f.Value)
|
||
}
|
||
case *ast.AppendCmd, *ast.DeleteCmd, *ast.ReadCmd:
|
||
// no expressions
|
||
case *ast.IndexCmd:
|
||
v.expr(x.KeyExpr)
|
||
v.expr(x.File)
|
||
v.expr(x.ForCond)
|
||
case *ast.SetCmd:
|
||
v.expr(x.Expr)
|
||
case *ast.AtSayCmd:
|
||
v.expr(x.Row)
|
||
v.expr(x.Col)
|
||
v.expr(x.SayExpr)
|
||
v.expr(x.Picture)
|
||
case *ast.AtGetCmd:
|
||
// @ GET var writes to Var at READ time.
|
||
v.writeIdent(x.Var)
|
||
if x.VarName != "" {
|
||
v.writeName(x.VarName)
|
||
}
|
||
v.expr(x.Row)
|
||
v.expr(x.Col)
|
||
v.expr(x.Picture)
|
||
v.expr(x.Valid)
|
||
v.expr(x.When)
|
||
case *ast.AtSayGetCmd:
|
||
v.writeIdent(x.Var)
|
||
if x.VarName != "" {
|
||
v.writeName(x.VarName)
|
||
}
|
||
v.expr(x.Row)
|
||
v.expr(x.Col)
|
||
v.expr(x.SayExpr)
|
||
v.expr(x.Picture)
|
||
v.expr(x.Valid)
|
||
v.expr(x.When)
|
||
default:
|
||
v.abort()
|
||
}
|
||
}
|