Files
five/compiler/gengo/folding.go
CharlesKWON 000500e034 fix(pp,parser,gengo): pre-release blocker round (Wave 1)
Six audit-driven blockers landed together because they're tangled:

  * MENU TO removed from std.ch — the rule expanded to a call to a
    nonexistent __MenuTo() RTL symbol, so any user code with `MENU
    TO choice` compiled clean and panicked at runtime. Behavior
    pre-this-round was a parser silent no-op, which is at least
    consistent. Restore that until @ PROMPT (the companion command)
    actually lands.

  * COUNT now requires `TO <var>`. The earlier `[TO <v>]` optional
    bracket was a Harbour-pattern transcription error: the result
    template references `<v>` unconditionally, so a bare `COUNT`
    expanded to ungrammatical ` := 0 ; dbEval(...)` and the
    PRG parser rejected it. Match Harbour's std.ch which makes TO
    mandatory.

  * UPDATE FROM ... REPLACE now requires `FROM`/`ON`/`REPLACE` all
    three. Same root cause as COUNT: the result template uses
    `<key>`, `<f1>`, `<x1>` unconditionally; missing any of them
    produced broken syntax. Tightened to fail loudly rather than
    silently mis-expand.

  * CLOSE <unknown_alias> no longer closes the *current* workarea.
    SelectByAlias was a silent no-op when the alias was missing,
    leaving WASaveAndSelectAlias to evaluate the inner DbCloseArea()
    against the originally-selected WA — a real data-loss footgun.
    SelectByAlias now returns bool; WASaveAndSelectAlias switches to
    the no-area sentinel (0) on miss so the inner expression's
    Current() returns nil and short-circuits.

  * SUM <x1>, <xN> TO <v1>, <vN> — multi-pair form supported.
    Required two pieces:

       1. matchSegment's regular-marker stop-boundary now combines
          outerTail literals AND the segment's repeat boundary so
          `[, <xN>]` doesn't let `<xN>` swallow past the next ','.

       2. **Five parser miscompiled comma-separated expressions in
          code blocks.** `{|| e1, e2, e3 }` kept only the last expr
          and threw away earlier ones at *AST level*, so all their
          side effects vanished. New SeqExpr AST node + emitter
          (emit each, pop intermediate results) + folding/walk
          updates fix the underlying bug, which also unbreaks any
          other block that relied on comma sequencing.

  * pp.go's `;` continuation joiner now strips exactly one trailing
    `;` per iteration, preserving Harbour's `;;` convention (literal
    `;` followed by a continuation marker). Without this the SUM
    rule's chained `<v1> :=[ <vN> :=] 0 ; ; dbEval(...)` collapsed
    to a missing statement separator.

  * parseExprStmt's xBase fallback switch is back in sync with
    parseIdentStmt — COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/
    DISPLAY/LIST removed (std.ch handles all of them now). Leaving
    them in the fallback masked typos as silent no-ops.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 07:45:20 +09:00

459 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Constant folding and const-local propagation.
//
// Two passes cooperate at compile time so the generator emits smaller,
// warmer Go code:
//
// - foldLiteralTree / tryFoldBinary / negateLiteral: collapse binary
// expressions on literal operands into a single LiteralExpr. Handles
// int +/-/× int, string+string concatenation, and left-leaning
// `"a"+x+"b"+"c"` chain reassociation. Overflow bails out so the VM
// coerces to double.
//
// - collectConstLocals + constLocalVisitor: identifies LOCALs assigned
// exactly once with a literal initialiser. At emitIdent time those
// names are replaced by the literal so downstream folding (dead IF,
// AND/OR short-circuit, FOR step fusion) can fire on what was a
// variable reference. The walker is conservative — any unrecognised
// AST node aborts the pass so a hidden write can't sneak through.
package gengo
import (
"five/compiler/ast"
"five/compiler/token"
"strconv"
"strings"
)
// negateLiteral builds a new literal holding the arithmetic negation of
// lit, keeping lit's position and kind.
//
// INT literals are parsed as base-10 int64 and re-formatted with the
// sign flipped. DOUBLE literals are negated textually: a leading "-" is
// removed if present, otherwise one is prepended.
//
// The second result is false (and the first nil) when lit is neither
// INT nor DOUBLE, when an INT value does not parse, or when the INT is
// the minimum int64, whose negation is not representable.
func negateLiteral(lit *ast.LiteralExpr) (*ast.LiteralExpr, bool) {
	var text string
	switch lit.Kind {
	case token.INT:
		v, err := strconv.ParseInt(lit.Value, 10, 64)
		if err != nil || v == -1<<63 {
			// Unparsable, or -v would overflow: leave it to the caller.
			return nil, false
		}
		text = strconv.FormatInt(-v, 10)
	case token.DOUBLE:
		if strings.HasPrefix(lit.Value, "-") {
			text = lit.Value[1:]
		} else {
			text = "-" + lit.Value
		}
	default:
		return nil, false
	}
	return &ast.LiteralExpr{
		ValuePos: lit.ValuePos,
		Kind:     lit.Kind,
		Value:    text,
	}, true
}
// foldLiteralTree folds literal-only BinaryExpr subtrees of e into a
// single LiteralExpr. Anything that is not a BinaryExpr is returned
// untouched. For a BinaryExpr, both children are folded first (the
// node's Left and Right fields are overwritten in place) and then the
// node itself is offered to tryFoldBinary.
//
// When the node does not fold, one more rewrite is tried for
// left-leaning string concatenation chains. The parser shapes
// "a" + x + "b" + "c" as ((("a" + x) + "b") + "c"), so no node ever has
// two literal operands. If the node is `(Y + strlit) + strlit2`, it is
// rebuilt as `Y + (strlit strlit2)` with the two string literals joined.
// This is applied only to PLUS with STRING literals on both positions.
func foldLiteralTree(e ast.Expr) ast.Expr {
	bin, isBin := e.(*ast.BinaryExpr)
	if !isBin {
		return e
	}

	bin.Left = foldLiteralTree(bin.Left)
	bin.Right = foldLiteralTree(bin.Right)
	if lit, done := tryFoldBinary(bin); done {
		return lit
	}

	// Reassociation applies only to `(Y + "m") + "t"`.
	if bin.Op != token.PLUS {
		return bin
	}
	tail, isLit := bin.Right.(*ast.LiteralExpr)
	if !isLit || tail.Kind != token.STRING {
		return bin
	}
	inner, isBin := bin.Left.(*ast.BinaryExpr)
	if !isBin || inner.Op != token.PLUS {
		return bin
	}
	mid, isLit := inner.Right.(*ast.LiteralExpr)
	if !isLit || mid.Kind != token.STRING {
		return bin
	}

	// Join the two trailing string literals; the joined literal takes
	// the position of the first one.
	return &ast.BinaryExpr{
		OpPos: bin.OpPos,
		Op:    token.PLUS,
		Left:  inner.Left,
		Right: &ast.LiteralExpr{
			ValuePos: mid.ValuePos,
			Kind:     token.STRING,
			Value:    mid.Value + tail.Value,
		},
	}
}
// tryFoldBinary returns a synthetic LiteralExpr when both operands of e
// are literals and the operator/kind combination is one the folder
// handles. The folded literal takes the left operand's position.
//
// Handled combinations:
//
//   - INT (+|-|*) INT: folded to an INT literal using int64 arithmetic.
//     If either value fails to parse, or the exact result does not fit
//     in int64, nothing is folded so the expression is left for the
//     runtime to evaluate.
//   - STRING + STRING: folded to the concatenated STRING literal.
//
// Everything else yields (nil, false): non-literal operands, operators
// other than + - * /, INT / INT (never folded here), and any other kind
// pairing such as INT with DOUBLE.
func tryFoldBinary(e *ast.BinaryExpr) (*ast.LiteralExpr, bool) {
	l, lok := e.Left.(*ast.LiteralExpr)
	r, rok := e.Right.(*ast.LiteralExpr)
	if !lok || !rok {
		return nil, false
	}
	switch e.Op {
	case token.PLUS, token.MINUS, token.STAR, token.SLASH:
	default:
		return nil, false
	}

	// INT op INT — keep an exact integer result.
	if l.Kind == token.INT && r.Kind == token.INT {
		li, errL := strconv.ParseInt(l.Value, 10, 64)
		ri, errR := strconv.ParseInt(r.Value, 10, 64)
		if errL != nil || errR != nil {
			return nil, false
		}

		const minInt64 = -1 << 63

		var result int64
		var overflowed bool
		switch e.Op {
		case token.PLUS:
			result = li + ri
			// A wrapped sum moves in the wrong direction relative to li.
			if (ri >= 0 && result < li) || (ri < 0 && result > li) {
				overflowed = true
			}
		case token.MINUS:
			result = li - ri
			// Same idea as PLUS with the sign of ri inverted.
			if (ri <= 0 && result < li) || (ri > 0 && result > li) {
				overflowed = true
			}
		case token.STAR:
			switch {
			case li == 0 || ri == 0:
				result = 0
			case (li == -1 && ri == minInt64) || (ri == -1 && li == minInt64):
				// -1 * MinInt64 overflows, but the divide-back check
				// below cannot see it: Go defines MinInt64 / -1 as
				// MinInt64, so the wrapped product would look exact.
				overflowed = true
			default:
				result = li * ri
				if result/li != ri {
					overflowed = true
				}
			}
		case token.SLASH:
			// Division of two ints is never folded here; the result
			// is left to the runtime.
			return nil, false
		}
		if overflowed {
			return nil, false
		}
		return &ast.LiteralExpr{
			ValuePos: l.ValuePos,
			Kind:     token.INT,
			Value:    strconv.FormatInt(result, 10),
		}, true
	}

	// STRING + STRING — plain concatenation of the two values.
	if e.Op == token.PLUS && l.Kind == token.STRING && r.Kind == token.STRING {
		return &ast.LiteralExpr{
			ValuePos: l.ValuePos,
			Kind:     token.STRING,
			Value:    l.Value + r.Value,
		}, true
	}
	return nil, false
}
// collectConstLocals reports which LOCALs of fn can be treated as
// compile-time constants. The result maps the upper-cased name to the
// literal it was initialised with.
//
// Candidates are the ScopeLocal VarDecl entries in fn.Decls whose
// initialiser is a LiteralExpr. Names that match a parameter are
// dropped, then the body is walked with a constLocalVisitor, which
// removes every name it sees written and aborts on anything it cannot
// analyse (macros, unknown node types).
//
// Returns nil when there are no candidates to begin with, when the walk
// aborts, or when no candidate survives.
func collectConstLocals(fn *ast.FuncDecl) map[string]*ast.LiteralExpr {
	seeds := map[string]*ast.LiteralExpr{}
	for _, decl := range fn.Decls {
		vd, isVar := decl.(*ast.VarDecl)
		if !isVar || vd.Scope != ast.ScopeLocal {
			continue
		}
		for _, item := range vd.Vars {
			// A missing initialiser fails the assertion as well, so no
			// separate nil test is required.
			if lit, isLit := item.Init.(*ast.LiteralExpr); isLit {
				seeds[strings.ToUpper(item.Name)] = lit
			}
		}
	}
	if len(seeds) == 0 {
		return nil
	}

	// A candidate sharing its name with a parameter is never propagated.
	for _, param := range fn.Params {
		delete(seeds, strings.ToUpper(param.Name))
	}

	walker := &constLocalVisitor{candidates: seeds}
	for _, st := range fn.Body {
		if walker.stmt(st); walker.aborted {
			return nil
		}
	}
	if len(walker.candidates) == 0 {
		return nil
	}
	return walker.candidates
}
// constLocalVisitor is the walker state used by collectConstLocals to
// weed out LOCALs that are written somewhere in a function body.
type constLocalVisitor struct {
// candidates maps upper-cased LOCAL names to their literal initialiser.
// Entries are deleted as writes are discovered; abort sets it to nil.
candidates map[string]*ast.LiteralExpr
// aborted is set by abort; once true, expr and stmt return immediately.
aborted bool
}
// abort gives up on the whole analysis: every candidate is discarded
// and the aborted flag is raised so further visits become no-ops.
func (v *constLocalVisitor) abort() {
	v.candidates, v.aborted = nil, true
}
// writeIdent records a write to e when e is a bare identifier, removing
// that name (upper-cased) from the candidates. Any other expression
// shape is ignored.
func (v *constLocalVisitor) writeIdent(e ast.Expr) {
	id, isIdent := e.(*ast.IdentExpr)
	if !isIdent {
		return
	}
	delete(v.candidates, strings.ToUpper(id.Name))
}
// writeName records a write to the variable called name by removing its
// upper-cased form from the candidates.
func (v *constLocalVisitor) writeName(name string) {
	key := strings.ToUpper(name)
	delete(v.candidates, key)
}
// exprs visits each expression of es in order.
func (v *constLocalVisitor) exprs(es []ast.Expr) {
	for i := range es {
		v.expr(es[i])
	}
}
// stmts visits each statement of ss in order.
func (v *constLocalVisitor) stmts(ss []ast.Stmt) {
	for i := range ss {
		v.stmt(ss[i])
	}
}
// expr walks one expression looking for writes to candidate names.
//
// It does nothing when e is nil or the visitor has already aborted.
// Writes are recognised on: unary INC/DEC operands, every postfix
// operand, the left side of any assignment when it is a bare
// identifier, and the operand of a by-reference (@) expression.
// Sub-expressions are visited recursively. A MacroExpr, or any node
// type not listed in the switch, aborts the whole analysis.
func (v *constLocalVisitor) expr(e ast.Expr) {
	if e == nil || v.aborted {
		return
	}
	switch n := e.(type) {
	case *ast.LiteralExpr, *ast.IdentExpr, *ast.SelfExpr:
		// Leaf nodes. Reading a name never disqualifies it.

	case *ast.BinaryExpr:
		v.expr(n.Left)
		v.expr(n.Right)

	case *ast.UnaryExpr:
		switch n.Op {
		case token.INC, token.DEC:
			v.writeIdent(n.X)
		}
		v.expr(n.X)

	case *ast.PostfixExpr:
		v.writeIdent(n.X)
		v.expr(n.X)

	case *ast.AssignExpr:
		// Every assignment operator counts as a write when the target
		// is a bare identifier. A non-identifier target (e.g. an
		// indexed element) is only walked, so names inside it are
		// treated as reads.
		if _, bare := n.Left.(*ast.IdentExpr); bare {
			v.writeIdent(n.Left)
		} else {
			v.expr(n.Left)
		}
		v.expr(n.Right)

	case *ast.CallExpr:
		v.expr(n.Func)
		v.exprs(n.Args)

	case *ast.DotExpr:
		v.expr(n.X)

	case *ast.SendExpr:
		v.expr(n.Object)
		if n.MacroMethod != nil {
			v.expr(n.MacroMethod)
		}
		v.exprs(n.Args)

	case *ast.IndexExpr:
		v.expr(n.X)
		v.expr(n.Index)

	case *ast.AliasExpr:
		v.expr(n.Alias)
		v.expr(n.Field)

	case *ast.MacroExpr:
		// A macro may expand to anything, including a write to any
		// name, so nothing can be proven constant.
		v.abort()

	case *ast.BlockExpr:
		v.expr(n.Body)

	case *ast.SeqExpr:
		v.exprs(n.Items)

	case *ast.ArrayLitExpr:
		v.exprs(n.Items)

	case *ast.HashLitExpr:
		v.exprs(n.Keys)
		v.exprs(n.Values)

	case *ast.IIfExpr:
		v.expr(n.Cond)
		v.expr(n.True)
		v.expr(n.False)

	case *ast.RefExpr:
		// Passing @ident hands the callee a way to modify it.
		v.writeIdent(n.X)
		v.expr(n.X)

	case *ast.SliceExpr:
		v.expr(n.X)
		v.expr(n.Low)
		v.expr(n.High)

	case *ast.NilSafeExpr:
		v.expr(n.X)

	case *ast.InterpolatedString:
		v.exprs(n.Parts)

	default:
		// Unknown node: it might hide a write, so give up.
		v.abort()
	}
}
// stmt walks one statement looking for writes to candidate names.
//
// It does nothing when s is nil or the visitor has already aborted.
// Besides delegating contained expressions to expr, it treats these as
// writes: the loop variable of ForStmt and ForEachStmt, a non-empty
// SeqStmt.RecoverVar, every MultiAssignStmt target, and the Var /
// VarName of AtGetCmd and AtSayGetCmd. Any statement type not listed in
// the switch aborts the whole analysis.
func (v *constLocalVisitor) stmt(s ast.Stmt) {
	if s == nil || v.aborted {
		return
	}
	switch n := s.(type) {
	case *ast.ExprStmt:
		v.expr(n.X)

	case *ast.ReturnStmt:
		v.expr(n.Value)

	case *ast.QOutStmt:
		v.exprs(n.Exprs)

	case *ast.IfStmt:
		v.expr(n.Cond)
		v.stmts(n.Body)
		for _, branch := range n.ElseIfs {
			v.expr(branch.Cond)
			v.stmts(branch.Body)
		}
		v.stmts(n.ElseBody)

	case *ast.DoWhileStmt:
		v.expr(n.Cond)
		v.stmts(n.Body)

	case *ast.ForStmt:
		// The loop counter is assigned by the loop itself.
		v.writeName(n.Var)
		v.expr(n.Start)
		v.expr(n.To)
		v.expr(n.Step)
		v.stmts(n.Body)

	case *ast.ForEachStmt:
		// The iteration variable is assigned by the loop itself.
		v.writeName(n.Var)
		v.expr(n.Collection)
		v.stmts(n.Body)

	case *ast.SwitchStmt:
		v.expr(n.Expr)
		for _, arm := range n.Cases {
			v.expr(arm.Value)
			v.stmts(arm.Body)
		}
		v.stmts(n.Otherwise)

	case *ast.SeqStmt:
		v.stmts(n.Body)
		if n.RecoverVar != "" {
			v.writeName(n.RecoverVar)
		}
		v.stmts(n.RecoverBody)

	case *ast.MultiAssignStmt:
		for _, target := range n.Targets {
			v.writeName(target)
		}
		v.exprs(n.Values)

	case *ast.VarDecl:
		// Only the initialisers are walked (as reads); the declared
		// names themselves are not counted as writes here.
		for _, item := range n.Vars {
			v.expr(item.Init)
		}

	case *ast.DeferStmt:
		v.expr(n.Call)

	case *ast.ExitStmt, *ast.LoopStmt:
		// Nothing to walk.

	case *ast.SkipCmd:
		v.expr(n.Count)

	case *ast.GoCmd:
		v.expr(n.RecNo)

	case *ast.SeekCmd:
		v.expr(n.Key)

	case *ast.UseCmd:
		v.expr(n.File)
		v.expr(n.AliasExpr)

	case *ast.SelectCmd:
		v.expr(n.Area)

	case *ast.ReplaceCmd:
		for _, pair := range n.Fields {
			v.expr(pair.Field)
			v.expr(pair.Value)
		}

	case *ast.AppendCmd, *ast.DeleteCmd, *ast.ReadCmd:
		// Nothing to walk.

	case *ast.IndexCmd:
		v.expr(n.KeyExpr)
		v.expr(n.File)
		v.expr(n.ForCond)

	case *ast.SetCmd:
		v.expr(n.Expr)

	case *ast.AtSayCmd:
		v.expr(n.Row)
		v.expr(n.Col)
		v.expr(n.SayExpr)
		v.expr(n.Picture)

	case *ast.AtGetCmd:
		// The GET target is counted as written, both via its
		// expression and via its recorded name when present.
		v.writeIdent(n.Var)
		if n.VarName != "" {
			v.writeName(n.VarName)
		}
		v.expr(n.Row)
		v.expr(n.Col)
		v.expr(n.Picture)
		v.expr(n.Valid)
		v.expr(n.When)

	case *ast.AtSayGetCmd:
		// Same write treatment as AtGetCmd, plus the SAY expression.
		v.writeIdent(n.Var)
		if n.VarName != "" {
			v.writeName(n.VarName)
		}
		v.expr(n.Row)
		v.expr(n.Col)
		v.expr(n.SayExpr)
		v.expr(n.Picture)
		v.expr(n.Valid)
		v.expr(n.When)

	default:
		// Unknown statement: it might hide a write, so give up.
		v.abort()
	}
}