Files
five/compiler/gengo/folding.go
CharlesKWON f4ed42556b checkpoint: season-wide bug fix campaign + infra
Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2
SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved
as a single checkpoint before refactoring the parser to delegate xBase
command translation to the preprocessor.

Highlights:

FiveSql2 engine (_FiveSql2/src/)
- prefix-glob index attach -> explicit convention (<table>_pk.ntx,
  <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop
- DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt)
- COUNT(DISTINCT col) parsed + aggregated via hSeen hash
- UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent)
- DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT)
- Derived table FROM (SELECT...) + JOIN right-side derived
- Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect
- LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs)
- DATE literal round-trip validation (Feb 29 non-leap rejected)
- CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists
- AlterTable type dispatcher comma-wrapped (1-char type "A" no longer
  matches CHARACTER)

Compiler / runtime
- gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity)
- gengo split: emit_block.go, emit_stmt.go, folding.go extracted
- parser/stmtreg.go nudges
- hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*),
  windows debug stubs collapsed
- thread/vm/value/class/pcinterp tightening from panic traces

RDD layer (hbrdd/)
- dbf: null bitmap support (null.go + null_test.go), mmap split
  (mmap_posix.go / mmap_windows.go), byte-level numeric parse
- ntx/cdx: windows mmap parity
- workarea + mem RDD: cross-area state-bleed fixes

RTL (hbrtl/)
- errorlog rewrite with platform-specific FD (errorlog_fd_unix /
  errorlog_fd_other)
- sqlscan, sqlhelpers, indexrtl, datetime extensions

Gates green at checkpoint:
- go test ./...        : PASS
- FiveSql2 SQL:1999    : 43/43
- Harbour compat       : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 09:26:25 +09:00

457 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Constant folding and const-local propagation.
//
// Two passes cooperate at compile time so the generator emits smaller,
// warmer Go code:
//
// - foldLiteralTree / tryFoldBinary / negateLiteral: collapse binary
// expressions on literal operands into a single LiteralExpr. Handles
// int+int/-/×, string+string concatenation, and left-leaning
// `"a"+x+"b"+"c"` chain reassociation. Overflow bails out so the VM
// coerces to double.
//
// - collectConstLocals + constLocalVisitor: identifies LOCALs assigned
// exactly once with a literal initialiser. At emitIdent time those
// names are replaced by the literal so downstream folding (dead IF,
// AND/OR short-circuit, FOR step fusion) can fire on what was a
// variable reference. The walker is conservative — any unrecognised
// AST node aborts the pass so a hidden write can't sneak through.
package gengo
import (
"five/compiler/ast"
"five/compiler/token"
"strconv"
"strings"
)
// negateLiteral builds a new literal holding the arithmetic negation of
// lit, or reports (nil, false) when that cannot be done at compile time.
//
//   - INT: the value is parsed as a base-10 int64 and re-formatted with the
//     sign flipped. A value that does not parse, or that equals the
//     smallest int64 (whose negation is not representable), is rejected.
//   - DOUBLE: negation is purely textual — a leading "-" is stripped if
//     present, otherwise one is prepended.
//   - any other kind: rejected.
//
// The input literal is never modified; the result reuses its ValuePos.
func negateLiteral(lit *ast.LiteralExpr) (*ast.LiteralExpr, bool) {
	var flipped string

	switch lit.Kind {
	case token.INT:
		parsed, err := strconv.ParseInt(lit.Value, 10, 64)
		// -1<<63 is the minimum int64: -parsed would wrap back onto
		// itself, so leave that case to the runtime.
		if err != nil || parsed == -1<<63 {
			return nil, false
		}
		flipped = strconv.FormatInt(-parsed, 10)

	case token.DOUBLE:
		if strings.HasPrefix(lit.Value, "-") {
			flipped = lit.Value[1:]
		} else {
			flipped = "-" + lit.Value
		}

	default:
		return nil, false
	}

	return &ast.LiteralExpr{
		ValuePos: lit.ValuePos,
		Kind:     lit.Kind,
		Value:    flipped,
	}, true
}
// foldLiteralTree folds a BinaryExpr tree bottom-up: both children are
// folded first (and written back into the node), then the node itself is
// offered to tryFoldBinary. Anything that is not a BinaryExpr is returned
// untouched, and a BinaryExpr that cannot be folded is returned with its
// (possibly simplified) children.
//
// One extra rewrite handles left-leaning string concatenation. For
// `"a" + x + "b" + "c"` the parser produces ((("a" + x) + "b") + "c"), in
// which no node has two literal operands. When a PLUS node has a STRING
// literal on the right and its left operand is itself `Y + <STRING
// literal>`, the two literals are fused and the node becomes
// `Y + "<fused>"`. This is applied to STRING literals only.
func foldLiteralTree(e ast.Expr) ast.Expr {
	node, isBinary := e.(*ast.BinaryExpr)
	if !isBinary {
		return e
	}

	node.Left = foldLiteralTree(node.Left)
	node.Right = foldLiteralTree(node.Right)

	if lit, folded := tryFoldBinary(node); folded {
		return lit
	}

	// From here on: try the `(Y + "m") + "t"` => `Y + "mt"` rewrite.
	if node.Op != token.PLUS {
		return node
	}
	tail, ok := node.Right.(*ast.LiteralExpr)
	if !ok || tail.Kind != token.STRING {
		return node
	}
	inner, ok := node.Left.(*ast.BinaryExpr)
	if !ok || inner.Op != token.PLUS {
		return node
	}
	mid, ok := inner.Right.(*ast.LiteralExpr)
	if !ok || mid.Kind != token.STRING {
		return node
	}

	return &ast.BinaryExpr{
		OpPos: node.OpPos,
		Op:    token.PLUS,
		Left:  inner.Left,
		Right: &ast.LiteralExpr{
			ValuePos: mid.ValuePos,
			Kind:     token.STRING,
			Value:    mid.Value + tail.Value,
		},
	}
}
// tryFoldBinary returns a synthetic LiteralExpr when both operands of a
// BinaryExpr are literals and the combination is one the folder handles:
//
//   - INT (+, -, *) INT folds to an INT literal, unless an operand fails to
//     parse as a base-10 int64 or the exact result does not fit in int64.
//   - INT / INT is never folded (the original author notes that Harbour's
//     `/` yields a double even for integer inputs).
//   - STRING + STRING folds to the concatenated STRING literal.
//
// Everything else — non-literal operands, other operators, DOUBLE or
// mixed-kind operands — yields (nil, false) so the expression is emitted
// unchanged and evaluated at run time. The folded literal takes the
// position of the left operand. Neither operand is modified.
func tryFoldBinary(e *ast.BinaryExpr) (*ast.LiteralExpr, bool) {
	l, lok := e.Left.(*ast.LiteralExpr)
	r, rok := e.Right.(*ast.LiteralExpr)
	if !lok || !rok {
		return nil, false
	}
	switch e.Op {
	case token.PLUS, token.MINUS, token.STAR, token.SLASH:
	default:
		return nil, false
	}

	// INT op INT — keep the exact integer result or do not fold at all.
	if l.Kind == token.INT && r.Kind == token.INT {
		li, errL := strconv.ParseInt(l.Value, 10, 64)
		ri, errR := strconv.ParseInt(r.Value, 10, 64)
		if errL != nil || errR != nil {
			return nil, false
		}

		const minInt64 = -1 << 63

		var result int64
		var overflowed bool
		switch e.Op {
		case token.PLUS:
			result = li + ri
			// Adding a non-negative value must not decrease the sum, and
			// adding a negative value must not increase it.
			if (ri >= 0 && result < li) || (ri < 0 && result > li) {
				overflowed = true
			}
		case token.MINUS:
			result = li - ri
			// Mirror image of the PLUS check.
			if (ri <= 0 && result < li) || (ri > 0 && result > li) {
				overflowed = true
			}
		case token.STAR:
			switch {
			case li == 0 || ri == 0:
				result = 0
			case (li == -1 && ri == minInt64) || (ri == -1 && li == minInt64):
				// -1 * MinInt64 wraps back to MinInt64, and Go defines
				// MinInt64 / -1 == MinInt64, so the divide-back check
				// below cannot see this overflow. Reject it explicitly.
				overflowed = true
			default:
				result = li * ri
				// Divide-back check: a wrapped product does not divide
				// back to the other factor.
				if result/li != ri {
					overflowed = true
				}
			}
		case token.SLASH:
			// Not folded: see the function comment.
			return nil, false
		}
		if overflowed {
			return nil, false
		}
		return &ast.LiteralExpr{
			ValuePos: l.ValuePos,
			Kind:     token.INT,
			Value:    strconv.FormatInt(result, 10),
		}, true
	}

	// STRING + STRING — plain concatenation of the two literal values.
	if e.Op == token.PLUS && l.Kind == token.STRING && r.Kind == token.STRING {
		return &ast.LiteralExpr{
			ValuePos: l.ValuePos,
			Kind:     token.STRING,
			Value:    l.Value + r.Value,
		}, true
	}
	return nil, false
}
// collectConstLocals reports which LOCALs of fn can be treated as
// compile-time constants. The result maps the upper-cased variable name to
// the literal it was initialised with; nil means "nothing to propagate".
//
// The pass works in three steps:
//
//  1. Seed: every ScopeLocal VarDecl entry in fn.Decls whose initialiser is
//     a LiteralExpr becomes a candidate.
//  2. Drop any candidate that shares its name with a parameter.
//  3. Walk fn.Body with a constLocalVisitor, which removes each name it
//     sees written (assignment, ++/--, @byref, multi-assign target,
//     FOR/FOREACH variable, RECOVER variable, GET target).
//
// If the visitor aborts — it does so on a macro or on any AST node it does
// not recognise — the whole pass yields nil rather than risk missing a
// hidden write.
func collectConstLocals(fn *ast.FuncDecl) map[string]*ast.LiteralExpr {
	seeds := map[string]*ast.LiteralExpr{}

	for _, decl := range fn.Decls {
		local, isVar := decl.(*ast.VarDecl)
		if !isVar || local.Scope != ast.ScopeLocal {
			continue
		}
		for _, item := range local.Vars {
			// A nil Init fails the assertion, so uninitialised LOCALs
			// are skipped here as well.
			if lit, isLit := item.Init.(*ast.LiteralExpr); isLit {
				seeds[strings.ToUpper(item.Name)] = lit
			}
		}
	}
	if len(seeds) == 0 {
		return nil
	}

	// Parameters can be reassigned without any declaration in the body,
	// so a candidate that shares a parameter's name is not trusted.
	for _, param := range fn.Params {
		delete(seeds, strings.ToUpper(param.Name))
	}

	walker := &constLocalVisitor{candidates: seeds}
	for _, st := range fn.Body {
		if walker.stmt(st); walker.aborted {
			return nil
		}
	}

	if len(walker.candidates) == 0 {
		return nil
	}
	return walker.candidates
}
// constLocalVisitor holds the state of one collectConstLocals walk over a
// function body.
type constLocalVisitor struct {
	// candidates maps upper-cased LOCAL names to their literal
	// initialiser. Entries are deleted as writes to the name are found;
	// abort sets the whole map to nil.
	candidates map[string]*ast.LiteralExpr
	// aborted is set by abort. Once true, expr and stmt return
	// immediately without inspecting their argument.
	aborted bool
}
// abort gives up on the whole pass: every candidate is discarded and the
// aborted flag makes all further expr/stmt calls return immediately.
func (v *constLocalVisitor) abort() {
	v.candidates, v.aborted = nil, true
}
// writeIdent records a write to e when e is a bare identifier, removing
// that name (case-insensitively) from the candidate set. Any other
// expression shape is ignored here.
func (v *constLocalVisitor) writeIdent(e ast.Expr) {
	ident, isIdent := e.(*ast.IdentExpr)
	if !isIdent {
		return
	}
	delete(v.candidates, strings.ToUpper(ident.Name))
}
// writeName records a write to the variable called name, removing it from
// the candidate set. Candidate keys are upper-case, so the lookup is
// case-insensitive.
func (v *constLocalVisitor) writeName(name string) {
	key := strings.ToUpper(name)
	delete(v.candidates, key)
}
// exprs visits every expression of es in order.
func (v *constLocalVisitor) exprs(es []ast.Expr) {
	for i := range es {
		v.expr(es[i])
	}
}
// stmts visits every statement of ss in order.
func (v *constLocalVisitor) stmts(ss []ast.Stmt) {
	for i := range ss {
		v.stmt(ss[i])
	}
}
// expr walks one expression tree looking for writes to candidate names.
//
// Reads never disqualify a name. A name is removed from the candidate set
// when it appears as a bare identifier in a writing position: the operand
// of prefix ++/--, the operand of any postfix operator, the left side of
// any assignment, or the operand of @ref. A macro, or any expression type
// not listed below, aborts the whole pass. Nil expressions and calls made
// after an abort are no-ops.
func (v *constLocalVisitor) expr(e ast.Expr) {
	if e == nil || v.aborted {
		return
	}

	switch n := e.(type) {
	// --- leaves: nothing to descend into, nothing written ---
	case *ast.LiteralExpr, *ast.IdentExpr, *ast.SelfExpr:

	// --- writing positions ---
	case *ast.UnaryExpr:
		switch n.Op {
		case token.INC, token.DEC:
			v.writeIdent(n.X)
		}
		v.expr(n.X)
	case *ast.PostfixExpr:
		v.writeIdent(n.X)
		v.expr(n.X)
	case *ast.AssignExpr:
		// Plain and compound assignments alike count as a write to a
		// bare-identifier target.
		v.writeIdent(n.Left)
		// A non-identifier target (for example arr[i] := v) is walked
		// so that its sub-expressions are still inspected.
		if _, bare := n.Left.(*ast.IdentExpr); !bare {
			v.expr(n.Left)
		}
		v.expr(n.Right)
	case *ast.RefExpr:
		// @ident hands the variable to a callee that may assign to it.
		v.writeIdent(n.X)
		v.expr(n.X)

	// --- pure containers: just descend ---
	case *ast.BinaryExpr:
		v.expr(n.Left)
		v.expr(n.Right)
	case *ast.IIfExpr:
		v.expr(n.Cond)
		v.expr(n.True)
		v.expr(n.False)
	case *ast.CallExpr:
		v.expr(n.Func)
		v.exprs(n.Args)
	case *ast.SendExpr:
		v.expr(n.Object)
		if n.MacroMethod != nil {
			v.expr(n.MacroMethod)
		}
		v.exprs(n.Args)
	case *ast.DotExpr:
		v.expr(n.X)
	case *ast.NilSafeExpr:
		v.expr(n.X)
	case *ast.IndexExpr:
		v.expr(n.X)
		v.expr(n.Index)
	case *ast.SliceExpr:
		v.expr(n.X)
		v.expr(n.Low)
		v.expr(n.High)
	case *ast.AliasExpr:
		v.expr(n.Alias)
		v.expr(n.Field)
	case *ast.BlockExpr:
		v.expr(n.Body)
	case *ast.ArrayLitExpr:
		v.exprs(n.Items)
	case *ast.HashLitExpr:
		v.exprs(n.Keys)
		v.exprs(n.Values)
	case *ast.InterpolatedString:
		v.exprs(n.Parts)

	// --- give up ---
	case *ast.MacroExpr:
		// A macro may expand to anything, including a write.
		v.abort()
	default:
		// Unknown node type: it could hide a write.
		v.abort()
	}
}
// stmt walks one statement looking for writes to candidate names.
//
// Sub-expressions are inspected through expr and nested bodies through
// stmts. In addition, these statement forms write to a variable by name
// and therefore remove it from the candidate set: the FOR / FOR EACH loop
// variable, the RECOVER variable of a sequence block, every target of a
// multi-assignment, and the target of @ ... GET / @ ... SAY ... GET.
// Any statement type not listed below aborts the whole pass. Nil
// statements and calls made after an abort are no-ops.
func (v *constLocalVisitor) stmt(s ast.Stmt) {
	if v.aborted || s == nil {
		return
	}
	switch x := s.(type) {
	case *ast.ExprStmt:
		v.expr(x.X)
	case *ast.ReturnStmt:
		v.expr(x.Value)
	case *ast.QOutStmt:
		v.exprs(x.Exprs)
	case *ast.IfStmt:
		v.expr(x.Cond)
		v.stmts(x.Body)
		for _, ei := range x.ElseIfs {
			v.expr(ei.Cond)
			v.stmts(ei.Body)
		}
		v.stmts(x.ElseBody)
	case *ast.DoWhileStmt:
		v.expr(x.Cond)
		v.stmts(x.Body)
	case *ast.ForStmt:
		// The loop variable is assigned on every iteration.
		v.writeName(x.Var)
		v.expr(x.Start)
		v.expr(x.To)
		v.expr(x.Step)
		v.stmts(x.Body)
	case *ast.ForEachStmt:
		v.writeName(x.Var)
		v.expr(x.Collection)
		v.stmts(x.Body)
	case *ast.SwitchStmt:
		v.expr(x.Expr)
		for _, c := range x.Cases {
			v.expr(c.Value)
			v.stmts(c.Body)
		}
		v.stmts(x.Otherwise)
	case *ast.SeqStmt:
		v.stmts(x.Body)
		// RECOVER USING <var> assigns the recovered value to <var>.
		if x.RecoverVar != "" {
			v.writeName(x.RecoverVar)
		}
		v.stmts(x.RecoverBody)
	case *ast.MultiAssignStmt:
		for _, t := range x.Targets {
			v.writeName(t)
		}
		v.exprs(x.Values)
	case *ast.VarDecl:
		// Initialisers are reads. The declared names themselves are
		// not treated as writes here: collectConstLocals seeds its
		// candidates from declarations, so a declaration's own
		// initialiser must not disqualify it.
		for _, vi := range x.Vars {
			v.expr(vi.Init)
		}
	case *ast.DeferStmt:
		v.expr(x.Call)
	case *ast.ExitStmt, *ast.LoopStmt:
		// no expression
	case *ast.SkipCmd:
		v.expr(x.Count)
	case *ast.GoCmd:
		v.expr(x.RecNo)
	case *ast.SeekCmd:
		v.expr(x.Key)
	case *ast.UseCmd:
		v.expr(x.File)
		v.expr(x.AliasExpr)
	case *ast.SelectCmd:
		v.expr(x.Area)
	case *ast.ReplaceCmd:
		for _, f := range x.Fields {
			v.expr(f.Field)
			v.expr(f.Value)
		}
	case *ast.AppendCmd, *ast.DeleteCmd, *ast.ReadCmd:
		// no expressions
	case *ast.IndexCmd:
		v.expr(x.KeyExpr)
		v.expr(x.File)
		v.expr(x.ForCond)
	case *ast.SetCmd:
		v.expr(x.Expr)
	case *ast.AtSayCmd:
		v.expr(x.Row)
		v.expr(x.Col)
		v.expr(x.SayExpr)
		v.expr(x.Picture)
	case *ast.AtGetCmd:
		// @ GET var writes to Var at READ time.
		v.writeGetTarget(x.Var, x.VarName)
		v.expr(x.Row)
		v.expr(x.Col)
		v.expr(x.Picture)
		v.expr(x.Valid)
		v.expr(x.When)
	case *ast.AtSayGetCmd:
		v.writeGetTarget(x.Var, x.VarName)
		v.expr(x.Row)
		v.expr(x.Col)
		v.expr(x.SayExpr)
		v.expr(x.Picture)
		v.expr(x.Valid)
		v.expr(x.When)
	default:
		// Unknown statement type: it could hide a write.
		v.abort()
	}
}

// writeGetTarget records the write performed by the target of an
// @ ... GET command. The target is disqualified both as a bare identifier
// and under its separately stored name, when one is present. It is then
// walked like any other expression, so that a macro, a nested ++/--/
// assignment, or an unrecognised node inside a non-identifier target
// (for example GET arr[i++]) is seen by the visitor instead of being
// skipped. For a bare identifier the walk is a no-op.
func (v *constLocalVisitor) writeGetTarget(target ast.Expr, name string) {
	v.writeIdent(target)
	if name != "" {
		v.writeName(name)
	}
	v.expr(target)
}