Five v0.9 — Harbour + Go fusion language

- Compiler: PP → Lexer → Parser → Analyzer → Gengo pipeline
- Parser: 232/236 (98%) Harbour compatibility, registry-based dispatch
- RTL: 351 Harbour-compatible functions
- RDD: DBF/NTX/CDX engines with Rushmore bitmap optimization
- Go Interop: IMPORT + pkg.Func() + obj:Method() with FastPath (15M calls/sec)
- HB_FUNC API: Full Harbour C API compatible Go bridge
- Concurrency: SPAWN/LAUNCH/GOROUTINE, <-, WATCH, PARALLEL FOR, ASYNC/AWAIT
- Extensions: Multi-return, DEFER, Slice, f-string, Nil-safe ?:, CONST
- Macro Compiler: Runtime AST parsing and evaluation
- Debugger: TUI debugger with source display, breakpoints, stepping
- FRB: Native + Pcode dual mode runtime binary
- Tests: 13 packages ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-31 09:41:50 +09:00
commit 59568f3301
282 changed files with 66658 additions and 0 deletions

View File

@@ -0,0 +1,446 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// analyzer.go — Semantic analysis pass for Five AST.
//
// Runs AFTER parsing, BEFORE code generation.
// Checks:
// 1. Variable declaration: all LOCAL vars declared before use
// 2. Scope analysis: LOCAL vs PRIVATE vs PUBLIC vs FIELD
// 3. Undeclared variable warnings
// 4. Unused variable warnings
// 5. Function signature validation
// 6. Type hints (when available)
package analyzer
import (
	"fmt"
	"sort"
	"strings"

	"five/compiler/ast"
	"five/compiler/token"
)
// Diagnostic is one finding reported by the analyzer: the source position it
// refers to, the text of the finding, and how serious it is.
type Diagnostic struct {
	Pos      token.Position // location the finding refers to
	Message  string         // formatted, human-readable text
	Severity Severity       // SevError, SevWarning or SevHint
}
// Severity ranks how serious a Diagnostic is.
type Severity int

// Severity levels, listed from most to least serious.
const (
	SevError   Severity = iota // must fix
	SevWarning                 // should fix
	SevHint                    // optional improvement
)
// String renders the diagnostic as "file:line:col: LEVEL: message".
// Any severity other than SevError or SevWarning is labelled HINT.
func (d Diagnostic) String() string {
	var label string
	switch d.Severity {
	case SevError:
		label = "ERROR"
	case SevWarning:
		label = "WARN"
	default:
		label = "HINT"
	}
	return fmt.Sprintf("%s:%d:%d: %s: %s", d.Pos.File, d.Pos.Line, d.Pos.Col, label, d.Message)
}
// Scope records, for one function, which variables have been declared and
// which have been referenced. Both maps are keyed by the upper-cased name.
type Scope struct {
	Name     string             // name of the function being analyzed
	Declared map[string]VarInfo // upper(name) -> declaration info
	Used     map[string]bool    // upper(name) -> true once referenced
	Parent   *Scope             // outer scope (for blocks)
}
// VarInfo describes one declared variable or parameter.
type VarInfo struct {
	Name    string         // name as written in the source
	Pos     token.Position // position of the name in its declaration
	Kind    ast.VarScope   // LOCAL, STATIC, FIELD, etc.
	IsParam bool           // true when the entry is a function parameter
}
// Analyzer carries the state of one semantic-analysis run over a parsed file.
type Analyzer struct {
	file        *ast.File       // file under analysis
	diagnostics []Diagnostic    // findings collected so far
	scope       *Scope          // scope of the function currently being analyzed
	funcNames   map[string]bool // upper-cased names of functions/classes declared in the file
}
// Analyze runs semantic analysis over file and returns the diagnostics found.
//
// It works in two phases: first every top-level FUNCTION/PROCEDURE and CLASS
// name is collected (so forward references are recognised), then every
// function body is analyzed. Method bodies are analyzed too, both standalone
// METHOD declarations and methods listed as class members; previously they
// were skipped entirely.
//
// A nil file yields no diagnostics.
func Analyze(file *ast.File) []Diagnostic {
	if file == nil {
		return nil
	}
	a := &Analyzer{
		file:      file,
		funcNames: make(map[string]bool),
	}

	// Phase 1: collect all function and class names declared in this file.
	for _, d := range file.Decls {
		switch decl := d.(type) {
		case *ast.FuncDecl:
			if decl != nil {
				a.funcNames[strings.ToUpper(decl.Name)] = true
			}
		case *ast.ClassDecl:
			if decl != nil {
				a.funcNames[strings.ToUpper(decl.Name)] = true
			}
		}
	}

	// Phase 2: analyze every body that contains executable statements.
	for _, d := range file.Decls {
		switch decl := d.(type) {
		case *ast.FuncDecl:
			if decl != nil {
				a.analyzeFunc(decl)
			}
		case *ast.MethodDecl:
			a.analyzeMethod(decl)
		case *ast.ClassDecl:
			if decl == nil {
				continue
			}
			for _, m := range decl.Members {
				if md, ok := m.(*ast.MethodDecl); ok {
					a.analyzeMethod(md)
				}
			}
		}
	}
	return a.diagnostics
}

// analyzeMethod analyzes a method body by presenting it to analyzeFunc as a
// function with the same parameters, declarations and statements.
func (a *Analyzer) analyzeMethod(m *ast.MethodDecl) {
	if m == nil {
		return
	}
	a.analyzeFunc(&ast.FuncDecl{
		FuncPos: m.MethodPos,
		Name:    m.Name,
		Params:  m.Params,
		Decls:   m.Decls,
		Body:    m.Body,
		EndPos:  m.EndPos,
	})
}
// analyzeFunc analyzes a single function: it opens a fresh scope, registers
// parameters and function-top declarations, walks the body, and finally
// reports variables that were declared but never referenced.
//
// Differences from a plain walk that matter to callers reading diagnostics:
//   - Initializers of function-top declarations (LOCAL a := b + 1) are
//     analyzed, so names used only there count as used and undeclared names
//     in them are reported.
//   - "unused variable" hints are emitted in source order (line, column,
//     name) instead of Go's random map iteration order.
//   - PRIVATE and PUBLIC variables are never reported as unused, because
//     they are runtime memvars that may be read outside this function.
func (a *Analyzer) analyzeFunc(fn *ast.FuncDecl) {
	a.scope = &Scope{
		Name:     fn.Name,
		Declared: make(map[string]VarInfo),
		Used:     make(map[string]bool),
	}

	// Parameters are always considered declared and are never flagged unused.
	for _, p := range fn.Params {
		if p == nil {
			continue
		}
		a.scope.Declared[strings.ToUpper(p.Name)] = VarInfo{
			Name:    p.Name,
			Pos:     p.NamePos,
			IsParam: true,
		}
	}

	// Function-top declarations. Each variable is registered before its
	// initializer is analyzed, matching how mid-function declarations are
	// handled in analyzeStmt.
	for _, d := range fn.Decls {
		vd, ok := d.(*ast.VarDecl)
		if !ok || vd == nil {
			continue
		}
		for _, v := range vd.Vars {
			if v == nil {
				continue
			}
			a.scope.Declared[strings.ToUpper(v.Name)] = VarInfo{
				Name: v.Name,
				Pos:  v.NamePos,
				Kind: vd.Scope,
			}
			if v.Init != nil {
				a.analyzeExpr(v.Init)
			}
		}
	}

	for _, stmt := range fn.Body {
		a.analyzeStmt(stmt)
	}

	// Collect unused variables first so they can be reported deterministically.
	var unused []VarInfo
	for name, info := range a.scope.Declared {
		if a.scope.Used[name] || info.IsParam {
			continue
		}
		if info.Kind == ast.ScopePrivate || info.Kind == ast.ScopePublic {
			continue
		}
		// Skip common throwaway names: loop counters, error vars, blank.
		switch strings.ToLower(info.Name) {
		case "i", "j", "k", "n", "err", "_":
			continue
		}
		unused = append(unused, info)
	}
	sort.Slice(unused, func(i, j int) bool {
		pi, pj := unused[i].Pos, unused[j].Pos
		if pi.Line != pj.Line {
			return pi.Line < pj.Line
		}
		if pi.Col != pj.Col {
			return pi.Col < pj.Col
		}
		return unused[i].Name < unused[j].Name
	})
	for _, info := range unused {
		a.hint(info.Pos, "unused variable '%s'", info.Name)
	}
}
// analyzeStmt walks one statement, analyzing every expression it contains and
// recursing into nested statement lists. Loop variables, multi-assignment
// targets, WATCH receive variables and RECOVER USING variables are marked as
// used because the statement itself assigns to them.
//
// Statement kinds without a case below are ignored.
func (a *Analyzer) analyzeStmt(stmt ast.Stmt) {
	if stmt == nil {
		return
	}
	switch s := stmt.(type) {
	case *ast.ExprStmt:
		a.analyzeExpr(s.X)

	case *ast.ReturnStmt:
		a.analyzeExpr(s.Value)
		a.analyzeExprs(s.Values)

	case *ast.IfStmt:
		a.analyzeExpr(s.Cond)
		a.analyzeStmts(s.Body)
		for _, ei := range s.ElseIfs {
			a.analyzeExpr(ei.Cond)
			a.analyzeStmts(ei.Body)
		}
		a.analyzeStmts(s.ElseBody)

	case *ast.DoWhileStmt:
		a.analyzeExpr(s.Cond)
		a.analyzeStmts(s.Body)

	case *ast.ForStmt:
		a.markUsed(s.Var)
		a.analyzeExpr(s.Start)
		a.analyzeExpr(s.To)
		a.analyzeExpr(s.Step)
		a.analyzeStmts(s.Body)

	case *ast.ForEachStmt:
		a.markUsed(s.Var)
		a.analyzeExpr(s.Collection)
		a.analyzeStmts(s.Body)

	case *ast.SwitchStmt:
		a.analyzeExpr(s.Expr)
		for _, c := range s.Cases {
			a.analyzeExpr(c.Value)
			a.analyzeStmts(c.Body)
		}
		a.analyzeStmts(s.Otherwise)

	case *ast.SeqStmt:
		a.analyzeStmts(s.Body)
		// RECOVER USING <var> assigns to the variable, like a FOR loop does.
		if s.RecoverVar != "" {
			a.markUsed(s.RecoverVar)
		}
		a.analyzeStmts(s.RecoverBody)

	case *ast.QOutStmt:
		a.analyzeExprs(s.Exprs)

	case *ast.VarDecl:
		// Mid-function declaration: register each name, then its initializer.
		for _, v := range s.Vars {
			a.scope.Declared[strings.ToUpper(v.Name)] = VarInfo{
				Name: v.Name,
				Pos:  v.NamePos,
				Kind: s.Scope,
			}
			a.analyzeExpr(v.Init)
		}

	case *ast.MultiAssignStmt:
		for _, name := range s.Targets {
			if name != "_" { // blank identifier discards the value
				a.markUsed(name)
			}
		}
		a.analyzeExprs(s.Values)

	case *ast.DeferStmt:
		a.analyzeExpr(s.Call)

	case *ast.ChanSendStmt:
		a.analyzeExpr(s.Chan)
		a.analyzeExpr(s.Value)

	case *ast.WatchStmt:
		for _, c := range s.Cases {
			a.analyzeExpr(c.RecvChan)
			a.analyzeExpr(c.SendChan)
			a.analyzeExpr(c.SendVal)
			if c.RecvVar != "" {
				a.markUsed(c.RecvVar)
			}
			a.analyzeStmts(c.Body)
		}
		a.analyzeStmts(s.Otherwise)

	case *ast.GoBlockStmt:
		// Guard the typed pointer: a nil *BlockExpr wrapped in ast.Expr would
		// not be caught by analyzeExpr's nil-interface check.
		if s.Block != nil {
			a.analyzeExpr(s.Block)
		}

	case *ast.ParallelForStmt:
		a.markUsed(s.Var)
		a.analyzeExpr(s.Start)
		a.analyzeExpr(s.To)
		a.analyzeExpr(s.Step) // previously skipped, unlike ForStmt
		a.analyzeStmts(s.Body)

	case *ast.TimeoutStmt:
		a.analyzeExpr(s.Duration)
		a.analyzeStmts(s.Body)
	}
}

// analyzeStmts analyzes each statement of list in order.
func (a *Analyzer) analyzeStmts(list []ast.Stmt) {
	for _, st := range list {
		a.analyzeStmt(st)
	}
}

// analyzeExprs analyzes each expression of list in order.
func (a *Analyzer) analyzeExprs(list []ast.Expr) {
	for _, e := range list {
		a.analyzeExpr(e)
	}
}
// analyzeExpr walks an expression tree and checks every identifier it finds
// via checkVarUsage. A nil expression is ignored, so optional sub-expressions
// can be passed without a guard.
//
// Notable cases:
//   - A call whose target is a plain identifier is treated as a function
//     reference, not a variable read: it is never reported as an undeclared
//     variable. If a variable of that name is declared it is marked used.
//   - Code-block parameters ({|x| ...}) are in scope while the block body is
//     analyzed and are removed again afterwards.
//   - IIF, f-strings and macro-sent messages are traversed.
//
// Expression kinds without a case below (literals, ::) contain no identifiers.
func (a *Analyzer) analyzeExpr(expr ast.Expr) {
	if expr == nil {
		return
	}
	switch e := expr.(type) {
	case *ast.IdentExpr:
		a.checkVarUsage(e.Name, e.NamePos)
	case *ast.BinaryExpr:
		a.analyzeExpr(e.Left)
		a.analyzeExpr(e.Right)
	case *ast.UnaryExpr:
		a.analyzeExpr(e.X)
	case *ast.PostfixExpr:
		a.analyzeExpr(e.X)
	case *ast.AssignExpr:
		a.analyzeExpr(e.Left)
		a.analyzeExpr(e.Right)
	case *ast.CallExpr:
		if id, ok := e.Func.(*ast.IdentExpr); ok && id != nil {
			// Function name: only counts as a variable use when a variable
			// of that name is actually declared in this function.
			if a.scope != nil {
				if _, declared := a.scope.Declared[strings.ToUpper(id.Name)]; declared {
					a.markUsed(id.Name)
				}
			}
		} else {
			a.analyzeExpr(e.Func)
		}
		for _, arg := range e.Args {
			a.analyzeExpr(arg)
		}
	case *ast.SendExpr:
		a.analyzeExpr(e.Object)
		a.analyzeExpr(e.MacroMethod) // nil for a static message name
		for _, arg := range e.Args {
			a.analyzeExpr(arg)
		}
	case *ast.IndexExpr:
		a.analyzeExpr(e.X)
		a.analyzeExpr(e.Index)
	case *ast.SliceExpr:
		a.analyzeExpr(e.X)
		a.analyzeExpr(e.Low)
		a.analyzeExpr(e.High)
	case *ast.DotExpr:
		a.analyzeExpr(e.X)
	case *ast.ArrayLitExpr:
		for _, item := range e.Items {
			a.analyzeExpr(item)
		}
	case *ast.HashLitExpr:
		for i, key := range e.Keys {
			a.analyzeExpr(key)
			if i < len(e.Values) { // tolerate a malformed node
				a.analyzeExpr(e.Values[i])
			}
		}
	case *ast.IIfExpr:
		a.analyzeExpr(e.Cond)
		a.analyzeExpr(e.True)
		a.analyzeExpr(e.False)
	case *ast.InterpolatedString:
		for _, part := range e.Parts {
			a.analyzeExpr(part)
		}
	case *ast.BlockExpr:
		a.analyzeBlockExpr(e)
	case *ast.AliasExpr:
		a.analyzeExpr(e.Alias)
		a.analyzeExpr(e.Field)
	case *ast.MacroExpr:
		a.analyzeExpr(e.Expr)
	case *ast.RefExpr:
		a.analyzeExpr(e.X)
	case *ast.NilSafeExpr:
		a.analyzeExpr(e.X)
		for _, arg := range e.Args {
			a.analyzeExpr(arg)
		}
	case *ast.ChanRecvExpr:
		a.analyzeExpr(e.Chan)
	case *ast.AsyncExpr:
		a.analyzeExpr(e.Call)
	case *ast.AwaitExpr:
		a.analyzeExpr(e.Future)
	}
}

// analyzeBlockExpr analyzes a code block body with the block's parameters
// temporarily declared. Any outer declaration or "used" flag that a parameter
// shadows is restored afterwards, so a block parameter never makes an
// identically named outer variable look used.
func (a *Analyzer) analyzeBlockExpr(b *ast.BlockExpr) {
	if a.scope == nil || len(b.Params) == 0 {
		a.analyzeExpr(b.Body)
		return
	}

	type shadow struct {
		key      string
		info     VarInfo
		declared bool
		used     bool
	}
	saved := make([]shadow, 0, len(b.Params))
	for _, p := range b.Params {
		key := strings.ToUpper(p)
		info, declared := a.scope.Declared[key]
		saved = append(saved, shadow{key: key, info: info, declared: declared, used: a.scope.Used[key]})
		a.scope.Declared[key] = VarInfo{Name: p, Pos: b.LBrace, IsParam: true}
		delete(a.scope.Used, key)
	}

	a.analyzeExpr(b.Body)

	// Restore in reverse so duplicate parameter names unwind correctly.
	for i := len(saved) - 1; i >= 0; i-- {
		s := saved[i]
		if s.declared {
			a.scope.Declared[s.key] = s.info
		} else {
			delete(a.scope.Declared, s.key)
		}
		if s.used {
			a.scope.Used[s.key] = true
		} else {
			delete(a.scope.Used, s.key)
		}
	}
}
// checkVarUsage records a reference to the identifier name at pos and warns
// when it does not resolve to anything known.
//
// Resolution order:
//  1. A variable or parameter declared in the current function. This is
//     checked first so a local that shares its name with an RTL function
//     (row, date, len, ...) is still marked used and not reported as unused.
//  2. Functions declared in the file, well-known RTL functions, built-in
//     constants, and names of imported Go packages (pkg.Func()). These are
//     silently accepted.
//  3. Anything else is marked used and reported as an undeclared variable; it
//     could be a MEMVAR, a FIELD, or a typo.
func (a *Analyzer) checkVarUsage(name string, pos token.Position) {
	upper := strings.ToUpper(name)

	if a.scope != nil {
		if _, ok := a.scope.Declared[upper]; ok {
			a.scope.Used[upper] = true
			return
		}
	}

	if a.isKnownFunction(upper) || a.isBuiltinConstant(upper) || a.isImportedPackage(name) {
		return
	}

	// Still mark it used so a later mid-function declaration of the same
	// name is not additionally reported as unused.
	a.markUsed(name)
	a.warn(pos, "undeclared variable '%s' (missing LOCAL?)", name)
}

// isImportedPackage reports whether name refers to a package brought in by an
// IMPORT declaration, either through its alias or through the last element of
// its path. Blank ("_") imports introduce no name. The comparison ignores case.
//
// NOTE(review): using the last path element as the package name is a
// heuristic; it assumes the package is named after its directory.
func (a *Analyzer) isImportedPackage(name string) bool {
	if a.file == nil {
		return false
	}
	for _, imp := range a.file.Imports {
		if imp == nil || imp.Alias == "_" {
			continue
		}
		pkg := imp.Alias
		if pkg == "" {
			path := strings.Trim(imp.Path, `"`)
			pkg = path[strings.LastIndex(path, "/")+1:]
		}
		if pkg != "" && strings.EqualFold(pkg, name) {
			return true
		}
	}
	return false
}
// markUsed flags name (case-insensitively) as referenced in the current
// scope. It does nothing when no scope is active.
func (a *Analyzer) markUsed(name string) {
	if a.scope == nil {
		return
	}
	key := strings.ToUpper(name)
	a.scope.Used[key] = true
}
// knownRTLFunctions lists commonly used runtime-library function names, in
// upper case. It is built once at package initialisation instead of on every
// lookup, since isKnownFunction runs for each identifier in the source.
var knownRTLFunctions = map[string]bool{
	"LEN": true, "SUBSTR": true, "LEFT": true, "RIGHT": true,
	"UPPER": true, "LOWER": true, "TRIM": true, "LTRIM": true, "RTRIM": true,
	"STR": true, "VAL": true, "STRTRAN": true, "AT": true, "RAT": true,
	"SPACE": true, "REPLICATE": true, "PADR": true, "PADL": true, "PADC": true,
	"VALTYPE": true, "TYPE": true, "EMPTY": true, "HB_ISSTRING": true,
	"EVAL": true, "AEVAL": true, "ASCAN": true, "ASORT": true,
	"AADD": true, "ADEL": true, "AINS": true, "ASIZE": true, "ACOPY": true, "ACLONE": true,
	"ARRAY": true, "HASH": true, "HB_HASH": true,
	"DTOC": true, "CTOD": true, "DTOS": true, "DATE": true, "TIME": true, "YEAR": true, "MONTH": true, "DAY": true,
	"QOUT": true, "QQOUT": true, "OUTSTD": true, "ALERT": true,
	"INKEY": true, "LASTKEY": true, "CHR": true, "ASC": true,
	"FILE": true, "FOPEN": true, "FCLOSE": true, "FREAD": true, "FWRITE": true,
	"IIF": true, "IF": true, "STRZERO": true, "TRANSFORM": true,
	"FIELDNAME": true, "FIELDPUT": true, "FIELDGET": true, "FCOUNT": true,
	"ALIAS": true, "DBAPPEND": true, "DBDELETE": true, "DBSKIP": true,
	"DBGOTO": true, "DBGOTOP": true, "DBGOBOTTOM": true, "DBCOMMIT": true,
	"RECNO": true, "RECCOUNT": true, "EOF": true, "BOF": true, "FOUND": true,
	"CHANNEL": true, "CHSEND": true, "CHRECEIVE": true,
	"SLEEP": true, "HB_IDLEADD": true, "SECONDS": true,
	"ERRORBLOCK": true, "BREAK": true, "PCOUNT": true, "PROCNAME": true,
	"SETPOS": true, "ROW": true, "COL": true, "MAXROW": true, "MAXCOL": true,
	"SETCOLOR": true, "DISPBOX": true, "DISPBEGIN": true, "DISPEND": true,
	"HB_SYMBOL_UNUSED": true, "HB_DEFAULT": true, "HB_NTOS": true,
}

// isKnownFunction reports whether name is a function or class declared in
// the analyzed file, or one of the common RTL functions. name must already be
// upper-cased, which is how both lookup tables are keyed.
func (a *Analyzer) isKnownFunction(name string) bool {
	if a.funcNames[name] {
		return true
	}
	return knownRTLFunctions[name]
}
// isBuiltinConstant reports whether name is a built-in literal-like
// identifier (NIL, TRUE, FALSE, SELF, SUPER) or one of the recognised key
// constants. name must already be upper-cased.
//
// A switch is used rather than a map so no allocation happens per lookup.
func (a *Analyzer) isBuiltinConstant(name string) bool {
	switch name {
	case "NIL", "TRUE", "FALSE",
		"SELF", "SUPER",
		"K_ESC", "K_ENTER", "K_UP", "K_DOWN",
		"K_LEFT", "K_RIGHT", "K_PGUP", "K_PGDN":
		return true
	}
	return false
}
// --- Diagnostic helpers ---
// diag formats a message and appends it to the analyzer's diagnostics with
// the given severity and position.
func (a *Analyzer) diag(sev Severity, pos token.Position, format string, args ...interface{}) {
	entry := Diagnostic{Pos: pos, Severity: sev}
	entry.Message = fmt.Sprintf(format, args...)
	a.diagnostics = append(a.diagnostics, entry)
}
// errorf records a formatted diagnostic with SevError severity.
func (a *Analyzer) errorf(pos token.Position, format string, args ...interface{}) {
	a.diagnostics = append(a.diagnostics, Diagnostic{
		Pos:      pos,
		Message:  fmt.Sprintf(format, args...),
		Severity: SevError,
	})
}
// warn records a formatted diagnostic with SevWarning severity.
func (a *Analyzer) warn(pos token.Position, format string, args ...interface{}) {
	a.diagnostics = append(a.diagnostics, Diagnostic{
		Pos:      pos,
		Message:  fmt.Sprintf(format, args...),
		Severity: SevWarning,
	})
}
// hint records a formatted diagnostic with SevHint severity.
func (a *Analyzer) hint(pos token.Position, format string, args ...interface{}) {
	a.diagnostics = append(a.diagnostics, Diagnostic{
		Pos:      pos,
		Message:  fmt.Sprintf(format, args...),
		Severity: SevHint,
	})
}

View File

@@ -0,0 +1,136 @@
package analyzer
import (
"five/compiler/parser"
"strings"
"testing"
)
// analyze parses source as "test.prg" and returns the analyzer's diagnostics.
// The test is aborted if the source does not parse.
func analyze(t *testing.T, source string) []Diagnostic {
	t.Helper()
	parsed, parseErrs := parser.Parse("test.prg", source)
	if len(parseErrs) != 0 {
		t.Fatalf("parse error: %s", parseErrs[0])
	}
	return Analyze(parsed)
}
// TestCleanCode checks that well-formed code yields no errors or warnings.
func TestCleanCode(t *testing.T) {
	const src = `
PROCEDURE Main()
LOCAL cName, nAge
cName := "Charles"
nAge := 30
? cName, nAge
RETURN
`
	for _, d := range analyze(t, src) {
		switch d.Severity {
		case SevError, SevWarning:
			t.Errorf("unexpected diagnostic: %s", d)
		}
	}
}
// TestUndeclaredVariable checks that a reference to a variable with no
// declaration produces an "undeclared" diagnostic naming it.
func TestUndeclaredVariable(t *testing.T) {
	const src = `
PROCEDURE Main()
LOCAL cName
cName := "Charles"
? cName, nAge
RETURN
`
	reported := false
	for _, d := range analyze(t, src) {
		if !strings.Contains(d.Message, "undeclared") {
			continue
		}
		if strings.Contains(d.Message, "nAge") {
			reported = true
			break
		}
	}
	if !reported {
		t.Error("expected 'undeclared variable nAge' warning")
	}
}
// TestUnusedVariable checks that a declared but never referenced LOCAL
// produces an "unused" diagnostic naming it.
func TestUnusedVariable(t *testing.T) {
	const src = `
PROCEDURE Main()
LOCAL cUsed, cNeverTouched
cUsed := "hello"
? cUsed
RETURN
`
	reported := false
	for _, d := range analyze(t, src) {
		if !strings.Contains(d.Message, "unused") {
			continue
		}
		if strings.Contains(d.Message, "cNeverTouched") {
			reported = true
			break
		}
	}
	if !reported {
		t.Error("expected 'unused variable cNeverTouched' hint")
	}
}
// TestParamsDeclared checks that function parameters count as declared, so
// using them yields no errors or warnings.
func TestParamsDeclared(t *testing.T) {
	const src = `
FUNCTION Add(a, b)
LOCAL nResult
nResult := a + b
RETURN nResult
`
	for _, d := range analyze(t, src) {
		switch d.Severity {
		case SevError, SevWarning:
			t.Errorf("unexpected: %s", d)
		}
	}
}
// TestMultiFunction checks that calling a function declared later in the
// same file produces no warning.
func TestMultiFunction(t *testing.T) {
	const src = `
PROCEDURE Main()
LOCAL n
n := GetValue()
? n
RETURN
FUNCTION GetValue()
LOCAL x
x := 42
RETURN x
`
	for _, d := range analyze(t, src) {
		if d.Severity != SevWarning {
			continue
		}
		t.Errorf("unexpected warning: %s", d)
	}
}
// TestForLoopVar checks that a FOR loop over a declared counter and array,
// using the RTL function Len, produces no warning.
func TestForLoopVar(t *testing.T) {
	const src = `
PROCEDURE Main()
LOCAL i, aData
aData := {1, 2, 3}
FOR i := 1 TO Len(aData)
? aData[i]
NEXT
RETURN
`
	for _, d := range analyze(t, src) {
		if d.Severity != SevWarning {
			continue
		}
		t.Errorf("unexpected: %s", d)
	}
}
// TestMultiAssignDeclared checks that a multi-assignment to declared
// variables produces no warning.
func TestMultiAssignDeclared(t *testing.T) {
	const src = `
PROCEDURE Main()
LOCAL cName, nAge
cName, nAge := "Charles", 30
? cName, nAge
RETURN
`
	for _, d := range analyze(t, src) {
		if d.Severity != SevWarning {
			continue
		}
		t.Errorf("unexpected: %s", d)
	}
}

930
compiler/ast/ast.go Normal file
View File

@@ -0,0 +1,930 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// AST node definitions for the Five language.
//
// Design references:
// - Harbour: HB_EXPR (hbcompdf.h:349) — expression union with ExprType discriminant
// - Harbour: HB_HFUNC (hbcompdf.h:497) — function with separated pLocals/pStatics/pFields/pMemvars
// - tsgo: Node with Kind discriminant + nodeData interface (internal/ast/ast.go)
//
// Key Harbour rules applied:
// - LOCAL/STATIC/FIELD declarations must appear at function top, before executable code
// - FuncDecl separates Decls (declarations) from Body (executable statements)
// - (expr)->field for dynamic alias access (HB_ET_ALIASEXPR)
// - &variable for macro (6 subtypes from Harbour: VAR, SYMBOL, ALIASED, EXPR, LIST, PARE)
package ast
import "five/compiler/token"
// --- Interfaces ---
// Node is implemented by every AST node. Pos and End report the source
// positions the node spans.
type Node interface {
	Pos() token.Position
	End() token.Position
}
// Expr is a Node that produces a value. The unexported exprNode method
// restricts implementations to this package.
type Expr interface {
	Node
	exprNode()
}
// Stmt is a Node that performs an action. The unexported stmtNode method
// restricts implementations to this package.
type Stmt interface {
	Node
	stmtNode()
}
// Decl is a Node that declares something (LOCAL, STATIC, FIELD, etc.). The
// unexported declNode method restricts implementations to this package.
type Decl interface {
	Node
	declNode()
}
// --- Program (top-level) ---
// File is the root node for one .prg source file.
type File struct {
	Name    string        // filename
	Imports []*ImportDecl // IMPORT declarations
	Decls   []Decl        // top-level: FUNCTION, PROCEDURE, CLASS, etc.
}

// Pos returns the position of the first top-level declaration, or the zero
// position when the file has none.
func (f *File) Pos() token.Position {
	if len(f.Decls) == 0 {
		return token.Position{}
	}
	return f.Decls[0].Pos()
}

// End returns the end of the last top-level declaration, or the zero
// position when the file has none.
func (f *File) End() token.Position {
	n := len(f.Decls)
	if n == 0 {
		return token.Position{}
	}
	return f.Decls[n-1].End()
}
// --- Declarations ---
// ImportDecl is an IMPORT "package/path" declaration, optionally with an
// alias (IMPORT _ "package/path" for a blank import).
type ImportDecl struct {
	ImportPos token.Position
	Alias     string // "" = normal, "_" = blank import, "name" = alias
	Path      string // package path
}

// Pos returns the position of the IMPORT keyword.
func (d *ImportDecl) Pos() token.Position {
	return d.ImportPos
}

// End returns the same position as Pos; no separate end is recorded.
func (d *ImportDecl) End() token.Position {
	return d.ImportPos
}

func (d *ImportDecl) declNode() {}
// FuncDecl is a FUNCTION or PROCEDURE definition.
//
// Following Harbour's HB_HFUNC (pLocals, pStatics, pFields kept apart from
// pcode), declarations are stored separately from executable statements:
// LOCAL/STATIC/FIELD must appear before any executable code.
type FuncDecl struct {
	FuncPos token.Position
	Name    string
	IsProc  bool         // PROCEDURE (no return value)
	Params  []*ParamDecl // declared parameters
	Decls   []Decl       // LOCAL, STATIC, FIELD; must come first
	Body    []Stmt       // executable statements, after declarations
	EndPos  token.Position
}

// Pos returns the position stored in FuncPos.
func (d *FuncDecl) Pos() token.Position {
	return d.FuncPos
}

// End returns the position stored in EndPos.
func (d *FuncDecl) End() token.Position {
	return d.EndPos
}

func (d *FuncDecl) declNode() {}
// ParamDecl is one parameter of a function or method.
type ParamDecl struct {
	NamePos token.Position
	Name    string
	ByRef   bool   // @param or passed by reference
	AsType  string // optional type hint: AS NUMERIC, AS STRING, etc.
}

// Pos returns the position of the parameter name.
func (d *ParamDecl) Pos() token.Position {
	return d.NamePos
}

// End returns the same position as Pos; no separate end is recorded.
func (d *ParamDecl) End() token.Position {
	return d.NamePos
}

func (d *ParamDecl) declNode() {}
// VarDecl is a LOCAL, STATIC, PRIVATE, PUBLIC or FIELD declaration.
//
// In Harbour, LOCAL must be at the top of the function (before executable
// code), while PRIVATE/PUBLIC are runtime memvars and can appear anywhere.
// For that reason VarDecl implements both Decl and Stmt.
type VarDecl struct {
	DeclPos token.Position
	Scope   VarScope
	Vars    []*VarInit // one or more: LOCAL a := 1, b := 2, c
}

// Pos returns the position stored in DeclPos.
func (d *VarDecl) Pos() token.Position {
	return d.DeclPos
}

// End returns the same position as Pos; no separate end is recorded.
func (d *VarDecl) End() token.Position {
	return d.DeclPos
}

func (d *VarDecl) declNode() {}

// stmtNode lets PRIVATE/PUBLIC declarations appear as statements.
func (d *VarDecl) stmtNode() {}
// VarScope identifies the storage class of a declared variable.
type VarScope int

// Storage classes. ScopeLocal is the zero value.
const (
	ScopeLocal   VarScope = iota // LOCAL: stack, function-top only
	ScopeStatic                  // STATIC: module-level, function-top only
	ScopePrivate                 // PRIVATE: runtime memvar, anywhere
	ScopePublic                  // PUBLIC: runtime memvar, anywhere
	ScopeField                   // FIELD: database field declaration, function-top only
)
// VarInit is one variable inside a VarDecl, with its optional initializer.
type VarInit struct {
	NamePos token.Position
	Name    string
	Init    Expr   // nil if no initializer
	AsType  string // optional type hint
}
// ClassDecl is a CLASS ... ENDCLASS definition.
type ClassDecl struct {
	ClassPos   token.Position
	Name       string
	ParentName string // INHERIT FROM parent
	Members    []Decl // DATA, METHOD, ACCESS, ASSIGN declarations
	EndPos     token.Position
}

// Pos returns the position stored in ClassPos.
func (d *ClassDecl) Pos() token.Position {
	return d.ClassPos
}

// End returns the position stored in EndPos.
func (d *ClassDecl) End() token.Position {
	return d.EndPos
}

func (d *ClassDecl) declNode() {}
// DataDecl is a DATA member declared inside a class.
type DataDecl struct {
	DataPos token.Position
	Name    string
	Init    Expr   // INIT expression (nil if none)
	AsType  string // AS type hint
}

// Pos returns the position stored in DataPos.
func (d *DataDecl) Pos() token.Position {
	return d.DataPos
}

// End returns the same position as Pos; no separate end is recorded.
func (d *DataDecl) End() token.Position {
	return d.DataPos
}

func (d *DataDecl) declNode() {}
// MethodDecl is a METHOD declaration, either inside a class or standalone
// (METHOD name CLASS classname).
type MethodDecl struct {
	MethodPos  token.Position
	Name       string
	ClassName  string // METHOD name CLASS classname (standalone)
	Params     []*ParamDecl
	IsInline   bool   // INLINE method
	IsSetGet   bool   // METHOD name(x) SETGET: getter if no arg, setter if arg
	IsAccess   bool   // ACCESS name METHOD getterName
	IsAssign   bool   // ASSIGN name METHOD setterName
	AccessName string // property name for ACCESS/ASSIGN
	Decls      []Decl
	Body       []Stmt
	EndPos     token.Position
}

// Pos returns the position stored in MethodPos.
func (d *MethodDecl) Pos() token.Position {
	return d.MethodPos
}

// End returns the position stored in EndPos.
func (d *MethodDecl) End() token.Position {
	return d.EndPos
}

func (d *MethodDecl) declNode() {}
// GoDumpDecl holds raw Go code taken from a
// #pragma BEGINDUMP ... #pragma ENDDUMP section. This is a Five extension
// that lets Go source be embedded directly in PRG files.
type GoDumpDecl struct {
	DumpPos token.Position
	Code    string // raw Go source code
}

// Pos returns the position stored in DumpPos.
func (d *GoDumpDecl) Pos() token.Position {
	return d.DumpPos
}

// End returns the same position as Pos; no separate end is recorded.
func (d *GoDumpDecl) End() token.Position {
	return d.DumpPos
}

func (d *GoDumpDecl) declNode() {}
// --- Expressions ---
// LiteralExpr is a literal value.
// Harbour: HB_ET_NIL, HB_ET_NUMERIC, HB_ET_STRING, HB_ET_LOGICAL, HB_ET_DATE, HB_ET_TIMESTAMP
type LiteralExpr struct {
	ValuePos token.Position
	Kind     token.Kind // INT, LONG, DOUBLE, STRING, TRUE, FALSE, NIL_LIT, DATE_LIT
	Value    string     // raw literal text
}

// Pos returns the position stored in ValuePos.
func (e *LiteralExpr) Pos() token.Position {
	return e.ValuePos
}

// End returns the same position as Pos; no separate end is recorded.
func (e *LiteralExpr) End() token.Position {
	return e.ValuePos
}

func (e *LiteralExpr) exprNode() {}
// IdentExpr is a bare identifier: a variable or function name.
// Harbour: HB_ET_VARIABLE, HB_ET_FUNNAME
type IdentExpr struct {
	NamePos token.Position
	Name    string
}

// Pos returns the position of the name.
func (e *IdentExpr) Pos() token.Position {
	return e.NamePos
}

// End returns the same position as Pos; no separate end is recorded.
func (e *IdentExpr) End() token.Position {
	return e.NamePos
}

func (e *IdentExpr) exprNode() {}
// SelfExpr is the :: token (Self access inside a class method).
// Harbour: HB_ET_SELF
type SelfExpr struct {
	ColonPos token.Position
}

// Pos returns the position stored in ColonPos.
func (e *SelfExpr) Pos() token.Position {
	return e.ColonPos
}

// End returns the same position as Pos; no separate end is recorded.
func (e *SelfExpr) End() token.Position {
	return e.ColonPos
}

func (e *SelfExpr) exprNode() {}
// BinaryExpr is a binary operation: Left Op Right.
// Harbour: HB_EO_PLUS, HB_EO_MINUS, HB_EO_EQUAL, etc.
type BinaryExpr struct {
	Left  Expr
	OpPos token.Position
	Op    token.Kind
	Right Expr
}

// Pos returns the start of the left operand.
func (e *BinaryExpr) Pos() token.Position {
	return e.Left.Pos()
}

// End returns the end of the right operand.
func (e *BinaryExpr) End() token.Position {
	return e.Right.End()
}

func (e *BinaryExpr) exprNode() {}
// UnaryExpr is a prefix unary operation: Op X.
// Harbour: HB_EO_NEGATE, HB_EO_NOT, HB_EO_PREINC, HB_EO_PREDEC
type UnaryExpr struct {
	OpPos token.Position
	Op    token.Kind // MINUS, NOT, INC, DEC
	X     Expr
}

// Pos returns the position of the operator.
func (e *UnaryExpr) Pos() token.Position {
	return e.OpPos
}

// End returns the end of the operand.
func (e *UnaryExpr) End() token.Position {
	return e.X.End()
}

func (e *UnaryExpr) exprNode() {}
// PostfixExpr is a postfix ++ or -- applied to X.
// Harbour: HB_EO_POSTINC, HB_EO_POSTDEC
type PostfixExpr struct {
	X     Expr
	OpPos token.Position
	Op    token.Kind // INC, DEC
}

// Pos returns the start of the operand.
func (e *PostfixExpr) Pos() token.Position {
	return e.X.Pos()
}

// End returns the position of the operator.
func (e *PostfixExpr) End() token.Position {
	return e.OpPos
}

func (e *PostfixExpr) exprNode() {}
// AssignExpr is an assignment: x := value, x += value, etc.
// Harbour: HB_EO_ASSIGN, HB_EO_PLUSEQ, etc.
type AssignExpr struct {
	Left  Expr
	OpPos token.Position
	Op    token.Kind // ASSIGN, PLUSEQ, MINUSEQ, etc.
	Right Expr
}

// Pos returns the start of the assignment target.
func (e *AssignExpr) Pos() token.Position {
	return e.Left.Pos()
}

// End returns the end of the assigned value.
func (e *AssignExpr) End() token.Position {
	return e.Right.End()
}

func (e *AssignExpr) exprNode() {}
// CallExpr is a function call: func(args...).
// Harbour: HB_ET_FUNCALL, pFunName + pParms
type CallExpr struct {
	Func   Expr // function expression (IdentExpr, or macro)
	LParen token.Position
	Args   []Expr
	RParen token.Position
}

// Pos returns the start of the function expression.
func (e *CallExpr) Pos() token.Position {
	return e.Func.Pos()
}

// End returns the position of the closing parenthesis.
func (e *CallExpr) End() token.Position {
	return e.RParen
}

func (e *CallExpr) exprNode() {}
// DotExpr is a package member access: pkg.Member.
// It is used for Go package function calls such as sql.Open() or fmt.Println().
type DotExpr struct {
	X      Expr // package (IdentExpr)
	DotPos token.Position
	Member string // function/field name
}

// Pos returns the start of the package expression.
func (e *DotExpr) Pos() token.Position {
	return e.X.Pos()
}

// End returns the position of the dot.
func (e *DotExpr) End() token.Position {
	return e.DotPos
}

func (e *DotExpr) exprNode() {}
// SendExpr is a message send: obj:method(args...) or obj:field.
// Harbour: HB_ET_SEND, pObject + szMessage/pMessage + pParms
type SendExpr struct {
	Object      Expr
	ColonPos    token.Position
	Method      string // static message name
	MacroMethod Expr   // if &macro message (nil for static)
	HasParens   bool   // true if () present (method call vs field access)
	LParen      token.Position
	Args        []Expr
	RParen      token.Position
	IsAssign    bool // obj:prop := value (setter)
}

// Pos returns the start of the receiver expression.
func (e *SendExpr) Pos() token.Position {
	return e.Object.Pos()
}

// End returns the position of the closing parenthesis when the send has
// parentheses. Without parentheses there is no RParen to report, so it falls
// back to the end of the last argument (if any) and otherwise to the colon,
// rather than returning RParen's unset value.
//
// NOTE(review): assumes the parser leaves RParen unset when HasParens is
// false; confirm against the parser.
func (e *SendExpr) End() token.Position {
	if e.HasParens {
		return e.RParen
	}
	if n := len(e.Args); n > 0 && e.Args[n-1] != nil {
		return e.Args[n-1].End()
	}
	return e.ColonPos
}

func (e *SendExpr) exprNode() {}
// IndexExpr is an array index: arr[index].
// Harbour: HB_ET_ARRAYAT
type IndexExpr struct {
	X        Expr
	LBracket token.Position
	Index    Expr
	RBracket token.Position
}

// Pos returns the start of the indexed expression.
func (e *IndexExpr) Pos() token.Position {
	return e.X.Pos()
}

// End returns the position of the closing bracket.
func (e *IndexExpr) End() token.Position {
	return e.RBracket
}

func (e *IndexExpr) exprNode() {}
// AliasExpr is a field access through an alias: alias->field or (expr)->field.
// Harbour: HB_ET_ALIASVAR, HB_ET_ALIASEXPR
type AliasExpr struct {
	Alias    Expr // IdentExpr for static alias, any Expr for (dynamic)->field
	ArrowPos token.Position
	Field    Expr // IdentExpr or MacroExpr
}

// Pos returns the start of the alias expression.
func (e *AliasExpr) Pos() token.Position {
	return e.Alias.Pos()
}

// End returns the end of the field expression.
func (e *AliasExpr) End() token.Position {
	return e.Field.End()
}

func (e *AliasExpr) exprNode() {}
// MacroExpr is a macro expansion: &variable or &(expression).
// Harbour: HB_ET_MACRO with 6 subtypes
type MacroExpr struct {
	AmpPos token.Position
	Expr   Expr // variable or parenthesized expression
}

// Pos returns the position stored in AmpPos.
func (e *MacroExpr) Pos() token.Position {
	return e.AmpPos
}

// End returns the end of the expanded expression.
func (e *MacroExpr) End() token.Position {
	return e.Expr.End()
}

func (e *MacroExpr) exprNode() {}
// BlockExpr is a code block: {|params| body}.
// Harbour: HB_ET_CODEBLOCK, pLocals + pExprList
type BlockExpr struct {
	LBrace token.Position
	Params []string // parameter names (between | |)
	Body   Expr     // single expression (or comma-separated list)
	RBrace token.Position
}

// Pos returns the position of the opening brace.
func (e *BlockExpr) Pos() token.Position {
	return e.LBrace
}

// End returns the position of the closing brace.
func (e *BlockExpr) End() token.Position {
	return e.RBrace
}

func (e *BlockExpr) exprNode() {}
// ArrayLitExpr is a literal array: {1, 2, 3}.
// Harbour: HB_ET_ARRAY
type ArrayLitExpr struct {
	LBrace token.Position
	Items  []Expr
	RBrace token.Position
}

// Pos returns the position of the opening brace.
func (e *ArrayLitExpr) Pos() token.Position {
	return e.LBrace
}

// End returns the position of the closing brace.
func (e *ArrayLitExpr) End() token.Position {
	return e.RBrace
}

func (e *ArrayLitExpr) exprNode() {}
// HashLitExpr is a literal hash: {"a" => 1, "b" => 2}.
// Keys and Values are parallel slices.
// Harbour: HB_ET_HASH
type HashLitExpr struct {
	LBrace token.Position
	Keys   []Expr
	Values []Expr
	RBrace token.Position
}

// Pos returns the position of the opening brace.
func (e *HashLitExpr) Pos() token.Position {
	return e.LBrace
}

// End returns the position of the closing brace.
func (e *HashLitExpr) End() token.Position {
	return e.RBrace
}

func (e *HashLitExpr) exprNode() {}
// IIfExpr is an inline if: IIF(cond, trueVal, falseVal).
// Harbour: HB_ET_IIF
type IIfExpr struct {
	IfPos token.Position
	Cond  Expr
	True  Expr
	False Expr
}

// Pos returns the position stored in IfPos.
func (e *IIfExpr) Pos() token.Position {
	return e.IfPos
}

// End returns the end of the false-branch expression.
func (e *IIfExpr) End() token.Position {
	return e.False.End()
}

func (e *IIfExpr) exprNode() {}
// RefExpr is a pass-by-reference argument: @variable.
// Harbour: HB_ET_REFERENCE, HB_ET_VARREF, HB_ET_FUNREF
type RefExpr struct {
	AtPos token.Position
	X     Expr
}

// Pos returns the position stored in AtPos.
func (e *RefExpr) Pos() token.Position {
	return e.AtPos
}

// End returns the end of the referenced expression.
func (e *RefExpr) End() token.Position {
	return e.X.End()
}

func (e *RefExpr) exprNode() {}
// --- Statements ---
// ExprStmt is an expression used as a statement (function calls, assignments).
type ExprStmt struct {
	X Expr
}

// Pos returns the start of the wrapped expression.
func (s *ExprStmt) Pos() token.Position {
	return s.X.Pos()
}

// End returns the end of the wrapped expression.
func (s *ExprStmt) End() token.Position {
	return s.X.End()
}

func (s *ExprStmt) stmtNode() {}
// ReturnStmt is a RETURN statement with zero, one, or several values.
type ReturnStmt struct {
	ReturnPos token.Position
	Value     Expr   // first/only return value (nil for bare RETURN)
	Values    []Expr // multi-return: RETURN a, b, c (nil if single)
}

// Pos returns the position of the RETURN keyword.
func (s *ReturnStmt) Pos() token.Position {
	return s.ReturnPos
}

// End returns the end of the last returned expression. For a multi-value
// return that is the last non-nil entry of Values, so the statement's span
// covers every value rather than stopping at the first. For a single-value
// return it is the end of Value, and for a bare RETURN the keyword position.
func (s *ReturnStmt) End() token.Position {
	for i := len(s.Values) - 1; i >= 0; i-- {
		if v := s.Values[i]; v != nil {
			return v.End()
		}
	}
	if s.Value != nil {
		return s.Value.End()
	}
	return s.ReturnPos
}

func (s *ReturnStmt) stmtNode() {}
// QOutStmt is the ? expr, expr, ... output statement (shorthand for QOut).
type QOutStmt struct {
	QPos  token.Position
	IsQQ  bool // true for ?? (QQOut)
	Exprs []Expr
}

// Pos returns the position stored in QPos.
func (s *QOutStmt) Pos() token.Position {
	return s.QPos
}

// End returns the end of the last printed expression, or QPos when the
// statement has no expressions.
func (s *QOutStmt) End() token.Position {
	n := len(s.Exprs)
	if n == 0 {
		return s.QPos
	}
	return s.Exprs[n-1].End()
}

func (s *QOutStmt) stmtNode() {}
// IfStmt is an IF / ELSEIF / ELSE / ENDIF statement.
// Harbour: uses PHB_ELSEIF chain for fixups.
type IfStmt struct {
	IfPos    token.Position
	Cond     Expr
	Body     []Stmt
	ElseIfs  []*ElseIfClause
	ElseBody []Stmt // nil if no ELSE
	EndPos   token.Position
}

// ElseIfClause is one ELSEIF branch of an IfStmt.
type ElseIfClause struct {
	ElseIfPos token.Position
	Cond      Expr
	Body      []Stmt
}

// Pos returns the position stored in IfPos.
func (s *IfStmt) Pos() token.Position {
	return s.IfPos
}

// End returns the position stored in EndPos.
func (s *IfStmt) End() token.Position {
	return s.EndPos
}

func (s *IfStmt) stmtNode() {}
// DoWhileStmt is a DO WHILE cond ... ENDDO loop.
type DoWhileStmt struct {
	DoPos  token.Position
	Cond   Expr
	Body   []Stmt
	EndPos token.Position
}

// Pos returns the position stored in DoPos.
func (s *DoWhileStmt) Pos() token.Position {
	return s.DoPos
}

// End returns the position stored in EndPos.
func (s *DoWhileStmt) End() token.Position {
	return s.EndPos
}

func (s *DoWhileStmt) stmtNode() {}
// ForStmt is a counted loop: FOR var := start TO end [STEP step] ... NEXT.
type ForStmt struct {
	ForPos  token.Position
	Var     string
	Start   Expr
	To      Expr
	Step    Expr // nil for default step 1
	Body    []Stmt
	NextPos token.Position
}

// Pos returns the position stored in ForPos.
func (s *ForStmt) Pos() token.Position {
	return s.ForPos
}

// End returns the position stored in NextPos.
func (s *ForStmt) End() token.Position {
	return s.NextPos
}

func (s *ForStmt) stmtNode() {}
// ForEachStmt is an iteration loop: FOR EACH var IN collection ... NEXT.
// Harbour: HB_ENUMERATOR structure.
type ForEachStmt struct {
	ForPos     token.Position
	Var        string
	Collection Expr
	Descend    bool // FOR EACH DESCEND
	Body       []Stmt
	NextPos    token.Position
}

// Pos returns the position stored in ForPos.
func (s *ForEachStmt) Pos() token.Position {
	return s.ForPos
}

// End returns the position stored in NextPos.
func (s *ForEachStmt) End() token.Position {
	return s.NextPos
}

func (s *ForEachStmt) stmtNode() {}
// SwitchStmt is a SWITCH expr ... CASE ... OTHERWISE ... END statement.
// Harbour: HB_SWITCHCMD structure.
type SwitchStmt struct {
	SwitchPos token.Position
	Expr      Expr
	Cases     []*CaseClause
	Otherwise []Stmt // nil if no OTHERWISE
	EndPos    token.Position
}

// CaseClause is one CASE branch of a SwitchStmt.
type CaseClause struct {
	CasePos token.Position
	Value   Expr // case value
	Body    []Stmt
}

// Pos returns the position stored in SwitchPos.
func (s *SwitchStmt) Pos() token.Position {
	return s.SwitchPos
}

// End returns the position stored in EndPos.
func (s *SwitchStmt) End() token.Position {
	return s.EndPos
}

func (s *SwitchStmt) stmtNode() {}
// SeqStmt is a BEGIN SEQUENCE ... RECOVER [USING var] ... END statement.
type SeqStmt struct {
	BeginPos    token.Position
	Body        []Stmt
	RecoverVar  string // variable name after USING (empty if none)
	RecoverBody []Stmt // nil if no RECOVER
	EndPos      token.Position
}

// Pos returns the position stored in BeginPos.
func (s *SeqStmt) Pos() token.Position {
	return s.BeginPos
}

// End returns the position stored in EndPos.
func (s *SeqStmt) End() token.Position {
	return s.EndPos
}

func (s *SeqStmt) stmtNode() {}
// === Five Go Extensions ===
// MultiAssignStmt is a multi-target assignment: a, b, c := expr or
// a, b := Func(). It also covers the parallel swap a, b := b, a.
// The blank identifier _ discards the corresponding value.
type MultiAssignStmt struct {
	AssignPos token.Position
	Targets   []string // variable names ("_" = discard)
	Values    []Expr   // right-hand side expressions
}

// Pos returns the position stored in AssignPos.
func (s *MultiAssignStmt) Pos() token.Position {
	return s.AssignPos
}

// End returns the same position as Pos; no separate end is recorded.
func (s *MultiAssignStmt) End() token.Position {
	return s.AssignPos
}

func (s *MultiAssignStmt) stmtNode() {}
// DeferStmt is DEFER expr: the expression executes when the function returns.
type DeferStmt struct {
	DeferPos token.Position
	Call     Expr // expression to defer (usually a method/function call)
}

// Pos returns the position stored in DeferPos.
func (s *DeferStmt) Pos() token.Position {
	return s.DeferPos
}

// End returns the same position as Pos; no separate end is recorded.
func (s *DeferStmt) End() token.Position {
	return s.DeferPos
}

func (s *DeferStmt) stmtNode() {}
// ConstDecl is a CONST block with optional auto-increment.
type ConstDecl struct {
	ConstPos token.Position
	Items    []ConstItem
}

// ConstItem is one named constant inside a ConstDecl.
type ConstItem struct {
	Name  string
	Value Expr // nil = auto-increment from previous
}

// Pos returns the position stored in ConstPos.
func (d *ConstDecl) Pos() token.Position {
	return d.ConstPos
}

// End returns the same position as Pos; no separate end is recorded.
func (d *ConstDecl) End() token.Position {
	return d.ConstPos
}

func (d *ConstDecl) declNode() {}
// SliceExpr is a[low:high]: a sub-array or sub-string.
type SliceExpr struct {
	X        Expr
	LBracket token.Position
	Low      Expr // nil = from start
	High     Expr // nil = to end
	RBracket token.Position
}

// Pos returns the start of the sliced expression.
func (e *SliceExpr) Pos() token.Position {
	return e.X.Pos()
}

// End returns the position of the closing bracket.
func (e *SliceExpr) End() token.Position {
	return e.RBracket
}

func (e *SliceExpr) exprNode() {}
// NilSafeExpr is obj?:Method(): it returns NIL if obj is NIL.
type NilSafeExpr struct {
	X         Expr
	QPos      token.Position
	Method    string
	Args      []Expr
	HasParens bool
}

// Pos returns the start of the receiver expression.
func (e *NilSafeExpr) Pos() token.Position {
	return e.X.Pos()
}

// End returns the position stored in QPos.
func (e *NilSafeExpr) End() token.Position {
	return e.QPos
}

func (e *NilSafeExpr) exprNode() {}
// InterpolatedString is an f-string such as f"Hello {name}, age {age}".
type InterpolatedString struct {
	FPos  token.Position
	Parts []Expr // alternating: LiteralExpr (text), other Expr (interpolated)
}

// Pos returns the position stored in FPos.
func (e *InterpolatedString) Pos() token.Position {
	return e.FPos
}

// End returns the same position as Pos; no separate end is recorded.
func (e *InterpolatedString) End() token.Position {
	return e.FPos
}

func (e *InterpolatedString) exprNode() {}
// === Five Concurrency Extensions ===
// ChanSendStmt is the channel send statement `ch <- value`.
type ChanSendStmt struct {
	ChanPos token.Position // position reported by Pos and End
	Chan    Expr           // channel being sent to
	Value   Expr           // value being sent
}

// Pos returns ChanPos.
func (s *ChanSendStmt) Pos() token.Position {
	return s.ChanPos
}

// End returns ChanPos; no separate end position is recorded for this node.
func (s *ChanSendStmt) End() token.Position {
	return s.ChanPos
}

// stmtNode is an empty marker method.
func (s *ChanSendStmt) stmtNode() {}
// ChanRecvExpr is the channel receive `<- ch` used as an expression.
type ChanRecvExpr struct {
	ArrowPos token.Position // position reported by Pos and End
	Chan     Expr           // channel being received from
}

// Pos returns ArrowPos.
func (e *ChanRecvExpr) Pos() token.Position {
	return e.ArrowPos
}

// End returns ArrowPos; no separate end position is recorded for this node.
func (e *ChanRecvExpr) End() token.Position {
	return e.ArrowPos
}

// exprNode is an empty marker method.
func (e *ChanRecvExpr) exprNode() {}
// WatchStmt is the construct
// WATCH / CASE <- ch / CASE ch <- val / OTHERWISE / ENDWATCH.
type WatchStmt struct {
	WatchPos  token.Position // position returned by Pos
	Cases     []*WatchCase   // CASE branches in source order
	Otherwise []Stmt         // statements of the OTHERWISE branch
	EndPos    token.Position // position returned by End
}

// WatchCase is one CASE branch of a WatchStmt. A receive case fills
// RecvChan (and optionally RecvVar); a send case fills SendChan and SendVal.
type WatchCase struct {
	CasePos  token.Position // position of the case
	RecvChan Expr           // channel of `CASE val := <- ch`
	RecvVar  string         // variable receiving the value; "" when there is none
	SendChan Expr           // channel of `CASE ch <- val`
	SendVal  Expr           // value sent in a send case
	Body     []Stmt         // statements of the branch
}

// Pos returns WatchPos.
func (s *WatchStmt) Pos() token.Position {
	return s.WatchPos
}

// End returns EndPos.
func (s *WatchStmt) End() token.Position {
	return s.EndPos
}

// stmtNode is an empty marker method.
func (s *WatchStmt) stmtNode() {}
// GoBlockStmt is the inline goroutine statement `GO { ... }`.
type GoBlockStmt struct {
	GoPos token.Position // position reported by Pos and End
	Block *BlockExpr     // code block to run
}

// Pos returns GoPos.
func (s *GoBlockStmt) Pos() token.Position {
	return s.GoPos
}

// End returns GoPos; no separate end position is recorded for this node.
func (s *GoBlockStmt) End() token.Position {
	return s.GoPos
}

// stmtNode is an empty marker method.
func (s *GoBlockStmt) stmtNode() {}
// ParallelForStmt is the loop `PARALLEL FOR i := 1 TO n / body / NEXT`.
type ParallelForStmt struct {
	ForPos token.Position // position returned by Pos
	Var    string         // loop variable name
	Start  Expr           // initial value
	To     Expr           // upper bound
	Step   Expr           // increment; nil means the default of 1
	Body   []Stmt         // loop body
	EndPos token.Position // position returned by End
}

// Pos returns ForPos.
func (s *ParallelForStmt) Pos() token.Position {
	return s.ForPos
}

// End returns EndPos.
func (s *ParallelForStmt) End() token.Position {
	return s.EndPos
}

// stmtNode is an empty marker method.
func (s *ParallelForStmt) stmtNode() {}
// AsyncExpr is `ASYNC expr`, which yields a future/channel.
type AsyncExpr struct {
	AsyncPos token.Position // position reported by Pos and End
	Call     Expr           // expression evaluated asynchronously
}

// Pos returns AsyncPos.
func (e *AsyncExpr) Pos() token.Position {
	return e.AsyncPos
}

// End returns AsyncPos; no separate end position is recorded for this node.
func (e *AsyncExpr) End() token.Position {
	return e.AsyncPos
}

// exprNode is an empty marker method.
func (e *AsyncExpr) exprNode() {}
// AwaitExpr is `AWAIT future`, which blocks until the result is ready.
type AwaitExpr struct {
	AwaitPos token.Position // position reported by Pos and End
	Future   Expr           // future being awaited
}

// Pos returns AwaitPos.
func (e *AwaitExpr) Pos() token.Position {
	return e.AwaitPos
}

// End returns AwaitPos; no separate end position is recorded for this node.
func (e *AwaitExpr) End() token.Position {
	return e.AwaitPos
}

// exprNode is an empty marker method.
func (e *AwaitExpr) exprNode() {}
// TimeoutStmt is the block `WITH TIMEOUT n / body / ENDWITH`.
type TimeoutStmt struct {
	WithPos  token.Position // position returned by Pos
	Duration Expr           // timeout, in seconds
	Body     []Stmt         // guarded statements
	EndPos   token.Position // position returned by End
}

// Pos returns WithPos.
func (s *TimeoutStmt) Pos() token.Position {
	return s.WithPos
}

// End returns EndPos.
func (s *TimeoutStmt) End() token.Position {
	return s.EndPos
}

// stmtNode is an empty marker method.
func (s *TimeoutStmt) stmtNode() {}
// === End Five Go Extensions ===
// ExitStmt is the EXIT statement, which breaks out of the enclosing loop.
type ExitStmt struct {
	ExitPos token.Position // position reported by Pos and End
}

// Pos returns ExitPos.
func (s *ExitStmt) Pos() token.Position {
	return s.ExitPos
}

// End returns ExitPos; no separate end position is recorded for this node.
func (s *ExitStmt) End() token.Position {
	return s.ExitPos
}

// stmtNode is an empty marker method.
func (s *ExitStmt) stmtNode() {}
// LoopStmt is the LOOP statement, which continues with the next iteration.
type LoopStmt struct {
	LoopPos token.Position // position reported by Pos and End
}

// Pos returns LoopPos.
func (s *LoopStmt) Pos() token.Position {
	return s.LoopPos
}

// End returns LoopPos; no separate end position is recorded for this node.
func (s *LoopStmt) End() token.Position {
	return s.LoopPos
}

// stmtNode is an empty marker method.
func (s *LoopStmt) stmtNode() {}
// --- xBase command statements ---
// UseCmd is the command USE [file] [VIA driver] [ALIAS name] [EXCLUSIVE|SHARED].
type UseCmd struct {
	UsePos token.Position // position reported by Pos and End
	File   Expr           // file name expression; nil closes the current area
	Via    string         // RDD driver name
	Alias  string         // alias name
}

// Pos returns UsePos.
func (s *UseCmd) Pos() token.Position {
	return s.UsePos
}

// End returns UsePos; no separate end position is recorded for this node.
func (s *UseCmd) End() token.Position {
	return s.UsePos
}

// stmtNode is an empty marker method.
func (s *UseCmd) stmtNode() {}
// SelectCmd is the command SELECT area.
type SelectCmd struct {
	SelectPos token.Position // position reported by Pos and End
	Area      Expr           // work area number or alias name
}

// Pos returns SelectPos.
func (s *SelectCmd) Pos() token.Position {
	return s.SelectPos
}

// End returns SelectPos; no separate end position is recorded for this node.
func (s *SelectCmd) End() token.Position {
	return s.SelectPos
}

// stmtNode is an empty marker method.
func (s *SelectCmd) stmtNode() {}
// GoCmd covers GO TOP, GO BOTTOM, GO recno and GOTO recno.
type GoCmd struct {
	GoPos     token.Position // position reported by Pos and End
	Direction string         // "TOP", "BOTTOM", or "" for a record number
	RecNo     Expr           // record number; nil for TOP/BOTTOM
}

// Pos returns GoPos.
func (s *GoCmd) Pos() token.Position {
	return s.GoPos
}

// End returns GoPos; no separate end position is recorded for this node.
func (s *GoCmd) End() token.Position {
	return s.GoPos
}

// stmtNode is an empty marker method.
func (s *GoCmd) stmtNode() {}
// SkipCmd is the command SKIP [n].
type SkipCmd struct {
	SkipPos token.Position // position reported by Pos and End
	Count   Expr           // number of records; nil stands for SKIP 1
}

// Pos returns SkipPos.
func (s *SkipCmd) Pos() token.Position {
	return s.SkipPos
}

// End returns SkipPos; no separate end position is recorded for this node.
func (s *SkipCmd) End() token.Position {
	return s.SkipPos
}

// stmtNode is an empty marker method.
func (s *SkipCmd) stmtNode() {}
// SeekCmd is the command SEEK expr [SOFTSEEK].
type SeekCmd struct {
	SeekPos  token.Position // position reported by Pos and End
	Key      Expr           // key expression to look up
	SoftSeek bool           // set when SOFTSEEK was given
}

// Pos returns SeekPos.
func (s *SeekCmd) Pos() token.Position {
	return s.SeekPos
}

// End returns SeekPos; no separate end position is recorded for this node.
func (s *SeekCmd) End() token.Position {
	return s.SeekPos
}

// stmtNode is an empty marker method.
func (s *SeekCmd) stmtNode() {}
// ReplaceCmd is the command REPLACE field WITH expr [, field WITH expr ...].
type ReplaceCmd struct {
	ReplacePos token.Position // position reported by Pos and End
	Fields     []ReplaceField // field/value pairs in source order
}

// ReplaceField is one `field WITH expr` pair of a ReplaceCmd.
type ReplaceField struct {
	Field Expr // target field expression (may carry an alias)
	Value Expr // new value
}

// Pos returns ReplacePos.
func (s *ReplaceCmd) Pos() token.Position {
	return s.ReplacePos
}

// End returns ReplacePos; no separate end position is recorded for this node.
func (s *ReplaceCmd) End() token.Position {
	return s.ReplacePos
}

// stmtNode is an empty marker method.
func (s *ReplaceCmd) stmtNode() {}
// AppendCmd is the command APPEND BLANK.
type AppendCmd struct {
	AppendPos token.Position // position reported by Pos and End
}

// Pos returns AppendPos.
func (s *AppendCmd) Pos() token.Position {
	return s.AppendPos
}

// End returns AppendPos; no separate end position is recorded for this node.
func (s *AppendCmd) End() token.Position {
	return s.AppendPos
}

// stmtNode is an empty marker method.
func (s *AppendCmd) stmtNode() {}
// DeleteCmd is the command DELETE, which marks the current record for deletion.
type DeleteCmd struct {
	DeletePos token.Position // position reported by Pos and End
}

// Pos returns DeletePos.
func (s *DeleteCmd) Pos() token.Position {
	return s.DeletePos
}

// End returns DeletePos; no separate end position is recorded for this node.
func (s *DeleteCmd) End() token.Position {
	return s.DeletePos
}

// stmtNode is an empty marker method.
func (s *DeleteCmd) stmtNode() {}
// IndexCmd is the command INDEX ON expr TO file [FOR cond] [UNIQUE] [DESCENDING].
type IndexCmd struct {
	IndexPos   token.Position // position reported by Pos and End
	KeyExpr    Expr           // index key expression
	File       Expr           // target index file
	ForCond    Expr           // FOR condition; nil when absent
	Unique     bool           // set when UNIQUE was given
	Descending bool           // set when DESCENDING was given
}

// Pos returns IndexPos.
func (s *IndexCmd) Pos() token.Position {
	return s.IndexPos
}

// End returns IndexPos; no separate end position is recorded for this node.
func (s *IndexCmd) End() token.Position {
	return s.IndexPos
}

// stmtNode is an empty marker method.
func (s *IndexCmd) stmtNode() {}
// SetCmd covers the SET family of commands, e.g. SET FILTER TO expr or
// SET RELATION TO expr INTO alias.
type SetCmd struct {
	SetPos  token.Position // position reported by Pos and End
	Setting string         // which setting: "FILTER", "RELATION", "ORDER", "INDEX", ...
	Expr    Expr           // value expression
	Extra   string         // additional information such as the INTO alias
}

// Pos returns SetPos.
func (s *SetCmd) Pos() token.Position {
	return s.SetPos
}

// End returns SetPos; no separate end position is recorded for this node.
func (s *SetCmd) End() token.Position {
	return s.SetPos
}

// stmtNode is an empty marker method.
func (s *SetCmd) stmtNode() {}
// AtSayCmd is the command @ row, col SAY expr [PICTURE pic].
type AtSayCmd struct {
	AtPos   token.Position // position reported by Pos and End
	Row     Expr           // screen row
	Col     Expr           // screen column
	SayExpr Expr           // expression to display
	Picture Expr           // PICTURE clause; nil when absent
}

// Pos returns AtPos.
func (s *AtSayCmd) Pos() token.Position {
	return s.AtPos
}

// End returns AtPos; no separate end position is recorded for this node.
func (s *AtSayCmd) End() token.Position {
	return s.AtPos
}

// stmtNode is an empty marker method.
func (s *AtSayCmd) stmtNode() {}
// AtGetCmd is the command
// @ row, col GET var [PICTURE pic] [VALID valid] [WHEN when].
type AtGetCmd struct {
	AtPos   token.Position // position reported by Pos and End
	Row     Expr           // screen row
	Col     Expr           // screen column
	Var     Expr           // variable being edited, as an expression
	VarName string         // the same variable, by name
	Picture Expr           // PICTURE clause; nil when absent
	Valid   Expr           // VALID code block; nil when absent
	When    Expr           // WHEN code block; nil when absent
}

// Pos returns AtPos.
func (s *AtGetCmd) Pos() token.Position {
	return s.AtPos
}

// End returns AtPos; no separate end position is recorded for this node.
func (s *AtGetCmd) End() token.Position {
	return s.AtPos
}

// stmtNode is an empty marker method.
func (s *AtGetCmd) stmtNode() {}
// AtSayGetCmd is the combined command
// @ row, col SAY expr GET var [PICTURE pic] [VALID valid] [WHEN when].
type AtSayGetCmd struct {
	AtPos   token.Position // position reported by Pos and End
	Row     Expr           // screen row
	Col     Expr           // screen column
	SayExpr Expr           // expression displayed by the SAY part
	Var     Expr           // variable edited by the GET part
	VarName string         // the same variable, by name
	Picture Expr           // PICTURE clause
	Valid   Expr           // VALID clause
	When    Expr           // WHEN clause
}

// Pos returns AtPos.
func (s *AtSayGetCmd) Pos() token.Position {
	return s.AtPos
}

// End returns AtPos; no separate end position is recorded for this node.
func (s *AtSayGetCmd) End() token.Position {
	return s.AtPos
}

// stmtNode is an empty marker method.
func (s *AtSayGetCmd) stmtNode() {}
// ReadCmd is the command READ [SAVE].
type ReadCmd struct {
	ReadPos token.Position // position reported by Pos and End
	Save    bool           // set when SAVE was given
}

// Pos returns ReadPos.
func (s *ReadCmd) Pos() token.Position {
	return s.ReadPos
}

// End returns ReadPos; no separate end position is recorded for this node.
func (s *ReadCmd) End() token.Position {
	return s.ReadPos
}

// stmtNode is an empty marker method.
func (s *ReadCmd) stmtNode() {}

179
compiler/gengo/gen_class.go Normal file
View File

@@ -0,0 +1,179 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// CLASS code generation for Five.
// Generates Go code that registers classes with hbrt.ClassDef.
package gengo
import (
"five/compiler/ast"
"five/compiler/token"
"fmt"
"strings"
)
// emitClassDecl writes the Go code that registers a CLASS with the runtime.
//
// For
//
//	CLASS Person
//	   DATA cName INIT ""
//	   DATA nAge  INIT 0
//	   METHOD New(cName, nAge)
//	ENDCLASS
//
// it emits a package-level `_cls_PERSON` handle, an init() function that
// builds and registers the hbrt class definition, and a HB_PERSON_CTOR
// function that pushes a fresh object of that class.
func (g *Generator) emitClassDecl(cls *ast.ClassDecl) {
	upperClass := strings.ToUpper(cls.Name)
	handle := "_cls_" + upperClass

	g.writeln(fmt.Sprintf("var %s uint16", handle))
	g.writeln("")
	g.writeln("func init() {")
	g.indent++
	g.writeln(fmt.Sprintf("_def := hbrt.NewClassDef(%q)", cls.Name))

	if cls.ParentName != "" {
		g.writeln(fmt.Sprintf("_def.InheritFrom(%q)", cls.ParentName))
	}

	// DATA members first, each with its INIT literal (NIL when absent).
	for _, member := range cls.Members {
		data, isData := member.(*ast.DataDecl)
		if !isData {
			continue
		}
		initial := "hbrt.MakeNil()"
		if data.Init != nil {
			initial = g.exprToGoLiteral(data.Init)
		}
		g.writeln(fmt.Sprintf("_def.AddData(%q, %s)", strings.ToUpper(data.Name), initial))
	}

	// METHOD members: bind each message name to its HB_<CLASS>_<METHOD> function.
	for _, member := range cls.Members {
		method, isMethod := member.(*ast.MethodDecl)
		if !isMethod {
			continue
		}
		selector := strings.ToUpper(method.Name)
		impl := fmt.Sprintf("HB_%s_%s", upperClass, selector)

		var messages []string
		switch {
		case method.IsSetGet:
			// SETGET answers both the getter (name) and the setter (_name).
			messages = []string{selector, "_" + selector}
		case method.IsAccess:
			// ACCESS propName METHOD getterName
			messages = []string{strings.ToUpper(method.AccessName)}
		case method.IsAssign:
			// ASSIGN propName METHOD setterName
			messages = []string{"_" + strings.ToUpper(method.AccessName)}
		default:
			messages = []string{selector}
		}
		for _, msg := range messages {
			g.writeln(fmt.Sprintf("_def.AddMethod(%q, %s)", msg, impl))
		}
	}

	g.writeln(fmt.Sprintf("%s = _def.Register()", handle))
	g.indent--
	g.writeln("}")
	g.writeln("")

	// Constructor function, invoked as Person():New(...). Its symbol is
	// added during the symbol collection phase of Generate().
	g.writeln(fmt.Sprintf("func HB_%s_CTOR(t *hbrt.Thread) {", upperClass))
	g.indent++
	for _, line := range []string{
		"t.Frame(0, 0)",
		"defer t.EndProc()",
		fmt.Sprintf("t.PushValue(hbrt.NewObject(%s))", handle),
		"t.RetValue()",
	} {
		g.writeln(line)
	}
	g.indent--
	g.writeln("}")
	g.writeln("")
}
// emitMethodDeclStandalone generates the Go function for a standalone
// `METHOD ... CLASS ...` implementation, named HB_<CLASS>_<METHOD>.
//
// Declarations without a class name are in-class prototypes with no body
// and produce no output.
//
// Parameters take local slots 1..n and the declared variables follow. The
// map of locals is published in g.curLocals before any initializer is
// emitted, so initializer expressions are generated against this method's
// locals rather than those of the previously generated function.
func (g *Generator) emitMethodDeclStandalone(md *ast.MethodDecl) {
	if md.ClassName == "" {
		return // in-class method declaration only (no body)
	}

	goFuncName := fmt.Sprintf("HB_%s_%s", strings.ToUpper(md.ClassName), strings.ToUpper(md.Name))

	nLocals := 0
	for _, d := range md.Decls {
		if vd, ok := d.(*ast.VarDecl); ok {
			nLocals += len(vd.Vars)
		}
	}

	g.writeln(fmt.Sprintf("func %s(t *hbrt.Thread) {", goFuncName))
	g.indent++
	g.writeln(fmt.Sprintf("t.Frame(%d, %d)", len(md.Params), nLocals))
	g.writeln("defer t.EndProc()")
	g.writeln("")

	// Build the local map. It is shared by reference, so entries added below
	// become visible through g.curLocals as they are declared.
	locals := make(localMap)
	g.curLocals = locals

	idx := 1
	for _, p := range md.Params {
		locals[p.Name] = idx
		idx++
	}
	for _, d := range md.Decls {
		vd, ok := d.(*ast.VarDecl)
		if !ok {
			continue
		}
		for _, v := range vd.Vars {
			if v.Init != nil {
				g.emitExpr(v.Init)
				g.writeln(fmt.Sprintf("t.PopLocal(%d)", idx))
			}
			// A variable becomes resolvable only after its own initializer.
			locals[v.Name] = idx
			idx++
		}
	}

	for _, stmt := range md.Body {
		g.emitStmt(stmt, locals)
	}

	g.indent--
	g.writeln("}")
	g.writeln("")
}
// exprToGoLiteral renders a simple AST expression as the Go source text of
// an hbrt value constructor. It is used for DATA INIT values.
//
// Scalar literals and empty array/hash literals are supported. Anything else,
// including non-empty arrays and hashes (which would need runtime
// construction), yields "hbrt.MakeNil()".
func (g *Generator) exprToGoLiteral(expr ast.Expr) string {
	const nilLiteral = "hbrt.MakeNil()"

	if lit, ok := expr.(*ast.LiteralExpr); ok {
		switch lit.Kind {
		case token.INT:
			return "hbrt.MakeInt(" + lit.Value + ")"
		case token.DOUBLE:
			return "hbrt.MakeDoubleAuto(" + lit.Value + ")"
		case token.STRING:
			return fmt.Sprintf("hbrt.MakeString(%q)", lit.Value)
		case token.TRUE:
			return "hbrt.MakeBool(true)"
		case token.FALSE:
			return "hbrt.MakeBool(false)"
		}
		// NIL literals and any other literal kind.
		return nilLiteral
	}

	if arr, ok := expr.(*ast.ArrayLitExpr); ok && len(arr.Items) == 0 {
		return "hbrt.MakeArray(0)"
	}
	if hash, ok := expr.(*ast.HashLitExpr); ok && len(hash.Keys) == 0 {
		return "hbrt.MakeHash()"
	}
	return nilLiteral
}

312
compiler/gengo/gen_cmd.go Normal file
View File

@@ -0,0 +1,312 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// xBase command code generation for Five.
// Generates Go code that calls hbrdd WorkAreaManager methods.
package gengo
import (
"five/compiler/ast"
"five/compiler/token"
"fmt"
"strings"
)
// emitUseCmd generates code for the USE command. Without a file expression
// it closes the current work area; otherwise it evaluates the file
// expression and opens it through the work area manager, using the
// "DBFNTX" driver when no VIA clause was given. The generated code panics
// if the open fails.
func (g *Generator) emitUseCmd(s *ast.UseCmd, locals localMap) {
	g.writeln("{")
	g.indent++
	g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")

	if s.File == nil {
		// USE without arguments closes the current area.
		g.writeln("wa.Close()")
	} else {
		g.emitExpr(s.File)
		g.writeln("_path := t.Pop2().AsString()")

		driver := s.Via
		if driver == "" {
			driver = "DBFNTX"
		}
		g.writeln(fmt.Sprintf("_, _err := wa.Open(%q, _path, %q, false, false)", driver, s.Alias))
		g.writeln("if _err != nil { panic(_err) }")
	}

	g.indent--
	g.writeln("}")
}
// emitGoCmd generates code for GO TOP, GO BOTTOM and GO/GOTO recno. The
// generated code does nothing when there is no current work area, and a
// GoCmd with neither a direction nor a record number produces an empty guard.
func (g *Generator) emitGoCmd(s *ast.GoCmd) {
	g.writeln("{")
	g.indent++
	g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
	g.writeln("if area := wa.Current(); area != nil {")
	g.indent++

	switch {
	case s.Direction == "TOP":
		g.writeln("area.GoTop()")
	case s.Direction == "BOTTOM":
		g.writeln("area.GoBottom()")
	case s.RecNo != nil:
		g.emitExpr(s.RecNo)
		g.writeln("area.GoTo(uint32(t.Pop2().AsNumInt()))")
	}

	// Close the guard, then the enclosing block.
	for i := 0; i < 2; i++ {
		g.indent--
		g.writeln("}")
	}
}
// emitSkipCmd generates code for SKIP [n]: it moves the current work area by
// the evaluated count, or by 1 when no count was given. The generated code
// does nothing when there is no current work area.
func (g *Generator) emitSkipCmd(s *ast.SkipCmd, locals localMap) {
	g.writeln("{")
	g.indent++
	g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
	g.writeln("if area := wa.Current(); area != nil {")
	g.indent++

	skipCall := "area.Skip(1)"
	if s.Count != nil {
		g.emitExpr(s.Count)
		skipCall = "area.Skip(t.Pop2().AsNumInt())"
	}
	g.writeln(skipCall)

	// Close the guard, then the enclosing block.
	for i := 0; i < 2; i++ {
		g.indent--
		g.writeln("}")
	}
}
// emitSeekCmd generates code for SEEK expr [SOFTSEEK]. The key is evaluated
// and passed to the current area's Seek when that area implements
// hbrdd.Indexer; the found flag and the error are discarded by the
// generated code.
func (g *Generator) emitSeekCmd(s *ast.SeekCmd, locals localMap) {
	g.writeln("{")
	g.indent++
	g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
	g.writeln("if area := wa.Current(); area != nil {")
	g.indent++

	g.emitExpr(s.Key)
	g.writeln("_key := t.Pop2()")
	g.writeln("if _idx, ok := area.(hbrdd.Indexer); ok {")
	g.indent++
	seekLine := fmt.Sprintf("_found, _ := _idx.Seek(_key, %v, false)", s.SoftSeek)
	g.writeln(seekLine)
	g.writeln("_ = _found")

	// Close the Indexer check, the area guard and the enclosing block.
	for i := 0; i < 3; i++ {
		g.indent--
		g.writeln("}")
	}
}
// emitReplaceCmd generates code for REPLACE field WITH expr [, ...]. Each
// target that is a plain identifier is looked up by name in the current
// area and, when found, assigned the evaluated value; the area is flushed
// afterwards.
//
// Targets that are not plain identifiers are skipped without output.
// NOTE(review): the generated code asserts the area to *dbf.DBFArea
// unconditionally — confirm no other area types can be current here.
func (g *Generator) emitReplaceCmd(s *ast.ReplaceCmd, locals localMap) {
	g.writeln("{")
	g.indent++
	g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
	g.writeln("if area := wa.Current(); area != nil {")
	g.indent++

	for _, pair := range s.Fields {
		target, isIdent := pair.Field.(*ast.IdentExpr)
		if !isIdent {
			continue
		}
		g.writeln(fmt.Sprintf("if _fi := area.(*dbf.DBFArea).FieldIndex(%q); _fi >= 0 {", target.Name))
		g.indent++
		g.emitExpr(pair.Value)
		g.writeln("area.PutValue(_fi, t.Pop2())")
		g.indent--
		g.writeln("}")
	}
	g.writeln("area.Flush()")

	// Close the guard, then the enclosing block.
	for i := 0; i < 2; i++ {
		g.indent--
		g.writeln("}")
	}
}
// --- @ SAY / GET / READ commands ---
// emitAtSayCmd generates code for @ row, col SAY expr [PICTURE pic]:
// a DEVPOS(row, col) call followed by DEVOUTPICT(expr, pic) when a picture
// is present, or DEVOUT(expr) otherwise.
func (g *Generator) emitAtSayCmd(s *ast.AtSayCmd) {
	// beginCall pushes the symbol and the NIL self slot of a procedure call.
	beginCall := func(symbol string) {
		g.writeln("t.PushSymbol(t.VM().FindSymbol(\"" + symbol + "\"))")
		g.writeln("t.PushNil()")
	}

	beginCall("DEVPOS")
	g.emitExpr(s.Row)
	g.emitExpr(s.Col)
	g.writeln("t.Do(2)")

	if s.Picture == nil {
		beginCall("DEVOUT")
		g.emitExpr(s.SayExpr)
		g.writeln("t.Do(1)")
		return
	}

	beginCall("DEVOUTPICT")
	g.emitExpr(s.SayExpr)
	g.emitExpr(s.Picture)
	g.writeln("t.Do(2)")
}
// emitAtGetCmd generates code for
// @ row, col GET var [PICTURE pic] [VALID valid] [WHEN when], equivalent to
//
//	AAdd(GetList, GetNew(row, col, {|_1| IIF(_1==NIL, var, var:=_1)}, "varname" [, pic] [, valid] [, when]))
//	ATail(GetList):Display()
//
// The optional clauses are positional: a missing clause that precedes a
// present one is passed as NIL, and trailing missing clauses are omitted.
func (g *Generator) emitAtGetCmd(s *ast.AtGetCmd, locals localMap) {
	// beginCall pushes the symbol and the NIL self slot of a call.
	beginCall := func(symbol string) {
		g.writeln("t.PushSymbol(t.VM().FindSymbol(\"" + symbol + "\"))")
		g.writeln("t.PushNil()")
	}

	beginCall("AADD")
	g.emitIdentByName("GetList", locals)

	// GetNew(row, col, block, name, ...)
	beginCall("GETNEW")
	g.emitExpr(s.Row)
	g.emitExpr(s.Col)
	g.emitGetSetBlock(s.Var, s.VarName, locals)
	g.writeln(fmt.Sprintf("t.PushString(%q)", s.VarName))

	// Emit the optional clauses up to the last one that is present.
	optional := [...]ast.Expr{s.Picture, s.Valid, s.When}
	lastSet := -1
	for i, clause := range optional {
		if clause != nil {
			lastSet = i
		}
	}
	for _, clause := range optional[:lastSet+1] {
		if clause != nil {
			g.emitExpr(clause)
		} else {
			g.writeln("t.PushNil()") // placeholder for a skipped clause
		}
	}
	argc := 4 + lastSet + 1

	g.writeln(fmt.Sprintf("t.Function(%d)", argc))
	// AAdd(GetList, getObj) — 2 args
	g.writeln("t.Do(2)")

	// ATail(GetList):Display()
	beginCall("ATAIL")
	g.emitIdentByName("GetList", locals)
	g.writeln("t.Function(1)")
	g.writeln("t.Send(\"DISPLAY\", 0)")
	g.writeln("t.Pop() // discard Display result")
}
// emitAtSayGetCmd generates code for the combined @ ... SAY ... GET command
// by emitting a SAY at the given position followed by a GET placed at
// Row(), Col()+1. The PICTURE, VALID and WHEN clauses go to the GET part.
func (g *Generator) emitAtSayGetCmd(s *ast.AtSayGetCmd, locals localMap) {
	say := &ast.AtSayCmd{
		AtPos:   s.AtPos,
		Row:     s.Row,
		Col:     s.Col,
		SayExpr: s.SayExpr,
	}
	g.emitAtSayCmd(say)

	// Synthesized position for the GET: Row() and Col()+1.
	rowCall := &ast.CallExpr{Func: &ast.IdentExpr{Name: "Row"}, Args: nil}
	colCall := &ast.CallExpr{Func: &ast.IdentExpr{Name: "Col"}, Args: nil}
	colPlusOne := &ast.BinaryExpr{
		Left:  colCall,
		Op:    token.PLUS,
		Right: &ast.LiteralExpr{Kind: token.INT, Value: "1"},
	}

	get := &ast.AtGetCmd{
		AtPos:   s.AtPos,
		Row:     rowCall,
		Col:     colPlusOne,
		Var:     s.Var,
		VarName: s.VarName,
		Picture: s.Picture,
		Valid:   s.Valid,
		When:    s.When,
	}
	g.emitAtGetCmd(get, locals)
}
// emitReadCmd generates code for READ [SAVE]: a ReadModal(GetList) call,
// followed by `GetList := {}` unless SAVE was given.
func (g *Generator) emitReadCmd(s *ast.ReadCmd, locals localMap) {
	g.writeln("t.PushSymbol(t.VM().FindSymbol(\"READMODAL\"))")
	g.writeln("t.PushNil()")
	g.emitIdentByName("GetList", locals)
	g.writeln("t.Do(1)")

	if s.Save {
		return
	}
	// Reset the list: GetList := {}
	g.writeln("t.PushValue(hbrt.MakeArray(0))")
	g.emitPopByName("GetList", locals)
}
// emitGetSetBlock generates the code block
// {|_1| IIF(_1 == NIL, var, var := _1)} used by GET objects.
//
// The generated block captures the creating frame and its locals slice
// together with the variable's local index, so the outer variable is
// reached correctly even when the block runs at a different call depth
// (e.g. Eval inside GetNew). When varName is not a known local, a NIL is
// pushed in place of the block.
func (g *Generator) emitGetSetBlock(varExpr ast.Expr, varName string, locals localMap) {
	idx, isLocal := locals[varName]
	if !isLocal {
		// Fallback: push NIL block
		g.writeln("t.PushNil() // GET block for unresolved var")
		return
	}

	// open writes a line and indents what follows; shut does the reverse.
	open := func(line string) {
		g.writeln(line)
		g.indent++
	}
	shut := func(line string) {
		g.indent--
		g.writeln(line)
	}

	open(fmt.Sprintf("{ // GET/SET block for %s", varName))
	g.writeln(fmt.Sprintf("_getIdx := %d", idx))
	g.writeln("_getFrame := t.CurFrame()")
	g.writeln("_getLocals := t.LocalsSlice()")

	open("t.PushBlock(func(t2 *hbrt.Thread) {")
	g.writeln("t2.Frame(1, 0)")
	g.writeln("defer t2.EndProc()")

	// Getter branch: no argument supplied.
	open("if t2.Local(1).IsNil() {")
	g.writeln("t2.PushValue(_getFrame.GetLocal(_getIdx, _getLocals))")
	g.writeln("t2.RetValue()")

	// Setter branch: store the argument and return it.
	shut("} else {")
	g.indent++
	g.writeln("_getFrame.SetLocal(_getIdx, t2.Local(1), _getLocals)")
	g.writeln("t2.PushValue(t2.Local(1))")
	g.writeln("t2.RetValue()")
	shut("}")

	shut("}, 0)")
	shut("}")
}
// emitIdentByName emits code that pushes the named variable onto the stack.
// Locals take precedence over static variables (matched by upper-cased
// name); an unknown name produces a PushLocal(0) tagged as UNRESOLVED.
func (g *Generator) emitIdentByName(name string, locals localMap) {
	if slot, isLocal := locals[name]; isLocal {
		g.writeln(fmt.Sprintf("t.PushLocal(%d)", slot))
		return
	}
	if static, isStatic := g.staticVars[strings.ToUpper(name)]; isStatic {
		g.writeln(fmt.Sprintf("t.PushValue(%s)", static))
		return
	}
	g.writeln(fmt.Sprintf("t.PushLocal(0) // UNRESOLVED: %q", name))
}
// emitPopByName emits code that pops the top of the stack into the named
// variable. Locals take precedence over static variables (matched by
// upper-cased name); for an unknown name the value is popped and dropped.
func (g *Generator) emitPopByName(name string, locals localMap) {
	if slot, isLocal := locals[name]; isLocal {
		g.writeln(fmt.Sprintf("t.PopLocal(%d)", slot))
		return
	}
	if static, isStatic := g.staticVars[strings.ToUpper(name)]; isStatic {
		g.writeln(fmt.Sprintf("%s = t.Pop2()", static))
		return
	}
	g.writeln(fmt.Sprintf("t.Pop() // cannot assign to UNRESOLVED: %q", name))
}

View File

@@ -0,0 +1,25 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package gengo
import "five/compiler/ast"
// hasXBaseCommands reports whether any function in the file contains an
// xBase work-area command (USE, GO, SKIP, SEEK, REPLACE, APPEND, DELETE,
// SELECT, INDEX or SET).
//
// Function bodies are searched recursively, so commands nested inside
// IF, DO WHILE, FOR, WATCH, PARALLEL FOR or WITH TIMEOUT blocks are found
// as well as top-level ones.
func hasXBaseCommands(file *ast.File) bool {
	for _, d := range file.Decls {
		fn, ok := d.(*ast.FuncDecl)
		if !ok {
			continue
		}
		if stmtsHaveXBaseCommands(fn.Body) {
			return true
		}
	}
	return false
}

// stmtsHaveXBaseCommands reports whether stmts, or the body of any compound
// statement nested in it, contains an xBase work-area command.
func stmtsHaveXBaseCommands(stmts []ast.Stmt) bool {
	for _, stmt := range stmts {
		switch s := stmt.(type) {
		case *ast.UseCmd, *ast.GoCmd, *ast.SkipCmd, *ast.SeekCmd,
			*ast.ReplaceCmd, *ast.AppendCmd, *ast.DeleteCmd,
			*ast.SelectCmd, *ast.IndexCmd, *ast.SetCmd:
			return true
		case *ast.IfStmt:
			if stmtsHaveXBaseCommands(s.Body) || stmtsHaveXBaseCommands(s.ElseBody) {
				return true
			}
			for _, elif := range s.ElseIfs {
				if stmtsHaveXBaseCommands(elif.Body) {
					return true
				}
			}
		case *ast.DoWhileStmt:
			if stmtsHaveXBaseCommands(s.Body) {
				return true
			}
		case *ast.ForStmt:
			if stmtsHaveXBaseCommands(s.Body) {
				return true
			}
		case *ast.ParallelForStmt:
			if stmtsHaveXBaseCommands(s.Body) {
				return true
			}
		case *ast.TimeoutStmt:
			if stmtsHaveXBaseCommands(s.Body) {
				return true
			}
		case *ast.WatchStmt:
			if stmtsHaveXBaseCommands(s.Otherwise) {
				return true
			}
			for _, c := range s.Cases {
				if c != nil && stmtsHaveXBaseCommands(c.Body) {
					return true
				}
			}
		}
	}
	return false
}

1610
compiler/gengo/gengo.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,156 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package gengo
import (
"five/compiler/parser"
"strings"
"testing"
)
// generate parses source as "test.prg" and returns the generated Go code.
// Every parse error is reported on t and the test is then aborted.
func generate(t *testing.T, source string) string {
	t.Helper()

	file, errs := parser.Parse("test.prg", source)
	if len(errs) == 0 {
		return Generate(file)
	}

	for _, e := range errs {
		t.Errorf("parse error: %s", e)
	}
	t.FailNow()
	return "" // not reached: FailNow stops the test
}
// assertContains reports a (non-fatal) test error, including the full
// generated code, when code does not contain want.
func assertContains(t *testing.T, code, want string) {
	t.Helper()
	if strings.Contains(code, want) {
		return
	}
	t.Errorf("generated code missing %q\n--- code ---\n%s", want, code)
}
// TestGenerateHelloWorld checks the overall shape of the program generated
// for a minimal Main that prints a string.
func TestGenerateHelloWorld(t *testing.T) {
	const src = `FUNCTION Main()
? "Hello, World!"
RETURN NIL
`
	code := generate(t, src)

	wants := []string{
		"package main",
		`import (`,
		`"five/hbrt"`,
		"func HB_MAIN(t *hbrt.Thread)",
		"t.Frame(0, 0)",
		"defer t.EndProc()",
		`t.PushString("Hello, World!")`,
		"t.Function(1)",
		"t.PushNil()",
		"t.RetValue()",
		"func main()",
		`vm.Run("MAIN")`,
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateArithmetic checks local initialization and an integer addition.
func TestGenerateArithmetic(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL n := 10
RETURN n + 5
`
	code := generate(t, src)

	wants := []string{
		"t.Frame(0, 1)",
		"t.PushInt(10)",
		"t.PopLocal(1)",
		"t.PushLocal(1)", // n
		"t.PushInt(5)",
		"t.Plus()",
		"t.RetValue()",
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateIfElse checks the code generated for IF / ELSE / ENDIF.
func TestGenerateIfElse(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL n := 10
IF n > 5
? "Big"
ELSE
? "Small"
ENDIF
RETURN NIL
`
	code := generate(t, src)

	wants := []string{
		"t.Greater()",
		"if t.PopLogical()",
		`t.PushString("Big")`,
		"} else {",
		`t.PushString("Small")`,
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateDoWhile checks the code generated for DO WHILE / ENDDO.
func TestGenerateDoWhile(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL i := 0
DO WHILE i < 10
i++
ENDDO
RETURN i
`
	code := generate(t, src)

	wants := []string{
		"for {",
		"t.Less()",
		"if !t.PopLogical() { break }",
		"t.LocalAddInt(1, 1)", // i++
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateForNext checks the code generated for FOR / NEXT with a
// compound assignment in the body.
func TestGenerateForNext(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL i, nSum := 0
FOR i := 1 TO 10
nSum += i
NEXT
RETURN nSum
`
	code := generate(t, src)

	wants := []string{
		"t.Frame(0, 2)",
		"for {",
		"t.LessEqual()",
		"t.LocalAdd(",    // nSum += i
		"t.LocalAddInt(", // i += 1
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateMultipleFunctions checks that each function gets its own Go
// function and that a call between them goes through the symbol table.
func TestGenerateMultipleFunctions(t *testing.T) {
	const src = `FUNCTION Double(n)
RETURN n * 2
FUNCTION Main()
? Double(21)
RETURN NIL
`
	code := generate(t, src)

	wants := []string{
		"func HB_DOUBLE(t *hbrt.Thread)",
		"func HB_MAIN(t *hbrt.Thread)",
		"t.Frame(1, 0)", // Double has 1 param
		"t.Mult()",
		`t.PushSymbol(t.VM().FindSymbol("DOUBLE"))`,
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateStringConcat checks string concatenation with a local variable.
func TestGenerateStringConcat(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL cName := "World"
? "Hello, " + cName + "!"
RETURN NIL
`
	code := generate(t, src)

	wants := []string{
		`t.PushString("Hello, ")`,
		"t.PushLocal(1)",
		"t.Plus()",
		`t.PushString("!")`,
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
// TestGenerateSymbolTable checks that every function is listed in the
// generated symbol table.
func TestGenerateSymbolTable(t *testing.T) {
	const src = `FUNCTION Main()
RETURN NIL
FUNCTION Helper()
RETURN NIL
`
	code := generate(t, src)

	wants := []string{
		`hbrt.Sym("MAIN"`,
		`hbrt.Sym("HELPER"`,
		"hbrt.FsFirst",
	}
	for _, want := range wants {
		assertContains(t, code, want)
	}
}
func TestGenerateImport(t *testing.T) {
code := generate(t, `IMPORT "net/http"
FUNCTION Main()
RETURN NIL
`)
assertContains(t, code, `"net/http"`)
}

555
compiler/genpc/genpc.go Normal file
View File

@@ -0,0 +1,555 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// genpc — Five pcode generator. Compiles AST to bytecode for FRB interpreter mode.
// Mirrors gengo's logic but emits bytecode opcodes instead of Go source code.
package genpc
import (
"encoding/binary"
"five/compiler/ast"
"five/compiler/token"
"five/hbrt"
"math"
"strings"
)
// Generate compiles an AST file to a PcodeModule named after the file.
// Only function declarations are compiled; other declarations are ignored.
func Generate(file *ast.File) *hbrt.PcodeModule {
	module := &hbrt.PcodeModule{
		Name:  file.Name,
		Funcs: make(map[string]*hbrt.PcodeFunc),
	}
	g := &generator{mod: module}

	for _, d := range file.Decls {
		if fn, isFunc := d.(*ast.FuncDecl); isFunc {
			g.emitFunc(fn)
		}
	}
	return g.mod
}
// generator holds the state of one pcode compilation.
type generator struct {
	mod    *hbrt.PcodeModule // module being filled in
	code   []byte            // bytecode of the function currently being emitted
	locals map[string]int    // local slot index by variable name, for the current function
}
// emit appends raw bytes (opcodes or operands) to the current bytecode.
func (g *generator) emit(b ...byte) {
	extended := append(g.code, b...)
	g.code = extended
}
// emitU16 appends v to the bytecode as 2 little-endian bytes.
func (g *generator) emitU16(v uint16) {
	at := len(g.code)
	g.code = append(g.code, 0, 0)
	binary.LittleEndian.PutUint16(g.code[at:], v)
}
// emitI32 appends v to the bytecode as 4 little-endian bytes
// (two's complement).
func (g *generator) emitI32(v int32) {
	at := len(g.code)
	g.code = append(g.code, 0, 0, 0, 0)
	binary.LittleEndian.PutUint32(g.code[at:], uint32(v))
}
// emitI64 appends v to the bytecode as 8 little-endian bytes
// (two's complement).
func (g *generator) emitI64(v int64) {
	at := len(g.code)
	g.code = append(g.code, make([]byte, 8)...)
	binary.LittleEndian.PutUint64(g.code[at:], uint64(v))
}
// emitF64 appends the IEEE 754 bit pattern of v to the bytecode as
// 8 little-endian bytes.
func (g *generator) emitF64(v float64) {
	bits := math.Float64bits(v)
	at := len(g.code)
	g.code = append(g.code, make([]byte, 8)...)
	binary.LittleEndian.PutUint64(g.code[at:], bits)
}
// emitString appends an opcode followed by a length-prefixed string operand:
// op, the byte length of s as a little-endian uint16, then the bytes of s.
//
// NOTE(review): the length is converted to uint16 without a range check, so
// the prefix wraps for strings longer than 65535 bytes.
func (g *generator) emitString(op byte, s string) {
	g.emit(op)
	g.emitU16(uint16(len(s)))
	g.code = append(g.code, s...)
}
// pc returns the current program counter: the offset at which the next
// emitted byte will be placed.
func (g *generator) pc() int {
	offset := len(g.code)
	return offset
}
// emitJumpPlaceholder emits the jump opcode op followed by a zero 32-bit
// offset, and returns the position of that offset so it can be filled in
// later with patchJump.
func (g *generator) emitJumpPlaceholder(op byte) int {
	g.emit(op)
	operandAt := g.pc()
	g.emitI32(0) // placeholder
	return operandAt
}
// patchJump fills in the 32-bit jump offset stored at pos so that the jump
// lands on the current program counter. The offset is relative to the first
// byte after the 4 offset bytes.
func (g *generator) patchJump(pos int) {
	afterOperand := pos + 4
	rel := int32(g.pc() - afterOperand)
	binary.LittleEndian.PutUint32(g.code[pos:], uint32(rel))
}
// --- Function ---
// emitFunc compiles one function to bytecode and stores it in the module
// under the upper-cased function name.
//
// Local slots are numbered from 1: parameters first, then LOCAL variables
// from the declaration section, then LOCAL declarations found at the top
// level of the body. Every function ends with an implicit `RETURN NIL`.
func (g *generator) emitFunc(fn *ast.FuncDecl) {
	g.code = nil
	g.locals = make(map[string]int)

	// Assign slots in declaration order.
	next := 1
	bind := func(name string) {
		g.locals[name] = next
		next++
	}
	for _, p := range fn.Params {
		bind(p.Name)
	}
	for _, d := range fn.Decls {
		vd, ok := d.(*ast.VarDecl)
		if !ok || vd.Scope != ast.ScopeLocal {
			continue
		}
		for _, v := range vd.Vars {
			bind(v.Name)
		}
	}
	for _, s := range fn.Body {
		vd, ok := s.(*ast.VarDecl)
		if !ok || vd.Scope != ast.ScopeLocal {
			continue
		}
		for _, v := range vd.Vars {
			bind(v.Name)
		}
	}
	paramCount := len(fn.Params)
	localCount := next - 1 - paramCount

	// Initializers of the declaration-section locals; slots are walked in
	// the same order in which they were assigned above.
	slot := paramCount + 1
	for _, d := range fn.Decls {
		vd, ok := d.(*ast.VarDecl)
		if !ok || vd.Scope != ast.ScopeLocal {
			continue
		}
		for _, v := range vd.Vars {
			if v.Init != nil {
				g.emitExpr(v.Init)
				g.emit(hbrt.PcOpPopLocal)
				g.emitU16(uint16(slot))
			}
			slot++
		}
	}

	for _, s := range fn.Body {
		g.emitStmt(s)
	}

	// Implicit return NIL
	g.emit(hbrt.PcOpPushNil)
	g.emit(hbrt.PcOpRetValue)

	// Store a private copy: g.code is reused for the next function.
	code := make([]byte, len(g.code))
	copy(code, g.code)
	g.mod.Funcs[strings.ToUpper(fn.Name)] = &hbrt.PcodeFunc{
		Name:   fn.Name,
		Code:   code,
		Params: paramCount,
		Locals: localCount,
	}
}
// --- Statements ---
// emitStmt compiles a single statement. Statement kinds that are not
// listed here produce no bytecode.
func (g *generator) emitStmt(stmt ast.Stmt) {
	switch s := stmt.(type) {
	case *ast.ReturnStmt:
		// A bare RETURN yields NIL.
		if s.Value != nil {
			g.emitExpr(s.Value)
		} else {
			g.emit(hbrt.PcOpPushNil)
		}
		g.emit(hbrt.PcOpRetValue)

	case *ast.ExprStmt:
		switch x := s.X.(type) {
		case *ast.AssignExpr:
			g.emitAssign(x)
		case *ast.CallExpr:
			g.emitCallStmt(x)
		default:
			// Evaluate for side effects and drop the result.
			g.emitExpr(s.X)
			g.emit(hbrt.PcOpPop)
		}

	case *ast.IfStmt:
		g.emitIf(s)

	case *ast.DoWhileStmt:
		g.emitDoWhile(s)

	case *ast.ForStmt:
		g.emitFor(s)

	case *ast.ExitStmt:
		// Placeholder: EXIT is not wired to the enclosing loop yet and
		// currently emits a halt instruction.
		g.emit(hbrt.PcOpHalt)

	case *ast.QOutStmt:
		g.emitQOut(s)

	case *ast.VarDecl:
		// Mid-function LOCAL: only the initializers generate code.
		for _, v := range s.Vars {
			if v.Init == nil {
				continue
			}
			g.emitExpr(v.Init)
			slot, known := g.locals[v.Name]
			if !known {
				g.emit(hbrt.PcOpPop)
				continue
			}
			g.emit(hbrt.PcOpPopLocal)
			g.emitU16(uint16(slot))
		}
	}
}
// emitIf compiles IF / ELSEIF / ELSE / ENDIF.
//
// Each condition is followed by a jump-if-false to the next branch. Every
// branch body that has a later branch after it (and every ELSEIF body) ends
// with an unconditional jump to the end of the whole statement. All of
// those end jumps are patched once the final branch has been emitted, so a
// taken branch never falls through into a later condition or the ELSE body.
func (g *generator) emitIf(s *ast.IfStmt) {
	var endJumps []int // operand positions of the jumps to the end of the IF

	g.emitExpr(s.Cond)
	skipBranch := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
	for _, stmt := range s.Body {
		g.emitStmt(stmt)
	}

	if len(s.ElseIfs) == 0 && len(s.ElseBody) == 0 {
		// Plain IF: the false jump lands right after the body.
		g.patchJump(skipBranch)
		return
	}

	endJumps = append(endJumps, g.emitJumpPlaceholder(hbrt.PcOpJump))
	g.patchJump(skipBranch)

	for _, elif := range s.ElseIfs {
		g.emitExpr(elif.Cond)
		skipBranch = g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
		for _, stmt := range elif.Body {
			g.emitStmt(stmt)
		}
		endJumps = append(endJumps, g.emitJumpPlaceholder(hbrt.PcOpJump))
		g.patchJump(skipBranch)
	}

	for _, stmt := range s.ElseBody {
		g.emitStmt(stmt)
	}

	for _, pos := range endJumps {
		g.patchJump(pos)
	}
}
// emitDoWhile compiles DO WHILE cond / body / ENDDO as a pre-test loop:
// the condition is evaluated before every iteration, so the body does not
// run at all when the condition is initially false.
//
// Layout:
//
//	loopStart: <cond>
//	           JumpFalse end
//	           <body>
//	           Jump loopStart
//	end:
func (g *generator) emitDoWhile(s *ast.DoWhileStmt) {
	loopStart := g.pc()

	g.emitExpr(s.Cond)
	exitJump := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)

	for _, stmt := range s.Body {
		g.emitStmt(stmt)
	}

	// Jump back to the condition. The offset is relative to the first byte
	// after the 4 offset bytes, matching patchJump.
	g.emit(hbrt.PcOpJump)
	g.emitI32(int32(loopStart - g.pc() - 4))

	g.patchJump(exitJump)
}
// emitFor compiles FOR var := start TO to [STEP step] / body / NEXT.
//
// Layout:
//
//	           <start>; PopLocal var
//	loopStart: PushLocal var; <to>; LessEq
//	           JumpFalse end
//	           <body>
//	           var += step
//	           Jump loopStart
//	end:
//
// Without a STEP clause the increment is a single LocalAddInt by 1; with
// one, the step expression is evaluated on every iteration and added to the
// loop variable. Nothing is left on the stack by the increment.
//
// A loop whose variable is not a known local is skipped entirely.
// NOTE(review): the continuation test is always `var <= to`, so a negative
// STEP does not count downwards — confirm whether that needs support.
func (g *generator) emitFor(s *ast.ForStmt) {
	idx, ok := g.locals[s.Var]
	if !ok {
		return
	}
	slot := uint16(idx)

	// Init
	g.emitExpr(s.Start)
	g.emit(hbrt.PcOpPopLocal)
	g.emitU16(slot)

	loopStart := g.pc()

	// Check: var <= to
	g.emit(hbrt.PcOpPushLocal)
	g.emitU16(slot)
	g.emitExpr(s.To)
	g.emit(hbrt.PcOpLessEq)
	jumpOut := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)

	// Body
	for _, stmt := range s.Body {
		g.emitStmt(stmt)
	}

	// Step
	if s.Step != nil {
		// var := var + step
		g.emit(hbrt.PcOpPushLocal)
		g.emitU16(slot)
		g.emitExpr(s.Step)
		g.emit(hbrt.PcOpPlus)
		g.emit(hbrt.PcOpPopLocal)
		g.emitU16(slot)
	} else {
		g.emit(hbrt.PcOpLocalAddInt)
		g.emitU16(slot)
		g.emitI32(1) // default step = 1
	}

	// Jump back
	g.emit(hbrt.PcOpJump)
	g.emitI32(int32(loopStart - g.pc() - 4))

	g.patchJump(jumpOut)
}
// emitQOut compiles the `?` / `??` output statements as a call to QOUT or
// QQOUT respectively, passing every expression as an argument.
func (g *generator) emitQOut(s *ast.QOutStmt) {
	target := "QOUT"
	if s.IsQQ {
		target = "QQOUT"
	}

	g.emitString(hbrt.PcOpPushSymbol, target)
	g.emit(hbrt.PcOpPushNil)

	argc := len(s.Exprs)
	for i := 0; i < argc; i++ {
		g.emitExpr(s.Exprs[i])
	}

	g.emit(hbrt.PcOpFunction)
	g.emitU16(uint16(argc))
}
// --- Expressions ---
// emitExpr emits code that evaluates expr and leaves its value on top of the
// evaluation stack.
//
// Constructs that are not lowered yet fall back to pushing NIL so that the
// stack stays balanced: unresolved identifiers, literal kinds without a
// dedicated push opcode, and expression node types that are not listed below.
func (g *generator) emitExpr(expr ast.Expr) {
	switch e := expr.(type) {
	case *ast.LiteralExpr:
		switch e.Kind {
		case token.INT:
			g.emit(hbrt.PcOpPushInt)
			g.emitI64(parseInt64(e.Value))
		case token.DOUBLE:
			g.emit(hbrt.PcOpPushDouble)
			g.emitF64(parseFloat64(e.Value))
		case token.STRING:
			g.emitString(hbrt.PcOpPushString, e.Value)
		case token.TRUE:
			g.emit(hbrt.PcOpPushTrue)
		case token.FALSE:
			g.emit(hbrt.PcOpPushFalse)
		case token.NIL_LIT:
			g.emit(hbrt.PcOpPushNil)
		default:
			// Unknown literal kind: still push one value, otherwise the
			// consumer of this expression would pop someone else's operand.
			g.emit(hbrt.PcOpPushNil)
		}

	case *ast.IdentExpr:
		if strings.ToUpper(e.Name) == "SELF" {
			g.emit(hbrt.PcOpPushSelf)
			return
		}
		if idx, ok := g.locals[e.Name]; ok {
			g.emit(hbrt.PcOpPushLocal)
			g.emitU16(uint16(idx))
		} else {
			g.emit(hbrt.PcOpPushNil) // unresolved
		}

	case *ast.BinaryExpr:
		g.emitExpr(e.Left)
		g.emitExpr(e.Right)
		g.emitBinaryOp(e.Op)

	case *ast.UnaryExpr:
		g.emitExpr(e.X)
		switch e.Op {
		case token.MINUS:
			g.emit(hbrt.PcOpNegate)
		case token.NOT:
			g.emit(hbrt.PcOpNot)
		}
		// NOTE(review): other unary operators leave the operand unchanged.

	case *ast.CallExpr:
		g.emitCall(e)

	case *ast.IIfExpr:
		g.emitExpr(e.Cond)
		jumpFalse := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
		g.emitExpr(e.True)
		jumpEnd := g.emitJumpPlaceholder(hbrt.PcOpJump)
		g.patchJump(jumpFalse)
		g.emitExpr(e.False)
		g.patchJump(jumpEnd)

	case *ast.SelfExpr:
		g.emit(hbrt.PcOpPushSelf)

	case *ast.SendExpr:
		// Self:field read: use the dedicated opcode directly instead of
		// pushing Self only to pop it again.
		if _, isSelf := e.Object.(*ast.SelfExpr); isSelf && !e.HasParens {
			g.emitString(hbrt.PcOpPushSelfField, strings.ToUpper(e.Method))
			return
		}
		g.emitExpr(e.Object)
		if e.HasParens {
			for _, arg := range e.Args {
				g.emitExpr(arg)
			}
			g.emitString(hbrt.PcOpSend, strings.ToUpper(e.Method))
			g.emitU16(uint16(len(e.Args)))
		}
		// NOTE(review): a paren-less send on a non-Self object emits only the
		// object itself; the member access is not lowered here.

	case *ast.ArrayLitExpr:
		for _, item := range e.Items {
			g.emitExpr(item)
		}
		g.emit(hbrt.PcOpArrayGen)
		g.emitU16(uint16(len(e.Items)))

	default:
		g.emit(hbrt.PcOpPushNil) // fallback
	}
}
// emitBinaryOp emits the single opcode that corresponds to the binary
// operator token op. The operands are expected to have been emitted already
// (emitExpr emits Left, then Right, before calling this).
//
// Both "=" (token.EQ) and "==" (token.EXEQ) map to PcOpEqual.
// An operator that is not listed emits nothing.
func (g *generator) emitBinaryOp(op token.Kind) {
	switch op {
	// Arithmetic
	case token.PLUS:
		g.emit(hbrt.PcOpPlus)
	case token.MINUS:
		g.emit(hbrt.PcOpMinus)
	case token.STAR:
		g.emit(hbrt.PcOpMult)
	case token.SLASH:
		g.emit(hbrt.PcOpDivide)
	case token.PERCENT:
		g.emit(hbrt.PcOpMod)
	case token.POWER:
		g.emit(hbrt.PcOpPower)
	// Comparison
	case token.EQ, token.EXEQ:
		g.emit(hbrt.PcOpEqual)
	case token.NEQ:
		g.emit(hbrt.PcOpNotEqual)
	case token.LT:
		g.emit(hbrt.PcOpLess)
	case token.GT:
		g.emit(hbrt.PcOpGreater)
	case token.LTE:
		g.emit(hbrt.PcOpLessEq)
	case token.GTE:
		g.emit(hbrt.PcOpGreaterEq)
	// Logical
	case token.AND:
		g.emit(hbrt.PcOpAnd)
	case token.OR:
		g.emit(hbrt.PcOpOr)
	// "$" operator
	case token.DOLLAR:
		g.emit(hbrt.PcOpInString)
	}
}
// emitCall compiles a call that appears in expression position.
//
// A callee written as a plain identifier is pushed as an upper-cased symbol
// followed by NIL and invoked with PcOpFunction. Any other callee expression
// is evaluated with emitExpr and invoked with PcOpDo. In both forms the
// arguments are emitted in source order and the opcode is followed by the
// 16-bit argument count.
//
// NOTE(review): the non-identifier path uses PcOpDo and pushes no NIL, unlike
// the identifier path — confirm against the runtime that this is intended.
func (g *generator) emitCall(e *ast.CallExpr) {
	argc := uint16(len(e.Args))
	ident, named := e.Func.(*ast.IdentExpr)

	if named {
		g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(ident.Name))
		g.emit(hbrt.PcOpPushNil)
	} else {
		g.emitExpr(e.Func)
	}

	for i := range e.Args {
		g.emitExpr(e.Args[i])
	}

	if named {
		g.emit(hbrt.PcOpFunction)
	} else {
		g.emit(hbrt.PcOpDo)
	}
	g.emitU16(argc)
}
// emitCallStmt compiles a call used as a statement. The callee is pushed
// either as an upper-cased symbol plus NIL (plain identifier) or by
// evaluating the callee expression; the arguments follow in source order and
// the call is always issued with PcOpDo and the 16-bit argument count.
func (g *generator) emitCallStmt(e *ast.CallExpr) {
	switch callee := e.Func.(type) {
	case *ast.IdentExpr:
		g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(callee.Name))
		g.emit(hbrt.PcOpPushNil)
	default:
		g.emitExpr(e.Func)
	}

	for i := range e.Args {
		g.emitExpr(e.Args[i])
	}
	g.emit(hbrt.PcOpDo)
	g.emitU16(uint16(len(e.Args)))
}
// emitAssign compiles an assignment.
//
// Two targets are stored: a known local (PopLocal into its slot) and a field
// of Self (SetSelfField with the upper-cased member name). For every other
// target the right-hand side is still evaluated, but its value is popped and
// discarded.
func (g *generator) emitAssign(a *ast.AssignExpr) {
	switch target := a.Left.(type) {
	case *ast.IdentExpr:
		if slot, found := g.locals[target.Name]; found {
			g.emitExpr(a.Right)
			g.emit(hbrt.PcOpPopLocal)
			g.emitU16(uint16(slot))
			return
		}
	case *ast.SendExpr:
		// Self field assignment
		if _, onSelf := target.Object.(*ast.SelfExpr); onSelf {
			g.emitExpr(a.Right)
			g.emitString(hbrt.PcOpSetSelfField, strings.ToUpper(target.Method))
			return
		}
	}

	// Unsupported target: evaluate for side effects only.
	g.emitExpr(a.Right)
	g.emit(hbrt.PcOpPop)
}
// parseInt64 converts the text of an integer literal to its value.
//
// Supported forms are decimal ("42") and hexadecimal with a 0x/0X prefix
// ("0xFF"); the lexer produces both as INT tokens. A leading '-' negates the
// result. Characters that are not valid digits for the detected base are
// skipped rather than reported, so the function never fails; overflow is not
// detected.
func parseInt64(s string) int64 {
	negative := len(s) > 0 && s[0] == '-'
	digits := s
	if negative {
		digits = s[1:]
	}

	var v int64
	if len(digits) > 2 && digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X') {
		// Hexadecimal: accumulate base-16 digits after the prefix.
		for i := 2; i < len(digits); i++ {
			switch c := digits[i]; {
			case c >= '0' && c <= '9':
				v = v*16 + int64(c-'0')
			case c >= 'a' && c <= 'f':
				v = v*16 + int64(c-'a') + 10
			case c >= 'A' && c <= 'F':
				v = v*16 + int64(c-'A') + 10
			}
		}
	} else {
		for _, c := range digits {
			if c >= '0' && c <= '9' {
				v = v*10 + int64(c-'0')
			}
		}
	}

	if negative {
		v = -v
	}
	return v
}
// parseFloat64 converts the text of a decimal literal ("3.14", ".5", "42")
// to a float64.
//
// All digits are accumulated into one mantissa and divided once by the power
// of ten given by the number of fractional digits. For literals of ordinary
// length both the mantissa and the divisor are exactly representable, so the
// single division yields the correctly rounded value (e.g. "0.3" == 0.3),
// which summing digit*0.1^k terms does not.
//
// A leading '-' negates the result. Characters other than digits and '.' are
// skipped, and only the first '.' matters; the function never fails.
func parseFloat64(s string) float64 {
	var mantissa float64
	scale := 1.0
	inFraction := false

	for _, c := range s {
		switch {
		case c == '.':
			inFraction = true
		case c >= '0' && c <= '9':
			mantissa = mantissa*10 + float64(c-'0')
			if inFraction {
				scale *= 10
			}
		}
	}

	v := mantissa / scale
	if len(s) > 0 && s[0] == '-' {
		v = -v
	}
	return v
}

743
compiler/lexer/lexer.go Normal file
View File

@@ -0,0 +1,743 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Lexer for the Five language (Harbour-compatible).
// Hand-written scanner — no generated code.
// Handles Harbour's case-insensitive keywords, .T./.F./.AND./.OR./.NOT. literals,
// line-continuation with semicolon, and multiple comment styles.
//
// tsgo reference: ref/typescript-go/internal/scanner/ for scanning patterns.
// Key insight from tsgo: substring slicing into original source (zero-copy tokens).
package lexer
import (
"five/compiler/token"
"unicode/utf8"
)
// Lexer scans Harbour/Five source code into tokens.
//
// The source is held as one immutable string and token literals are slices
// of it. Positions are byte based: pos is an offset into src and columns are
// derived from byte distances, not rune counts.
type Lexer struct {
	src       string     // source code (immutable, tsgo pattern: substring slicing)
	file      string     // filename for error reporting
	pos       int        // current byte position
	line      int        // current line (1-based)
	col       int        // current column (1-based)
	lineStart int        // byte offset of current line start
	lastKind  token.Kind // previous token kind (for [string] detection)
}
// New returns a Lexer positioned at the first byte of source (line 1,
// column 1). filename is only recorded in token positions.
func New(filename, source string) *Lexer {
	l := new(Lexer)
	l.src = source
	l.file = filename
	l.line, l.col = 1, 1
	return l
}
// NextToken scans and returns the next token, remembering its kind in
// lastKind so that a following '[' can be classified as subscript or string.
func (l *Lexer) NextToken() token.Token {
	next := l.nextTokenInner()
	l.lastKind = next.Kind
	return next
}
// nextTokenInner skips insignificant input and dispatches on the first byte
// of the next token.
//
// Line terminators are tokens (NEWLINE, the statement terminator). "\n",
// "\r\n" and a lone "\r" each produce exactly one NEWLINE whose literal is
// always "\n" and whose position is that of the first terminator byte, so LF
// and CRLF sources report newline positions the same way.
func (l *Lexer) nextTokenInner() token.Token {
	l.skipWhitespaceAndComments()
	if l.pos >= len(l.src) {
		return l.makeToken(token.EOF, "")
	}
	ch := l.src[l.pos]

	switch {
	case ch == '\n' || ch == '\r':
		// Build the token before consuming so it points at the terminator.
		tok := l.makeToken(token.NEWLINE, "\n")
		l.advance()
		if ch == '\r' && l.peek() == '\n' {
			l.advance() // CRLF counts as a single line break
		}
		l.line++
		l.col = 1
		l.lineStart = l.pos
		return tok

	case ch == '"' || ch == '\'':
		// String literals
		return l.scanString(ch)

	case ch >= '0' && ch <= '9':
		// Numbers
		return l.scanNumber()

	case ch == '.':
		// Dot-prefixed: .12 = numeric, .T., .F., .AND., .OR., .NOT.
		if next := l.peekAt(1); next >= '0' && next <= '9' {
			return l.scanNumber() // scanNumber handles the leading dot
		}
		if dot := l.scanDotToken(); dot.Kind != token.ILLEGAL {
			return dot
		}
		// scanDotToken consumed nothing: this is a plain dot.
		l.advance()
		return l.makeToken(token.DOT, ".")

	case isIdentStart(ch):
		// Identifiers and keywords
		return l.scanIdent()
	}

	// Operators and punctuation
	return l.scanOperator()
}
// Tokenize lexes source completely and returns every token in order. The
// returned slice always ends with the EOF token.
func Tokenize(filename, source string) []token.Token {
	var out []token.Token
	lx := New(filename, source)
	for done := false; !done; {
		tok := lx.NextToken()
		out = append(out, tok)
		done = tok.Kind == token.EOF
	}
	return out
}
// --- Internal scanning methods ---
// advance consumes one byte, moving both the byte offset and the column.
// It does nothing at end of input and never touches the line counter.
func (l *Lexer) advance() {
	if l.pos >= len(l.src) {
		return
	}
	l.pos++
	l.col++
}
// peek returns the byte at the current position without consuming it, or 0
// at end of input.
func (l *Lexer) peek() byte {
	if l.pos >= len(l.src) {
		return 0
	}
	return l.src[l.pos]
}
// peekAt returns the byte offset positions ahead of the current one without
// consuming anything, or 0 when that position is past the end of input.
func (l *Lexer) peekAt(offset int) byte {
	if at := l.pos + offset; at < len(l.src) {
		return l.src[at]
	}
	return 0
}
// makeToken builds a token located at the lexer's current position (current
// line, column and byte offset).
func (l *Lexer) makeToken(kind token.Kind, literal string) token.Token {
	here := token.Position{
		File:   l.file,
		Line:   l.line,
		Col:    l.col,
		Offset: l.pos,
	}
	return token.Token{Kind: kind, Literal: literal, Pos: here}
}
// skipWhitespaceAndComments advances past everything that does not produce a
// token: blanks, line continuations and all comment forms. It stops at the
// first byte that starts a token (including '\n' / '\r', which become
// NEWLINE tokens) or at end of input.
//
// Handled, in this order on every iteration:
//   - spaces and tabs
//   - ';'  : dropped together with trailing blanks, an optional "//" comment
//     and the following line break
//   - '\\' : dropped the same way unless it is followed by another backslash
//   - "//" and "&&" comments up to (not including) the line break
//   - "/* ... */" block comments
//   - '*' as the first non-blank character of a line
//   - the word NOTE starting exactly at the beginning of a line
func (l *Lexer) skipWhitespaceAndComments() {
	for l.pos < len(l.src) {
		ch := l.src[l.pos]
		// Spaces and tabs (not newlines — those are tokens)
		if ch == ' ' || ch == '\t' {
			l.advance()
			continue
		}
		// Semicolon = line continuation (skip semicolon + following newline)
		// The ';' is dropped even when no line break follows it.
		if ch == ';' {
			l.advance()
			// Skip whitespace until newline
			for l.pos < len(l.src) && (l.src[l.pos] == ' ' || l.src[l.pos] == '\t') {
				l.advance()
			}
			// Skip trailing // comment before newline
			if l.pos+1 < len(l.src) && l.src[l.pos] == '/' && l.src[l.pos+1] == '/' {
				for l.pos < len(l.src) && l.src[l.pos] != '\n' && l.src[l.pos] != '\r' {
					l.advance()
				}
			}
			// Skip the newline itself
			// A '\r' is consumed here, but the line counter only moves on '\n'.
			if l.pos < len(l.src) && l.src[l.pos] == '\r' {
				l.advance()
			}
			if l.pos < len(l.src) && l.src[l.pos] == '\n' {
				l.advance()
				l.line++
				l.col = 1
				l.lineStart = l.pos
			}
			continue
		}
		// Backslash = alternate line continuation (Harbour extension)
		// A doubled backslash is left for the token scanners.
		if ch == '\\' && l.peekAt(1) != '\\' {
			l.advance()
			for l.pos < len(l.src) && (l.src[l.pos] == ' ' || l.src[l.pos] == '\t') {
				l.advance()
			}
			if l.pos < len(l.src) && l.src[l.pos] == '\r' {
				l.advance()
			}
			if l.pos < len(l.src) && l.src[l.pos] == '\n' {
				l.advance()
				l.line++
				l.col = 1
				l.lineStart = l.pos
			}
			continue
		}
		// // single-line comment
		if ch == '/' && l.peekAt(1) == '/' {
			l.skipToEndOfLine()
			continue
		}
		// /* ... */ multi-line comment
		if ch == '/' && l.peekAt(1) == '*' {
			l.skipBlockComment()
			continue
		}
		// && single-line comment (Harbour style)
		if ch == '&' && l.peekAt(1) == '&' {
			l.skipToEndOfLine()
			continue
		}
		// * at start of line = comment (Harbour/Clipper style)
		// Also handles indented * comments: " * comment"
		if ch == '*' && l.isFirstNonWhitespace() {
			l.skipToEndOfLine()
			continue
		}
		// NOTE at start of line (Harbour)
		// Unlike '*', NOTE must sit at column one: the check compares against
		// lineStart directly, so an indented NOTE is not treated as a comment.
		if (ch == 'N' || ch == 'n') && l.pos == l.lineStart {
			if l.matchWordAt("NOTE") {
				l.skipToEndOfLine()
				continue
			}
		}
		// Anything else starts a real token.
		break
	}
}
// isFirstNonWhitespace reports whether only spaces and tabs lie between the
// start of the current line and the current position.
func (l *Lexer) isFirstNonWhitespace() bool {
	i := l.lineStart
	for i < l.pos {
		switch l.src[i] {
		case ' ', '\t':
			i++
		default:
			return false
		}
	}
	return true
}
// skipToEndOfLine consumes bytes up to, but not including, the next '\n' or
// '\r' (or to end of input).
func (l *Lexer) skipToEndOfLine() {
	for l.pos < len(l.src) {
		if c := l.src[l.pos]; c == '\n' || c == '\r' {
			return
		}
		l.advance()
	}
}
// skipBlockComment consumes a "/* ... */" comment; the lexer must be
// positioned on its opening '/'. Line bookkeeping is updated for every '\n'
// inside the comment. An unterminated comment swallows the rest of the input.
func (l *Lexer) skipBlockComment() {
	// Opening "/*".
	l.advance()
	l.advance()

	last := len(l.src) - 1
	for l.pos < last {
		switch c := l.src[l.pos]; {
		case c == '*' && l.src[l.pos+1] == '/':
			// Closing "*/".
			l.advance()
			l.advance()
			return
		case c == '\n':
			l.line++
			l.col = 0 // the advance below brings it to 1
			l.lineStart = l.pos + 1
		}
		l.advance()
	}

	// Unterminated comment — consume rest
	l.pos = len(l.src)
}
// matchWordAt reports whether word occurs at the current position, compared
// ASCII case-insensitively, and is not immediately followed by an identifier
// character (so "NOTE" matches "note " but not "notes"). Nothing is consumed.
//
// Only letters are case-folded; every other byte must match exactly.
func (l *Lexer) matchWordAt(word string) bool {
	end := l.pos + len(word)
	if end > len(l.src) {
		return false
	}
	for i := 0; i < len(word); i++ {
		c, w := l.src[l.pos+i], word[i]
		if c >= 'a' && c <= 'z' {
			c -= 'a' - 'A'
		}
		if w >= 'a' && w <= 'z' {
			w -= 'a' - 'A'
		}
		if c != w {
			return false
		}
	}
	// Must be followed by a non-identifier byte or end of input.
	return end == len(l.src) || !isIdentChar(l.src[end])
}
// --- String scanning ---
// scanString scans a string delimited by quote; the lexer must be positioned
// on the opening delimiter. The STRING token's literal is the text between
// the delimiters, sliced from the source without copying.
//
// There are no escape sequences: a backslash is an ordinary character. A
// string that reaches a line break or end of input before its closing
// delimiter yields an ILLEGAL token covering what was scanned.
func (l *Lexer) scanString(quote byte) token.Token {
	open := l.pos
	l.advance() // opening delimiter

scan:
	for l.pos < len(l.src) {
		switch l.src[l.pos] {
		case quote:
			body := l.src[open+1 : l.pos]
			l.advance() // closing delimiter
			return l.makeTokenAt(token.STRING, body, open)
		case '\n', '\r':
			break scan // unterminated string
		}
		l.advance()
	}

	return l.makeTokenAt(token.ILLEGAL, l.src[open:l.pos], open)
}
// isStringBracket decides whether a '[' that has just been consumed opens a
// Harbour [text] string literal (true) or an array subscript (false).
//
// It is a subscript when the previous token can end an operand — identifier,
// ')', ']' or a literal — or is any token kind at or above token.FUNCTION_KW
// (keywords may be used as variable names), or when the very next byte is
// ']' (empty brackets).
func (l *Lexer) isStringBracket() bool {
	prev := l.lastKind

	switch prev {
	case token.IDENT, token.RPAREN, token.RBRACKET,
		token.INT, token.LONG, token.DOUBLE, token.STRING,
		token.TRUE, token.FALSE, token.NIL_LIT:
		return false // array index context
	}
	if prev >= token.FUNCTION_KW {
		return false // keyword used as a variable name
	}

	emptyBrackets := l.pos < len(l.src) && l.src[l.pos] == ']'
	return !emptyBrackets
}
// scanBracketString scans a Harbour [text] string literal whose opening '['
// is at byte offset start.
//
// scanOperator calls this after it has already consumed the '['; the bracket
// is skipped here only if the lexer is still positioned on it, so the first
// character of the text is never lost. Nested brackets are balanced, and the
// literal is everything between the outermost pair. A line break or end of
// input before the closing ']' yields an ILLEGAL token.
func (l *Lexer) scanBracketString(start int) token.Token {
	if l.pos == start {
		l.advance() // skip [
	}
	strStart := l.pos
	depth := 1

scan:
	for l.pos < len(l.src) {
		switch l.src[l.pos] {
		case '[':
			depth++
		case ']':
			depth--
			if depth == 0 {
				literal := l.src[strStart:l.pos]
				l.advance() // skip ]
				return l.makeTokenAt(token.STRING, literal, start)
			}
		case '\n', '\r':
			break scan // unterminated
		}
		l.advance()
	}

	return l.makeTokenAt(token.ILLEGAL, l.src[start:l.pos], start)
}
// --- Number scanning ---
// scanNumber scans a numeric literal starting at the current position.
//
// Recognised forms:
//   - 0x / 0X followed by hex digits  -> INT
//   - .digits                          -> DOUBLE
//   - digits                           -> INT
//   - digits.digits                    -> DOUBLE
//
// A '.' after the integer part is only consumed when a digit follows it, so
// "123." leaves the dot for the next token.
func (l *Lexer) scanNumber() token.Token {
	start := l.pos

	// skipDigits consumes a run of decimal digits.
	skipDigits := func() {
		for l.pos < len(l.src) && l.src[l.pos] >= '0' && l.src[l.pos] <= '9' {
			l.advance()
		}
	}

	// Hex: 0x...
	if l.src[start] == '0' && (l.peekAt(1) == 'x' || l.peekAt(1) == 'X') {
		l.advance() // 0
		l.advance() // x
		for l.pos < len(l.src) && isHexDigit(l.src[l.pos]) {
			l.advance()
		}
		return l.makeTokenAt(token.INT, l.src[start:l.pos], start)
	}

	// Leading dot: .12
	if l.src[start] == '.' {
		l.advance()
		skipDigits()
		return l.makeTokenAt(token.DOUBLE, l.src[start:l.pos], start)
	}

	// Integer part, then an optional fraction.
	skipDigits()
	if l.peek() == '.' {
		if after := l.peekAt(1); after >= '0' && after <= '9' {
			l.advance() // skip .
			skipDigits()
			return l.makeTokenAt(token.DOUBLE, l.src[start:l.pos], start)
		}
	}
	return l.makeTokenAt(token.INT, l.src[start:l.pos], start)
}
// --- Dot-prefixed tokens ---
// scanDotToken tries to scan one of the dot-delimited words at the current
// position: .T. / .F. (logical literals) or .AND. / .OR. / .NOT., all
// case-insensitive. The returned literal is always the canonical upper-case
// spelling.
//
// When nothing matches, no input is consumed and a token of kind ILLEGAL is
// returned so the caller can fall back to a plain DOT.
func (l *Lexer) scanDotToken() token.Token {
	start := l.pos

	// consume moves past n bytes that are known not to contain a line break.
	consume := func(n int) {
		l.pos += n
		l.col += n
	}

	// .T. / .F.
	if l.peekAt(2) == '.' {
		switch l.peekAt(1) {
		case 'T', 't':
			consume(3)
			return l.makeTokenAt(token.TRUE, ".T.", start)
		case 'F', 'f':
			consume(3)
			return l.makeTokenAt(token.FALSE, ".F.", start)
		}
	}

	// .AND. / .OR. / .NOT.
	switch {
	case l.matchDotKeyword(".AND."):
		consume(len(".AND."))
		return l.makeTokenAt(token.AND, ".AND.", start)
	case l.matchDotKeyword(".OR."):
		consume(len(".OR."))
		return l.makeTokenAt(token.OR, ".OR.", start)
	case l.matchDotKeyword(".NOT."):
		consume(len(".NOT."))
		return l.makeTokenAt(token.NOT, ".NOT.", start)
	}

	return token.Token{Kind: token.ILLEGAL} // let caller handle plain DOT
}
// matchDotKeyword reports whether kw occurs at the current position, with
// ASCII letters compared case-insensitively and every other byte compared
// exactly. Nothing is consumed.
func (l *Lexer) matchDotKeyword(kw string) bool {
	if l.pos+len(kw) > len(l.src) {
		return false
	}

	// fold maps ASCII upper-case letters to lower case.
	fold := func(b byte) byte {
		if b >= 'A' && b <= 'Z' {
			return b + 32
		}
		return b
	}

	window := l.src[l.pos : l.pos+len(kw)]
	for i := range kw {
		if fold(window[i]) != fold(kw[i]) {
			return false
		}
	}
	return true
}
// --- Identifier scanning ---
// scanIdent scans a run of identifier characters and classifies it through
// token.LookupKeyword. The literal is a slice of the source text, keeping the
// original spelling.
func (l *Lexer) scanIdent() token.Token {
	begin := l.pos
	for l.pos < len(l.src) {
		if !isIdentChar(l.src[l.pos]) {
			break
		}
		l.advance()
	}
	word := l.src[begin:l.pos]
	return l.makeTokenAt(token.LookupKeyword(word), word, begin)
}
// --- Operator scanning ---
// scanOperator scans operators and punctuation. The first byte is consumed
// unconditionally; multi-character operators are recognised by looking at
// the bytes that follow, longest match first.
//
// Notable mappings:
//   - "!=", "<>" and a lone "#" all produce NEQ
//   - "**" and "^" produce POWER; "**=" and "^=" produce POWEREQ
//   - '#' followed by a letter is handed to scanPreprocessor
//   - '[' is either LBRACKET or the start of a [text] string literal
//
// Any other byte yields an ILLEGAL token. For a non-ASCII byte the whole
// UTF-8 sequence is consumed so that one ILLEGAL token covers one character.
func (l *Lexer) scanOperator() token.Token {
	start := l.pos
	ch := l.src[l.pos]
	l.advance()

	// accept consumes the next byte when it equals want.
	accept := func(want byte) bool {
		if l.peek() != want {
			return false
		}
		l.advance()
		return true
	}

	switch ch {
	case '+':
		switch {
		case accept('='):
			return l.makeTokenAt(token.PLUSEQ, "+=", start)
		case accept('+'):
			return l.makeTokenAt(token.INC, "++", start)
		}
		return l.makeTokenAt(token.PLUS, "+", start)
	case '-':
		switch {
		case accept('='):
			return l.makeTokenAt(token.MINUSEQ, "-=", start)
		case accept('-'):
			return l.makeTokenAt(token.DEC, "--", start)
		case accept('>'):
			return l.makeTokenAt(token.ARROW, "->", start)
		}
		return l.makeTokenAt(token.MINUS, "-", start)
	case '*':
		if accept('*') {
			if accept('=') {
				return l.makeTokenAt(token.POWEREQ, "**=", start)
			}
			return l.makeTokenAt(token.POWER, "**", start)
		}
		if accept('=') {
			return l.makeTokenAt(token.STAREQ, "*=", start)
		}
		return l.makeTokenAt(token.STAR, "*", start)
	case '/':
		if accept('=') {
			return l.makeTokenAt(token.SLASHEQ, "/=", start)
		}
		return l.makeTokenAt(token.SLASH, "/", start)
	case '%':
		if accept('=') {
			return l.makeTokenAt(token.PERCENTEQ, "%=", start)
		}
		return l.makeTokenAt(token.PERCENT, "%", start)
	case '=':
		switch {
		case accept('='):
			return l.makeTokenAt(token.EXEQ, "==", start)
		case accept('>'):
			return l.makeTokenAt(token.DBLARROW, "=>", start)
		}
		return l.makeTokenAt(token.EQ, "=", start)
	case '!':
		if accept('=') {
			return l.makeTokenAt(token.NEQ, "!=", start)
		}
		return l.makeTokenAt(token.NOT, "!", start)
	case '<':
		switch {
		case accept('-'):
			return l.makeTokenAt(token.ARROW_LEFT, "<-", start)
		case accept('='):
			return l.makeTokenAt(token.LTE, "<=", start)
		case accept('>'):
			return l.makeTokenAt(token.NEQ, "<>", start)
		}
		return l.makeTokenAt(token.LT, "<", start)
	case '>':
		if accept('=') {
			return l.makeTokenAt(token.GTE, ">=", start)
		}
		return l.makeTokenAt(token.GT, ">", start)
	case '#':
		// # alone = not-equal (Clipper), #keyword = preprocessor
		if c := l.peek(); (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
			return l.scanPreprocessor(start)
		}
		return l.makeTokenAt(token.NEQ, "#", start)
	case ':':
		switch {
		case accept('='):
			return l.makeTokenAt(token.ASSIGN, ":=", start)
		case accept(':'):
			return l.makeTokenAt(token.COLONCOLON, "::", start)
		}
		return l.makeTokenAt(token.COLON, ":", start)
	case '&':
		return l.makeTokenAt(token.AMPERSAND, "&", start)
	case '@':
		return l.makeTokenAt(token.AT, "@", start)
	case '$':
		return l.makeTokenAt(token.DOLLAR, "$", start)
	case '?':
		if accept('?') {
			return l.makeTokenAt(token.QQMARK, "??", start)
		}
		return l.makeTokenAt(token.QMARK, "?", start)
	case '(':
		return l.makeTokenAt(token.LPAREN, "(", start)
	case ')':
		return l.makeTokenAt(token.RPAREN, ")", start)
	case '[':
		// Harbour: [text] is string literal when NOT preceded by ident/)/]/literal
		// a[1] = array index, but ? [Hello] = string
		if l.isStringBracket() {
			return l.scanBracketString(start)
		}
		return l.makeTokenAt(token.LBRACKET, "[", start)
	case ']':
		return l.makeTokenAt(token.RBRACKET, "]", start)
	case '{':
		return l.makeTokenAt(token.LBRACE, "{", start)
	case '}':
		return l.makeTokenAt(token.RBRACE, "}", start)
	case ',':
		return l.makeTokenAt(token.COMMA, ",", start)
	case '|':
		return l.makeTokenAt(token.PIPE, "|", start)
	case '^':
		if accept('=') {
			return l.makeTokenAt(token.POWEREQ, "^=", start)
		}
		return l.makeTokenAt(token.POWER, "^", start)
	}

	// Unrecognised byte. For non-ASCII input consume the complete UTF-8
	// sequence so the ILLEGAL token holds one whole character.
	if ch >= 0x80 {
		_, size := utf8.DecodeRuneInString(l.src[start:])
		// The advance above already accounted for the first byte; only the
		// remaining size-1 bytes still have to be added to the column.
		l.pos = start + size
		l.col += size - 1
		return l.makeTokenAt(token.ILLEGAL, l.src[start:l.pos], start)
	}
	return l.makeTokenAt(token.ILLEGAL, string(ch), start)
}
// scanPreprocessor scans the word following a '#' that has already been
// consumed; start is the byte offset of that '#'.
//
// A recognised directive name (case-insensitive) produces the matching PP_*
// token whose literal includes the '#'. For an unrecognised word the result
// depends on where the '#' sits:
//   - as the first non-blank character of its line it is an unknown
//     directive and yields ILLEGAL covering '#' plus the word;
//   - anywhere else it is the Clipper not-equal operator written without a
//     following space (a#b): the word is un-read and a NEQ token is returned
//     so the word is lexed as an ordinary identifier next.
func (l *Lexer) scanPreprocessor(start int) token.Token {
	kwStart := l.pos
	for l.pos < len(l.src) && isIdentChar(l.src[l.pos]) {
		l.advance()
	}
	directive := l.src[kwStart:l.pos]
	full := l.src[start:l.pos]

	switch {
	case matchCI(directive, "include"):
		return l.makeTokenAt(token.PP_INCLUDE, full, start)
	case matchCI(directive, "define"):
		return l.makeTokenAt(token.PP_DEFINE, full, start)
	case matchCI(directive, "undef"):
		return l.makeTokenAt(token.PP_UNDEF, full, start)
	case matchCI(directive, "ifdef"):
		return l.makeTokenAt(token.PP_IFDEF, full, start)
	case matchCI(directive, "ifndef"):
		return l.makeTokenAt(token.PP_IFNDEF, full, start)
	case matchCI(directive, "else"):
		return l.makeTokenAt(token.PP_ELSE, full, start)
	case matchCI(directive, "endif"):
		return l.makeTokenAt(token.PP_ENDIF, full, start)
	case matchCI(directive, "command"):
		return l.makeTokenAt(token.PP_COMMAND, full, start)
	case matchCI(directive, "translate"):
		return l.makeTokenAt(token.PP_TRANSLATE, full, start)
	case matchCI(directive, "pragma"):
		return l.makeTokenAt(token.PP_PRAGMA, full, start)
	}

	// Unknown word. Directives only make sense at the start of a line.
	atLineStart := true
	for i := l.lineStart; i < start; i++ {
		if c := l.src[i]; c != ' ' && c != '\t' {
			atLineStart = false
			break
		}
	}
	if !atLineStart {
		// Mid-line '#': give the word back and report not-equal.
		l.col -= l.pos - kwStart
		l.pos = kwStart
		return l.makeTokenAt(token.NEQ, "#", start)
	}
	return l.makeTokenAt(token.ILLEGAL, full, start)
}
// makeTokenAt builds a token that began at byte offset startPos. The line is
// the lexer's current line and the column is derived from the distance
// between startPos and the start of that line (1-based).
func (l *Lexer) makeTokenAt(kind token.Kind, literal string, startPos int) token.Token {
	at := token.Position{
		File:   l.file,
		Line:   l.line,
		Col:    startPos - l.lineStart + 1,
		Offset: startPos,
	}
	return token.Token{Kind: kind, Literal: literal, Pos: at}
}
// --- Character classification ---
// isIdentStart reports whether ch may begin an identifier: an ASCII letter
// or an underscore.
func isIdentStart(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
// isIdentChar reports whether ch may appear inside an identifier: anything
// accepted by isIdentStart, or an ASCII digit.
func isIdentChar(ch byte) bool {
	if '0' <= ch && ch <= '9' {
		return true
	}
	return isIdentStart(ch)
}
// isHexDigit reports whether ch is an ASCII hexadecimal digit (0-9, a-f,
// A-F).
func isHexDigit(ch byte) bool {
	switch {
	case '0' <= ch && ch <= '9':
		return true
	case 'a' <= ch && ch <= 'f':
		return true
	case 'A' <= ch && ch <= 'F':
		return true
	}
	return false
}
// matchCI reports whether a and b are equal when ASCII upper-case letters
// are folded to lower case. Strings of different length never match; bytes
// outside A-Z are compared exactly.
func matchCI(a, b string) bool {
	if len(a) != len(b) {
		return false
	}

	// lower folds a single ASCII upper-case letter.
	lower := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + 32
		}
		return c
	}

	for i := range a {
		if lower(a[i]) != lower(b[i]) {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,260 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package lexer
import (
"five/compiler/token"
"testing"
)
// expectTokens lexes source and checks that the sequence of token kinds,
// ignoring NEWLINE and EOF, equals expected. On a length mismatch every
// token (including the ignored ones) is logged to help diagnosis.
func expectTokens(t *testing.T, source string, expected []token.Kind) {
	t.Helper()
	all := Tokenize("test.prg", source)

	// Keep only the kinds the caller cares about.
	var got []token.Kind
	for _, tok := range all {
		switch tok.Kind {
		case token.NEWLINE, token.EOF:
			continue
		}
		got = append(got, tok.Kind)
	}

	if len(got) != len(expected) {
		t.Errorf("token count: got %d, want %d", len(got), len(expected))
		for i, tok := range all {
			t.Logf(" [%d] %v %q", i, tok.Kind, tok.Literal)
		}
		return
	}
	for i := range expected {
		if got[i] != expected[i] {
			t.Errorf("token[%d]: got %v, want %v", i, got[i], expected[i])
		}
	}
}
func TestBasicArithmetic(t *testing.T) {
expectTokens(t, "1 + 2 * 3", []token.Kind{
token.INT, token.PLUS, token.INT, token.STAR, token.INT,
})
}
func TestAssignment(t *testing.T) {
expectTokens(t, "x := 10", []token.Kind{
token.IDENT, token.ASSIGN, token.INT,
})
}
func TestCompoundAssignment(t *testing.T) {
expectTokens(t, "n += 5", []token.Kind{
token.IDENT, token.PLUSEQ, token.INT,
})
}
// TestStringLiteral checks that a double-quoted string yields a STRING token
// whose literal excludes the quotes.
func TestStringLiteral(t *testing.T) {
	first := Tokenize("test.prg", `"Hello, World!"`)[0]
	if first.Kind == token.STRING && first.Literal == "Hello, World!" {
		return
	}
	t.Errorf("got %v %q", first.Kind, first.Literal)
}
// TestSingleQuoteString checks that single quotes delimit strings as well.
func TestSingleQuoteString(t *testing.T) {
	first := Tokenize("test.prg", `'single'`)[0]
	if first.Kind == token.STRING && first.Literal == "single" {
		return
	}
	t.Errorf("got %v %q", first.Kind, first.Literal)
}
func TestLogicalLiterals(t *testing.T) {
expectTokens(t, ".T. .F.", []token.Kind{token.TRUE, token.FALSE})
}
func TestLogicalOperators(t *testing.T) {
expectTokens(t, ".AND. .OR. .NOT.", []token.Kind{token.AND, token.OR, token.NOT})
}
func TestLogicalCaseInsensitive(t *testing.T) {
expectTokens(t, ".and. .or. .not. .t. .f.", []token.Kind{
token.AND, token.OR, token.NOT, token.TRUE, token.FALSE,
})
}
func TestKeywords(t *testing.T) {
expectTokens(t, "FUNCTION Main", []token.Kind{token.FUNCTION_KW, token.IDENT})
expectTokens(t, "function main", []token.Kind{token.FUNCTION_KW, token.IDENT})
expectTokens(t, "LOCAL n := 0", []token.Kind{token.LOCAL, token.IDENT, token.ASSIGN, token.INT})
expectTokens(t, "IF x > 10", []token.Kind{token.IF, token.IDENT, token.GT, token.INT})
expectTokens(t, "DO WHILE i <= 10", []token.Kind{token.DO, token.WHILE, token.IDENT, token.LTE, token.INT})
expectTokens(t, "RETURN NIL", []token.Kind{token.RETURN, token.NIL_LIT})
}
func TestXBaseCommands(t *testing.T) {
expectTokens(t, "USE customers", []token.Kind{token.USE, token.IDENT})
expectTokens(t, "SEEK cKey", []token.Kind{token.SEEK, token.IDENT})
expectTokens(t, "REPLACE name WITH cNewName", []token.Kind{
token.REPLACE, token.IDENT, token.WITH, token.IDENT,
})
expectTokens(t, "APPEND BLANK", []token.Kind{token.APPEND, token.BLANK})
expectTokens(t, "GO TOP", []token.Kind{token.GO, token.TOP})
}
func TestClassDeclaration(t *testing.T) {
expectTokens(t, "CLASS Person", []token.Kind{token.CLASS, token.IDENT})
expectTokens(t, "DATA cName INIT", []token.Kind{token.DATA, token.IDENT, token.IDENT})
expectTokens(t, "METHOD New", []token.Kind{token.METHOD, token.IDENT})
expectTokens(t, "ENDCLASS", []token.Kind{token.ENDCLASS})
}
func TestArrowAndColons(t *testing.T) {
expectTokens(t, "cust->name", []token.Kind{
token.IDENT, token.ARROW, token.IDENT,
})
expectTokens(t, "obj:greet()", []token.Kind{
token.IDENT, token.COLON, token.IDENT, token.LPAREN, token.RPAREN,
})
expectTokens(t, "::name", []token.Kind{token.COLONCOLON, token.IDENT})
}
func TestCodeBlock(t *testing.T) {
expectTokens(t, "{|x| x + 1}", []token.Kind{
token.LBRACE, token.PIPE, token.IDENT, token.PIPE,
token.IDENT, token.PLUS, token.INT, token.RBRACE,
})
}
func TestHashLiteral(t *testing.T) {
expectTokens(t, `{"a" => 1}`, []token.Kind{
token.LBRACE, token.STRING, token.DBLARROW, token.INT, token.RBRACE,
})
}
func TestComparison(t *testing.T) {
expectTokens(t, "a == b", []token.Kind{token.IDENT, token.EXEQ, token.IDENT})
expectTokens(t, "a != b", []token.Kind{token.IDENT, token.NEQ, token.IDENT})
expectTokens(t, "a <> b", []token.Kind{token.IDENT, token.NEQ, token.IDENT})
expectTokens(t, "a # b", []token.Kind{token.IDENT, token.NEQ, token.IDENT})
expectTokens(t, "a <= b", []token.Kind{token.IDENT, token.LTE, token.IDENT})
expectTokens(t, "a >= b", []token.Kind{token.IDENT, token.GTE, token.IDENT})
}
// TestDoubleNumber checks that digits.digits is a single DOUBLE token.
func TestDoubleNumber(t *testing.T) {
	first := Tokenize("test.prg", "3.14")[0]
	if first.Kind == token.DOUBLE && first.Literal == "3.14" {
		return
	}
	t.Errorf("got %v %q", first.Kind, first.Literal)
}
// TestHexNumber checks that a 0x-prefixed literal is an INT token that keeps
// its original spelling.
func TestHexNumber(t *testing.T) {
	first := Tokenize("test.prg", "0xFF")[0]
	if first.Kind == token.INT && first.Literal == "0xFF" {
		return
	}
	t.Errorf("got %v %q", first.Kind, first.Literal)
}
func TestMacroOperator(t *testing.T) {
expectTokens(t, "&cVar", []token.Kind{token.AMPERSAND, token.IDENT})
}
func TestImport(t *testing.T) {
expectTokens(t, `IMPORT "net/http"`, []token.Kind{token.IMPORT, token.STRING})
}
func TestPreprocessor(t *testing.T) {
tokens := Tokenize("test.prg", "#include")
if tokens[0].Kind != token.PP_INCLUDE {
t.Errorf("got %v, want PP_INCLUDE", tokens[0].Kind)
}
tokens = Tokenize("test.prg", "#define")
if tokens[0].Kind != token.PP_DEFINE {
t.Errorf("got %v, want PP_DEFINE", tokens[0].Kind)
}
tokens = Tokenize("test.prg", "#pragma")
if tokens[0].Kind != token.PP_PRAGMA {
t.Errorf("got %v, want PP_PRAGMA", tokens[0].Kind)
}
}
func TestLineComment(t *testing.T) {
expectTokens(t, "x := 10 // comment", []token.Kind{
token.IDENT, token.ASSIGN, token.INT,
})
}
func TestAmpAmpComment(t *testing.T) {
expectTokens(t, "x := 10 && comment", []token.Kind{
token.IDENT, token.ASSIGN, token.INT,
})
}
func TestBlockComment(t *testing.T) {
expectTokens(t, "x /* skip */ + y", []token.Kind{
token.IDENT, token.PLUS, token.IDENT,
})
}
func TestLineContinuation(t *testing.T) {
// Semicolon at end of line = continuation
expectTokens(t, "x + ;\n y", []token.Kind{
token.IDENT, token.PLUS, token.IDENT,
})
}
func TestNewlineAsTerminator(t *testing.T) {
tokens := Tokenize("test.prg", "x\ny")
kinds := make([]token.Kind, 0)
for _, tok := range tokens {
if tok.Kind != token.EOF {
kinds = append(kinds, tok.Kind)
}
}
// Should have: IDENT NEWLINE IDENT
if len(kinds) != 3 || kinds[1] != token.NEWLINE {
t.Errorf("expected IDENT NEWLINE IDENT, got %v", kinds)
}
}
// TestPosition checks that the first token of a file is reported at line 1,
// column 1.
func TestPosition(t *testing.T) {
	at := Tokenize("test.prg", "x := 10")[0].Pos
	if at.Line == 1 && at.Col == 1 {
		return
	}
	t.Errorf("x position: line=%d col=%d", at.Line, at.Col)
}
// Full program test
func TestFullProgram(t *testing.T) {
src := `FUNCTION Main()
LOCAL n := 10
? "Hello", n
RETURN NIL`
tokens := Tokenize("test.prg", src)
var kinds []token.Kind
for _, tok := range tokens {
if tok.Kind != token.NEWLINE && tok.Kind != token.EOF {
kinds = append(kinds, tok.Kind)
}
}
expected := []token.Kind{
token.FUNCTION_KW, token.IDENT, token.LPAREN, token.RPAREN,
token.LOCAL, token.IDENT, token.ASSIGN, token.INT,
token.QMARK, token.STRING, token.COMMA, token.IDENT,
token.RETURN, token.NIL_LIT,
}
if len(kinds) != len(expected) {
t.Errorf("token count: got %d, want %d", len(kinds), len(expected))
for i, tok := range tokens {
if tok.Kind != token.NEWLINE && tok.Kind != token.EOF {
t.Logf(" [%d] %v %q", i, tok.Kind, tok.Literal)
}
}
return
}
for i, want := range expected {
if kinds[i] != want {
t.Errorf("token[%d]: got %v %q, want %v", i, kinds[i], tokens[i].Literal, want)
}
}
}

760
compiler/parser/expr.go Normal file
View File

@@ -0,0 +1,760 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Expression parsing using Pratt parser (precedence climbing).
//
// Harbour's operator precedence from harbour.y:
// POST < ASSIGN(right) < OR(right) < AND(right) < NOT(right) <
// COMPARE(right) < ADD < MUL < POWER < UNARY < PRE < ALIAS/MACRO
//
// Key Harbour quirks:
// - '=' is BOTH assignment (in statement context) and equality (in expression)
// - Most operators are right-associative (unlike C)
// - (expr)->field for dynamic alias
// - &variable for macro
package parser
import (
"five/compiler/ast"
"five/compiler/token"
"strings"
)
// parseExpr parses a complete expression. It is the entry point of the
// precedence-climbing parser and starts at the assignment precedence level.
func (p *Parser) parseExpr() ast.Expr {
	expr := p.parseBinaryExpr(token.PrecAssign)
	return expr
}
// parseBinaryExpr parses a chain of binary operators by precedence climbing.
// Only operators whose precedence is at least minPrec are consumed; anything
// weaker is left for the caller.
//
// The right operand is parsed at the operator's own precedence when the
// operator is right-associative and one level higher otherwise. Assignment
// operators build an AssignExpr, all other operators a BinaryExpr.
func (p *Parser) parseBinaryExpr(minPrec token.Precedence) ast.Expr {
	lhs := p.parseUnaryExpr()

	for prec := token.GetBinaryPrecedence(p.current.Kind); prec >= minPrec; prec = token.GetBinaryPrecedence(p.current.Kind) {
		opTok := p.advance()

		rhsPrec := prec
		if !token.IsRightAssociative(opTok.Kind) {
			rhsPrec = prec + 1
		}
		rhs := p.parseBinaryExpr(rhsPrec)

		if isAssignOp(opTok.Kind) {
			lhs = &ast.AssignExpr{
				Left:  lhs,
				OpPos: opTok.Pos,
				Op:    opTok.Kind,
				Right: rhs,
			}
			continue
		}
		lhs = &ast.BinaryExpr{
			Left:  lhs,
			OpPos: opTok.Pos,
			Op:    opTok.Kind,
			Right: rhs,
		}
	}
	return lhs
}
// isAssignOp reports whether k is ":=" or one of the compound assignment
// operators (+=, -=, *=, /=, %=, and the power-assign form).
func isAssignOp(k token.Kind) bool {
	return k == token.ASSIGN ||
		k == token.PLUSEQ ||
		k == token.MINUSEQ ||
		k == token.STAREQ ||
		k == token.SLASHEQ ||
		k == token.PERCENTEQ ||
		k == token.POWEREQ
}
// parseUnaryExpr parses prefix operators and then falls through to postfix
// parsing.
//
//   - "-", NOT, "++" and "--" wrap their operand in a UnaryExpr
//   - unary "+" is dropped; only its operand is returned
//   - "@" builds a RefExpr
//   - "<-", ASYNC and AWAIT take a postfix expression as operand and build
//     ChanRecvExpr, AsyncExpr and AwaitExpr respectively
func (p *Parser) parseUnaryExpr() ast.Expr {
	switch kind := p.current.Kind; kind {
	case token.MINUS, token.NOT, token.INC, token.DEC:
		opTok := p.advance()
		operand := p.parseUnaryExpr()
		return &ast.UnaryExpr{OpPos: opTok.Pos, Op: kind, X: operand}

	case token.PLUS:
		// Unary plus has no effect on the value.
		p.advance()
		return p.parseUnaryExpr()

	case token.AT:
		atTok := p.advance()
		operand := p.parseUnaryExpr()
		return &ast.RefExpr{AtPos: atTok.Pos, X: operand}

	case token.ARROW_LEFT:
		// <- ch (channel receive as expression)
		arrowPos := p.advance().Pos
		source := p.parsePostfixExpr()
		return &ast.ChanRecvExpr{ArrowPos: arrowPos, Chan: source}

	case token.ASYNC_KW:
		// ASYNC expr
		asyncPos := p.advance().Pos
		target := p.parsePostfixExpr()
		return &ast.AsyncExpr{AsyncPos: asyncPos, Call: target}

	case token.AWAIT_KW:
		// AWAIT future
		awaitPos := p.advance().Pos
		awaited := p.parsePostfixExpr()
		return &ast.AwaitExpr{AwaitPos: awaitPos, Future: awaited}
	}

	return p.parsePostfixExpr()
}
// parsePostfixExpr parses a primary expression followed by any number of
// postfix operations: calls, indexing/slicing, nil-safe sends (?:), method
// sends (:), alias access (->), postfix ++/--, and dotted member access.
//
// The loop keeps folding the expression built so far into a new node until
// it meets a token that cannot continue a postfix chain, then returns.
func (p *Parser) parsePostfixExpr() ast.Expr {
	expr := p.parsePrimaryExpr()
	for {
		switch p.current.Kind {
		case token.LPAREN:
			// Call: expr(args...)
			open := p.advance().Pos
			var callArgs []ast.Expr
			if !p.at(token.RPAREN) {
				callArgs = p.parseExprList()
			}
			closing := p.expect(token.RPAREN).Pos
			expr = &ast.CallExpr{Func: expr, LParen: open, Args: callArgs, RParen: closing}

		case token.LBRACKET:
			// expr[index], expr[i, j] or expr[low:high]
			open := p.advance().Pos
			if p.isSliceSyntax() {
				// Either bound may be omitted: [:high], [low:], [:]
				var lo, hi ast.Expr
				if !p.at(token.COLON) {
					lo = p.parseSliceIndex()
				}
				p.expect(token.COLON)
				if !p.at(token.RBRACKET) {
					hi = p.parseSliceIndex()
				}
				closing := p.expect(token.RBRACKET).Pos
				expr = &ast.SliceExpr{X: expr, LBracket: open, Low: lo, High: hi, RBracket: closing}
				continue
			}
			idx := p.parseExpr()
			// a[3, 2] is folded into nested index nodes: a[3][2]. The token
			// following each comma supplies the synthetic bracket positions.
			for p.match(token.COMMA) {
				sep := p.current.Pos
				expr = &ast.IndexExpr{X: expr, LBracket: open, Index: idx, RBracket: sep}
				idx = p.parseExpr()
				open = sep
			}
			closing := p.expect(token.RBRACKET).Pos
			expr = &ast.IndexExpr{X: expr, LBracket: open, Index: idx, RBracket: closing}

		case token.QMARK:
			// Only "?:" continues the chain; a lone ? belongs to QOut.
			if p.peekAt(1) != token.COLON {
				return expr
			}
			p.advance()              // ?
			qpos := p.advance().Pos  // :
			name := p.expectMethodName().Literal
			var sendArgs []ast.Expr
			parenthesized := false
			if p.at(token.LPAREN) {
				parenthesized = true
				p.advance()
				if !p.at(token.RPAREN) {
					sendArgs = p.parseExprList()
				}
				p.expect(token.RPAREN)
			}
			expr = &ast.NilSafeExpr{X: expr, QPos: qpos, Method: name, Args: sendArgs, HasParens: parenthesized}

		case token.COLON:
			// Method send: expr:method, expr:method(args...) or expr:&macro
			expr = p.parsePostfixSend(expr)

		case token.ARROW:
			// Alias access: expr->field
			arrow := p.advance().Pos
			target := p.parsePrimaryExpr()
			expr = &ast.AliasExpr{Alias: expr, ArrowPos: arrow, Field: target}

		case token.INC, token.DEC:
			// Postfix expr++ / expr--
			kind := p.current.Kind
			opPos := p.advance().Pos
			expr = &ast.PostfixExpr{X: expr, OpPos: opPos, Op: kind}

		case token.COLONCOLON:
			// :: is handled by the primary parser; it never extends a chain.
			return expr

		case token.DOT:
			// pkg.Func / obj.Field — the member may be any token that carries
			// a literal, so keyword-named members are accepted too.
			if p.peekLitAt(1) == "" {
				return expr
			}
			dotPos := p.advance().Pos
			member := p.advance().Literal
			expr = &ast.DotExpr{X: expr, DotPos: dotPos, Member: member}

		default:
			return expr
		}
	}
}
// parsePrimaryExpr parses an atom: a literal, identifier, ::self access,
// parenthesized expression, inline IF(), macro, bare-colon WITH OBJECT send,
// or a { ... } array/hash/block literal.
//
// Tokens that match none of those are still accepted as identifiers when they
// carry a literal and are either followed by "(" or belong to the keyword
// list below. Otherwise an error is recorded, the offending token is
// consumed, and a NIL literal is returned so parsing can continue.
func (p *Parser) parsePrimaryExpr() ast.Expr {
	switch p.current.Kind {
	case token.INT, token.LONG, token.DOUBLE, token.STRING,
		token.DATE_LIT, token.TRUE, token.FALSE, token.NIL_LIT:
		lit := p.advance()
		return &ast.LiteralExpr{ValuePos: lit.Pos, Kind: lit.Kind, Value: lit.Literal}

	case token.COLONCOLON:
		// ::name, ::name() or ::name(args)
		return p.parsePrimarySelf()

	case token.LPAREN:
		// (expr) or a comma sequence (a, b, c).
		// NOTE(review): only the last expression of a comma sequence is kept
		// in the returned AST; the earlier ones are parsed and then dropped.
		p.advance()
		inner := p.parseExpr()
		for p.match(token.COMMA) {
			inner = p.parseExpr()
		}
		p.expect(token.RPAREN)
		return inner

	case token.IF:
		// if(cond, a, b) is the inline form, parsed like IIF.
		if p.peekAt(1) == token.LPAREN {
			return p.parseIIF()
		}
		p.error("expected expression, got IF")
		bad := p.advance()
		return &ast.LiteralExpr{ValuePos: bad.Pos, Kind: token.NIL_LIT, Value: "NIL"}

	case token.IDENT:
		// Plain identifier, IIF(...), or f"..." interpolated string
		return p.parsePrimaryIdent()

	case token.AMPERSAND:
		return p.parseMacro()

	case token.COLON:
		// :field — send to the implicit WITH OBJECT receiver
		return p.parsePrimaryWithSend()

	case token.LBRACE:
		return p.parseArrayOrBlock()
	}

	// Keywords in expression position: usable as a function name when
	// followed by "(", or as a bare variable/field name when listed below.
	if p.current.Literal != "" {
		asIdent := p.peekAt(1) == token.LPAREN
		if !asIdent {
			switch p.current.Kind {
			case token.TO, token.DATA, token.FIELD, token.IN, token.FROM,
				token.WHILE, token.EACH, token.ENDDO, token.END, token.NEXT,
				token.RECOVER, token.SEQUENCE, token.GO, token.GOTO,
				token.MEMVAR, token.ALIAS, token.WITH, token.ON,
				token.STEP, token.DESCENDING, token.UNIQUE,
				token.DELETE_KW, token.RECALL, token.PACK, token.ZAP,
				token.TYPE_KW, token.CLASS, token.DECLARE, token.INLINE_KW,
				token.CASE, token.OTHERWISE, token.ENDCASE, token.BEGIN,
				token.DO, token.ENDIF, token.FOR, token.IF,
				token.SWITCH, token.RETURN, token.EXIT, token.LOOP,
				token.LOCAL, token.PRIVATE, token.PUBLIC,
				token.STATIC, token.PARAMETERS, token.DESTRUCTOR,
				token.CONSTRUCTOR, token.OPERATOR_KW,
				token.FUNCTION_KW, token.PROCEDURE, token.METHOD,
				token.ELSEIF, token.ELSE, token.ENDCLASS,
				token.USING, token.ASSIGN_KW, token.ACCESS,
				token.APPEND, token.REPLACE, token.INDEX,
				token.SEEK, token.SKIP_KW, token.USE,
				token.SELECT, token.SET:
				asIdent = true
			}
		}
		if asIdent {
			kw := p.advance()
			return &ast.IdentExpr{NamePos: kw.Pos, Name: kw.Literal}
		}
	}

	p.error("expected expression, got " + p.current.Kind.String() + " " + p.current.Literal)
	bad := p.advance()
	return &ast.LiteralExpr{ValuePos: bad.Pos, Kind: token.NIL_LIT, Value: "NIL"}
}
// parseArrayOrBlock parses a { ... } literal, which can be:
//
//	{1, 2, 3}     → ArrayLitExpr
//	{1, , 3}      → ArrayLitExpr with NIL in the empty slot
//	{"a" => 1}    → HashLitExpr
//	{=>}          → empty HashLitExpr
//	{|x| x + 1}   → BlockExpr
//	{|| expr}     → BlockExpr (no params)
//	{...}         → call to HB_AParams (variadic parameter array)
//
// Empty array slots are filled with NIL literals wherever they occur. The
// ordinary array path previously rejected them, although the leading-comma
// path and parseExprList already accepted the same syntax.
func (p *Parser) parseArrayOrBlock() ast.Expr {
	lbrace := p.expect(token.LBRACE).Pos

	// nilHere builds the NIL placeholder for an omitted element.
	nilHere := func() ast.Expr {
		return &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}
	}
	// restOfArray consumes ", elem" repetitions. A comma directly followed by
	// another comma or by the closing brace contributes a NIL element.
	restOfArray := func(items []ast.Expr) []ast.Expr {
		for p.match(token.COMMA) {
			if p.at(token.RBRACE) || p.at(token.COMMA) {
				items = append(items, nilHere())
				continue
			}
			items = append(items, p.parseExpr())
		}
		return items
	}

	// Code block: {|params| body}
	if p.at(token.PIPE) {
		p.advance() // consume first |
		var params []string
		if !p.at(token.PIPE) {
			// Parameter names, each with an optional AS type annotation
			for {
				params = append(params, p.expectMethodName().Literal)
				// The AS type is skipped token by token up to the next
				// comma or the closing pipe; it is not recorded in the AST.
				if p.match(token.AS) {
					for p.current.Kind != token.PIPE && p.current.Kind != token.COMMA &&
						p.current.Kind != token.EOF {
						p.advance()
					}
				}
				if !p.match(token.COMMA) {
					break
				}
			}
		}
		p.expect(token.PIPE) // closing |
		// {|x| expr1, expr2}: comma separates a sequence of expressions.
		// NOTE(review): only the last expression is kept as the block body;
		// the earlier ones are parsed and then dropped.
		body := p.parseExpr()
		for p.match(token.COMMA) {
			body = p.parseExpr()
		}
		rbrace := p.expect(token.RBRACE).Pos
		return &ast.BlockExpr{LBrace: lbrace, Params: params, Body: body, RBrace: rbrace}
	}

	// {} → empty array
	if p.at(token.RBRACE) {
		rbrace := p.advance().Pos
		return &ast.ArrayLitExpr{LBrace: lbrace, RBrace: rbrace}
	}

	// {...} → variadic params array, emitted as a call to HB_AParams
	if p.at(token.DOT) && p.peekAt(1) == token.DOT && p.peekAt(2) == token.DOT {
		p.advance() // .
		p.advance() // .
		p.advance() // .
		rbrace := p.expect(token.RBRACE).Pos
		return &ast.CallExpr{
			Func:   &ast.IdentExpr{NamePos: lbrace, Name: "HB_AParams"},
			RParen: rbrace,
		}
	}

	// {=>} → empty hash literal
	if p.at(token.DBLARROW) {
		p.advance() // consume =>
		rbrace := p.expect(token.RBRACE).Pos
		return &ast.HashLitExpr{LBrace: lbrace, RBrace: rbrace}
	}

	// Leading comma: {, x, y} → {NIL, x, y}
	if p.at(token.COMMA) {
		items := restOfArray([]ast.Expr{nilHere()})
		rbrace := p.expect(token.RBRACE).Pos
		return &ast.ArrayLitExpr{LBrace: lbrace, Items: items, RBrace: rbrace}
	}

	// The token after the first element decides: "=>" means hash, else array.
	first := p.parseExpr()

	// Hash: { key => value, ... }
	if p.at(token.DBLARROW) {
		p.advance() // consume =>
		keys := []ast.Expr{first}
		vals := []ast.Expr{p.parseExpr()}
		for p.match(token.COMMA) {
			keys = append(keys, p.parseExpr())
			p.expect(token.DBLARROW)
			vals = append(vals, p.parseExpr())
		}
		rbrace := p.expect(token.RBRACE).Pos
		return &ast.HashLitExpr{LBrace: lbrace, Keys: keys, Values: vals, RBrace: rbrace}
	}

	// Array: {expr, expr, ...}, with empty slots allowed
	items := restOfArray([]ast.Expr{first})
	rbrace := p.expect(token.RBRACE).Pos
	return &ast.ArrayLitExpr{LBrace: lbrace, Items: items, RBrace: rbrace}
}
// parseMacro parses a macro reference: &variable or &(expression).
//
// For the &variable form, an optional terminator/suffix after the name is
// consumed here and not recorded in the returned MacroExpr:
//
//	&var.       — the dot ends the macro
//	&var.suffix — dot plus one IDENT or INT token
//	&var.1      — when the suffix arrives as a single DOUBLE token
func (p *Parser) parseMacro() ast.Expr {
	amp := p.expect(token.AMPERSAND).Pos

	if p.at(token.LPAREN) {
		// &(expression)
		p.advance()
		inner := p.parseExpr()
		p.expect(token.RPAREN)
		return &ast.MacroExpr{AmpPos: amp, Expr: inner}
	}

	// &variable — the name may be a keyword, hence expectMethodName
	nameTok := p.expectMethodName()
	result := &ast.MacroExpr{
		AmpPos: amp,
		Expr:   &ast.IdentExpr{NamePos: nameTok.Pos, Name: nameTok.Literal},
	}

	switch {
	case p.at(token.DOT):
		p.advance() // the terminating dot
		if k := p.current.Kind; k == token.IDENT || k == token.INT {
			p.advance() // optional suffix token
		}
	case p.current.Kind == token.DOUBLE &&
		(strings.HasPrefix(p.current.Literal, ".") || strings.HasPrefix(p.current.Literal, "0.")):
		// ".1" reached the parser as one DOUBLE token; treat it as the suffix
		p.advance()
	}
	return result
}
// parseIIF parses IIF(cond, trueExpr, falseExpr), starting at the IIF (or
// inline IF) token. Either branch may be left empty, in which case a NIL
// literal positioned at the current token stands in for it.
func (p *Parser) parseIIF() ast.Expr {
	ifPos := p.advance().Pos // the IIF / IF token itself
	p.expect(token.LPAREN)

	// placeholder builds the NIL literal used for an omitted branch.
	placeholder := func() ast.Expr {
		return &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}
	}

	condition := p.parseExpr()
	p.expect(token.COMMA)

	var whenTrue ast.Expr
	switch {
	case p.at(token.COMMA), p.at(token.RPAREN):
		whenTrue = placeholder()
	default:
		whenTrue = p.parseExpr()
	}
	p.expect(token.COMMA)

	var whenFalse ast.Expr
	if !p.at(token.RPAREN) {
		whenFalse = p.parseExpr()
	} else {
		whenFalse = placeholder()
	}
	p.expect(token.RPAREN)

	return &ast.IIfExpr{IfPos: ifPos, Cond: condition, True: whenTrue, False: whenFalse}
}
// parseExprList parses a comma-separated list of expressions and always
// returns at least one element. Omitted elements become NIL literals:
//
//	f(,x)   → NIL, x
//	f(x,,y) → x, NIL, y
//	f(x,)   → x, NIL
//
// The first element is treated as omitted only when the list starts with a
// comma. Later elements are also treated as omitted when the next token is
// a closing ")" or "}".
func (p *Parser) parseExprList() []ast.Expr {
	// element parses one slot; closerEndsSlot says whether ")" / "}" also
	// count as "nothing here".
	element := func(closerEndsSlot bool) ast.Expr {
		omitted := p.at(token.COMMA)
		if !omitted && closerEndsSlot {
			omitted = p.at(token.RPAREN) || p.at(token.RBRACE)
		}
		if omitted {
			return &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}
		}
		return p.parseExpr()
	}

	exprs := []ast.Expr{element(false)}
	for p.match(token.COMMA) {
		exprs = append(exprs, element(true))
	}
	return exprs
}
// isSliceSyntax reports whether the tokens just after "[" contain a ":" at
// bracket depth zero before the matching "]".
//
// The scan is bounded to the next 10 tokens. If neither a top-level ":" nor
// the closing "]" turns up in that window, or a NEWLINE/EOF is reached
// first, the bracket is treated as an ordinary index.
// NOTE(review): any top-level ":" counts, so an index written as a method
// send (a[obj:field]) is classified as a slice — confirm this is intended.
func (p *Parser) isSliceSyntax() bool {
	const lookahead = 10
	nesting := 0
	for offset := 0; offset < lookahead; offset++ {
		switch p.peekAt(offset) {
		case token.NEWLINE, token.EOF:
			return false
		case token.COLON:
			if nesting == 0 {
				return true
			}
		case token.RBRACKET:
			if nesting == 0 {
				return false
			}
			nesting--
		case token.RPAREN, token.RBRACE:
			nesting--
		case token.LBRACKET, token.LPAREN, token.LBRACE:
			nesting++
		}
	}
	// Window exhausted without a verdict: fall back to a normal index.
	return false
}
// parseSliceIndex parses one bound of a slice expression (the low or high
// part of x[low:high]) and stops before ":" and "]".
//
// A bound is a primary expression optionally followed by call parentheses,
// so numbers, identifiers and calls such as Len(a) are accepted. The full
// postfix parser is deliberately not used, because it would consume the
// slice's ":" separator as a method send.
func (p *Parser) parseSliceIndex() ast.Expr {
	bound := p.parsePrimaryExpr()
	// parsePrimaryExpr returns only the callee of f(...); pick up the
	// argument list here so a[1:Len(a)] parses as a slice.
	for p.at(token.LPAREN) {
		lp := p.advance().Pos
		var args []ast.Expr
		if !p.at(token.RPAREN) {
			args = p.parseExprList()
		}
		rp := p.expect(token.RPAREN).Pos
		bound = &ast.CallExpr{Func: bound, LParen: lp, Args: args, RParen: rp}
	}
	return bound
}
// parseInterpolatedString (defined further below, after the extracted
// helpers) handles f"Hello {name}, age {age}": it scans the format string,
// extracts the {expr} references, and converts the literal to
// fmt.Sprintf("Hello %v, age %v", name, age).
// --- Extracted helpers for expression registry ---
// parsePostfixSend parses a method send on x, starting at the ":" token:
// x:method, x:method(args...) or the dynamic form x:&macro. The method name
// may be a keyword. The result is a SendExpr whose HasParens records whether
// an argument list, possibly empty, was written.
func (p *Parser) parsePostfixSend(x ast.Expr) ast.Expr {
	send := &ast.SendExpr{Object: x, ColonPos: p.advance().Pos}

	// Either a macro supplies the method at run time, or a name is expected.
	if p.current.Kind != token.AMPERSAND {
		send.Method = p.expectMethodName().Literal
	} else {
		send.MacroMethod = p.parseMacro()
	}

	// Optional argument list
	if !p.at(token.LPAREN) {
		return send
	}
	send.HasParens = true
	send.LParen = p.advance().Pos
	if !p.at(token.RPAREN) {
		send.Args = p.parseExprList()
	}
	send.RParen = p.expect(token.RPAREN).Pos
	return send
}
// parsePrimaryIdent parses an expression that begins with an IDENT token.
// An identifier spelled IIF (in any letter case) starts an inline IIF; a
// lone lower-case f directly followed by a string literal starts an
// interpolated string; anything else is a plain IdentExpr.
func (p *Parser) parsePrimaryIdent() ast.Expr {
	name := p.current.Literal
	switch {
	case strings.ToUpper(name) == "IIF":
		return p.parseIIF()
	case name == "f" && p.peekAt(1) == token.STRING:
		return p.parseInterpolatedString()
	}
	identTok := p.advance()
	return &ast.IdentExpr{NamePos: identTok.Pos, Name: identTok.Literal}
}
// parsePrimaryWithSend parses the bare-colon form used inside WITH OBJECT:
// ":field" becomes a send of field to the placeholder identifier
// "__withObject". A colon not followed by a usable name yields just the
// placeholder identifier.
func (p *Parser) parsePrimaryWithSend() ast.Expr {
	colon := p.advance().Pos
	receiver := &ast.IdentExpr{NamePos: colon, Name: "__withObject"}

	// Any token carrying a literal is accepted as the member name.
	if !p.at(token.IDENT) && p.current.Literal == "" {
		return receiver
	}
	member := p.advance()
	return &ast.SendExpr{
		Object:   receiver,
		ColonPos: colon,
		Method:   member.Literal,
	}
}
// parsePrimarySelf parses a "::"-prefixed expression. "::name" and
// "::name(args)" become a SendExpr on a SelfExpr; a "::" with no usable name
// after it yields the SelfExpr alone.
func (p *Parser) parsePrimarySelf() ast.Expr {
	selfPos := p.advance().Pos
	receiver := &ast.SelfExpr{ColonPos: selfPos}

	// Any token carrying a literal is accepted as the member name.
	if !p.at(token.IDENT) && p.current.Literal == "" {
		return receiver
	}

	member := p.advance()
	send := &ast.SendExpr{Object: receiver, ColonPos: selfPos, Method: member.Literal}
	if p.at(token.LPAREN) {
		// ::name(...) — explicit call with optional arguments
		send.HasParens = true
		send.LParen = p.advance().Pos
		if !p.at(token.RPAREN) {
			send.Args = p.parseExprList()
		}
		send.RParen = p.expect(token.RPAREN).Pos
	}
	return send
}
// parseInterpolatedString parses f"Hello {name}, age {age:5d}" and rewrites
// it as a call to fmt.Sprintf("Hello %v, age %5d", name, age).
//
// Each {expr} placeholder contributes one Sprintf argument. An optional
// ":spec" after the last colon inside the braces becomes the verb "%spec";
// without it (or with an empty spec) "%v" is used. The placeholder text is
// not parsed as an expression: it is stored verbatim as the name of an
// IdentExpr. All synthesized nodes carry the position of the f prefix.
//
// Literal text is copied byte for byte, so multi-byte UTF-8 sequences stay
// intact, and a literal "%" is escaped as "%%" so Sprintf prints it as-is.
// A "{" with no matching "}" is reported as a parse error and the rest of
// the string is kept as literal text. A string with no placeholders is
// returned unchanged as a plain STRING literal.
func (p *Parser) parseInterpolatedString() ast.Expr {
	fPos := p.advance().Pos // consume 'f'
	src := p.expect(token.STRING).Literal

	var format strings.Builder
	format.Grow(len(src))
	var args []ast.Expr

	for i := 0; i < len(src); {
		c := src[i]
		if c != '{' {
			if c == '%' {
				format.WriteByte('%') // "%%" is Sprintf's literal percent
			}
			format.WriteByte(c)
			i++
			continue
		}

		// Locate the brace that closes this placeholder, honouring nesting.
		end := -1
		depth := 0
		for j := i; j < len(src); j++ {
			if src[j] == '{' {
				depth++
			} else if src[j] == '}' {
				depth--
			}
			if depth == 0 {
				end = j
				break
			}
		}
		if end < 0 {
			p.error("unterminated '{' in f-string")
			format.WriteString(strings.ReplaceAll(src[i:], "%", "%%"))
			break
		}

		exprStr := src[i+1 : end]
		// {expr:fmt} — text after the last colon is the format spec
		verb := "%v"
		if colonIdx := strings.LastIndex(exprStr, ":"); colonIdx >= 0 {
			if spec := exprStr[colonIdx+1:]; spec != "" {
				verb = "%" + spec
			}
			exprStr = exprStr[:colonIdx]
		}
		format.WriteString(verb)
		args = append(args, &ast.IdentExpr{NamePos: fPos, Name: exprStr})
		i = end + 1
	}

	if len(args) == 0 {
		// No interpolation — return the original text as a plain string
		return &ast.LiteralExpr{ValuePos: fPos, Kind: token.STRING, Value: src}
	}

	// Build: fmt.Sprintf(fmtStr, arg1, arg2, ...)
	allArgs := make([]ast.Expr, 0, len(args)+1)
	allArgs = append(allArgs, &ast.LiteralExpr{ValuePos: fPos, Kind: token.STRING, Value: format.String()})
	allArgs = append(allArgs, args...)
	return &ast.CallExpr{
		Func: &ast.DotExpr{
			X:      &ast.IdentExpr{NamePos: fPos, Name: "fmt"},
			DotPos: fPos,
			Member: "Sprintf",
		},
		LParen: fPos,
		Args:   allArgs,
		RParen: fPos,
	}
}

258
compiler/parser/exprreg.go Normal file
View File

@@ -0,0 +1,258 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// exprreg.go — Expression parser registries for Pratt parser.
//
// Three registries:
// prefixParsers — unary prefix: -, +, !, ++, --, @, <-, ASYNC, AWAIT
// postfixParsers — postfix: (), [], :, ., ?:, ++, --, ->
// primaryParsers — atoms: INT, STRING, IDENT, (, {, ::
//
// Adding a new operator = one line in init().
package parser
import (
"five/compiler/ast"
"five/compiler/token"
)
// PrefixParser parses a prefix unary expression.
// It is invoked with the operator token still current and consumes it.
type PrefixParser func(p *Parser) ast.Expr
// PostfixParser parses a postfix expression given the left-hand side.
// Several implementations in this file return nil without consuming anything
// to indicate that the current token does not continue the postfix chain.
type PostfixParser func(p *Parser, x ast.Expr) ast.Expr
// PrimaryParser parses an atomic/primary expression.
type PrimaryParser func(p *Parser) ast.Expr
// Dispatch tables keyed by the kind of the current token.
// They are populated once, in this file's init().
var (
prefixParsers map[token.Kind]PrefixParser
postfixParsers map[token.Kind]PostfixParser
primaryParsers map[token.Kind]PrimaryParser
)
// init fills the three expression registries. Each entry maps the token kind
// that introduces a construct to the function that parses it; supporting a
// new operator means adding one entry to the matching table.
func init() {
// Prefix operators. The four plain unary operators share prefixUnary;
// unary plus has its own parser because it produces no node of its own.
prefixParsers = map[token.Kind]PrefixParser{
token.MINUS: prefixUnary(token.MINUS),
token.PLUS: prefixPlus,
token.NOT: prefixUnary(token.NOT),
token.INC: prefixUnary(token.INC),
token.DEC: prefixUnary(token.DEC),
token.ARROW_LEFT: prefixChanRecv,
token.ASYNC_KW: prefixAsync,
token.AWAIT_KW: prefixAwait,
token.AT: prefixRef,
}
// Postfix operators, applied to an already-parsed left-hand side.
postfixParsers = map[token.Kind]PostfixParser{
token.LPAREN: postfixCall,
token.LBRACKET: postfixIndex,
token.COLON: postfixSend,
token.QMARK: postfixNilSafe,
token.DOT: postfixDot,
token.ARROW: postfixAlias,
token.INC: postfixIncDec(token.INC),
token.DEC: postfixIncDec(token.DEC),
token.COLONCOLON: postfixSelfStop,
}
// Primary expressions (atoms). All literal kinds share primaryLiteral.
primaryParsers = map[token.Kind]PrimaryParser{
token.INT: primaryLiteral,
token.LONG: primaryLiteral,
token.DOUBLE: primaryLiteral,
token.STRING: primaryLiteral,
token.DATE_LIT: primaryLiteral,
token.TRUE: primaryLiteral,
token.FALSE: primaryLiteral,
token.NIL_LIT: primaryLiteral,
token.COLONCOLON: primarySelf,
token.LPAREN: primaryParen,
token.IF: primaryIf,
token.IDENT: primaryIdent,
token.AMPERSAND: primaryMacro,
token.COLON: primaryWithSend,
token.LBRACE: primaryArrayOrBlock,
}
}
// --- Prefix parsers ---
// prefixUnary returns a PrefixParser for a simple unary operator: it consumes
// the operator token, parses the operand as a unary expression, and wraps
// both in a UnaryExpr tagged with op.
func prefixUnary(op token.Kind) PrefixParser {
	return func(p *Parser) ast.Expr {
		opPos := p.advance().Pos
		operand := p.parseUnaryExpr()
		return &ast.UnaryExpr{OpPos: opPos, Op: op, X: operand}
	}
}
// prefixPlus handles unary plus: the operator is consumed and the operand is
// returned as-is, with no wrapping node.
func prefixPlus(p *Parser) ast.Expr {
	_ = p.advance() // the '+' itself contributes nothing to the AST
	operand := p.parseUnaryExpr()
	return operand
}
// prefixChanRecv parses "<- ch": the arrow is consumed and the channel
// operand is parsed as a postfix expression.
func prefixChanRecv(p *Parser) ast.Expr {
	recv := &ast.ChanRecvExpr{ArrowPos: p.advance().Pos}
	recv.Chan = p.parsePostfixExpr()
	return recv
}
// prefixAsync parses "ASYNC expr": the keyword is consumed and the operand
// is parsed as a postfix expression and stored as the AsyncExpr's Call.
func prefixAsync(p *Parser) ast.Expr {
	async := &ast.AsyncExpr{AsyncPos: p.advance().Pos}
	async.Call = p.parsePostfixExpr()
	return async
}
// prefixAwait parses "AWAIT expr": the keyword is consumed and the operand
// is parsed as a postfix expression and stored as the AwaitExpr's Future.
func prefixAwait(p *Parser) ast.Expr {
	await := &ast.AwaitExpr{AwaitPos: p.advance().Pos}
	await.Future = p.parsePostfixExpr()
	return await
}
// prefixRef parses "@expr": the @ token is consumed and the operand, parsed
// as a unary expression, is wrapped in a RefExpr.
func prefixRef(p *Parser) ast.Expr {
	atPos := p.advance().Pos
	target := p.parseUnaryExpr()
	return &ast.RefExpr{AtPos: atPos, X: target}
}
// --- Postfix parsers ---
// postfixCall parses the "(args...)" that follows callee x and returns the
// resulting CallExpr. An empty pair of parentheses leaves Args nil.
func postfixCall(p *Parser, x ast.Expr) ast.Expr {
	call := &ast.CallExpr{Func: x, LParen: p.advance().Pos}
	if !p.at(token.RPAREN) {
		call.Args = p.parseExprList()
	}
	call.RParen = p.expect(token.RPAREN).Pos
	return call
}
// postfixIndex parses what follows "[" after x: a slice x[low:high] with
// either bound optional, a single index x[i], or a multi-dimensional index
// x[i, j], which is folded into nested IndexExpr nodes (x[i][j]).
func postfixIndex(p *Parser, x ast.Expr) ast.Expr {
	open := p.advance().Pos

	if p.isSliceSyntax() {
		slice := &ast.SliceExpr{X: x, LBracket: open}
		if !p.at(token.COLON) {
			slice.Low = p.parseSliceIndex()
		}
		p.expect(token.COLON)
		if !p.at(token.RBRACKET) {
			slice.High = p.parseSliceIndex()
		}
		slice.RBracket = p.expect(token.RBRACKET).Pos
		return slice
	}

	// Ordinary index. For each comma the position of the token after it
	// serves as both the closing bracket of the inner node and the opening
	// bracket of the next one.
	subscript := p.parseExpr()
	for p.match(token.COMMA) {
		sep := p.current.Pos
		x = &ast.IndexExpr{X: x, LBracket: open, Index: subscript, RBracket: sep}
		subscript = p.parseExpr()
		open = sep
	}
	closing := p.expect(token.RBRACKET).Pos
	return &ast.IndexExpr{X: x, LBracket: open, Index: subscript, RBracket: closing}
}
// postfixDot parses "x.member". The member may be any token that carries a
// literal. When the token after the dot has none, nothing is consumed and
// nil is returned to signal that the postfix chain stops here.
func postfixDot(p *Parser, x ast.Expr) ast.Expr {
	if p.peekLitAt(1) == "" {
		return nil // signal: stop postfix loop
	}
	dot := p.advance().Pos
	name := p.advance().Literal
	return &ast.DotExpr{X: x, DotPos: dot, Member: name}
}
// postfixIncDec returns a PostfixParser for a postfix ++ or -- operator: it
// consumes the operator token and wraps x in a PostfixExpr tagged with op.
func postfixIncDec(op token.Kind) PostfixParser {
	return func(p *Parser, x ast.Expr) ast.Expr {
		node := &ast.PostfixExpr{X: x, Op: op}
		node.OpPos = p.advance().Pos
		return node
	}
}
// postfixSelfStop is registered for "::" in postfix position. It consumes
// nothing and returns nil, so a "::" after an expression never extends it.
func postfixSelfStop(p *Parser, x ast.Expr) ast.Expr {
return nil // :: after expression — stop
}
// postfixNilSafe and postfixAlias are implemented in this file;
// postfixSend delegates to Parser.parsePostfixSend.

// postfixNilSafe parses the nil-safe send "x?:method" or "x?:method(args...)".
// A "?" not immediately followed by ":" is not a postfix operator (it is the
// QOut statement), so nothing is consumed and nil is returned.
func postfixNilSafe(p *Parser, x ast.Expr) ast.Expr {
	if p.peekAt(1) != token.COLON {
		return nil // bare ? = QOut, not postfix
	}
	p.advance() // ?
	node := &ast.NilSafeExpr{X: x, QPos: p.advance().Pos} // position of ':'
	node.Method = p.expectMethodName().Literal

	if p.at(token.LPAREN) {
		node.HasParens = true
		p.advance()
		if !p.at(token.RPAREN) {
			node.Args = p.parseExprList()
		}
		p.expect(token.RPAREN)
	}
	return node
}
// postfixAlias parses "x->field": the arrow is consumed and the field is
// parsed as a primary expression.
func postfixAlias(p *Parser, x ast.Expr) ast.Expr {
	alias := &ast.AliasExpr{Alias: x, ArrowPos: p.advance().Pos}
	alias.Field = p.parsePrimaryExpr()
	return alias
}
// postfixSend adapts Parser.parsePostfixSend to the PostfixParser signature;
// it handles "x:method", "x:method(args...)" and "x:&macro".
func postfixSend(p *Parser, x ast.Expr) ast.Expr {
return p.parsePostfixSend(x)
}
// --- Primary parsers ---
// primaryLiteral consumes the current token and returns it as a LiteralExpr
// carrying the token's own kind, position and text.
func primaryLiteral(p *Parser) ast.Expr {
	lit := p.advance()
	node := &ast.LiteralExpr{Kind: lit.Kind, Value: lit.Literal}
	node.ValuePos = lit.Pos
	return node
}
// primaryParen parses "(expr)" or a comma sequence "(a, b, c)" and returns
// the inner expression without a wrapping node.
// NOTE(review): for a comma sequence only the last expression is returned;
// the earlier ones are parsed and then dropped.
func primaryParen(p *Parser) ast.Expr {
	p.advance() // (
	var last ast.Expr
	for {
		last = p.parseExpr()
		if !p.match(token.COMMA) {
			break
		}
	}
	p.expect(token.RPAREN)
	return last
}
// primaryIf handles IF in expression position. "IF(" is the inline form and
// is parsed like IIF. Any other IF is an error: it is reported, the token is
// consumed, and a NIL literal stands in for the missing expression.
func primaryIf(p *Parser) ast.Expr {
	if p.peekAt(1) != token.LPAREN {
		p.error("expected expression, got IF")
		skipped := p.advance()
		return &ast.LiteralExpr{ValuePos: skipped.Pos, Kind: token.NIL_LIT, Value: "NIL"}
	}
	return p.parseIIF()
}
// primaryIdent adapts Parser.parsePrimaryIdent to the PrimaryParser signature.
func primaryIdent(p *Parser) ast.Expr {
return p.parsePrimaryIdent()
}
// primaryMacro adapts Parser.parseMacro to the PrimaryParser signature.
func primaryMacro(p *Parser) ast.Expr {
return p.parseMacro()
}
// primaryWithSend adapts Parser.parsePrimaryWithSend to the PrimaryParser
// signature.
func primaryWithSend(p *Parser) ast.Expr {
return p.parsePrimaryWithSend()
}
// primaryArrayOrBlock adapts Parser.parseArrayOrBlock to the PrimaryParser
// signature.
func primaryArrayOrBlock(p *Parser) ast.Expr {
return p.parseArrayOrBlock()
}
// primarySelf adapts Parser.parsePrimarySelf to the PrimaryParser signature.
func primarySelf(p *Parser) ast.Expr {
return p.parsePrimarySelf()
}

2162
compiler/parser/parser.go Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,427 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package parser
import (
"five/compiler/ast"
"five/compiler/token"
"testing"
)
// parseOK parses source as "test.prg" and returns the resulting file. Every
// parse error is reported through t, after which the calling test is aborted.
func parseOK(t *testing.T, source string) *ast.File {
	t.Helper()
	parsed, problems := Parse("test.prg", source)
	if len(problems) == 0 {
		return parsed
	}
	for _, problem := range problems {
		t.Errorf("parse error: %s", problem)
	}
	t.FailNow()
	return parsed // not reached: FailNow stops the test goroutine
}
// --- Function declaration ---
// TestParseSimpleFunction checks that a minimal FUNCTION yields exactly one
// FuncDecl with the right name that is not flagged as a PROCEDURE.
func TestParseSimpleFunction(t *testing.T) {
	const src = `FUNCTION Main()
RETURN NIL
`
	decls := parseOK(t, src).Decls
	if count := len(decls); count != 1 {
		t.Fatalf("expected 1 decl, got %d", count)
	}
	main, isFunc := decls[0].(*ast.FuncDecl)
	if !isFunc {
		t.Fatalf("expected FuncDecl, got %T", decls[0])
	}
	if main.Name != "Main" {
		t.Errorf("name = %q, want %q", main.Name, "Main")
	}
	if main.IsProc {
		t.Error("should not be PROCEDURE")
	}
}
// TestParseFunctionWithLocals checks parameter and LOCAL handling: two
// parameters, two LOCAL statements, and two variables in the second LOCAL.
func TestParseFunctionWithLocals(t *testing.T) {
	const src = `FUNCTION Foo(a, b)
LOCAL n := 10
LOCAL cName := "hello", x
RETURN n
`
	foo := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	if got := len(foo.Params); got != 2 {
		t.Errorf("params = %d, want 2", got)
	}
	if got := len(foo.Decls); got != 2 {
		t.Errorf("decls = %d, want 2 (two LOCAL statements)", got)
	}
	// The second LOCAL declares cName and x.
	second := foo.Decls[1].(*ast.VarDecl)
	if got := len(second.Vars); got != 2 {
		t.Errorf("second LOCAL vars = %d, want 2", got)
	}
}
// TestParseProcedure checks that PROCEDURE sets IsProc on the FuncDecl.
func TestParseProcedure(t *testing.T) {
	const src = `PROCEDURE DoStuff()
RETURN
`
	proc := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	if proc.IsProc {
		return
	}
	t.Error("should be PROCEDURE")
}
// --- Expressions ---
// TestParseArithmetic checks operator precedence: 1 + 2 * 3 must parse as
// 1 + (2 * 3), i.e. a PLUS node whose right operand is a STAR node.
func TestParseArithmetic(t *testing.T) {
	const src = `FUNCTION Main()
RETURN 1 + 2 * 3
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	returned := main.Body[0].(*ast.ReturnStmt).Value

	sum, isBinary := returned.(*ast.BinaryExpr)
	if !isBinary {
		t.Fatalf("expected BinaryExpr, got %T", returned)
	}
	if sum.Op != token.PLUS {
		t.Errorf("top op = %v, want PLUS", sum.Op)
	}

	// The multiplication binds tighter, so it sits on the right.
	product, isBinary := sum.Right.(*ast.BinaryExpr)
	if !isBinary {
		t.Fatalf("right should be BinaryExpr, got %T", sum.Right)
	}
	if product.Op != token.STAR {
		t.Errorf("right op = %v, want STAR", product.Op)
	}
}
// TestParseAssignment checks that "n := 10" as a statement becomes an
// ExprStmt wrapping an AssignExpr with the ASSIGN operator.
func TestParseAssignment(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL n
n := 10
RETURN n
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	// The LOCAL goes to Decls, so the assignment is the first body statement.
	stmt := main.Body[0].(*ast.ExprStmt)
	asg, isAssign := stmt.X.(*ast.AssignExpr)
	if !isAssign {
		t.Fatalf("expected AssignExpr, got %T", stmt.X)
	}
	if asg.Op != token.ASSIGN {
		t.Errorf("assign op = %v, want ASSIGN", asg.Op)
	}
}
// TestParseFunctionCall checks that Str(42) parses as a CallExpr whose callee
// is the identifier Str and which carries exactly one argument.
func TestParseFunctionCall(t *testing.T) {
	const src = `FUNCTION Main()
RETURN Str(42)
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	returned := main.Body[0].(*ast.ReturnStmt).Value

	invocation, isCall := returned.(*ast.CallExpr)
	if !isCall {
		t.Fatalf("expected CallExpr, got %T", returned)
	}
	callee := invocation.Func.(*ast.IdentExpr)
	if callee.Name != "Str" {
		t.Errorf("func name = %q, want Str", callee.Name)
	}
	if n := len(invocation.Args); n != 1 {
		t.Errorf("args = %d, want 1", n)
	}
}
// TestParseStringConcat checks that "+" between two string literals parses
// as a BinaryExpr with the PLUS operator.
func TestParseStringConcat(t *testing.T) {
	const src = `FUNCTION Main()
RETURN "Hello, " + "World!"
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	concat := main.Body[0].(*ast.ReturnStmt).Value.(*ast.BinaryExpr)
	if op := concat.Op; op != token.PLUS {
		t.Errorf("op = %v, want PLUS", op)
	}
}
// --- Control flow ---
// TestParseIfElse checks that IF/ELSE/ENDIF produces an IfStmt with one
// statement in each branch.
func TestParseIfElse(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL n := 10
IF n > 5
RETURN .T.
ELSE
RETURN .F.
ENDIF
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	branch, isIf := main.Body[0].(*ast.IfStmt)
	if !isIf {
		t.Fatalf("expected IfStmt, got %T", main.Body[0])
	}
	if n := len(branch.Body); n != 1 {
		t.Errorf("if body = %d stmts", n)
	}
	if n := len(branch.ElseBody); n != 1 {
		t.Errorf("else body = %d stmts", n)
	}
}
// TestParseIfElseIf checks that both ELSEIF clauses of an IF chain are
// collected on the IfStmt.
func TestParseIfElseIf(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL n := 10
IF n > 10
RETURN 1
ELSEIF n > 5
RETURN 2
ELSEIF n > 0
RETURN 3
ELSE
RETURN 0
ENDIF
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	chain := main.Body[0].(*ast.IfStmt)
	if n := len(chain.ElseIfs); n != 2 {
		t.Errorf("elseifs = %d, want 2", n)
	}
}
// TestParseDoWhile checks that DO WHILE ... ENDDO produces a DoWhileStmt
// with a single-statement body.
func TestParseDoWhile(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL i := 0
DO WHILE i < 10
i++
ENDDO
RETURN i
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	loop, isLoop := main.Body[0].(*ast.DoWhileStmt)
	if !isLoop {
		t.Fatalf("expected DoWhileStmt, got %T", main.Body[0])
	}
	if n := len(loop.Body); n != 1 {
		t.Errorf("body = %d stmts", n)
	}
}
// TestParseForNext checks that FOR ... TO ... NEXT produces a ForStmt that
// records the loop variable name.
func TestParseForNext(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL i
FOR i := 1 TO 10
? i
NEXT
RETURN NIL
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	loop, isFor := main.Body[0].(*ast.ForStmt)
	if !isFor {
		t.Fatalf("expected ForStmt, got %T", main.Body[0])
	}
	if name := loop.Var; name != "i" {
		t.Errorf("var = %q, want i", name)
	}
}
// TestParseForEach checks that FOR EACH ... IN ... NEXT produces a
// ForEachStmt that records the iteration variable name.
func TestParseForEach(t *testing.T) {
	const src = `FUNCTION Main()
LOCAL x
FOR EACH x IN {1, 2, 3}
? x
NEXT
RETURN NIL
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	loop, isForEach := main.Body[0].(*ast.ForEachStmt)
	if !isForEach {
		t.Fatalf("expected ForEachStmt, got %T", main.Body[0])
	}
	if name := loop.Var; name != "x" {
		t.Errorf("var = %q, want x", name)
	}
}
// --- QOut ---
// TestParseQOut checks the "?" output statement with one argument and with
// a comma-separated pair of arguments.
func TestParseQOut(t *testing.T) {
	const src = `FUNCTION Main()
? "Hello"
? 1 + 2, "World"
RETURN NIL
`
	body := parseOK(t, src).Decls[0].(*ast.FuncDecl).Body

	single, isQOut := body[0].(*ast.QOutStmt)
	if !isQOut {
		t.Fatalf("expected QOutStmt, got %T", body[0])
	}
	if n := len(single.Exprs); n != 1 {
		t.Errorf("? args = %d, want 1", n)
	}

	pair := body[1].(*ast.QOutStmt)
	if n := len(pair.Exprs); n != 2 {
		t.Errorf("? args = %d, want 2", n)
	}
}
// --- xBase commands ---
// TestParseUse checks that USE ... VIA ... ALIAS ... produces a UseCmd that
// records the driver name and the alias.
func TestParseUse(t *testing.T) {
	const src = `FUNCTION Main()
USE "customers" VIA DBFCDX ALIAS cust
RETURN NIL
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	cmd, isUse := main.Body[0].(*ast.UseCmd)
	if !isUse {
		t.Fatalf("expected UseCmd, got %T", main.Body[0])
	}
	if via := cmd.Via; via != "DBFCDX" {
		t.Errorf("via = %q, want DBFCDX", via)
	}
	if alias := cmd.Alias; alias != "cust" {
		t.Errorf("alias = %q, want cust", alias)
	}
}
// TestParseGoTop checks that GO TOP produces a GoCmd with Direction "TOP".
func TestParseGoTop(t *testing.T) {
	const src = `FUNCTION Main()
GO TOP
RETURN NIL
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	cmd, isGo := main.Body[0].(*ast.GoCmd)
	if !isGo {
		t.Fatalf("expected GoCmd, got %T", main.Body[0])
	}
	if dir := cmd.Direction; dir != "TOP" {
		t.Errorf("direction = %q, want TOP", dir)
	}
}
// TestParseSeek checks that SEEK with a string key produces a SeekCmd whose
// key is a literal holding that string.
func TestParseSeek(t *testing.T) {
	const src = `FUNCTION Main()
SEEK "SMITH"
RETURN NIL
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	cmd, isSeek := main.Body[0].(*ast.SeekCmd)
	if !isSeek {
		t.Fatalf("expected SeekCmd, got %T", main.Body[0])
	}
	key := cmd.Key.(*ast.LiteralExpr)
	if key.Value != "SMITH" {
		t.Errorf("key = %q, want SMITH", key.Value)
	}
}
// TestParseReplace checks that REPLACE with two "field WITH value" pairs
// produces a ReplaceCmd holding two fields.
func TestParseReplace(t *testing.T) {
	const src = `FUNCTION Main()
REPLACE name WITH "Kim", salary WITH 50000
RETURN NIL
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	cmd, isReplace := main.Body[0].(*ast.ReplaceCmd)
	if !isReplace {
		t.Fatalf("expected ReplaceCmd, got %T", main.Body[0])
	}
	if n := len(cmd.Fields); n != 2 {
		t.Errorf("fields = %d, want 2", n)
	}
}
// --- Array and Hash literals ---
// TestParseArrayLiteral checks that {1, 2, 3} parses as an ArrayLitExpr with
// three items.
func TestParseArrayLiteral(t *testing.T) {
	const src = `FUNCTION Main()
RETURN {1, 2, 3}
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	returned := main.Body[0].(*ast.ReturnStmt).Value

	list, isArray := returned.(*ast.ArrayLitExpr)
	if !isArray {
		t.Fatalf("expected ArrayLitExpr, got %T", returned)
	}
	if n := len(list.Items); n != 3 {
		t.Errorf("items = %d, want 3", n)
	}
}
// TestParseHashLiteral checks that {"a" => 1, "b" => 2} parses as a
// HashLitExpr with two keys.
func TestParseHashLiteral(t *testing.T) {
	const src = `FUNCTION Main()
RETURN {"a" => 1, "b" => 2}
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	returned := main.Body[0].(*ast.ReturnStmt).Value

	table, isHash := returned.(*ast.HashLitExpr)
	if !isHash {
		t.Fatalf("expected HashLitExpr, got %T", returned)
	}
	if n := len(table.Keys); n != 2 {
		t.Errorf("keys = %d, want 2", n)
	}
}
// TestParseCodeBlock checks that {|x| x + 1} parses as a BlockExpr with the
// single parameter x.
func TestParseCodeBlock(t *testing.T) {
	const src = `FUNCTION Main()
RETURN {|x| x + 1}
`
	main := parseOK(t, src).Decls[0].(*ast.FuncDecl)
	returned := main.Body[0].(*ast.ReturnStmt).Value

	block, isBlock := returned.(*ast.BlockExpr)
	if !isBlock {
		t.Fatalf("expected BlockExpr, got %T", returned)
	}
	if !(len(block.Params) == 1 && block.Params[0] == "x") {
		t.Errorf("params = %v, want [x]", block.Params)
	}
}
// --- IMPORT ---
// TestParseImport checks that an IMPORT line ahead of the first function is
// recorded on the file with its path.
func TestParseImport(t *testing.T) {
	const src = `IMPORT "net/http"
FUNCTION Main()
RETURN NIL
`
	imports := parseOK(t, src).Imports
	if n := len(imports); n != 1 {
		t.Fatalf("imports = %d, want 1", n)
	}
	if path := imports[0].Path; path != "net/http" {
		t.Errorf("import path = %q, want net/http", path)
	}
}
// --- Full program ---
// TestParseFullProgram parses a small but complete function (LOCAL
// declaration, FOR loop, output, IF/ELSE, RETURN) and checks the overall
// shape of the resulting FuncDecl rather than each node.
func TestParseFullProgram(t *testing.T) {
	const program = `FUNCTION Main()
LOCAL nSum := 0, i
FOR i := 1 TO 10
nSum += i
NEXT
? "Sum =", nSum
IF nSum > 50
? "Big"
ELSE
? "Small"
ENDIF
RETURN nSum
`
	parsed := parseOK(t, program)
	mainFn := parsed.Decls[0].(*ast.FuncDecl)
	if mainFn.Name != "Main" {
		t.Errorf("name = %q", mainFn.Name)
	}
	if got := len(mainFn.Decls); got != 1 {
		t.Errorf("decls = %d, want 1 (LOCAL)", got)
	}
	// The source contains FOR, ?, IF and RETURN; the check only requires that
	// at least three statements ended up in the body.
	if got := len(mainFn.Body); got < 3 {
		t.Errorf("body stmts = %d, want at least 3", got)
	}
}

287
compiler/parser/stmtreg.go Normal file
View File

@@ -0,0 +1,287 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// stmtreg.go — Statement parser registry.
//
// Instead of an 800+ line switch in parseStmt(), each statement type
// registers its parser function. New statements can be added by
// simply adding one line to the stmtRegistry map built in init().
//
// Pattern: token.Kind → func(*Parser) ast.Stmt
package parser
import (
"five/compiler/ast"
"five/compiler/token"
"strings"
)
// StmtParser is a function that parses a statement starting with the current token.
// It receives the parser explicitly, so method expressions such as
// (*Parser).stmtIf can be stored as values of this type.
type StmtParser func(p *Parser) ast.Stmt
// stmtRegistry maps token kinds to their statement parsers.
// It is populated once by init() below and read by lookupStmtParser.
var stmtRegistry map[token.Kind]StmtParser
// init builds the statement registry. Each entry pairs the token kind that
// starts a statement with the *Parser method (as a method expression) that
// parses it. Several kinds may share one handler (GO/GOTO, LOCAL/STATIC,
// RECALL/PACK/ZAP).
func init() {
stmtRegistry = map[token.Kind]StmtParser{
// Control flow
token.IF: (*Parser).stmtIf,
token.DO: (*Parser).stmtDo,
token.WHILE: (*Parser).stmtWhile,
token.FOR: (*Parser).stmtFor,
token.BEGIN: (*Parser).stmtBegin,
token.SWITCH: (*Parser).stmtSwitch,
token.RETURN: (*Parser).stmtReturn,
token.EXIT: (*Parser).stmtExit,
token.LOOP: (*Parser).stmtLoop,
// I/O
token.QMARK: (*Parser).stmtQOut,
token.QQMARK: (*Parser).stmtQQOut,
// Variables
token.PRIVATE: (*Parser).stmtPrivate,
token.PUBLIC: (*Parser).stmtPublic,
token.LOCAL: (*Parser).stmtVarDecl,
token.STATIC: (*Parser).stmtVarDecl,
token.PARAMETERS: (*Parser).stmtParameters,
token.DECLARE: (*Parser).stmtDeclare,
// xBase database
token.USE: (*Parser).stmtUse,
token.SELECT: (*Parser).stmtSelect,
token.GO: (*Parser).stmtGo,
token.GOTO: (*Parser).stmtGo,
token.SKIP_KW: (*Parser).stmtSkip,
token.SEEK: (*Parser).stmtSeek,
token.REPLACE: (*Parser).stmtReplace,
token.APPEND: (*Parser).stmtAppend,
token.DELETE_KW: (*Parser).stmtDelete,
token.RECALL: (*Parser).stmtRecallPackZap,
token.PACK: (*Parser).stmtRecallPackZap,
token.ZAP: (*Parser).stmtRecallPackZap,
token.INDEX: (*Parser).stmtIndex,
token.SET: (*Parser).stmtSet,
// Screen
token.AT: (*Parser).stmtAt,
// Five Go extensions
token.DEFER_KW: (*Parser).stmtDefer,
token.CONST_KW: (*Parser).stmtConst,
token.WATCH_KW: (*Parser).stmtWatch,
token.WITH: (*Parser).stmtWith,
token.PARALLEL_KW: (*Parser).stmtParallel,
token.SPAWN_KW: (*Parser).stmtSpawn,
token.ARROW_LEFT: (*Parser).stmtArrowLeft,
}
}
// lookupStmtParser returns the statement parser registered for the kind of
// the current token, or nil when that kind has no entry in stmtRegistry.
func (p *Parser) lookupStmtParser() StmtParser {
	// Indexing a map with a missing key yields the zero value, which for a
	// func type is nil.
	return stmtRegistry[p.current.Kind]
}
// --- Thin wrappers: each calls the existing parse method ---
// stmtIf handles a statement starting with IF. When IF is directly followed
// by "(" and looksLikeIIF reports true, the line is parsed as an expression
// statement; otherwise it is parsed as an IF block.
func (p *Parser) stmtIf() ast.Stmt {
	if p.peekAt(1) == token.LPAREN && p.looksLikeIIF() {
		return p.parseExprStmt()
	}
	return p.parseIf()
}
// stmtDo dispatches a statement starting with DO according to the token that
// follows it:
//
//	DO(        → the DO token is rewritten to an identifier "Do" and the line
//	             is parsed as an expression statement
//	DO CASE    → parseDoCase (CASE recognised by kind or by keyword lookup)
//	DO <ident> → parseDoProc
//	otherwise  → parseDoWhile (this includes DO WHILE)
func (p *Parser) stmtDo() ast.Stmt {
	next := p.peekAt(1)
	switch {
	case next == token.LPAREN:
		at := p.pos
		p.tokens[at].Kind = token.IDENT
		p.tokens[at].Literal = "Do"
		p.current = p.tokens[at]
		return p.parseExprStmt()
	case next == token.CASE || token.LookupKeyword(p.peekLitAt(1)) == token.CASE:
		return p.parseDoCase()
	case next == token.IDENT:
		return p.parseDoProc()
	default:
		return p.parseDoWhile()
	}
}
// stmtWhile handles a statement starting with WHILE. "WHILE(" is treated as
// a call: the token is rewritten to an identifier "While" and the line is
// parsed as an expression statement. Anything else is parsed as a loop.
func (p *Parser) stmtWhile() ast.Stmt {
	if p.peekAt(1) != token.LPAREN {
		return p.parseDoWhile()
	}
	at := p.pos
	p.tokens[at].Kind = token.IDENT
	p.tokens[at].Literal = "While"
	p.current = p.tokens[at]
	return p.parseExprStmt()
}
// stmtFor handles a statement starting with FOR. If the next token is an
// assignment operator (:=, +=, -=) or "(", FOR is being used as an ordinary
// name: the token is rewritten to an identifier "for" and the line is parsed
// as an expression statement. Otherwise a FOR loop is parsed.
func (p *Parser) stmtFor() ast.Stmt {
	switch p.peekAt(1) {
	case token.ASSIGN, token.LPAREN, token.PLUSEQ, token.MINUSEQ:
		at := p.pos
		p.tokens[at].Kind = token.IDENT
		p.tokens[at].Literal = "for"
		p.current = p.tokens[at]
		return p.parseExprStmt()
	}
	return p.parseFor()
}
// stmtBegin handles a statement starting with BEGIN. BEGIN followed by
// SEQUENCE, a newline or end of input is parsed as BEGIN SEQUENCE; in every
// other case the token is rewritten to an identifier "begin" and the line is
// parsed as an expression statement.
func (p *Parser) stmtBegin() ast.Stmt {
	switch p.peekAt(1) {
	case token.SEQUENCE, token.NEWLINE, token.EOF:
		return p.parseBeginSequence()
	}
	at := p.pos
	p.tokens[at].Kind = token.IDENT
	p.tokens[at].Literal = "begin"
	p.current = p.tokens[at]
	return p.parseExprStmt()
}
// stmtSwitch is the registry entry for SWITCH; it delegates to parseSwitch.
func (p *Parser) stmtSwitch() ast.Stmt {
	return p.parseSwitch()
}
// stmtReturn handles a statement starting with RETURN. When RETURN is
// followed by an assignment operator (:=, +=, -=) it is being used as a
// variable name: the token is rewritten to an identifier "return" and the
// line is parsed as an expression statement. Otherwise a RETURN statement is
// parsed.
func (p *Parser) stmtReturn() ast.Stmt {
	switch p.peekAt(1) {
	case token.ASSIGN, token.PLUSEQ, token.MINUSEQ:
		at := p.pos
		p.tokens[at].Kind = token.IDENT
		p.tokens[at].Literal = "return"
		p.current = p.tokens[at]
		return p.parseExprStmt()
	}
	return p.parseReturn()
}
// stmtExit consumes the EXIT token and returns an ExitStmt positioned at it.
func (p *Parser) stmtExit() ast.Stmt {
	return &ast.ExitStmt{ExitPos: p.advance().Pos}
}
// stmtLoop consumes the LOOP token and returns a LoopStmt positioned at it.
func (p *Parser) stmtLoop() ast.Stmt {
	return &ast.LoopStmt{LoopPos: p.advance().Pos}
}
// stmtQOut is the registry entry for "?"; it calls parseQOut(false).
func (p *Parser) stmtQOut() ast.Stmt {
	return p.parseQOut(false)
}

// stmtQQOut is the registry entry for "??"; it calls parseQOut(true).
func (p *Parser) stmtQQOut() ast.Stmt {
	return p.parseQOut(true)
}

// stmtPrivate is the registry entry for PRIVATE declarations.
func (p *Parser) stmtPrivate() ast.Stmt {
	return p.parsePrivatePublic(ast.ScopePrivate)
}

// stmtPublic is the registry entry for PUBLIC declarations.
func (p *Parser) stmtPublic() ast.Stmt {
	return p.parsePrivatePublic(ast.ScopePublic)
}

// stmtVarDecl is the registry entry shared by LOCAL and STATIC.
func (p *Parser) stmtVarDecl() ast.Stmt {
	return p.parseVarDecl()
}
// stmtParameters handles PARAMETERS by rewriting the current token to LOCAL
// and then parsing the line as a normal variable declaration.
func (p *Parser) stmtParameters() ast.Stmt {
	at := p.pos
	p.tokens[at].Kind = token.LOCAL
	p.current = p.tokens[at]
	return p.parseVarDecl()
}
// stmtDeclare accepts a DECLARE line without interpreting it: the rest of
// the line is skipped and a NIL literal expression statement stands in for it.
func (p *Parser) stmtDeclare() ast.Stmt {
	p.skipToEndOfLine()
	p.expectEndOfStmt()
	placeholder := &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}
	return &ast.ExprStmt{X: placeholder}
}
// stmtUse is the registry entry for USE.
func (p *Parser) stmtUse() ast.Stmt {
	return p.parseUse()
}

// stmtSelect is the registry entry for SELECT.
func (p *Parser) stmtSelect() ast.Stmt {
	return p.parseSelect()
}

// stmtSkip is the registry entry for SKIP.
func (p *Parser) stmtSkip() ast.Stmt {
	return p.parseSkip()
}

// stmtSeek is the registry entry for SEEK.
func (p *Parser) stmtSeek() ast.Stmt {
	return p.parseSeek()
}

// stmtReplace is the registry entry for REPLACE.
func (p *Parser) stmtReplace() ast.Stmt {
	return p.parseReplace()
}

// stmtAppend is the registry entry for APPEND.
func (p *Parser) stmtAppend() ast.Stmt {
	return p.parseAppend()
}

// stmtIndex is the registry entry for INDEX.
func (p *Parser) stmtIndex() ast.Stmt {
	return p.parseIndex()
}

// stmtAt is the registry entry for "@" screen commands.
func (p *Parser) stmtAt() ast.Stmt {
	return p.parseAtCmd()
}
// stmtGo handles GO and GOTO. Only the exact form "GO(" is treated as a
// function call: the token is rewritten to an identifier "Go" and the line is
// parsed as an expression statement. Every other form goes to parseGo.
func (p *Parser) stmtGo() ast.Stmt {
	if p.current.Kind != token.GO || p.peekAt(1) != token.LPAREN {
		return p.parseGo()
	}
	at := p.pos
	p.tokens[at].Kind = token.IDENT
	p.tokens[at].Literal = "Go"
	p.current = p.tokens[at]
	return p.parseExprStmt()
}
// stmtDelete handles the DELETE command.
//
// DELETE FILE ..., DELETE ALL ... and DELETE TAG ... are accepted but not
// interpreted: the rest of the line is skipped and a NIL literal expression
// statement is returned. Any other DELETE becomes a call to DbDelete with no
// arguments, positioned at the DELETE token.
func (p *Parser) stmtDelete() ast.Stmt {
	deletePos := p.advance().Pos
	if p.current.Kind == token.IDENT {
		switch strings.ToUpper(p.current.Literal) {
		case "FILE", "ALL", "TAG":
			p.skipToEndOfLine()
			p.expectEndOfStmt()
			return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
		}
	}
	p.expectEndOfStmt()
	call := &ast.CallExpr{
		Func: &ast.IdentExpr{NamePos: deletePos, Name: "DbDelete"},
	}
	return &ast.ExprStmt{X: call}
}
// stmtRecallPackZap turns the argument-less commands RECALL, PACK and ZAP
// into calls to DbRecall, __DbPack and __DbZap respectively, positioned at
// the command token.
func (p *Parser) stmtRecallPackZap() ast.Stmt {
	kw := p.advance()
	target := ""
	switch kw.Kind {
	case token.RECALL:
		target = "DbRecall"
	case token.PACK:
		target = "__DbPack"
	case token.ZAP:
		target = "__DbZap"
	}
	p.expectEndOfStmt()
	call := &ast.CallExpr{
		Func: &ast.IdentExpr{NamePos: kw.Pos, Name: target},
	}
	return &ast.ExprStmt{X: call}
}
// stmtSet accepts any SET command (SET COLOR, SET FILTER, SET ORDER, ...)
// without interpreting it: the line is skipped and replaced by a NIL literal
// expression statement.
func (p *Parser) stmtSet() ast.Stmt {
	p.skipToEndOfLine()
	p.expectEndOfStmt()
	placeholder := &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}
	return &ast.ExprStmt{X: placeholder}
}
// stmtDefer is the registry entry for DEFER.
func (p *Parser) stmtDefer() ast.Stmt {
	return p.parseDefer()
}

// stmtConst is the registry entry for CONST; it parses a const block.
func (p *Parser) stmtConst() ast.Stmt {
	return p.parseConstBlock()
}

// stmtWatch is the registry entry for WATCH.
func (p *Parser) stmtWatch() ast.Stmt {
	return p.parseWatch()
}

// stmtParallel is the registry entry for PARALLEL; it parses PARALLEL FOR.
func (p *Parser) stmtParallel() ast.Stmt {
	return p.parseParallelFor()
}
// stmtWith handles a statement starting with WITH. Only "WITH TIMEOUT" is
// parsed; any other WITH line is skipped and replaced by a NIL literal
// expression statement.
func (p *Parser) stmtWith() ast.Stmt {
	if p.peekAt(1) != token.TIMEOUT_KW {
		p.skipToEndOfLine()
		p.expectEndOfStmt()
		return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
	}
	return p.parseWithTimeout()
}
// stmtSpawn handles the SPAWN statement. The expression after the keyword is
// parsed with parseArrayOrBlock; when it is a code block the result is a
// GoBlockStmt, otherwise the parsed expression is wrapped in a plain
// expression statement.
func (p *Parser) stmtSpawn() ast.Stmt {
	spawnPos := p.advance().Pos
	parsed := p.parseArrayOrBlock()
	p.expectEndOfStmt()
	if blk, isBlock := parsed.(*ast.BlockExpr); isBlock {
		return &ast.GoBlockStmt{GoPos: spawnPos, Block: blk}
	}
	return &ast.ExprStmt{X: parsed}
}
// stmtArrowLeft handles a statement that begins with "<-": the expression
// after the arrow is parsed and wrapped in a ChanRecvExpr, which is returned
// as an expression statement.
func (p *Parser) stmtArrowLeft() ast.Stmt {
	arrowPos := p.advance().Pos
	source := p.parseExpr()
	p.expectEndOfStmt()
	recv := &ast.ChanRecvExpr{ArrowPos: arrowPos, Chan: source}
	return &ast.ExprStmt{X: recv}
}

540
compiler/pp/command.go Normal file
View File

@@ -0,0 +1,540 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// #command / #translate implementation for Five preprocessor.
//
// Harbour PP syntax:
// #command PATTERN => RESULT
// #translate PATTERN => RESULT
// #xcommand PATTERN => RESULT (case-sensitive)
// #xtranslate PATTERN => RESULT (case-sensitive)
//
// Pattern markers:
// <x> — match any expression (regular match)
// <!x!> — match single identifier only (restricted match)
// <x,...> — match comma-separated list
// <*x*> — match rest of line (wild match)
// <x:a,b,c> — match one of listed words (list match)
// [...] — optional clause
//
// Result markers:
// <x> — substitute matched text
// <(x)> — stringify (wrap in quotes)
// <{x}> — blockify (wrap in {|| })
// #<x> — dumb stringify
// <.x.> — logify (.T. if matched, .F. if not)
//
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
package pp
import (
"strings"
)
// Rule represents a single #command or #translate rule.
// Rules are built by ParseRule and applied to source lines with MatchLine.
type Rule struct {
Pattern string // raw pattern text
Result string // raw result text
IsCommand bool // #command vs #translate
CaseSens bool // #xcommand/#xtranslate = case sensitive
// Keyword is used by MatchLine to reject a line cheaply by comparing it
// with the line's first token; an empty Keyword disables that check.
Keyword string // first keyword (for fast matching)
Markers []Marker // parsed pattern markers
// ResultTmpl is the text applyResult substitutes captures into;
// ParseRule initialises it to the same text as Result.
ResultTmpl string // result template with marker references
}
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
// Values are produced by parseOneMarker from the text between '<' and '>'.
type Marker struct {
Name string // marker name
Type MarkerType
ListValues []string // for <x:a,b,c> — allowed values
}
// MarkerType identifies which kind of match marker a Marker is.
type MarkerType int
// Marker kinds. MarkerRegular is the zero value; matchPattern handles
// MarkerRegular and MarkerRestricted through the same code path.
const (
MarkerRegular MarkerType = iota // <x> — any expression
MarkerRestricted // <!x!> — identifier only
MarkerList // <x,...> — comma-separated list
MarkerWild // <*x*> — rest of line
MarkerWordList // <x:a,b,c> — one of listed words
)
// ParseRule builds a Rule from the text of a #command/#translate directive
// (the part after the directive keyword), which must have the form
// "PATTERN => RESULT". It returns nil when the text contains no "=>".
//
// isCommand and caseSens are stored on the rule unchanged. Every " ;" in the
// result text is deleted. The rule's Keyword is taken from the first
// whitespace-separated word of the pattern, with leading "<"/"[" and trailing
// ">"/"]" stripped, unless what remains contains one of "!*,:".
//
// NOTE(review): a pattern that starts with a plain marker such as "<x>"
// therefore gets Keyword "x" — confirm whether that is intended.
func ParseRule(directive string, isCommand, caseSens bool) *Rule {
	arrow := strings.Index(directive, "=>")
	if arrow < 0 {
		return nil
	}
	patternText := strings.TrimSpace(directive[:arrow])
	resultText := strings.TrimSpace(directive[arrow+len("=>"):])
	resultText = strings.ReplaceAll(resultText, " ;", "")

	parsed := &Rule{
		Pattern:    patternText,
		Result:     resultText,
		IsCommand:  isCommand,
		CaseSens:   caseSens,
		ResultTmpl: resultText,
	}

	// Leading keyword, used by MatchLine as a cheap pre-filter.
	if fields := strings.Fields(patternText); len(fields) > 0 {
		head := strings.TrimRight(strings.TrimLeft(fields[0], "<["), ">]")
		if !strings.ContainsAny(head, "!*,:") {
			parsed.Keyword = head
		}
	}

	parsed.Markers = parseMarkers(patternText)
	return parsed
}
// parseMarkers scans pattern left to right and returns one Marker for every
// "<...>" group whose parsed name is non-empty. Scanning stops at the first
// '<' that has no closing '>' after it.
func parseMarkers(pattern string) []Marker {
	var found []Marker
	for pos := 0; pos < len(pattern); {
		if pattern[pos] != '<' {
			pos++
			continue
		}
		width := strings.IndexByte(pattern[pos:], '>')
		if width < 0 {
			break
		}
		if mk := parseOneMarker(pattern[pos+1 : pos+width]); mk.Name != "" {
			found = append(found, mk)
		}
		pos += width + 1
	}
	return found
}
// parseOneMarker classifies the text found between '<' and '>' in a pattern
// and returns the corresponding Marker:
//
//	!name!      → MarkerRestricted
//	*name*      → MarkerWild
//	name,...    → MarkerList
//	name:a,b,c  → MarkerWordList, ListValues holding the trimmed alternatives
//	anything else → MarkerRegular, with the whole text as the name
//
// Surrounding whitespace is ignored.
func parseOneMarker(inner string) Marker {
	inner = strings.TrimSpace(inner)
	// The delimited forms need an opening and a closing delimiter, i.e. at
	// least two characters. Without this check a lone "!" or "*" satisfies
	// both HasPrefix and HasSuffix and inner[1:len(inner)-1] panics.
	delimited := len(inner) >= 2
	// <!name!> — restricted
	if delimited && strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") {
		return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted}
	}
	// <*name*> — wild
	if delimited && strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") {
		return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild}
	}
	// <name,...> — comma list
	if strings.HasSuffix(inner, ",...") {
		return Marker{Name: strings.TrimSuffix(inner, ",..."), Type: MarkerList}
	}
	// <name:a,b,c> — word list
	if idx := strings.IndexByte(inner, ':'); idx > 0 {
		vals := strings.Split(inner[idx+1:], ",")
		for i := range vals {
			vals[i] = strings.TrimSpace(vals[i])
		}
		return Marker{Name: inner[:idx], Type: MarkerWordList, ListValues: vals}
	}
	// <name> — regular
	return Marker{Name: inner, Type: MarkerRegular}
}
// --- Rule matching and application ---
// MatchLine tries to apply the rule to one source line. On success it
// returns the expanded text and true; otherwise it returns ("", false).
//
// Blank lines never match. When the rule has a Keyword, the line's first
// token must equal it (case-insensitively unless the rule is case sensitive)
// before the full pattern match is attempted.
func (r *Rule) MatchLine(line string) (string, bool) {
	text := strings.TrimSpace(line)
	if text == "" {
		return "", false
	}

	if r.Keyword != "" {
		head := firstToken(text)
		var same bool
		if r.CaseSens {
			same = head == r.Keyword
		} else {
			same = strings.EqualFold(head, r.Keyword)
		}
		if !same {
			return "", false
		}
	}

	caps := r.matchPattern(text)
	if caps == nil {
		return "", false
	}
	return r.applyResult(caps), true
}
// matchPattern attempts to match the rule's pattern against a line.
// It returns the captured marker values keyed by marker name, or nil if the
// line does not match.
//
// Pattern and line are tokenized and walked in parallel:
//   - a literal pattern word must equal the current line word;
//   - a marker captures line words according to its type;
//   - an optional clause "[ ... ]" is skipped without being matched.
//
// If the line ends before the pattern does, the remaining pattern may only
// consist of markers (which then capture nothing) and optional clauses. A
// literal keyword left over outside any optional clause means the line is
// too short for the pattern and the match is rejected.
func (r *Rule) matchPattern(line string) map[string]string {
	captures := make(map[string]string)
	patternWords := tokenizePattern(r.Pattern)
	lineWords := tokenizeLine(line)
	isMarker := func(w string) bool {
		return strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")
	}

	pi, li := 0, 0
	for pi < len(patternWords) && li < len(lineWords) {
		pw := patternWords[pi]
		switch {
		case isMarker(pw):
			m := parseOneMarker(pw[1 : len(pw)-1])
			switch m.Type {
			case MarkerWild:
				// Capture rest of line
				captures[m.Name] = strings.Join(lineWords[li:], " ")
				li = len(lineWords)
				pi++
			case MarkerList:
				// Capture comma-separated items until the next pattern word
				var items []string
				for li < len(lineWords) {
					if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
						break
					}
					items = append(items, lineWords[li])
					li++
				}
				captures[m.Name] = strings.Join(items, " ")
				pi++
			case MarkerWordList:
				// Match one of listed words
				matched := false
				for _, allowed := range m.ListValues {
					if matchWord(lineWords[li], allowed, r.CaseSens) {
						matched = true
						break
					}
				}
				if !matched {
					return nil
				}
				captures[m.Name] = lineWords[li]
				li++
				pi++
			default:
				// Regular or restricted: capture one token or expression
				captures[m.Name] = captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens)
				pi++
			}
		case pw == "[":
			// Optional clause — skip to matching ]
			depth := 1
			pi++
			for pi < len(patternWords) && depth > 0 {
				if patternWords[pi] == "[" {
					depth++
				} else if patternWords[pi] == "]" {
					depth--
				}
				pi++
			}
		case pw == "]":
			pi++
		default:
			// Literal keyword — must match
			if !matchWord(lineWords[li], pw, r.CaseSens) {
				return nil
			}
			li++
			pi++
		}
	}

	// Whatever is left of the pattern must be satisfiable by an empty line
	// remainder: markers and optional clauses are, a required literal is not.
	depth := 0
	for ; pi < len(patternWords); pi++ {
		pw := patternWords[pi]
		switch {
		case pw == "[":
			depth++
		case pw == "]":
			if depth > 0 {
				depth--
			}
		case isMarker(pw):
			// trailing marker: allowed to capture nothing
		default:
			if depth == 0 {
				return nil
			}
		}
	}

	// For #command with no markers and no optional clauses:
	// all line tokens must be consumed for a match
	if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 &&
		!strings.Contains(r.Pattern, "[") {
		return nil
	}
	return captures
}
// applyResult substitutes captured values into the result template and
// returns the expanded text. For every captured name it replaces:
//
//	#<name>   → "value"   (dumb stringify)
//	<(name)>  → "value"   (stringify)
//	<.name.>  → .T. if the value is non-empty, .F. otherwise (logify)
//	<name>    → value     (direct substitution)
//
// Marker references that remain afterwards (names with no capture) are
// removed by cleanUnreferencedMarkers.
func (r *Rule) applyResult(captures map[string]string) string {
	result := r.ResultTmpl
	for name, val := range captures {
		quoted := `"` + val + `"`
		// "#<name>" contains "<name>", so it has to be handled before the
		// direct substitution; otherwise it would turn into "#value" and the
		// stringify form would never be seen.
		result = strings.ReplaceAll(result, "#<"+name+">", quoted)
		result = strings.ReplaceAll(result, "<("+name+")>", quoted)
		logical := ".F."
		if val != "" {
			logical = ".T."
		}
		result = strings.ReplaceAll(result, "<."+name+".>", logical)
		result = strings.ReplaceAll(result, "<"+name+">", val)
	}
	// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
	return cleanUnreferencedMarkers(result)
}
// cleanUnreferencedMarkers returns s with every leftover PP marker reference
// deleted. A reference is whatever findMarkerEnd accepts, optionally preceded
// by '#'. A '<' that does not open such a reference (for example a
// comparison operator) is copied through unchanged.
func cleanUnreferencedMarkers(s string) string {
	var kept strings.Builder
	for pos := 0; pos < len(s); {
		// Where a marker would have to open if one starts at pos.
		open := -1
		switch {
		case s[pos] == '<':
			open = pos
		case s[pos] == '#' && pos+1 < len(s) && s[pos+1] == '<':
			open = pos + 1
		}
		if open >= 0 {
			if end := findMarkerEnd(s, open); end > 0 {
				pos = end
				continue
			}
		}
		kept.WriteByte(s[pos])
		pos++
	}
	return kept.String()
}
// findMarkerEnd reports whether a PP marker reference opens at s[start].
// It returns the index just past the closing '>' when one does, and 0
// otherwise.
//
// The accepted shape is: '<', at most one of '(' '.' '"', an identifier
// (letter or underscore, then letters, digits or underscores), any run of
// ')' '.' '"' ',' and spaces, then '>'.
func findMarkerEnd(s string, start int) int {
	if start >= len(s) || s[start] != '<' {
		return 0
	}
	isNameStart := func(c byte) bool {
		return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
	}

	pos := start + 1
	// At most one decoration character directly after '<'.
	if pos < len(s) {
		switch s[pos] {
		case '(', '.', '"':
			pos++
		}
	}
	if pos >= len(s) || !isNameStart(s[pos]) {
		return 0
	}
	for pos < len(s) && (isNameStart(s[pos]) || ('0' <= s[pos] && s[pos] <= '9')) {
		pos++
	}
	// Closing decoration, e.g. ")", ".", "\"" or ",...".
trailer:
	for pos < len(s) {
		switch s[pos] {
		case ')', '.', '"', ',', ' ':
			pos++
		default:
			break trailer
		}
	}
	if pos < len(s) && s[pos] == '>' {
		return pos + 1
	}
	return 0
}
// --- Helpers ---
// firstToken returns the part of s before the first space, tab or '('.
// If s contains none of these, s is returned unchanged.
func firstToken(s string) string {
	for idx := 0; idx < len(s); idx++ {
		switch s[idx] {
		case ' ', '\t', '(':
			return s[:idx]
		}
	}
	return s
}
// matchWord reports whether a word from the source line equals a word from
// the pattern. The comparison is exact when caseSens is true and uses
// Unicode case folding otherwise.
func matchWord(lineWord, patternWord string, caseSens bool) bool {
	if !caseSens {
		return strings.EqualFold(lineWord, patternWord)
	}
	return lineWord == patternWord
}
// tokenizePattern splits a pattern into tokens:
//   - a "<...>" marker is kept as one token, including any spaces or commas
//     inside it;
//   - "[" and "]" are always tokens of their own;
//   - everything else is split into words at spaces, tabs, '<', '[' and ']'.
//
// A '<' with no '>' anywhere after it is not a marker; it is treated as the
// first character of an ordinary word, so a pattern such as "A < B" yields
// "A", "<", "B".
func tokenizePattern(pattern string) []string {
	var tokens []string
	i := 0
	for i < len(pattern) {
		// Skip whitespace
		for i < len(pattern) && (pattern[i] == ' ' || pattern[i] == '\t') {
			i++
		}
		if i >= len(pattern) {
			break
		}
		switch c := pattern[i]; c {
		case '<':
			// Find matching >
			if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
				tokens = append(tokens, pattern[i:i+end+1])
				i += end + 1
				continue
			}
		case '[', ']':
			tokens = append(tokens, string(c))
			i++
			continue
		}
		// Regular word. The first byte is consumed unconditionally: it is
		// either a normal character or an unmatched '<', and in the latter
		// case the scan below would otherwise stop immediately and the outer
		// loop would never advance.
		start := i
		i++
		for i < len(pattern) && strings.IndexByte(" \t<[]", pattern[i]) < 0 {
			i++
		}
		tokens = append(tokens, pattern[start:i])
	}
	return tokens
}
// tokenizeLine splits a source line into tokens:
//   - a quoted string ("..." or '...') is one token including its quotes; an
//     unterminated string runs to the end of the line;
//   - a comma is always a token of its own;
//   - any other run of characters up to the next space, tab, comma or quote
//     is one word.
//
// Spaces and tabs between tokens are dropped.
func tokenizeLine(line string) []string {
	var words []string
	n := len(line)
	for pos := 0; pos < n; {
		c := line[pos]
		switch {
		case c == ' ' || c == '\t':
			pos++
		case c == '"' || c == '\'':
			end := pos + 1
			for end < n && line[end] != c {
				end++
			}
			if end < n {
				end++ // include the closing quote
			}
			words = append(words, line[pos:end])
			pos = end
		case c == ',':
			words = append(words, ",")
			pos++
		default:
			end := pos + 1
			for end < n {
				b := line[end]
				if b == ' ' || b == '\t' || b == ',' || b == '"' || b == '\'' {
					break
				}
				end++
			}
			words = append(words, line[pos:end])
			pos = end
		}
	}
	return words
}
// captureExpression consumes line tokens for a regular marker and returns
// them joined with single spaces. *li is the index of the next unread line
// token and is advanced past whatever is captured; nextPi is the index of
// the pattern token that follows the marker.
//
// How much is captured depends on the rest of the pattern:
//   - if a literal word follows somewhere (markers, "[" and "]" are skipped
//     when looking for it), tokens are captured up to, but not including,
//     the first line token equal to that word;
//   - otherwise, if the marker is the last pattern token, all remaining line
//     tokens are captured;
//   - otherwise exactly one line token is captured.
//
// An empty string is returned when no line tokens are left.
func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string {
	first := *li
	if first >= len(lineWords) {
		return ""
	}

	// Next literal keyword in the pattern, if any, acts as the delimiter.
	stopWord := ""
	for k := nextPi; k < len(patternWords); k++ {
		candidate := patternWords[k]
		if candidate == "[" || candidate == "]" || strings.HasPrefix(candidate, "<") {
			continue
		}
		stopWord = candidate
		break
	}

	switch {
	case stopWord != "":
		for *li < len(lineWords) && !matchWord(lineWords[*li], stopWord, caseSens) {
			*li++
		}
	case nextPi >= len(patternWords):
		*li = len(lineWords)
	default:
		*li++
	}
	return strings.Join(lineWords[first:*li], " ")
}

189
compiler/pp/command_test.go Normal file
View File

@@ -0,0 +1,189 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package pp
import (
"strings"
"testing"
)
// TestCommandSimple checks that a marker-less #command rule rewrites a line
// consisting of just its keyword.
func TestCommandSimple(t *testing.T) {
	const src = `#command CLS => @ 0,0 CLEAR
CLS`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, "@ 0,0 CLEAR") {
		t.Errorf("CLS should expand to '@ 0,0 CLEAR', got: %q", out)
	}
}
// TestCommandWithMarker checks that a single regular marker is captured and
// substituted into the result.
func TestCommandWithMarker(t *testing.T) {
	const src = `#command SAY <text> => QOut( <text> )
SAY "Hello"`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, `QOut( "Hello" )`) {
		t.Errorf("SAY should expand, got: %q", out)
	}
}
// TestCommandWithMultipleMarkers checks that two markers separated by a
// literal keyword are captured independently and may be reordered in the
// result.
func TestCommandWithMultipleMarkers(t *testing.T) {
	const src = `#command STORE <val> TO <var> => <var> := <val>
STORE 42 TO myVar`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, "myVar := 42") {
		t.Errorf("STORE should expand, got: %q", out)
	}
}
// TestTranslateStringify checks the <(x)> result marker: the captured
// expression must appear wrapped in double quotes. The pattern deliberately
// avoids parentheses.
func TestTranslateStringify(t *testing.T) {
	const src = `#translate ASSERT <expr> => __Assert( <(expr)>, <expr> )
ASSERT x > 10`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, `"x > 10"`) {
		t.Errorf("stringify should produce quoted text, got: %q", out)
	}
}
// TestCommandCaseInsensitive checks that a plain #command rule written in
// upper case matches a lower-case source line.
func TestCommandCaseInsensitive(t *testing.T) {
	const src = `#command CLEAR SCREEN => @ 0,0 CLEAR
clear screen`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, "@ 0,0 CLEAR") {
		t.Errorf("case insensitive match failed, got: %q", out)
	}
}
// TestXtranslateCaseSensitive checks that an #xtranslate rule expands the
// line whose keyword matches in case and leaves the lower-case variant
// alone. The pattern avoids parentheses to keep matching simple.
func TestXtranslateCaseSensitive(t *testing.T) {
	const src = `#xtranslate MYFUNC <x> => myFuncImpl( <x> )
MYFUNC 42
myfunc 99`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	expanded := strings.Contains(out, "myFuncImpl( 42 )")
	if !expanded {
		t.Errorf("case-sensitive match should work, got: %q", out)
	}
	wronglyExpanded := strings.Contains(out, "myFuncImpl( 99 )")
	if wronglyExpanded {
		t.Error("case-sensitive should NOT match lowercase")
	}
}
// TestCommandWordList checks a <x:a,b,c> word-list marker: the matched word
// is captured and can be stringified in the result.
func TestCommandWordList(t *testing.T) {
	const src = `#command SET DELETED <x:ON,OFF,&> => Set( _SET_DELETED, <(x)> )
SET DELETED ON`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, `Set( _SET_DELETED, "ON" )`) {
		t.Errorf("word list match failed, got: %q", out)
	}
}
// TestCommandWildcard checks that a wild marker swallows the rest of the
// line, so that a rule with an empty result makes the whole line disappear.
func TestCommandWildcard(t *testing.T) {
	const src = `#command NOTE <*x*> =>
NOTE This is a comment that should disappear`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if leftover := strings.TrimSpace(out); leftover != "" {
		t.Errorf("NOTE with wildcard should produce empty, got: %q", leftover)
	}
}
// TestCommandOptional exercises the plain "DO <proc>" form. Despite its
// name it contains no optional clause; it is the simplified variant without
// a comma list.
func TestCommandOptional(t *testing.T) {
	const src = `#command DO <proc> => <proc>()
DO MyFunc`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, "MyFunc()") {
		t.Errorf("DO MyFunc should expand to MyFunc(), got: %q", out)
	}
}
// TestCommandWithArgs checks the "DO <proc> WITH <args>" form: the procedure
// name and the argument are captured on either side of the WITH keyword.
func TestCommandWithArgs(t *testing.T) {
	const src = `#command DO <proc> WITH <args> => <proc>( <args> )
DO MyFunc WITH 42`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	if !strings.Contains(out, "MyFunc( 42 )") {
		t.Errorf("DO WITH should expand, got: %q", out)
	}
}
// TestStdChPatterns runs three rules modelled on Harbour's std.ch (END,
// ENDDO, ENDIF with trailing text) and checks that the expected lower-case
// keywords appear, in order, among the non-blank output lines.
func TestStdChPatterns(t *testing.T) {
	const src = `#command END <x> => end
#command ENDDO <*x*> => enddo
#command ENDIF <*x*> => endif
END SEQUENCE
ENDDO something
ENDIF // test`
	prep := New()
	result, _ := prep.Process("test.prg", src)

	expects := []string{"end", "enddo", "endif"}
	matched := 0
	for _, raw := range strings.Split(strings.TrimSpace(result), "\n") {
		text := strings.TrimSpace(raw)
		if text == "" || matched >= len(expects) {
			continue
		}
		// Only advance when the next expected keyword is seen; other lines
		// are ignored.
		if text == expects[matched] {
			matched++
		}
	}
	if matched != len(expects) {
		t.Errorf("std.ch patterns: matched %d/%d, result:\n%s", matched, len(expects), result)
	}
}
// TestHBTEST_Pattern exercises the key rule from hbtest.ch, which combines a
// dumb-stringify marker (#<x>), a hand-written code block and a second
// marker.
//
// The test is intentionally lenient: it requires TEST_CALL and the value 3
// to appear in the expansion, but does not fail when the stringified
// expression "Len("abc")" is missing.
func TestHBTEST_Pattern(t *testing.T) {
	const src = `#xtranslate HBTEST <x> IS <result> => TEST_CALL( #<x>, {|| <x> }, <result> )
HBTEST Len("abc") IS 3`
	prep := New()
	result, _ := prep.Process("test.prg", src)
	if !strings.Contains(result, "TEST_CALL") {
		t.Errorf("HBTEST macro should expand, got: %q", result)
	}
	// At minimum, the result marker should be present
	if !strings.Contains(result, "3") {
		t.Errorf("expected result value 3 in expansion, got: %q", result)
	}
}
// TestMultipleRules defines two rules whose keywords share a prefix (PRINT
// and PRINTLN) and checks that each source line is expanded by its own rule.
func TestMultipleRules(t *testing.T) {
	const src = `#command PRINT <text> => QOut( <text> )
#command PRINTLN <text> => QOut( <text> ) ; QOut()
PRINT "Hello"
PRINTLN "World"`
	prep := New()
	out, _ := prep.Process("test.prg", src)
	helloOK := strings.Contains(out, `QOut( "Hello" )`)
	if !helloOK {
		t.Error("PRINT should expand")
	}
	worldOK := strings.Contains(out, `QOut( "World" )`)
	if !worldOK {
		t.Error("PRINTLN should expand")
	}
}

552
compiler/pp/pp.go Normal file
View File

@@ -0,0 +1,552 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Preprocessor for Five — handles #include, #define, #ifdef/#endif.
// Harbour: /mnt/d/harbour-core/src/pp/ppcore.c (6383 lines)
//
// Five PP is simplified but covers the essential directives:
// #include "file.ch" — file inclusion
// #define NAME VALUE — simple text substitution
// #undef NAME — remove definition
// #ifdef NAME / #ifndef NAME / #else / #endif — conditional compilation
// #pragma — compiler hints
//
// #command/#translate rules are supported in simplified form (see command.go)
// and are applied to every active source line before #define substitution.
// Five handles CLASS syntax natively in the parser, so hbclass.ch
// is not strictly required. But #include is needed for user headers.
package pp
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// Preprocessor processes source code before lexing.
// Create one with New; a value carries its #define table, rule lists and
// collected Go dump blocks across calls.
type Preprocessor struct {
defines map[string]string // #define name → value
includeDirs []string // search paths for #include
included map[string]bool // prevent circular inclusion
commands []*Rule // #command rules
translates []*Rule // #translate rules
// errors collects messages for the current Process call; Process clears
// it on entry and returns it alongside the output.
errors []string
// GoDumps holds the text of each #pragma BEGINDUMP ... ENDDUMP block in
// the order encountered; the output refers to an entry by its index.
GoDumps []string // collected #pragma BEGINDUMP Go code blocks
}
// New returns a Preprocessor with empty define and include-tracking tables
// and the built-in command rules already registered.
func New() *Preprocessor {
	fresh := &Preprocessor{}
	fresh.defines = map[string]string{}
	fresh.included = map[string]bool{}
	fresh.addStdRules()
	return fresh
}
// addStdRules appends the built-in #command rules (a small subset of what
// Harbour's std.ch provides) to pp.commands. Each entry is parsed as a
// case-insensitive #command; entries that fail to parse are skipped.
//
// Note: @ SAY, @ GET, @ PROMPT and READ are handled by the parser directly,
// so no rules are registered for them here.
func (pp *Preprocessor) addStdRules() {
	builtins := [...]string{
		// MENU TO
		`MENU TO <var> => <var> := __MenuTo(<var>)`,
		// CLEAR GETS
		`CLEAR GETS => GetList := {}`,
	}
	for i := range builtins {
		parsed := ParseRule(builtins[i], true, false)
		if parsed == nil {
			continue
		}
		pp.commands = append(pp.commands, parsed)
	}
}
// AddIncludeDir adds a directory to search for #include files.
// Directories are kept in the order they were added.
func (pp *Preprocessor) AddIncludeDir(dir string) {
pp.includeDirs = append(pp.includeDirs, dir)
}
// Define adds a #define.
// It has the same effect on the define table as a "#define name value" line;
// an existing definition with the same name is overwritten.
func (pp *Preprocessor) Define(name, value string) {
pp.defines[name] = value
}
// Process runs the preprocessor over source and returns the rewritten text
// together with the messages recorded during this call. filename is used in
// those messages. Messages from earlier calls are discarded first.
func (pp *Preprocessor) Process(filename, source string) (string, []string) {
	pp.errors = nil
	// Finish processing before reading pp.errors: processLines appends to it.
	output := pp.processLines(filename, source, 0)
	return output, pp.errors
}
// processLines preprocesses one unit of source text (the main file or an
// included file) line by line and returns the resulting text.
//
// filename is used in error messages and passed on to the directive handler.
// depth is the #include nesting level; beyond 20 an error is recorded and
// source is returned unprocessed.
//
// Per line, in this order:
//  1. inside #pragma BEGINDUMP, lines are collected verbatim until a
//     #pragma ... ENDDUMP line; the block is stored in pp.GoDumps and a
//     "FIVE_GODUMP__ <index>" placeholder line is emitted;
//  2. block comments (/* ... */), including multi-line ones, are removed;
//  3. lines starting with '#' go to handleConditional and, in active
//     sections, to handleDirective;
//  4. remaining lines in active sections have #command/#translate rules and
//     then #define substitutions applied.
//
// Lines inside a dump block or a multi-line comment are replaced by empty
// lines. Lines consumed as directives or skipped in inactive sections are
// not appended here.
// An unterminated #ifdef/#ifndef at the end of the text is recorded as an
// error.
func (pp *Preprocessor) processLines(filename, source string, depth int) string {
if depth > 20 {
pp.errors = append(pp.errors, fmt.Sprintf("%s: #include depth exceeded (max 20)", filename))
return source
}
lines := strings.Split(source, "\n")
var result []string
var ifStack []bool // true = active section, false = skipping
active := true
inBlockComment := false // track multi-line /* */ comments
inPragmaDump := false // track #pragma BEGINDUMP ... ENDDUMP
var dumpLines []string // accumulate Go code lines
for i, line := range lines {
// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks)
if inPragmaDump {
trimCheck := strings.TrimSpace(line)
if strings.HasPrefix(trimCheck, "#") {
dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
inPragmaDump = false
pp.GoDumps = append(pp.GoDumps, strings.Join(dumpLines, "\n"))
dumpLines = nil
// Placeholder carrying the index of the block just stored.
result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
continue
}
}
dumpLines = append(dumpLines, line)
result = append(result, "") // blank out for line counting
continue
}
trimmed := strings.TrimSpace(line)
// Handle multi-line block comments
if inBlockComment {
if idx := strings.Index(line, "*/"); idx >= 0 {
inBlockComment = false
line = line[idx+2:] // keep content after */
trimmed = strings.TrimSpace(line)
if trimmed == "" {
result = append(result, "")
continue
}
} else {
result = append(result, "") // blank out comment lines
continue
}
}
// Strip block comments within a single line and detect opening /*
line = stripBlockComments(line, &inBlockComment)
trimmed = strings.TrimSpace(line)
// Check if in active section
if len(ifStack) > 0 {
active = ifStack[len(ifStack)-1]
} else {
active = true
}
// Preprocessor directives (always processed regardless of active state)
if strings.HasPrefix(trimmed, "#") {
directive := strings.TrimPrefix(trimmed, "#")
directive = strings.TrimSpace(directive)
// Detect #pragma BEGINDUMP
// NOTE(review): this check runs before the active-section test, so a
// dump block inside an inactive #ifdef is still collected — confirm intent.
upperDir := strings.ToUpper(directive)
if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
inPragmaDump = true
dumpLines = nil
result = append(result, "")
continue
}
// Conditionals must be tracked even while skipping so nesting stays balanced.
if pp.handleConditional(directive, &ifStack, active) {
continue
}
if !active {
continue // skip non-conditional directives in inactive sections
}
// The handler gets &result so it can append output itself; i+1 is the
// 1-based line number.
if pp.handleDirective(filename, directive, depth, &result, i+1) {
continue
}
// Not handled as a directive: fall through and treat it as a normal line.
}
if !active {
continue // skip lines in inactive #ifdef sections
}
// Apply #command/#translate rules
if len(pp.commands) > 0 || len(pp.translates) > 0 {
line = pp.applyRules(line)
}
// Apply #define substitutions
if len(pp.defines) > 0 {
line = pp.applyDefines(line)
}
result = append(result, line)
}
if len(ifStack) > 0 {
pp.errors = append(pp.errors, fmt.Sprintf("%s: unterminated #ifdef/#ifndef", filename))
}
return strings.Join(result, "\n")
}
// handleConditional processes #ifdef, #ifndef, #if, #else and #endif.
// It returns true if directive (the text after '#', already trimmed) was one
// of these, false otherwise.
//
// ifStack holds one entry per open conditional: true while the section is
// being emitted, false while it is skipped. active is the state of the
// enclosing section; a conditional opened inside a skipped section is always
// pushed as false.
//
// The macro name of #ifdef/#ifndef is the first word after the keyword, so
// trailing text such as a "// comment" is ignored, in the same way #else and
// #endif tolerate trailing text. The keyword may be followed by a space or a
// tab.
//
// #if is simplified: only the literal expressions 1, .T., 0 and .F. are
// evaluated; anything else counts as false. #else and #endif without an open
// conditional are accepted silently.
func (pp *Preprocessor) handleConditional(directive string, ifStack *[]bool, active bool) bool {
	upper := strings.ToUpper(directive)

	// macroName extracts the identifier from the text following an
	// #ifdef/#ifndef keyword, dropping a trailing // comment and anything
	// after the first word.
	macroName := func(rest string) string {
		if cut := strings.Index(rest, "//"); cut >= 0 {
			rest = rest[:cut]
		}
		if fields := strings.Fields(rest); len(fields) > 0 {
			return fields[0]
		}
		return ""
	}

	if strings.HasPrefix(upper, "IFDEF ") || strings.HasPrefix(upper, "IFDEF\t") {
		_, defined := pp.defines[macroName(directive[6:])]
		*ifStack = append(*ifStack, defined && active)
		return true
	}
	if strings.HasPrefix(upper, "IFNDEF ") || strings.HasPrefix(upper, "IFNDEF\t") {
		_, defined := pp.defines[macroName(directive[7:])]
		*ifStack = append(*ifStack, !defined && active)
		return true
	}

	// #if expr — simplified: support #if 0 (always false), #if 1 (always true),
	// and treat every other expression, e.g. #if __pragma(...), as false.
	if strings.HasPrefix(upper, "IF ") || upper == "IF" {
		rest := strings.TrimSpace(directive[2:])
		val := rest == "1" || rest == ".T."
		*ifStack = append(*ifStack, val && active)
		return true
	}

	// #else — may have trailing comment
	if upper == "ELSE" || strings.HasPrefix(upper, "ELSE ") || strings.HasPrefix(upper, "ELSE\t") {
		if depth := len(*ifStack); depth > 0 {
			// Flip the top of stack (only if parent was active)
			parentActive := true
			if depth > 1 {
				parentActive = (*ifStack)[depth-2]
			}
			(*ifStack)[depth-1] = !(*ifStack)[depth-1] && parentActive
		}
		return true
	}

	// #endif — may have trailing comment: #endif /* COMMENT */
	keyword := strings.TrimSpace(upper)
	if idx := strings.IndexAny(keyword, " \t"); idx > 0 {
		keyword = keyword[:idx]
	}
	if keyword == "ENDIF" {
		if depth := len(*ifStack); depth > 0 {
			*ifStack = (*ifStack)[:depth-1]
		}
		return true
	}
	return false
}
// handleDirective processes the non-conditional directives: #include,
// #define, #undef, #pragma, #warning/#error/#stdout and the
// #command/#translate/#xcommand/#xtranslate family.
//
// directive is the directive text as handed over by the caller (keywords are
// matched without a leading '#'). filename and lineNo are used in error
// messages, depth is passed on (incremented) when an include is expanded, and
// result receives any lines the directive emits.
//
// It returns true if the directive was recognised and consumed.
func (pp *Preprocessor) handleDirective(filename, directive string, depth int, result *[]string, lineNo int) bool {
	upper := strings.ToUpper(directive)

	switch {
	// #include "file" or #include <file>
	case strings.HasPrefix(upper, "INCLUDE "):
		inclFile := pp.extractIncludeFile(strings.TrimSpace(directive[8:]))
		if inclFile == "" {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #include", filename, lineNo))
			return true
		}
		// A file that is still being expanded further up the include chain
		// must not be expanded again, or the recursion never ends.
		if pp.included[inclFile] {
			*result = append(*result, fmt.Sprintf("// #include %q — circular include (skipped)", inclFile))
			return true
		}
		content := pp.resolveInclude(filename, inclFile)
		if content == "" {
			// Not found — not an error for Five (some .ch files are optional)
			*result = append(*result, fmt.Sprintf("// #include %q — not found (skipped)", inclFile))
			return true
		}
		// Keep the file marked while its content is processed so nested
		// includes of the same name are caught by the check above.
		pp.included[inclFile] = true
		processed := pp.processLines(inclFile, content, depth+1)
		delete(pp.included, inclFile)
		*result = append(*result, strings.Split(processed, "\n")...)
		return true

	// #define NAME [VALUE]
	case strings.HasPrefix(upper, "DEFINE "):
		if name, value, ok := splitDefine(strings.TrimSpace(directive[7:])); ok {
			pp.defines[name] = value
		}
		return true

	// #undef NAME
	case strings.HasPrefix(upper, "UNDEF "):
		// Only the first word is the name; anything after it (for example a
		// trailing comment) is ignored.
		if fields := strings.Fields(directive[6:]); len(fields) > 0 {
			delete(pp.defines, fields[0])
		}
		return true

	// #pragma — just pass through as comment
	case strings.HasPrefix(upper, "PRAGMA "):
		*result = append(*result, "// "+directive)
		return true

	// #warning, #error, #stdout — skip (emit as comment)
	case strings.HasPrefix(upper, "WARNING"),
		strings.HasPrefix(upper, "ERROR"),
		strings.HasPrefix(upper, "STDOUT"):
		*result = append(*result, "// #"+directive)
		return true

	// #command / #translate — parse and store rules
	case strings.HasPrefix(upper, "COMMAND "):
		if rule := ParseRule(directive[8:], true, false); rule != nil {
			pp.commands = append(pp.commands, rule)
		}
		return true
	case strings.HasPrefix(upper, "TRANSLATE "):
		if rule := ParseRule(directive[10:], false, false); rule != nil {
			pp.translates = append(pp.translates, rule)
		}
		return true
	case strings.HasPrefix(upper, "XCOMMAND "):
		if rule := ParseRule(directive[9:], true, true); rule != nil {
			pp.commands = append(pp.commands, rule)
		}
		return true
	case strings.HasPrefix(upper, "XTRANSLATE "):
		if rule := ParseRule(directive[11:], false, true); rule != nil {
			pp.translates = append(pp.translates, rule)
		}
		return true
	}
	return false
}

// splitDefine splits the text after "#define" into macro name and value.
// The name ends at the first space or tab. ok is false for function-like
// macros (a '(' directly attached to the name), which are not supported yet,
// and for an empty name.
func splitDefine(rest string) (name, value string, ok bool) {
	sep := strings.IndexAny(rest, " \t")
	if paren := strings.IndexByte(rest, '('); paren > 0 && (sep < 0 || paren < sep) {
		return "", "", false
	}
	name = rest
	if sep >= 0 {
		name = rest[:sep]
		value = stripDefineComment(strings.TrimSpace(rest[sep+1:]))
	}
	return name, value, name != ""
}

// stripDefineComment removes a trailing // or /* comment from a macro value.
// Comment openers inside a "..." or '...' string literal are left alone.
func stripDefineComment(value string) string {
	quote := byte(0)
	for i := 0; i < len(value); i++ {
		c := value[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '/' && i+1 < len(value) && (value[i+1] == '/' || value[i+1] == '*'):
			return strings.TrimSpace(value[:i])
		}
	}
	return value
}
// extractIncludeFile returns the file name from the operand of an #include
// directive.
//
// For `"file"` and `<file>` it returns the text between the opening delimiter
// and the first matching closing delimiter; anything after the closing
// delimiter (such as a trailing comment) is ignored. An operand without a
// recognised opening delimiter, or without a closing one, is returned
// unchanged as a bare file name.
func (pp *Preprocessor) extractIncludeFile(s string) string {
	s = strings.TrimSpace(s)
	if s == "" {
		return s
	}

	var closer byte
	switch s[0] {
	case '"':
		closer = '"'
	case '<':
		closer = '>'
	default:
		return s // bare filename
	}

	if end := strings.IndexByte(s[1:], closer); end >= 0 {
		return s[1 : 1+end]
	}
	return s // unterminated delimiter: keep the original behaviour
}
// resolveInclude looks up inclFile and returns its content, or "" when the
// file cannot be read from any candidate location.
//
// Candidate locations are tried in this order:
//  1. the directory of currentFile (skipped when currentFile is empty)
//  2. each entry of pp.includeDirs, in order
//
// inclFile is recorded in pp.included for the duration of this call and the
// function returns "" straight away if it is already recorded on entry. The
// record is removed again when the call returns, unless it was already
// present on entry.
func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
	if pp.included[inclFile] {
		return ""
	}
	pp.included[inclFile] = true
	defer delete(pp.included, inclFile)

	// Collect the directories to probe; the joined paths are built lazily.
	dirs := make([]string, 0, len(pp.includeDirs)+1)
	if currentFile != "" {
		dirs = append(dirs, filepath.Dir(currentFile))
	}
	for _, includeDir := range pp.includeDirs {
		dirs = append(dirs, includeDir)
	}

	for _, dir := range dirs {
		if data, err := os.ReadFile(filepath.Join(dir, inclFile)); err == nil {
			return string(data)
		}
	}
	return ""
}
// applyRules rewrites a source line using the stored #command and #translate
// rules.
//
// Blank lines and lines that start with "//" are returned untouched. For any
// other line the whitespace-trimmed text is offered to each rule's MatchLine,
// first every rule in pp.commands and then every rule in pp.translates. The
// first rule that matches wins: its result is returned behind the line's
// original leading spaces and tabs. If no rule matches, the line is returned
// unchanged.
func (pp *Preprocessor) applyRules(line string) string {
	body := strings.TrimSpace(line)
	if body == "" || strings.HasPrefix(body, "//") {
		return line
	}

	// reindent puts the original leading blanks back in front of an expansion.
	reindent := func(expanded string) string {
		indentLen := len(line) - len(strings.TrimLeft(line, " \t"))
		return line[:indentLen] + expanded
	}

	for _, cmd := range pp.commands {
		if expanded, matched := cmd.MatchLine(body); matched {
			return reindent(expanded)
		}
	}
	for _, tr := range pp.translates {
		if expanded, matched := tr.MatchLine(body); matched {
			return reindent(expanded)
		}
	}
	return line
}
// stripBlockComments removes /* ... */ comments from a single line.
//
// Each complete block comment is replaced by one space. If a /* has no
// closing */ on the same line, the text before it is returned and *inBlock is
// set to true so the caller knows the comment continues on following lines.
// The function never clears *inBlock.
//
// Comment openers inside "..." or '...' string literals are copied verbatim.
// A // line comment ends the scan: the rest of the line is copied unchanged,
// so a /* that merely appears inside a line comment does not open a block.
func stripBlockComments(line string, inBlock *bool) string {
	var out strings.Builder
	quote := byte(0) // active string delimiter, 0 when outside a literal

	for i := 0; i < len(line); {
		c := line[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '/' && i+1 < len(line) && line[i+1] == '/':
			// Line comment: nothing after it can start a block comment.
			out.WriteString(line[i:])
			return out.String()
		case c == '/' && i+1 < len(line) && line[i+1] == '*':
			end := strings.Index(line[i+2:], "*/")
			if end < 0 {
				*inBlock = true
				return out.String() // rest of line is comment
			}
			out.WriteByte(' ') // replace comment with space
			i += 2 + end + 2   // skip past */
			continue
		}
		out.WriteByte(c)
		i++
	}
	return out.String()
}
// applyDefines substitutes the registered #define macros in a line.
//
// Every define with a non-empty value is passed to replaceWord once; defines
// with an empty value are flags and are never substituted. This is a single
// pass of plain word replacement, not full macro expansion.
//
// NOTE(review): pp.defines is ranged over directly, so the substitution order
// follows Go's unspecified map iteration order. When one macro's value
// mentions another macro the result may therefore vary between runs — confirm
// whether callers rely on chained defines.
func (pp *Preprocessor) applyDefines(line string) string {
	for macro, replacement := range pp.defines {
		if replacement != "" {
			line = replaceWord(line, macro, replacement)
		}
	}
	return line
}
// replaceWord replaces whole-word occurrences of old with new, leaving text
// inside "..." and '...' string literals untouched.
//
// An occurrence counts as a whole word when the characters directly before
// and after it are not word characters (see isWordChar) or lie outside the
// line. The replacement text is not rescanned. An empty old never matches, so
// the line is returned unchanged.
func replaceWord(line, old, new string) string {
	// An empty pattern would match at every position without consuming any
	// input; treat it as "nothing to replace".
	if old == "" || !strings.Contains(line, old) {
		return line
	}

	var out strings.Builder
	out.Grow(len(line))
	quote := byte(0) // active string delimiter, 0 when outside a literal

	for i := 0; i < len(line); {
		c := line[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case strings.HasPrefix(line[i:], old):
			startsWord := i == 0 || !isWordChar(line[i-1])
			endsWord := i+len(old) == len(line) || !isWordChar(line[i+len(old)])
			if startsWord && endsWord {
				out.WriteString(new)
				i += len(old)
				continue
			}
		}
		out.WriteByte(c)
		i++
	}
	return out.String()
}
// isWordChar reports whether c is an ASCII letter, an ASCII digit or an
// underscore.
func isWordChar(c byte) bool {
	switch {
	case c == '_':
		return true
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}

264
compiler/pp/pp_test.go Normal file
View File

@@ -0,0 +1,264 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package pp
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestDefine checks that a #define with a value is substituted where the
// macro name is used.
func TestDefine(t *testing.T) {
	const source = `#define VERSION "1.0"
? VERSION`

	p := New()
	output, errs := p.Process("test.prg", source)
	if len(errs) > 0 {
		t.Fatal(errs)
	}
	if substituted := strings.Contains(output, `"1.0"`); !substituted {
		t.Errorf("define not substituted: %q", output)
	}
}
// TestDefineFlag checks that a value-less #define selects the #ifdef branch
// and suppresses the #else branch.
func TestDefineFlag(t *testing.T) {
	const source = `#define DEBUG
#ifdef DEBUG
? "Debug mode"
#else
? "Release mode"
#endif`

	p := New()
	output, errs := p.Process("test.prg", source)
	if len(errs) > 0 {
		t.Fatal(errs)
	}

	hasDebug := strings.Contains(output, "Debug mode")
	hasRelease := strings.Contains(output, "Release mode")
	if !hasDebug {
		t.Error("ifdef DEBUG should include Debug mode")
	}
	if hasRelease {
		t.Error("should NOT include Release mode")
	}
}
// TestIfndef checks that the #ifndef branch is emitted when the macro is not
// defined.
func TestIfndef(t *testing.T) {
	const source = `#ifndef RELEASE
? "Not release"
#else
? "Release"
#endif`

	p := New()
	output, _ := p.Process("test.prg", source)
	if kept := strings.Contains(output, "Not release"); !kept {
		t.Error("ifndef should include 'Not release'")
	}
}
// TestNestedIfdef checks an #ifdef/#else nested inside an active #ifdef: with
// A defined and B undefined only the outer branch and the inner #else branch
// may appear in the output.
func TestNestedIfdef(t *testing.T) {
	const source = `#ifdef A
? "A is defined"
#ifdef B
? "B is defined"
#else
? "B is not defined"
#endif
#endif`

	p := New()
	p.Define("A", "")
	output, _ := p.Process("test.prg", source)

	mustContain := []struct{ text, msg string }{
		{"A is defined", "A should be defined"},
		{"B is not defined", "B should not be defined"},
	}
	for _, want := range mustContain {
		if !strings.Contains(output, want.text) {
			t.Error(want.msg)
		}
	}
	if strings.Contains(output, "B is defined") {
		t.Error("B should NOT appear as defined")
	}
}
// TestUndef checks that #undef stops substitution: FOO is used once before
// and once after the #undef, so exactly one output line may contain the
// replacement text.
func TestUndef(t *testing.T) {
	const source = `#define FOO "bar"
? FOO
#undef FOO
? FOO`

	p := New()
	output, _ := p.Process("test.prg", source)

	// Count the lines that carry the substituted value.
	substituted := 0
	for _, line := range strings.Split(output, "\n") {
		if strings.Contains(strings.TrimSpace(line), `"bar"`) {
			substituted++
		}
	}
	if substituted != 1 {
		t.Errorf("expected FOO substituted once, found %d times", substituted)
	}
}
// TestInclude checks that defines from an #include'd header are applied to
// the including file.
func TestInclude(t *testing.T) {
	dir := t.TempDir()

	// Create header file; fail early if the fixture cannot be written so a
	// setup problem is not reported as a preprocessor bug.
	headerContent := `#define APP_NAME "Five Test"
#define APP_VERSION "1.0"`
	if err := os.WriteFile(filepath.Join(dir, "myapp.ch"), []byte(headerContent), 0644); err != nil {
		t.Fatal(err)
	}

	// Create main file
	src := `#include "myapp.ch"
? APP_NAME
? APP_VERSION`

	p := New()
	p.AddIncludeDir(dir)
	result, errs := p.Process(filepath.Join(dir, "main.prg"), src)
	if len(errs) > 0 {
		t.Fatal(errs)
	}
	if !strings.Contains(result, `"Five Test"`) {
		t.Errorf("APP_NAME not substituted: %q", result)
	}
	if !strings.Contains(result, `"1.0"`) {
		t.Error("APP_VERSION not substituted")
	}
}
// TestIncludeNested checks that a header included by another header still
// contributes its defines to the main file.
func TestIncludeNested(t *testing.T) {
	dir := t.TempDir()

	// base.ch includes sub.ch. Fixture write errors abort the test so they
	// are not mistaken for preprocessor failures.
	fixtures := []struct{ name, content string }{
		{"sub.ch", `#define SUB_VAL 42`},
		{"base.ch", `#include "sub.ch"
#define BASE_VAL 100`},
	}
	for _, f := range fixtures {
		if err := os.WriteFile(filepath.Join(dir, f.name), []byte(f.content), 0644); err != nil {
			t.Fatal(err)
		}
	}

	src := `#include "base.ch"
? SUB_VAL
? BASE_VAL`

	p := New()
	p.AddIncludeDir(dir)
	result, _ := p.Process(filepath.Join(dir, "main.prg"), src)
	if !strings.Contains(result, "42") {
		t.Error("SUB_VAL from nested include should be 42")
	}
	if !strings.Contains(result, "100") {
		t.Error("BASE_VAL should be 100")
	}
}
// TestIncludeGuard checks that a header protected by an #ifndef/#define guard
// can be included twice and its define is still applied.
func TestIncludeGuard(t *testing.T) {
	dir := t.TempDir()

	// Header with include guard
	header := `#ifndef _MYHEADER_CH
#define _MYHEADER_CH
#define MY_CONST 999
#endif`
	if err := os.WriteFile(filepath.Join(dir, "myheader.ch"), []byte(header), 0644); err != nil {
		t.Fatal(err) // fixture problem, not a preprocessor failure
	}

	// Include twice — should work (guard prevents double processing)
	src := `#include "myheader.ch"
#include "myheader.ch"
? MY_CONST`

	p := New()
	p.AddIncludeDir(dir)
	result, _ := p.Process(filepath.Join(dir, "main.prg"), src)
	if !strings.Contains(result, "999") {
		t.Error("MY_CONST should be 999")
	}
}
// TestHbclassChHandled checks that a simulated hbclass.ch can be included:
// the #command directive itself must not reach the output, the class name
// must survive, and ordinary code must pass through.
func TestHbclassChHandled(t *testing.T) {
	dir := t.TempDir()

	// Simulate hbclass.ch — #command CLASS maps to comments (Five handles natively)
	hbclass := `#ifndef HB_CLASS_CH_
#define HB_CLASS_CH_
#command CLASS <name> => // class <name> handled natively
#endif`
	if err := os.WriteFile(filepath.Join(dir, "hbclass.ch"), []byte(hbclass), 0644); err != nil {
		t.Fatal(err) // fixture problem, not a preprocessor failure
	}

	src := `#include "hbclass.ch"
CLASS Person
FUNCTION Main()
? "OK"
RETURN NIL`

	p := New()
	p.AddIncludeDir(dir)
	result, errs := p.Process(filepath.Join(dir, "main.prg"), src)
	if len(errs) > 0 {
		t.Fatal(errs)
	}

	// #command directives themselves should be removed
	if strings.Contains(result, "#command") {
		t.Error("preprocessor directives should be removed")
	}
	// CLASS Person should be expanded by #command rule
	if !strings.Contains(result, "Person") {
		t.Error("Person should appear in output")
	}
	// FUNCTION should still be there
	if !strings.Contains(result, "FUNCTION Main") {
		t.Error("FUNCTION Main should pass through")
	}
}
// TestDefineInString checks that a macro name inside a string literal is left
// alone while the same name outside a string is replaced.
func TestDefineInString(t *testing.T) {
	const source = `#define FOO bar
? "FOO should not change"
? FOO`

	p := New()
	output, _ := p.Process("test.prg", source)
	if !strings.Contains(output, `"FOO should not change"`) {
		t.Error("define should not replace inside strings")
	}

	// Outside string should be replaced
	replaced := false
	for _, line := range strings.Split(output, "\n") {
		if strings.TrimSpace(line) == "? bar" {
			replaced = true
			break
		}
	}
	if !replaced {
		t.Error("FOO should be replaced to bar outside strings")
	}
}
// TestPragma checks that a #pragma line is turned into a comment that still
// carries the pragma text.
func TestPragma(t *testing.T) {
	const source = `#pragma compatibility(harbour)
? "test"`

	p := New()
	output, _ := p.Process("test.prg", source)

	isComment := strings.Contains(output, "// pragma")
	keepsText := strings.Contains(output, "compatibility")
	if !(isComment && keepsText) {
		t.Error("pragma should be converted to comment")
	}
}
// TestMissingInclude checks that an #include of a file that does not exist is
// replaced by a "not found" comment and that the following code is still
// emitted.
func TestMissingInclude(t *testing.T) {
	const source = `#include "nonexistent.ch"
? "still works"`

	p := New()
	output, _ := p.Process("test.prg", source)

	// Missing include should not crash, just skip with comment
	checks := []struct{ text, msg string }{
		{"not found", "missing include should produce a comment"},
		{"still works", "code after missing include should continue"},
	}
	for _, c := range checks {
		if !strings.Contains(output, c.text) {
			t.Error(c.msg)
		}
	}
}

536
compiler/token/token.go Normal file
View File

@@ -0,0 +1,536 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Token definitions for the Five (Harbour-compatible) language.
// Pattern follows tsgo's Kind+Precedence approach
// (ref/typescript-go/internal/ast/kind.go, precedence.go).
package token
// Kind represents a token type. Using int16 following tsgo pattern.
type Kind int16
// Token kinds. The values are assigned by iota in declaration order, starting
// with ILLEGAL at 0, so inserting or reordering entries changes every value
// after that point. kindNames is indexed by these values.
const (
// Special
ILLEGAL Kind = iota
EOF
NEWLINE // statement terminator
// Literals
INT // 42
LONG // 42L or large integer
DOUBLE // 3.14
STRING // "hello" or 'hello'
DATE_LIT // 0d20260327 or CTOD("20260327")
TRUE // .T.
FALSE // .F.
NIL_LIT // NIL
// Identifiers
IDENT // variable/function name
// Operators
PLUS // +
MINUS // -
STAR // *
SLASH // /
PERCENT // %
POWER // ** or ^
ASSIGN // :=
EQ // = or ==
EXEQ // ==
NEQ // != or <> or #
LT // <
GT // >
LTE // <=
GTE // >=
DOLLAR // $ (string containment)
AMPERSAND // & (macro)
AT // @ (pass by ref)
ARROW // -> (alias field access)
DBLARROW // => (hash pair)
COLONCOLON // :: (self access)
COLON // : (send message)
DOT // .
INC // ++ (postfix)
DEC // -- (postfix)
PLUSEQ // +=
MINUSEQ // -=
STAREQ // *=
SLASHEQ // /=
PERCENTEQ // %=
POWEREQ // **=
// Logical operators (keyword-style)
AND // .AND.
OR // .OR.
NOT // .NOT. or !
// Delimiters
LPAREN // (
RPAREN // )
LBRACKET // [
RBRACKET // ]
LBRACE // {
RBRACE // }
COMMA // ,
SEMICOLON // ; (line continuation)
PIPE // | (in code blocks {|x| ...})
QMARK // ? (QOut shorthand)
QQMARK // ?? (QQOut shorthand)
// Keywords — Declarations
FUNCTION_KW
PROCEDURE
RETURN
LOCAL
STATIC
PRIVATE
PUBLIC
FIELD
MEMVAR
PARAMETERS
DECLARE
// Keywords — Control flow
IF
ELSEIF
ELSE
ENDIF
DO
WHILE
ENDDO
FOR
TO
STEP
NEXT
EACH
IN
EXIT
LOOP
SWITCH
CASE
OTHERWISE
ENDSWITCH
ENDCASE
BEGIN
SEQUENCE
RECOVER
USING
END
// Keywords — OOP
CLASS
ENDCLASS
DATA
METHOD
INHERIT
FROM
CONSTRUCTOR
DESTRUCTOR
INLINE_KW
OPERATOR_KW
ACCESS
ASSIGN_KW
// Keywords — xBase commands
USE
ALIAS
SELECT
GO
GOTO
TOP
BOTTOM
SKIP_KW
SEEK
SOFTSEEK
REPLACE
WITH
APPEND
BLANK
DELETE_KW
RECALL
PACK
ZAP
INDEX
ON
UNIQUE
DESCENDING
ASCENDING
SET
FILTER
RELATION
INTO
ORDER
// Keywords — New Five extensions
// NOTE: the keywords table in this file has no entry that yields GO_KW,
// CHANNEL, SEND_KW, RECEIVE or WAITGROUP ("GO" maps to GO above).
IMPORT
GO_KW // GO (goroutine)
CHANNEL
SEND_KW
RECEIVE
WAITGROUP
TYPE_KW // TYPE ... END TYPE
AS
DEFER_KW // DEFER expr (cleanup on function exit)
CONST_KW // CONST ... END CONST (enum block)
QUESTION_COLON // ?: nil-safe send
WATCH_KW // WATCH ... CASE ... ENDWATCH (channel select)
ASYNC_KW // ASYNC expr (launch async)
AWAIT_KW // AWAIT expr (wait for result)
PARALLEL_KW // PARALLEL FOR (parallel loop)
ARROW_LEFT // <- (channel receive)
TIMEOUT_KW // WITH TIMEOUT n
SPAWN_KW // SPAWN { block } (goroutine); the keywords table also maps LAUNCH and GOROUTINE to this kind
// Keywords — Preprocessor
PP_INCLUDE // #include
PP_DEFINE // #define
PP_UNDEF // #undef
PP_IFDEF // #ifdef
PP_IFNDEF // #ifndef
PP_ELSE // #else
PP_ENDIF // #endif
PP_COMMAND // #command
PP_TRANSLATE // #translate
PP_PRAGMA // #pragma
// Internal
_kindEnd // one past the last declared kind; not a real token
)
// Token represents a single lexical token.
type Token struct {
Kind Kind // token type
Literal string // raw text
Pos Position // location of the token in the source
}
// Position is a location in a source file. Its String method prints it as
// "File:Line:Col", or "Line:Col" when File is empty.
type Position struct {
File string // source file name; may be empty
Line int // line number
Col int // column number
Offset int // byte offset from start of source
}
// String formats the position as "File:Line:Col". When File is empty the
// file part and its colon are left out, giving "Line:Col".
func (p Position) String() string {
	lineCol := itoa(p.Line) + ":" + itoa(p.Col)
	if p.File == "" {
		return lineCol
	}
	return p.File + ":" + lineCol
}
// itoa converts n to its decimal string form without importing strconv.
//
// The digits are produced from the unsigned magnitude of n, so the most
// negative int (whose negation does not fit in an int) is formatted
// correctly as well.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}

	// uint(n) reinterprets a negative n in two's complement; negating that
	// unsigned value yields |n| even for the minimum int.
	mag := uint(n)
	if n < 0 {
		mag = -mag
	}

	// 20 bytes hold the 19 digits of a 64-bit magnitude plus a sign.
	var buf [20]byte
	i := len(buf)
	for mag > 0 {
		i--
		buf[i] = byte('0' + mag%10)
		mag /= 10
	}
	if n < 0 {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
// --- Operator Precedence (tsgo pattern) ---
// Precedence is the rank of an operator class. The constants below are
// numbered by iota in declaration order, so PrecNone is the smallest value and
// PrecPrimary the largest; reordering them changes the ranking.
// NOTE(review): presumably a larger value means tighter binding — confirm
// against the parser that consumes GetBinaryPrecedence.
type Precedence int
const (
PrecNone Precedence = iota // not a binary operator (GetBinaryPrecedence default)
PrecAssign // :=, +=, -=, ...
PrecOr // .OR.
PrecAnd // .AND.
PrecNot // .NOT., !
PrecComparison // =, ==, !=, <, >, <=, >=, $
PrecAddition // +, -
PrecMultiply // *, /, %
PrecPower // **, ^
PrecUnary // -, !, .NOT., ++, --
PrecPostfix // ++, --, [], ()
PrecCall // function(), obj:method()
PrecPrimary // literals, identifiers, (expr)
)
// GetBinaryPrecedence maps a binary operator token to its precedence class.
// Any kind that is not listed below yields PrecNone.
// Pattern: tsgo GetBinaryOperatorPrecedence (ref/typescript-go/internal/ast/precedence.go:338)
func GetBinaryPrecedence(kind Kind) Precedence {
	prec := PrecNone
	switch kind {
	case POWER:
		prec = PrecPower
	case STAR, SLASH, PERCENT:
		prec = PrecMultiply
	case PLUS, MINUS:
		prec = PrecAddition
	case EQ, EXEQ, NEQ, LT, GT, LTE, GTE, DOLLAR:
		prec = PrecComparison
	case AND:
		prec = PrecAnd
	case OR:
		prec = PrecOr
	case ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
		prec = PrecAssign
	}
	return prec
}
// IsRightAssociative reports whether kind is an operator that groups from
// right to left: the power operator, plain assignment and the compound
// assignments. Every other kind yields false.
func IsRightAssociative(kind Kind) bool {
	rightToLeft := [...]Kind{POWER, ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ}
	for _, op := range rightToLeft {
		if op == kind {
			return true
		}
	}
	return false
}
// --- Keyword lookup ---
// keywords maps the upper-case spelling of every keyword to its Kind.
// LookupKeyword upper-cases its argument before consulting this table, so the
// keys must stay upper-case.
//
// The table is a package-level literal rather than being filled in init(), so
// it is already populated while other package-level variables are being
// initialised.
var keywords = map[string]Kind{
	// Declarations
	"FUNCTION":   FUNCTION_KW,
	"PROCEDURE":  PROCEDURE,
	"RETURN":     RETURN,
	"LOCAL":      LOCAL,
	"STATIC":     STATIC,
	"PRIVATE":    PRIVATE,
	"PUBLIC":     PUBLIC,
	"FIELD":      FIELD,
	"MEMVAR":     MEMVAR,
	"PARAMETERS": PARAMETERS,
	"DECLARE":    DECLARE,

	// Control flow
	"IF":        IF,
	"ELSEIF":    ELSEIF,
	"ELSE":      ELSE,
	"ENDIF":     ENDIF,
	"DO":        DO,
	"WHILE":     WHILE,
	"ENDDO":     ENDDO,
	"FOR":       FOR,
	"TO":        TO,
	"STEP":      STEP,
	"NEXT":      NEXT,
	"EACH":      EACH,
	"IN":        IN,
	"EXIT":      EXIT,
	"LOOP":      LOOP,
	"SWITCH":    SWITCH,
	"CASE":      CASE,
	"OTHERWISE": OTHERWISE,
	"ENDSWITCH": ENDSWITCH,
	"ENDCASE":   ENDCASE,
	"BEGIN":     BEGIN,
	"SEQUENCE":  SEQUENCE,
	"RECOVER":   RECOVER,
	"USING":     USING,
	"END":       END,

	// OOP
	"CLASS":    CLASS,
	"ENDCLASS": ENDCLASS,
	"DATA":     DATA,
	// METHOD: recognized as keyword (used at top level too: METHOD name CLASS classname)
	"METHOD":      METHOD,
	"INHERIT":     INHERIT,
	"FROM":        FROM,
	"CONSTRUCTOR": CONSTRUCTOR,
	"DESTRUCTOR":  DESTRUCTOR,
	"INLINE":      INLINE_KW,
	"OPERATOR":    OPERATOR_KW,
	"ACCESS":      ACCESS,
	"ASSIGN":      ASSIGN_KW,

	// xBase commands
	"USE":        USE,
	"ALIAS":      ALIAS,
	"SELECT":     SELECT,
	"GO":         GO,
	"GOTO":       GOTO,
	"TOP":        TOP,
	"BOTTOM":     BOTTOM,
	"SKIP":       SKIP_KW,
	"SEEK":       SEEK,
	"SOFTSEEK":   SOFTSEEK,
	"REPLACE":    REPLACE,
	"WITH":       WITH,
	"APPEND":     APPEND,
	"BLANK":      BLANK,
	"DELETE":     DELETE_KW,
	"RECALL":     RECALL,
	"PACK":       PACK,
	"ZAP":        ZAP,
	"INDEX":      INDEX,
	"ON":         ON,
	"UNIQUE":     UNIQUE,
	"DESCENDING": DESCENDING,
	"ASCENDING":  ASCENDING,
	"SET":        SET,
	"FILTER":     FILTER,
	"RELATION":   RELATION,
	"INTO":       INTO,
	"ORDER":      ORDER,

	// Five extensions
	"IMPORT": IMPORT,
	// CHANNEL, SEND, RECEIVE, WAITGROUP — now RTL functions, not keywords
	"TYPE":      TYPE_KW,
	"AS":        AS,
	"DEFER":     DEFER_KW,
	"CONST":     CONST_KW,
	"WATCH":     WATCH_KW,
	"ASYNC":     ASYNC_KW,
	"AWAIT":     AWAIT_KW,
	"PARALLEL":  PARALLEL_KW,
	"TIMEOUT":   TIMEOUT_KW,
	"SPAWN":     SPAWN_KW,
	"LAUNCH":    SPAWN_KW,
	"GOROUTINE": SPAWN_KW,
	"NIL":       NIL_LIT,

	// Harbour aliases
	"FUNC": FUNCTION_KW,
	"PROC": PROCEDURE,
	"RET":  RETURN,
	"ENDW": ENDDO, // some Harbour code uses ENDW
}
// LookupKeyword classifies an identifier: it returns the keyword Kind when
// the upper-cased identifier is in the keywords table and IDENT otherwise.
// Upper-casing before the lookup makes the match case-insensitive.
func LookupKeyword(ident string) Kind {
	kind, isKeyword := keywords[toUpper(ident)]
	if !isKeyword {
		return IDENT
	}
	return kind
}
// toUpper upper-cases the ASCII letters a-z in s and leaves every other byte
// as it is. When s contains no lower-case ASCII letter it is returned as-is,
// without allocating a copy.
func toUpper(s string) string {
	// Locate the first byte that needs changing.
	first := -1
	for i := 0; i < len(s); i++ {
		if 'a' <= s[i] && s[i] <= 'z' {
			first = i
			break
		}
	}
	if first < 0 {
		return s // already uppercase
	}

	// Copy once, then fix up the tail in place.
	out := []byte(s)
	for i := first; i < len(out); i++ {
		if 'a' <= out[i] && out[i] <= 'z' {
			out[i] -= 'a' - 'A'
		}
	}
	return string(out)
}
// String returns the display name of the token kind.
//
// It returns "UNKNOWN" for any kind that has no name: values outside the
// bounds of kindNames (including negative values) and values inside the table
// whose entry is empty.
func (k Kind) String() string {
	if k >= 0 && int(k) < len(kindNames) {
		if name := kindNames[k]; name != "" {
			return name
		}
	}
	return "UNKNOWN"
}
// kindNames holds the display name of each token kind, indexed by Kind value.
// Every declared kind up to and including PP_PRAGMA has an entry, so
// Kind.String can print any token. Keywords use their source spelling,
// operators and delimiters their symbol.
var kindNames = [...]string{
	// Special
	ILLEGAL: "ILLEGAL",
	EOF:     "EOF",
	NEWLINE: "NEWLINE",

	// Literals and identifiers
	INT:      "INT",
	LONG:     "LONG",
	DOUBLE:   "DOUBLE",
	STRING:   "STRING",
	DATE_LIT: "DATE",
	TRUE:     ".T.",
	FALSE:    ".F.",
	NIL_LIT:  "NIL",
	IDENT:    "IDENT",

	// Operators
	PLUS:       "+",
	MINUS:      "-",
	STAR:       "*",
	SLASH:      "/",
	PERCENT:    "%",
	POWER:      "**",
	ASSIGN:     ":=",
	EQ:         "=",
	EXEQ:       "==",
	NEQ:        "!=",
	LT:         "<",
	GT:         ">",
	LTE:        "<=",
	GTE:        ">=",
	DOLLAR:     "$",
	AMPERSAND:  "&",
	AT:         "@",
	ARROW:      "->",
	DBLARROW:   "=>",
	COLONCOLON: "::",
	COLON:      ":",
	DOT:        ".",
	INC:        "++",
	DEC:        "--",
	PLUSEQ:     "+=",
	MINUSEQ:    "-=",
	STAREQ:     "*=",
	SLASHEQ:    "/=",
	PERCENTEQ:  "%=",
	POWEREQ:    "**=",
	AND:        ".AND.",
	OR:         ".OR.",
	NOT:        ".NOT.",

	// Delimiters
	LPAREN:    "(",
	RPAREN:    ")",
	LBRACKET:  "[",
	RBRACKET:  "]",
	LBRACE:    "{",
	RBRACE:    "}",
	COMMA:     ",",
	SEMICOLON: ";",
	PIPE:      "|",
	QMARK:     "?",
	QQMARK:    "??",

	// Keywords — declarations
	FUNCTION_KW: "FUNCTION",
	PROCEDURE:   "PROCEDURE",
	RETURN:      "RETURN",
	LOCAL:       "LOCAL",
	STATIC:      "STATIC",
	PRIVATE:     "PRIVATE",
	PUBLIC:      "PUBLIC",
	FIELD:       "FIELD",
	MEMVAR:      "MEMVAR",
	PARAMETERS:  "PARAMETERS",
	DECLARE:     "DECLARE",

	// Keywords — control flow
	IF:        "IF",
	ELSEIF:    "ELSEIF",
	ELSE:      "ELSE",
	ENDIF:     "ENDIF",
	DO:        "DO",
	WHILE:     "WHILE",
	ENDDO:     "ENDDO",
	FOR:       "FOR",
	TO:        "TO",
	STEP:      "STEP",
	NEXT:      "NEXT",
	EACH:      "EACH",
	IN:        "IN",
	EXIT:      "EXIT",
	LOOP:      "LOOP",
	SWITCH:    "SWITCH",
	CASE:      "CASE",
	OTHERWISE: "OTHERWISE",
	ENDSWITCH: "ENDSWITCH",
	ENDCASE:   "ENDCASE",
	BEGIN:     "BEGIN",
	SEQUENCE:  "SEQUENCE",
	RECOVER:   "RECOVER",
	USING:     "USING",
	END:       "END",

	// Keywords — OOP
	CLASS:       "CLASS",
	ENDCLASS:    "ENDCLASS",
	DATA:        "DATA",
	METHOD:      "METHOD",
	INHERIT:     "INHERIT",
	FROM:        "FROM",
	CONSTRUCTOR: "CONSTRUCTOR",
	DESTRUCTOR:  "DESTRUCTOR",
	INLINE_KW:   "INLINE",
	OPERATOR_KW: "OPERATOR",
	ACCESS:      "ACCESS",
	ASSIGN_KW:   "ASSIGN",

	// Keywords — xBase commands
	USE:        "USE",
	ALIAS:      "ALIAS",
	SELECT:     "SELECT",
	GO:         "GO",
	GOTO:       "GOTO",
	TOP:        "TOP",
	BOTTOM:     "BOTTOM",
	SKIP_KW:    "SKIP",
	SEEK:       "SEEK",
	SOFTSEEK:   "SOFTSEEK",
	REPLACE:    "REPLACE",
	WITH:       "WITH",
	APPEND:     "APPEND",
	BLANK:      "BLANK",
	DELETE_KW:  "DELETE",
	RECALL:     "RECALL",
	PACK:       "PACK",
	ZAP:        "ZAP",
	INDEX:      "INDEX",
	ON:         "ON",
	UNIQUE:     "UNIQUE",
	DESCENDING: "DESCENDING",
	ASCENDING:  "ASCENDING",
	SET:        "SET",
	FILTER:     "FILTER",
	RELATION:   "RELATION",
	INTO:       "INTO",
	ORDER:      "ORDER",

	// Keywords — Five extensions
	IMPORT:         "IMPORT",
	GO_KW:          "GO",
	CHANNEL:        "CHANNEL",
	SEND_KW:        "SEND",
	RECEIVE:        "RECEIVE",
	WAITGROUP:      "WAITGROUP",
	TYPE_KW:        "TYPE",
	AS:             "AS",
	DEFER_KW:       "DEFER",
	CONST_KW:       "CONST",
	QUESTION_COLON: "?:",
	WATCH_KW:       "WATCH",
	ASYNC_KW:       "ASYNC",
	AWAIT_KW:       "AWAIT",
	PARALLEL_KW:    "PARALLEL",
	ARROW_LEFT:     "<-",
	TIMEOUT_KW:     "TIMEOUT",
	SPAWN_KW:       "SPAWN",

	// Keywords — preprocessor
	PP_INCLUDE:   "#include",
	PP_DEFINE:    "#define",
	PP_UNDEF:     "#undef",
	PP_IFDEF:     "#ifdef",
	PP_IFNDEF:    "#ifndef",
	PP_ELSE:      "#else",
	PP_ENDIF:     "#endif",
	PP_COMMAND:   "#command",
	PP_TRANSLATE: "#translate",
	PP_PRAGMA:    "#pragma",
}

View File

@@ -0,0 +1,113 @@
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
package token
import "testing"
// TestLookupKeyword checks keyword recognition in several letter cases, the
// short Harbour aliases, and that ordinary names come back as IDENT.
func TestLookupKeyword(t *testing.T) {
	type lookupCase struct {
		input string
		want  Kind
	}
	cases := []lookupCase{
		{"FUNCTION", FUNCTION_KW},
		{"function", FUNCTION_KW},
		{"Function", FUNCTION_KW},
		{"FuNcTiOn", FUNCTION_KW},
		{"IF", IF},
		{"if", IF},
		{"LOCAL", LOCAL},
		{"RETURN", RETURN},
		{"USE", USE},
		{"SEEK", SEEK},
		{"CLASS", CLASS},
		{"IMPORT", IMPORT},
		{"NIL", NIL_LIT},
		// Aliases
		{"FUNC", FUNCTION_KW},
		{"PROC", PROCEDURE},
		// Not keywords
		{"myVar", IDENT},
		{"foo", IDENT},
		{"x", IDENT},
	}
	for _, tc := range cases {
		if got := LookupKeyword(tc.input); got != tc.want {
			t.Errorf("LookupKeyword(%q) = %v, want %v", tc.input, got, tc.want)
		}
	}
}
// TestGetBinaryPrecedence checks the precedence class of each binary operator
// and that non-operator kinds report PrecNone.
func TestGetBinaryPrecedence(t *testing.T) {
	type precCase struct {
		kind Kind
		want Precedence
	}
	cases := []precCase{
		{ASSIGN, PrecAssign},
		{OR, PrecOr},
		{AND, PrecAnd},
		{EQ, PrecComparison},
		{EXEQ, PrecComparison},
		{NEQ, PrecComparison},
		{LT, PrecComparison},
		{GT, PrecComparison},
		{LTE, PrecComparison},
		{GTE, PrecComparison},
		{DOLLAR, PrecComparison},
		{PLUS, PrecAddition},
		{MINUS, PrecAddition},
		{STAR, PrecMultiply},
		{SLASH, PrecMultiply},
		{PERCENT, PrecMultiply},
		{POWER, PrecPower},
		// Not binary
		{IDENT, PrecNone},
		{LPAREN, PrecNone},
		{EOF, PrecNone},
	}
	for _, tc := range cases {
		if got := GetBinaryPrecedence(tc.kind); got != tc.want {
			t.Errorf("GetBinaryPrecedence(%v) = %v, want %v", tc.kind, got, tc.want)
		}
	}
}
func TestIsRightAssociative(t *testing.T) {
if !IsRightAssociative(POWER) {
t.Error("** should be right associative")
}
if !IsRightAssociative(ASSIGN) {
t.Error(":= should be right associative")
}
if IsRightAssociative(PLUS) {
t.Error("+ should NOT be right associative")
}
}
// TestToUpper checks toUpper on lower-case, upper-case, mixed, empty, numeric
// and underscore-containing input.
func TestToUpper(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"abc", "ABC"},
		{"ABC", "ABC"},
		{"aBc", "ABC"},
		{"", ""},
		{"123", "123"},
		{"hello_world", "HELLO_WORLD"},
	}
	for _, tc := range cases {
		if got := toUpper(tc.in); got != tc.want {
			t.Errorf("toUpper(%q) = %q, want %q", tc.in, got, tc.want)
		}
	}
}
// TestKindString checks the display name of one operator kind and one
// keyword kind.
func TestKindString(t *testing.T) {
	if got := PLUS.String(); got != "+" {
		t.Errorf("PLUS.String() = %q, want %q", got, "+")
	}
	if got := FUNCTION_KW.String(); got != "FUNCTION" {
		t.Errorf("FUNCTION_KW.String() = %q", got)
	}
}