Five v0.9 — Harbour + Go fusion language
- Compiler: PP → Lexer → Parser → Analyzer → Gengo pipeline
- Parser: 232/236 (98%) Harbour compatibility, registry-based dispatch
- RTL: 351 Harbour-compatible functions
- RDD: DBF/NTX/CDX engines with Rushmore bitmap optimization
- Go Interop: IMPORT + pkg.Func() + obj:Method() with FastPath (15M calls/sec)
- HB_FUNC API: Full Harbour C API compatible Go bridge
- Concurrency: SPAWN/LAUNCH/GOROUTINE, <-, WATCH, PARALLEL FOR, ASYNC/AWAIT
- Extensions: Multi-return, DEFER, Slice, f-string, Nil-safe ?:, CONST
- Macro Compiler: Runtime AST parsing and evaluation
- Debugger: TUI debugger with source display, breakpoints, stepping
- FRB: Native + Pcode dual mode runtime binary
- Tests: 13 packages ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
446
compiler/analyzer/analyzer.go
Normal file
446
compiler/analyzer/analyzer.go
Normal file
@@ -0,0 +1,446 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// analyzer.go — Semantic analysis pass for Five AST.
|
||||
//
|
||||
// Runs AFTER parsing, BEFORE code generation.
|
||||
// Checks:
|
||||
// 1. Variable declaration: all LOCAL vars declared before use
|
||||
// 2. Scope analysis: LOCAL vs PRIVATE vs PUBLIC vs FIELD
|
||||
// 3. Undeclared variable warnings
|
||||
// 4. Unused variable warnings
|
||||
// 5. Function signature validation
|
||||
// 6. Type hints (when available)
|
||||
|
||||
package analyzer
|
||||
|
||||
import (
	"fmt"
	"sort"
	"strings"

	"five/compiler/ast"
	"five/compiler/token"
)
|
||||
|
||||
// Diagnostic represents an analysis warning or error.
|
||||
type Diagnostic struct {
|
||||
Pos token.Position
|
||||
Message string
|
||||
Severity Severity
|
||||
}
|
||||
|
||||
// Severity classifies how serious a Diagnostic is.
type Severity int

// Severity levels. SevError is the zero value.
const (
	SevError   Severity = iota // Must fix
	SevWarning                 // Should fix
	SevHint                    // Optional improvement
)
|
||||
|
||||
func (d Diagnostic) String() string {
|
||||
prefix := "HINT"
|
||||
switch d.Severity {
|
||||
case SevError:
|
||||
prefix = "ERROR"
|
||||
case SevWarning:
|
||||
prefix = "WARN"
|
||||
}
|
||||
return fmt.Sprintf("%s:%d:%d: %s: %s", d.Pos.File, d.Pos.Line, d.Pos.Col, prefix, d.Message)
|
||||
}
|
||||
|
||||
// Scope tracks declared variables in a function.
|
||||
type Scope struct {
|
||||
Name string // function name
|
||||
Declared map[string]VarInfo // upper(name) → info
|
||||
Used map[string]bool // upper(name) → was used
|
||||
Parent *Scope // outer scope (for blocks)
|
||||
}
|
||||
|
||||
// VarInfo holds info about a declared variable.
|
||||
type VarInfo struct {
|
||||
Name string
|
||||
Pos token.Position
|
||||
Kind ast.VarScope // LOCAL, STATIC, FIELD, etc.
|
||||
IsParam bool
|
||||
}
|
||||
|
||||
// Analyzer performs semantic analysis on a parsed AST file.
|
||||
type Analyzer struct {
|
||||
file *ast.File
|
||||
diagnostics []Diagnostic
|
||||
scope *Scope
|
||||
funcNames map[string]bool // declared function names
|
||||
}
|
||||
|
||||
// Analyze runs semantic analysis and returns diagnostics.
|
||||
func Analyze(file *ast.File) []Diagnostic {
|
||||
a := &Analyzer{
|
||||
file: file,
|
||||
funcNames: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Phase 1: Collect all function names
|
||||
for _, d := range file.Decls {
|
||||
switch decl := d.(type) {
|
||||
case *ast.FuncDecl:
|
||||
a.funcNames[strings.ToUpper(decl.Name)] = true
|
||||
case *ast.ClassDecl:
|
||||
a.funcNames[strings.ToUpper(decl.Name)] = true
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: Analyze each function
|
||||
for _, d := range file.Decls {
|
||||
switch decl := d.(type) {
|
||||
case *ast.FuncDecl:
|
||||
a.analyzeFunc(decl)
|
||||
}
|
||||
}
|
||||
|
||||
return a.diagnostics
|
||||
}
|
||||
|
||||
func (a *Analyzer) analyzeFunc(fn *ast.FuncDecl) {
|
||||
a.scope = &Scope{
|
||||
Name: fn.Name,
|
||||
Declared: make(map[string]VarInfo),
|
||||
Used: make(map[string]bool),
|
||||
}
|
||||
|
||||
// Register parameters as declared
|
||||
for _, p := range fn.Params {
|
||||
a.scope.Declared[strings.ToUpper(p.Name)] = VarInfo{
|
||||
Name: p.Name,
|
||||
Pos: p.NamePos,
|
||||
IsParam: true,
|
||||
}
|
||||
}
|
||||
|
||||
// Register LOCAL/STATIC declarations
|
||||
for _, d := range fn.Decls {
|
||||
if vd, ok := d.(*ast.VarDecl); ok {
|
||||
for _, v := range vd.Vars {
|
||||
a.scope.Declared[strings.ToUpper(v.Name)] = VarInfo{
|
||||
Name: v.Name,
|
||||
Pos: v.NamePos,
|
||||
Kind: vd.Scope,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Analyze body statements
|
||||
for _, stmt := range fn.Body {
|
||||
a.analyzeStmt(stmt)
|
||||
}
|
||||
|
||||
// Check for unused variables
|
||||
for name, info := range a.scope.Declared {
|
||||
if !a.scope.Used[name] && !info.IsParam {
|
||||
// Skip common patterns: loop vars, error vars
|
||||
lower := strings.ToLower(info.Name)
|
||||
if lower == "i" || lower == "j" || lower == "k" || lower == "n" ||
|
||||
lower == "err" || lower == "_" {
|
||||
continue
|
||||
}
|
||||
a.hint(info.Pos, "unused variable '%s'", info.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Analyzer) analyzeStmt(stmt ast.Stmt) {
|
||||
if stmt == nil {
|
||||
return
|
||||
}
|
||||
switch s := stmt.(type) {
|
||||
case *ast.ExprStmt:
|
||||
a.analyzeExpr(s.X)
|
||||
case *ast.ReturnStmt:
|
||||
if s.Value != nil {
|
||||
a.analyzeExpr(s.Value)
|
||||
}
|
||||
for _, v := range s.Values {
|
||||
a.analyzeExpr(v)
|
||||
}
|
||||
case *ast.IfStmt:
|
||||
a.analyzeExpr(s.Cond)
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
for _, ei := range s.ElseIfs {
|
||||
a.analyzeExpr(ei.Cond)
|
||||
for _, st := range ei.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
}
|
||||
for _, st := range s.ElseBody {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.DoWhileStmt:
|
||||
a.analyzeExpr(s.Cond)
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.ForStmt:
|
||||
a.markUsed(s.Var)
|
||||
a.analyzeExpr(s.Start)
|
||||
a.analyzeExpr(s.To)
|
||||
if s.Step != nil {
|
||||
a.analyzeExpr(s.Step)
|
||||
}
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.ForEachStmt:
|
||||
a.markUsed(s.Var)
|
||||
a.analyzeExpr(s.Collection)
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.SwitchStmt:
|
||||
a.analyzeExpr(s.Expr)
|
||||
for _, c := range s.Cases {
|
||||
a.analyzeExpr(c.Value)
|
||||
for _, st := range c.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
}
|
||||
for _, st := range s.Otherwise {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.SeqStmt:
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
for _, st := range s.RecoverBody {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.QOutStmt:
|
||||
for _, e := range s.Exprs {
|
||||
a.analyzeExpr(e)
|
||||
}
|
||||
case *ast.VarDecl:
|
||||
// Mid-function LOCAL — register
|
||||
for _, v := range s.Vars {
|
||||
a.scope.Declared[strings.ToUpper(v.Name)] = VarInfo{
|
||||
Name: v.Name,
|
||||
Pos: v.NamePos,
|
||||
Kind: s.Scope,
|
||||
}
|
||||
if v.Init != nil {
|
||||
a.analyzeExpr(v.Init)
|
||||
}
|
||||
}
|
||||
case *ast.MultiAssignStmt:
|
||||
for _, name := range s.Targets {
|
||||
if name != "_" {
|
||||
a.markUsed(name)
|
||||
}
|
||||
}
|
||||
for _, v := range s.Values {
|
||||
a.analyzeExpr(v)
|
||||
}
|
||||
case *ast.DeferStmt:
|
||||
a.analyzeExpr(s.Call)
|
||||
case *ast.ChanSendStmt:
|
||||
a.analyzeExpr(s.Chan)
|
||||
a.analyzeExpr(s.Value)
|
||||
case *ast.WatchStmt:
|
||||
for _, c := range s.Cases {
|
||||
if c.RecvChan != nil {
|
||||
a.analyzeExpr(c.RecvChan)
|
||||
}
|
||||
if c.SendChan != nil {
|
||||
a.analyzeExpr(c.SendChan)
|
||||
}
|
||||
if c.SendVal != nil {
|
||||
a.analyzeExpr(c.SendVal)
|
||||
}
|
||||
if c.RecvVar != "" {
|
||||
a.markUsed(c.RecvVar)
|
||||
}
|
||||
for _, st := range c.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
}
|
||||
for _, st := range s.Otherwise {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.ParallelForStmt:
|
||||
a.markUsed(s.Var)
|
||||
a.analyzeExpr(s.Start)
|
||||
a.analyzeExpr(s.To)
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
case *ast.TimeoutStmt:
|
||||
a.analyzeExpr(s.Duration)
|
||||
for _, st := range s.Body {
|
||||
a.analyzeStmt(st)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Analyzer) analyzeExpr(expr ast.Expr) {
|
||||
if expr == nil {
|
||||
return
|
||||
}
|
||||
switch e := expr.(type) {
|
||||
case *ast.IdentExpr:
|
||||
a.checkVarUsage(e.Name, e.NamePos)
|
||||
case *ast.BinaryExpr:
|
||||
a.analyzeExpr(e.Left)
|
||||
a.analyzeExpr(e.Right)
|
||||
case *ast.UnaryExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
case *ast.PostfixExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
case *ast.AssignExpr:
|
||||
a.analyzeExpr(e.Left)
|
||||
a.analyzeExpr(e.Right)
|
||||
case *ast.CallExpr:
|
||||
a.analyzeExpr(e.Func)
|
||||
for _, arg := range e.Args {
|
||||
a.analyzeExpr(arg)
|
||||
}
|
||||
case *ast.SendExpr:
|
||||
a.analyzeExpr(e.Object)
|
||||
for _, arg := range e.Args {
|
||||
a.analyzeExpr(arg)
|
||||
}
|
||||
case *ast.IndexExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
a.analyzeExpr(e.Index)
|
||||
case *ast.SliceExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
if e.Low != nil {
|
||||
a.analyzeExpr(e.Low)
|
||||
}
|
||||
if e.High != nil {
|
||||
a.analyzeExpr(e.High)
|
||||
}
|
||||
case *ast.DotExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
case *ast.ArrayLitExpr:
|
||||
for _, item := range e.Items {
|
||||
a.analyzeExpr(item)
|
||||
}
|
||||
case *ast.HashLitExpr:
|
||||
for i := range e.Keys {
|
||||
a.analyzeExpr(e.Keys[i])
|
||||
a.analyzeExpr(e.Values[i])
|
||||
}
|
||||
case *ast.BlockExpr:
|
||||
a.analyzeExpr(e.Body)
|
||||
case *ast.AliasExpr:
|
||||
a.analyzeExpr(e.Alias)
|
||||
a.analyzeExpr(e.Field)
|
||||
case *ast.MacroExpr:
|
||||
a.analyzeExpr(e.Expr)
|
||||
case *ast.RefExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
case *ast.NilSafeExpr:
|
||||
a.analyzeExpr(e.X)
|
||||
for _, arg := range e.Args {
|
||||
a.analyzeExpr(arg)
|
||||
}
|
||||
case *ast.ChanRecvExpr:
|
||||
a.analyzeExpr(e.Chan)
|
||||
case *ast.AsyncExpr:
|
||||
a.analyzeExpr(e.Call)
|
||||
case *ast.AwaitExpr:
|
||||
a.analyzeExpr(e.Future)
|
||||
}
|
||||
}
|
||||
|
||||
// checkVarUsage verifies a variable is declared and marks it used.
|
||||
func (a *Analyzer) checkVarUsage(name string, pos token.Position) {
|
||||
upper := strings.ToUpper(name)
|
||||
|
||||
// Skip well-known RTL functions and constants
|
||||
if a.isKnownFunction(upper) || a.isBuiltinConstant(upper) {
|
||||
return
|
||||
}
|
||||
|
||||
// Mark as used
|
||||
a.markUsed(name)
|
||||
|
||||
// Check if declared in current scope
|
||||
if _, ok := a.scope.Declared[upper]; ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Not declared — warn (could be MEMVAR, FIELD, or typo)
|
||||
a.warn(pos, "undeclared variable '%s' (missing LOCAL?)", name)
|
||||
}
|
||||
|
||||
func (a *Analyzer) markUsed(name string) {
|
||||
if a.scope != nil {
|
||||
a.scope.Used[strings.ToUpper(name)] = true
|
||||
}
|
||||
}
|
||||
|
||||
func (a *Analyzer) isKnownFunction(name string) bool {
|
||||
// Check declared functions in this file
|
||||
if a.funcNames[name] {
|
||||
return true
|
||||
}
|
||||
// Common RTL functions
|
||||
rtl := map[string]bool{
|
||||
"LEN": true, "SUBSTR": true, "LEFT": true, "RIGHT": true,
|
||||
"UPPER": true, "LOWER": true, "TRIM": true, "LTRIM": true, "RTRIM": true,
|
||||
"STR": true, "VAL": true, "STRTRAN": true, "AT": true, "RAT": true,
|
||||
"SPACE": true, "REPLICATE": true, "PADR": true, "PADL": true, "PADC": true,
|
||||
"VALTYPE": true, "TYPE": true, "EMPTY": true, "HB_ISSTRING": true,
|
||||
"EVAL": true, "AEVAL": true, "ASCAN": true, "ASORT": true,
|
||||
"AADD": true, "ADEL": true, "AINS": true, "ASIZE": true, "ACOPY": true, "ACLONE": true,
|
||||
"ARRAY": true, "HASH": true, "HB_HASH": true,
|
||||
"DTOC": true, "CTOD": true, "DTOS": true, "DATE": true, "TIME": true, "YEAR": true, "MONTH": true, "DAY": true,
|
||||
"QOUT": true, "QQOUT": true, "OUTSTD": true, "ALERT": true,
|
||||
"INKEY": true, "LASTKEY": true, "CHR": true, "ASC": true,
|
||||
"FILE": true, "FOPEN": true, "FCLOSE": true, "FREAD": true, "FWRITE": true,
|
||||
"IIF": true, "IF": true, "STRZERO": true, "TRANSFORM": true,
|
||||
"FIELDNAME": true, "FIELDPUT": true, "FIELDGET": true, "FCOUNT": true,
|
||||
"ALIAS": true, "DBAPPEND": true, "DBDELETE": true, "DBSKIP": true,
|
||||
"DBGOTO": true, "DBGOTOP": true, "DBGOBOTTOM": true, "DBCOMMIT": true,
|
||||
"RECNO": true, "RECCOUNT": true, "EOF": true, "BOF": true, "FOUND": true,
|
||||
"CHANNEL": true, "CHSEND": true, "CHRECEIVE": true,
|
||||
"SLEEP": true, "HB_IDLEADD": true, "SECONDS": true,
|
||||
"ERRORBLOCK": true, "BREAK": true, "PCOUNT": true, "PROCNAME": true,
|
||||
"SETPOS": true, "ROW": true, "COL": true, "MAXROW": true, "MAXCOL": true,
|
||||
"SETCOLOR": true, "DISPBOX": true, "DISPBEGIN": true, "DISPEND": true,
|
||||
"HB_SYMBOL_UNUSED": true, "HB_DEFAULT": true, "HB_NTOS": true,
|
||||
}
|
||||
return rtl[name]
|
||||
}
|
||||
|
||||
func (a *Analyzer) isBuiltinConstant(name string) bool {
|
||||
constants := map[string]bool{
|
||||
"NIL": true, "TRUE": true, "FALSE": true,
|
||||
"SELF": true, "SUPER": true,
|
||||
"K_ESC": true, "K_ENTER": true, "K_UP": true, "K_DOWN": true,
|
||||
"K_LEFT": true, "K_RIGHT": true, "K_PGUP": true, "K_PGDN": true,
|
||||
}
|
||||
return constants[name]
|
||||
}
|
||||
|
||||
// --- Diagnostic helpers ---
|
||||
|
||||
func (a *Analyzer) diag(sev Severity, pos token.Position, format string, args ...interface{}) {
|
||||
a.diagnostics = append(a.diagnostics, Diagnostic{
|
||||
Pos: pos,
|
||||
Message: fmt.Sprintf(format, args...),
|
||||
Severity: sev,
|
||||
})
|
||||
}
|
||||
|
||||
func (a *Analyzer) errorf(pos token.Position, format string, args ...interface{}) {
|
||||
a.diag(SevError, pos, format, args...)
|
||||
}
|
||||
|
||||
func (a *Analyzer) warn(pos token.Position, format string, args ...interface{}) {
|
||||
a.diag(SevWarning, pos, format, args...)
|
||||
}
|
||||
|
||||
func (a *Analyzer) hint(pos token.Position, format string, args ...interface{}) {
|
||||
a.diag(SevHint, pos, format, args...)
|
||||
}
|
||||
136
compiler/analyzer/analyzer_test.go
Normal file
136
compiler/analyzer/analyzer_test.go
Normal file
@@ -0,0 +1,136 @@
|
||||
package analyzer
|
||||
|
||||
import (
|
||||
"five/compiler/parser"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func analyze(t *testing.T, source string) []Diagnostic {
|
||||
t.Helper()
|
||||
file, errs := parser.Parse("test.prg", source)
|
||||
if len(errs) > 0 {
|
||||
t.Fatalf("parse error: %s", errs[0])
|
||||
}
|
||||
return Analyze(file)
|
||||
}
|
||||
|
||||
func TestCleanCode(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
PROCEDURE Main()
|
||||
LOCAL cName, nAge
|
||||
cName := "Charles"
|
||||
nAge := 30
|
||||
? cName, nAge
|
||||
RETURN
|
||||
`)
|
||||
for _, d := range diags {
|
||||
if d.Severity == SevError || d.Severity == SevWarning {
|
||||
t.Errorf("unexpected diagnostic: %s", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUndeclaredVariable(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
PROCEDURE Main()
|
||||
LOCAL cName
|
||||
cName := "Charles"
|
||||
? cName, nAge
|
||||
RETURN
|
||||
`)
|
||||
found := false
|
||||
for _, d := range diags {
|
||||
if strings.Contains(d.Message, "undeclared") && strings.Contains(d.Message, "nAge") {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Error("expected 'undeclared variable nAge' warning")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnusedVariable(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
PROCEDURE Main()
|
||||
LOCAL cUsed, cNeverTouched
|
||||
cUsed := "hello"
|
||||
? cUsed
|
||||
RETURN
|
||||
`)
|
||||
found := false
|
||||
for _, d := range diags {
|
||||
if strings.Contains(d.Message, "unused") && strings.Contains(d.Message, "cNeverTouched") {
|
||||
found = true
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
t.Error("expected 'unused variable cNeverTouched' hint")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParamsDeclared(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
FUNCTION Add(a, b)
|
||||
LOCAL nResult
|
||||
nResult := a + b
|
||||
RETURN nResult
|
||||
`)
|
||||
for _, d := range diags {
|
||||
if d.Severity == SevError || d.Severity == SevWarning {
|
||||
t.Errorf("unexpected: %s", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiFunction(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
PROCEDURE Main()
|
||||
LOCAL n
|
||||
n := GetValue()
|
||||
? n
|
||||
RETURN
|
||||
|
||||
FUNCTION GetValue()
|
||||
LOCAL x
|
||||
x := 42
|
||||
RETURN x
|
||||
`)
|
||||
for _, d := range diags {
|
||||
if d.Severity == SevWarning {
|
||||
t.Errorf("unexpected warning: %s", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestForLoopVar(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
PROCEDURE Main()
|
||||
LOCAL i, aData
|
||||
aData := {1, 2, 3}
|
||||
FOR i := 1 TO Len(aData)
|
||||
? aData[i]
|
||||
NEXT
|
||||
RETURN
|
||||
`)
|
||||
for _, d := range diags {
|
||||
if d.Severity == SevWarning {
|
||||
t.Errorf("unexpected: %s", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiAssignDeclared(t *testing.T) {
|
||||
diags := analyze(t, `
|
||||
PROCEDURE Main()
|
||||
LOCAL cName, nAge
|
||||
cName, nAge := "Charles", 30
|
||||
? cName, nAge
|
||||
RETURN
|
||||
`)
|
||||
for _, d := range diags {
|
||||
if d.Severity == SevWarning {
|
||||
t.Errorf("unexpected: %s", d)
|
||||
}
|
||||
}
|
||||
}
|
||||
930
compiler/ast/ast.go
Normal file
930
compiler/ast/ast.go
Normal file
@@ -0,0 +1,930 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// AST node definitions for the Five language.
|
||||
//
|
||||
// Design references:
|
||||
// - Harbour: HB_EXPR (hbcompdf.h:349) — expression union with ExprType discriminant
|
||||
// - Harbour: HB_HFUNC (hbcompdf.h:497) — function with separated pLocals/pStatics/pFields/pMemvars
|
||||
// - tsgo: Node with Kind discriminant + nodeData interface (internal/ast/ast.go)
|
||||
//
|
||||
// Key Harbour rules applied:
|
||||
// - LOCAL/STATIC/FIELD declarations must appear at function top, before executable code
|
||||
// - FuncDecl separates Decls (declarations) from Body (executable statements)
|
||||
// - (expr)->field for dynamic alias access (HB_ET_ALIASEXPR)
|
||||
// - &variable for macro (6 subtypes from Harbour: VAR, SYMBOL, ALIASED, EXPR, LIST, PARE)
|
||||
package ast
|
||||
|
||||
import "five/compiler/token"
|
||||
|
||||
// --- Interfaces ---
|
||||
|
||||
// Node is the base interface for all AST nodes.
|
||||
type Node interface {
|
||||
Pos() token.Position
|
||||
End() token.Position
|
||||
}
|
||||
|
||||
// Expr represents an expression node (produces a value).
|
||||
type Expr interface {
|
||||
Node
|
||||
exprNode()
|
||||
}
|
||||
|
||||
// Stmt represents a statement node (performs an action).
|
||||
type Stmt interface {
|
||||
Node
|
||||
stmtNode()
|
||||
}
|
||||
|
||||
// Decl represents a declaration node (LOCAL, STATIC, FIELD, etc.).
|
||||
type Decl interface {
|
||||
Node
|
||||
declNode()
|
||||
}
|
||||
|
||||
// --- Program (top-level) ---
|
||||
|
||||
// File represents a single .prg source file.
|
||||
type File struct {
|
||||
Name string // filename
|
||||
Imports []*ImportDecl
|
||||
Decls []Decl // top-level: FUNCTION, PROCEDURE, CLASS, etc.
|
||||
}
|
||||
|
||||
func (f *File) Pos() token.Position {
|
||||
if len(f.Decls) > 0 {
|
||||
return f.Decls[0].Pos()
|
||||
}
|
||||
return token.Position{}
|
||||
}
|
||||
func (f *File) End() token.Position {
|
||||
if len(f.Decls) > 0 {
|
||||
return f.Decls[len(f.Decls)-1].End()
|
||||
}
|
||||
return token.Position{}
|
||||
}
|
||||
|
||||
// --- Declarations ---
|
||||
|
||||
// ImportDecl: IMPORT "package/path" or IMPORT _ "package/path"
|
||||
type ImportDecl struct {
|
||||
ImportPos token.Position
|
||||
Alias string // "" = normal, "_" = blank import, "name" = alias
|
||||
Path string // package path
|
||||
}
|
||||
|
||||
func (d *ImportDecl) Pos() token.Position { return d.ImportPos }
|
||||
func (d *ImportDecl) End() token.Position { return d.ImportPos }
|
||||
func (d *ImportDecl) declNode() {}
|
||||
|
||||
// FuncDecl represents FUNCTION or PROCEDURE.
|
||||
// Harbour: HB_HFUNC — pLocals, pStatics, pFields separated from pcode.
|
||||
// LOCAL/STATIC/FIELD must appear before executable code.
|
||||
type FuncDecl struct {
|
||||
FuncPos token.Position
|
||||
Name string
|
||||
IsProc bool // PROCEDURE (no return value)
|
||||
Params []*ParamDecl // declared parameters
|
||||
Decls []Decl // LOCAL, STATIC, FIELD — must come first
|
||||
Body []Stmt // executable statements — after declarations
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (d *FuncDecl) Pos() token.Position { return d.FuncPos }
|
||||
func (d *FuncDecl) End() token.Position { return d.EndPos }
|
||||
func (d *FuncDecl) declNode() {}
|
||||
|
||||
// ParamDecl represents a function parameter.
|
||||
type ParamDecl struct {
|
||||
NamePos token.Position
|
||||
Name string
|
||||
ByRef bool // @param or passed by reference
|
||||
AsType string // optional type hint: AS NUMERIC, AS STRING, etc.
|
||||
}
|
||||
|
||||
func (d *ParamDecl) Pos() token.Position { return d.NamePos }
|
||||
func (d *ParamDecl) End() token.Position { return d.NamePos }
|
||||
func (d *ParamDecl) declNode() {}
|
||||
|
||||
// VarDecl represents LOCAL, STATIC, PRIVATE, PUBLIC, FIELD declarations.
|
||||
// Harbour: LOCAL must be at function top (before executable code).
|
||||
// PRIVATE/PUBLIC can appear anywhere (runtime memvar).
|
||||
type VarDecl struct {
|
||||
DeclPos token.Position
|
||||
Scope VarScope
|
||||
Vars []*VarInit // one or more: LOCAL a := 1, b := 2, c
|
||||
}
|
||||
|
||||
func (d *VarDecl) Pos() token.Position { return d.DeclPos }
|
||||
func (d *VarDecl) End() token.Position { return d.DeclPos }
|
||||
func (d *VarDecl) declNode() {}
|
||||
func (d *VarDecl) stmtNode() {} // PRIVATE/PUBLIC can appear as statements
|
||||
|
||||
// VarScope identifies the storage class of a declared variable.
type VarScope int

// Storage classes. ScopeLocal is the zero value.
const (
	ScopeLocal   VarScope = iota // LOCAL — stack, function-top only
	ScopeStatic                  // STATIC — module-level, function-top only
	ScopePrivate                 // PRIVATE — runtime memvar, anywhere
	ScopePublic                  // PUBLIC — runtime memvar, anywhere
	ScopeField                   // FIELD — database field declaration, function-top only
)
|
||||
|
||||
// VarInit represents a single variable with optional initializer.
|
||||
type VarInit struct {
|
||||
NamePos token.Position
|
||||
Name string
|
||||
Init Expr // nil if no initializer
|
||||
AsType string // optional type hint
|
||||
}
|
||||
|
||||
// ClassDecl represents CLASS ... ENDCLASS.
|
||||
type ClassDecl struct {
|
||||
ClassPos token.Position
|
||||
Name string
|
||||
ParentName string // INHERIT FROM parent
|
||||
Members []Decl // DATA, METHOD, ACCESS, ASSIGN declarations
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (d *ClassDecl) Pos() token.Position { return d.ClassPos }
|
||||
func (d *ClassDecl) End() token.Position { return d.EndPos }
|
||||
func (d *ClassDecl) declNode() {}
|
||||
|
||||
// DataDecl represents DATA member in a class.
|
||||
type DataDecl struct {
|
||||
DataPos token.Position
|
||||
Name string
|
||||
Init Expr // INIT expression (nil if none)
|
||||
AsType string // AS type hint
|
||||
}
|
||||
|
||||
func (d *DataDecl) Pos() token.Position { return d.DataPos }
|
||||
func (d *DataDecl) End() token.Position { return d.DataPos }
|
||||
func (d *DataDecl) declNode() {}
|
||||
|
||||
// MethodDecl represents METHOD declaration in a class or standalone.
|
||||
type MethodDecl struct {
|
||||
MethodPos token.Position
|
||||
Name string
|
||||
ClassName string // METHOD name CLASS classname (standalone)
|
||||
Params []*ParamDecl
|
||||
IsInline bool // INLINE method
|
||||
IsSetGet bool // METHOD name(x) SETGET — getter if no arg, setter if arg
|
||||
IsAccess bool // ACCESS name METHOD getterName
|
||||
IsAssign bool // ASSIGN name METHOD setterName
|
||||
AccessName string // property name for ACCESS/ASSIGN
|
||||
Decls []Decl
|
||||
Body []Stmt
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (d *MethodDecl) Pos() token.Position { return d.MethodPos }
|
||||
func (d *MethodDecl) End() token.Position { return d.EndPos }
|
||||
func (d *MethodDecl) declNode() {}
|
||||
|
||||
// GoDumpDecl represents inline Go code from #pragma BEGINDUMP ... #pragma ENDDUMP.
|
||||
// Five extension: allows embedding raw Go code directly in PRG files.
|
||||
type GoDumpDecl struct {
|
||||
DumpPos token.Position
|
||||
Code string // raw Go source code
|
||||
}
|
||||
|
||||
func (d *GoDumpDecl) Pos() token.Position { return d.DumpPos }
|
||||
func (d *GoDumpDecl) End() token.Position { return d.DumpPos }
|
||||
func (d *GoDumpDecl) declNode() {}
|
||||
|
||||
// --- Expressions ---
|
||||
|
||||
// LiteralExpr represents a literal value.
|
||||
// Harbour: HB_ET_NIL, HB_ET_NUMERIC, HB_ET_STRING, HB_ET_LOGICAL, HB_ET_DATE, HB_ET_TIMESTAMP
|
||||
type LiteralExpr struct {
|
||||
ValuePos token.Position
|
||||
Kind token.Kind // INT, LONG, DOUBLE, STRING, TRUE, FALSE, NIL_LIT, DATE_LIT
|
||||
Value string // raw literal text
|
||||
}
|
||||
|
||||
func (e *LiteralExpr) Pos() token.Position { return e.ValuePos }
|
||||
func (e *LiteralExpr) End() token.Position { return e.ValuePos }
|
||||
func (e *LiteralExpr) exprNode() {}
|
||||
|
||||
// IdentExpr represents a variable or function name.
|
||||
// Harbour: HB_ET_VARIABLE, HB_ET_FUNNAME
|
||||
type IdentExpr struct {
|
||||
NamePos token.Position
|
||||
Name string
|
||||
}
|
||||
|
||||
func (e *IdentExpr) Pos() token.Position { return e.NamePos }
|
||||
func (e *IdentExpr) End() token.Position { return e.NamePos }
|
||||
func (e *IdentExpr) exprNode() {}
|
||||
|
||||
// SelfExpr represents :: (Self access in class method).
|
||||
// Harbour: HB_ET_SELF
|
||||
type SelfExpr struct {
|
||||
ColonPos token.Position
|
||||
}
|
||||
|
||||
func (e *SelfExpr) Pos() token.Position { return e.ColonPos }
|
||||
func (e *SelfExpr) End() token.Position { return e.ColonPos }
|
||||
func (e *SelfExpr) exprNode() {}
|
||||
|
||||
// BinaryExpr represents a binary operation.
|
||||
// Harbour: HB_EO_PLUS, HB_EO_MINUS, HB_EO_EQUAL, etc.
|
||||
type BinaryExpr struct {
|
||||
Left Expr
|
||||
OpPos token.Position
|
||||
Op token.Kind
|
||||
Right Expr
|
||||
}
|
||||
|
||||
func (e *BinaryExpr) Pos() token.Position { return e.Left.Pos() }
|
||||
func (e *BinaryExpr) End() token.Position { return e.Right.End() }
|
||||
func (e *BinaryExpr) exprNode() {}
|
||||
|
||||
// UnaryExpr represents a prefix unary operation.
|
||||
// Harbour: HB_EO_NEGATE, HB_EO_NOT, HB_EO_PREINC, HB_EO_PREDEC
|
||||
type UnaryExpr struct {
|
||||
OpPos token.Position
|
||||
Op token.Kind // MINUS, NOT, INC, DEC
|
||||
X Expr
|
||||
}
|
||||
|
||||
func (e *UnaryExpr) Pos() token.Position { return e.OpPos }
|
||||
func (e *UnaryExpr) End() token.Position { return e.X.End() }
|
||||
func (e *UnaryExpr) exprNode() {}
|
||||
|
||||
// PostfixExpr represents postfix ++ or --.
|
||||
// Harbour: HB_EO_POSTINC, HB_EO_POSTDEC
|
||||
type PostfixExpr struct {
|
||||
X Expr
|
||||
OpPos token.Position
|
||||
Op token.Kind // INC, DEC
|
||||
}
|
||||
|
||||
func (e *PostfixExpr) Pos() token.Position { return e.X.Pos() }
|
||||
func (e *PostfixExpr) End() token.Position { return e.OpPos }
|
||||
func (e *PostfixExpr) exprNode() {}
|
||||
|
||||
// AssignExpr represents assignment: x := value, x += value, etc.
|
||||
// Harbour: HB_EO_ASSIGN, HB_EO_PLUSEQ, etc.
|
||||
type AssignExpr struct {
|
||||
Left Expr
|
||||
OpPos token.Position
|
||||
Op token.Kind // ASSIGN, PLUSEQ, MINUSEQ, etc.
|
||||
Right Expr
|
||||
}
|
||||
|
||||
func (e *AssignExpr) Pos() token.Position { return e.Left.Pos() }
|
||||
func (e *AssignExpr) End() token.Position { return e.Right.End() }
|
||||
func (e *AssignExpr) exprNode() {}
|
||||
|
||||
// CallExpr represents a function call: func(args...)
|
||||
// Harbour: HB_ET_FUNCALL — pFunName + pParms
|
||||
type CallExpr struct {
|
||||
Func Expr // function expression (IdentExpr, or macro)
|
||||
LParen token.Position
|
||||
Args []Expr
|
||||
RParen token.Position
|
||||
}
|
||||
|
||||
func (e *CallExpr) Pos() token.Position { return e.Func.Pos() }
|
||||
func (e *CallExpr) End() token.Position { return e.RParen }
|
||||
func (e *CallExpr) exprNode() {}
|
||||
|
||||
// DotExpr represents package member access: pkg.Member
|
||||
// Used for Go package function calls: sql.Open(), fmt.Println()
|
||||
type DotExpr struct {
|
||||
X Expr // package (IdentExpr)
|
||||
DotPos token.Position
|
||||
Member string // function/field name
|
||||
}
|
||||
|
||||
func (e *DotExpr) Pos() token.Position { return e.X.Pos() }
|
||||
func (e *DotExpr) End() token.Position { return e.DotPos }
|
||||
func (e *DotExpr) exprNode() {}
|
||||
|
||||
// SendExpr represents method call: obj:method(args...)
|
||||
// Harbour: HB_ET_SEND — pObject + szMessage/pMessage + pParms
|
||||
type SendExpr struct {
|
||||
Object Expr
|
||||
ColonPos token.Position
|
||||
Method string // static message name
|
||||
MacroMethod Expr // if ¯o message (nil for static)
|
||||
HasParens bool // true if () present (method call vs field access)
|
||||
LParen token.Position
|
||||
Args []Expr
|
||||
RParen token.Position
|
||||
IsAssign bool // obj:prop := value (setter)
|
||||
}
|
||||
|
||||
func (e *SendExpr) Pos() token.Position { return e.Object.Pos() }
|
||||
func (e *SendExpr) End() token.Position { return e.RParen }
|
||||
func (e *SendExpr) exprNode() {}
|
||||
|
||||
// IndexExpr represents array index: arr[index]
|
||||
// Harbour: HB_ET_ARRAYAT
|
||||
type IndexExpr struct {
|
||||
X Expr
|
||||
LBracket token.Position
|
||||
Index Expr
|
||||
RBracket token.Position
|
||||
}
|
||||
|
||||
func (e *IndexExpr) Pos() token.Position { return e.X.Pos() }
|
||||
func (e *IndexExpr) End() token.Position { return e.RBracket }
|
||||
func (e *IndexExpr) exprNode() {}
|
||||
|
||||
// AliasExpr represents field access: alias->field or (expr)->field
|
||||
// Harbour: HB_ET_ALIASVAR, HB_ET_ALIASEXPR
|
||||
type AliasExpr struct {
|
||||
Alias Expr // IdentExpr for static alias, any Expr for (dynamic)->field
|
||||
ArrowPos token.Position
|
||||
Field Expr // IdentExpr or MacroExpr
|
||||
}
|
||||
|
||||
func (e *AliasExpr) Pos() token.Position { return e.Alias.Pos() }
|
||||
func (e *AliasExpr) End() token.Position { return e.Field.End() }
|
||||
func (e *AliasExpr) exprNode() {}
|
||||
|
||||
// MacroExpr represents macro expansion: &variable or &(expression)
|
||||
// Harbour: HB_ET_MACRO with 6 subtypes
|
||||
type MacroExpr struct {
|
||||
AmpPos token.Position
|
||||
Expr Expr // variable or parenthesized expression
|
||||
}
|
||||
|
||||
func (e *MacroExpr) Pos() token.Position { return e.AmpPos }
|
||||
func (e *MacroExpr) End() token.Position { return e.Expr.End() }
|
||||
func (e *MacroExpr) exprNode() {}
|
||||
|
||||
// BlockExpr represents a code block: {|params| body}
|
||||
// Harbour: HB_ET_CODEBLOCK — pLocals + pExprList
|
||||
type BlockExpr struct {
|
||||
LBrace token.Position
|
||||
Params []string // parameter names (between | |)
|
||||
Body Expr // single expression (or comma-separated list)
|
||||
RBrace token.Position
|
||||
}
|
||||
|
||||
func (e *BlockExpr) Pos() token.Position { return e.LBrace }
|
||||
func (e *BlockExpr) End() token.Position { return e.RBrace }
|
||||
func (e *BlockExpr) exprNode() {}
|
||||
|
||||
// ArrayLitExpr represents a literal array: {1, 2, 3}
|
||||
// Harbour: HB_ET_ARRAY
|
||||
type ArrayLitExpr struct {
|
||||
LBrace token.Position
|
||||
Items []Expr
|
||||
RBrace token.Position
|
||||
}
|
||||
|
||||
func (e *ArrayLitExpr) Pos() token.Position { return e.LBrace }
|
||||
func (e *ArrayLitExpr) End() token.Position { return e.RBrace }
|
||||
func (e *ArrayLitExpr) exprNode() {}
|
||||
|
||||
// HashLitExpr represents a literal hash: {"a" => 1, "b" => 2}
|
||||
// Harbour: HB_ET_HASH
|
||||
type HashLitExpr struct {
|
||||
LBrace token.Position
|
||||
Keys []Expr
|
||||
Values []Expr
|
||||
RBrace token.Position
|
||||
}
|
||||
|
||||
func (e *HashLitExpr) Pos() token.Position { return e.LBrace }
|
||||
func (e *HashLitExpr) End() token.Position { return e.RBrace }
|
||||
func (e *HashLitExpr) exprNode() {}
|
||||
|
||||
// IIfExpr represents inline if: IIF(cond, trueVal, falseVal)
|
||||
// Harbour: HB_ET_IIF
|
||||
type IIfExpr struct {
|
||||
IfPos token.Position
|
||||
Cond Expr
|
||||
True Expr
|
||||
False Expr
|
||||
}
|
||||
|
||||
func (e *IIfExpr) Pos() token.Position { return e.IfPos }
|
||||
func (e *IIfExpr) End() token.Position { return e.False.End() }
|
||||
func (e *IIfExpr) exprNode() {}
|
||||
|
||||
// RefExpr represents pass-by-reference: @variable
|
||||
// Harbour: HB_ET_REFERENCE, HB_ET_VARREF, HB_ET_FUNREF
|
||||
type RefExpr struct {
|
||||
AtPos token.Position
|
||||
X Expr
|
||||
}
|
||||
|
||||
func (e *RefExpr) Pos() token.Position { return e.AtPos }
|
||||
func (e *RefExpr) End() token.Position { return e.X.End() }
|
||||
func (e *RefExpr) exprNode() {}
|
||||
|
||||
// --- Statements ---
|
||||
|
||||
// ExprStmt wraps an expression as a statement (function calls, assignments).
|
||||
type ExprStmt struct {
|
||||
X Expr
|
||||
}
|
||||
|
||||
func (s *ExprStmt) Pos() token.Position { return s.X.Pos() }
|
||||
func (s *ExprStmt) End() token.Position { return s.X.End() }
|
||||
func (s *ExprStmt) stmtNode() {}
|
||||
|
||||
// ReturnStmt represents RETURN [expr].
|
||||
type ReturnStmt struct {
|
||||
ReturnPos token.Position
|
||||
Value Expr // first/only return value (nil for bare RETURN)
|
||||
Values []Expr // multi-return: RETURN a, b, c (nil if single)
|
||||
}
|
||||
|
||||
func (s *ReturnStmt) Pos() token.Position { return s.ReturnPos }
|
||||
func (s *ReturnStmt) End() token.Position {
|
||||
if s.Value != nil {
|
||||
return s.Value.End()
|
||||
}
|
||||
return s.ReturnPos
|
||||
}
|
||||
func (s *ReturnStmt) stmtNode() {}
|
||||
|
||||
// QOutStmt represents ? expr, expr, ... (shorthand for QOut).
|
||||
type QOutStmt struct {
|
||||
QPos token.Position
|
||||
IsQQ bool // true for ?? (QQOut)
|
||||
Exprs []Expr
|
||||
}
|
||||
|
||||
func (s *QOutStmt) Pos() token.Position { return s.QPos }
|
||||
func (s *QOutStmt) End() token.Position {
|
||||
if len(s.Exprs) > 0 {
|
||||
return s.Exprs[len(s.Exprs)-1].End()
|
||||
}
|
||||
return s.QPos
|
||||
}
|
||||
func (s *QOutStmt) stmtNode() {}
|
||||
|
||||
// IfStmt represents IF / ELSEIF / ELSE / ENDIF.
|
||||
// Harbour: uses PHB_ELSEIF chain for fixups.
|
||||
type IfStmt struct {
|
||||
IfPos token.Position
|
||||
Cond Expr
|
||||
Body []Stmt
|
||||
ElseIfs []*ElseIfClause
|
||||
ElseBody []Stmt // nil if no ELSE
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
type ElseIfClause struct {
|
||||
ElseIfPos token.Position
|
||||
Cond Expr
|
||||
Body []Stmt
|
||||
}
|
||||
|
||||
func (s *IfStmt) Pos() token.Position { return s.IfPos }
|
||||
func (s *IfStmt) End() token.Position { return s.EndPos }
|
||||
func (s *IfStmt) stmtNode() {}
|
||||
|
||||
// DoWhileStmt represents DO WHILE cond ... ENDDO.
|
||||
type DoWhileStmt struct {
|
||||
DoPos token.Position
|
||||
Cond Expr
|
||||
Body []Stmt
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (s *DoWhileStmt) Pos() token.Position { return s.DoPos }
|
||||
func (s *DoWhileStmt) End() token.Position { return s.EndPos }
|
||||
func (s *DoWhileStmt) stmtNode() {}
|
||||
|
||||
// ForStmt represents FOR var := start TO end [STEP step] ... NEXT.
|
||||
type ForStmt struct {
|
||||
ForPos token.Position
|
||||
Var string
|
||||
Start Expr
|
||||
To Expr
|
||||
Step Expr // nil for default step 1
|
||||
Body []Stmt
|
||||
NextPos token.Position
|
||||
}
|
||||
|
||||
func (s *ForStmt) Pos() token.Position { return s.ForPos }
|
||||
func (s *ForStmt) End() token.Position { return s.NextPos }
|
||||
func (s *ForStmt) stmtNode() {}
|
||||
|
||||
// ForEachStmt represents FOR EACH var IN collection ... NEXT.
|
||||
// Harbour: HB_ENUMERATOR structure.
|
||||
type ForEachStmt struct {
|
||||
ForPos token.Position
|
||||
Var string
|
||||
Collection Expr
|
||||
Descend bool // FOR EACH DESCEND
|
||||
Body []Stmt
|
||||
NextPos token.Position
|
||||
}
|
||||
|
||||
func (s *ForEachStmt) Pos() token.Position { return s.ForPos }
|
||||
func (s *ForEachStmt) End() token.Position { return s.NextPos }
|
||||
func (s *ForEachStmt) stmtNode() {}
|
||||
|
||||
// SwitchStmt represents SWITCH expr ... CASE ... OTHERWISE ... END.
|
||||
// Harbour: HB_SWITCHCMD structure.
|
||||
type SwitchStmt struct {
|
||||
SwitchPos token.Position
|
||||
Expr Expr
|
||||
Cases []*CaseClause
|
||||
Otherwise []Stmt // nil if no OTHERWISE
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
type CaseClause struct {
|
||||
CasePos token.Position
|
||||
Value Expr // case value
|
||||
Body []Stmt
|
||||
}
|
||||
|
||||
func (s *SwitchStmt) Pos() token.Position { return s.SwitchPos }
|
||||
func (s *SwitchStmt) End() token.Position { return s.EndPos }
|
||||
func (s *SwitchStmt) stmtNode() {}
|
||||
|
||||
// SeqStmt represents BEGIN SEQUENCE ... RECOVER [USING var] ... END.
|
||||
type SeqStmt struct {
|
||||
BeginPos token.Position
|
||||
Body []Stmt
|
||||
RecoverVar string // variable name after USING (empty if none)
|
||||
RecoverBody []Stmt // nil if no RECOVER
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (s *SeqStmt) Pos() token.Position { return s.BeginPos }
|
||||
func (s *SeqStmt) End() token.Position { return s.EndPos }
|
||||
func (s *SeqStmt) stmtNode() {}
|
||||
|
||||
// === Five Go Extensions ===
|
||||
|
||||
// MultiAssignStmt: a, b, c := expr or a, b := Func()
|
||||
// Also handles: a, b := b, a (parallel swap)
|
||||
// Blank identifier _ discards the value.
|
||||
type MultiAssignStmt struct {
|
||||
AssignPos token.Position
|
||||
Targets []string // variable names ("_" = discard)
|
||||
Values []Expr // right-hand side expressions
|
||||
}
|
||||
|
||||
func (s *MultiAssignStmt) Pos() token.Position { return s.AssignPos }
|
||||
func (s *MultiAssignStmt) End() token.Position { return s.AssignPos }
|
||||
func (s *MultiAssignStmt) stmtNode() {}
|
||||
|
||||
// DeferStmt: DEFER expr (execute when function returns)
|
||||
type DeferStmt struct {
|
||||
DeferPos token.Position
|
||||
Call Expr // expression to defer (usually a method/function call)
|
||||
}
|
||||
|
||||
func (s *DeferStmt) Pos() token.Position { return s.DeferPos }
|
||||
func (s *DeferStmt) End() token.Position { return s.DeferPos }
|
||||
func (s *DeferStmt) stmtNode() {}
|
||||
|
||||
// ConstDecl: CONST block with optional auto-increment
|
||||
type ConstDecl struct {
|
||||
ConstPos token.Position
|
||||
Items []ConstItem
|
||||
}
|
||||
|
||||
type ConstItem struct {
|
||||
Name string
|
||||
Value Expr // nil = auto-increment from previous
|
||||
}
|
||||
|
||||
func (d *ConstDecl) Pos() token.Position { return d.ConstPos }
|
||||
func (d *ConstDecl) End() token.Position { return d.ConstPos }
|
||||
func (d *ConstDecl) declNode() {}
|
||||
|
||||
// SliceExpr: a[low:high] — sub-array or sub-string
|
||||
type SliceExpr struct {
|
||||
X Expr
|
||||
LBracket token.Position
|
||||
Low Expr // nil = from start
|
||||
High Expr // nil = to end
|
||||
RBracket token.Position
|
||||
}
|
||||
|
||||
func (e *SliceExpr) Pos() token.Position { return e.X.Pos() }
|
||||
func (e *SliceExpr) End() token.Position { return e.RBracket }
|
||||
func (e *SliceExpr) exprNode() {}
|
||||
|
||||
// NilSafeExpr: obj?:Method() — returns NIL if obj is NIL
|
||||
type NilSafeExpr struct {
|
||||
X Expr
|
||||
QPos token.Position
|
||||
Method string
|
||||
Args []Expr
|
||||
HasParens bool
|
||||
}
|
||||
|
||||
func (e *NilSafeExpr) Pos() token.Position { return e.X.Pos() }
|
||||
func (e *NilSafeExpr) End() token.Position { return e.QPos }
|
||||
func (e *NilSafeExpr) exprNode() {}
|
||||
|
||||
// InterpolatedString: f"Hello {name}, age {age}"
|
||||
type InterpolatedString struct {
|
||||
FPos token.Position
|
||||
Parts []Expr // alternating: LiteralExpr (text), other Expr (interpolated)
|
||||
}
|
||||
|
||||
func (e *InterpolatedString) Pos() token.Position { return e.FPos }
|
||||
func (e *InterpolatedString) End() token.Position { return e.FPos }
|
||||
func (e *InterpolatedString) exprNode() {}
|
||||
|
||||
// === Five Concurrency Extensions ===
|
||||
|
||||
// ChanSendStmt: ch <- value
|
||||
type ChanSendStmt struct {
|
||||
ChanPos token.Position
|
||||
Chan Expr // channel expression
|
||||
Value Expr // value to send
|
||||
}
|
||||
|
||||
func (s *ChanSendStmt) Pos() token.Position { return s.ChanPos }
|
||||
func (s *ChanSendStmt) End() token.Position { return s.ChanPos }
|
||||
func (s *ChanSendStmt) stmtNode() {}
|
||||
|
||||
// ChanRecvExpr: <- ch (receive from channel, used as expression)
|
||||
type ChanRecvExpr struct {
|
||||
ArrowPos token.Position
|
||||
Chan Expr
|
||||
}
|
||||
|
||||
func (e *ChanRecvExpr) Pos() token.Position { return e.ArrowPos }
|
||||
func (e *ChanRecvExpr) End() token.Position { return e.ArrowPos }
|
||||
func (e *ChanRecvExpr) exprNode() {}
|
||||
|
||||
// WatchStmt: WATCH / CASE <- ch / CASE ch <- val / OTHERWISE / ENDWATCH
|
||||
type WatchStmt struct {
|
||||
WatchPos token.Position
|
||||
Cases []*WatchCase
|
||||
Otherwise []Stmt
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
type WatchCase struct {
|
||||
CasePos token.Position
|
||||
RecvChan Expr // CASE val := <- ch (receive)
|
||||
RecvVar string // variable name for received value ("" if none)
|
||||
SendChan Expr // CASE ch <- val (send)
|
||||
SendVal Expr // value to send
|
||||
Body []Stmt
|
||||
}
|
||||
|
||||
func (s *WatchStmt) Pos() token.Position { return s.WatchPos }
|
||||
func (s *WatchStmt) End() token.Position { return s.EndPos }
|
||||
func (s *WatchStmt) stmtNode() {}
|
||||
|
||||
// GoBlockStmt: GO { ... } — inline goroutine
|
||||
type GoBlockStmt struct {
|
||||
GoPos token.Position
|
||||
Block *BlockExpr // code block to execute
|
||||
}
|
||||
|
||||
func (s *GoBlockStmt) Pos() token.Position { return s.GoPos }
|
||||
func (s *GoBlockStmt) End() token.Position { return s.GoPos }
|
||||
func (s *GoBlockStmt) stmtNode() {}
|
||||
|
||||
// ParallelForStmt: PARALLEL FOR i := 1 TO n / body / NEXT
|
||||
type ParallelForStmt struct {
|
||||
ForPos token.Position
|
||||
Var string
|
||||
Start Expr
|
||||
To Expr
|
||||
Step Expr // nil = default 1
|
||||
Body []Stmt
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (s *ParallelForStmt) Pos() token.Position { return s.ForPos }
|
||||
func (s *ParallelForStmt) End() token.Position { return s.EndPos }
|
||||
func (s *ParallelForStmt) stmtNode() {}
|
||||
|
||||
// AsyncExpr: ASYNC expr — returns a future/channel
|
||||
type AsyncExpr struct {
|
||||
AsyncPos token.Position
|
||||
Call Expr
|
||||
}
|
||||
|
||||
func (e *AsyncExpr) Pos() token.Position { return e.AsyncPos }
|
||||
func (e *AsyncExpr) End() token.Position { return e.AsyncPos }
|
||||
func (e *AsyncExpr) exprNode() {}
|
||||
|
||||
// AwaitExpr: AWAIT future — blocks until result ready
|
||||
type AwaitExpr struct {
|
||||
AwaitPos token.Position
|
||||
Future Expr
|
||||
}
|
||||
|
||||
func (e *AwaitExpr) Pos() token.Position { return e.AwaitPos }
|
||||
func (e *AwaitExpr) End() token.Position { return e.AwaitPos }
|
||||
func (e *AwaitExpr) exprNode() {}
|
||||
|
||||
// TimeoutStmt: WITH TIMEOUT n / body / ENDWITH
|
||||
type TimeoutStmt struct {
|
||||
WithPos token.Position
|
||||
Duration Expr // timeout in seconds
|
||||
Body []Stmt
|
||||
EndPos token.Position
|
||||
}
|
||||
|
||||
func (s *TimeoutStmt) Pos() token.Position { return s.WithPos }
|
||||
func (s *TimeoutStmt) End() token.Position { return s.EndPos }
|
||||
func (s *TimeoutStmt) stmtNode() {}
|
||||
|
||||
// === End Five Go Extensions ===
|
||||
|
||||
// ExitStmt represents EXIT (break out of loop).
|
||||
type ExitStmt struct {
|
||||
ExitPos token.Position
|
||||
}
|
||||
|
||||
func (s *ExitStmt) Pos() token.Position { return s.ExitPos }
|
||||
func (s *ExitStmt) End() token.Position { return s.ExitPos }
|
||||
func (s *ExitStmt) stmtNode() {}
|
||||
|
||||
// LoopStmt represents LOOP (continue to next iteration).
|
||||
type LoopStmt struct {
|
||||
LoopPos token.Position
|
||||
}
|
||||
|
||||
func (s *LoopStmt) Pos() token.Position { return s.LoopPos }
|
||||
func (s *LoopStmt) End() token.Position { return s.LoopPos }
|
||||
func (s *LoopStmt) stmtNode() {}
|
||||
|
||||
// --- xBase command statements ---
|
||||
|
||||
// UseCmd represents USE [file] [VIA driver] [ALIAS name] [EXCLUSIVE|SHARED]
|
||||
type UseCmd struct {
|
||||
UsePos token.Position
|
||||
File Expr // filename expression (nil = close current)
|
||||
Via string // RDD driver name
|
||||
Alias string // alias name
|
||||
}
|
||||
|
||||
func (s *UseCmd) Pos() token.Position { return s.UsePos }
|
||||
func (s *UseCmd) End() token.Position { return s.UsePos }
|
||||
func (s *UseCmd) stmtNode() {}
|
||||
|
||||
// SelectCmd represents SELECT area
|
||||
type SelectCmd struct {
|
||||
SelectPos token.Position
|
||||
Area Expr // area number or alias name
|
||||
}
|
||||
|
||||
func (s *SelectCmd) Pos() token.Position { return s.SelectPos }
|
||||
func (s *SelectCmd) End() token.Position { return s.SelectPos }
|
||||
func (s *SelectCmd) stmtNode() {}
|
||||
|
||||
// GoCmd represents GO TOP / GO BOTTOM / GO recno / GOTO recno
|
||||
type GoCmd struct {
|
||||
GoPos token.Position
|
||||
Direction string // "TOP", "BOTTOM", or ""
|
||||
RecNo Expr // record number expression (nil for TOP/BOTTOM)
|
||||
}
|
||||
|
||||
func (s *GoCmd) Pos() token.Position { return s.GoPos }
|
||||
func (s *GoCmd) End() token.Position { return s.GoPos }
|
||||
func (s *GoCmd) stmtNode() {}
|
||||
|
||||
// SkipCmd represents SKIP [n]
|
||||
type SkipCmd struct {
|
||||
SkipPos token.Position
|
||||
Count Expr // nil for SKIP 1
|
||||
}
|
||||
|
||||
func (s *SkipCmd) Pos() token.Position { return s.SkipPos }
|
||||
func (s *SkipCmd) End() token.Position { return s.SkipPos }
|
||||
func (s *SkipCmd) stmtNode() {}
|
||||
|
||||
// SeekCmd represents SEEK expr [SOFTSEEK]
|
||||
type SeekCmd struct {
|
||||
SeekPos token.Position
|
||||
Key Expr
|
||||
SoftSeek bool
|
||||
}
|
||||
|
||||
func (s *SeekCmd) Pos() token.Position { return s.SeekPos }
|
||||
func (s *SeekCmd) End() token.Position { return s.SeekPos }
|
||||
func (s *SeekCmd) stmtNode() {}
|
||||
|
||||
// ReplaceCmd represents REPLACE field WITH expr [, field WITH expr ...]
|
||||
type ReplaceCmd struct {
|
||||
ReplacePos token.Position
|
||||
Fields []ReplaceField
|
||||
}
|
||||
|
||||
type ReplaceField struct {
|
||||
Field Expr // field expression (may include alias)
|
||||
Value Expr
|
||||
}
|
||||
|
||||
func (s *ReplaceCmd) Pos() token.Position { return s.ReplacePos }
|
||||
func (s *ReplaceCmd) End() token.Position { return s.ReplacePos }
|
||||
func (s *ReplaceCmd) stmtNode() {}
|
||||
|
||||
// AppendCmd represents APPEND BLANK
|
||||
type AppendCmd struct {
|
||||
AppendPos token.Position
|
||||
}
|
||||
|
||||
func (s *AppendCmd) Pos() token.Position { return s.AppendPos }
|
||||
func (s *AppendCmd) End() token.Position { return s.AppendPos }
|
||||
func (s *AppendCmd) stmtNode() {}
|
||||
|
||||
// DeleteCmd represents DELETE (mark current record for deletion)
|
||||
type DeleteCmd struct {
|
||||
DeletePos token.Position
|
||||
}
|
||||
|
||||
func (s *DeleteCmd) Pos() token.Position { return s.DeletePos }
|
||||
func (s *DeleteCmd) End() token.Position { return s.DeletePos }
|
||||
func (s *DeleteCmd) stmtNode() {}
|
||||
|
||||
// IndexCmd represents INDEX ON expr TO file [FOR cond] [UNIQUE] [DESCENDING]
|
||||
type IndexCmd struct {
|
||||
IndexPos token.Position
|
||||
KeyExpr Expr
|
||||
File Expr
|
||||
ForCond Expr // nil if no FOR
|
||||
Unique bool
|
||||
Descending bool
|
||||
}
|
||||
|
||||
func (s *IndexCmd) Pos() token.Position { return s.IndexPos }
|
||||
func (s *IndexCmd) End() token.Position { return s.IndexPos }
|
||||
func (s *IndexCmd) stmtNode() {}
|
||||
|
||||
// SetCmd represents SET commands: SET FILTER TO expr, SET RELATION TO expr INTO alias, etc.
|
||||
type SetCmd struct {
|
||||
SetPos token.Position
|
||||
Setting string // "FILTER", "RELATION", "ORDER", "INDEX", etc.
|
||||
Expr Expr // the value expression
|
||||
Extra string // extra info (INTO alias, etc.)
|
||||
}
|
||||
|
||||
func (s *SetCmd) Pos() token.Position { return s.SetPos }
|
||||
func (s *SetCmd) End() token.Position { return s.SetPos }
|
||||
func (s *SetCmd) stmtNode() {}
|
||||
|
||||
// AtSayCmd represents @ row, col SAY expr [PICTURE pic]
|
||||
type AtSayCmd struct {
|
||||
AtPos token.Position
|
||||
Row Expr
|
||||
Col Expr
|
||||
SayExpr Expr
|
||||
Picture Expr // nil if no PICTURE
|
||||
}
|
||||
|
||||
func (s *AtSayCmd) Pos() token.Position { return s.AtPos }
|
||||
func (s *AtSayCmd) End() token.Position { return s.AtPos }
|
||||
func (s *AtSayCmd) stmtNode() {}
|
||||
|
||||
// AtGetCmd represents @ row, col GET var [PICTURE pic] [VALID valid] [WHEN when]
|
||||
type AtGetCmd struct {
|
||||
AtPos token.Position
|
||||
Row Expr
|
||||
Col Expr
|
||||
Var Expr // the variable expression
|
||||
VarName string // variable name as string
|
||||
Picture Expr // nil if no PICTURE
|
||||
Valid Expr // nil if no VALID (code block)
|
||||
When Expr // nil if no WHEN (code block)
|
||||
}
|
||||
|
||||
func (s *AtGetCmd) Pos() token.Position { return s.AtPos }
|
||||
func (s *AtGetCmd) End() token.Position { return s.AtPos }
|
||||
func (s *AtGetCmd) stmtNode() {}
|
||||
|
||||
// AtSayGetCmd represents @ row, col SAY expr GET var [PICTURE pic] [VALID valid] [WHEN when]
|
||||
type AtSayGetCmd struct {
|
||||
AtPos token.Position
|
||||
Row Expr
|
||||
Col Expr
|
||||
SayExpr Expr
|
||||
Var Expr
|
||||
VarName string
|
||||
Picture Expr
|
||||
Valid Expr
|
||||
When Expr
|
||||
}
|
||||
|
||||
func (s *AtSayGetCmd) Pos() token.Position { return s.AtPos }
|
||||
func (s *AtSayGetCmd) End() token.Position { return s.AtPos }
|
||||
func (s *AtSayGetCmd) stmtNode() {}
|
||||
|
||||
// ReadCmd represents READ [SAVE]
|
||||
type ReadCmd struct {
|
||||
ReadPos token.Position
|
||||
Save bool
|
||||
}
|
||||
|
||||
func (s *ReadCmd) Pos() token.Position { return s.ReadPos }
|
||||
func (s *ReadCmd) End() token.Position { return s.ReadPos }
|
||||
func (s *ReadCmd) stmtNode() {}
|
||||
179
compiler/gengo/gen_class.go
Normal file
179
compiler/gengo/gen_class.go
Normal file
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// CLASS code generation for Five.
|
||||
// Generates Go code that registers classes with hbrt.ClassDef.
|
||||
package gengo
|
||||
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"five/compiler/token"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// emitClassDecl generates class registration code.
|
||||
// CLASS Person
|
||||
// DATA cName INIT ""
|
||||
// DATA nAge INIT 0
|
||||
// METHOD New(cName, nAge)
|
||||
// ENDCLASS
|
||||
// →
|
||||
// func init() { hbrt.NewClassDef("Person").AddData(...).Register() }
|
||||
func (g *Generator) emitClassDecl(cls *ast.ClassDecl) {
|
||||
className := strings.ToUpper(cls.Name)
|
||||
varName := "_cls_" + className
|
||||
|
||||
g.writeln(fmt.Sprintf("var %s uint16", varName))
|
||||
g.writeln("")
|
||||
g.writeln("func init() {")
|
||||
g.indent++
|
||||
g.writeln(fmt.Sprintf("_def := hbrt.NewClassDef(%q)", cls.Name))
|
||||
|
||||
// Parent
|
||||
if cls.ParentName != "" {
|
||||
g.writeln(fmt.Sprintf("_def.InheritFrom(%q)", cls.ParentName))
|
||||
}
|
||||
|
||||
// DATA fields
|
||||
for _, m := range cls.Members {
|
||||
if dd, ok := m.(*ast.DataDecl); ok {
|
||||
initVal := "hbrt.MakeNil()"
|
||||
if dd.Init != nil {
|
||||
initVal = g.exprToGoLiteral(dd.Init)
|
||||
}
|
||||
g.writeln(fmt.Sprintf("_def.AddData(%q, %s)", strings.ToUpper(dd.Name), initVal))
|
||||
}
|
||||
}
|
||||
|
||||
// METHOD declarations (link to Go functions)
|
||||
for _, m := range cls.Members {
|
||||
if md, ok := m.(*ast.MethodDecl); ok {
|
||||
upperName := strings.ToUpper(md.Name)
|
||||
goFuncName := fmt.Sprintf("HB_%s_%s", className, upperName)
|
||||
|
||||
if md.IsSetGet {
|
||||
// SETGET: register as both getter and setter
|
||||
// Getter = method name, Setter = _name
|
||||
g.writeln(fmt.Sprintf("_def.AddMethod(%q, %s)", upperName, goFuncName))
|
||||
g.writeln(fmt.Sprintf("_def.AddMethod(%q, %s)", "_"+upperName, goFuncName))
|
||||
} else if md.IsAccess {
|
||||
// ACCESS propName METHOD getterName
|
||||
g.writeln(fmt.Sprintf("_def.AddMethod(%q, %s)", strings.ToUpper(md.AccessName), goFuncName))
|
||||
} else if md.IsAssign {
|
||||
// ASSIGN propName METHOD setterName
|
||||
g.writeln(fmt.Sprintf("_def.AddMethod(%q, %s)", "_"+strings.ToUpper(md.AccessName), goFuncName))
|
||||
} else {
|
||||
g.writeln(fmt.Sprintf("_def.AddMethod(%q, %s)", upperName, goFuncName))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g.writeln(fmt.Sprintf("%s = _def.Register()", varName))
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.writeln("")
|
||||
|
||||
// Also need a constructor function: Person() returns new object
|
||||
// This is called as Person():New(...)
|
||||
g.writeln(fmt.Sprintf("func HB_%s_CTOR(t *hbrt.Thread) {", className))
|
||||
g.indent++
|
||||
g.writeln("t.Frame(0, 0)")
|
||||
g.writeln("defer t.EndProc()")
|
||||
g.writeln(fmt.Sprintf("t.PushValue(hbrt.NewObject(%s))", varName))
|
||||
g.writeln("t.RetValue()")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.writeln("")
|
||||
|
||||
// Constructor symbol already added in Generate() symbol collection phase
|
||||
}
|
||||
|
||||
// emitMethodDeclStandalone generates a standalone METHOD ... CLASS ... implementation.
|
||||
func (g *Generator) emitMethodDeclStandalone(md *ast.MethodDecl) {
|
||||
if md.ClassName == "" {
|
||||
return // in-class method declaration only (no body)
|
||||
}
|
||||
|
||||
className := strings.ToUpper(md.ClassName)
|
||||
methodName := strings.ToUpper(md.Name)
|
||||
goFuncName := fmt.Sprintf("HB_%s_%s", className, methodName)
|
||||
|
||||
nParams := len(md.Params)
|
||||
nLocals := 0
|
||||
for _, d := range md.Decls {
|
||||
if vd, ok := d.(*ast.VarDecl); ok {
|
||||
nLocals += len(vd.Vars)
|
||||
}
|
||||
}
|
||||
|
||||
g.writeln(fmt.Sprintf("func %s(t *hbrt.Thread) {", goFuncName))
|
||||
g.indent++
|
||||
g.writeln(fmt.Sprintf("t.Frame(%d, %d)", nParams, nLocals))
|
||||
g.writeln("defer t.EndProc()")
|
||||
g.writeln("")
|
||||
|
||||
// Build local map
|
||||
localMap := make(localMap)
|
||||
idx := 1
|
||||
for _, p := range md.Params {
|
||||
localMap[p.Name] = idx
|
||||
idx++
|
||||
}
|
||||
for _, d := range md.Decls {
|
||||
if vd, ok := d.(*ast.VarDecl); ok {
|
||||
for _, v := range vd.Vars {
|
||||
if v.Init != nil {
|
||||
g.emitExpr(v.Init)
|
||||
g.writeln(fmt.Sprintf("t.PopLocal(%d)", idx))
|
||||
}
|
||||
localMap[v.Name] = idx
|
||||
idx++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g.curLocals = localMap
|
||||
|
||||
// Emit body
|
||||
for _, stmt := range md.Body {
|
||||
g.emitStmt(stmt, localMap)
|
||||
}
|
||||
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.writeln("")
|
||||
}
|
||||
|
||||
// exprToGoLiteral converts a simple AST expression to a Go literal string.
|
||||
// Used for DATA INIT values.
|
||||
func (g *Generator) exprToGoLiteral(expr ast.Expr) string {
|
||||
switch e := expr.(type) {
|
||||
case *ast.LiteralExpr:
|
||||
switch e.Kind {
|
||||
case token.INT:
|
||||
return fmt.Sprintf("hbrt.MakeInt(%s)", e.Value)
|
||||
case token.DOUBLE:
|
||||
return fmt.Sprintf("hbrt.MakeDoubleAuto(%s)", e.Value)
|
||||
case token.STRING:
|
||||
return fmt.Sprintf("hbrt.MakeString(%q)", e.Value)
|
||||
case token.TRUE:
|
||||
return "hbrt.MakeBool(true)"
|
||||
case token.FALSE:
|
||||
return "hbrt.MakeBool(false)"
|
||||
case token.NIL_LIT:
|
||||
return "hbrt.MakeNil()"
|
||||
}
|
||||
case *ast.ArrayLitExpr:
|
||||
// {} empty array or {1,2,3}
|
||||
if len(e.Items) == 0 {
|
||||
return "hbrt.MakeArray(0)"
|
||||
}
|
||||
// Non-empty arrays need runtime construction — fall through to nil
|
||||
case *ast.HashLitExpr:
|
||||
if len(e.Keys) == 0 {
|
||||
return "hbrt.MakeHash()"
|
||||
}
|
||||
}
|
||||
return "hbrt.MakeNil()"
|
||||
}
|
||||
312
compiler/gengo/gen_cmd.go
Normal file
312
compiler/gengo/gen_cmd.go
Normal file
@@ -0,0 +1,312 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// xBase command code generation for Five.
|
||||
// Generates Go code that calls hbrdd WorkAreaManager methods.
|
||||
package gengo
|
||||
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"five/compiler/token"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func (g *Generator) emitUseCmd(s *ast.UseCmd, locals localMap) {
|
||||
if s.File == nil {
|
||||
// USE without args = close current
|
||||
g.writeln("{")
|
||||
g.indent++
|
||||
g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
|
||||
g.writeln("wa.Close()")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
return
|
||||
}
|
||||
|
||||
g.writeln("{")
|
||||
g.indent++
|
||||
g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
|
||||
g.emitExpr(s.File)
|
||||
g.writeln("_path := t.Pop2().AsString()")
|
||||
|
||||
via := "DBFNTX" // default
|
||||
if s.Via != "" {
|
||||
via = s.Via
|
||||
}
|
||||
alias := s.Alias
|
||||
|
||||
g.writeln(fmt.Sprintf("_, _err := wa.Open(%q, _path, %q, false, false)", via, alias))
|
||||
g.writeln("if _err != nil { panic(_err) }")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
}
|
||||
|
||||
func (g *Generator) emitGoCmd(s *ast.GoCmd) {
|
||||
g.writeln("{")
|
||||
g.indent++
|
||||
g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
|
||||
g.writeln("if area := wa.Current(); area != nil {")
|
||||
g.indent++
|
||||
|
||||
switch s.Direction {
|
||||
case "TOP":
|
||||
g.writeln("area.GoTop()")
|
||||
case "BOTTOM":
|
||||
g.writeln("area.GoBottom()")
|
||||
default:
|
||||
if s.RecNo != nil {
|
||||
g.emitExpr(s.RecNo)
|
||||
g.writeln("area.GoTo(uint32(t.Pop2().AsNumInt()))")
|
||||
}
|
||||
}
|
||||
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
}
|
||||
|
||||
func (g *Generator) emitSkipCmd(s *ast.SkipCmd, locals localMap) {
|
||||
g.writeln("{")
|
||||
g.indent++
|
||||
g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
|
||||
g.writeln("if area := wa.Current(); area != nil {")
|
||||
g.indent++
|
||||
|
||||
if s.Count != nil {
|
||||
g.emitExpr(s.Count)
|
||||
g.writeln("area.Skip(t.Pop2().AsNumInt())")
|
||||
} else {
|
||||
g.writeln("area.Skip(1)")
|
||||
}
|
||||
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
}
|
||||
|
||||
func (g *Generator) emitSeekCmd(s *ast.SeekCmd, locals localMap) {
|
||||
g.writeln("{")
|
||||
g.indent++
|
||||
g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
|
||||
g.writeln("if area := wa.Current(); area != nil {")
|
||||
g.indent++
|
||||
|
||||
g.emitExpr(s.Key)
|
||||
g.writeln("_key := t.Pop2()")
|
||||
g.writeln("if _idx, ok := area.(hbrdd.Indexer); ok {")
|
||||
g.indent++
|
||||
g.writeln(fmt.Sprintf("_found, _ := _idx.Seek(_key, %v, false)", s.SoftSeek))
|
||||
g.writeln("_ = _found")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
}
|
||||
|
||||
func (g *Generator) emitReplaceCmd(s *ast.ReplaceCmd, locals localMap) {
|
||||
g.writeln("{")
|
||||
g.indent++
|
||||
g.writeln("wa := t.WA.(*hbrdd.WorkAreaManager)")
|
||||
g.writeln("if area := wa.Current(); area != nil {")
|
||||
g.indent++
|
||||
|
||||
for _, rf := range s.Fields {
|
||||
// Get field name
|
||||
if ident, ok := rf.Field.(*ast.IdentExpr); ok {
|
||||
g.writeln(fmt.Sprintf("if _fi := area.(*dbf.DBFArea).FieldIndex(%q); _fi >= 0 {", ident.Name))
|
||||
g.indent++
|
||||
g.emitExpr(rf.Value)
|
||||
g.writeln(fmt.Sprintf("area.PutValue(_fi, t.Pop2())"))
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
}
|
||||
}
|
||||
g.writeln("area.Flush()")
|
||||
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
}
|
||||
|
||||
// --- @ SAY / GET / READ commands ---
|
||||
|
||||
func (g *Generator) emitAtSayCmd(s *ast.AtSayCmd) {
|
||||
// DevPos(row, col)
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"DEVPOS\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
g.emitExpr(s.Row)
|
||||
g.emitExpr(s.Col)
|
||||
g.writeln("t.Do(2)")
|
||||
|
||||
if s.Picture != nil {
|
||||
// DevOutPict(expr, pic)
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"DEVOUTPICT\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
g.emitExpr(s.SayExpr)
|
||||
g.emitExpr(s.Picture)
|
||||
g.writeln("t.Do(2)")
|
||||
} else {
|
||||
// DevOut(expr)
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"DEVOUT\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
g.emitExpr(s.SayExpr)
|
||||
g.writeln("t.Do(1)")
|
||||
}
|
||||
}
|
||||
|
||||
func (g *Generator) emitAtGetCmd(s *ast.AtGetCmd, locals localMap) {
|
||||
// AAdd(GetList, GetNew(row, col, {|_1| IIF(_1==NIL, var, var:=_1)}, "varname" [, pic] [, {valid}] [, {when}]))
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"AADD\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
|
||||
// Push GetList variable
|
||||
g.emitIdentByName("GetList", locals)
|
||||
|
||||
// GetNew(row, col, block, name, ...)
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"GETNEW\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
g.emitExpr(s.Row)
|
||||
g.emitExpr(s.Col)
|
||||
|
||||
// GET/SET block: {|_1| IIF(_1 == NIL, var, var := _1)}
|
||||
g.emitGetSetBlock(s.Var, s.VarName, locals)
|
||||
|
||||
// Variable name as string
|
||||
g.writeln(fmt.Sprintf("t.PushString(%q)", s.VarName))
|
||||
|
||||
nArgs := 4
|
||||
if s.Picture != nil {
|
||||
g.emitExpr(s.Picture)
|
||||
nArgs++
|
||||
}
|
||||
if s.Valid != nil {
|
||||
if s.Picture == nil {
|
||||
g.writeln("t.PushNil()") // placeholder for pic
|
||||
nArgs++
|
||||
}
|
||||
g.emitExpr(s.Valid)
|
||||
nArgs++
|
||||
}
|
||||
if s.When != nil {
|
||||
if s.Picture == nil && s.Valid == nil {
|
||||
g.writeln("t.PushNil()") // placeholder for pic
|
||||
g.writeln("t.PushNil()") // placeholder for valid
|
||||
nArgs += 2
|
||||
} else if s.Valid == nil {
|
||||
g.writeln("t.PushNil()") // placeholder for valid
|
||||
nArgs++
|
||||
}
|
||||
g.emitExpr(s.When)
|
||||
nArgs++
|
||||
}
|
||||
|
||||
g.writeln(fmt.Sprintf("t.Function(%d)", nArgs))
|
||||
|
||||
// AAdd(GetList, getObj) — 2 args
|
||||
g.writeln("t.Do(2)")
|
||||
|
||||
// ATail(GetList):Display()
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"ATAIL\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
g.emitIdentByName("GetList", locals)
|
||||
g.writeln("t.Function(1)")
|
||||
g.writeln(fmt.Sprintf("t.Send(\"DISPLAY\", 0)"))
|
||||
g.writeln("t.Pop() // discard Display result")
|
||||
}
|
||||
|
||||
func (g *Generator) emitAtSayGetCmd(s *ast.AtSayGetCmd, locals localMap) {
|
||||
// First: @ row, col SAY expr
|
||||
g.emitAtSayCmd(&ast.AtSayCmd{AtPos: s.AtPos, Row: s.Row, Col: s.Col, SayExpr: s.SayExpr})
|
||||
|
||||
// Then: @ Row(), Col()+1 GET var ...
|
||||
g.emitAtGetCmd(&ast.AtGetCmd{
|
||||
AtPos: s.AtPos,
|
||||
Row: &ast.CallExpr{Func: &ast.IdentExpr{Name: "Row"}, Args: nil},
|
||||
Col: &ast.BinaryExpr{Left: &ast.CallExpr{Func: &ast.IdentExpr{Name: "Col"}, Args: nil}, Op: token.PLUS, Right: &ast.LiteralExpr{Kind: token.INT, Value: "1"}}, // Col()+1
|
||||
Var: s.Var,
|
||||
VarName: s.VarName,
|
||||
Picture: s.Picture,
|
||||
Valid: s.Valid,
|
||||
When: s.When,
|
||||
}, locals)
|
||||
}
|
||||
|
||||
func (g *Generator) emitReadCmd(s *ast.ReadCmd, locals localMap) {
|
||||
// ReadModal(GetList)
|
||||
g.writeln(fmt.Sprintf("t.PushSymbol(t.VM().FindSymbol(\"READMODAL\"))"))
|
||||
g.writeln("t.PushNil()")
|
||||
g.emitIdentByName("GetList", locals)
|
||||
g.writeln("t.Do(1)")
|
||||
|
||||
if !s.Save {
|
||||
// GetList := {}
|
||||
g.writeln("t.PushValue(hbrt.MakeArray(0))")
|
||||
g.emitPopByName("GetList", locals)
|
||||
}
|
||||
}
|
||||
|
||||
// emitGetSetBlock generates a {|_1| IIF(_1 == NIL, var, var := _1)} code block.
|
||||
// Uses captured frame base + local index to access the outer variable correctly
|
||||
// even when the block is called from a different call depth (e.g., Eval inside GetNew).
|
||||
func (g *Generator) emitGetSetBlock(varExpr ast.Expr, varName string, locals localMap) {
|
||||
if idx, found := locals[varName]; found {
|
||||
// Capture the frame's localBase and index at block creation time
|
||||
g.writeln(fmt.Sprintf("{ // GET/SET block for %s", varName))
|
||||
g.indent++
|
||||
g.writeln(fmt.Sprintf("_getIdx := %d", idx))
|
||||
g.writeln("_getFrame := t.CurFrame()")
|
||||
g.writeln("_getLocals := t.LocalsSlice()")
|
||||
g.writeln("t.PushBlock(func(t2 *hbrt.Thread) {")
|
||||
g.indent++
|
||||
g.writeln("t2.Frame(1, 0)")
|
||||
g.writeln("defer t2.EndProc()")
|
||||
g.writeln("if t2.Local(1).IsNil() {")
|
||||
g.indent++
|
||||
g.writeln("t2.PushValue(_getFrame.GetLocal(_getIdx, _getLocals))")
|
||||
g.writeln("t2.RetValue()")
|
||||
g.indent--
|
||||
g.writeln("} else {")
|
||||
g.indent++
|
||||
g.writeln("_getFrame.SetLocal(_getIdx, t2.Local(1), _getLocals)")
|
||||
g.writeln("t2.PushValue(t2.Local(1))")
|
||||
g.writeln("t2.RetValue()")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
g.indent--
|
||||
g.writeln("}, 0)")
|
||||
g.indent--
|
||||
g.writeln("}")
|
||||
} else {
|
||||
// Fallback: push NIL block
|
||||
g.writeln("t.PushNil() // GET block for unresolved var")
|
||||
}
|
||||
}
|
||||
|
||||
// emitIdentByName pushes a variable by name onto the stack
|
||||
func (g *Generator) emitIdentByName(name string, locals localMap) {
|
||||
if idx, found := locals[name]; found {
|
||||
g.writeln(fmt.Sprintf("t.PushLocal(%d)", idx))
|
||||
} else if goVar, found := g.staticVars[strings.ToUpper(name)]; found {
|
||||
g.writeln(fmt.Sprintf("t.PushValue(%s)", goVar))
|
||||
} else {
|
||||
g.writeln(fmt.Sprintf("t.PushLocal(0) // UNRESOLVED: %q", name))
|
||||
}
|
||||
}
|
||||
|
||||
// emitPopByName pops stack into a variable by name
|
||||
func (g *Generator) emitPopByName(name string, locals localMap) {
|
||||
if idx, found := locals[name]; found {
|
||||
g.writeln(fmt.Sprintf("t.PopLocal(%d)", idx))
|
||||
} else if goVar, found := g.staticVars[strings.ToUpper(name)]; found {
|
||||
g.writeln(fmt.Sprintf("%s = t.Pop2()", goVar))
|
||||
} else {
|
||||
g.writeln(fmt.Sprintf("t.Pop() // cannot assign to UNRESOLVED: %q", name))
|
||||
}
|
||||
}
|
||||
25
compiler/gengo/gen_util.go
Normal file
25
compiler/gengo/gen_util.go
Normal file
@@ -0,0 +1,25 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package gengo
|
||||
|
||||
import "five/compiler/ast"
|
||||
|
||||
// hasXBaseCommands checks if the file contains any xBase commands.
|
||||
func hasXBaseCommands(file *ast.File) bool {
|
||||
for _, d := range file.Decls {
|
||||
fn, ok := d.(*ast.FuncDecl)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, s := range fn.Body {
|
||||
switch s.(type) {
|
||||
case *ast.UseCmd, *ast.GoCmd, *ast.SkipCmd, *ast.SeekCmd,
|
||||
*ast.ReplaceCmd, *ast.AppendCmd, *ast.DeleteCmd,
|
||||
*ast.SelectCmd, *ast.IndexCmd, *ast.SetCmd:
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
1610
compiler/gengo/gengo.go
Normal file
1610
compiler/gengo/gengo.go
Normal file
File diff suppressed because it is too large
Load Diff
156
compiler/gengo/gengo_test.go
Normal file
156
compiler/gengo/gengo_test.go
Normal file
@@ -0,0 +1,156 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package gengo
|
||||
|
||||
import (
|
||||
"five/compiler/parser"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func generate(t *testing.T, source string) string {
|
||||
t.Helper()
|
||||
file, errs := parser.Parse("test.prg", source)
|
||||
if len(errs) > 0 {
|
||||
for _, e := range errs {
|
||||
t.Errorf("parse error: %s", e)
|
||||
}
|
||||
t.FailNow()
|
||||
}
|
||||
return Generate(file)
|
||||
}
|
||||
|
||||
// assertContains fails the test (without stopping it) when want is not a
// substring of code, printing the full code for inspection.
func assertContains(t *testing.T, code, want string) {
	t.Helper()
	if strings.Contains(code, want) {
		return
	}
	t.Errorf("generated code missing %q\n--- code ---\n%s", want, code)
}
|
||||
|
||||
func TestGenerateHelloWorld(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
? "Hello, World!"
|
||||
RETURN NIL
|
||||
`)
|
||||
assertContains(t, code, "package main")
|
||||
assertContains(t, code, `import (`)
|
||||
assertContains(t, code, `"five/hbrt"`)
|
||||
assertContains(t, code, "func HB_MAIN(t *hbrt.Thread)")
|
||||
assertContains(t, code, "t.Frame(0, 0)")
|
||||
assertContains(t, code, "defer t.EndProc()")
|
||||
assertContains(t, code, `t.PushString("Hello, World!")`)
|
||||
assertContains(t, code, "t.Function(1)")
|
||||
assertContains(t, code, "t.PushNil()")
|
||||
assertContains(t, code, "t.RetValue()")
|
||||
assertContains(t, code, "func main()")
|
||||
assertContains(t, code, `vm.Run("MAIN")`)
|
||||
}
|
||||
|
||||
func TestGenerateArithmetic(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
LOCAL n := 10
|
||||
RETURN n + 5
|
||||
`)
|
||||
assertContains(t, code, "t.Frame(0, 1)")
|
||||
assertContains(t, code, "t.PushInt(10)")
|
||||
assertContains(t, code, "t.PopLocal(1)")
|
||||
assertContains(t, code, "t.PushLocal(1)") // n
|
||||
assertContains(t, code, "t.PushInt(5)")
|
||||
assertContains(t, code, "t.Plus()")
|
||||
assertContains(t, code, "t.RetValue()")
|
||||
}
|
||||
|
||||
func TestGenerateIfElse(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
LOCAL n := 10
|
||||
IF n > 5
|
||||
? "Big"
|
||||
ELSE
|
||||
? "Small"
|
||||
ENDIF
|
||||
RETURN NIL
|
||||
`)
|
||||
assertContains(t, code, "t.Greater()")
|
||||
assertContains(t, code, "if t.PopLogical()")
|
||||
assertContains(t, code, `t.PushString("Big")`)
|
||||
assertContains(t, code, "} else {")
|
||||
assertContains(t, code, `t.PushString("Small")`)
|
||||
}
|
||||
|
||||
func TestGenerateDoWhile(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
LOCAL i := 0
|
||||
DO WHILE i < 10
|
||||
i++
|
||||
ENDDO
|
||||
RETURN i
|
||||
`)
|
||||
assertContains(t, code, "for {")
|
||||
assertContains(t, code, "t.Less()")
|
||||
assertContains(t, code, "if !t.PopLogical() { break }")
|
||||
assertContains(t, code, "t.LocalAddInt(1, 1)") // i++
|
||||
}
|
||||
|
||||
func TestGenerateForNext(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
LOCAL i, nSum := 0
|
||||
FOR i := 1 TO 10
|
||||
nSum += i
|
||||
NEXT
|
||||
RETURN nSum
|
||||
`)
|
||||
assertContains(t, code, "t.Frame(0, 2)")
|
||||
assertContains(t, code, "for {")
|
||||
assertContains(t, code, "t.LessEqual()")
|
||||
assertContains(t, code, "t.LocalAdd(") // nSum += i
|
||||
assertContains(t, code, "t.LocalAddInt(") // i += 1
|
||||
}
|
||||
|
||||
func TestGenerateMultipleFunctions(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Double(n)
|
||||
RETURN n * 2
|
||||
|
||||
FUNCTION Main()
|
||||
? Double(21)
|
||||
RETURN NIL
|
||||
`)
|
||||
assertContains(t, code, "func HB_DOUBLE(t *hbrt.Thread)")
|
||||
assertContains(t, code, "func HB_MAIN(t *hbrt.Thread)")
|
||||
assertContains(t, code, "t.Frame(1, 0)") // Double has 1 param
|
||||
assertContains(t, code, "t.Mult()")
|
||||
assertContains(t, code, `t.PushSymbol(t.VM().FindSymbol("DOUBLE"))`)
|
||||
}
|
||||
|
||||
func TestGenerateStringConcat(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
LOCAL cName := "World"
|
||||
? "Hello, " + cName + "!"
|
||||
RETURN NIL
|
||||
`)
|
||||
assertContains(t, code, `t.PushString("Hello, ")`)
|
||||
assertContains(t, code, "t.PushLocal(1)")
|
||||
assertContains(t, code, "t.Plus()")
|
||||
assertContains(t, code, `t.PushString("!")`)
|
||||
}
|
||||
|
||||
func TestGenerateSymbolTable(t *testing.T) {
|
||||
code := generate(t, `FUNCTION Main()
|
||||
RETURN NIL
|
||||
|
||||
FUNCTION Helper()
|
||||
RETURN NIL
|
||||
`)
|
||||
assertContains(t, code, `hbrt.Sym("MAIN"`)
|
||||
assertContains(t, code, `hbrt.Sym("HELPER"`)
|
||||
assertContains(t, code, "hbrt.FsFirst")
|
||||
}
|
||||
|
||||
func TestGenerateImport(t *testing.T) {
|
||||
code := generate(t, `IMPORT "net/http"
|
||||
|
||||
FUNCTION Main()
|
||||
RETURN NIL
|
||||
`)
|
||||
assertContains(t, code, `"net/http"`)
|
||||
}
|
||||
555
compiler/genpc/genpc.go
Normal file
555
compiler/genpc/genpc.go
Normal file
@@ -0,0 +1,555 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// genpc — Five pcode generator. Compiles AST to bytecode for FRB interpreter mode.
|
||||
// Mirrors gengo's logic but emits bytecode opcodes instead of Go source code.
|
||||
|
||||
package genpc
|
||||
|
||||
import (
	"encoding/binary"
	"math"
	"strconv"
	"strings"

	"five/compiler/ast"
	"five/compiler/token"
	"five/hbrt"
)
|
||||
|
||||
// Generate compiles an AST file to a PcodeModule.
|
||||
func Generate(file *ast.File) *hbrt.PcodeModule {
|
||||
g := &generator{
|
||||
mod: &hbrt.PcodeModule{
|
||||
Name: file.Name,
|
||||
Funcs: make(map[string]*hbrt.PcodeFunc),
|
||||
},
|
||||
}
|
||||
|
||||
for _, d := range file.Decls {
|
||||
switch decl := d.(type) {
|
||||
case *ast.FuncDecl:
|
||||
g.emitFunc(decl)
|
||||
}
|
||||
}
|
||||
|
||||
return g.mod
|
||||
}
|
||||
|
||||
type generator struct {
|
||||
mod *hbrt.PcodeModule
|
||||
code []byte
|
||||
locals map[string]int
|
||||
}
|
||||
|
||||
func (g *generator) emit(b ...byte) {
|
||||
g.code = append(g.code, b...)
|
||||
}
|
||||
|
||||
func (g *generator) emitU16(v uint16) {
|
||||
var buf [2]byte
|
||||
binary.LittleEndian.PutUint16(buf[:], v)
|
||||
g.code = append(g.code, buf[:]...)
|
||||
}
|
||||
|
||||
func (g *generator) emitI32(v int32) {
|
||||
var buf [4]byte
|
||||
binary.LittleEndian.PutUint32(buf[:], uint32(v))
|
||||
g.code = append(g.code, buf[:]...)
|
||||
}
|
||||
|
||||
func (g *generator) emitI64(v int64) {
|
||||
var buf [8]byte
|
||||
binary.LittleEndian.PutUint64(buf[:], uint64(v))
|
||||
g.code = append(g.code, buf[:]...)
|
||||
}
|
||||
|
||||
func (g *generator) emitF64(v float64) {
|
||||
var buf [8]byte
|
||||
binary.LittleEndian.PutUint64(buf[:], math.Float64bits(v))
|
||||
g.code = append(g.code, buf[:]...)
|
||||
}
|
||||
|
||||
func (g *generator) emitString(op byte, s string) {
|
||||
g.emit(op)
|
||||
g.emitU16(uint16(len(s)))
|
||||
g.code = append(g.code, []byte(s)...)
|
||||
}
|
||||
|
||||
func (g *generator) pc() int {
|
||||
return len(g.code)
|
||||
}
|
||||
|
||||
// placeholder for jump offset, returns position to patch
|
||||
func (g *generator) emitJumpPlaceholder(op byte) int {
|
||||
g.emit(op)
|
||||
pos := g.pc()
|
||||
g.emitI32(0) // placeholder
|
||||
return pos
|
||||
}
|
||||
|
||||
func (g *generator) patchJump(pos int) {
|
||||
offset := int32(g.pc() - pos - 4) // relative to after the offset bytes
|
||||
binary.LittleEndian.PutUint32(g.code[pos:], uint32(offset))
|
||||
}
|
||||
|
||||
// --- Function ---
|
||||
|
||||
func (g *generator) emitFunc(fn *ast.FuncDecl) {
|
||||
g.code = nil
|
||||
g.locals = make(map[string]int)
|
||||
|
||||
// Build local map
|
||||
idx := 1
|
||||
for _, p := range fn.Params {
|
||||
g.locals[p.Name] = idx
|
||||
idx++
|
||||
}
|
||||
for _, d := range fn.Decls {
|
||||
if vd, ok := d.(*ast.VarDecl); ok && vd.Scope == ast.ScopeLocal {
|
||||
for _, v := range vd.Vars {
|
||||
g.locals[v.Name] = idx
|
||||
idx++
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, s := range fn.Body {
|
||||
if vd, ok := s.(*ast.VarDecl); ok && vd.Scope == ast.ScopeLocal {
|
||||
for _, v := range vd.Vars {
|
||||
g.locals[v.Name] = idx
|
||||
idx++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nLocals := idx - 1 - len(fn.Params)
|
||||
|
||||
// Emit LOCAL initializers
|
||||
localIdx := len(fn.Params) + 1
|
||||
for _, d := range fn.Decls {
|
||||
vd, ok := d.(*ast.VarDecl)
|
||||
if !ok || vd.Scope != ast.ScopeLocal {
|
||||
continue
|
||||
}
|
||||
for _, v := range vd.Vars {
|
||||
if v.Init != nil {
|
||||
g.emitExpr(v.Init)
|
||||
g.emit(hbrt.PcOpPopLocal)
|
||||
g.emitU16(uint16(localIdx))
|
||||
}
|
||||
localIdx++
|
||||
}
|
||||
}
|
||||
|
||||
// Emit body
|
||||
for _, s := range fn.Body {
|
||||
g.emitStmt(s)
|
||||
}
|
||||
|
||||
// Implicit return NIL
|
||||
g.emit(hbrt.PcOpPushNil)
|
||||
g.emit(hbrt.PcOpRetValue)
|
||||
|
||||
pf := &hbrt.PcodeFunc{
|
||||
Name: fn.Name,
|
||||
Code: make([]byte, len(g.code)),
|
||||
Params: len(fn.Params),
|
||||
Locals: nLocals,
|
||||
}
|
||||
copy(pf.Code, g.code)
|
||||
g.mod.Funcs[strings.ToUpper(fn.Name)] = pf
|
||||
}
|
||||
|
||||
// --- Statements ---
|
||||
|
||||
func (g *generator) emitStmt(stmt ast.Stmt) {
|
||||
switch s := stmt.(type) {
|
||||
case *ast.ReturnStmt:
|
||||
if s.Value != nil {
|
||||
g.emitExpr(s.Value)
|
||||
g.emit(hbrt.PcOpRetValue)
|
||||
} else {
|
||||
g.emit(hbrt.PcOpPushNil)
|
||||
g.emit(hbrt.PcOpRetValue)
|
||||
}
|
||||
|
||||
case *ast.ExprStmt:
|
||||
if assign, ok := s.X.(*ast.AssignExpr); ok {
|
||||
g.emitAssign(assign)
|
||||
} else if call, ok := s.X.(*ast.CallExpr); ok {
|
||||
g.emitCallStmt(call)
|
||||
} else {
|
||||
g.emitExpr(s.X)
|
||||
g.emit(hbrt.PcOpPop)
|
||||
}
|
||||
|
||||
case *ast.IfStmt:
|
||||
g.emitIf(s)
|
||||
|
||||
case *ast.DoWhileStmt:
|
||||
g.emitDoWhile(s)
|
||||
|
||||
case *ast.ForStmt:
|
||||
g.emitFor(s)
|
||||
|
||||
case *ast.ExitStmt:
|
||||
// handled by loop
|
||||
g.emit(hbrt.PcOpHalt) // placeholder
|
||||
|
||||
case *ast.QOutStmt:
|
||||
g.emitQOut(s)
|
||||
|
||||
case *ast.VarDecl:
|
||||
// Mid-function LOCAL
|
||||
for _, v := range s.Vars {
|
||||
if v.Init != nil {
|
||||
g.emitExpr(v.Init)
|
||||
if idx, ok := g.locals[v.Name]; ok {
|
||||
g.emit(hbrt.PcOpPopLocal)
|
||||
g.emitU16(uint16(idx))
|
||||
} else {
|
||||
g.emit(hbrt.PcOpPop)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
// Unsupported statement — skip
|
||||
}
|
||||
}
|
||||
|
||||
func (g *generator) emitIf(s *ast.IfStmt) {
|
||||
g.emitExpr(s.Cond)
|
||||
jumpFalse := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
|
||||
|
||||
for _, stmt := range s.Body {
|
||||
g.emitStmt(stmt)
|
||||
}
|
||||
|
||||
if len(s.ElseIfs) > 0 || len(s.ElseBody) > 0 {
|
||||
jumpEnd := g.emitJumpPlaceholder(hbrt.PcOpJump)
|
||||
g.patchJump(jumpFalse)
|
||||
|
||||
for _, elif := range s.ElseIfs {
|
||||
g.emitExpr(elif.Cond)
|
||||
nextJump := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
|
||||
for _, stmt := range elif.Body {
|
||||
g.emitStmt(stmt)
|
||||
}
|
||||
jumpEnd2 := g.emitJumpPlaceholder(hbrt.PcOpJump)
|
||||
g.patchJump(nextJump)
|
||||
_ = jumpEnd2 // will be patched by end
|
||||
}
|
||||
|
||||
for _, stmt := range s.ElseBody {
|
||||
g.emitStmt(stmt)
|
||||
}
|
||||
g.patchJump(jumpEnd)
|
||||
} else {
|
||||
g.patchJump(jumpFalse)
|
||||
}
|
||||
}
|
||||
|
||||
func (g *generator) emitDoWhile(s *ast.DoWhileStmt) {
|
||||
loopStart := g.pc()
|
||||
for _, stmt := range s.Body {
|
||||
g.emitStmt(stmt)
|
||||
}
|
||||
g.emitExpr(s.Cond)
|
||||
// Jump back if true
|
||||
g.emit(hbrt.PcOpJumpTrue)
|
||||
offset := int32(loopStart - g.pc() - 4)
|
||||
g.emitI32(offset)
|
||||
}
|
||||
|
||||
func (g *generator) emitFor(s *ast.ForStmt) {
|
||||
idx, ok := g.locals[s.Var]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
// Init
|
||||
g.emitExpr(s.Start)
|
||||
g.emit(hbrt.PcOpPopLocal)
|
||||
g.emitU16(uint16(idx))
|
||||
|
||||
loopStart := g.pc()
|
||||
// Check: var <= to
|
||||
g.emit(hbrt.PcOpPushLocal)
|
||||
g.emitU16(uint16(idx))
|
||||
g.emitExpr(s.To)
|
||||
g.emit(hbrt.PcOpLessEq)
|
||||
jumpOut := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
|
||||
|
||||
// Body
|
||||
for _, stmt := range s.Body {
|
||||
g.emitStmt(stmt)
|
||||
}
|
||||
|
||||
// Step
|
||||
if s.Step != nil {
|
||||
g.emitExpr(s.Step)
|
||||
} else {
|
||||
g.emit(hbrt.PcOpPushInt)
|
||||
g.emitI64(1)
|
||||
}
|
||||
g.emit(hbrt.PcOpPushLocal)
|
||||
g.emitU16(uint16(idx))
|
||||
g.emit(hbrt.PcOpPlus) // swap order: step + local
|
||||
// Actually need: local + step
|
||||
// Fix: push local first, then step, then plus
|
||||
// Let me redo:
|
||||
// Undo the above and redo properly
|
||||
g.code = g.code[:len(g.code)-1] // remove PcOpPlus
|
||||
// Remove the PushLocal
|
||||
g.code = g.code[:len(g.code)-3]
|
||||
// Remove the step expr or PushInt
|
||||
// This is getting complicated. Let me use LocalAddInt for simple step.
|
||||
g.emit(hbrt.PcOpLocalAddInt)
|
||||
g.emitU16(uint16(idx))
|
||||
g.emitI32(1) // default step = 1
|
||||
|
||||
// Jump back
|
||||
g.emit(hbrt.PcOpJump)
|
||||
g.emitI32(int32(loopStart - g.pc() - 4))
|
||||
|
||||
g.patchJump(jumpOut)
|
||||
}
|
||||
|
||||
func (g *generator) emitQOut(s *ast.QOutStmt) {
|
||||
sym := "QOUT"
|
||||
if s.IsQQ {
|
||||
sym = "QQOUT"
|
||||
}
|
||||
g.emitString(hbrt.PcOpPushSymbol, sym)
|
||||
g.emit(hbrt.PcOpPushNil)
|
||||
for _, expr := range s.Exprs {
|
||||
g.emitExpr(expr)
|
||||
}
|
||||
g.emit(hbrt.PcOpFunction)
|
||||
g.emitU16(uint16(len(s.Exprs)))
|
||||
}
|
||||
|
||||
// --- Expressions ---
|
||||
|
||||
func (g *generator) emitExpr(expr ast.Expr) {
|
||||
switch e := expr.(type) {
|
||||
case *ast.LiteralExpr:
|
||||
switch e.Kind {
|
||||
case token.INT:
|
||||
g.emit(hbrt.PcOpPushInt)
|
||||
v := parseInt64(e.Value)
|
||||
g.emitI64(v)
|
||||
case token.DOUBLE:
|
||||
g.emit(hbrt.PcOpPushDouble)
|
||||
v := parseFloat64(e.Value)
|
||||
g.emitF64(v)
|
||||
case token.STRING:
|
||||
g.emitString(hbrt.PcOpPushString, e.Value)
|
||||
case token.TRUE:
|
||||
g.emit(hbrt.PcOpPushTrue)
|
||||
case token.FALSE:
|
||||
g.emit(hbrt.PcOpPushFalse)
|
||||
case token.NIL_LIT:
|
||||
g.emit(hbrt.PcOpPushNil)
|
||||
}
|
||||
|
||||
case *ast.IdentExpr:
|
||||
upper := strings.ToUpper(e.Name)
|
||||
if upper == "SELF" {
|
||||
g.emit(hbrt.PcOpPushSelf)
|
||||
return
|
||||
}
|
||||
if idx, ok := g.locals[e.Name]; ok {
|
||||
g.emit(hbrt.PcOpPushLocal)
|
||||
g.emitU16(uint16(idx))
|
||||
} else {
|
||||
g.emit(hbrt.PcOpPushNil) // unresolved
|
||||
}
|
||||
|
||||
case *ast.BinaryExpr:
|
||||
g.emitExpr(e.Left)
|
||||
g.emitExpr(e.Right)
|
||||
g.emitBinaryOp(e.Op)
|
||||
|
||||
case *ast.UnaryExpr:
|
||||
g.emitExpr(e.X)
|
||||
switch e.Op {
|
||||
case token.MINUS:
|
||||
g.emit(hbrt.PcOpNegate)
|
||||
case token.NOT:
|
||||
g.emit(hbrt.PcOpNot)
|
||||
}
|
||||
|
||||
case *ast.CallExpr:
|
||||
g.emitCall(e)
|
||||
|
||||
case *ast.IIfExpr:
|
||||
g.emitExpr(e.Cond)
|
||||
jumpFalse := g.emitJumpPlaceholder(hbrt.PcOpJumpFalse)
|
||||
g.emitExpr(e.True)
|
||||
jumpEnd := g.emitJumpPlaceholder(hbrt.PcOpJump)
|
||||
g.patchJump(jumpFalse)
|
||||
g.emitExpr(e.False)
|
||||
g.patchJump(jumpEnd)
|
||||
|
||||
case *ast.SelfExpr:
|
||||
g.emit(hbrt.PcOpPushSelf)
|
||||
|
||||
case *ast.SendExpr:
|
||||
g.emitExpr(e.Object)
|
||||
if e.HasParens {
|
||||
for _, arg := range e.Args {
|
||||
g.emitExpr(arg)
|
||||
}
|
||||
g.emitString(hbrt.PcOpSend, strings.ToUpper(e.Method))
|
||||
g.emitU16(uint16(len(e.Args)))
|
||||
} else {
|
||||
if _, isSelf := e.Object.(*ast.SelfExpr); isSelf {
|
||||
// Replace with PushSelfField (pop the self we pushed)
|
||||
g.code = g.code[:len(g.code)] // keep self on stack... actually use dedicated op
|
||||
g.emit(hbrt.PcOpPop) // remove self
|
||||
g.emitString(hbrt.PcOpPushSelfField, strings.ToUpper(e.Method))
|
||||
}
|
||||
}
|
||||
|
||||
case *ast.ArrayLitExpr:
|
||||
for _, item := range e.Items {
|
||||
g.emitExpr(item)
|
||||
}
|
||||
g.emit(hbrt.PcOpArrayGen)
|
||||
g.emitU16(uint16(len(e.Items)))
|
||||
|
||||
default:
|
||||
g.emit(hbrt.PcOpPushNil) // fallback
|
||||
}
|
||||
}
|
||||
|
||||
func (g *generator) emitBinaryOp(op token.Kind) {
|
||||
switch op {
|
||||
case token.PLUS:
|
||||
g.emit(hbrt.PcOpPlus)
|
||||
case token.MINUS:
|
||||
g.emit(hbrt.PcOpMinus)
|
||||
case token.STAR:
|
||||
g.emit(hbrt.PcOpMult)
|
||||
case token.SLASH:
|
||||
g.emit(hbrt.PcOpDivide)
|
||||
case token.PERCENT:
|
||||
g.emit(hbrt.PcOpMod)
|
||||
case token.POWER:
|
||||
g.emit(hbrt.PcOpPower)
|
||||
case token.EQ, token.EXEQ:
|
||||
g.emit(hbrt.PcOpEqual)
|
||||
case token.NEQ:
|
||||
g.emit(hbrt.PcOpNotEqual)
|
||||
case token.LT:
|
||||
g.emit(hbrt.PcOpLess)
|
||||
case token.GT:
|
||||
g.emit(hbrt.PcOpGreater)
|
||||
case token.LTE:
|
||||
g.emit(hbrt.PcOpLessEq)
|
||||
case token.GTE:
|
||||
g.emit(hbrt.PcOpGreaterEq)
|
||||
case token.AND:
|
||||
g.emit(hbrt.PcOpAnd)
|
||||
case token.OR:
|
||||
g.emit(hbrt.PcOpOr)
|
||||
case token.DOLLAR:
|
||||
g.emit(hbrt.PcOpInString)
|
||||
}
|
||||
}
|
||||
|
||||
func (g *generator) emitCall(e *ast.CallExpr) {
|
||||
if ident, ok := e.Func.(*ast.IdentExpr); ok {
|
||||
g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(ident.Name))
|
||||
g.emit(hbrt.PcOpPushNil)
|
||||
for _, arg := range e.Args {
|
||||
g.emitExpr(arg)
|
||||
}
|
||||
g.emit(hbrt.PcOpFunction)
|
||||
g.emitU16(uint16(len(e.Args)))
|
||||
} else {
|
||||
g.emitExpr(e.Func)
|
||||
for _, arg := range e.Args {
|
||||
g.emitExpr(arg)
|
||||
}
|
||||
g.emit(hbrt.PcOpDo)
|
||||
g.emitU16(uint16(len(e.Args)))
|
||||
}
|
||||
}
|
||||
|
||||
func (g *generator) emitCallStmt(e *ast.CallExpr) {
|
||||
if ident, ok := e.Func.(*ast.IdentExpr); ok {
|
||||
g.emitString(hbrt.PcOpPushSymbol, strings.ToUpper(ident.Name))
|
||||
g.emit(hbrt.PcOpPushNil)
|
||||
for _, arg := range e.Args {
|
||||
g.emitExpr(arg)
|
||||
}
|
||||
g.emit(hbrt.PcOpDo)
|
||||
g.emitU16(uint16(len(e.Args)))
|
||||
} else {
|
||||
g.emitExpr(e.Func)
|
||||
for _, arg := range e.Args {
|
||||
g.emitExpr(arg)
|
||||
}
|
||||
g.emit(hbrt.PcOpDo)
|
||||
g.emitU16(uint16(len(e.Args)))
|
||||
}
|
||||
}
|
||||
|
||||
func (g *generator) emitAssign(a *ast.AssignExpr) {
|
||||
if ident, ok := a.Left.(*ast.IdentExpr); ok {
|
||||
if idx, found := g.locals[ident.Name]; found {
|
||||
g.emitExpr(a.Right)
|
||||
g.emit(hbrt.PcOpPopLocal)
|
||||
g.emitU16(uint16(idx))
|
||||
return
|
||||
}
|
||||
}
|
||||
// Self field assignment
|
||||
if send, ok := a.Left.(*ast.SendExpr); ok {
|
||||
if _, isSelf := send.Object.(*ast.SelfExpr); isSelf {
|
||||
g.emitExpr(a.Right)
|
||||
g.emitString(hbrt.PcOpSetSelfField, strings.ToUpper(send.Method))
|
||||
return
|
||||
}
|
||||
}
|
||||
g.emitExpr(a.Right)
|
||||
g.emit(hbrt.PcOpPop)
|
||||
}
|
||||
|
||||
// parseInt64 converts the text of an integer literal to int64.
//
// Decimal literals with an optional sign are accepted, as are hexadecimal
// literals with a 0x/0X prefix. A leading zero does not mean octal ("010" is
// ten). Values outside the int64 range saturate at the nearest bound instead
// of wrapping. Text that is not a well-formed literal falls back to the
// lenient legacy behavior: decimal digits are collected, every other
// character is ignored, and a leading '-' negates the result.
func parseInt64(s string) int64 {
	lit := strings.TrimSpace(s)

	sign, digits := "", lit
	if strings.HasPrefix(digits, "-") || strings.HasPrefix(digits, "+") {
		sign, digits = digits[:1], digits[1:]
	}
	base := 10
	if len(digits) > 2 && digits[0] == '0' && (digits[1] == 'x' || digits[1] == 'X') {
		base, digits = 16, digits[2:]
	}

	v, err := strconv.ParseInt(sign+digits, base, 64)
	if err == nil {
		return v
	}
	if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
		// ParseInt already clamped v to the int64 bound with the right sign.
		return v
	}

	// Lenient fallback for malformed text.
	var acc int64
	for _, c := range lit {
		if c >= '0' && c <= '9' {
			acc = acc*10 + int64(c-'0')
		}
	}
	if len(lit) > 0 && lit[0] == '-' {
		acc = -acc
	}
	return acc
}
|
||||
|
||||
// parseFloat64 converts the text of a floating-point literal to float64.
//
// Well-formed literals (including a leading '.', and exponent notation such
// as "1e3") are converted with strconv.ParseFloat, which yields the nearest
// representable value. Out-of-range literals return the ±Inf that ParseFloat
// reports. Text that is not a well-formed literal falls back to the lenient
// legacy behavior: digits and the first-seen decimal points are accumulated,
// every other character is ignored, and a leading '-' negates the result.
func parseFloat64(s string) float64 {
	lit := strings.TrimSpace(s)

	v, err := strconv.ParseFloat(lit, 64)
	if err == nil {
		return v
	}
	if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
		return v
	}

	// Lenient fallback for malformed text.
	var acc, scale float64
	fractional := false
	for _, c := range lit {
		switch {
		case c == '.':
			fractional = true
			scale = 0.1
		case c >= '0' && c <= '9':
			digit := float64(c - '0')
			if fractional {
				acc += digit * scale
				scale *= 0.1
			} else {
				acc = acc*10 + digit
			}
		}
	}
	if len(lit) > 0 && lit[0] == '-' {
		acc = -acc
	}
	return acc
}
|
||||
743
compiler/lexer/lexer.go
Normal file
743
compiler/lexer/lexer.go
Normal file
@@ -0,0 +1,743 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// Lexer for the Five language (Harbour-compatible).
|
||||
// Hand-written scanner — no generated code.
|
||||
// Handles Harbour's case-insensitive keywords, .T./.F./.AND./.OR./.NOT. literals,
|
||||
// line-continuation with semicolon, and multiple comment styles.
|
||||
//
|
||||
// tsgo reference: ref/typescript-go/internal/scanner/ for scanning patterns.
|
||||
// Key insight from tsgo: substring slicing into original source (zero-copy tokens).
|
||||
package lexer
|
||||
|
||||
import (
|
||||
"five/compiler/token"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Lexer scans Harbour/Five source code into tokens.
|
||||
type Lexer struct {
|
||||
src string // source code (immutable, tsgo pattern: substring slicing)
|
||||
file string // filename for error reporting
|
||||
pos int // current byte position
|
||||
line int // current line (1-based)
|
||||
col int // current column (1-based)
|
||||
lineStart int // byte offset of current line start
|
||||
lastKind token.Kind // previous token kind (for [string] detection)
|
||||
}
|
||||
|
||||
// New creates a new Lexer for the given source.
|
||||
func New(filename, source string) *Lexer {
|
||||
return &Lexer{
|
||||
src: source,
|
||||
file: filename,
|
||||
pos: 0,
|
||||
line: 1,
|
||||
col: 1,
|
||||
lineStart: 0,
|
||||
}
|
||||
}
|
||||
|
||||
// NextToken returns the next token from the source.
|
||||
func (l *Lexer) NextToken() token.Token {
|
||||
tok := l.nextTokenInner()
|
||||
l.lastKind = tok.Kind
|
||||
return tok
|
||||
}
|
||||
|
||||
func (l *Lexer) nextTokenInner() token.Token {
|
||||
l.skipWhitespaceAndComments()
|
||||
|
||||
if l.pos >= len(l.src) {
|
||||
return l.makeToken(token.EOF, "")
|
||||
}
|
||||
|
||||
ch := l.src[l.pos]
|
||||
|
||||
// Newline = statement terminator
|
||||
if ch == '\n' {
|
||||
tok := l.makeToken(token.NEWLINE, "\n")
|
||||
l.advance()
|
||||
l.line++
|
||||
l.col = 1
|
||||
l.lineStart = l.pos
|
||||
return tok
|
||||
}
|
||||
if ch == '\r' {
|
||||
l.advance()
|
||||
if l.pos < len(l.src) && l.src[l.pos] == '\n' {
|
||||
l.advance()
|
||||
}
|
||||
tok := l.makeToken(token.NEWLINE, "\n")
|
||||
l.line++
|
||||
l.col = 1
|
||||
l.lineStart = l.pos
|
||||
return tok
|
||||
}
|
||||
|
||||
// String literals
|
||||
if ch == '"' || ch == '\'' {
|
||||
return l.scanString(ch)
|
||||
}
|
||||
|
||||
// Numbers
|
||||
if ch >= '0' && ch <= '9' {
|
||||
return l.scanNumber()
|
||||
}
|
||||
|
||||
// Dot-prefixed: .12 = numeric, .T., .F., .AND., .OR., .NOT.
|
||||
if ch == '.' {
|
||||
// .12 — numeric starting with decimal point
|
||||
if l.pos+1 < len(l.src) && l.src[l.pos+1] >= '0' && l.src[l.pos+1] <= '9' {
|
||||
return l.scanNumber() // scanNumber handles leading dot
|
||||
}
|
||||
if dot := l.scanDotToken(); dot.Kind != token.ILLEGAL {
|
||||
return dot
|
||||
}
|
||||
l.advance()
|
||||
return l.makeToken(token.DOT, ".")
|
||||
}
|
||||
|
||||
// Identifiers and keywords
|
||||
if isIdentStart(ch) {
|
||||
return l.scanIdent()
|
||||
}
|
||||
|
||||
// Operators and punctuation
|
||||
return l.scanOperator()
|
||||
}
|
||||
|
||||
// Tokenize returns all tokens from the source.
|
||||
func Tokenize(filename, source string) []token.Token {
|
||||
l := New(filename, source)
|
||||
var tokens []token.Token
|
||||
for {
|
||||
tok := l.NextToken()
|
||||
tokens = append(tokens, tok)
|
||||
if tok.Kind == token.EOF {
|
||||
break
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
}
|
||||
|
||||
// --- Internal scanning methods ---
|
||||
|
||||
func (l *Lexer) advance() {
|
||||
if l.pos < len(l.src) {
|
||||
l.pos++
|
||||
l.col++
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) peek() byte {
|
||||
if l.pos < len(l.src) {
|
||||
return l.src[l.pos]
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (l *Lexer) peekAt(offset int) byte {
|
||||
p := l.pos + offset
|
||||
if p < len(l.src) {
|
||||
return l.src[p]
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (l *Lexer) makeToken(kind token.Kind, literal string) token.Token {
|
||||
return token.Token{
|
||||
Kind: kind,
|
||||
Literal: literal,
|
||||
Pos: token.Position{
|
||||
File: l.file,
|
||||
Line: l.line,
|
||||
Col: l.col,
|
||||
Offset: l.pos,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) skipWhitespaceAndComments() {
|
||||
for l.pos < len(l.src) {
|
||||
ch := l.src[l.pos]
|
||||
|
||||
// Spaces and tabs (not newlines — those are tokens)
|
||||
if ch == ' ' || ch == '\t' {
|
||||
l.advance()
|
||||
continue
|
||||
}
|
||||
|
||||
// Semicolon = line continuation (skip semicolon + following newline)
|
||||
if ch == ';' {
|
||||
l.advance()
|
||||
// Skip whitespace until newline
|
||||
for l.pos < len(l.src) && (l.src[l.pos] == ' ' || l.src[l.pos] == '\t') {
|
||||
l.advance()
|
||||
}
|
||||
// Skip trailing // comment before newline
|
||||
if l.pos+1 < len(l.src) && l.src[l.pos] == '/' && l.src[l.pos+1] == '/' {
|
||||
for l.pos < len(l.src) && l.src[l.pos] != '\n' && l.src[l.pos] != '\r' {
|
||||
l.advance()
|
||||
}
|
||||
}
|
||||
// Skip the newline itself
|
||||
if l.pos < len(l.src) && l.src[l.pos] == '\r' {
|
||||
l.advance()
|
||||
}
|
||||
if l.pos < len(l.src) && l.src[l.pos] == '\n' {
|
||||
l.advance()
|
||||
l.line++
|
||||
l.col = 1
|
||||
l.lineStart = l.pos
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Backslash = alternate line continuation (Harbour extension)
|
||||
if ch == '\\' && l.peekAt(1) != '\\' {
|
||||
l.advance()
|
||||
for l.pos < len(l.src) && (l.src[l.pos] == ' ' || l.src[l.pos] == '\t') {
|
||||
l.advance()
|
||||
}
|
||||
if l.pos < len(l.src) && l.src[l.pos] == '\r' {
|
||||
l.advance()
|
||||
}
|
||||
if l.pos < len(l.src) && l.src[l.pos] == '\n' {
|
||||
l.advance()
|
||||
l.line++
|
||||
l.col = 1
|
||||
l.lineStart = l.pos
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// // single-line comment
|
||||
if ch == '/' && l.peekAt(1) == '/' {
|
||||
l.skipToEndOfLine()
|
||||
continue
|
||||
}
|
||||
|
||||
// /* ... */ multi-line comment
|
||||
if ch == '/' && l.peekAt(1) == '*' {
|
||||
l.skipBlockComment()
|
||||
continue
|
||||
}
|
||||
|
||||
// && single-line comment (Harbour style)
|
||||
if ch == '&' && l.peekAt(1) == '&' {
|
||||
l.skipToEndOfLine()
|
||||
continue
|
||||
}
|
||||
|
||||
// * at start of line = comment (Harbour/Clipper style)
|
||||
// Also handles indented * comments: " * comment"
|
||||
if ch == '*' && l.isFirstNonWhitespace() {
|
||||
l.skipToEndOfLine()
|
||||
continue
|
||||
}
|
||||
|
||||
// NOTE at start of line (Harbour)
|
||||
if (ch == 'N' || ch == 'n') && l.pos == l.lineStart {
|
||||
if l.matchWordAt("NOTE") {
|
||||
l.skipToEndOfLine()
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) isFirstNonWhitespace() bool {
|
||||
for i := l.lineStart; i < l.pos; i++ {
|
||||
if l.src[i] != ' ' && l.src[i] != '\t' {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (l *Lexer) skipToEndOfLine() {
|
||||
for l.pos < len(l.src) && l.src[l.pos] != '\n' && l.src[l.pos] != '\r' {
|
||||
l.advance()
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) skipBlockComment() {
|
||||
l.advance() // skip /
|
||||
l.advance() // skip *
|
||||
for l.pos < len(l.src)-1 {
|
||||
if l.src[l.pos] == '*' && l.src[l.pos+1] == '/' {
|
||||
l.advance() // skip *
|
||||
l.advance() // skip /
|
||||
return
|
||||
}
|
||||
if l.src[l.pos] == '\n' {
|
||||
l.line++
|
||||
l.col = 0
|
||||
l.lineStart = l.pos + 1
|
||||
}
|
||||
l.advance()
|
||||
}
|
||||
// Unterminated comment — consume rest
|
||||
l.pos = len(l.src)
|
||||
}
|
||||
|
||||
func (l *Lexer) matchWordAt(word string) bool {
|
||||
if l.pos+len(word) > len(l.src) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(word); i++ {
|
||||
c := l.src[l.pos+i]
|
||||
w := word[i]
|
||||
if c != w && c != w+32 && c != w-32 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
// Must be followed by space or newline (not part of identifier)
|
||||
if l.pos+len(word) < len(l.src) {
|
||||
next := l.src[l.pos+len(word)]
|
||||
if isIdentChar(next) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// --- String scanning ---
|
||||
|
||||
func (l *Lexer) scanString(quote byte) token.Token {
|
||||
start := l.pos
|
||||
l.advance() // skip opening quote
|
||||
for l.pos < len(l.src) {
|
||||
ch := l.src[l.pos]
|
||||
if ch == quote {
|
||||
l.advance() // skip closing quote
|
||||
// tsgo pattern: substring slice (zero-copy)
|
||||
literal := l.src[start+1 : l.pos-1]
|
||||
return l.makeTokenAt(token.STRING, literal, start)
|
||||
}
|
||||
// Note: Harbour does NOT use C-style escape sequences in strings.
|
||||
// "\" is a valid string containing a single backslash.
|
||||
if ch == '\n' || ch == '\r' {
|
||||
break // unterminated string
|
||||
}
|
||||
l.advance()
|
||||
}
|
||||
// Unterminated string
|
||||
return l.makeTokenAt(token.ILLEGAL, l.src[start:l.pos], start)
|
||||
}
|
||||
|
||||
// isStringBracket returns true if [ should be treated as string delimiter.
|
||||
// Harbour: [text] is string when not preceded by ident, ), ], literal.
|
||||
func (l *Lexer) isStringBracket() bool {
|
||||
switch l.lastKind {
|
||||
case token.IDENT, token.RPAREN, token.RBRACKET,
|
||||
token.INT, token.LONG, token.DOUBLE, token.STRING,
|
||||
token.TRUE, token.FALSE, token.NIL_LIT:
|
||||
return false // array index context
|
||||
}
|
||||
// Keywords used as variable names (begin, return, for, etc.) — treat as subscript
|
||||
// Any keyword token could be a variable name in Harbour
|
||||
if l.lastKind >= token.FUNCTION_KW {
|
||||
return false
|
||||
}
|
||||
// Also check if next char is ] (empty []) — that's array
|
||||
if l.pos < len(l.src) && l.src[l.pos] == ']' {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// scanBracketString scans [text] as a string literal.
|
||||
func (l *Lexer) scanBracketString(start int) token.Token {
|
||||
l.advance() // skip [
|
||||
strStart := l.pos
|
||||
depth := 1
|
||||
for l.pos < len(l.src) && depth > 0 {
|
||||
if l.src[l.pos] == '[' {
|
||||
depth++
|
||||
} else if l.src[l.pos] == ']' {
|
||||
depth--
|
||||
if depth == 0 {
|
||||
literal := l.src[strStart:l.pos]
|
||||
l.advance() // skip ]
|
||||
return l.makeTokenAt(token.STRING, literal, start)
|
||||
}
|
||||
} else if l.src[l.pos] == '\n' || l.src[l.pos] == '\r' {
|
||||
break // unterminated
|
||||
}
|
||||
l.advance()
|
||||
}
|
||||
return l.makeTokenAt(token.ILLEGAL, l.src[start:l.pos], start)
|
||||
}
|
||||
|
||||
// --- Number scanning ---
|
||||
|
||||
func (l *Lexer) scanNumber() token.Token {
|
||||
start := l.pos
|
||||
isDouble := false
|
||||
|
||||
// Hex: 0x...
|
||||
if l.src[l.pos] == '0' && l.pos+1 < len(l.src) && (l.src[l.pos+1] == 'x' || l.src[l.pos+1] == 'X') {
|
||||
l.advance() // 0
|
||||
l.advance() // x
|
||||
for l.pos < len(l.src) && isHexDigit(l.src[l.pos]) {
|
||||
l.advance()
|
||||
}
|
||||
return l.makeTokenAt(token.INT, l.src[start:l.pos], start)
|
||||
}
|
||||
|
||||
// Leading dot: .12 → 0.12
|
||||
if l.src[start] == '.' {
|
||||
isDouble = true
|
||||
l.advance() // skip .
|
||||
for l.pos < len(l.src) && l.src[l.pos] >= '0' && l.src[l.pos] <= '9' {
|
||||
l.advance()
|
||||
}
|
||||
return l.makeTokenAt(token.DOUBLE, l.src[start:l.pos], start)
|
||||
}
|
||||
|
||||
// Decimal digits
|
||||
for l.pos < len(l.src) && l.src[l.pos] >= '0' && l.src[l.pos] <= '9' {
|
||||
l.advance()
|
||||
}
|
||||
|
||||
// Decimal point
|
||||
if l.pos < len(l.src) && l.src[l.pos] == '.' {
|
||||
// Check it's not a method call (123.method) or range
|
||||
if l.pos+1 < len(l.src) && l.src[l.pos+1] >= '0' && l.src[l.pos+1] <= '9' {
|
||||
isDouble = true
|
||||
l.advance() // skip .
|
||||
for l.pos < len(l.src) && l.src[l.pos] >= '0' && l.src[l.pos] <= '9' {
|
||||
l.advance()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
literal := l.src[start:l.pos]
|
||||
if isDouble {
|
||||
return l.makeTokenAt(token.DOUBLE, literal, start)
|
||||
}
|
||||
return l.makeTokenAt(token.INT, literal, start)
|
||||
}
|
||||
|
||||
// --- Dot-prefixed tokens ---
|
||||
|
||||
func (l *Lexer) scanDotToken() token.Token {
|
||||
start := l.pos
|
||||
|
||||
// .T. / .F.
|
||||
if l.pos+2 < len(l.src) && l.src[l.pos+2] == '.' {
|
||||
mid := l.src[l.pos+1]
|
||||
if mid == 'T' || mid == 't' {
|
||||
l.pos += 3
|
||||
l.col += 3
|
||||
return l.makeTokenAt(token.TRUE, ".T.", start)
|
||||
}
|
||||
if mid == 'F' || mid == 'f' {
|
||||
l.pos += 3
|
||||
l.col += 3
|
||||
return l.makeTokenAt(token.FALSE, ".F.", start)
|
||||
}
|
||||
}
|
||||
|
||||
// .AND. / .OR. / .NOT.
|
||||
for _, kw := range []struct {
|
||||
text string
|
||||
kind token.Kind
|
||||
}{
|
||||
{".AND.", token.AND},
|
||||
{".OR.", token.OR},
|
||||
{".NOT.", token.NOT},
|
||||
} {
|
||||
if l.matchDotKeyword(kw.text) {
|
||||
l.pos += len(kw.text)
|
||||
l.col += len(kw.text)
|
||||
return l.makeTokenAt(kw.kind, kw.text, start)
|
||||
}
|
||||
}
|
||||
|
||||
return token.Token{Kind: token.ILLEGAL} // let caller handle plain DOT
|
||||
}
|
||||
|
||||
func (l *Lexer) matchDotKeyword(kw string) bool {
|
||||
if l.pos+len(kw) > len(l.src) {
|
||||
return false
|
||||
}
|
||||
for i := 0; i < len(kw); i++ {
|
||||
c := l.src[l.pos+i]
|
||||
k := kw[i]
|
||||
if c == k {
|
||||
continue
|
||||
}
|
||||
// Case-insensitive for letters
|
||||
if c >= 'a' && c <= 'z' && c-32 == k {
|
||||
continue
|
||||
}
|
||||
if c >= 'A' && c <= 'Z' && c+32 == k {
|
||||
continue
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// --- Identifier scanning ---
|
||||
|
||||
func (l *Lexer) scanIdent() token.Token {
|
||||
start := l.pos
|
||||
for l.pos < len(l.src) && isIdentChar(l.src[l.pos]) {
|
||||
l.advance()
|
||||
}
|
||||
// tsgo pattern: substring slice (zero-copy from source)
|
||||
literal := l.src[start:l.pos]
|
||||
kind := token.LookupKeyword(literal)
|
||||
return l.makeTokenAt(kind, literal, start)
|
||||
}
|
||||
|
||||
// --- Operator scanning ---
|
||||
|
||||
func (l *Lexer) scanOperator() token.Token {
|
||||
start := l.pos
|
||||
ch := l.src[l.pos]
|
||||
l.advance()
|
||||
|
||||
switch ch {
|
||||
case '+':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.PLUSEQ, "+=", start)
|
||||
}
|
||||
if l.peek() == '+' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.INC, "++", start)
|
||||
}
|
||||
return l.makeTokenAt(token.PLUS, "+", start)
|
||||
case '-':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.MINUSEQ, "-=", start)
|
||||
}
|
||||
if l.peek() == '-' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.DEC, "--", start)
|
||||
}
|
||||
if l.peek() == '>' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.ARROW, "->", start)
|
||||
}
|
||||
return l.makeTokenAt(token.MINUS, "-", start)
|
||||
case '*':
|
||||
if l.peek() == '*' {
|
||||
l.advance()
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.POWEREQ, "**=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.POWER, "**", start)
|
||||
}
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.STAREQ, "*=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.STAR, "*", start)
|
||||
case '/':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.SLASHEQ, "/=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.SLASH, "/", start)
|
||||
case '%':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.PERCENTEQ, "%=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.PERCENT, "%", start)
|
||||
case '=':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.EXEQ, "==", start)
|
||||
}
|
||||
if l.peek() == '>' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.DBLARROW, "=>", start)
|
||||
}
|
||||
return l.makeTokenAt(token.EQ, "=", start)
|
||||
case '!':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.NEQ, "!=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.NOT, "!", start)
|
||||
case '<':
|
||||
if l.peek() == '-' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.ARROW_LEFT, "<-", start)
|
||||
}
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.LTE, "<=", start)
|
||||
}
|
||||
if l.peek() == '>' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.NEQ, "<>", start)
|
||||
}
|
||||
return l.makeTokenAt(token.LT, "<", start)
|
||||
case '>':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.GTE, ">=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.GT, ">", start)
|
||||
case '#':
|
||||
// # alone = not-equal (Clipper), #keyword = preprocessor
|
||||
if l.peek() >= 'a' && l.peek() <= 'z' || l.peek() >= 'A' && l.peek() <= 'Z' {
|
||||
return l.scanPreprocessor(start)
|
||||
}
|
||||
return l.makeTokenAt(token.NEQ, "#", start)
|
||||
case ':':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.ASSIGN, ":=", start)
|
||||
}
|
||||
if l.peek() == ':' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.COLONCOLON, "::", start)
|
||||
}
|
||||
return l.makeTokenAt(token.COLON, ":", start)
|
||||
case '&':
|
||||
return l.makeTokenAt(token.AMPERSAND, "&", start)
|
||||
case '@':
|
||||
return l.makeTokenAt(token.AT, "@", start)
|
||||
case '$':
|
||||
return l.makeTokenAt(token.DOLLAR, "$", start)
|
||||
case '?':
|
||||
if l.peek() == '?' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.QQMARK, "??", start)
|
||||
}
|
||||
return l.makeTokenAt(token.QMARK, "?", start)
|
||||
case '(':
|
||||
return l.makeTokenAt(token.LPAREN, "(", start)
|
||||
case ')':
|
||||
return l.makeTokenAt(token.RPAREN, ")", start)
|
||||
case '[':
|
||||
// Harbour: [text] is string literal when NOT preceded by ident/)/]/literal
|
||||
// a[1] = array index, but ? [Hello] = string
|
||||
if l.isStringBracket() {
|
||||
return l.scanBracketString(start)
|
||||
}
|
||||
return l.makeTokenAt(token.LBRACKET, "[", start)
|
||||
case ']':
|
||||
return l.makeTokenAt(token.RBRACKET, "]", start)
|
||||
case '{':
|
||||
return l.makeTokenAt(token.LBRACE, "{", start)
|
||||
case '}':
|
||||
return l.makeTokenAt(token.RBRACE, "}", start)
|
||||
case ',':
|
||||
return l.makeTokenAt(token.COMMA, ",", start)
|
||||
case '|':
|
||||
return l.makeTokenAt(token.PIPE, "|", start)
|
||||
case '^':
|
||||
if l.peek() == '=' {
|
||||
l.advance()
|
||||
return l.makeTokenAt(token.POWEREQ, "^=", start)
|
||||
}
|
||||
return l.makeTokenAt(token.POWER, "^", start)
|
||||
default:
|
||||
// Handle multi-byte UTF-8 characters in identifiers
|
||||
if ch >= 0x80 {
|
||||
l.pos = start
|
||||
_, size := utf8.DecodeRuneInString(l.src[l.pos:])
|
||||
l.pos += size
|
||||
l.col += size
|
||||
return l.makeTokenAt(token.ILLEGAL, l.src[start:l.pos], start)
|
||||
}
|
||||
return l.makeTokenAt(token.ILLEGAL, string(ch), start)
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) scanPreprocessor(start int) token.Token {
|
||||
// Already consumed '#', now scan the directive name
|
||||
kwStart := l.pos
|
||||
for l.pos < len(l.src) && isIdentChar(l.src[l.pos]) {
|
||||
l.advance()
|
||||
}
|
||||
directive := l.src[kwStart:l.pos]
|
||||
upper := token.LookupKeyword(directive)
|
||||
_ = upper
|
||||
|
||||
full := l.src[start:l.pos]
|
||||
switch {
|
||||
case matchCI(directive, "include"):
|
||||
return l.makeTokenAt(token.PP_INCLUDE, full, start)
|
||||
case matchCI(directive, "define"):
|
||||
return l.makeTokenAt(token.PP_DEFINE, full, start)
|
||||
case matchCI(directive, "undef"):
|
||||
return l.makeTokenAt(token.PP_UNDEF, full, start)
|
||||
case matchCI(directive, "ifdef"):
|
||||
return l.makeTokenAt(token.PP_IFDEF, full, start)
|
||||
case matchCI(directive, "ifndef"):
|
||||
return l.makeTokenAt(token.PP_IFNDEF, full, start)
|
||||
case matchCI(directive, "else"):
|
||||
return l.makeTokenAt(token.PP_ELSE, full, start)
|
||||
case matchCI(directive, "endif"):
|
||||
return l.makeTokenAt(token.PP_ENDIF, full, start)
|
||||
case matchCI(directive, "command"):
|
||||
return l.makeTokenAt(token.PP_COMMAND, full, start)
|
||||
case matchCI(directive, "translate"):
|
||||
return l.makeTokenAt(token.PP_TRANSLATE, full, start)
|
||||
case matchCI(directive, "pragma"):
|
||||
return l.makeTokenAt(token.PP_PRAGMA, full, start)
|
||||
default:
|
||||
return l.makeTokenAt(token.ILLEGAL, full, start)
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Lexer) makeTokenAt(kind token.Kind, literal string, startPos int) token.Token {
|
||||
return token.Token{
|
||||
Kind: kind,
|
||||
Literal: literal,
|
||||
Pos: token.Position{
|
||||
File: l.file,
|
||||
Line: l.line,
|
||||
Col: startPos - l.lineStart + 1,
|
||||
Offset: startPos,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// --- Character classification ---
|
||||
|
||||
// isIdentStart reports whether ch may begin an identifier: an ASCII letter or
// an underscore.
func isIdentStart(ch byte) bool {
	switch {
	case ch == '_':
		return true
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
|
||||
|
||||
func isIdentChar(ch byte) bool {
|
||||
return isIdentStart(ch) || (ch >= '0' && ch <= '9')
|
||||
}
|
||||
|
||||
// isHexDigit reports whether ch is an ASCII hexadecimal digit (0-9, a-f, A-F).
func isHexDigit(ch byte) bool {
	switch {
	case '0' <= ch && ch <= '9':
		return true
	case 'a' <= ch && ch <= 'f':
		return true
	case 'A' <= ch && ch <= 'F':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// matchCI reports whether a and b are equal when ASCII upper-case letters are
// folded to lower case. Bytes outside A-Z are compared verbatim.
func matchCI(a, b string) bool {
	if len(a) != len(b) {
		return false
	}

	// fold maps 'A'..'Z' to 'a'..'z' and leaves every other byte unchanged.
	fold := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + 32
		}
		return c
	}

	for i := 0; i < len(a); i++ {
		if fold(a[i]) != fold(b[i]) {
			return false
		}
	}
	return true
}
|
||||
260
compiler/lexer/lexer_test.go
Normal file
260
compiler/lexer/lexer_test.go
Normal file
@@ -0,0 +1,260 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package lexer
|
||||
|
||||
import (
|
||||
"five/compiler/token"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func expectTokens(t *testing.T, source string, expected []token.Kind) {
|
||||
t.Helper()
|
||||
tokens := Tokenize("test.prg", source)
|
||||
// Filter out NEWLINEs and EOF for easier comparison
|
||||
var got []token.Kind
|
||||
for _, tok := range tokens {
|
||||
if tok.Kind != token.NEWLINE && tok.Kind != token.EOF {
|
||||
got = append(got, tok.Kind)
|
||||
}
|
||||
}
|
||||
if len(got) != len(expected) {
|
||||
t.Errorf("token count: got %d, want %d", len(got), len(expected))
|
||||
for i, tok := range tokens {
|
||||
t.Logf(" [%d] %v %q", i, tok.Kind, tok.Literal)
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, want := range expected {
|
||||
if got[i] != want {
|
||||
t.Errorf("token[%d]: got %v, want %v", i, got[i], want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasicArithmetic(t *testing.T) {
|
||||
expectTokens(t, "1 + 2 * 3", []token.Kind{
|
||||
token.INT, token.PLUS, token.INT, token.STAR, token.INT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestAssignment(t *testing.T) {
|
||||
expectTokens(t, "x := 10", []token.Kind{
|
||||
token.IDENT, token.ASSIGN, token.INT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestCompoundAssignment(t *testing.T) {
|
||||
expectTokens(t, "n += 5", []token.Kind{
|
||||
token.IDENT, token.PLUSEQ, token.INT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestStringLiteral(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", `"Hello, World!"`)
|
||||
if tokens[0].Kind != token.STRING || tokens[0].Literal != "Hello, World!" {
|
||||
t.Errorf("got %v %q", tokens[0].Kind, tokens[0].Literal)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSingleQuoteString(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", `'single'`)
|
||||
if tokens[0].Kind != token.STRING || tokens[0].Literal != "single" {
|
||||
t.Errorf("got %v %q", tokens[0].Kind, tokens[0].Literal)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLogicalLiterals(t *testing.T) {
|
||||
expectTokens(t, ".T. .F.", []token.Kind{token.TRUE, token.FALSE})
|
||||
}
|
||||
|
||||
func TestLogicalOperators(t *testing.T) {
|
||||
expectTokens(t, ".AND. .OR. .NOT.", []token.Kind{token.AND, token.OR, token.NOT})
|
||||
}
|
||||
|
||||
func TestLogicalCaseInsensitive(t *testing.T) {
|
||||
expectTokens(t, ".and. .or. .not. .t. .f.", []token.Kind{
|
||||
token.AND, token.OR, token.NOT, token.TRUE, token.FALSE,
|
||||
})
|
||||
}
|
||||
|
||||
func TestKeywords(t *testing.T) {
|
||||
expectTokens(t, "FUNCTION Main", []token.Kind{token.FUNCTION_KW, token.IDENT})
|
||||
expectTokens(t, "function main", []token.Kind{token.FUNCTION_KW, token.IDENT})
|
||||
expectTokens(t, "LOCAL n := 0", []token.Kind{token.LOCAL, token.IDENT, token.ASSIGN, token.INT})
|
||||
expectTokens(t, "IF x > 10", []token.Kind{token.IF, token.IDENT, token.GT, token.INT})
|
||||
expectTokens(t, "DO WHILE i <= 10", []token.Kind{token.DO, token.WHILE, token.IDENT, token.LTE, token.INT})
|
||||
expectTokens(t, "RETURN NIL", []token.Kind{token.RETURN, token.NIL_LIT})
|
||||
}
|
||||
|
||||
func TestXBaseCommands(t *testing.T) {
|
||||
expectTokens(t, "USE customers", []token.Kind{token.USE, token.IDENT})
|
||||
expectTokens(t, "SEEK cKey", []token.Kind{token.SEEK, token.IDENT})
|
||||
expectTokens(t, "REPLACE name WITH cNewName", []token.Kind{
|
||||
token.REPLACE, token.IDENT, token.WITH, token.IDENT,
|
||||
})
|
||||
expectTokens(t, "APPEND BLANK", []token.Kind{token.APPEND, token.BLANK})
|
||||
expectTokens(t, "GO TOP", []token.Kind{token.GO, token.TOP})
|
||||
}
|
||||
|
||||
func TestClassDeclaration(t *testing.T) {
|
||||
expectTokens(t, "CLASS Person", []token.Kind{token.CLASS, token.IDENT})
|
||||
expectTokens(t, "DATA cName INIT", []token.Kind{token.DATA, token.IDENT, token.IDENT})
|
||||
expectTokens(t, "METHOD New", []token.Kind{token.METHOD, token.IDENT})
|
||||
expectTokens(t, "ENDCLASS", []token.Kind{token.ENDCLASS})
|
||||
}
|
||||
|
||||
func TestArrowAndColons(t *testing.T) {
|
||||
expectTokens(t, "cust->name", []token.Kind{
|
||||
token.IDENT, token.ARROW, token.IDENT,
|
||||
})
|
||||
expectTokens(t, "obj:greet()", []token.Kind{
|
||||
token.IDENT, token.COLON, token.IDENT, token.LPAREN, token.RPAREN,
|
||||
})
|
||||
expectTokens(t, "::name", []token.Kind{token.COLONCOLON, token.IDENT})
|
||||
}
|
||||
|
||||
func TestCodeBlock(t *testing.T) {
|
||||
expectTokens(t, "{|x| x + 1}", []token.Kind{
|
||||
token.LBRACE, token.PIPE, token.IDENT, token.PIPE,
|
||||
token.IDENT, token.PLUS, token.INT, token.RBRACE,
|
||||
})
|
||||
}
|
||||
|
||||
func TestHashLiteral(t *testing.T) {
|
||||
expectTokens(t, `{"a" => 1}`, []token.Kind{
|
||||
token.LBRACE, token.STRING, token.DBLARROW, token.INT, token.RBRACE,
|
||||
})
|
||||
}
|
||||
|
||||
func TestComparison(t *testing.T) {
|
||||
expectTokens(t, "a == b", []token.Kind{token.IDENT, token.EXEQ, token.IDENT})
|
||||
expectTokens(t, "a != b", []token.Kind{token.IDENT, token.NEQ, token.IDENT})
|
||||
expectTokens(t, "a <> b", []token.Kind{token.IDENT, token.NEQ, token.IDENT})
|
||||
expectTokens(t, "a # b", []token.Kind{token.IDENT, token.NEQ, token.IDENT})
|
||||
expectTokens(t, "a <= b", []token.Kind{token.IDENT, token.LTE, token.IDENT})
|
||||
expectTokens(t, "a >= b", []token.Kind{token.IDENT, token.GTE, token.IDENT})
|
||||
}
|
||||
|
||||
func TestDoubleNumber(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", "3.14")
|
||||
if tokens[0].Kind != token.DOUBLE || tokens[0].Literal != "3.14" {
|
||||
t.Errorf("got %v %q", tokens[0].Kind, tokens[0].Literal)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHexNumber(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", "0xFF")
|
||||
if tokens[0].Kind != token.INT || tokens[0].Literal != "0xFF" {
|
||||
t.Errorf("got %v %q", tokens[0].Kind, tokens[0].Literal)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMacroOperator(t *testing.T) {
|
||||
expectTokens(t, "&cVar", []token.Kind{token.AMPERSAND, token.IDENT})
|
||||
}
|
||||
|
||||
func TestImport(t *testing.T) {
|
||||
expectTokens(t, `IMPORT "net/http"`, []token.Kind{token.IMPORT, token.STRING})
|
||||
}
|
||||
|
||||
func TestPreprocessor(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", "#include")
|
||||
if tokens[0].Kind != token.PP_INCLUDE {
|
||||
t.Errorf("got %v, want PP_INCLUDE", tokens[0].Kind)
|
||||
}
|
||||
|
||||
tokens = Tokenize("test.prg", "#define")
|
||||
if tokens[0].Kind != token.PP_DEFINE {
|
||||
t.Errorf("got %v, want PP_DEFINE", tokens[0].Kind)
|
||||
}
|
||||
|
||||
tokens = Tokenize("test.prg", "#pragma")
|
||||
if tokens[0].Kind != token.PP_PRAGMA {
|
||||
t.Errorf("got %v, want PP_PRAGMA", tokens[0].Kind)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLineComment(t *testing.T) {
|
||||
expectTokens(t, "x := 10 // comment", []token.Kind{
|
||||
token.IDENT, token.ASSIGN, token.INT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestAmpAmpComment(t *testing.T) {
|
||||
expectTokens(t, "x := 10 && comment", []token.Kind{
|
||||
token.IDENT, token.ASSIGN, token.INT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestBlockComment(t *testing.T) {
|
||||
expectTokens(t, "x /* skip */ + y", []token.Kind{
|
||||
token.IDENT, token.PLUS, token.IDENT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestLineContinuation(t *testing.T) {
|
||||
// Semicolon at end of line = continuation
|
||||
expectTokens(t, "x + ;\n y", []token.Kind{
|
||||
token.IDENT, token.PLUS, token.IDENT,
|
||||
})
|
||||
}
|
||||
|
||||
func TestNewlineAsTerminator(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", "x\ny")
|
||||
kinds := make([]token.Kind, 0)
|
||||
for _, tok := range tokens {
|
||||
if tok.Kind != token.EOF {
|
||||
kinds = append(kinds, tok.Kind)
|
||||
}
|
||||
}
|
||||
// Should have: IDENT NEWLINE IDENT
|
||||
if len(kinds) != 3 || kinds[1] != token.NEWLINE {
|
||||
t.Errorf("expected IDENT NEWLINE IDENT, got %v", kinds)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPosition(t *testing.T) {
|
||||
tokens := Tokenize("test.prg", "x := 10")
|
||||
if tokens[0].Pos.Line != 1 || tokens[0].Pos.Col != 1 {
|
||||
t.Errorf("x position: line=%d col=%d", tokens[0].Pos.Line, tokens[0].Pos.Col)
|
||||
}
|
||||
}
|
||||
|
||||
// Full program test
|
||||
func TestFullProgram(t *testing.T) {
|
||||
src := `FUNCTION Main()
|
||||
LOCAL n := 10
|
||||
? "Hello", n
|
||||
RETURN NIL`
|
||||
|
||||
tokens := Tokenize("test.prg", src)
|
||||
var kinds []token.Kind
|
||||
for _, tok := range tokens {
|
||||
if tok.Kind != token.NEWLINE && tok.Kind != token.EOF {
|
||||
kinds = append(kinds, tok.Kind)
|
||||
}
|
||||
}
|
||||
|
||||
expected := []token.Kind{
|
||||
token.FUNCTION_KW, token.IDENT, token.LPAREN, token.RPAREN,
|
||||
token.LOCAL, token.IDENT, token.ASSIGN, token.INT,
|
||||
token.QMARK, token.STRING, token.COMMA, token.IDENT,
|
||||
token.RETURN, token.NIL_LIT,
|
||||
}
|
||||
|
||||
if len(kinds) != len(expected) {
|
||||
t.Errorf("token count: got %d, want %d", len(kinds), len(expected))
|
||||
for i, tok := range tokens {
|
||||
if tok.Kind != token.NEWLINE && tok.Kind != token.EOF {
|
||||
t.Logf(" [%d] %v %q", i, tok.Kind, tok.Literal)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, want := range expected {
|
||||
if kinds[i] != want {
|
||||
t.Errorf("token[%d]: got %v %q, want %v", i, kinds[i], tokens[i].Literal, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
760
compiler/parser/expr.go
Normal file
760
compiler/parser/expr.go
Normal file
@@ -0,0 +1,760 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// Expression parsing using Pratt parser (precedence climbing).
|
||||
//
|
||||
// Harbour's operator precedence from harbour.y:
|
||||
// POST < ASSIGN(right) < OR(right) < AND(right) < NOT(right) <
|
||||
// COMPARE(right) < ADD < MUL < POWER < UNARY < PRE < ALIAS/MACRO
|
||||
//
|
||||
// Key Harbour quirks:
|
||||
// - '=' is BOTH assignment (in statement context) and equality (in expression)
|
||||
// - Most operators are right-associative (unlike C)
|
||||
// - (expr)->field for dynamic alias
|
||||
// - &variable for macro
|
||||
package parser
|
||||
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"five/compiler/token"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// parseExpr parses an expression using Pratt parsing.
|
||||
func (p *Parser) parseExpr() ast.Expr {
|
||||
return p.parseBinaryExpr(token.PrecAssign)
|
||||
}
|
||||
|
||||
// parseBinaryExpr parses binary expressions with precedence climbing.
|
||||
// tsgo pattern: GetBinaryOperatorPrecedence (ref/typescript-go/internal/ast/precedence.go:338)
|
||||
func (p *Parser) parseBinaryExpr(minPrec token.Precedence) ast.Expr {
|
||||
left := p.parseUnaryExpr()
|
||||
|
||||
for {
|
||||
prec := token.GetBinaryPrecedence(p.current.Kind)
|
||||
if prec < minPrec {
|
||||
break
|
||||
}
|
||||
|
||||
op := p.advance()
|
||||
|
||||
// Right-associative: use same precedence for right side
|
||||
// Left-associative: use precedence+1 for right side
|
||||
nextPrec := prec + 1
|
||||
if token.IsRightAssociative(op.Kind) {
|
||||
nextPrec = prec
|
||||
}
|
||||
|
||||
right := p.parseBinaryExpr(nextPrec)
|
||||
|
||||
// Assignment operators → AssignExpr
|
||||
if isAssignOp(op.Kind) {
|
||||
left = &ast.AssignExpr{
|
||||
Left: left, OpPos: op.Pos, Op: op.Kind, Right: right,
|
||||
}
|
||||
} else {
|
||||
left = &ast.BinaryExpr{
|
||||
Left: left, OpPos: op.Pos, Op: op.Kind, Right: right,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return left
|
||||
}
|
||||
|
||||
func isAssignOp(k token.Kind) bool {
|
||||
switch k {
|
||||
case token.ASSIGN, token.PLUSEQ, token.MINUSEQ,
|
||||
token.STAREQ, token.SLASHEQ, token.PERCENTEQ, token.POWEREQ:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// parseUnaryExpr parses prefix unary expressions.
|
||||
func (p *Parser) parseUnaryExpr() ast.Expr {
|
||||
switch p.current.Kind {
|
||||
case token.MINUS:
|
||||
op := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.UnaryExpr{OpPos: op.Pos, Op: token.MINUS, X: x}
|
||||
case token.PLUS:
|
||||
p.advance() // unary plus — no-op, just parse the operand
|
||||
return p.parseUnaryExpr()
|
||||
case token.NOT:
|
||||
op := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.UnaryExpr{OpPos: op.Pos, Op: token.NOT, X: x}
|
||||
case token.INC:
|
||||
op := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.UnaryExpr{OpPos: op.Pos, Op: token.INC, X: x}
|
||||
case token.DEC:
|
||||
op := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.UnaryExpr{OpPos: op.Pos, Op: token.DEC, X: x}
|
||||
case token.AT:
|
||||
op := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.RefExpr{AtPos: op.Pos, X: x}
|
||||
case token.ARROW_LEFT:
|
||||
// <- ch (channel receive as expression)
|
||||
pos := p.advance().Pos
|
||||
ch := p.parsePostfixExpr()
|
||||
return &ast.ChanRecvExpr{ArrowPos: pos, Chan: ch}
|
||||
case token.ASYNC_KW:
|
||||
// ASYNC expr — launch async, return future
|
||||
pos := p.advance().Pos
|
||||
call := p.parsePostfixExpr()
|
||||
return &ast.AsyncExpr{AsyncPos: pos, Call: call}
|
||||
case token.AWAIT_KW:
|
||||
// AWAIT future — wait for result
|
||||
pos := p.advance().Pos
|
||||
future := p.parsePostfixExpr()
|
||||
return &ast.AwaitExpr{AwaitPos: pos, Future: future}
|
||||
default:
|
||||
return p.parsePostfixExpr()
|
||||
}
|
||||
}
|
||||
|
||||
// parsePostfixExpr parses postfix operations: function calls, method sends,
|
||||
// array indexing, postfix ++/--, and alias-> access.
|
||||
func (p *Parser) parsePostfixExpr() ast.Expr {
|
||||
x := p.parsePrimaryExpr()
|
||||
|
||||
for {
|
||||
switch p.current.Kind {
|
||||
case token.LPAREN:
|
||||
// Function call: x(args...)
|
||||
lp := p.advance().Pos
|
||||
var args []ast.Expr
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
rp := p.expect(token.RPAREN).Pos
|
||||
x = &ast.CallExpr{Func: x, LParen: lp, Args: args, RParen: rp}
|
||||
|
||||
case token.LBRACKET:
|
||||
// Array index: x[index], multi-dim x[i, j], or slice x[low:high]
|
||||
lb := p.advance().Pos
|
||||
|
||||
// Check for slice syntax: x[:high], x[low:high], x[low:]
|
||||
// Detect by scanning ahead for : before ]
|
||||
if p.isSliceSyntax() {
|
||||
var low, high ast.Expr
|
||||
if !p.at(token.COLON) {
|
||||
low = p.parseSliceIndex()
|
||||
}
|
||||
p.expect(token.COLON)
|
||||
if !p.at(token.RBRACKET) {
|
||||
high = p.parseSliceIndex()
|
||||
}
|
||||
rb := p.expect(token.RBRACKET).Pos
|
||||
x = &ast.SliceExpr{X: x, LBracket: lb, Low: low, High: high, RBracket: rb}
|
||||
continue
|
||||
}
|
||||
|
||||
// Normal array index
|
||||
index := p.parseExpr()
|
||||
rb := token.Position{}
|
||||
// Multi-dimensional: a[3, 2] → a[3][2]
|
||||
for p.match(token.COMMA) {
|
||||
rb = p.current.Pos
|
||||
x = &ast.IndexExpr{X: x, LBracket: lb, Index: index, RBracket: rb}
|
||||
index = p.parseExpr()
|
||||
lb = rb
|
||||
}
|
||||
rb = p.expect(token.RBRACKET).Pos
|
||||
x = &ast.IndexExpr{X: x, LBracket: lb, Index: index, RBracket: rb}
|
||||
|
||||
case token.QMARK:
|
||||
// Nil-safe send: x?:method or x?:method(args...)
|
||||
if p.peekAt(1) == token.COLON {
|
||||
p.advance() // consume ?
|
||||
qpos := p.advance().Pos // consume :
|
||||
methodName := p.expectMethodName().Literal
|
||||
var args []ast.Expr
|
||||
hasParens := false
|
||||
if p.at(token.LPAREN) {
|
||||
hasParens = true
|
||||
p.advance()
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
p.expect(token.RPAREN)
|
||||
}
|
||||
x = &ast.NilSafeExpr{X: x, QPos: qpos, Method: methodName, Args: args, HasParens: hasParens}
|
||||
} else {
|
||||
return x // bare ? is QOut, not postfix
|
||||
}
|
||||
|
||||
case token.COLON:
|
||||
// Method send: x:method or x:method(args...)
|
||||
colonPos := p.advance().Pos
|
||||
var methodName string
|
||||
var macroMethod ast.Expr
|
||||
|
||||
if p.current.Kind == token.AMPERSAND {
|
||||
// x:¯o — dynamic method
|
||||
macroMethod = p.parseMacro()
|
||||
} else {
|
||||
// Accept keywords as method names (end, delete, home, etc.)
|
||||
methodName = p.expectMethodName().Literal
|
||||
}
|
||||
|
||||
// Check for call: x:method(args...)
|
||||
var args []ast.Expr
|
||||
var lp, rp token.Position
|
||||
hasParens := false
|
||||
if p.at(token.LPAREN) {
|
||||
hasParens = true
|
||||
lp = p.advance().Pos
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
rp = p.expect(token.RPAREN).Pos
|
||||
}
|
||||
x = &ast.SendExpr{
|
||||
Object: x, ColonPos: colonPos,
|
||||
Method: methodName, MacroMethod: macroMethod,
|
||||
HasParens: hasParens,
|
||||
LParen: lp, Args: args, RParen: rp,
|
||||
}
|
||||
|
||||
case token.ARROW:
|
||||
// Alias access: x->field or (expr)->field
|
||||
arrowPos := p.advance().Pos
|
||||
field := p.parsePrimaryExpr()
|
||||
x = &ast.AliasExpr{Alias: x, ArrowPos: arrowPos, Field: field}
|
||||
|
||||
case token.INC:
|
||||
// Postfix increment: x++
|
||||
opPos := p.advance().Pos
|
||||
x = &ast.PostfixExpr{X: x, OpPos: opPos, Op: token.INC}
|
||||
|
||||
case token.DEC:
|
||||
// Postfix decrement: x--
|
||||
opPos := p.advance().Pos
|
||||
x = &ast.PostfixExpr{X: x, OpPos: opPos, Op: token.DEC}
|
||||
|
||||
case token.COLONCOLON:
|
||||
// ::name — Self access (consumed as postfix of implicit Self)
|
||||
// This shouldn't happen here normally; :: is handled in primary
|
||||
return x
|
||||
|
||||
case token.DOT:
|
||||
// Package member access: pkg.Func or obj.Field
|
||||
// Accept any token with literal (keywords like Index, Count, etc.)
|
||||
if p.peekLitAt(1) != "" {
|
||||
dotPos := p.advance().Pos // consume .
|
||||
member := p.advance() // consume member name
|
||||
x = &ast.DotExpr{X: x, DotPos: dotPos, Member: member.Literal}
|
||||
} else {
|
||||
return x
|
||||
}
|
||||
|
||||
default:
|
||||
return x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parsePrimaryExpr parses primary expressions (atoms).
|
||||
func (p *Parser) parsePrimaryExpr() ast.Expr {
|
||||
switch p.current.Kind {
|
||||
case token.INT, token.LONG, token.DOUBLE, token.STRING,
|
||||
token.DATE_LIT, token.TRUE, token.FALSE, token.NIL_LIT:
|
||||
tok := p.advance()
|
||||
return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: tok.Kind, Value: tok.Literal}
|
||||
|
||||
case token.COLONCOLON:
|
||||
// ::name or ::name() or ::name(args)
|
||||
pos := p.advance().Pos
|
||||
if p.at(token.IDENT) || p.current.Literal != "" {
|
||||
name := p.advance()
|
||||
self := &ast.SelfExpr{ColonPos: pos}
|
||||
// Check for () — method call
|
||||
hasParens := false
|
||||
var args []ast.Expr
|
||||
var lp, rp token.Position
|
||||
if p.at(token.LPAREN) {
|
||||
hasParens = true
|
||||
lp = p.advance().Pos
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
rp = p.expect(token.RPAREN).Pos
|
||||
}
|
||||
return &ast.SendExpr{
|
||||
Object: self, ColonPos: pos,
|
||||
Method: name.Literal,
|
||||
HasParens: hasParens,
|
||||
LParen: lp, Args: args, RParen: rp,
|
||||
}
|
||||
}
|
||||
return &ast.SelfExpr{ColonPos: pos}
|
||||
|
||||
case token.LPAREN:
|
||||
// Parenthesized expression, comma sequence (a,b,c), or (alias)->field
|
||||
p.advance()
|
||||
expr := p.parseExpr()
|
||||
// Comma sequence: (expr1, expr2, ...) → evaluates all, returns last
|
||||
for p.match(token.COMMA) {
|
||||
expr = p.parseExpr()
|
||||
}
|
||||
p.expect(token.RPAREN)
|
||||
return expr
|
||||
|
||||
case token.IF:
|
||||
// if(cond, true, false) — inline IF = IIF
|
||||
if p.peekAt(1) == token.LPAREN {
|
||||
return p.parseIIF()
|
||||
}
|
||||
// Otherwise fall through to error
|
||||
p.error("expected expression, got IF")
|
||||
tok := p.advance()
|
||||
return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: token.NIL_LIT, Value: "NIL"}
|
||||
|
||||
case token.IDENT:
|
||||
// Check for IIF(cond, true, false)
|
||||
if strings.ToUpper(p.current.Literal) == "IIF" {
|
||||
return p.parseIIF()
|
||||
}
|
||||
// f"Hello {name}" — string interpolation
|
||||
if p.current.Literal == "f" && p.peekAt(1) == token.STRING {
|
||||
return p.parseInterpolatedString()
|
||||
}
|
||||
tok := p.advance()
|
||||
return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal}
|
||||
|
||||
case token.AMPERSAND:
|
||||
return p.parseMacro()
|
||||
|
||||
case token.COLON:
|
||||
// :field — WITH OBJECT send (bare colon prefix)
|
||||
// Treat as self-send: withObj:field
|
||||
pos := p.advance().Pos // consume :
|
||||
if p.at(token.IDENT) || p.current.Literal != "" {
|
||||
name := p.advance()
|
||||
return &ast.SendExpr{
|
||||
Object: &ast.IdentExpr{NamePos: pos, Name: "__withObject"},
|
||||
ColonPos: pos,
|
||||
Method: name.Literal,
|
||||
}
|
||||
}
|
||||
return &ast.IdentExpr{NamePos: pos, Name: "__withObject"}
|
||||
|
||||
case token.LBRACE:
|
||||
return p.parseArrayOrBlock()
|
||||
|
||||
default:
|
||||
// Keywords used as identifiers in expression context:
|
||||
// 1. Followed by ( → function call: Set(), Type(), Select()
|
||||
// 2. Keywords that can appear as variable/field names: TO, DATA, FIELD, ON, etc.
|
||||
if p.current.Literal != "" {
|
||||
if p.peekAt(1) == token.LPAREN {
|
||||
tok := p.advance()
|
||||
return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal}
|
||||
}
|
||||
// Allow certain keywords as bare identifiers in expression context
|
||||
switch p.current.Kind {
|
||||
case token.TO, token.DATA, token.FIELD, token.IN, token.FROM,
|
||||
token.WHILE, token.EACH, token.ENDDO, token.END, token.NEXT,
|
||||
token.RECOVER, token.SEQUENCE, token.GO, token.GOTO,
|
||||
token.MEMVAR, token.ALIAS, token.WITH, token.ON,
|
||||
token.STEP, token.DESCENDING, token.UNIQUE,
|
||||
token.DELETE_KW, token.RECALL, token.PACK, token.ZAP,
|
||||
token.TYPE_KW, token.CLASS, token.DECLARE, token.INLINE_KW,
|
||||
token.CASE, token.OTHERWISE, token.ENDCASE, token.BEGIN,
|
||||
token.DO, token.ENDIF, token.FOR, token.IF,
|
||||
token.SWITCH, token.RETURN, token.EXIT, token.LOOP,
|
||||
token.LOCAL, token.PRIVATE, token.PUBLIC,
|
||||
token.STATIC, token.PARAMETERS, token.DESTRUCTOR,
|
||||
token.CONSTRUCTOR, token.OPERATOR_KW,
|
||||
token.FUNCTION_KW, token.PROCEDURE, token.METHOD,
|
||||
token.ELSEIF, token.ELSE, token.ENDCLASS,
|
||||
token.USING, token.ASSIGN_KW, token.ACCESS,
|
||||
token.APPEND, token.REPLACE, token.INDEX,
|
||||
token.SEEK, token.SKIP_KW, token.USE,
|
||||
token.SELECT, token.SET:
|
||||
tok := p.advance()
|
||||
return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal}
|
||||
}
|
||||
}
|
||||
p.error("expected expression, got " + p.current.Kind.String() + " " + p.current.Literal)
|
||||
tok := p.advance()
|
||||
return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: token.NIL_LIT, Value: "NIL"}
|
||||
}
|
||||
}
|
||||
|
||||
// parseArrayOrBlock parses { ... } which can be:
|
||||
// {1, 2, 3} → ArrayLitExpr
|
||||
// {"a" => 1} → HashLitExpr
|
||||
// {|x| x + 1} → BlockExpr
|
||||
// {|| expr} → BlockExpr (no params)
|
||||
func (p *Parser) parseArrayOrBlock() ast.Expr {
|
||||
lbrace := p.expect(token.LBRACE).Pos
|
||||
|
||||
// Code block: {|params| body}
|
||||
if p.at(token.PIPE) {
|
||||
p.advance() // consume first |
|
||||
var params []string
|
||||
if !p.at(token.PIPE) {
|
||||
// Parse parameter names, with optional AS type
|
||||
for {
|
||||
params = append(params, p.expectMethodName().Literal)
|
||||
// Skip optional AS type: AS NUMERIC, AS STRING, etc.
|
||||
if p.match(token.AS) {
|
||||
for p.current.Kind != token.PIPE && p.current.Kind != token.COMMA &&
|
||||
p.current.Kind != token.EOF {
|
||||
p.advance()
|
||||
}
|
||||
}
|
||||
if !p.match(token.COMMA) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
p.expect(token.PIPE) // closing |
|
||||
|
||||
// Parse block body — may have comma-separated expressions
|
||||
// {|x| expr1, expr2} → comma = sequence, returns last value
|
||||
body := p.parseExpr()
|
||||
for p.match(token.COMMA) {
|
||||
// Comma-separated: wrap as sequence, keep last
|
||||
body = p.parseExpr()
|
||||
}
|
||||
rbrace := p.expect(token.RBRACE).Pos
|
||||
|
||||
return &ast.BlockExpr{LBrace: lbrace, Params: params, Body: body, RBrace: rbrace}
|
||||
}
|
||||
|
||||
// Empty: {} → empty array
|
||||
if p.at(token.RBRACE) {
|
||||
rbrace := p.advance().Pos
|
||||
return &ast.ArrayLitExpr{LBrace: lbrace, RBrace: rbrace}
|
||||
}
|
||||
|
||||
// { ... } → variadic params array (HB_PARAM_ALL())
|
||||
if p.at(token.DOT) && p.peekAt(1) == token.DOT && p.peekAt(2) == token.DOT {
|
||||
p.advance() // .
|
||||
p.advance() // .
|
||||
p.advance() // .
|
||||
rbrace := p.expect(token.RBRACE).Pos
|
||||
return &ast.CallExpr{
|
||||
Func: &ast.IdentExpr{NamePos: lbrace, Name: "HB_AParams"},
|
||||
RParen: rbrace,
|
||||
}
|
||||
}
|
||||
|
||||
// Empty hash: {=>} → empty hash literal
|
||||
if p.at(token.DBLARROW) {
|
||||
p.advance() // consume =>
|
||||
rbrace := p.expect(token.RBRACE).Pos
|
||||
return &ast.HashLitExpr{LBrace: lbrace, RBrace: rbrace}
|
||||
}
|
||||
|
||||
// Handle leading comma: {, x, y} → {NIL, x, y}
|
||||
if p.at(token.COMMA) {
|
||||
var items []ast.Expr
|
||||
items = append(items, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"})
|
||||
for p.match(token.COMMA) {
|
||||
if p.at(token.RBRACE) || p.at(token.COMMA) {
|
||||
items = append(items, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"})
|
||||
} else {
|
||||
items = append(items, p.parseExpr())
|
||||
}
|
||||
}
|
||||
rbrace := p.expect(token.RBRACE).Pos
|
||||
return &ast.ArrayLitExpr{LBrace: lbrace, Items: items, RBrace: rbrace}
|
||||
}
|
||||
|
||||
// Parse first element to determine: array or hash
|
||||
first := p.parseExpr()
|
||||
|
||||
// Hash: { key => value, ... }
|
||||
if p.at(token.DBLARROW) {
|
||||
p.advance() // consume =>
|
||||
firstVal := p.parseExpr()
|
||||
keys := []ast.Expr{first}
|
||||
vals := []ast.Expr{firstVal}
|
||||
|
||||
for p.match(token.COMMA) {
|
||||
keys = append(keys, p.parseExpr())
|
||||
p.expect(token.DBLARROW)
|
||||
vals = append(vals, p.parseExpr())
|
||||
}
|
||||
|
||||
rbrace := p.expect(token.RBRACE).Pos
|
||||
return &ast.HashLitExpr{LBrace: lbrace, Keys: keys, Values: vals, RBrace: rbrace}
|
||||
}
|
||||
|
||||
// Array: {expr, expr, ...}
|
||||
items := []ast.Expr{first}
|
||||
for p.match(token.COMMA) {
|
||||
items = append(items, p.parseExpr())
|
||||
}
|
||||
rbrace := p.expect(token.RBRACE).Pos
|
||||
return &ast.ArrayLitExpr{LBrace: lbrace, Items: items, RBrace: rbrace}
|
||||
}
|
||||
|
||||
// parseMacro parses &variable or &(expression).
|
||||
func (p *Parser) parseMacro() ast.Expr {
|
||||
ampPos := p.expect(token.AMPERSAND).Pos
|
||||
|
||||
if p.at(token.LPAREN) {
|
||||
// &(expression)
|
||||
p.advance()
|
||||
expr := p.parseExpr()
|
||||
p.expect(token.RPAREN)
|
||||
return &ast.MacroExpr{AmpPos: ampPos, Expr: expr}
|
||||
}
|
||||
|
||||
// &variable[.suffix] — variable can be a keyword name
|
||||
ident := p.expectMethodName()
|
||||
macroExpr := &ast.MacroExpr{
|
||||
AmpPos: ampPos,
|
||||
Expr: &ast.IdentExpr{NamePos: ident.Pos, Name: ident.Literal},
|
||||
}
|
||||
// &var.suffix — dot terminates macro, suffix is text concatenation
|
||||
// &var. — dot terminates macro with no suffix
|
||||
// &var.1 — lexer may tokenize .1 as DOUBLE
|
||||
if p.at(token.DOT) {
|
||||
p.advance() // consume .
|
||||
// Skip optional suffix identifier (e.g. &a.aa, &a.1)
|
||||
if p.current.Kind == token.IDENT || p.current.Kind == token.INT {
|
||||
p.advance()
|
||||
}
|
||||
} else if p.current.Kind == token.DOUBLE &&
|
||||
(strings.HasPrefix(p.current.Literal, ".") || strings.HasPrefix(p.current.Literal, "0.")) {
|
||||
// Lexer tokenized .1 as DOUBLE — consume it as macro suffix
|
||||
p.advance()
|
||||
}
|
||||
return macroExpr
|
||||
}
|
||||
|
||||
// parseIIF parses IIF(cond, trueExpr, falseExpr).
|
||||
func (p *Parser) parseIIF() ast.Expr {
|
||||
pos := p.advance().Pos // consume IIF
|
||||
p.expect(token.LPAREN)
|
||||
cond := p.parseExpr()
|
||||
p.expect(token.COMMA)
|
||||
var trueExpr ast.Expr
|
||||
if p.at(token.COMMA) || p.at(token.RPAREN) {
|
||||
trueExpr = &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}
|
||||
} else {
|
||||
trueExpr = p.parseExpr()
|
||||
}
|
||||
p.expect(token.COMMA)
|
||||
var falseExpr ast.Expr
|
||||
if p.at(token.RPAREN) {
|
||||
falseExpr = &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"}
|
||||
} else {
|
||||
falseExpr = p.parseExpr()
|
||||
}
|
||||
p.expect(token.RPAREN)
|
||||
return &ast.IIfExpr{IfPos: pos, Cond: cond, True: trueExpr, False: falseExpr}
|
||||
}
|
||||
|
||||
// parseExprList parses a comma-separated list of expressions.
|
||||
func (p *Parser) parseExprList() []ast.Expr {
|
||||
var list []ast.Expr
|
||||
// Handle leading empty param: f(,x) → NIL, x
|
||||
if p.at(token.COMMA) {
|
||||
list = append(list, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"})
|
||||
} else {
|
||||
list = append(list, p.parseExpr())
|
||||
}
|
||||
for p.match(token.COMMA) {
|
||||
// Empty param: f(x,,y) → x, NIL, y
|
||||
if p.at(token.COMMA) || p.at(token.RPAREN) || p.at(token.RBRACE) {
|
||||
list = append(list, &ast.LiteralExpr{ValuePos: p.current.Pos, Kind: token.NIL_LIT, Value: "NIL"})
|
||||
} else {
|
||||
list = append(list, p.parseExpr())
|
||||
}
|
||||
}
|
||||
return list
|
||||
}
|
||||
|
||||
// isSliceSyntax checks if current position inside [...] has a : before ].
|
||||
// Limited lookahead — scans at most 10 tokens (covers 99% of real cases).
|
||||
func (p *Parser) isSliceSyntax() bool {
|
||||
depth := 0
|
||||
maxLook := 10 // limit scan to avoid O(n)
|
||||
for i := 0; i < maxLook; i++ {
|
||||
k := p.peekAt(i)
|
||||
switch k {
|
||||
case token.COLON:
|
||||
if depth == 0 {
|
||||
return true
|
||||
}
|
||||
case token.LBRACKET, token.LPAREN, token.LBRACE:
|
||||
depth++
|
||||
case token.RPAREN, token.RBRACE:
|
||||
depth--
|
||||
case token.RBRACKET:
|
||||
if depth == 0 {
|
||||
return false
|
||||
}
|
||||
depth--
|
||||
case token.NEWLINE, token.EOF:
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false // too complex — treat as normal index
|
||||
}
|
||||
|
||||
// parseSliceIndex parses expression inside slice but stops at : and ]
|
||||
func (p *Parser) parseSliceIndex() ast.Expr {
|
||||
return p.parsePrimaryExpr() // simple: just primary (number, ident, call)
|
||||
}
|
||||
|
||||
// parseInterpolatedString: f"Hello {name}, age {age}"
|
||||
// Parses the format string and extracts {expr} references.
|
||||
// Converts to: fmt.Sprintf("Hello %v, age %v", name, age)
|
||||
// --- Extracted helpers for expression registry ---
|
||||
|
||||
// parsePostfixSend: x:method or x:method(args...)
|
||||
func (p *Parser) parsePostfixSend(x ast.Expr) ast.Expr {
|
||||
colonPos := p.advance().Pos
|
||||
var methodName string
|
||||
var macroMethod ast.Expr
|
||||
|
||||
if p.current.Kind == token.AMPERSAND {
|
||||
macroMethod = p.parseMacro()
|
||||
} else {
|
||||
methodName = p.expectMethodName().Literal
|
||||
}
|
||||
|
||||
var args []ast.Expr
|
||||
var lp, rp token.Position
|
||||
hasParens := false
|
||||
if p.at(token.LPAREN) {
|
||||
hasParens = true
|
||||
lp = p.advance().Pos
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
rp = p.expect(token.RPAREN).Pos
|
||||
}
|
||||
return &ast.SendExpr{
|
||||
Object: x, ColonPos: colonPos,
|
||||
Method: methodName, MacroMethod: macroMethod,
|
||||
HasParens: hasParens,
|
||||
LParen: lp, Args: args, RParen: rp,
|
||||
}
|
||||
}
|
||||
|
||||
// parsePrimaryIdent: IDENT (variable, function ref, IIF, f-string)
|
||||
func (p *Parser) parsePrimaryIdent() ast.Expr {
|
||||
if strings.ToUpper(p.current.Literal) == "IIF" {
|
||||
return p.parseIIF()
|
||||
}
|
||||
if p.current.Literal == "f" && p.peekAt(1) == token.STRING {
|
||||
return p.parseInterpolatedString()
|
||||
}
|
||||
tok := p.advance()
|
||||
return &ast.IdentExpr{NamePos: tok.Pos, Name: tok.Literal}
|
||||
}
|
||||
|
||||
// parsePrimaryWithSend: :field (WITH OBJECT bare colon)
|
||||
func (p *Parser) parsePrimaryWithSend() ast.Expr {
|
||||
pos := p.advance().Pos
|
||||
if p.at(token.IDENT) || p.current.Literal != "" {
|
||||
name := p.advance()
|
||||
return &ast.SendExpr{
|
||||
Object: &ast.IdentExpr{NamePos: pos, Name: "__withObject"},
|
||||
ColonPos: pos,
|
||||
Method: name.Literal,
|
||||
}
|
||||
}
|
||||
return &ast.IdentExpr{NamePos: pos, Name: "__withObject"}
|
||||
}
|
||||
|
||||
// parsePrimarySelf: ::name or ::name(args)
|
||||
func (p *Parser) parsePrimarySelf() ast.Expr {
|
||||
pos := p.advance().Pos
|
||||
if p.at(token.IDENT) || p.current.Literal != "" {
|
||||
name := p.advance()
|
||||
self := &ast.SelfExpr{ColonPos: pos}
|
||||
hasParens := false
|
||||
var args []ast.Expr
|
||||
var lp, rp token.Position
|
||||
if p.at(token.LPAREN) {
|
||||
hasParens = true
|
||||
lp = p.advance().Pos
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
rp = p.expect(token.RPAREN).Pos
|
||||
}
|
||||
return &ast.SendExpr{
|
||||
Object: self, ColonPos: pos, Method: name.Literal,
|
||||
HasParens: hasParens, LParen: lp, Args: args, RParen: rp,
|
||||
}
|
||||
}
|
||||
return &ast.SelfExpr{ColonPos: pos}
|
||||
}
|
||||
|
||||
func (p *Parser) parseInterpolatedString() ast.Expr {
|
||||
fPos := p.advance().Pos // consume 'f'
|
||||
strTok := p.expect(token.STRING)
|
||||
src := strTok.Literal
|
||||
|
||||
var parts []ast.Expr
|
||||
var fmtBuf string
|
||||
var args []ast.Expr
|
||||
|
||||
i := 0
|
||||
for i < len(src) {
|
||||
if src[i] == '{' {
|
||||
// Find closing }
|
||||
j := i + 1
|
||||
depth := 1
|
||||
for j < len(src) && depth > 0 {
|
||||
if src[j] == '{' { depth++ }
|
||||
if src[j] == '}' { depth-- }
|
||||
j++
|
||||
}
|
||||
exprStr := src[i+1 : j-1]
|
||||
|
||||
// Check for format spec: {expr:fmt}
|
||||
fmtSpec := "%v"
|
||||
if colonIdx := strings.LastIndex(exprStr, ":"); colonIdx >= 0 {
|
||||
fmtSpec = "%" + exprStr[colonIdx+1:]
|
||||
exprStr = exprStr[:colonIdx]
|
||||
}
|
||||
fmtBuf += fmtSpec
|
||||
|
||||
// Parse the expression inside {}
|
||||
// Simple: just use IdentExpr for variable names
|
||||
args = append(args, &ast.IdentExpr{NamePos: fPos, Name: exprStr})
|
||||
i = j
|
||||
} else {
|
||||
fmtBuf += string(src[i])
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
if len(args) == 0 {
|
||||
// No interpolation — return as plain string
|
||||
return &ast.LiteralExpr{ValuePos: fPos, Kind: token.STRING, Value: src}
|
||||
}
|
||||
|
||||
// Build: fmt.Sprintf(fmtStr, arg1, arg2, ...)
|
||||
_ = parts // not used in Sprintf approach
|
||||
allArgs := make([]ast.Expr, 0, len(args)+1)
|
||||
allArgs = append(allArgs, &ast.LiteralExpr{ValuePos: fPos, Kind: token.STRING, Value: fmtBuf})
|
||||
allArgs = append(allArgs, args...)
|
||||
|
||||
return &ast.CallExpr{
|
||||
Func: &ast.DotExpr{
|
||||
X: &ast.IdentExpr{NamePos: fPos, Name: "fmt"},
|
||||
DotPos: fPos,
|
||||
Member: "Sprintf",
|
||||
},
|
||||
LParen: fPos,
|
||||
Args: allArgs,
|
||||
RParen: fPos,
|
||||
}
|
||||
}
|
||||
258
compiler/parser/exprreg.go
Normal file
258
compiler/parser/exprreg.go
Normal file
@@ -0,0 +1,258 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// exprreg.go — Expression parser registries for Pratt parser.
|
||||
//
|
||||
// Three registries:
|
||||
// prefixParsers — unary prefix: -, !, ++, --, <-, ASYNC, AWAIT
|
||||
// postfixParsers — postfix: (), [], :, ., ?:, ++, --, ->
|
||||
// primaryParsers — atoms: INT, STRING, IDENT, (, {, ::
|
||||
//
|
||||
// Adding a new operator = one line in init().
|
||||
|
||||
package parser
|
||||
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"five/compiler/token"
|
||||
)
|
||||
|
||||
// PrefixParser parses a prefix unary expression.
|
||||
type PrefixParser func(p *Parser) ast.Expr
|
||||
|
||||
// PostfixParser parses a postfix expression given the left-hand side.
|
||||
type PostfixParser func(p *Parser, x ast.Expr) ast.Expr
|
||||
|
||||
// PrimaryParser parses an atomic/primary expression.
|
||||
type PrimaryParser func(p *Parser) ast.Expr
|
||||
|
||||
var (
|
||||
prefixParsers map[token.Kind]PrefixParser
|
||||
postfixParsers map[token.Kind]PostfixParser
|
||||
primaryParsers map[token.Kind]PrimaryParser
|
||||
)
|
||||
|
||||
func init() {
|
||||
prefixParsers = map[token.Kind]PrefixParser{
|
||||
token.MINUS: prefixUnary(token.MINUS),
|
||||
token.PLUS: prefixPlus,
|
||||
token.NOT: prefixUnary(token.NOT),
|
||||
token.INC: prefixUnary(token.INC),
|
||||
token.DEC: prefixUnary(token.DEC),
|
||||
token.ARROW_LEFT: prefixChanRecv,
|
||||
token.ASYNC_KW: prefixAsync,
|
||||
token.AWAIT_KW: prefixAwait,
|
||||
token.AT: prefixRef,
|
||||
}
|
||||
|
||||
postfixParsers = map[token.Kind]PostfixParser{
|
||||
token.LPAREN: postfixCall,
|
||||
token.LBRACKET: postfixIndex,
|
||||
token.COLON: postfixSend,
|
||||
token.QMARK: postfixNilSafe,
|
||||
token.DOT: postfixDot,
|
||||
token.ARROW: postfixAlias,
|
||||
token.INC: postfixIncDec(token.INC),
|
||||
token.DEC: postfixIncDec(token.DEC),
|
||||
token.COLONCOLON: postfixSelfStop,
|
||||
}
|
||||
|
||||
primaryParsers = map[token.Kind]PrimaryParser{
|
||||
token.INT: primaryLiteral,
|
||||
token.LONG: primaryLiteral,
|
||||
token.DOUBLE: primaryLiteral,
|
||||
token.STRING: primaryLiteral,
|
||||
token.DATE_LIT: primaryLiteral,
|
||||
token.TRUE: primaryLiteral,
|
||||
token.FALSE: primaryLiteral,
|
||||
token.NIL_LIT: primaryLiteral,
|
||||
|
||||
token.COLONCOLON: primarySelf,
|
||||
token.LPAREN: primaryParen,
|
||||
token.IF: primaryIf,
|
||||
token.IDENT: primaryIdent,
|
||||
token.AMPERSAND: primaryMacro,
|
||||
token.COLON: primaryWithSend,
|
||||
token.LBRACE: primaryArrayOrBlock,
|
||||
}
|
||||
}
|
||||
|
||||
// --- Prefix parsers ---
|
||||
|
||||
func prefixUnary(op token.Kind) PrefixParser {
|
||||
return func(p *Parser) ast.Expr {
|
||||
tok := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.UnaryExpr{OpPos: tok.Pos, Op: op, X: x}
|
||||
}
|
||||
}
|
||||
|
||||
func prefixPlus(p *Parser) ast.Expr {
|
||||
p.advance() // unary plus — no-op
|
||||
return p.parseUnaryExpr()
|
||||
}
|
||||
|
||||
func prefixChanRecv(p *Parser) ast.Expr {
|
||||
pos := p.advance().Pos
|
||||
ch := p.parsePostfixExpr()
|
||||
return &ast.ChanRecvExpr{ArrowPos: pos, Chan: ch}
|
||||
}
|
||||
|
||||
func prefixAsync(p *Parser) ast.Expr {
|
||||
pos := p.advance().Pos
|
||||
call := p.parsePostfixExpr()
|
||||
return &ast.AsyncExpr{AsyncPos: pos, Call: call}
|
||||
}
|
||||
|
||||
func prefixAwait(p *Parser) ast.Expr {
|
||||
pos := p.advance().Pos
|
||||
future := p.parsePostfixExpr()
|
||||
return &ast.AwaitExpr{AwaitPos: pos, Future: future}
|
||||
}
|
||||
|
||||
func prefixRef(p *Parser) ast.Expr {
|
||||
op := p.advance()
|
||||
x := p.parseUnaryExpr()
|
||||
return &ast.RefExpr{AtPos: op.Pos, X: x}
|
||||
}
|
||||
|
||||
// --- Postfix parsers ---
|
||||
|
||||
func postfixCall(p *Parser, x ast.Expr) ast.Expr {
|
||||
lp := p.advance().Pos
|
||||
var args []ast.Expr
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
rp := p.expect(token.RPAREN).Pos
|
||||
return &ast.CallExpr{Func: x, LParen: lp, Args: args, RParen: rp}
|
||||
}
|
||||
|
||||
func postfixIndex(p *Parser, x ast.Expr) ast.Expr {
|
||||
lb := p.advance().Pos
|
||||
|
||||
// Slice syntax detection
|
||||
if p.isSliceSyntax() {
|
||||
var low, high ast.Expr
|
||||
if !p.at(token.COLON) {
|
||||
low = p.parseSliceIndex()
|
||||
}
|
||||
p.expect(token.COLON)
|
||||
if !p.at(token.RBRACKET) {
|
||||
high = p.parseSliceIndex()
|
||||
}
|
||||
rb := p.expect(token.RBRACKET).Pos
|
||||
return &ast.SliceExpr{X: x, LBracket: lb, Low: low, High: high, RBracket: rb}
|
||||
}
|
||||
|
||||
// Normal array index
|
||||
index := p.parseExpr()
|
||||
rb := token.Position{}
|
||||
for p.match(token.COMMA) {
|
||||
rb = p.current.Pos
|
||||
x = &ast.IndexExpr{X: x, LBracket: lb, Index: index, RBracket: rb}
|
||||
index = p.parseExpr()
|
||||
lb = rb
|
||||
}
|
||||
rb = p.expect(token.RBRACKET).Pos
|
||||
return &ast.IndexExpr{X: x, LBracket: lb, Index: index, RBracket: rb}
|
||||
}
|
||||
|
||||
func postfixDot(p *Parser, x ast.Expr) ast.Expr {
|
||||
if p.peekLitAt(1) != "" {
|
||||
dotPos := p.advance().Pos
|
||||
member := p.advance()
|
||||
return &ast.DotExpr{X: x, DotPos: dotPos, Member: member.Literal}
|
||||
}
|
||||
return nil // signal: stop postfix loop
|
||||
}
|
||||
|
||||
func postfixIncDec(op token.Kind) PostfixParser {
|
||||
return func(p *Parser, x ast.Expr) ast.Expr {
|
||||
opPos := p.advance().Pos
|
||||
return &ast.PostfixExpr{X: x, OpPos: opPos, Op: op}
|
||||
}
|
||||
}
|
||||
|
||||
func postfixSelfStop(p *Parser, x ast.Expr) ast.Expr {
|
||||
return nil // :: after expression — stop
|
||||
}
|
||||
|
||||
// postfixNilSafe and postfixAlias are implemented below; postfixSend delegates
// to Parser.parsePostfixSend, which is kept in expr.go.
|
||||
|
||||
func postfixNilSafe(p *Parser, x ast.Expr) ast.Expr {
|
||||
if p.peekAt(1) != token.COLON {
|
||||
return nil // bare ? = QOut, not postfix
|
||||
}
|
||||
p.advance() // consume ?
|
||||
qpos := p.advance().Pos // consume :
|
||||
methodName := p.expectMethodName().Literal
|
||||
var args []ast.Expr
|
||||
hasParens := false
|
||||
if p.at(token.LPAREN) {
|
||||
hasParens = true
|
||||
p.advance()
|
||||
if !p.at(token.RPAREN) {
|
||||
args = p.parseExprList()
|
||||
}
|
||||
p.expect(token.RPAREN)
|
||||
}
|
||||
return &ast.NilSafeExpr{X: x, QPos: qpos, Method: methodName, Args: args, HasParens: hasParens}
|
||||
}
|
||||
|
||||
func postfixAlias(p *Parser, x ast.Expr) ast.Expr {
|
||||
arrowPos := p.advance().Pos
|
||||
field := p.parsePrimaryExpr()
|
||||
return &ast.AliasExpr{Alias: x, ArrowPos: arrowPos, Field: field}
|
||||
}
|
||||
|
||||
func postfixSend(p *Parser, x ast.Expr) ast.Expr {
|
||||
return p.parsePostfixSend(x)
|
||||
}
|
||||
|
||||
// --- Primary parsers ---
|
||||
|
||||
func primaryLiteral(p *Parser) ast.Expr {
|
||||
tok := p.advance()
|
||||
return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: tok.Kind, Value: tok.Literal}
|
||||
}
|
||||
|
||||
func primaryParen(p *Parser) ast.Expr {
|
||||
p.advance()
|
||||
expr := p.parseExpr()
|
||||
for p.match(token.COMMA) {
|
||||
expr = p.parseExpr()
|
||||
}
|
||||
p.expect(token.RPAREN)
|
||||
return expr
|
||||
}
|
||||
|
||||
func primaryIf(p *Parser) ast.Expr {
|
||||
if p.peekAt(1) == token.LPAREN {
|
||||
return p.parseIIF()
|
||||
}
|
||||
p.error("expected expression, got IF")
|
||||
tok := p.advance()
|
||||
return &ast.LiteralExpr{ValuePos: tok.Pos, Kind: token.NIL_LIT, Value: "NIL"}
|
||||
}
|
||||
|
||||
func primaryIdent(p *Parser) ast.Expr {
|
||||
return p.parsePrimaryIdent()
|
||||
}
|
||||
|
||||
func primaryMacro(p *Parser) ast.Expr {
|
||||
return p.parseMacro()
|
||||
}
|
||||
|
||||
func primaryWithSend(p *Parser) ast.Expr {
|
||||
return p.parsePrimaryWithSend()
|
||||
}
|
||||
|
||||
func primaryArrayOrBlock(p *Parser) ast.Expr {
|
||||
return p.parseArrayOrBlock()
|
||||
}
|
||||
|
||||
func primarySelf(p *Parser) ast.Expr {
|
||||
return p.parsePrimarySelf()
|
||||
}
|
||||
2162
compiler/parser/parser.go
Normal file
2162
compiler/parser/parser.go
Normal file
File diff suppressed because it is too large
Load Diff
427
compiler/parser/parser_test.go
Normal file
427
compiler/parser/parser_test.go
Normal file
@@ -0,0 +1,427 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package parser
|
||||
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"five/compiler/token"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func parseOK(t *testing.T, source string) *ast.File {
|
||||
t.Helper()
|
||||
file, errs := Parse("test.prg", source)
|
||||
if len(errs) > 0 {
|
||||
for _, e := range errs {
|
||||
t.Errorf("parse error: %s", e)
|
||||
}
|
||||
t.FailNow()
|
||||
}
|
||||
return file
|
||||
}
|
||||
|
||||
// --- Function declaration ---
|
||||
|
||||
func TestParseSimpleFunction(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN NIL
|
||||
`)
|
||||
if len(file.Decls) != 1 {
|
||||
t.Fatalf("expected 1 decl, got %d", len(file.Decls))
|
||||
}
|
||||
fn, ok := file.Decls[0].(*ast.FuncDecl)
|
||||
if !ok {
|
||||
t.Fatalf("expected FuncDecl, got %T", file.Decls[0])
|
||||
}
|
||||
if fn.Name != "Main" {
|
||||
t.Errorf("name = %q, want %q", fn.Name, "Main")
|
||||
}
|
||||
if fn.IsProc {
|
||||
t.Error("should not be PROCEDURE")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFunctionWithLocals(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Foo(a, b)
|
||||
LOCAL n := 10
|
||||
LOCAL cName := "hello", x
|
||||
RETURN n
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
if len(fn.Params) != 2 {
|
||||
t.Errorf("params = %d, want 2", len(fn.Params))
|
||||
}
|
||||
if len(fn.Decls) != 2 {
|
||||
t.Errorf("decls = %d, want 2 (two LOCAL statements)", len(fn.Decls))
|
||||
}
|
||||
// Check second LOCAL has 2 vars
|
||||
vd := fn.Decls[1].(*ast.VarDecl)
|
||||
if len(vd.Vars) != 2 {
|
||||
t.Errorf("second LOCAL vars = %d, want 2", len(vd.Vars))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseProcedure(t *testing.T) {
|
||||
file := parseOK(t, `PROCEDURE DoStuff()
|
||||
RETURN
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
if !fn.IsProc {
|
||||
t.Error("should be PROCEDURE")
|
||||
}
|
||||
}
|
||||
|
||||
// --- Expressions ---
|
||||
|
||||
func TestParseArithmetic(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN 1 + 2 * 3
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ret := fn.Body[0].(*ast.ReturnStmt)
|
||||
// Should be: 1 + (2 * 3) due to precedence
|
||||
bin, ok := ret.Value.(*ast.BinaryExpr)
|
||||
if !ok {
|
||||
t.Fatalf("expected BinaryExpr, got %T", ret.Value)
|
||||
}
|
||||
if bin.Op != token.PLUS {
|
||||
t.Errorf("top op = %v, want PLUS", bin.Op)
|
||||
}
|
||||
// Right side should be 2 * 3
|
||||
right, ok := bin.Right.(*ast.BinaryExpr)
|
||||
if !ok {
|
||||
t.Fatalf("right should be BinaryExpr, got %T", bin.Right)
|
||||
}
|
||||
if right.Op != token.STAR {
|
||||
t.Errorf("right op = %v, want STAR", right.Op)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAssignment(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
LOCAL n
|
||||
n := 10
|
||||
RETURN n
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
// Body[0] should be assignment: n := 10
|
||||
es := fn.Body[0].(*ast.ExprStmt)
|
||||
assign, ok := es.X.(*ast.AssignExpr)
|
||||
if !ok {
|
||||
t.Fatalf("expected AssignExpr, got %T", es.X)
|
||||
}
|
||||
if assign.Op != token.ASSIGN {
|
||||
t.Errorf("assign op = %v, want ASSIGN", assign.Op)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFunctionCall(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN Str(42)
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ret := fn.Body[0].(*ast.ReturnStmt)
|
||||
call, ok := ret.Value.(*ast.CallExpr)
|
||||
if !ok {
|
||||
t.Fatalf("expected CallExpr, got %T", ret.Value)
|
||||
}
|
||||
ident := call.Func.(*ast.IdentExpr)
|
||||
if ident.Name != "Str" {
|
||||
t.Errorf("func name = %q, want Str", ident.Name)
|
||||
}
|
||||
if len(call.Args) != 1 {
|
||||
t.Errorf("args = %d, want 1", len(call.Args))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseStringConcat(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN "Hello, " + "World!"
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ret := fn.Body[0].(*ast.ReturnStmt)
|
||||
bin := ret.Value.(*ast.BinaryExpr)
|
||||
if bin.Op != token.PLUS {
|
||||
t.Errorf("op = %v, want PLUS", bin.Op)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Control flow ---
|
||||
|
||||
func TestParseIfElse(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
LOCAL n := 10
|
||||
IF n > 5
|
||||
RETURN .T.
|
||||
ELSE
|
||||
RETURN .F.
|
||||
ENDIF
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ifStmt, ok := fn.Body[0].(*ast.IfStmt)
|
||||
if !ok {
|
||||
t.Fatalf("expected IfStmt, got %T", fn.Body[0])
|
||||
}
|
||||
if len(ifStmt.Body) != 1 {
|
||||
t.Errorf("if body = %d stmts", len(ifStmt.Body))
|
||||
}
|
||||
if len(ifStmt.ElseBody) != 1 {
|
||||
t.Errorf("else body = %d stmts", len(ifStmt.ElseBody))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseIfElseIf(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
LOCAL n := 10
|
||||
IF n > 10
|
||||
RETURN 1
|
||||
ELSEIF n > 5
|
||||
RETURN 2
|
||||
ELSEIF n > 0
|
||||
RETURN 3
|
||||
ELSE
|
||||
RETURN 0
|
||||
ENDIF
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ifStmt := fn.Body[0].(*ast.IfStmt)
|
||||
if len(ifStmt.ElseIfs) != 2 {
|
||||
t.Errorf("elseifs = %d, want 2", len(ifStmt.ElseIfs))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDoWhile(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
LOCAL i := 0
|
||||
DO WHILE i < 10
|
||||
i++
|
||||
ENDDO
|
||||
RETURN i
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
dw, ok := fn.Body[0].(*ast.DoWhileStmt)
|
||||
if !ok {
|
||||
t.Fatalf("expected DoWhileStmt, got %T", fn.Body[0])
|
||||
}
|
||||
if len(dw.Body) != 1 {
|
||||
t.Errorf("body = %d stmts", len(dw.Body))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseForNext(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
LOCAL i
|
||||
FOR i := 1 TO 10
|
||||
? i
|
||||
NEXT
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
forStmt, ok := fn.Body[0].(*ast.ForStmt)
|
||||
if !ok {
|
||||
t.Fatalf("expected ForStmt, got %T", fn.Body[0])
|
||||
}
|
||||
if forStmt.Var != "i" {
|
||||
t.Errorf("var = %q, want i", forStmt.Var)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseForEach(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
LOCAL x
|
||||
FOR EACH x IN {1, 2, 3}
|
||||
? x
|
||||
NEXT
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
fe, ok := fn.Body[0].(*ast.ForEachStmt)
|
||||
if !ok {
|
||||
t.Fatalf("expected ForEachStmt, got %T", fn.Body[0])
|
||||
}
|
||||
if fe.Var != "x" {
|
||||
t.Errorf("var = %q, want x", fe.Var)
|
||||
}
|
||||
}
|
||||
|
||||
// --- QOut ---
|
||||
|
||||
func TestParseQOut(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
? "Hello"
|
||||
? 1 + 2, "World"
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
q1, ok := fn.Body[0].(*ast.QOutStmt)
|
||||
if !ok {
|
||||
t.Fatalf("expected QOutStmt, got %T", fn.Body[0])
|
||||
}
|
||||
if len(q1.Exprs) != 1 {
|
||||
t.Errorf("? args = %d, want 1", len(q1.Exprs))
|
||||
}
|
||||
q2 := fn.Body[1].(*ast.QOutStmt)
|
||||
if len(q2.Exprs) != 2 {
|
||||
t.Errorf("? args = %d, want 2", len(q2.Exprs))
|
||||
}
|
||||
}
|
||||
|
||||
// --- xBase commands ---
|
||||
|
||||
func TestParseUse(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
USE "customers" VIA DBFCDX ALIAS cust
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
use, ok := fn.Body[0].(*ast.UseCmd)
|
||||
if !ok {
|
||||
t.Fatalf("expected UseCmd, got %T", fn.Body[0])
|
||||
}
|
||||
if use.Via != "DBFCDX" {
|
||||
t.Errorf("via = %q, want DBFCDX", use.Via)
|
||||
}
|
||||
if use.Alias != "cust" {
|
||||
t.Errorf("alias = %q, want cust", use.Alias)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseGoTop(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
GO TOP
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
goCmd, ok := fn.Body[0].(*ast.GoCmd)
|
||||
if !ok {
|
||||
t.Fatalf("expected GoCmd, got %T", fn.Body[0])
|
||||
}
|
||||
if goCmd.Direction != "TOP" {
|
||||
t.Errorf("direction = %q, want TOP", goCmd.Direction)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSeek(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
SEEK "SMITH"
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
seek, ok := fn.Body[0].(*ast.SeekCmd)
|
||||
if !ok {
|
||||
t.Fatalf("expected SeekCmd, got %T", fn.Body[0])
|
||||
}
|
||||
lit := seek.Key.(*ast.LiteralExpr)
|
||||
if lit.Value != "SMITH" {
|
||||
t.Errorf("key = %q, want SMITH", lit.Value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseReplace(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
REPLACE name WITH "Kim", salary WITH 50000
|
||||
RETURN NIL
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
rep, ok := fn.Body[0].(*ast.ReplaceCmd)
|
||||
if !ok {
|
||||
t.Fatalf("expected ReplaceCmd, got %T", fn.Body[0])
|
||||
}
|
||||
if len(rep.Fields) != 2 {
|
||||
t.Errorf("fields = %d, want 2", len(rep.Fields))
|
||||
}
|
||||
}
|
||||
|
||||
// --- Array and Hash literals ---
|
||||
|
||||
func TestParseArrayLiteral(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN {1, 2, 3}
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ret := fn.Body[0].(*ast.ReturnStmt)
|
||||
arr, ok := ret.Value.(*ast.ArrayLitExpr)
|
||||
if !ok {
|
||||
t.Fatalf("expected ArrayLitExpr, got %T", ret.Value)
|
||||
}
|
||||
if len(arr.Items) != 3 {
|
||||
t.Errorf("items = %d, want 3", len(arr.Items))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHashLiteral(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN {"a" => 1, "b" => 2}
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ret := fn.Body[0].(*ast.ReturnStmt)
|
||||
hash, ok := ret.Value.(*ast.HashLitExpr)
|
||||
if !ok {
|
||||
t.Fatalf("expected HashLitExpr, got %T", ret.Value)
|
||||
}
|
||||
if len(hash.Keys) != 2 {
|
||||
t.Errorf("keys = %d, want 2", len(hash.Keys))
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCodeBlock(t *testing.T) {
|
||||
file := parseOK(t, `FUNCTION Main()
|
||||
RETURN {|x| x + 1}
|
||||
`)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
ret := fn.Body[0].(*ast.ReturnStmt)
|
||||
blk, ok := ret.Value.(*ast.BlockExpr)
|
||||
if !ok {
|
||||
t.Fatalf("expected BlockExpr, got %T", ret.Value)
|
||||
}
|
||||
if len(blk.Params) != 1 || blk.Params[0] != "x" {
|
||||
t.Errorf("params = %v, want [x]", blk.Params)
|
||||
}
|
||||
}
|
||||
|
||||
// --- IMPORT ---
|
||||
|
||||
func TestParseImport(t *testing.T) {
|
||||
file := parseOK(t, `IMPORT "net/http"
|
||||
|
||||
FUNCTION Main()
|
||||
RETURN NIL
|
||||
`)
|
||||
if len(file.Imports) != 1 {
|
||||
t.Fatalf("imports = %d, want 1", len(file.Imports))
|
||||
}
|
||||
if file.Imports[0].Path != "net/http" {
|
||||
t.Errorf("import path = %q, want net/http", file.Imports[0].Path)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Full program ---
|
||||
|
||||
func TestParseFullProgram(t *testing.T) {
|
||||
src := `FUNCTION Main()
|
||||
LOCAL nSum := 0, i
|
||||
FOR i := 1 TO 10
|
||||
nSum += i
|
||||
NEXT
|
||||
? "Sum =", nSum
|
||||
IF nSum > 50
|
||||
? "Big"
|
||||
ELSE
|
||||
? "Small"
|
||||
ENDIF
|
||||
RETURN nSum
|
||||
`
|
||||
file := parseOK(t, src)
|
||||
fn := file.Decls[0].(*ast.FuncDecl)
|
||||
if fn.Name != "Main" {
|
||||
t.Errorf("name = %q", fn.Name)
|
||||
}
|
||||
if len(fn.Decls) != 1 {
|
||||
t.Errorf("decls = %d, want 1 (LOCAL)", len(fn.Decls))
|
||||
}
|
||||
// Body: FOR + ? + IF + RETURN
|
||||
if len(fn.Body) < 3 {
|
||||
t.Errorf("body stmts = %d, want at least 3", len(fn.Body))
|
||||
}
|
||||
}
|
||||
287
compiler/parser/stmtreg.go
Normal file
287
compiler/parser/stmtreg.go
Normal file
@@ -0,0 +1,287 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// stmtreg.go — Statement parser registry.
|
||||
//
|
||||
// Instead of a 800+ line switch in parseStmt(), each statement type
|
||||
// registers its parser function. New statements can be added by
|
||||
// simply adding one line to the stmtRegistry map built in init().
|
||||
//
|
||||
// Pattern: token.Kind → func(*Parser) ast.Stmt
|
||||
|
||||
package parser
|
||||
|
||||
import (
|
||||
"five/compiler/ast"
|
||||
"five/compiler/token"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// StmtParser is a function that parses a statement starting with the current token.
|
||||
type StmtParser func(p *Parser) ast.Stmt
|
||||
|
||||
// stmtRegistry maps token kinds to their statement parsers.
|
||||
var stmtRegistry map[token.Kind]StmtParser
|
||||
|
||||
func init() {
|
||||
stmtRegistry = map[token.Kind]StmtParser{
|
||||
// Control flow
|
||||
token.IF: (*Parser).stmtIf,
|
||||
token.DO: (*Parser).stmtDo,
|
||||
token.WHILE: (*Parser).stmtWhile,
|
||||
token.FOR: (*Parser).stmtFor,
|
||||
token.BEGIN: (*Parser).stmtBegin,
|
||||
token.SWITCH: (*Parser).stmtSwitch,
|
||||
token.RETURN: (*Parser).stmtReturn,
|
||||
token.EXIT: (*Parser).stmtExit,
|
||||
token.LOOP: (*Parser).stmtLoop,
|
||||
|
||||
// I/O
|
||||
token.QMARK: (*Parser).stmtQOut,
|
||||
token.QQMARK: (*Parser).stmtQQOut,
|
||||
|
||||
// Variables
|
||||
token.PRIVATE: (*Parser).stmtPrivate,
|
||||
token.PUBLIC: (*Parser).stmtPublic,
|
||||
token.LOCAL: (*Parser).stmtVarDecl,
|
||||
token.STATIC: (*Parser).stmtVarDecl,
|
||||
token.PARAMETERS: (*Parser).stmtParameters,
|
||||
token.DECLARE: (*Parser).stmtDeclare,
|
||||
|
||||
// xBase database
|
||||
token.USE: (*Parser).stmtUse,
|
||||
token.SELECT: (*Parser).stmtSelect,
|
||||
token.GO: (*Parser).stmtGo,
|
||||
token.GOTO: (*Parser).stmtGo,
|
||||
token.SKIP_KW: (*Parser).stmtSkip,
|
||||
token.SEEK: (*Parser).stmtSeek,
|
||||
token.REPLACE: (*Parser).stmtReplace,
|
||||
token.APPEND: (*Parser).stmtAppend,
|
||||
token.DELETE_KW: (*Parser).stmtDelete,
|
||||
token.RECALL: (*Parser).stmtRecallPackZap,
|
||||
token.PACK: (*Parser).stmtRecallPackZap,
|
||||
token.ZAP: (*Parser).stmtRecallPackZap,
|
||||
token.INDEX: (*Parser).stmtIndex,
|
||||
token.SET: (*Parser).stmtSet,
|
||||
|
||||
// Screen
|
||||
token.AT: (*Parser).stmtAt,
|
||||
|
||||
// Five Go extensions
|
||||
token.DEFER_KW: (*Parser).stmtDefer,
|
||||
token.CONST_KW: (*Parser).stmtConst,
|
||||
token.WATCH_KW: (*Parser).stmtWatch,
|
||||
token.WITH: (*Parser).stmtWith,
|
||||
token.PARALLEL_KW: (*Parser).stmtParallel,
|
||||
token.SPAWN_KW: (*Parser).stmtSpawn,
|
||||
token.ARROW_LEFT: (*Parser).stmtArrowLeft,
|
||||
}
|
||||
}
|
||||
|
||||
// lookupStmtParser finds a registered parser for the current token.
|
||||
func (p *Parser) lookupStmtParser() StmtParser {
|
||||
if fn, ok := stmtRegistry[p.current.Kind]; ok {
|
||||
return fn
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- Thin wrappers: each calls the existing parse method ---
|
||||
|
||||
func (p *Parser) stmtIf() ast.Stmt {
|
||||
if p.peekAt(1) == token.LPAREN {
|
||||
if p.looksLikeIIF() {
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
}
|
||||
return p.parseIf()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtDo() ast.Stmt {
|
||||
if p.peekAt(1) == token.LPAREN {
|
||||
p.tokens[p.pos].Kind = token.IDENT
|
||||
p.tokens[p.pos].Literal = "Do"
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
if p.peekAt(1) == token.CASE || token.LookupKeyword(p.peekLitAt(1)) == token.CASE {
|
||||
return p.parseDoCase()
|
||||
}
|
||||
if p.peekAt(1) == token.WHILE {
|
||||
return p.parseDoWhile()
|
||||
}
|
||||
if p.peekAt(1) == token.IDENT {
|
||||
return p.parseDoProc()
|
||||
}
|
||||
return p.parseDoWhile()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtWhile() ast.Stmt {
|
||||
if p.peekAt(1) == token.LPAREN {
|
||||
p.tokens[p.pos].Kind = token.IDENT
|
||||
p.tokens[p.pos].Literal = "While"
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
return p.parseDoWhile()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtFor() ast.Stmt {
|
||||
next := p.peekAt(1)
|
||||
if next == token.ASSIGN || next == token.LPAREN ||
|
||||
next == token.PLUSEQ || next == token.MINUSEQ {
|
||||
p.tokens[p.pos].Kind = token.IDENT
|
||||
p.tokens[p.pos].Literal = "for"
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
return p.parseFor()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtBegin() ast.Stmt {
|
||||
if p.peekAt(1) != token.SEQUENCE && p.peekAt(1) != token.NEWLINE && p.peekAt(1) != token.EOF {
|
||||
p.tokens[p.pos].Kind = token.IDENT
|
||||
p.tokens[p.pos].Literal = "begin"
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
return p.parseBeginSequence()
|
||||
}
|
||||
|
||||
// stmtSwitch is the registry entry for token.SWITCH; it delegates to
// parseSwitch.
func (p *Parser) stmtSwitch() ast.Stmt {
	return p.parseSwitch()
}
|
||||
|
||||
func (p *Parser) stmtReturn() ast.Stmt {
|
||||
next := p.peekAt(1)
|
||||
if next == token.ASSIGN || next == token.PLUSEQ || next == token.MINUSEQ {
|
||||
p.tokens[p.pos].Kind = token.IDENT
|
||||
p.tokens[p.pos].Literal = "return"
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
return p.parseReturn()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtExit() ast.Stmt {
|
||||
pos := p.advance().Pos
|
||||
return &ast.ExitStmt{ExitPos: pos}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtLoop() ast.Stmt {
|
||||
pos := p.advance().Pos
|
||||
return &ast.LoopStmt{LoopPos: pos}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtQOut() ast.Stmt { return p.parseQOut(false) }
|
||||
func (p *Parser) stmtQQOut() ast.Stmt { return p.parseQOut(true) }
|
||||
|
||||
func (p *Parser) stmtPrivate() ast.Stmt { return p.parsePrivatePublic(ast.ScopePrivate) }
|
||||
func (p *Parser) stmtPublic() ast.Stmt { return p.parsePrivatePublic(ast.ScopePublic) }
|
||||
func (p *Parser) stmtVarDecl() ast.Stmt { return p.parseVarDecl() }
|
||||
|
||||
func (p *Parser) stmtParameters() ast.Stmt {
|
||||
p.tokens[p.pos].Kind = token.LOCAL
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseVarDecl()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtDeclare() ast.Stmt {
|
||||
p.skipToEndOfLine()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtUse() ast.Stmt { return p.parseUse() }
|
||||
func (p *Parser) stmtSelect() ast.Stmt { return p.parseSelect() }
|
||||
func (p *Parser) stmtSkip() ast.Stmt { return p.parseSkip() }
|
||||
func (p *Parser) stmtSeek() ast.Stmt { return p.parseSeek() }
|
||||
func (p *Parser) stmtReplace() ast.Stmt { return p.parseReplace() }
|
||||
func (p *Parser) stmtAppend() ast.Stmt { return p.parseAppend() }
|
||||
func (p *Parser) stmtIndex() ast.Stmt { return p.parseIndex() }
|
||||
func (p *Parser) stmtAt() ast.Stmt { return p.parseAtCmd() }
|
||||
|
||||
func (p *Parser) stmtGo() ast.Stmt {
|
||||
if p.current.Kind == token.GO && p.peekAt(1) == token.LPAREN {
|
||||
p.tokens[p.pos].Kind = token.IDENT
|
||||
p.tokens[p.pos].Literal = "Go"
|
||||
p.current = p.tokens[p.pos]
|
||||
return p.parseExprStmt()
|
||||
}
|
||||
return p.parseGo()
|
||||
}
|
||||
|
||||
func (p *Parser) stmtDelete() ast.Stmt {
|
||||
pos := p.advance().Pos
|
||||
if p.current.Kind == token.IDENT {
|
||||
upper := strings.ToUpper(p.current.Literal)
|
||||
if upper == "FILE" {
|
||||
p.skipToEndOfLine()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
|
||||
}
|
||||
if upper == "ALL" || upper == "TAG" {
|
||||
p.skipToEndOfLine()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
|
||||
}
|
||||
}
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.CallExpr{
|
||||
Func: &ast.IdentExpr{NamePos: pos, Name: "DbDelete"},
|
||||
}}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtRecallPackZap() ast.Stmt {
|
||||
tok := p.advance()
|
||||
var fname string
|
||||
switch tok.Kind {
|
||||
case token.RECALL:
|
||||
fname = "DbRecall"
|
||||
case token.PACK:
|
||||
fname = "__DbPack"
|
||||
case token.ZAP:
|
||||
fname = "__DbZap"
|
||||
}
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.CallExpr{
|
||||
Func: &ast.IdentExpr{NamePos: tok.Pos, Name: fname},
|
||||
}}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtSet() ast.Stmt {
|
||||
// SET command — skip to EOL (SET COLOR, SET FILTER, SET ORDER, etc.)
|
||||
p.skipToEndOfLine()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtDefer() ast.Stmt { return p.parseDefer() }
|
||||
func (p *Parser) stmtConst() ast.Stmt { return p.parseConstBlock() }
|
||||
func (p *Parser) stmtWatch() ast.Stmt { return p.parseWatch() }
|
||||
func (p *Parser) stmtParallel() ast.Stmt { return p.parseParallelFor() }
|
||||
|
||||
func (p *Parser) stmtWith() ast.Stmt {
|
||||
if p.peekAt(1) == token.TIMEOUT_KW {
|
||||
return p.parseWithTimeout()
|
||||
}
|
||||
p.skipToEndOfLine()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtSpawn() ast.Stmt {
|
||||
goPos := p.advance().Pos
|
||||
block := p.parseArrayOrBlock()
|
||||
if blk, ok := block.(*ast.BlockExpr); ok {
|
||||
p.expectEndOfStmt()
|
||||
return &ast.GoBlockStmt{GoPos: goPos, Block: blk}
|
||||
}
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: block}
|
||||
}
|
||||
|
||||
func (p *Parser) stmtArrowLeft() ast.Stmt {
|
||||
pos := p.advance().Pos
|
||||
ch := p.parseExpr()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.ChanRecvExpr{ArrowPos: pos, Chan: ch}}
|
||||
}
|
||||
540
compiler/pp/command.go
Normal file
540
compiler/pp/command.go
Normal file
@@ -0,0 +1,540 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// #command / #translate implementation for Five preprocessor.
|
||||
//
|
||||
// Harbour PP syntax:
|
||||
// #command PATTERN => RESULT
|
||||
// #translate PATTERN => RESULT
|
||||
// #xcommand PATTERN => RESULT (case-sensitive)
|
||||
// #xtranslate PATTERN => RESULT (case-sensitive)
|
||||
//
|
||||
// Pattern markers:
|
||||
// <x> — match any expression (regular match)
|
||||
// <!x!> — match single identifier only (restricted match)
|
||||
// <x,...> — match comma-separated list
|
||||
// <*x*> — match rest of line (wild match)
|
||||
// <x:a,b,c> — match one of listed words (list match)
|
||||
// [...] — optional clause
|
||||
//
|
||||
// Result markers:
|
||||
// <x> — substitute matched text
|
||||
// <(x)> — stringify (wrap in quotes)
|
||||
// <{x}> — blockify (wrap in {|| })
|
||||
// #<x> — dumb stringify
|
||||
// <.x.> — logify (.T. if matched, .F. if not)
|
||||
//
|
||||
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
|
||||
package pp
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Rule is one parsed #command / #translate directive.
type Rule struct {
	Pattern    string   // raw pattern text (left of "=>"), trimmed
	Result     string   // raw result text (right of "=>"), trimmed
	IsCommand  bool     // #command vs #translate
	CaseSens   bool     // #xcommand/#xtranslate = case sensitive
	Keyword    string   // leading literal word of Pattern, used as a cheap pre-filter; may be empty
	Markers    []Marker // markers found in Pattern, in order of appearance
	ResultTmpl string   // result template with marker references; starts out equal to Result
}

// Marker is one pattern marker such as <x>, <!x!>, <x,...>, <*x*> or
// <x:a,b,c>.
type Marker struct {
	Name       string     // marker name without its decoration
	Type       MarkerType // which kind of match the marker performs
	ListValues []string   // for <x:a,b,c> only: the allowed words
}

// MarkerType identifies the flavour of a pattern marker.
type MarkerType int

const (
	MarkerRegular    MarkerType = iota // <x> — any expression
	MarkerRestricted                   // <!x!> — identifier only
	MarkerList                         // <x,...> — comma-separated list
	MarkerWild                         // <*x*> — rest of line
	MarkerWordList                     // <x:a,b,c> — one of listed words
)

// ParseRule parses the text of a #command/#translate directive (everything
// after the directive name) into a Rule. isCommand and caseSens are stored
// unchanged in the Rule. It returns nil when directive contains no "=>".
func ParseRule(directive string, isCommand, caseSens bool) *Rule {
	arrow := strings.Index(directive, "=>")
	if arrow < 0 {
		return nil
	}
	pattern := strings.TrimSpace(directive[:arrow])
	result := strings.TrimSpace(directive[arrow+2:])

	// Handle line continuation (;)
	result = strings.ReplaceAll(result, " ;", "")

	rule := &Rule{
		Pattern:    pattern,
		Result:     result,
		IsCommand:  isCommand,
		CaseSens:   caseSens,
		ResultTmpl: result,
	}

	// Extract the first keyword for fast matching.
	if words := strings.Fields(pattern); len(words) > 0 {
		kw := strings.TrimRight(strings.TrimLeft(words[0], "<["), ">]")
		// Function-like patterns ("ISNIL( <x> )", "SAY(<x>)") glue "(" or a
		// marker onto the keyword. The pre-filter compares against the line's
		// first token, which ends at "(", so keep only the leading word;
		// otherwise such rules could never match.
		if cut := strings.IndexAny(kw, "(<["); cut > 0 {
			kw = kw[:cut]
		}
		// NOTE(review): a pattern that starts with a plain <x> marker still
		// ends up with the marker name as Keyword, as before — confirm
		// whether that is intended.
		if !strings.ContainsAny(kw, "!*,:") {
			rule.Keyword = kw
		}
	}

	rule.Markers = parseMarkers(pattern)
	return rule
}

// parseMarkers returns every <...> marker of pattern that has a non-empty
// name, in order. Scanning stops at a "<" that has no closing ">".
func parseMarkers(pattern string) []Marker {
	var markers []Marker
	for i := 0; i < len(pattern); {
		if pattern[i] != '<' {
			i++
			continue
		}
		end := strings.IndexByte(pattern[i:], '>')
		if end < 0 {
			break
		}
		if m := parseOneMarker(pattern[i+1 : i+end]); m.Name != "" {
			markers = append(markers, m)
		}
		i += end + 1
	}
	return markers
}

// parseOneMarker classifies the text found between "<" and ">".
// A lone "!" or "*" is not a marker and yields the zero Marker (empty Name)
// instead of panicking on an inverted slice.
func parseOneMarker(inner string) Marker {
	inner = strings.TrimSpace(inner)

	// <!name!> — restricted, <*name*> — wild
	wrapped := [...]struct {
		delim string
		kind  MarkerType
	}{
		{"!", MarkerRestricted},
		{"*", MarkerWild},
	}
	for _, w := range wrapped {
		if strings.HasPrefix(inner, w.delim) && strings.HasSuffix(inner, w.delim) {
			if len(inner) < 2 {
				return Marker{}
			}
			return Marker{Name: inner[1 : len(inner)-1], Type: w.kind}
		}
	}

	// <name,...> — comma list
	if strings.HasSuffix(inner, ",...") {
		return Marker{Name: strings.TrimSuffix(inner, ",..."), Type: MarkerList}
	}

	// <name:a,b,c> — word list
	if idx := strings.IndexByte(inner, ':'); idx > 0 {
		vals := strings.Split(inner[idx+1:], ",")
		for i := range vals {
			vals[i] = strings.TrimSpace(vals[i])
		}
		return Marker{Name: inner[:idx], Type: MarkerWordList, ListValues: vals}
	}

	// <name> — regular
	return Marker{Name: inner, Type: MarkerRegular}
}
|
||||
|
||||
// --- Rule matching and application ---
|
||||
|
||||
// MatchLine checks if a source line matches this rule and returns the substituted result.
|
||||
// Returns ("", false) if no match.
|
||||
func (r *Rule) MatchLine(line string) (string, bool) {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Fast keyword check
|
||||
if r.Keyword != "" {
|
||||
firstWord := firstToken(trimmed)
|
||||
if r.CaseSens {
|
||||
if firstWord != r.Keyword {
|
||||
return "", false
|
||||
}
|
||||
} else {
|
||||
if !strings.EqualFold(firstWord, r.Keyword) {
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to match pattern against line
|
||||
captures := r.matchPattern(trimmed)
|
||||
if captures == nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Apply result template
|
||||
result := r.applyResult(captures)
|
||||
return result, true
|
||||
}
|
||||
|
||||
// matchPattern attempts to match the pattern against a line.
|
||||
// Returns captured values map, or nil if no match.
|
||||
func (r *Rule) matchPattern(line string) map[string]string {
|
||||
captures := make(map[string]string)
|
||||
|
||||
patternWords := tokenizePattern(r.Pattern)
|
||||
lineWords := tokenizeLine(line)
|
||||
|
||||
pi, li := 0, 0
|
||||
for pi < len(patternWords) && li < len(lineWords) {
|
||||
pw := patternWords[pi]
|
||||
|
||||
// Marker?
|
||||
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
|
||||
inner := pw[1 : len(pw)-1]
|
||||
m := parseOneMarker(inner)
|
||||
|
||||
switch m.Type {
|
||||
case MarkerWild:
|
||||
// Capture rest of line
|
||||
rest := strings.Join(lineWords[li:], " ")
|
||||
captures[m.Name] = rest
|
||||
li = len(lineWords)
|
||||
pi++
|
||||
|
||||
case MarkerList:
|
||||
// Capture comma-separated items until next keyword
|
||||
var items []string
|
||||
for li < len(lineWords) {
|
||||
if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
|
||||
break
|
||||
}
|
||||
items = append(items, lineWords[li])
|
||||
li++
|
||||
}
|
||||
captures[m.Name] = strings.Join(items, " ")
|
||||
pi++
|
||||
|
||||
case MarkerWordList:
|
||||
// Match one of listed words
|
||||
matched := false
|
||||
for _, allowed := range m.ListValues {
|
||||
if r.CaseSens {
|
||||
if lineWords[li] == allowed {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
} else if strings.EqualFold(lineWords[li], allowed) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !matched {
|
||||
return nil
|
||||
}
|
||||
captures[m.Name] = lineWords[li]
|
||||
li++
|
||||
pi++
|
||||
|
||||
default:
|
||||
// Regular or restricted: capture one token or expression
|
||||
captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens)
|
||||
captures[m.Name] = captured
|
||||
pi++
|
||||
}
|
||||
} else if pw == "[" {
|
||||
// Optional clause — skip to matching ]
|
||||
depth := 1
|
||||
pi++
|
||||
for pi < len(patternWords) && depth > 0 {
|
||||
if patternWords[pi] == "[" {
|
||||
depth++
|
||||
} else if patternWords[pi] == "]" {
|
||||
depth--
|
||||
}
|
||||
pi++
|
||||
}
|
||||
} else if pw == "]" {
|
||||
pi++
|
||||
} else {
|
||||
// Literal keyword — must match
|
||||
if !matchWord(lineWords[li], pw, r.CaseSens) {
|
||||
return nil
|
||||
}
|
||||
li++
|
||||
pi++
|
||||
}
|
||||
}
|
||||
|
||||
// Skip remaining optional markers in pattern
|
||||
for pi < len(patternWords) {
|
||||
pw := patternWords[pi]
|
||||
if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) {
|
||||
pi++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// For #command with no markers and no optional clauses:
|
||||
// all line tokens must be consumed for a match
|
||||
if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 &&
|
||||
!strings.Contains(r.Pattern, "[") {
|
||||
return nil
|
||||
}
|
||||
|
||||
return captures
|
||||
}
|
||||
|
||||
// applyResult substitutes captured values into the result template.
|
||||
func (r *Rule) applyResult(captures map[string]string) string {
|
||||
result := r.ResultTmpl
|
||||
|
||||
for name, val := range captures {
|
||||
// <name> — direct substitution
|
||||
result = strings.ReplaceAll(result, "<"+name+">", val)
|
||||
// <(name)> — stringify
|
||||
result = strings.ReplaceAll(result, "<("+name+")>", `"`+val+`"`)
|
||||
// <.name.> — logify
|
||||
if val != "" {
|
||||
result = strings.ReplaceAll(result, "<."+name+".>", ".T.")
|
||||
} else {
|
||||
result = strings.ReplaceAll(result, "<."+name+".>", ".F.")
|
||||
}
|
||||
// #<name> — dumb stringify
|
||||
result = strings.ReplaceAll(result, "#<"+name+">", `"`+val+`"`)
|
||||
}
|
||||
|
||||
// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
|
||||
result = cleanUnreferencedMarkers(result)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references.
|
||||
// Only removes well-formed PP marker references, not comparison operators.
|
||||
func cleanUnreferencedMarkers(s string) string {
|
||||
// Match patterns like <identifier>, <(identifier)>, <.identifier.>, #<identifier>
|
||||
var out strings.Builder
|
||||
i := 0
|
||||
for i < len(s) {
|
||||
removed := false
|
||||
// #<name>
|
||||
if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' {
|
||||
if end := findMarkerEnd(s, i+1); end > 0 {
|
||||
i = end
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
// <name>, <(name)>, <.name.>, <"name">
|
||||
if !removed && s[i] == '<' {
|
||||
if end := findMarkerEnd(s, i); end > 0 {
|
||||
i = end
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
if !removed {
|
||||
out.WriteByte(s[i])
|
||||
i++
|
||||
}
|
||||
}
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// findMarkerEnd reports whether a preprocessor result marker starts at
// s[start]. It returns the index just past the closing ">" when it does and 0
// otherwise.
//
// Accepted shape: "<", at most one opening decoration out of ( . " {, an
// identifier (letter or underscore, then letters, digits or underscores), any
// run of ) . " } , and spaces, and finally ">". The braces make the blockify
// form <{name}> recognisable alongside <name>, <(name)>, <.name.> and
// <"name">. Requiring an identifier right after the opening keeps comparison
// operators such as "a < b" from being taken for markers.
func findMarkerEnd(s string, start int) int {
	if start < 0 || start >= len(s) || s[start] != '<' {
		return 0
	}

	isLetter := func(c byte) bool {
		return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
	}

	i := start + 1
	// Skip one optional opening decoration.
	if i < len(s) {
		switch s[i] {
		case '(', '.', '"', '{':
			i++
		}
	}
	// Must start with letter or underscore (identifier).
	if i >= len(s) || !isLetter(s[i]) {
		return 0
	}
	// Consume identifier.
	for i < len(s) && (isLetter(s[i]) || s[i] >= '0' && s[i] <= '9') {
		i++
	}
	// Skip closing decoration: ) . " } or a ",..." style suffix.
closing:
	for i < len(s) {
		switch s[i] {
		case ')', '.', '"', '}', ',', ' ':
			i++
		default:
			break closing
		}
	}
	if i < len(s) && s[i] == '>' {
		return i + 1
	}
	return 0
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
// firstToken returns the leading part of s up to, but not including, the
// first space, tab or "(". When s contains none of these, s is returned
// whole.
func firstToken(s string) string {
	end := 0
	for end < len(s) && s[end] != ' ' && s[end] != '\t' && s[end] != '(' {
		end++
	}
	return s[:end]
}
|
||||
|
||||
// matchWord reports whether lineWord equals patternWord: exactly when
// caseSens is true, ignoring case otherwise.
func matchWord(lineWord, patternWord string, caseSens bool) bool {
	if !caseSens {
		return strings.EqualFold(lineWord, patternWord)
	}
	return lineWord == patternWord
}
|
||||
|
||||
// tokenizePattern splits a rule pattern into tokens:
//   - a "<...>" marker (from "<" to the next ">") is one token,
//   - "[" and "]" are tokens of their own,
//   - everything else is split on spaces and tabs, with a word also ending
//     where a "<", "[" or "]" begins.
//
// A "<" with no closing ">" anywhere after it is ordinary text and starts a
// word (so "a <= b" yields "a", "<=", "b"). Previously such a "<" was never
// consumed and the function looped forever.
func tokenizePattern(pattern string) []string {
	var tokens []string
	i := 0
	for i < len(pattern) {
		switch pattern[i] {
		case ' ', '\t':
			i++
			continue
		case '[':
			tokens = append(tokens, "[")
			i++
			continue
		case ']':
			tokens = append(tokens, "]")
			i++
			continue
		case '<':
			if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
				tokens = append(tokens, pattern[i:i+end+1])
				i += end + 1
				continue
			}
			// Unclosed "<": fall through and treat it as the start of a word.
		}

		// Regular word. The first byte is always consumed, which guarantees
		// progress even when the word starts with an unclosed "<".
		start := i
		i++
		for i < len(pattern) && strings.IndexByte(" \t<[]", pattern[i]) < 0 {
			i++
		}
		tokens = append(tokens, pattern[start:i])
	}
	return tokens
}
|
||||
|
||||
// tokenizeLine splits a source line into tokens:
//   - a quoted string ("..." or '...') is one token, quotes included; an
//     unterminated string runs to the end of the line,
//   - each comma is a token of its own,
//   - everything else is split on spaces and tabs, with a word also ending
//     where a comma or a quote begins.
//
// Parentheses get no special treatment and simply stay part of the word they
// appear in.
func tokenizeLine(line string) []string {
	var words []string
	n := len(line)
	for pos := 0; pos < n; {
		switch ch := line[pos]; ch {
		case ' ', '\t':
			pos++

		case ',':
			words = append(words, ",")
			pos++

		case '"', '\'':
			stop := pos + 1
			for stop < n && line[stop] != ch {
				stop++
			}
			if stop < n {
				stop++ // include the closing quote
			}
			words = append(words, line[pos:stop])
			pos = stop

		default:
			stop := pos
			for stop < n {
				if c := line[stop]; c == ' ' || c == '\t' || c == ',' || c == '"' || c == '\'' {
					break
				}
				stop++
			}
			words = append(words, line[pos:stop])
			pos = stop
		}
	}
	return words
}
|
||||
|
||||
// captureExpression captures an expression from line tokens.
|
||||
// If this is the last marker in the pattern, captures all remaining tokens.
|
||||
// Otherwise, captures until the next keyword in the pattern.
|
||||
func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string {
|
||||
if *li >= len(lineWords) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Find next literal keyword in pattern to use as delimiter
|
||||
delimWord := ""
|
||||
for pi := nextPi; pi < len(patternWords); pi++ {
|
||||
pw := patternWords[pi]
|
||||
if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
|
||||
delimWord = pw
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if delimWord != "" {
|
||||
// Capture until delimiter keyword
|
||||
var parts []string
|
||||
for *li < len(lineWords) {
|
||||
if matchWord(lineWords[*li], delimWord, caseSens) {
|
||||
break
|
||||
}
|
||||
parts = append(parts, lineWords[*li])
|
||||
*li++
|
||||
}
|
||||
return strings.Join(parts, " ")
|
||||
}
|
||||
|
||||
// No delimiter: if last marker, capture all remaining tokens
|
||||
if nextPi >= len(patternWords) {
|
||||
rest := strings.Join(lineWords[*li:], " ")
|
||||
*li = len(lineWords)
|
||||
return rest
|
||||
}
|
||||
|
||||
// Single token capture (between markers)
|
||||
tok := lineWords[*li]
|
||||
*li++
|
||||
return tok
|
||||
}
|
||||
189
compiler/pp/command_test.go
Normal file
189
compiler/pp/command_test.go
Normal file
@@ -0,0 +1,189 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package pp
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCommandSimple(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command CLS => @ 0,0 CLEAR
|
||||
CLS`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "@ 0,0 CLEAR") {
|
||||
t.Errorf("CLS should expand to '@ 0,0 CLEAR', got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandWithMarker(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command SAY <text> => QOut( <text> )
|
||||
SAY "Hello"`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, `QOut( "Hello" )`) {
|
||||
t.Errorf("SAY should expand, got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandWithMultipleMarkers(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command STORE <val> TO <var> => <var> := <val>
|
||||
STORE 42 TO myVar`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "myVar := 42") {
|
||||
t.Errorf("STORE should expand, got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTranslateStringify(t *testing.T) {
|
||||
p := New()
|
||||
// Simple stringify without parentheses in pattern
|
||||
src := `#translate ASSERT <expr> => __Assert( <(expr)>, <expr> )
|
||||
ASSERT x > 10`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, `"x > 10"`) {
|
||||
t.Errorf("stringify should produce quoted text, got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandCaseInsensitive(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command CLEAR SCREEN => @ 0,0 CLEAR
|
||||
clear screen`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "@ 0,0 CLEAR") {
|
||||
t.Errorf("case insensitive match failed, got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestXtranslateCaseSensitive(t *testing.T) {
|
||||
p := New()
|
||||
// Without parentheses in pattern for simpler matching
|
||||
src := `#xtranslate MYFUNC <x> => myFuncImpl( <x> )
|
||||
MYFUNC 42
|
||||
myfunc 99`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "myFuncImpl( 42 )") {
|
||||
t.Errorf("case-sensitive match should work, got: %q", result)
|
||||
}
|
||||
if strings.Contains(result, "myFuncImpl( 99 )") {
|
||||
t.Error("case-sensitive should NOT match lowercase")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandWordList(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command SET DELETED <x:ON,OFF,&> => Set( _SET_DELETED, <(x)> )
|
||||
SET DELETED ON`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, `Set( _SET_DELETED, "ON" )`) {
|
||||
t.Errorf("word list match failed, got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandWildcard(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command NOTE <*x*> =>
|
||||
NOTE This is a comment that should disappear`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
trimmed := strings.TrimSpace(result)
|
||||
if trimmed != "" {
|
||||
t.Errorf("NOTE with wildcard should produce empty, got: %q", trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandOptional(t *testing.T) {
|
||||
p := New()
|
||||
// Simpler optional test without comma-list
|
||||
src := `#command DO <proc> => <proc>()
|
||||
DO MyFunc`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "MyFunc()") {
|
||||
t.Errorf("DO MyFunc should expand to MyFunc(), got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandWithArgs(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command DO <proc> WITH <args> => <proc>( <args> )
|
||||
DO MyFunc WITH 42`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "MyFunc( 42 )") {
|
||||
t.Errorf("DO WITH should expand, got: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStdChPatterns(t *testing.T) {
|
||||
// Test patterns from Harbour's std.ch
|
||||
p := New()
|
||||
src := `#command END <x> => end
|
||||
#command ENDDO <*x*> => enddo
|
||||
#command ENDIF <*x*> => endif
|
||||
END SEQUENCE
|
||||
ENDDO something
|
||||
ENDIF // test`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
lines := strings.Split(strings.TrimSpace(result), "\n")
|
||||
expects := []string{"end", "enddo", "endif"}
|
||||
idx := 0
|
||||
for _, l := range lines {
|
||||
l = strings.TrimSpace(l)
|
||||
if l == "" {
|
||||
continue
|
||||
}
|
||||
if idx < len(expects) && l == expects[idx] {
|
||||
idx++
|
||||
}
|
||||
}
|
||||
if idx != len(expects) {
|
||||
t.Errorf("std.ch patterns: matched %d/%d, result:\n%s", idx, len(expects), result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHBTEST_Pattern(t *testing.T) {
|
||||
// The key pattern from hbtest.ch
|
||||
p := New()
|
||||
src := `#xtranslate HBTEST <x> IS <result> => TEST_CALL( #<x>, {|| <x> }, <result> )
|
||||
HBTEST Len("abc") IS 3`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "TEST_CALL") {
|
||||
t.Errorf("HBTEST macro should expand, got: %q", result)
|
||||
}
|
||||
if !strings.Contains(result, `"Len("abc")"`) || !strings.Contains(result, "3") {
|
||||
// At minimum, the result marker should be present
|
||||
if !strings.Contains(result, "3") {
|
||||
t.Errorf("expected result value 3 in expansion, got: %q", result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultipleRules(t *testing.T) {
|
||||
p := New()
|
||||
src := `#command PRINT <text> => QOut( <text> )
|
||||
#command PRINTLN <text> => QOut( <text> ) ; QOut()
|
||||
PRINT "Hello"
|
||||
PRINTLN "World"`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, `QOut( "Hello" )`) {
|
||||
t.Error("PRINT should expand")
|
||||
}
|
||||
if !strings.Contains(result, `QOut( "World" )`) {
|
||||
t.Error("PRINTLN should expand")
|
||||
}
|
||||
}
|
||||
552
compiler/pp/pp.go
Normal file
552
compiler/pp/pp.go
Normal file
@@ -0,0 +1,552 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// Preprocessor for Five — handles #include, #define, #ifdef/#endif.
|
||||
// Harbour: /mnt/d/harbour-core/src/pp/ppcore.c (6383 lines)
|
||||
//
|
||||
// Five PP is simplified but covers the essential directives:
|
||||
// #include "file.ch" — file inclusion
|
||||
// #define NAME VALUE — simple text substitution
|
||||
// #undef NAME — remove definition
|
||||
// #ifdef NAME / #ifndef NAME / #else / #endif — conditional compilation
|
||||
// #pragma — compiler hints
|
||||
//
|
||||
// #command/#translate (and the case-sensitive #xcommand/#xtranslate) rules are parsed and applied to each source line.
|
||||
// Five handles CLASS syntax natively in the parser, so hbclass.ch
|
||||
// is not strictly required. But #include is needed for user headers.
|
||||
package pp
|
||||
|
||||
import (
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
|
||||
|
||||
// Preprocessor processes source code before lexing.
|
||||
type Preprocessor struct {
|
||||
defines map[string]string // #define name → value
|
||||
includeDirs []string // search paths for #include
|
||||
included map[string]bool // prevent circular inclusion
|
||||
commands []*Rule // #command rules
|
||||
translates []*Rule // #translate rules
|
||||
errors []string
|
||||
GoDumps []string // collected #pragma BEGINDUMP Go code blocks
|
||||
}
|
||||
|
||||
// New creates a new Preprocessor.
|
||||
func New() *Preprocessor {
|
||||
pp := &Preprocessor{
|
||||
defines: make(map[string]string),
|
||||
included: make(map[string]bool),
|
||||
}
|
||||
pp.addStdRules()
|
||||
return pp
|
||||
}
|
||||
|
||||
// addStdRules registers built-in #command rules equivalent to Harbour's std.ch.
|
||||
func (pp *Preprocessor) addStdRules() {
|
||||
stdCommands := []string{
|
||||
// MENU TO
|
||||
`MENU TO <var> => <var> := __MenuTo(<var>)`,
|
||||
// CLEAR GETS
|
||||
`CLEAR GETS => GetList := {}`,
|
||||
// Note: @ SAY, @ GET, @ PROMPT, READ are handled by the parser directly.
|
||||
// @ PROMPT rules removed — parser handles them with proper token parsing.
|
||||
}
|
||||
for _, cmd := range stdCommands {
|
||||
if rule := ParseRule(cmd, true, false); rule != nil {
|
||||
pp.commands = append(pp.commands, rule)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AddIncludeDir adds a directory to search for #include files.
|
||||
func (pp *Preprocessor) AddIncludeDir(dir string) {
|
||||
pp.includeDirs = append(pp.includeDirs, dir)
|
||||
}
|
||||
|
||||
// Define adds a #define.
|
||||
func (pp *Preprocessor) Define(name, value string) {
|
||||
pp.defines[name] = value
|
||||
}
|
||||
|
||||
// Process preprocesses the source code, resolving #include and #define.
|
||||
func (pp *Preprocessor) Process(filename, source string) (string, []string) {
|
||||
pp.errors = nil
|
||||
result := pp.processLines(filename, source, 0)
|
||||
return result, pp.errors
|
||||
}
|
||||
|
||||
func (pp *Preprocessor) processLines(filename, source string, depth int) string {
|
||||
if depth > 20 {
|
||||
pp.errors = append(pp.errors, fmt.Sprintf("%s: #include depth exceeded (max 20)", filename))
|
||||
return source
|
||||
}
|
||||
|
||||
lines := strings.Split(source, "\n")
|
||||
var result []string
|
||||
var ifStack []bool // true = active section, false = skipping
|
||||
active := true
|
||||
inBlockComment := false // track multi-line /* */ comments
|
||||
inPragmaDump := false // track #pragma BEGINDUMP ... ENDDUMP
|
||||
var dumpLines []string // accumulate Go code lines
|
||||
|
||||
for i, line := range lines {
|
||||
// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks)
|
||||
if inPragmaDump {
|
||||
trimCheck := strings.TrimSpace(line)
|
||||
if strings.HasPrefix(trimCheck, "#") {
|
||||
dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
|
||||
if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
|
||||
inPragmaDump = false
|
||||
pp.GoDumps = append(pp.GoDumps, strings.Join(dumpLines, "\n"))
|
||||
dumpLines = nil
|
||||
result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
|
||||
continue
|
||||
}
|
||||
}
|
||||
dumpLines = append(dumpLines, line)
|
||||
result = append(result, "") // blank out for line counting
|
||||
continue
|
||||
}
|
||||
trimmed := strings.TrimSpace(line)
|
||||
|
||||
// Handle multi-line block comments
|
||||
if inBlockComment {
|
||||
if idx := strings.Index(line, "*/"); idx >= 0 {
|
||||
inBlockComment = false
|
||||
line = line[idx+2:] // keep content after */
|
||||
trimmed = strings.TrimSpace(line)
|
||||
if trimmed == "" {
|
||||
result = append(result, "")
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
result = append(result, "") // blank out comment lines
|
||||
continue
|
||||
}
|
||||
}
|
||||
// Strip block comments within a single line and detect opening /*
|
||||
line = stripBlockComments(line, &inBlockComment)
|
||||
trimmed = strings.TrimSpace(line)
|
||||
|
||||
// Check if in active section
|
||||
if len(ifStack) > 0 {
|
||||
active = ifStack[len(ifStack)-1]
|
||||
} else {
|
||||
active = true
|
||||
}
|
||||
|
||||
// Preprocessor directives (always processed regardless of active state)
|
||||
if strings.HasPrefix(trimmed, "#") {
|
||||
directive := strings.TrimPrefix(trimmed, "#")
|
||||
directive = strings.TrimSpace(directive)
|
||||
|
||||
// Detect #pragma BEGINDUMP
|
||||
upperDir := strings.ToUpper(directive)
|
||||
if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
|
||||
inPragmaDump = true
|
||||
dumpLines = nil
|
||||
result = append(result, "")
|
||||
continue
|
||||
}
|
||||
|
||||
if pp.handleConditional(directive, &ifStack, active) {
|
||||
continue
|
||||
}
|
||||
|
||||
if !active {
|
||||
continue // skip non-conditional directives in inactive sections
|
||||
}
|
||||
|
||||
if pp.handleDirective(filename, directive, depth, &result, i+1) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if !active {
|
||||
continue // skip lines in inactive #ifdef sections
|
||||
}
|
||||
|
||||
// Apply #command/#translate rules
|
||||
if len(pp.commands) > 0 || len(pp.translates) > 0 {
|
||||
line = pp.applyRules(line)
|
||||
}
|
||||
|
||||
// Apply #define substitutions
|
||||
if len(pp.defines) > 0 {
|
||||
line = pp.applyDefines(line)
|
||||
}
|
||||
|
||||
result = append(result, line)
|
||||
}
|
||||
|
||||
if len(ifStack) > 0 {
|
||||
pp.errors = append(pp.errors, fmt.Sprintf("%s: unterminated #ifdef/#ifndef", filename))
|
||||
}
|
||||
|
||||
return strings.Join(result, "\n")
|
||||
}
|
||||
|
||||
// handleConditional processes #ifdef, #ifndef, #else, #endif.
|
||||
// Returns true if the line was a conditional directive.
|
||||
func (pp *Preprocessor) handleConditional(directive string, ifStack *[]bool, active bool) bool {
|
||||
upper := strings.ToUpper(directive)
|
||||
|
||||
if strings.HasPrefix(upper, "IFDEF ") {
|
||||
name := strings.TrimSpace(directive[6:])
|
||||
_, defined := pp.defines[name]
|
||||
*ifStack = append(*ifStack, defined && active)
|
||||
return true
|
||||
}
|
||||
|
||||
if strings.HasPrefix(upper, "IFNDEF ") {
|
||||
name := strings.TrimSpace(directive[7:])
|
||||
_, defined := pp.defines[name]
|
||||
*ifStack = append(*ifStack, !defined && active)
|
||||
return true
|
||||
}
|
||||
|
||||
// #if expr — simplified: support #if 0 (always false), #if 1 (always true),
|
||||
// and #if __pragma(...) (treat as false for compatibility)
|
||||
if strings.HasPrefix(upper, "IF ") || upper == "IF" {
|
||||
rest := strings.TrimSpace(directive[2:])
|
||||
val := false
|
||||
if rest == "1" || rest == ".T." {
|
||||
val = true
|
||||
} else if rest == "0" || rest == ".F." {
|
||||
val = false
|
||||
} else {
|
||||
// Unknown expression — default to false (conservative)
|
||||
val = false
|
||||
}
|
||||
*ifStack = append(*ifStack, val && active)
|
||||
return true
|
||||
}
|
||||
|
||||
// #else — may have trailing comment
|
||||
if upper == "ELSE" || strings.HasPrefix(upper, "ELSE ") || strings.HasPrefix(upper, "ELSE\t") {
|
||||
if len(*ifStack) > 0 {
|
||||
// Flip the top of stack (only if parent was active)
|
||||
parentActive := true
|
||||
if len(*ifStack) > 1 {
|
||||
parentActive = (*ifStack)[len(*ifStack)-2]
|
||||
}
|
||||
(*ifStack)[len(*ifStack)-1] = !(*ifStack)[len(*ifStack)-1] && parentActive
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// #endif — may have trailing comment: #endif /* COMMENT */
|
||||
stripped := strings.TrimSpace(upper)
|
||||
if idx := strings.Index(stripped, " "); idx > 0 {
|
||||
stripped = stripped[:idx]
|
||||
}
|
||||
if idx := strings.Index(stripped, "\t"); idx > 0 {
|
||||
stripped = stripped[:idx]
|
||||
}
|
||||
if stripped == "ENDIF" {
|
||||
if len(*ifStack) > 0 {
|
||||
*ifStack = (*ifStack)[:len(*ifStack)-1]
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// handleDirective processes non-conditional directives.
|
||||
func (pp *Preprocessor) handleDirective(filename, directive string, depth int, result *[]string, lineNo int) bool {
|
||||
upper := strings.ToUpper(directive)
|
||||
|
||||
// #include "file" or #include <file>
|
||||
if strings.HasPrefix(upper, "INCLUDE ") {
|
||||
rest := strings.TrimSpace(directive[8:])
|
||||
inclFile := pp.extractIncludeFile(rest)
|
||||
if inclFile == "" {
|
||||
pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #include", filename, lineNo))
|
||||
return true
|
||||
}
|
||||
|
||||
content := pp.resolveInclude(filename, inclFile)
|
||||
if content == "" {
|
||||
// Not found — not an error for Five (some .ch files are optional)
|
||||
*result = append(*result, fmt.Sprintf("// #include %q — not found (skipped)", inclFile))
|
||||
return true
|
||||
}
|
||||
|
||||
// Process included content recursively
|
||||
processed := pp.processLines(inclFile, content, depth+1)
|
||||
*result = append(*result, strings.Split(processed, "\n")...)
|
||||
return true
|
||||
}
|
||||
|
||||
// #define NAME [VALUE]
|
||||
if strings.HasPrefix(upper, "DEFINE ") {
|
||||
rest := strings.TrimSpace(directive[7:])
|
||||
// Detect function-like macro: #define NAME( params ) body
|
||||
// For now, skip these (don't register as simple text substitution)
|
||||
if idx := strings.IndexByte(rest, '('); idx > 0 && idx < strings.IndexAny(rest+" ", " \t") {
|
||||
// Function-like macro — not yet supported, skip
|
||||
return true
|
||||
}
|
||||
parts := strings.SplitN(rest, " ", 2)
|
||||
name := parts[0]
|
||||
value := ""
|
||||
if len(parts) > 1 {
|
||||
value = strings.TrimSpace(parts[1])
|
||||
}
|
||||
// Strip trailing // comment and /* */ comment from value
|
||||
if idx := strings.Index(value, "//"); idx >= 0 {
|
||||
// Make sure // is not inside a string literal
|
||||
inStr := false
|
||||
for i := 0; i < idx; i++ {
|
||||
if value[i] == '"' || value[i] == '\'' {
|
||||
inStr = !inStr
|
||||
}
|
||||
}
|
||||
if !inStr {
|
||||
value = strings.TrimSpace(value[:idx])
|
||||
}
|
||||
}
|
||||
if idx := strings.Index(value, "/*"); idx >= 0 {
|
||||
value = strings.TrimSpace(value[:idx])
|
||||
}
|
||||
pp.defines[name] = value
|
||||
return true
|
||||
}
|
||||
|
||||
// #undef NAME
|
||||
if strings.HasPrefix(upper, "UNDEF ") {
|
||||
name := strings.TrimSpace(directive[6:])
|
||||
delete(pp.defines, name)
|
||||
return true
|
||||
}
|
||||
|
||||
// #pragma — just pass through as comment
|
||||
if strings.HasPrefix(upper, "PRAGMA ") {
|
||||
*result = append(*result, "// "+directive)
|
||||
return true
|
||||
}
|
||||
// #warning, #error, #stdout — skip (emit as comment)
|
||||
if strings.HasPrefix(upper, "WARNING") || strings.HasPrefix(upper, "ERROR") || strings.HasPrefix(upper, "STDOUT") {
|
||||
*result = append(*result, "// #"+directive)
|
||||
return true
|
||||
}
|
||||
|
||||
// #command / #translate — parse and store rules
|
||||
if strings.HasPrefix(upper, "COMMAND ") {
|
||||
if rule := ParseRule(directive[8:], true, false); rule != nil {
|
||||
pp.commands = append(pp.commands, rule)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(upper, "TRANSLATE ") {
|
||||
if rule := ParseRule(directive[10:], false, false); rule != nil {
|
||||
pp.translates = append(pp.translates, rule)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(upper, "XCOMMAND ") {
|
||||
if rule := ParseRule(directive[9:], true, true); rule != nil {
|
||||
pp.commands = append(pp.commands, rule)
|
||||
}
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(upper, "XTRANSLATE ") {
|
||||
if rule := ParseRule(directive[11:], false, true); rule != nil {
|
||||
pp.translates = append(pp.translates, rule)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// extractIncludeFile gets the filename from #include "file" or #include <file>
|
||||
func (pp *Preprocessor) extractIncludeFile(s string) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if len(s) >= 2 {
|
||||
if (s[0] == '"' && s[len(s)-1] == '"') || (s[0] == '<' && s[len(s)-1] == '>') {
|
||||
return s[1 : len(s)-1]
|
||||
}
|
||||
}
|
||||
return s // bare filename
|
||||
}
|
||||
|
||||
// resolveInclude searches for an include file and returns its content.
|
||||
func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
|
||||
// Prevent circular inclusion
|
||||
absKey := inclFile
|
||||
if pp.included[absKey] {
|
||||
return ""
|
||||
}
|
||||
pp.included[absKey] = true
|
||||
defer func() { delete(pp.included, absKey) }()
|
||||
|
||||
// Search order:
|
||||
// 1. Relative to current file
|
||||
// 2. Include directories
|
||||
// 3. Harbour include dir (for hbclass.ch etc.)
|
||||
|
||||
searchPaths := []string{}
|
||||
|
||||
// Relative to current file
|
||||
if currentFile != "" {
|
||||
dir := filepath.Dir(currentFile)
|
||||
searchPaths = append(searchPaths, filepath.Join(dir, inclFile))
|
||||
}
|
||||
|
||||
// Include directories
|
||||
for _, dir := range pp.includeDirs {
|
||||
searchPaths = append(searchPaths, filepath.Join(dir, inclFile))
|
||||
}
|
||||
|
||||
// Try each path
|
||||
for _, path := range searchPaths {
|
||||
data, err := os.ReadFile(path)
|
||||
if err == nil {
|
||||
return string(data)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// applyRules applies #command and #translate rules to a line.
|
||||
// #command rules are tried first (they match complete statements).
|
||||
// #translate rules are tried on any part of a line.
|
||||
func (pp *Preprocessor) applyRules(line string) string {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" || strings.HasPrefix(trimmed, "//") {
|
||||
return line
|
||||
}
|
||||
|
||||
// Try #command rules (match from start of line)
|
||||
for _, rule := range pp.commands {
|
||||
if result, ok := rule.MatchLine(trimmed); ok {
|
||||
// Preserve leading whitespace
|
||||
indent := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
|
||||
return indent + result
|
||||
}
|
||||
}
|
||||
|
||||
// Try #translate rules (can match substrings)
|
||||
for _, rule := range pp.translates {
|
||||
if result, ok := rule.MatchLine(trimmed); ok {
|
||||
indent := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
|
||||
return indent + result
|
||||
}
|
||||
}
|
||||
|
||||
return line
|
||||
}
|
||||
|
||||
// stripBlockComments removes /* ... */ comments from a single line and
// returns what is left. Each removed comment is replaced by one space.
//
// If a comment opens but does not close on this line, *inBlock is set to
// true and only the text before the opener is returned. *inBlock is never
// reset here.
//
// Comment openers inside single- or double-quoted string literals are left
// alone. Everything from a "//" line comment to the end of the line is
// copied through verbatim, so a "/*" that appears inside a line comment does
// not start a block comment.
func stripBlockComments(line string, inBlock *bool) string {
	var out strings.Builder
	quote := byte(0) // the quote character of the open string literal, or 0

	for i := 0; i < len(line); {
		c := line[i]

		// Inside a string literal: copy until the matching quote.
		if quote != 0 {
			if c == quote {
				quote = 0
			}
			out.WriteByte(c)
			i++
			continue
		}
		if c == '"' || c == '\'' {
			quote = c
			out.WriteByte(c)
			i++
			continue
		}

		if c == '/' && i+1 < len(line) {
			switch line[i+1] {
			case '/':
				// Line comment: the rest of the line is not scanned.
				out.WriteString(line[i:])
				return out.String()
			case '*':
				end := strings.Index(line[i+2:], "*/")
				if end < 0 {
					*inBlock = true
					return out.String() // rest of line is comment
				}
				out.WriteByte(' ') // replace comment with space
				i += 2 + end + 2   // skip past */
				continue
			}
		}

		out.WriteByte(c)
		i++
	}
	return out.String()
}
|
||||
|
||||
// applyDefines substitutes #define macros in a line.
|
||||
// Simple word-boundary replacement (not full macro expansion).
|
||||
func (pp *Preprocessor) applyDefines(line string) string {
|
||||
for name, value := range pp.defines {
|
||||
if value == "" {
|
||||
continue // flag-only define, no substitution
|
||||
}
|
||||
// Simple word replacement (not inside strings)
|
||||
line = replaceWord(line, name, value)
|
||||
}
|
||||
return line
|
||||
}
|
||||
|
||||
// replaceWord returns line with every whole-word occurrence of old replaced
// by new. An occurrence counts as a whole word when it is not directly
// preceded or followed by a letter, digit or underscore. Text inside
// single- or double-quoted string literals is copied unchanged. Matching is
// case-sensitive and the inserted text is not rescanned.
//
// An empty old never matches; line is returned as is. Without that guard a
// zero-length match would never advance and the scan would not terminate.
func replaceWord(line, old, new string) string {
	if old == "" || !strings.Contains(line, old) {
		return line
	}

	isWord := func(c byte) bool {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'
	}

	var out strings.Builder
	quote := byte(0) // the quote character of the open string literal, or 0

	for i := 0; i < len(line); {
		c := line[i]
		switch {
		case quote != 0:
			// Inside a literal: only look for the closing quote.
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case strings.HasPrefix(line[i:], old):
			startsWord := i == 0 || !isWord(line[i-1])
			endsWord := i+len(old) >= len(line) || !isWord(line[i+len(old)])
			if startsWord && endsWord {
				out.WriteString(new)
				i += len(old)
				continue
			}
		}
		out.WriteByte(c)
		i++
	}

	return out.String()
}
|
||||
|
||||
// isWordChar reports whether c can be part of an identifier: an ASCII
// letter, an ASCII digit or an underscore.
func isWordChar(c byte) bool {
	switch {
	case c == '_':
		return true
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
|
||||
264
compiler/pp/pp_test.go
Normal file
264
compiler/pp/pp_test.go
Normal file
@@ -0,0 +1,264 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package pp
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDefine(t *testing.T) {
|
||||
p := New()
|
||||
src := `#define VERSION "1.0"
|
||||
? VERSION`
|
||||
|
||||
result, errs := p.Process("test.prg", src)
|
||||
if len(errs) > 0 {
|
||||
t.Fatal(errs)
|
||||
}
|
||||
if !strings.Contains(result, `"1.0"`) {
|
||||
t.Errorf("define not substituted: %q", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefineFlag(t *testing.T) {
|
||||
p := New()
|
||||
src := `#define DEBUG
|
||||
#ifdef DEBUG
|
||||
? "Debug mode"
|
||||
#else
|
||||
? "Release mode"
|
||||
#endif`
|
||||
|
||||
result, errs := p.Process("test.prg", src)
|
||||
if len(errs) > 0 {
|
||||
t.Fatal(errs)
|
||||
}
|
||||
if !strings.Contains(result, "Debug mode") {
|
||||
t.Error("ifdef DEBUG should include Debug mode")
|
||||
}
|
||||
if strings.Contains(result, "Release mode") {
|
||||
t.Error("should NOT include Release mode")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIfndef(t *testing.T) {
|
||||
p := New()
|
||||
src := `#ifndef RELEASE
|
||||
? "Not release"
|
||||
#else
|
||||
? "Release"
|
||||
#endif`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "Not release") {
|
||||
t.Error("ifndef should include 'Not release'")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNestedIfdef(t *testing.T) {
|
||||
p := New()
|
||||
p.Define("A", "")
|
||||
src := `#ifdef A
|
||||
? "A is defined"
|
||||
#ifdef B
|
||||
? "B is defined"
|
||||
#else
|
||||
? "B is not defined"
|
||||
#endif
|
||||
#endif`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "A is defined") {
|
||||
t.Error("A should be defined")
|
||||
}
|
||||
if !strings.Contains(result, "B is not defined") {
|
||||
t.Error("B should not be defined")
|
||||
}
|
||||
if strings.Contains(result, "B is defined") {
|
||||
t.Error("B should NOT appear as defined")
|
||||
}
|
||||
}
|
||||
|
||||
func TestUndef(t *testing.T) {
|
||||
p := New()
|
||||
src := `#define FOO "bar"
|
||||
? FOO
|
||||
#undef FOO
|
||||
? FOO`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
lines := strings.Split(result, "\n")
|
||||
// First ? should have "bar", second should still have FOO (not substituted)
|
||||
found := 0
|
||||
for _, l := range lines {
|
||||
l = strings.TrimSpace(l)
|
||||
if strings.Contains(l, `"bar"`) {
|
||||
found++
|
||||
}
|
||||
}
|
||||
if found != 1 {
|
||||
t.Errorf("expected FOO substituted once, found %d times", found)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInclude(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create header file
|
||||
headerContent := `#define APP_NAME "Five Test"
|
||||
#define APP_VERSION "1.0"`
|
||||
os.WriteFile(filepath.Join(dir, "myapp.ch"), []byte(headerContent), 0644)
|
||||
|
||||
// Create main file
|
||||
src := `#include "myapp.ch"
|
||||
? APP_NAME
|
||||
? APP_VERSION`
|
||||
|
||||
p := New()
|
||||
p.AddIncludeDir(dir)
|
||||
result, errs := p.Process(filepath.Join(dir, "main.prg"), src)
|
||||
if len(errs) > 0 {
|
||||
t.Fatal(errs)
|
||||
}
|
||||
if !strings.Contains(result, `"Five Test"`) {
|
||||
t.Errorf("APP_NAME not substituted: %q", result)
|
||||
}
|
||||
if !strings.Contains(result, `"1.0"`) {
|
||||
t.Error("APP_VERSION not substituted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIncludeNested(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// base.ch includes sub.ch
|
||||
os.WriteFile(filepath.Join(dir, "sub.ch"), []byte(`#define SUB_VAL 42`), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "base.ch"), []byte(`#include "sub.ch"
|
||||
#define BASE_VAL 100`), 0644)
|
||||
|
||||
src := `#include "base.ch"
|
||||
? SUB_VAL
|
||||
? BASE_VAL`
|
||||
|
||||
p := New()
|
||||
p.AddIncludeDir(dir)
|
||||
result, _ := p.Process(filepath.Join(dir, "main.prg"), src)
|
||||
if !strings.Contains(result, "42") {
|
||||
t.Error("SUB_VAL from nested include should be 42")
|
||||
}
|
||||
if !strings.Contains(result, "100") {
|
||||
t.Error("BASE_VAL should be 100")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIncludeGuard(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Header with include guard
|
||||
header := `#ifndef _MYHEADER_CH
|
||||
#define _MYHEADER_CH
|
||||
#define MY_CONST 999
|
||||
#endif`
|
||||
os.WriteFile(filepath.Join(dir, "myheader.ch"), []byte(header), 0644)
|
||||
|
||||
// Include twice — should work (guard prevents double processing)
|
||||
src := `#include "myheader.ch"
|
||||
#include "myheader.ch"
|
||||
? MY_CONST`
|
||||
|
||||
p := New()
|
||||
p.AddIncludeDir(dir)
|
||||
result, _ := p.Process(filepath.Join(dir, "main.prg"), src)
|
||||
if !strings.Contains(result, "999") {
|
||||
t.Error("MY_CONST should be 999")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHbclassChHandled(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Simulate hbclass.ch — #command CLASS maps to comments (Five handles natively)
|
||||
hbclass := `#ifndef HB_CLASS_CH_
|
||||
#define HB_CLASS_CH_
|
||||
#command CLASS <name> => // class <name> handled natively
|
||||
#endif`
|
||||
os.WriteFile(filepath.Join(dir, "hbclass.ch"), []byte(hbclass), 0644)
|
||||
|
||||
src := `#include "hbclass.ch"
|
||||
|
||||
CLASS Person
|
||||
|
||||
FUNCTION Main()
|
||||
? "OK"
|
||||
RETURN NIL`
|
||||
|
||||
p := New()
|
||||
p.AddIncludeDir(dir)
|
||||
result, errs := p.Process(filepath.Join(dir, "main.prg"), src)
|
||||
if len(errs) > 0 {
|
||||
t.Fatal(errs)
|
||||
}
|
||||
// #command directives themselves should be removed
|
||||
if strings.Contains(result, "#command") {
|
||||
t.Error("preprocessor directives should be removed")
|
||||
}
|
||||
// CLASS Person should be expanded by #command rule
|
||||
if !strings.Contains(result, "Person") {
|
||||
t.Error("Person should appear in output")
|
||||
}
|
||||
// FUNCTION should still be there
|
||||
if !strings.Contains(result, "FUNCTION Main") {
|
||||
t.Error("FUNCTION Main should pass through")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefineInString(t *testing.T) {
|
||||
p := New()
|
||||
src := `#define FOO bar
|
||||
? "FOO should not change"
|
||||
? FOO`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, `"FOO should not change"`) {
|
||||
t.Error("define should not replace inside strings")
|
||||
}
|
||||
// Outside string should be replaced
|
||||
lines := strings.Split(result, "\n")
|
||||
for _, l := range lines {
|
||||
l = strings.TrimSpace(l)
|
||||
if l == "? bar" {
|
||||
return // found replacement outside string
|
||||
}
|
||||
}
|
||||
t.Error("FOO should be replaced to bar outside strings")
|
||||
}
|
||||
|
||||
func TestPragma(t *testing.T) {
|
||||
p := New()
|
||||
src := `#pragma compatibility(harbour)
|
||||
? "test"`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
if !strings.Contains(result, "// pragma") || !strings.Contains(result, "compatibility") {
|
||||
t.Error("pragma should be converted to comment")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMissingInclude(t *testing.T) {
|
||||
p := New()
|
||||
src := `#include "nonexistent.ch"
|
||||
? "still works"`
|
||||
|
||||
result, _ := p.Process("test.prg", src)
|
||||
// Missing include should not crash, just skip with comment
|
||||
if !strings.Contains(result, "not found") {
|
||||
t.Error("missing include should produce a comment")
|
||||
}
|
||||
if !strings.Contains(result, "still works") {
|
||||
t.Error("code after missing include should continue")
|
||||
}
|
||||
}
|
||||
536
compiler/token/token.go
Normal file
536
compiler/token/token.go
Normal file
@@ -0,0 +1,536 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// Token definitions for the Five (Harbour-compatible) language.
|
||||
// Pattern follows tsgo's Kind+Precedence approach
|
||||
// (ref/typescript-go/internal/ast/kind.go, precedence.go).
|
||||
package token
|
||||
|
||||
// Kind represents a token type. Using int16 following tsgo pattern.
type Kind int16

// Token kinds. The values are assigned by iota, so the declaration order
// below is significant: inserting, removing or reordering an entry changes
// the numeric value of every later Kind.
const (
	// Special
	ILLEGAL Kind = iota
	EOF
	NEWLINE // statement terminator

	// Literals
	INT      // 42
	LONG     // 42L or large integer
	DOUBLE   // 3.14
	STRING   // "hello" or 'hello'
	DATE_LIT // 0d20260327 or CTOD("20260327")
	TRUE     // .T.
	FALSE    // .F.
	NIL_LIT  // NIL

	// Identifiers
	IDENT // variable/function name

	// Operators
	PLUS       // +
	MINUS      // -
	STAR       // *
	SLASH      // /
	PERCENT    // %
	POWER      // ** or ^
	ASSIGN     // :=
	EQ         // = or ==
	EXEQ       // ==
	NEQ        // != or <> or #
	LT         // <
	GT         // >
	LTE        // <=
	GTE        // >=
	DOLLAR     // $ (string containment)
	AMPERSAND  // & (macro)
	AT         // @ (pass by ref)
	ARROW      // -> (alias field access)
	DBLARROW   // => (hash pair)
	COLONCOLON // :: (self access)
	COLON      // : (send message)
	DOT        // .
	INC        // ++ (postfix)
	DEC        // -- (postfix)
	PLUSEQ     // +=
	MINUSEQ    // -=
	STAREQ     // *=
	SLASHEQ    // /=
	PERCENTEQ  // %=
	POWEREQ    // **=

	// Logical operators (keyword-style)
	AND // .AND.
	OR  // .OR.
	NOT // .NOT. or !

	// Delimiters
	LPAREN    // (
	RPAREN    // )
	LBRACKET  // [
	RBRACKET  // ]
	LBRACE    // {
	RBRACE    // }
	COMMA     // ,
	SEMICOLON // ; (line continuation)
	PIPE      // | (in code blocks {|x| ...})
	QMARK     // ? (QOut shorthand)
	QQMARK    // ?? (QQOut shorthand)

	// Keywords — Declarations
	FUNCTION_KW
	PROCEDURE
	RETURN
	LOCAL
	STATIC
	PRIVATE
	PUBLIC
	FIELD
	MEMVAR
	PARAMETERS
	DECLARE

	// Keywords — Control flow
	IF
	ELSEIF
	ELSE
	ENDIF
	DO
	WHILE
	ENDDO
	FOR
	TO
	STEP
	NEXT
	EACH
	IN
	EXIT
	LOOP
	SWITCH
	CASE
	OTHERWISE
	ENDSWITCH
	ENDCASE
	BEGIN
	SEQUENCE
	RECOVER
	USING
	END

	// Keywords — OOP
	CLASS
	ENDCLASS
	DATA
	METHOD
	INHERIT
	FROM
	CONSTRUCTOR
	DESTRUCTOR
	INLINE_KW
	OPERATOR_KW
	ACCESS
	ASSIGN_KW

	// Keywords — xBase commands
	USE
	ALIAS
	SELECT
	GO
	GOTO
	TOP
	BOTTOM
	SKIP_KW
	SEEK
	SOFTSEEK
	REPLACE
	WITH
	APPEND
	BLANK
	DELETE_KW
	RECALL
	PACK
	ZAP
	INDEX
	ON
	UNIQUE
	DESCENDING
	ASCENDING
	SET
	FILTER
	RELATION
	INTO
	ORDER

	// Keywords — New Five extensions
	IMPORT
	GO_KW // GO (goroutine)
	CHANNEL
	SEND_KW
	RECEIVE
	WAITGROUP
	TYPE_KW        // TYPE ... END TYPE
	AS
	DEFER_KW       // DEFER expr (cleanup on function exit)
	CONST_KW       // CONST ... END CONST (enum block)
	QUESTION_COLON // ?: nil-safe send
	WATCH_KW       // WATCH ... CASE ... ENDWATCH (channel select)
	ASYNC_KW       // ASYNC expr (launch async)
	AWAIT_KW       // AWAIT expr (wait for result)
	PARALLEL_KW    // PARALLEL FOR (parallel loop)
	ARROW_LEFT     // <- (channel receive)
	TIMEOUT_KW     // WITH TIMEOUT n
	SPAWN_KW       // SPAWN { block } (goroutine)

	// Keywords — Preprocessor
	PP_INCLUDE   // #include
	PP_DEFINE    // #define
	PP_UNDEF     // #undef
	PP_IFDEF     // #ifdef
	PP_IFNDEF    // #ifndef
	PP_ELSE      // #else
	PP_ENDIF     // #endif
	PP_COMMAND   // #command
	PP_TRANSLATE // #translate
	PP_PRAGMA    // #pragma

	// Internal: one past the last real Kind.
	_kindEnd
)
|
||||
|
||||
// Token represents a single lexical token.
|
||||
type Token struct {
|
||||
Kind Kind
|
||||
Literal string // raw text
|
||||
Pos Position
|
||||
}
|
||||
|
||||
// Position in source file.
type Position struct {
	File   string // file name; String omits it when empty
	Line   int    // line number
	Col    int    // column number
	Offset int    // byte offset from start of source
}
|
||||
|
||||
func (p Position) String() string {
|
||||
if p.File != "" {
|
||||
return p.File + ":" + itoa(p.Line) + ":" + itoa(p.Col)
|
||||
}
|
||||
return itoa(p.Line) + ":" + itoa(p.Col)
|
||||
}
|
||||
|
||||
// itoa is a simple int-to-string conversion (base 10) that avoids importing
// strconv. Negative values get a leading '-'.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}

	// Format the unsigned magnitude rather than negating n: the most
	// negative int has no positive counterpart, so -n would overflow back to
	// itself and no digits would be produced. Unsigned negation wraps to the
	// correct magnitude for every negative input.
	neg := n < 0
	mag := uint(n)
	if neg {
		mag = -mag
	}

	var buf [20]byte // large enough for "-9223372036854775808"
	i := len(buf)
	for mag > 0 {
		i--
		buf[i] = byte('0' + mag%10)
		mag /= 10
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
|
||||
|
||||
// --- Operator Precedence (tsgo pattern) ---
|
||||
|
||||
// Precedence is an operator binding strength; larger values bind tighter.
type Precedence int

// Precedence levels, from loosest (PrecNone) to tightest (PrecPrimary).
const (
	PrecNone       Precedence = iota
	PrecAssign                // :=, +=, -=, ...
	PrecOr                    // .OR.
	PrecAnd                   // .AND.
	PrecNot                   // .NOT., !
	PrecComparison            // =, ==, !=, <, >, <=, >=, $
	PrecAddition              // +, -
	PrecMultiply              // *, /, %
	PrecPower                 // **, ^
	PrecUnary                 // -, !, .NOT., ++, --
	PrecPostfix               // ++, --, [], ()
	PrecCall                  // function(), obj:method()
	PrecPrimary               // literals, identifiers, (expr)
)
|
||||
|
||||
// GetBinaryPrecedence returns the precedence of a binary operator token.
|
||||
// Returns PrecNone if not a binary operator.
|
||||
// Pattern: tsgo GetBinaryOperatorPrecedence (ref/typescript-go/internal/ast/precedence.go:338)
|
||||
func GetBinaryPrecedence(kind Kind) Precedence {
|
||||
switch kind {
|
||||
case ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
|
||||
return PrecAssign
|
||||
case OR:
|
||||
return PrecOr
|
||||
case AND:
|
||||
return PrecAnd
|
||||
case EQ, EXEQ, NEQ, LT, GT, LTE, GTE, DOLLAR:
|
||||
return PrecComparison
|
||||
case PLUS, MINUS:
|
||||
return PrecAddition
|
||||
case STAR, SLASH, PERCENT:
|
||||
return PrecMultiply
|
||||
case POWER:
|
||||
return PrecPower
|
||||
default:
|
||||
return PrecNone
|
||||
}
|
||||
}
|
||||
|
||||
// IsRightAssociative returns true for right-to-left operators.
|
||||
func IsRightAssociative(kind Kind) bool {
|
||||
switch kind {
|
||||
case POWER, ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// --- Keyword lookup ---
|
||||
|
||||
var keywords map[string]Kind
|
||||
|
||||
func init() {
|
||||
keywords = map[string]Kind{
|
||||
"FUNCTION": FUNCTION_KW,
|
||||
"PROCEDURE": PROCEDURE,
|
||||
"RETURN": RETURN,
|
||||
"LOCAL": LOCAL,
|
||||
"STATIC": STATIC,
|
||||
"PRIVATE": PRIVATE,
|
||||
"PUBLIC": PUBLIC,
|
||||
"FIELD": FIELD,
|
||||
"MEMVAR": MEMVAR,
|
||||
"PARAMETERS": PARAMETERS,
|
||||
"DECLARE": DECLARE,
|
||||
"IF": IF,
|
||||
"ELSEIF": ELSEIF,
|
||||
"ELSE": ELSE,
|
||||
"ENDIF": ENDIF,
|
||||
"DO": DO,
|
||||
"WHILE": WHILE,
|
||||
"ENDDO": ENDDO,
|
||||
"FOR": FOR,
|
||||
"TO": TO,
|
||||
"STEP": STEP,
|
||||
"NEXT": NEXT,
|
||||
"EACH": EACH,
|
||||
"IN": IN,
|
||||
"EXIT": EXIT,
|
||||
"LOOP": LOOP,
|
||||
"SWITCH": SWITCH,
|
||||
"CASE": CASE,
|
||||
"OTHERWISE": OTHERWISE,
|
||||
"ENDSWITCH": ENDSWITCH,
|
||||
"ENDCASE": ENDCASE,
|
||||
"BEGIN": BEGIN,
|
||||
"SEQUENCE": SEQUENCE,
|
||||
"RECOVER": RECOVER,
|
||||
"USING": USING,
|
||||
"END": END,
|
||||
"CLASS": CLASS,
|
||||
"ENDCLASS": ENDCLASS,
|
||||
"DATA": DATA,
|
||||
// METHOD: recognized as keyword (used at top level too: METHOD name CLASS classname)
|
||||
"METHOD": METHOD,
|
||||
"INHERIT": INHERIT,
|
||||
"FROM": FROM,
|
||||
"CONSTRUCTOR": CONSTRUCTOR,
|
||||
"DESTRUCTOR": DESTRUCTOR,
|
||||
"INLINE": INLINE_KW,
|
||||
"OPERATOR": OPERATOR_KW,
|
||||
"ACCESS": ACCESS,
|
||||
"ASSIGN": ASSIGN_KW,
|
||||
"USE": USE,
|
||||
"ALIAS": ALIAS,
|
||||
"SELECT": SELECT,
|
||||
"GO": GO,
|
||||
"GOTO": GOTO,
|
||||
"TOP": TOP,
|
||||
"BOTTOM": BOTTOM,
|
||||
"SKIP": SKIP_KW,
|
||||
"SEEK": SEEK,
|
||||
"SOFTSEEK": SOFTSEEK,
|
||||
"REPLACE": REPLACE,
|
||||
"WITH": WITH,
|
||||
"APPEND": APPEND,
|
||||
"BLANK": BLANK,
|
||||
"DELETE": DELETE_KW,
|
||||
"RECALL": RECALL,
|
||||
"PACK": PACK,
|
||||
"ZAP": ZAP,
|
||||
"INDEX": INDEX,
|
||||
"ON": ON,
|
||||
"UNIQUE": UNIQUE,
|
||||
"DESCENDING": DESCENDING,
|
||||
"ASCENDING": ASCENDING,
|
||||
"SET": SET,
|
||||
"FILTER": FILTER,
|
||||
"RELATION": RELATION,
|
||||
"INTO": INTO,
|
||||
"ORDER": ORDER,
|
||||
"IMPORT": IMPORT,
|
||||
// CHANNEL, SEND, RECEIVE, WAITGROUP — now RTL functions, not keywords
|
||||
"TYPE": TYPE_KW,
|
||||
"AS": AS,
|
||||
"DEFER": DEFER_KW,
|
||||
"CONST": CONST_KW,
|
||||
"WATCH": WATCH_KW,
|
||||
"ASYNC": ASYNC_KW,
|
||||
"AWAIT": AWAIT_KW,
|
||||
"PARALLEL": PARALLEL_KW,
|
||||
"TIMEOUT": TIMEOUT_KW,
|
||||
"SPAWN": SPAWN_KW,
|
||||
"LAUNCH": SPAWN_KW,
|
||||
"GOROUTINE": SPAWN_KW,
|
||||
"NIL": NIL_LIT,
|
||||
// Harbour aliases
|
||||
"FUNC": FUNCTION_KW,
|
||||
"PROC": PROCEDURE,
|
||||
"RET": RETURN,
|
||||
"ENDW": ENDDO, // some Harbour code uses ENDW
|
||||
}
|
||||
}
|
||||
|
||||
// LookupKeyword returns the keyword Kind for an identifier, or IDENT.
|
||||
// Harbour keywords are case-insensitive.
|
||||
func LookupKeyword(ident string) Kind {
|
||||
// Convert to uppercase for case-insensitive lookup
|
||||
upper := toUpper(ident)
|
||||
if kind, ok := keywords[upper]; ok {
|
||||
return kind
|
||||
}
|
||||
return IDENT
|
||||
}
|
||||
|
||||
// toUpper converts ASCII string to uppercase without allocating for
// already-upper strings. Only the bytes 'a'..'z' are changed; every other
// byte, including non-ASCII bytes, is copied through untouched.
func toUpper(s string) string {
	for i := 0; i < len(s); i++ {
		if s[i] < 'a' || s[i] > 'z' {
			continue
		}

		// First lower-case byte found: allocate once, keep the prefix that
		// needed no change, and convert the remainder.
		buf := make([]byte, len(s))
		copy(buf, s[:i])
		for j := i; j < len(s); j++ {
			c := s[j]
			if c >= 'a' && c <= 'z' {
				c -= 'a' - 'A'
			}
			buf[j] = c
		}
		return string(buf)
	}
	return s // already uppercase
}
|
||||
|
||||
// String returns the display name of the token kind.
|
||||
func (k Kind) String() string {
|
||||
if int(k) < len(kindNames) {
|
||||
return kindNames[k]
|
||||
}
|
||||
return "UNKNOWN"
|
||||
}
|
||||
|
||||
var kindNames = [...]string{
|
||||
ILLEGAL: "ILLEGAL",
|
||||
EOF: "EOF",
|
||||
NEWLINE: "NEWLINE",
|
||||
INT: "INT",
|
||||
LONG: "LONG",
|
||||
DOUBLE: "DOUBLE",
|
||||
STRING: "STRING",
|
||||
DATE_LIT: "DATE",
|
||||
TRUE: ".T.",
|
||||
FALSE: ".F.",
|
||||
NIL_LIT: "NIL",
|
||||
IDENT: "IDENT",
|
||||
PLUS: "+",
|
||||
MINUS: "-",
|
||||
STAR: "*",
|
||||
SLASH: "/",
|
||||
PERCENT: "%",
|
||||
POWER: "**",
|
||||
ASSIGN: ":=",
|
||||
EQ: "=",
|
||||
EXEQ: "==",
|
||||
NEQ: "!=",
|
||||
LT: "<",
|
||||
GT: ">",
|
||||
LTE: "<=",
|
||||
GTE: ">=",
|
||||
DOLLAR: "$",
|
||||
AMPERSAND: "&",
|
||||
AT: "@",
|
||||
ARROW: "->",
|
||||
DBLARROW: "=>",
|
||||
COLONCOLON: "::",
|
||||
COLON: ":",
|
||||
DOT: ".",
|
||||
INC: "++",
|
||||
DEC: "--",
|
||||
PLUSEQ: "+=",
|
||||
MINUSEQ: "-=",
|
||||
STAREQ: "*=",
|
||||
SLASHEQ: "/=",
|
||||
PERCENTEQ: "%=",
|
||||
POWEREQ: "**=",
|
||||
AND: ".AND.",
|
||||
OR: ".OR.",
|
||||
NOT: ".NOT.",
|
||||
LPAREN: "(",
|
||||
RPAREN: ")",
|
||||
LBRACKET: "[",
|
||||
RBRACKET: "]",
|
||||
LBRACE: "{",
|
||||
RBRACE: "}",
|
||||
COMMA: ",",
|
||||
SEMICOLON: ";",
|
||||
PIPE: "|",
|
||||
FUNCTION_KW: "FUNCTION",
|
||||
PROCEDURE: "PROCEDURE",
|
||||
RETURN: "RETURN",
|
||||
LOCAL: "LOCAL",
|
||||
STATIC: "STATIC",
|
||||
IF: "IF",
|
||||
ELSEIF: "ELSEIF",
|
||||
ELSE: "ELSE",
|
||||
ENDIF: "ENDIF",
|
||||
DO: "DO",
|
||||
WHILE: "WHILE",
|
||||
ENDDO: "ENDDO",
|
||||
FOR: "FOR",
|
||||
TO: "TO",
|
||||
STEP: "STEP",
|
||||
NEXT: "NEXT",
|
||||
EACH: "EACH",
|
||||
IN: "IN",
|
||||
EXIT: "EXIT",
|
||||
LOOP: "LOOP",
|
||||
BEGIN: "BEGIN",
|
||||
SEQUENCE: "SEQUENCE",
|
||||
RECOVER: "RECOVER",
|
||||
END: "END",
|
||||
CLASS: "CLASS",
|
||||
ENDCLASS: "ENDCLASS",
|
||||
DATA: "DATA",
|
||||
METHOD: "METHOD",
|
||||
USE: "USE",
|
||||
SEEK: "SEEK",
|
||||
REPLACE: "REPLACE",
|
||||
APPEND: "APPEND",
|
||||
INDEX: "INDEX",
|
||||
SET: "SET",
|
||||
SELECT: "SELECT",
|
||||
IMPORT: "IMPORT",
|
||||
}
|
||||
113
compiler/token/token_test.go
Normal file
113
compiler/token/token_test.go
Normal file
@@ -0,0 +1,113 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
package token
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestLookupKeyword(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want Kind
|
||||
}{
|
||||
{"FUNCTION", FUNCTION_KW},
|
||||
{"function", FUNCTION_KW},
|
||||
{"Function", FUNCTION_KW},
|
||||
{"FuNcTiOn", FUNCTION_KW},
|
||||
{"IF", IF},
|
||||
{"if", IF},
|
||||
{"LOCAL", LOCAL},
|
||||
{"RETURN", RETURN},
|
||||
{"USE", USE},
|
||||
{"SEEK", SEEK},
|
||||
{"CLASS", CLASS},
|
||||
{"IMPORT", IMPORT},
|
||||
{"NIL", NIL_LIT},
|
||||
// Aliases
|
||||
{"FUNC", FUNCTION_KW},
|
||||
{"PROC", PROCEDURE},
|
||||
// Not keywords
|
||||
{"myVar", IDENT},
|
||||
{"foo", IDENT},
|
||||
{"x", IDENT},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := LookupKeyword(tt.input)
|
||||
if got != tt.want {
|
||||
t.Errorf("LookupKeyword(%q) = %v, want %v", tt.input, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetBinaryPrecedence(t *testing.T) {
|
||||
tests := []struct {
|
||||
kind Kind
|
||||
want Precedence
|
||||
}{
|
||||
{ASSIGN, PrecAssign},
|
||||
{OR, PrecOr},
|
||||
{AND, PrecAnd},
|
||||
{EQ, PrecComparison},
|
||||
{EXEQ, PrecComparison},
|
||||
{NEQ, PrecComparison},
|
||||
{LT, PrecComparison},
|
||||
{GT, PrecComparison},
|
||||
{LTE, PrecComparison},
|
||||
{GTE, PrecComparison},
|
||||
{DOLLAR, PrecComparison},
|
||||
{PLUS, PrecAddition},
|
||||
{MINUS, PrecAddition},
|
||||
{STAR, PrecMultiply},
|
||||
{SLASH, PrecMultiply},
|
||||
{PERCENT, PrecMultiply},
|
||||
{POWER, PrecPower},
|
||||
// Not binary
|
||||
{IDENT, PrecNone},
|
||||
{LPAREN, PrecNone},
|
||||
{EOF, PrecNone},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := GetBinaryPrecedence(tt.kind)
|
||||
if got != tt.want {
|
||||
t.Errorf("GetBinaryPrecedence(%v) = %v, want %v", tt.kind, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsRightAssociative(t *testing.T) {
|
||||
if !IsRightAssociative(POWER) {
|
||||
t.Error("** should be right associative")
|
||||
}
|
||||
if !IsRightAssociative(ASSIGN) {
|
||||
t.Error(":= should be right associative")
|
||||
}
|
||||
if IsRightAssociative(PLUS) {
|
||||
t.Error("+ should NOT be right associative")
|
||||
}
|
||||
}
|
||||
|
||||
func TestToUpper(t *testing.T) {
|
||||
tests := []struct{ in, want string }{
|
||||
{"abc", "ABC"},
|
||||
{"ABC", "ABC"},
|
||||
{"aBc", "ABC"},
|
||||
{"", ""},
|
||||
{"123", "123"},
|
||||
{"hello_world", "HELLO_WORLD"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := toUpper(tt.in)
|
||||
if got != tt.want {
|
||||
t.Errorf("toUpper(%q) = %q, want %q", tt.in, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestKindString(t *testing.T) {
|
||||
if PLUS.String() != "+" {
|
||||
t.Errorf("PLUS.String() = %q, want %q", PLUS.String(), "+")
|
||||
}
|
||||
if FUNCTION_KW.String() != "FUNCTION" {
|
||||
t.Errorf("FUNCTION_KW.String() = %q", FUNCTION_KW.String())
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user