Files modified (6): compiler/parser/parser.go — #27: Add currentUpper() helper Replaces 30 strings.ToUpper(p.current.Literal) calls compiler/parser/stmtreg.go — Remove now-unused strings import compiler/parser/expr.go — #30: Document comma expr Harbour semantics compiler/gengo/gengo.go — #31: Replace 8 TODO comments with WARN Macro expr now emits MacroPush() instead of TODO compiler/token/token.go — #25: Replace itoa with strconv.Itoa #41: Add 50+ missing kindNames entries for complete String() Issues resolved: #25,27,30,31,41 Total fixed: 39/53 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
569 lines
12 KiB
Go
569 lines
12 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// Token definitions for the Five (Harbour-compatible) language.
|
|
// Pattern follows tsgo's Kind+Precedence approach
|
|
// (ref/typescript-go/internal/ast/kind.go, precedence.go).
|
|
package token
|
|
|
|
import "strconv"
|
|
|
|
// Kind identifies the lexical category of a token. It is stored as an int16,
// following the tsgo Kind pattern.
type Kind int16

// Token kinds. Values are assigned sequentially by iota, so the order of this
// list is significant: ILLEGAL is the zero value and _kindEnd is declared last.
const (
	// Special
	ILLEGAL Kind = iota
	EOF
	NEWLINE // statement terminator

	// Literals
	INT      // 42
	LONG     // 42L or large integer
	DOUBLE   // 3.14
	STRING   // "hello" or 'hello'
	DATE_LIT // 0d20260327 or CTOD("20260327")
	TRUE     // .T.
	FALSE    // .F.
	NIL_LIT  // NIL

	// Identifiers
	IDENT // variable/function name

	// Operators
	PLUS       // +
	MINUS      // -
	STAR       // *
	SLASH      // /
	PERCENT    // %
	POWER      // ** or ^
	ASSIGN     // :=
	EQ         // = or == (NOTE(review): EXEQ is also documented as ==; confirm which one the lexer emits)
	EXEQ       // ==
	NEQ        // != or <> or #
	LT         // <
	GT         // >
	LTE        // <=
	GTE        // >=
	DOLLAR     // $ (string containment)
	AMPERSAND  // & (macro)
	AT         // @ (pass by ref)
	ARROW      // -> (alias field access)
	DBLARROW   // => (hash pair)
	COLONCOLON // :: (self access)
	COLON      // : (send message)
	DOT        // .
	INC        // ++ (postfix)
	DEC        // -- (postfix)
	PLUSEQ     // +=
	MINUSEQ    // -=
	STAREQ     // *=
	SLASHEQ    // /=
	PERCENTEQ  // %=
	POWEREQ    // **=

	// Logical operators (keyword-style)
	AND // .AND.
	OR  // .OR.
	NOT // .NOT. or !

	// Delimiters
	LPAREN    // (
	RPAREN    // )
	LBRACKET  // [
	RBRACKET  // ]
	LBRACE    // {
	RBRACE    // }
	COMMA     // ,
	SEMICOLON // ; (line continuation)
	PIPE      // | (in code blocks {|x| ...})
	QMARK     // ? (QOut shorthand)
	QQMARK    // ?? (QQOut shorthand)

	// Keywords — Declarations
	FUNCTION_KW // FUNCTION (also spelled FUNC in the keyword table)
	PROCEDURE   // PROCEDURE (also spelled PROC in the keyword table)
	RETURN      // RETURN (also spelled RET in the keyword table)
	LOCAL
	STATIC
	PRIVATE
	PUBLIC
	FIELD
	MEMVAR
	PARAMETERS
	DECLARE

	// Keywords — Control flow
	IF
	ELSEIF
	ELSE
	ENDIF
	DO
	WHILE
	ENDDO // ENDDO (also spelled ENDW in the keyword table)
	FOR
	TO
	STEP
	NEXT
	EACH
	IN
	EXIT
	LOOP
	SWITCH
	CASE
	OTHERWISE
	ENDSWITCH
	ENDCASE
	BEGIN
	SEQUENCE
	RECOVER
	USING
	END

	// Keywords — OOP
	CLASS
	ENDCLASS
	DATA
	METHOD
	INHERIT
	FROM
	CONSTRUCTOR
	DESTRUCTOR
	INLINE_KW   // INLINE
	OPERATOR_KW // OPERATOR
	ACCESS
	ASSIGN_KW // ASSIGN (the keyword, as opposed to the := operator ASSIGN)

	// Keywords — xBase commands
	USE
	ALIAS
	SELECT
	GO
	GOTO
	TOP
	BOTTOM
	SKIP_KW // SKIP
	SEEK
	SOFTSEEK
	REPLACE
	WITH
	APPEND
	BLANK
	DELETE_KW // DELETE
	RECALL
	PACK
	ZAP
	INDEX
	ON
	UNIQUE
	DESCENDING
	ASCENDING
	SET
	FILTER
	RELATION
	INTO
	ORDER

	// Keywords — New Five extensions
	IMPORT
	GO_KW          // GO (goroutine); NOTE(review): the keyword table maps "GO" to GO, not GO_KW
	CHANNEL        // the keyword table notes CHANNEL/SEND/RECEIVE/WAITGROUP are now RTL functions
	SEND_KW        // SEND
	RECEIVE        // RECEIVE
	WAITGROUP      // WAITGROUP
	TYPE_KW        // TYPE ... END TYPE
	AS             // AS
	DEFER_KW       // DEFER expr (cleanup on function exit)
	CONST_KW       // CONST ... END CONST (enum block)
	QUESTION_COLON // ?: nil-safe send
	WATCH_KW       // WATCH ... CASE ... ENDWATCH (channel select)
	ASYNC_KW       // ASYNC expr (launch async)
	AWAIT_KW       // AWAIT expr (wait for result)
	PARALLEL_KW    // PARALLEL FOR (parallel loop)
	ARROW_LEFT     // <- (channel receive)
	TIMEOUT_KW     // WITH TIMEOUT n
	SPAWN_KW       // SPAWN { block } (goroutine); LAUNCH and GOROUTINE map here too

	// Keywords — Preprocessor
	PP_INCLUDE   // #include
	PP_DEFINE    // #define
	PP_UNDEF     // #undef
	PP_IFDEF     // #ifdef
	PP_IFNDEF    // #ifndef
	PP_ELSE      // #else
	PP_ENDIF     // #endif
	PP_COMMAND   // #command
	PP_TRANSLATE // #translate
	PP_PRAGMA    // #pragma

	// Internal: declared last, so its value is one past the last real kind.
	_kindEnd
)
|
|
|
|
// Token represents a single lexical token.
|
|
type Token struct {
|
|
Kind Kind
|
|
Literal string // raw text
|
|
Pos Position
|
|
}
|
|
|
|
// Position is a location in a source file.
type Position struct {
	File   string // file name; may be empty
	Line   int    // line number
	Col    int    // column number
	Offset int    // byte offset from start of source
}

// String formats the position as "file:line:col". When File is empty the
// file part and its colon are omitted, giving "line:col". Offset is not
// included in the output.
func (p Position) String() string {
	lineCol := strconv.Itoa(p.Line) + ":" + strconv.Itoa(p.Col)
	if p.File == "" {
		return lineCol
	}
	return p.File + ":" + lineCol
}
|
|
|
|
// itoa removed — using strconv.Itoa (fixes math.MinInt overflow bug)
|
|
|
|
// --- Operator Precedence (tsgo pattern) ---
|
|
|
|
// Precedence is the precedence level of an operator or expression form.
// Levels are numbered by iota in declaration order, starting from PrecNone.
// NOTE(review): presumably a larger value binds more tightly — confirm
// against the parser.
type Precedence int

// Precedence levels, in ascending numeric order.
const (
	PrecNone       Precedence = iota // not an operator
	PrecAssign                       // :=, +=, -=, ...
	PrecOr                           // .OR.
	PrecAnd                          // .AND.
	PrecNot                          // .NOT., !
	PrecComparison                   // =, ==, !=, <, >, <=, >=, $
	PrecAddition                     // +, -
	PrecMultiply                     // *, /, %
	PrecPower                        // **, ^
	PrecUnary                        // -, !, .NOT., ++, --
	PrecPostfix                      // ++, --, [], ()
	PrecCall                         // function(), obj:method()
	PrecPrimary                      // literals, identifiers, (expr)
)
|
|
|
|
// GetBinaryPrecedence returns the precedence of a binary operator token.
|
|
// Returns PrecNone if not a binary operator.
|
|
// Pattern: tsgo GetBinaryOperatorPrecedence (ref/typescript-go/internal/ast/precedence.go:338)
|
|
func GetBinaryPrecedence(kind Kind) Precedence {
|
|
switch kind {
|
|
case ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
|
|
return PrecAssign
|
|
case OR:
|
|
return PrecOr
|
|
case AND:
|
|
return PrecAnd
|
|
case EQ, EXEQ, NEQ, LT, GT, LTE, GTE, DOLLAR:
|
|
return PrecComparison
|
|
case PLUS, MINUS:
|
|
return PrecAddition
|
|
case STAR, SLASH, PERCENT:
|
|
return PrecMultiply
|
|
case POWER:
|
|
return PrecPower
|
|
default:
|
|
return PrecNone
|
|
}
|
|
}
|
|
|
|
// IsRightAssociative returns true for right-to-left operators.
|
|
func IsRightAssociative(kind Kind) bool {
|
|
switch kind {
|
|
case POWER, ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// --- Keyword lookup ---
|
|
|
|
var keywords map[string]Kind
|
|
|
|
func init() {
|
|
keywords = map[string]Kind{
|
|
"FUNCTION": FUNCTION_KW,
|
|
"PROCEDURE": PROCEDURE,
|
|
"RETURN": RETURN,
|
|
"LOCAL": LOCAL,
|
|
"STATIC": STATIC,
|
|
"PRIVATE": PRIVATE,
|
|
"PUBLIC": PUBLIC,
|
|
"FIELD": FIELD,
|
|
"MEMVAR": MEMVAR,
|
|
"PARAMETERS": PARAMETERS,
|
|
"DECLARE": DECLARE,
|
|
"IF": IF,
|
|
"ELSEIF": ELSEIF,
|
|
"ELSE": ELSE,
|
|
"ENDIF": ENDIF,
|
|
"DO": DO,
|
|
"WHILE": WHILE,
|
|
"ENDDO": ENDDO,
|
|
"FOR": FOR,
|
|
"TO": TO,
|
|
"STEP": STEP,
|
|
"NEXT": NEXT,
|
|
"EACH": EACH,
|
|
"IN": IN,
|
|
"EXIT": EXIT,
|
|
"LOOP": LOOP,
|
|
"SWITCH": SWITCH,
|
|
"CASE": CASE,
|
|
"OTHERWISE": OTHERWISE,
|
|
"ENDSWITCH": ENDSWITCH,
|
|
"ENDCASE": ENDCASE,
|
|
"BEGIN": BEGIN,
|
|
"SEQUENCE": SEQUENCE,
|
|
"RECOVER": RECOVER,
|
|
"USING": USING,
|
|
"END": END,
|
|
"CLASS": CLASS,
|
|
"ENDCLASS": ENDCLASS,
|
|
"DATA": DATA,
|
|
// METHOD: recognized as keyword (used at top level too: METHOD name CLASS classname)
|
|
"METHOD": METHOD,
|
|
"INHERIT": INHERIT,
|
|
"FROM": FROM,
|
|
"CONSTRUCTOR": CONSTRUCTOR,
|
|
"DESTRUCTOR": DESTRUCTOR,
|
|
"INLINE": INLINE_KW,
|
|
"OPERATOR": OPERATOR_KW,
|
|
"ACCESS": ACCESS,
|
|
"ASSIGN": ASSIGN_KW,
|
|
"USE": USE,
|
|
"ALIAS": ALIAS,
|
|
"SELECT": SELECT,
|
|
"GO": GO,
|
|
"GOTO": GOTO,
|
|
"TOP": TOP,
|
|
"BOTTOM": BOTTOM,
|
|
"SKIP": SKIP_KW,
|
|
"SEEK": SEEK,
|
|
"SOFTSEEK": SOFTSEEK,
|
|
"REPLACE": REPLACE,
|
|
"WITH": WITH,
|
|
"APPEND": APPEND,
|
|
"BLANK": BLANK,
|
|
"DELETE": DELETE_KW,
|
|
"RECALL": RECALL,
|
|
"PACK": PACK,
|
|
"ZAP": ZAP,
|
|
"INDEX": INDEX,
|
|
"ON": ON,
|
|
"UNIQUE": UNIQUE,
|
|
"DESCENDING": DESCENDING,
|
|
"ASCENDING": ASCENDING,
|
|
"SET": SET,
|
|
"FILTER": FILTER,
|
|
"RELATION": RELATION,
|
|
"INTO": INTO,
|
|
"ORDER": ORDER,
|
|
"IMPORT": IMPORT,
|
|
// CHANNEL, SEND, RECEIVE, WAITGROUP — now RTL functions, not keywords
|
|
"TYPE": TYPE_KW,
|
|
"AS": AS,
|
|
"DEFER": DEFER_KW,
|
|
"CONST": CONST_KW,
|
|
"WATCH": WATCH_KW,
|
|
"ASYNC": ASYNC_KW,
|
|
"AWAIT": AWAIT_KW,
|
|
"PARALLEL": PARALLEL_KW,
|
|
"TIMEOUT": TIMEOUT_KW,
|
|
"SPAWN": SPAWN_KW,
|
|
"LAUNCH": SPAWN_KW,
|
|
"GOROUTINE": SPAWN_KW,
|
|
"NIL": NIL_LIT,
|
|
// Harbour aliases
|
|
"FUNC": FUNCTION_KW,
|
|
"PROC": PROCEDURE,
|
|
"RET": RETURN,
|
|
"ENDW": ENDDO, // some Harbour code uses ENDW
|
|
}
|
|
}
|
|
|
|
// LookupKeyword returns the keyword Kind for an identifier, or IDENT.
|
|
// Harbour keywords are case-insensitive.
|
|
func LookupKeyword(ident string) Kind {
|
|
// Convert to uppercase for case-insensitive lookup
|
|
upper := toUpper(ident)
|
|
if kind, ok := keywords[upper]; ok {
|
|
return kind
|
|
}
|
|
return IDENT
|
|
}
|
|
|
|
// toUpper upper-cases the ASCII letters a-z in s and leaves every other byte
// untouched. When s contains no lower-case ASCII letter it is returned as-is,
// so no new string is built in that case.
func toUpper(s string) string {
	// Locate the first byte that actually needs converting.
	first := -1
	for i := 0; i < len(s); i++ {
		if c := s[i]; 'a' <= c && c <= 'z' {
			first = i
			break
		}
	}
	if first < 0 {
		return s // already uppercase
	}

	// Copy once, then convert in place from the first lower-case byte onward.
	out := []byte(s)
	for i := first; i < len(out); i++ {
		if c := out[i]; 'a' <= c && c <= 'z' {
			out[i] = c - ('a' - 'A')
		}
	}
	return string(out)
}
|
|
|
|
// String returns the display name of the token kind.
|
|
func (k Kind) String() string {
|
|
if int(k) < len(kindNames) {
|
|
return kindNames[k]
|
|
}
|
|
return "UNKNOWN"
|
|
}
|
|
|
|
var kindNames = [...]string{
|
|
ILLEGAL: "ILLEGAL",
|
|
EOF: "EOF",
|
|
NEWLINE: "NEWLINE",
|
|
INT: "INT",
|
|
LONG: "LONG",
|
|
DOUBLE: "DOUBLE",
|
|
STRING: "STRING",
|
|
DATE_LIT: "DATE",
|
|
TRUE: ".T.",
|
|
FALSE: ".F.",
|
|
NIL_LIT: "NIL",
|
|
IDENT: "IDENT",
|
|
PLUS: "+",
|
|
MINUS: "-",
|
|
STAR: "*",
|
|
SLASH: "/",
|
|
PERCENT: "%",
|
|
POWER: "**",
|
|
ASSIGN: ":=",
|
|
EQ: "=",
|
|
EXEQ: "==",
|
|
NEQ: "!=",
|
|
LT: "<",
|
|
GT: ">",
|
|
LTE: "<=",
|
|
GTE: ">=",
|
|
DOLLAR: "$",
|
|
AMPERSAND: "&",
|
|
AT: "@",
|
|
ARROW: "->",
|
|
DBLARROW: "=>",
|
|
COLONCOLON: "::",
|
|
COLON: ":",
|
|
DOT: ".",
|
|
INC: "++",
|
|
DEC: "--",
|
|
PLUSEQ: "+=",
|
|
MINUSEQ: "-=",
|
|
STAREQ: "*=",
|
|
SLASHEQ: "/=",
|
|
PERCENTEQ: "%=",
|
|
POWEREQ: "**=",
|
|
AND: ".AND.",
|
|
OR: ".OR.",
|
|
NOT: ".NOT.",
|
|
LPAREN: "(",
|
|
RPAREN: ")",
|
|
LBRACKET: "[",
|
|
RBRACKET: "]",
|
|
LBRACE: "{",
|
|
RBRACE: "}",
|
|
COMMA: ",",
|
|
SEMICOLON: ";",
|
|
PIPE: "|",
|
|
FUNCTION_KW: "FUNCTION",
|
|
PROCEDURE: "PROCEDURE",
|
|
RETURN: "RETURN",
|
|
LOCAL: "LOCAL",
|
|
STATIC: "STATIC",
|
|
IF: "IF",
|
|
ELSEIF: "ELSEIF",
|
|
ELSE: "ELSE",
|
|
ENDIF: "ENDIF",
|
|
DO: "DO",
|
|
WHILE: "WHILE",
|
|
ENDDO: "ENDDO",
|
|
FOR: "FOR",
|
|
TO: "TO",
|
|
STEP: "STEP",
|
|
NEXT: "NEXT",
|
|
EACH: "EACH",
|
|
IN: "IN",
|
|
EXIT: "EXIT",
|
|
LOOP: "LOOP",
|
|
BEGIN: "BEGIN",
|
|
SEQUENCE: "SEQUENCE",
|
|
RECOVER: "RECOVER",
|
|
END: "END",
|
|
CLASS: "CLASS",
|
|
ENDCLASS: "ENDCLASS",
|
|
DATA: "DATA",
|
|
METHOD: "METHOD",
|
|
USE: "USE",
|
|
SEEK: "SEEK",
|
|
REPLACE: "REPLACE",
|
|
APPEND: "APPEND",
|
|
INDEX: "INDEX",
|
|
SET: "SET",
|
|
SELECT: "SELECT",
|
|
IMPORT: "IMPORT",
|
|
// Missing tokens added for complete String() coverage
|
|
PRIVATE: "PRIVATE",
|
|
PUBLIC: "PUBLIC",
|
|
FIELD: "FIELD",
|
|
MEMVAR: "MEMVAR",
|
|
PARAMETERS: "PARAMETERS",
|
|
DECLARE: "DECLARE",
|
|
SWITCH: "SWITCH",
|
|
CASE: "CASE",
|
|
OTHERWISE: "OTHERWISE",
|
|
ENDSWITCH: "ENDSWITCH",
|
|
ENDCASE: "ENDCASE",
|
|
QMARK: "?",
|
|
QQMARK: "??",
|
|
BLANK: "BLANK",
|
|
SKIP_KW: "SKIP",
|
|
DELETE_KW: "DELETE",
|
|
RECALL: "RECALL",
|
|
PACK: "PACK",
|
|
ZAP: "ZAP",
|
|
GO: "GO",
|
|
GOTO: "GOTO",
|
|
TOP: "TOP",
|
|
BOTTOM: "BOTTOM",
|
|
SOFTSEEK: "SOFTSEEK",
|
|
UNIQUE: "UNIQUE",
|
|
DESCENDING: "DESCENDING",
|
|
FROM: "FROM",
|
|
ON: "ON",
|
|
WITH: "WITH",
|
|
ALIAS: "ALIAS",
|
|
ACCESS: "ACCESS",
|
|
ASSIGN_KW: "ASSIGN",
|
|
INHERIT: "INHERIT",
|
|
INLINE_KW: "INLINE",
|
|
DESTRUCTOR: "DESTRUCTOR",
|
|
CONSTRUCTOR: "CONSTRUCTOR",
|
|
OPERATOR_KW: "OPERATOR",
|
|
USING: "USING",
|
|
TYPE_KW: "TYPE",
|
|
AS: "AS",
|
|
GO_KW: "GO_KW",
|
|
DEFER_KW: "DEFER",
|
|
CONST_KW: "CONST",
|
|
WATCH_KW: "WATCH",
|
|
ASYNC_KW: "ASYNC",
|
|
AWAIT_KW: "AWAIT",
|
|
PARALLEL_KW: "PARALLEL",
|
|
TIMEOUT_KW: "TIMEOUT",
|
|
SPAWN_KW: "SPAWN",
|
|
ARROW_LEFT: "<-",
|
|
}
|