Files
five/compiler/token/token.go
Charles KWON OhJun 59568f3301 Five v0.9 — Harbour + Go fusion language
- Compiler: PP → Lexer → Parser → Analyzer → Gengo pipeline
- Parser: 232/236 (98%) Harbour compatibility, registry-based dispatch
- RTL: 351 Harbour-compatible functions
- RDD: DBF/NTX/CDX engines with Rushmore bitmap optimization
- Go Interop: IMPORT + pkg.Func() + obj:Method() with FastPath (15M calls/sec)
- HB_FUNC API: Full Harbour C API compatible Go bridge
- Concurrency: SPAWN/LAUNCH/GOROUTINE, <-, WATCH, PARALLEL FOR, ASYNC/AWAIT
- Extensions: Multi-return, DEFER, Slice, f-string, Nil-safe ?:, CONST
- Macro Compiler: Runtime AST parsing and evaluation
- Debugger: TUI debugger with source display, breakpoints, stepping
- FRB: Native + Pcode dual mode runtime binary
- Tests: 13 packages ALL PASS

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 09:41:50 +09:00

537 lines
11 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Token definitions for the Five (Harbour-compatible) language.
// Pattern follows tsgo's Kind+Precedence approach
// (ref/typescript-go/internal/ast/kind.go, precedence.go).
package token
// Kind identifies the lexical category of a token. The underlying type is
// int16, following the tsgo pattern.
type Kind int16
// Token kinds. All values come from one iota sequence starting at
// ILLEGAL = 0, so declaration order fixes each constant's numeric value and
// inserting a constant renumbers everything declared after it. The kindNames
// table used by Kind.String is indexed by these values.
const (
// Special
ILLEGAL Kind = iota // zero value of Kind
EOF
NEWLINE // statement terminator
// Literals
INT // 42
LONG // 42L or large integer
DOUBLE // 3.14
STRING // "hello" or 'hello'
DATE_LIT // 0d20260327 or CTOD("20260327")
TRUE // .T.
FALSE // .F.
NIL_LIT // NIL (the keywords map resolves "NIL" to this kind)
// Identifiers
IDENT // variable/function name; LookupKeyword returns this for non-keywords
// Operators
PLUS // +
MINUS // -
STAR // *
SLASH // /
PERCENT // %
POWER // ** or ^
ASSIGN // :=
EQ // = or == (NOTE(review): EXEQ below also lists ==; confirm which kind the lexer emits for "==")
EXEQ // ==
NEQ // != or <> or #
LT // <
GT // >
LTE // <=
GTE // >=
DOLLAR // $ (string containment)
AMPERSAND // & (macro)
AT // @ (pass by ref)
ARROW // -> (alias field access)
DBLARROW // => (hash pair)
COLONCOLON // :: (self access)
COLON // : (send message)
DOT // .
INC // ++ (postfix)
DEC // -- (postfix)
PLUSEQ // +=
MINUSEQ // -=
STAREQ // *=
SLASHEQ // /=
PERCENTEQ // %=
POWEREQ // **=
// Logical operators (keyword-style)
AND // .AND.
OR // .OR.
NOT // .NOT. or !
// Delimiters
LPAREN // (
RPAREN // )
LBRACKET // [
RBRACKET // ]
LBRACE // {
RBRACE // }
COMMA // ,
SEMICOLON // ; (line continuation)
PIPE // | (in code blocks {|x| ...})
QMARK // ? (QOut shorthand)
QQMARK // ?? (QQOut shorthand)
// Keywords — Declarations
FUNCTION_KW // FUNCTION (the keywords map also accepts the alias FUNC)
PROCEDURE // PROCEDURE (alias PROC)
RETURN // RETURN (alias RET)
LOCAL
STATIC
PRIVATE
PUBLIC
FIELD
MEMVAR
PARAMETERS
DECLARE
// Keywords — Control flow
IF
ELSEIF
ELSE
ENDIF
DO
WHILE
ENDDO // ENDDO (alias ENDW)
FOR
TO
STEP
NEXT
EACH
IN
EXIT
LOOP
SWITCH
CASE
OTHERWISE
ENDSWITCH
ENDCASE
BEGIN
SEQUENCE
RECOVER
USING
END
// Keywords — OOP
CLASS
ENDCLASS
DATA
METHOD
INHERIT
FROM
CONSTRUCTOR
DESTRUCTOR
INLINE_KW // INLINE
OPERATOR_KW // OPERATOR
ACCESS
ASSIGN_KW // ASSIGN keyword; distinct from the := operator kind ASSIGN
// Keywords — xBase commands
USE
ALIAS
SELECT
GO // the keywords map resolves "GO" to this kind, not to GO_KW
GOTO
TOP
BOTTOM
SKIP_KW // SKIP
SEEK
SOFTSEEK
REPLACE
WITH
APPEND
BLANK
DELETE_KW // DELETE
RECALL
PACK
ZAP
INDEX
ON
UNIQUE
DESCENDING
ASCENDING
SET
FILTER
RELATION
INTO
ORDER
// Keywords — New Five extensions
// QUESTION_COLON and ARROW_LEFT are operator spellings grouped here with the
// extension keywords. GO_KW, CHANNEL, SEND_KW, RECEIVE and WAITGROUP have no
// entry in the keywords map in this file, so LookupKeyword never returns them.
IMPORT
GO_KW // GO (goroutine); "GO" itself maps to the xBase kind GO
CHANNEL
SEND_KW
RECEIVE
WAITGROUP
TYPE_KW // TYPE ... END TYPE
AS
DEFER_KW // DEFER expr (cleanup on function exit)
CONST_KW // CONST ... END CONST (enum block)
QUESTION_COLON // ?: nil-safe send
WATCH_KW // WATCH ... CASE ... ENDWATCH (channel select)
ASYNC_KW // ASYNC expr (launch async)
AWAIT_KW // AWAIT expr (wait for result)
PARALLEL_KW // PARALLEL FOR (parallel loop)
ARROW_LEFT // <- (channel receive)
TIMEOUT_KW // WITH TIMEOUT n
SPAWN_KW // SPAWN { block } (goroutine); LAUNCH and GOROUTINE map here too
// Keywords — Preprocessor
PP_INCLUDE // #include
PP_DEFINE // #define
PP_UNDEF // #undef
PP_IFDEF // #ifdef
PP_IFNDEF // #ifndef
PP_ELSE // #else
PP_ENDIF // #endif
PP_COMMAND // #command
PP_TRANSLATE // #translate
PP_PRAGMA // #pragma
// Internal
_kindEnd // last value in the iota sequence; equals the number of kinds declared above
)
// Token is one lexical unit: its kind, the raw source text it was read from,
// and where that text sits in the input.
type Token struct {
	Kind    Kind
	Literal string // raw text
	Pos     Position
}
// Position locates a point in a source file. File may be empty, in which
// case String omits it.
type Position struct {
	File   string
	Line   int
	Col    int
	Offset int // byte offset from start of source
}
// String renders the position as "file:line:col", or as "line:col" when
// File is empty. Offset is not included.
func (p Position) String() string {
	lineCol := itoa(p.Line) + ":" + itoa(p.Col)
	if p.File == "" {
		return lineCol
	}
	return p.File + ":" + lineCol
}
// itoa formats n in base 10 without importing strconv.
//
// It handles the full range of int, including the minimum value, whose
// negation does not fit in an int.
func itoa(n int) string {
	if n == 0 {
		return "0"
	}

	// Work on the unsigned magnitude. Negating the minimum int as a signed
	// value overflows and stays negative, which would skip the digit loop
	// and yield just "-"; negating after conversion to uint wraps to the
	// correct magnitude for every negative n.
	neg := n < 0
	mag := uint(n)
	if neg {
		mag = -mag
	}

	// 19 digits plus a sign is the widest a 64-bit int can be.
	var buf [20]byte
	i := len(buf)
	for mag > 0 {
		i--
		buf[i] = byte('0' + mag%10)
		mag /= 10
	}
	if neg {
		i--
		buf[i] = '-'
	}
	return string(buf[i:])
}
// --- Operator Precedence (tsgo pattern) ---

// Precedence ranks operator classes for expression parsing. Levels are
// numbered by iota in the order listed, so reordering the constants changes
// their relative ranking.
// NOTE(review): presumably a larger value binds tighter — confirm against the parser.
type Precedence int
// Of these levels, GetBinaryPrecedence in this file returns only PrecNone,
// PrecAssign, PrecOr, PrecAnd, PrecComparison, PrecAddition, PrecMultiply and
// PrecPower; the remaining levels are not produced by it.
const (
PrecNone Precedence = iota // zero value; returned for tokens that are not binary operators
PrecAssign // :=, +=, -=, ...
PrecOr // .OR.
PrecAnd // .AND.
PrecNot // .NOT., !
PrecComparison // =, ==, !=, <, >, <=, >=, $
PrecAddition // +, -
PrecMultiply // *, /, %
PrecPower // **, ^
PrecUnary // -, !, .NOT., ++, --
PrecPostfix // ++, --, [], ()
PrecCall // function(), obj:method()
PrecPrimary // literals, identifiers, (expr)
)
// GetBinaryPrecedence maps a binary operator kind to its precedence level.
// Any kind that is not one of the binary operators listed below yields
// PrecNone.
// Pattern: tsgo GetBinaryOperatorPrecedence (ref/typescript-go/internal/ast/precedence.go:338)
func GetBinaryPrecedence(kind Kind) Precedence {
	prec := PrecNone
	switch kind {
	case POWER:
		prec = PrecPower
	case STAR, SLASH, PERCENT:
		prec = PrecMultiply
	case PLUS, MINUS:
		prec = PrecAddition
	case EQ, EXEQ, NEQ, LT, GT, LTE, GTE, DOLLAR:
		prec = PrecComparison
	case AND:
		prec = PrecAnd
	case OR:
		prec = PrecOr
	case ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
		prec = PrecAssign
	}
	return prec
}
// IsRightAssociative reports whether kind groups right-to-left: the power
// operator and every assignment form do; all other kinds do not.
func IsRightAssociative(kind Kind) bool {
	switch kind {
	case POWER:
		return true
	case ASSIGN, PLUSEQ, MINUSEQ, STAREQ, SLASHEQ, PERCENTEQ, POWEREQ:
		return true
	}
	return false
}
// --- Keyword lookup ---

// keywords maps the upper-case spelling of each reserved word to its Kind.
// Several spellings may share one Kind (for example SPAWN, LAUNCH and
// GOROUTINE all yield SPAWN_KW).
//
// The table is a package-level composite literal rather than being filled in
// by an init function, so it is already populated during package variable
// initialization and inside any init function of this package; a lookup made
// at that stage would otherwise hit a nil map and report every word as IDENT.
var keywords = map[string]Kind{
	"FUNCTION":   FUNCTION_KW,
	"PROCEDURE":  PROCEDURE,
	"RETURN":     RETURN,
	"LOCAL":      LOCAL,
	"STATIC":     STATIC,
	"PRIVATE":    PRIVATE,
	"PUBLIC":     PUBLIC,
	"FIELD":      FIELD,
	"MEMVAR":     MEMVAR,
	"PARAMETERS": PARAMETERS,
	"DECLARE":    DECLARE,
	"IF":         IF,
	"ELSEIF":     ELSEIF,
	"ELSE":       ELSE,
	"ENDIF":      ENDIF,
	"DO":         DO,
	"WHILE":      WHILE,
	"ENDDO":      ENDDO,
	"FOR":        FOR,
	"TO":         TO,
	"STEP":       STEP,
	"NEXT":       NEXT,
	"EACH":       EACH,
	"IN":         IN,
	"EXIT":       EXIT,
	"LOOP":       LOOP,
	"SWITCH":     SWITCH,
	"CASE":       CASE,
	"OTHERWISE":  OTHERWISE,
	"ENDSWITCH":  ENDSWITCH,
	"ENDCASE":    ENDCASE,
	"BEGIN":      BEGIN,
	"SEQUENCE":   SEQUENCE,
	"RECOVER":    RECOVER,
	"USING":      USING,
	"END":        END,
	"CLASS":      CLASS,
	"ENDCLASS":   ENDCLASS,
	"DATA":       DATA,
	// METHOD: recognized as keyword (used at top level too: METHOD name CLASS classname)
	"METHOD":      METHOD,
	"INHERIT":     INHERIT,
	"FROM":        FROM,
	"CONSTRUCTOR": CONSTRUCTOR,
	"DESTRUCTOR":  DESTRUCTOR,
	"INLINE":      INLINE_KW,
	"OPERATOR":    OPERATOR_KW,
	"ACCESS":      ACCESS,
	"ASSIGN":      ASSIGN_KW,
	"USE":         USE,
	"ALIAS":       ALIAS,
	"SELECT":      SELECT,
	"GO":          GO,
	"GOTO":        GOTO,
	"TOP":         TOP,
	"BOTTOM":      BOTTOM,
	"SKIP":        SKIP_KW,
	"SEEK":        SEEK,
	"SOFTSEEK":    SOFTSEEK,
	"REPLACE":     REPLACE,
	"WITH":        WITH,
	"APPEND":      APPEND,
	"BLANK":       BLANK,
	"DELETE":      DELETE_KW,
	"RECALL":      RECALL,
	"PACK":        PACK,
	"ZAP":         ZAP,
	"INDEX":       INDEX,
	"ON":          ON,
	"UNIQUE":      UNIQUE,
	"DESCENDING":  DESCENDING,
	"ASCENDING":   ASCENDING,
	"SET":         SET,
	"FILTER":      FILTER,
	"RELATION":    RELATION,
	"INTO":        INTO,
	"ORDER":       ORDER,
	"IMPORT":      IMPORT,
	// CHANNEL, SEND, RECEIVE, WAITGROUP — now RTL functions, not keywords
	"TYPE":      TYPE_KW,
	"AS":        AS,
	"DEFER":     DEFER_KW,
	"CONST":     CONST_KW,
	"WATCH":     WATCH_KW,
	"ASYNC":     ASYNC_KW,
	"AWAIT":     AWAIT_KW,
	"PARALLEL":  PARALLEL_KW,
	"TIMEOUT":   TIMEOUT_KW,
	"SPAWN":     SPAWN_KW,
	"LAUNCH":    SPAWN_KW,
	"GOROUTINE": SPAWN_KW,
	"NIL":       NIL_LIT,
	// Harbour aliases
	"FUNC": FUNCTION_KW,
	"PROC": PROCEDURE,
	"RET":  RETURN,
	"ENDW": ENDDO, // some Harbour code uses ENDW
}
// LookupKeyword classifies an identifier: it returns the keyword's Kind when
// ident, compared case-insensitively (ASCII letters only), is a reserved
// word, and IDENT otherwise.
func LookupKeyword(ident string) Kind {
	kind, isKeyword := keywords[toUpper(ident)]
	if !isKeyword {
		return IDENT
	}
	return kind
}
// toUpper upper-cases the ASCII letters a-z in s and leaves every other byte
// untouched. When s contains no lower-case ASCII letter it is returned as-is,
// without allocating.
func toUpper(s string) string {
	// Find the first byte that needs changing, if any.
	first := -1
	for i := 0; i < len(s); i++ {
		if c := s[i]; 'a' <= c && c <= 'z' {
			first = i
			break
		}
	}
	if first < 0 {
		return s // already uppercase
	}

	// Copy once, then convert from the first lower-case byte onward.
	out := []byte(s)
	for i := first; i < len(out); i++ {
		if c := out[i]; 'a' <= c && c <= 'z' {
			out[i] = c - ('a' - 'A')
		}
	}
	return string(out)
}
// String returns the display name of the token kind.
//
// It returns "UNKNOWN" when k lies outside the kindNames table (including
// negative values, which are possible because Kind is a signed type) or when
// the table has no name recorded for k.
func (k Kind) String() string {
	if k < 0 || int(k) >= len(kindNames) {
		return "UNKNOWN"
	}
	if name := kindNames[k]; name != "" {
		return name
	}
	// In range but never given a name: an empty string would make
	// diagnostics unreadable, so report it the same way as out-of-range.
	return "UNKNOWN"
}
// kindNames holds the display name of each Kind, indexed by the Kind's
// numeric value. It has one entry for every kind declared before _kindEnd;
// operators and delimiters use their source spelling, keywords their
// canonical upper-case spelling. Names are for display only and are not
// unique (GO and GO_KW both read "GO").
var kindNames = [...]string{
	// Special
	ILLEGAL: "ILLEGAL",
	EOF:     "EOF",
	NEWLINE: "NEWLINE",
	// Literals
	INT:      "INT",
	LONG:     "LONG",
	DOUBLE:   "DOUBLE",
	STRING:   "STRING",
	DATE_LIT: "DATE",
	TRUE:     ".T.",
	FALSE:    ".F.",
	NIL_LIT:  "NIL",
	// Identifiers
	IDENT: "IDENT",
	// Operators
	PLUS:       "+",
	MINUS:      "-",
	STAR:       "*",
	SLASH:      "/",
	PERCENT:    "%",
	POWER:      "**",
	ASSIGN:     ":=",
	EQ:         "=",
	EXEQ:       "==",
	NEQ:        "!=",
	LT:         "<",
	GT:         ">",
	LTE:        "<=",
	GTE:        ">=",
	DOLLAR:     "$",
	AMPERSAND:  "&",
	AT:         "@",
	ARROW:      "->",
	DBLARROW:   "=>",
	COLONCOLON: "::",
	COLON:      ":",
	DOT:        ".",
	INC:        "++",
	DEC:        "--",
	PLUSEQ:     "+=",
	MINUSEQ:    "-=",
	STAREQ:     "*=",
	SLASHEQ:    "/=",
	PERCENTEQ:  "%=",
	POWEREQ:    "**=",
	// Logical operators
	AND: ".AND.",
	OR:  ".OR.",
	NOT: ".NOT.",
	// Delimiters
	LPAREN:    "(",
	RPAREN:    ")",
	LBRACKET:  "[",
	RBRACKET:  "]",
	LBRACE:    "{",
	RBRACE:    "}",
	COMMA:     ",",
	SEMICOLON: ";",
	PIPE:      "|",
	QMARK:     "?",
	QQMARK:    "??",
	// Keywords — Declarations
	FUNCTION_KW: "FUNCTION",
	PROCEDURE:   "PROCEDURE",
	RETURN:      "RETURN",
	LOCAL:       "LOCAL",
	STATIC:      "STATIC",
	PRIVATE:     "PRIVATE",
	PUBLIC:      "PUBLIC",
	FIELD:       "FIELD",
	MEMVAR:      "MEMVAR",
	PARAMETERS:  "PARAMETERS",
	DECLARE:     "DECLARE",
	// Keywords — Control flow
	IF:        "IF",
	ELSEIF:    "ELSEIF",
	ELSE:      "ELSE",
	ENDIF:     "ENDIF",
	DO:        "DO",
	WHILE:     "WHILE",
	ENDDO:     "ENDDO",
	FOR:       "FOR",
	TO:        "TO",
	STEP:      "STEP",
	NEXT:      "NEXT",
	EACH:      "EACH",
	IN:        "IN",
	EXIT:      "EXIT",
	LOOP:      "LOOP",
	SWITCH:    "SWITCH",
	CASE:      "CASE",
	OTHERWISE: "OTHERWISE",
	ENDSWITCH: "ENDSWITCH",
	ENDCASE:   "ENDCASE",
	BEGIN:     "BEGIN",
	SEQUENCE:  "SEQUENCE",
	RECOVER:   "RECOVER",
	USING:     "USING",
	END:       "END",
	// Keywords — OOP
	CLASS:       "CLASS",
	ENDCLASS:    "ENDCLASS",
	DATA:        "DATA",
	METHOD:      "METHOD",
	INHERIT:     "INHERIT",
	FROM:        "FROM",
	CONSTRUCTOR: "CONSTRUCTOR",
	DESTRUCTOR:  "DESTRUCTOR",
	INLINE_KW:   "INLINE",
	OPERATOR_KW: "OPERATOR",
	ACCESS:      "ACCESS",
	ASSIGN_KW:   "ASSIGN",
	// Keywords — xBase commands
	USE:        "USE",
	ALIAS:      "ALIAS",
	SELECT:     "SELECT",
	GO:         "GO",
	GOTO:       "GOTO",
	TOP:        "TOP",
	BOTTOM:     "BOTTOM",
	SKIP_KW:    "SKIP",
	SEEK:       "SEEK",
	SOFTSEEK:   "SOFTSEEK",
	REPLACE:    "REPLACE",
	WITH:       "WITH",
	APPEND:     "APPEND",
	BLANK:      "BLANK",
	DELETE_KW:  "DELETE",
	RECALL:     "RECALL",
	PACK:       "PACK",
	ZAP:        "ZAP",
	INDEX:      "INDEX",
	ON:         "ON",
	UNIQUE:     "UNIQUE",
	DESCENDING: "DESCENDING",
	ASCENDING:  "ASCENDING",
	SET:        "SET",
	FILTER:     "FILTER",
	RELATION:   "RELATION",
	INTO:       "INTO",
	ORDER:      "ORDER",
	// Keywords — New Five extensions
	IMPORT:         "IMPORT",
	GO_KW:          "GO",
	CHANNEL:        "CHANNEL",
	SEND_KW:        "SEND",
	RECEIVE:        "RECEIVE",
	WAITGROUP:      "WAITGROUP",
	TYPE_KW:        "TYPE",
	AS:             "AS",
	DEFER_KW:       "DEFER",
	CONST_KW:       "CONST",
	QUESTION_COLON: "?:",
	WATCH_KW:       "WATCH",
	ASYNC_KW:       "ASYNC",
	AWAIT_KW:       "AWAIT",
	PARALLEL_KW:    "PARALLEL",
	ARROW_LEFT:     "<-",
	TIMEOUT_KW:     "TIMEOUT",
	SPAWN_KW:       "SPAWN",
	// Keywords — Preprocessor
	PP_INCLUDE:   "#include",
	PP_DEFINE:    "#define",
	PP_UNDEF:     "#undef",
	PP_IFDEF:     "#ifdef",
	PP_IFNDEF:    "#ifndef",
	PP_ELSE:      "#else",
	PP_ENDIF:     "#endif",
	PP_COMMAND:   "#command",
	PP_TRANSLATE: "#translate",
	PP_PRAGMA:    "#pragma",
}