Five v0.9 — Harbour + Go fusion language
- Compiler: PP → Lexer → Parser → Analyzer → Gengo pipeline - Parser: 232/236 (98%) Harbour compatibility, registry-based dispatch - RTL: 351 Harbour-compatible functions - RDD: DBF/NTX/CDX engines with Rushmore bitmap optimization - Go Interop: IMPORT + pkg.Func() + obj:Method() with FastPath (15M calls/sec) - HB_FUNC API: Full Harbour C API compatible Go bridge - Concurrency: SPAWN/LAUNCH/GOROUTINE, <-, WATCH, PARALLEL FOR, ASYNC/AWAIT - Extensions: Multi-return, DEFER, Slice, f-string, Nil-safe ?:, CONST - Macro Compiler: Runtime AST parsing and evaluation - Debugger: TUI debugger with source display, breakpoints, stepping - FRB: Native + Pcode dual mode runtime binary - Tests: 13 packages ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
540
compiler/pp/command.go
Normal file
540
compiler/pp/command.go
Normal file
@@ -0,0 +1,540 @@
|
||||
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
// All rights reserved.
|
||||
|
||||
// #command / #translate implementation for Five preprocessor.
|
||||
//
|
||||
// Harbour PP syntax:
|
||||
// #command PATTERN => RESULT
|
||||
// #translate PATTERN => RESULT
|
||||
// #xcommand PATTERN => RESULT (case-sensitive)
|
||||
// #xtranslate PATTERN => RESULT (case-sensitive)
|
||||
//
|
||||
// Pattern markers:
|
||||
// <x> — match any expression (regular match)
|
||||
// <!x!> — match single identifier only (restricted match)
|
||||
// <x,...> — match comma-separated list
|
||||
// <*x*> — match rest of line (wild match)
|
||||
// <x:a,b,c> — match one of listed words (list match)
|
||||
// [...] — optional clause
|
||||
//
|
||||
// Result markers:
|
||||
// <x> — substitute matched text
|
||||
// <(x)> — stringify (wrap in quotes)
|
||||
// <{x}> — blockify (wrap in {|| })
|
||||
// #<x> — dumb stringify
|
||||
// <.x.> — logify (.T. if matched, .F. if not)
|
||||
//
|
||||
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
|
||||
package pp
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Rule represents a single #command or #translate rule.
|
||||
type Rule struct {
|
||||
Pattern string // raw pattern text
|
||||
Result string // raw result text
|
||||
IsCommand bool // #command vs #translate
|
||||
CaseSens bool // #xcommand/#xtranslate = case sensitive
|
||||
Keyword string // first keyword (for fast matching)
|
||||
Markers []Marker // parsed pattern markers
|
||||
ResultTmpl string // result template with marker references
|
||||
}
|
||||
|
||||
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
|
||||
type Marker struct {
|
||||
Name string // marker name
|
||||
Type MarkerType
|
||||
ListValues []string // for <x:a,b,c> — allowed values
|
||||
}
|
||||
|
||||
// MarkerType identifies the kind of a pattern marker.
type MarkerType int

// The marker kinds, numbered from zero in declaration order.
const (
	// MarkerRegular is <x>: any expression.
	MarkerRegular MarkerType = iota
	// MarkerRestricted is <!x!>: identifier only.
	MarkerRestricted
	// MarkerList is <x,...>: comma-separated list.
	MarkerList
	// MarkerWild is <*x*>: rest of line.
	MarkerWild
	// MarkerWordList is <x:a,b,c>: one of the listed words.
	MarkerWordList
)
|
||||
|
||||
// ParseRule parses a #command/#translate directive into a Rule.
|
||||
func ParseRule(directive string, isCommand, caseSens bool) *Rule {
|
||||
// Split on =>
|
||||
parts := strings.SplitN(directive, "=>", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil
|
||||
}
|
||||
|
||||
pattern := strings.TrimSpace(parts[0])
|
||||
result := strings.TrimSpace(parts[1])
|
||||
|
||||
// Handle line continuation (;)
|
||||
result = strings.ReplaceAll(result, " ;", "")
|
||||
|
||||
rule := &Rule{
|
||||
Pattern: pattern,
|
||||
Result: result,
|
||||
IsCommand: isCommand,
|
||||
CaseSens: caseSens,
|
||||
ResultTmpl: result,
|
||||
}
|
||||
|
||||
// Extract first keyword for fast matching
|
||||
words := strings.Fields(pattern)
|
||||
if len(words) > 0 {
|
||||
kw := words[0]
|
||||
// Remove marker brackets
|
||||
kw = strings.TrimLeft(kw, "<[")
|
||||
kw = strings.TrimRight(kw, ">]")
|
||||
if !strings.ContainsAny(kw, "!*,:") {
|
||||
rule.Keyword = kw
|
||||
}
|
||||
}
|
||||
|
||||
// Parse markers from pattern
|
||||
rule.Markers = parseMarkers(pattern)
|
||||
|
||||
return rule
|
||||
}
|
||||
|
||||
// parseMarkers extracts all <...> markers from a pattern.
|
||||
func parseMarkers(pattern string) []Marker {
|
||||
var markers []Marker
|
||||
i := 0
|
||||
for i < len(pattern) {
|
||||
if pattern[i] == '<' {
|
||||
end := strings.IndexByte(pattern[i:], '>')
|
||||
if end < 0 {
|
||||
break
|
||||
}
|
||||
inner := pattern[i+1 : i+end]
|
||||
m := parseOneMarker(inner)
|
||||
if m.Name != "" {
|
||||
markers = append(markers, m)
|
||||
}
|
||||
i += end + 1
|
||||
} else {
|
||||
i++
|
||||
}
|
||||
}
|
||||
return markers
|
||||
}
|
||||
|
||||
func parseOneMarker(inner string) Marker {
|
||||
inner = strings.TrimSpace(inner)
|
||||
|
||||
// <!name!> — restricted
|
||||
if strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") {
|
||||
return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted}
|
||||
}
|
||||
|
||||
// <*name*> — wild
|
||||
if strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") {
|
||||
return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild}
|
||||
}
|
||||
|
||||
// <name,...> — comma list
|
||||
if strings.HasSuffix(inner, ",...") {
|
||||
return Marker{Name: inner[:len(inner)-4], Type: MarkerList}
|
||||
}
|
||||
|
||||
// <name:a,b,c> — word list
|
||||
if idx := strings.IndexByte(inner, ':'); idx > 0 {
|
||||
name := inner[:idx]
|
||||
vals := strings.Split(inner[idx+1:], ",")
|
||||
for i := range vals {
|
||||
vals[i] = strings.TrimSpace(vals[i])
|
||||
}
|
||||
return Marker{Name: name, Type: MarkerWordList, ListValues: vals}
|
||||
}
|
||||
|
||||
// <name> — regular
|
||||
return Marker{Name: inner, Type: MarkerRegular}
|
||||
}
|
||||
|
||||
// --- Rule matching and application ---
|
||||
|
||||
// MatchLine checks if a source line matches this rule and returns the substituted result.
|
||||
// Returns ("", false) if no match.
|
||||
func (r *Rule) MatchLine(line string) (string, bool) {
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if trimmed == "" {
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Fast keyword check
|
||||
if r.Keyword != "" {
|
||||
firstWord := firstToken(trimmed)
|
||||
if r.CaseSens {
|
||||
if firstWord != r.Keyword {
|
||||
return "", false
|
||||
}
|
||||
} else {
|
||||
if !strings.EqualFold(firstWord, r.Keyword) {
|
||||
return "", false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to match pattern against line
|
||||
captures := r.matchPattern(trimmed)
|
||||
if captures == nil {
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Apply result template
|
||||
result := r.applyResult(captures)
|
||||
return result, true
|
||||
}
|
||||
|
||||
// matchPattern attempts to match the pattern against a line.
|
||||
// Returns captured values map, or nil if no match.
|
||||
func (r *Rule) matchPattern(line string) map[string]string {
|
||||
captures := make(map[string]string)
|
||||
|
||||
patternWords := tokenizePattern(r.Pattern)
|
||||
lineWords := tokenizeLine(line)
|
||||
|
||||
pi, li := 0, 0
|
||||
for pi < len(patternWords) && li < len(lineWords) {
|
||||
pw := patternWords[pi]
|
||||
|
||||
// Marker?
|
||||
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
|
||||
inner := pw[1 : len(pw)-1]
|
||||
m := parseOneMarker(inner)
|
||||
|
||||
switch m.Type {
|
||||
case MarkerWild:
|
||||
// Capture rest of line
|
||||
rest := strings.Join(lineWords[li:], " ")
|
||||
captures[m.Name] = rest
|
||||
li = len(lineWords)
|
||||
pi++
|
||||
|
||||
case MarkerList:
|
||||
// Capture comma-separated items until next keyword
|
||||
var items []string
|
||||
for li < len(lineWords) {
|
||||
if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
|
||||
break
|
||||
}
|
||||
items = append(items, lineWords[li])
|
||||
li++
|
||||
}
|
||||
captures[m.Name] = strings.Join(items, " ")
|
||||
pi++
|
||||
|
||||
case MarkerWordList:
|
||||
// Match one of listed words
|
||||
matched := false
|
||||
for _, allowed := range m.ListValues {
|
||||
if r.CaseSens {
|
||||
if lineWords[li] == allowed {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
} else if strings.EqualFold(lineWords[li], allowed) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !matched {
|
||||
return nil
|
||||
}
|
||||
captures[m.Name] = lineWords[li]
|
||||
li++
|
||||
pi++
|
||||
|
||||
default:
|
||||
// Regular or restricted: capture one token or expression
|
||||
captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens)
|
||||
captures[m.Name] = captured
|
||||
pi++
|
||||
}
|
||||
} else if pw == "[" {
|
||||
// Optional clause — skip to matching ]
|
||||
depth := 1
|
||||
pi++
|
||||
for pi < len(patternWords) && depth > 0 {
|
||||
if patternWords[pi] == "[" {
|
||||
depth++
|
||||
} else if patternWords[pi] == "]" {
|
||||
depth--
|
||||
}
|
||||
pi++
|
||||
}
|
||||
} else if pw == "]" {
|
||||
pi++
|
||||
} else {
|
||||
// Literal keyword — must match
|
||||
if !matchWord(lineWords[li], pw, r.CaseSens) {
|
||||
return nil
|
||||
}
|
||||
li++
|
||||
pi++
|
||||
}
|
||||
}
|
||||
|
||||
// Skip remaining optional markers in pattern
|
||||
for pi < len(patternWords) {
|
||||
pw := patternWords[pi]
|
||||
if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) {
|
||||
pi++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// For #command with no markers and no optional clauses:
|
||||
// all line tokens must be consumed for a match
|
||||
if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 &&
|
||||
!strings.Contains(r.Pattern, "[") {
|
||||
return nil
|
||||
}
|
||||
|
||||
return captures
|
||||
}
|
||||
|
||||
// applyResult substitutes captured values into the result template.
|
||||
func (r *Rule) applyResult(captures map[string]string) string {
|
||||
result := r.ResultTmpl
|
||||
|
||||
for name, val := range captures {
|
||||
// <name> — direct substitution
|
||||
result = strings.ReplaceAll(result, "<"+name+">", val)
|
||||
// <(name)> — stringify
|
||||
result = strings.ReplaceAll(result, "<("+name+")>", `"`+val+`"`)
|
||||
// <.name.> — logify
|
||||
if val != "" {
|
||||
result = strings.ReplaceAll(result, "<."+name+".>", ".T.")
|
||||
} else {
|
||||
result = strings.ReplaceAll(result, "<."+name+".>", ".F.")
|
||||
}
|
||||
// #<name> — dumb stringify
|
||||
result = strings.ReplaceAll(result, "#<"+name+">", `"`+val+`"`)
|
||||
}
|
||||
|
||||
// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
|
||||
result = cleanUnreferencedMarkers(result)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references.
|
||||
// Only removes well-formed PP marker references, not comparison operators.
|
||||
func cleanUnreferencedMarkers(s string) string {
|
||||
// Match patterns like <identifier>, <(identifier)>, <.identifier.>, #<identifier>
|
||||
var out strings.Builder
|
||||
i := 0
|
||||
for i < len(s) {
|
||||
removed := false
|
||||
// #<name>
|
||||
if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' {
|
||||
if end := findMarkerEnd(s, i+1); end > 0 {
|
||||
i = end
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
// <name>, <(name)>, <.name.>, <"name">
|
||||
if !removed && s[i] == '<' {
|
||||
if end := findMarkerEnd(s, i); end > 0 {
|
||||
i = end
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
if !removed {
|
||||
out.WriteByte(s[i])
|
||||
i++
|
||||
}
|
||||
}
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// findMarkerEnd reports whether a result-marker reference begins at
// s[start]. It returns the index just past the closing '>' when one does,
// and 0 otherwise (including when start is past the end of s or s[start] is
// not '<').
//
// Accepted shape: '<', an optional single '(' '.' '"' or '{', an identifier
// (letter or underscore, then letters, digits, underscores), any run of the
// characters ')' '.' '"' ',' '}' and space, and finally '>'. This covers
// <name>, <(name)>, <.name.>, <"name">, <{name}> and <name,...>.
func findMarkerEnd(s string, start int) int {
	if start >= len(s) || s[start] != '<' {
		return 0
	}

	isIdentStart := func(c byte) bool {
		return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
	}

	i := start + 1

	// Skip the optional opening decoration of stringify, logify and blockify.
	if i < len(s) {
		switch s[i] {
		case '(', '.', '"', '{':
			i++
		}
	}

	// Must start with letter or underscore (identifier)
	if i >= len(s) || !isIdentStart(s[i]) {
		return 0
	}
	for i < len(s) && (isIdentStart(s[i]) || s[i] >= '0' && s[i] <= '9') {
		i++
	}

	// Skip the closing decoration and the ",..." list suffix.
suffix:
	for i < len(s) {
		switch s[i] {
		case ')', '.', '"', ',', ' ', '}':
			i++
		default:
			break suffix
		}
	}

	if i < len(s) && s[i] == '>' {
		return i + 1
	}
	return 0
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
// firstToken returns the part of s that precedes the first space, tab or
// '(' character, or all of s when none of them occurs.
func firstToken(s string) string {
	if cut := strings.IndexAny(s, " \t("); cut >= 0 {
		return s[:cut]
	}
	return s
}
|
||||
|
||||
// matchWord reports whether lineWord equals patternWord. The comparison is
// exact when caseSens is true and ignores letter case otherwise.
func matchWord(lineWord, patternWord string, caseSens bool) bool {
	if !caseSens {
		return strings.EqualFold(lineWord, patternWord)
	}
	return lineWord == patternWord
}
|
||||
|
||||
// tokenizePattern splits a pattern into tokens:
//
//   - a marker, from '<' up to the next '>', is a single token;
//   - '[' and ']' are tokens of their own;
//   - any other run of characters up to a blank, tab, '<', '[' or ']' is a
//     literal word.
//
// A '<' that has no '>' anywhere after it cannot open a marker; it is taken
// as the first character of a literal word.
func tokenizePattern(pattern string) []string {
	var tokens []string
	i := 0
	for i < len(pattern) {
		c := pattern[i]

		switch c {
		case ' ', '\t':
			i++
			continue
		case '[', ']':
			tokens = append(tokens, string(c))
			i++
			continue
		case '<':
			if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
				tokens = append(tokens, pattern[i:i+end+1])
				i += end + 1
				continue
			}
		}

		// Literal word. The first byte is consumed unconditionally: it is
		// either an ordinary character or an unmatched '<', and the scan must
		// advance past it, otherwise the outer loop would never terminate.
		start := i
		i++
		for i < len(pattern) && pattern[i] != ' ' && pattern[i] != '\t' &&
			pattern[i] != '<' && pattern[i] != '[' && pattern[i] != ']' {
			i++
		}
		tokens = append(tokens, pattern[start:i])
	}
	return tokens
}
|
||||
|
||||
// tokenizeLine splits a source line into tokens separated by blanks and
// tabs. A string literal delimited by double or single quotes is kept as one
// token, delimiters included (an unterminated literal runs to the end of the
// line). Every comma is a token of its own. Any other run of characters up
// to a blank, tab, comma or quote forms a word.
func tokenizeLine(line string) []string {
	var out []string
	n := len(line)
	for pos := 0; pos < n; {
		switch ch := line[pos]; ch {
		case ' ', '\t':
			pos++

		case '"', '\'':
			begin := pos
			pos++
			for pos < n && line[pos] != ch {
				pos++
			}
			if pos < n {
				pos++ // include the closing quote
			}
			out = append(out, line[begin:pos])

		case ',':
			out = append(out, ",")
			pos++

		default:
			begin := pos
		word:
			for pos < n {
				switch line[pos] {
				case ' ', '\t', ',', '"', '\'':
					break word
				}
				pos++
			}
			out = append(out, line[begin:pos])
		}
	}
	return out
}
|
||||
|
||||
// captureExpression captures an expression from line tokens.
|
||||
// If this is the last marker in the pattern, captures all remaining tokens.
|
||||
// Otherwise, captures until the next keyword in the pattern.
|
||||
func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string {
|
||||
if *li >= len(lineWords) {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Find next literal keyword in pattern to use as delimiter
|
||||
delimWord := ""
|
||||
for pi := nextPi; pi < len(patternWords); pi++ {
|
||||
pw := patternWords[pi]
|
||||
if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
|
||||
delimWord = pw
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if delimWord != "" {
|
||||
// Capture until delimiter keyword
|
||||
var parts []string
|
||||
for *li < len(lineWords) {
|
||||
if matchWord(lineWords[*li], delimWord, caseSens) {
|
||||
break
|
||||
}
|
||||
parts = append(parts, lineWords[*li])
|
||||
*li++
|
||||
}
|
||||
return strings.Join(parts, " ")
|
||||
}
|
||||
|
||||
// No delimiter: if last marker, capture all remaining tokens
|
||||
if nextPi >= len(patternWords) {
|
||||
rest := strings.Join(lineWords[*li:], " ")
|
||||
*li = len(lineWords)
|
||||
return rest
|
||||
}
|
||||
|
||||
// Single token capture (between markers)
|
||||
tok := lineWords[*li]
|
||||
*li++
|
||||
return tok
|
||||
}
|
||||
Reference in New Issue
Block a user