Five v0.9 — Harbour + Go fusion language

- Compiler: PP → Lexer → Parser → Analyzer → Gengo pipeline - Parser: 232/236 (98%) Harbour compatibility, registry-based dispatch - RTL: 351 Harbour-compatible functions - RDD: DBF/NTX/CDX engines with Rushmore bitmap optimization - Go Interop: IMPORT + pkg.Func() + obj:Method() with FastPath (15M calls/sec) - HB_FUNC API: Full Harbour C API compatible Go bridge - Concurrency: SPAWN/LAUNCH/GOROUTINE, <-, WATCH, PARALLEL FOR, ASYNC/AWAIT - Extensions: Multi-return, DEFER, Slice, f-string, Nil-safe ?:, CONST - Macro Compiler: Runtime AST parsing and evaluation - Debugger: TUI debugger with source display, breakpoints, stepping - FRB: Native + Pcode dual mode runtime binary - Tests: 13 packages ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 09:41:50 +09:00
commit 59568f3301
282 changed files with 66658 additions and 0 deletions
--- a/compiler/pp/command.go
+++ b/compiler/pp/command.go
@@ -0,0 +1,540 @@
+// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
+// All rights reserved.
+
+// #command / #translate implementation for Five preprocessor.
+//
+// Harbour PP syntax:
+//   #command PATTERN => RESULT
+//   #translate PATTERN => RESULT
+//   #xcommand PATTERN => RESULT   (case-sensitive)
+//   #xtranslate PATTERN => RESULT (case-sensitive)
+//
+// Pattern markers:
+//   <x>       — match any expression (regular match)
+//   <!x!>     — match single identifier only (restricted match)
+//   <x,...>   — match comma-separated list
+//   <*x*>     — match rest of line (wild match)
+//   <x:a,b,c> — match one of listed words (list match)
+//   [...]     — optional clause
+//
+// Result markers:
+//   <x>       — substitute matched text
+//   <(x)>     — stringify (wrap in quotes)
+//   <{x}>     — blockify (wrap in {|| })
+//   #<x>      — dumb stringify
+//   <.x.>     — logify (.T. if matched, .F. if not)
+//
+// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
+package pp
+
+import (
+	"strings"
+)
+
+// Rule represents a single #command or #translate rule.
+// ParseRule populates every field; Result and ResultTmpl start out identical.
+type Rule struct {
+	Pattern    string   // raw pattern text (left of "=>", whitespace-trimmed)
+	Result     string   // raw result text (right of "=>", trimmed, " ;" continuations removed)
+	IsCommand  bool     // #command vs #translate
+	CaseSens   bool     // #xcommand/#xtranslate = case sensitive
+	Keyword    string   // first keyword (for fast matching); empty means MatchLine skips the fast check
+	Markers    []Marker // parsed pattern markers, in pattern order
+	ResultTmpl string   // result template with marker references; applyResult substitutes into this
+}
+
+// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>, <x:a,b,c>.
+type Marker struct {
+	Name       string // marker name (the identifier inside the angle brackets)
+	Type       MarkerType // which marker form was written in the pattern
+	ListValues []string // for <x:a,b,c> — allowed values; nil for every other marker type
+}
+
+// MarkerType identifies which form of pattern marker a Marker is.
+type MarkerType int
+
+// Marker forms recognised in patterns. MarkerRegular is the zero value.
+const (
+	MarkerRegular    MarkerType = iota // <x> — any expression
+	MarkerRestricted                    // <!x!> — identifier only
+	MarkerList                          // <x,...> — comma-separated list
+	MarkerWild                          // <*x*> — rest of line
+	MarkerWordList                      // <x:a,b,c> — one of listed words
+)
+
+// ParseRule parses the text of a #command/#translate directive into a Rule.
+//
+// directive is expected in the form "PATTERN => RESULT"; only the first "=>"
+// separates the two halves, so the result text may itself contain "=>".
+// isCommand and caseSens are stored on the Rule unchanged.
+//
+// It returns nil when directive contains no "=>".
+func ParseRule(directive string, isCommand, caseSens bool) *Rule {
+	// Split on =>
+	parts := strings.SplitN(directive, "=>", 2)
+	if len(parts) != 2 {
+		return nil
+	}
+
+	pattern := strings.TrimSpace(parts[0])
+	result := strings.TrimSpace(parts[1])
+
+	// Handle line continuation (;): every " ;" sequence is dropped.
+	result = strings.ReplaceAll(result, " ;", "")
+
+	rule := &Rule{
+		Pattern:    pattern,
+		Result:     result,
+		IsCommand:  isCommand,
+		CaseSens:   caseSens,
+		ResultTmpl: result,
+	}
+
+	// Extract the first keyword for fast matching. Only a literal word can
+	// serve as a keyword: when the pattern opens with a marker ("<x> ...") or
+	// an optional clause ("[FOO] ...") the first line token is arbitrary, so
+	// Keyword stays empty and MatchLine skips its fast check. The literal is
+	// cut at '(' as well because firstToken cuts the line's first word there.
+	if words := strings.Fields(pattern); len(words) > 0 {
+		kw := words[0]
+		if cut := strings.IndexAny(kw, "<[]("); cut >= 0 {
+			kw = kw[:cut]
+		}
+		if kw != "" && !strings.ContainsAny(kw, "!*,:") {
+			rule.Keyword = kw
+		}
+	}
+
+	// Parse markers from pattern
+	rule.Markers = parseMarkers(pattern)
+
+	return rule
+}
+
+// parseMarkers collects every <...> marker found in pattern, in order of
+// appearance. Markers whose parsed name is empty are dropped, and scanning
+// stops at the first '<' that has no closing '>'.
+func parseMarkers(pattern string) []Marker {
+	var found []Marker
+	rest := pattern
+	for {
+		open := strings.IndexByte(rest, '<')
+		if open < 0 {
+			return found
+		}
+		// Offset of the closing bracket, relative to the opening one.
+		span := strings.IndexByte(rest[open:], '>')
+		if span < 0 {
+			return found
+		}
+		if mk := parseOneMarker(rest[open+1 : open+span]); mk.Name != "" {
+			found = append(found, mk)
+		}
+		rest = rest[open+span+1:]
+	}
+}
+
+// parseOneMarker classifies the text found between '<' and '>' in a pattern
+// and returns the corresponding Marker.
+//
+// Recognised forms, checked in this order after trimming surrounding spaces:
+//
+//	!name!     restricted marker
+//	*name*     wild marker
+//	name,...   comma-list marker
+//	name:a,b   word-list marker (values are trimmed individually)
+//	name       regular marker (fallback)
+//
+// A lone "!" or "*" is too short to be a delimited name and falls through to
+// the regular form instead of slicing out of range.
+func parseOneMarker(inner string) Marker {
+	inner = strings.TrimSpace(inner)
+
+	// The delimited forms need an opening and a closing delimiter, i.e. at
+	// least two bytes; a single "!" or "*" satisfies both HasPrefix and
+	// HasSuffix and would otherwise produce inner[1:0].
+	if len(inner) >= 2 {
+		// <!name!> — restricted
+		if strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") {
+			return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted}
+		}
+
+		// <*name*> — wild
+		if strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") {
+			return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild}
+		}
+	}
+
+	// <name,...> — comma list
+	if strings.HasSuffix(inner, ",...") {
+		return Marker{Name: inner[:len(inner)-4], Type: MarkerList}
+	}
+
+	// <name:a,b,c> — word list (idx > 0: a name must precede the colon)
+	if idx := strings.IndexByte(inner, ':'); idx > 0 {
+		name := inner[:idx]
+		vals := strings.Split(inner[idx+1:], ",")
+		for i := range vals {
+			vals[i] = strings.TrimSpace(vals[i])
+		}
+		return Marker{Name: name, Type: MarkerWordList, ListValues: vals}
+	}
+
+	// <name> — regular
+	return Marker{Name: inner, Type: MarkerRegular}
+}
+
+// --- Rule matching and application ---
+
+// MatchLine tests a source line against this rule. On success it returns the
+// result template with the captured values substituted and true; otherwise it
+// returns ("", false). Blank lines never match.
+func (r *Rule) MatchLine(line string) (string, bool) {
+	src := strings.TrimSpace(line)
+	if src == "" {
+		return "", false
+	}
+
+	// Cheap rejection: when the rule has a keyword, the line's first token
+	// must equal it (case-folded unless the rule is case sensitive).
+	if r.Keyword != "" && !matchWord(firstToken(src), r.Keyword, r.CaseSens) {
+		return "", false
+	}
+
+	// Full pattern match; nil means the line does not fit the pattern.
+	caps := r.matchPattern(src)
+	if caps == nil {
+		return "", false
+	}
+
+	return r.applyResult(caps), true
+}
+
+// matchPattern attempts to match the rule's pattern against a line.
+//
+// Pattern and line are tokenized and walked in parallel. Literal pattern
+// words must equal the current line token (per r.CaseSens); markers capture
+// one or more line tokens under the marker's name. Optional clauses "[...]"
+// are skipped as a whole and are never matched against the line.
+//
+// It returns the captured values keyed by marker name, or nil if the line
+// does not match. A non-nil, possibly empty map means success.
+func (r *Rule) matchPattern(line string) map[string]string {
+	captures := make(map[string]string)
+
+	patternWords := tokenizePattern(r.Pattern)
+	lineWords := tokenizeLine(line)
+
+	pi, li := 0, 0
+	for pi < len(patternWords) && li < len(lineWords) {
+		pw := patternWords[pi]
+
+		// Marker?
+		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
+			inner := pw[1 : len(pw)-1]
+			m := parseOneMarker(inner)
+
+			switch m.Type {
+			case MarkerWild:
+				// Capture rest of line
+				rest := strings.Join(lineWords[li:], " ")
+				captures[m.Name] = rest
+				li = len(lineWords)
+				pi++
+
+			case MarkerList:
+				// Capture comma-separated items until the line token that
+				// equals the pattern token right after this marker.
+				var items []string
+				for li < len(lineWords) {
+					if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
+						break
+					}
+					items = append(items, lineWords[li])
+					li++
+				}
+				captures[m.Name] = strings.Join(items, " ")
+				pi++
+
+			case MarkerWordList:
+				// Match one of listed words
+				matched := false
+				for _, allowed := range m.ListValues {
+					if matchWord(lineWords[li], allowed, r.CaseSens) {
+						matched = true
+						break
+					}
+				}
+				if !matched {
+					return nil
+				}
+				captures[m.Name] = lineWords[li]
+				li++
+				pi++
+
+			default:
+				// Regular or restricted: capture one token or expression
+				captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens)
+				captures[m.Name] = captured
+				pi++
+			}
+		} else if pw == "[" {
+			// Optional clause — skip to matching ]
+			depth := 1
+			pi++
+			for pi < len(patternWords) && depth > 0 {
+				if patternWords[pi] == "[" {
+					depth++
+				} else if patternWords[pi] == "]" {
+					depth--
+				}
+				pi++
+			}
+		} else if pw == "]" {
+			pi++
+		} else {
+			// Literal keyword — must match
+			if !matchWord(lineWords[li], pw, r.CaseSens) {
+				return nil
+			}
+			li++
+			pi++
+		}
+	}
+
+	// The line is exhausted (or the pattern is). Whatever is left of the
+	// pattern must be skippable: markers, brackets, and anything inside an
+	// optional clause. A literal keyword outside "[...]" is required text the
+	// line does not contain, so the rule does not match.
+	depth := 0
+	for ; pi < len(patternWords); pi++ {
+		pw := patternWords[pi]
+		switch {
+		case pw == "[":
+			depth++
+		case pw == "]":
+			if depth > 0 {
+				depth--
+			}
+		case strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">"):
+			// Unfilled marker: left without a capture.
+		default:
+			if depth == 0 {
+				return nil
+			}
+		}
+	}
+
+	// For #command with no markers and no optional clauses:
+	// all line tokens must be consumed for a match
+	if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 &&
+		!strings.Contains(r.Pattern, "[") {
+		return nil
+	}
+
+	return captures
+}
+
+// applyResult substitutes captured values into the result template and
+// returns the expanded text.
+//
+// For each captured name the following result markers are replaced:
+//
+//	#<name>   the value wrapped in double quotes (dumb stringify)
+//	<name>    the value itself
+//	<(name)>  the value wrapped in double quotes (stringify)
+//	<.name.>  ".T." when the value is non-empty, ".F." otherwise (logify)
+//
+// All replacements happen in one left-to-right pass over the template, so
+// "#<name>" is recognised before its embedded "<name>", and text inserted for
+// one marker is never rescanned for another. Marker references that remain
+// afterwards are stripped by cleanUnreferencedMarkers.
+func (r *Rule) applyResult(captures map[string]string) string {
+	// old/new pairs for strings.NewReplacer: four marker forms per capture.
+	pairs := make([]string, 0, len(captures)*8)
+	for name, val := range captures {
+		quoted := `"` + val + `"`
+		logical := ".F."
+		if val != "" {
+			logical = ".T."
+		}
+		pairs = append(pairs,
+			"#<"+name+">", quoted,
+			"<"+name+">", val,
+			"<("+name+")>", quoted,
+			"<."+name+".>", logical,
+		)
+	}
+
+	// The replacer picks the match that starts earliest in the template, so
+	// the map's iteration order cannot influence the outcome.
+	result := strings.NewReplacer(pairs...).Replace(r.ResultTmpl)
+
+	// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
+	return cleanUnreferencedMarkers(result)
+}
+
+// cleanUnreferencedMarkers strips leftover result-marker references such as
+// <name>, <(name)>, <.name.>, <"name"> and #<name> from s. A '<' (or "#<")
+// that findMarkerEnd does not accept as a well-formed marker is copied
+// through unchanged, which keeps comparison operators intact.
+func cleanUnreferencedMarkers(s string) string {
+	var kept strings.Builder
+	for pos := 0; pos < len(s); {
+		// Index of the '<' that may open a marker here, or -1 if none.
+		open := -1
+		switch {
+		case s[pos] == '#' && pos+1 < len(s) && s[pos+1] == '<':
+			open = pos + 1
+		case s[pos] == '<':
+			open = pos
+		}
+
+		if open >= 0 {
+			if next := findMarkerEnd(s, open); next > 0 {
+				// Drop the whole marker, including a leading '#'.
+				pos = next
+				continue
+			}
+		}
+
+		kept.WriteByte(s[pos])
+		pos++
+	}
+	return kept.String()
+}
+
+// findMarkerEnd reports where the result marker opening at s[start] ends.
+// It returns the index just past the closing '>' or 0 when s[start] does not
+// begin a marker of the shape '<' [one of ( . "] identifier [any run of
+// ) . " , or space] '>'.
+func findMarkerEnd(s string, start int) int {
+	if start >= len(s) || s[start] != '<' {
+		return 0
+	}
+
+	isIdentStart := func(c byte) bool {
+		return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+	}
+	isIdentPart := func(c byte) bool {
+		return isIdentStart(c) || (c >= '0' && c <= '9')
+	}
+
+	pos := start + 1
+
+	// At most one decoration character may precede the identifier.
+	if pos < len(s) {
+		switch s[pos] {
+		case '(', '.', '"':
+			pos++
+		}
+	}
+
+	// The identifier itself: letter or underscore first, then alphanumerics.
+	if pos >= len(s) || !isIdentStart(s[pos]) {
+		return 0
+	}
+	for pos < len(s) && isIdentPart(s[pos]) {
+		pos++
+	}
+
+	// Trailing decoration such as ")", ".", `"` or ",...".
+suffix:
+	for pos < len(s) {
+		switch s[pos] {
+		case ')', '.', '"', ',', ' ':
+			pos++
+		default:
+			break suffix
+		}
+	}
+
+	if pos < len(s) && s[pos] == '>' {
+		return pos + 1
+	}
+	return 0
+}
+
+// --- Helpers ---
+
+// firstToken returns the leading part of s up to, but not including, the
+// first space, tab or '('. If none of those occurs, s is returned whole.
+func firstToken(s string) string {
+	for i := 0; i < len(s); i++ {
+		switch s[i] {
+		case ' ', '\t', '(':
+			return s[:i]
+		}
+	}
+	return s
+}
+
+// matchWord reports whether a line token equals a pattern word: exactly when
+// caseSens is set, otherwise under Unicode case folding.
+func matchWord(lineWord, patternWord string, caseSens bool) bool {
+	if !caseSens {
+		return strings.EqualFold(lineWord, patternWord)
+	}
+	return lineWord == patternWord
+}
+
+// tokenizePattern splits a pattern into tokens:
+//
+//   - "<...>" up to the next '>' is kept as one marker token,
+//   - "[" and "]" are always tokens of their own,
+//   - everything else is split into words at spaces, tabs, '<', '[' and ']'.
+//
+// A '<' that has no closing '>' anywhere after it (for instance the operator
+// in "a <= b") is not a marker; it becomes the first byte of an ordinary
+// word.
+func tokenizePattern(pattern string) []string {
+	var tokens []string
+	i := 0
+	for i < len(pattern) {
+		c := pattern[i]
+
+		// Skip whitespace
+		if c == ' ' || c == '\t' {
+			i++
+			continue
+		}
+
+		if c == '<' {
+			// Find matching >
+			if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
+				tokens = append(tokens, pattern[i:i+end+1])
+				i += end + 1
+				continue
+			}
+		}
+
+		if c == '[' || c == ']' {
+			tokens = append(tokens, string(c))
+			i++
+			continue
+		}
+
+		// Regular word. The first byte is always consumed: it is either a
+		// plain character or an unterminated '<', and stopping on the latter
+		// without advancing would loop forever.
+		start := i
+		i++
+		for i < len(pattern) && pattern[i] != ' ' && pattern[i] != '\t' &&
+			pattern[i] != '<' && pattern[i] != '[' && pattern[i] != ']' {
+			i++
+		}
+		tokens = append(tokens, pattern[start:i])
+	}
+	return tokens
+}
+
+// tokenizeLine splits a source line into tokens. Spaces and tabs separate
+// tokens, each comma is a token of its own, and a quoted string ('...' or
+// "...") is kept as one token including its quotes; an unterminated string
+// runs to the end of the line. A quote also ends the word before it.
+func tokenizeLine(line string) []string {
+	var out []string
+	n := len(line)
+
+	// endsWord reports whether c terminates a plain word.
+	endsWord := func(c byte) bool {
+		switch c {
+		case ' ', '\t', ',', '"', '\'':
+			return true
+		}
+		return false
+	}
+
+	for pos := 0; pos < n; {
+		ch := line[pos]
+		switch ch {
+		case ' ', '\t':
+			pos++
+
+		case ',':
+			// Comma (standalone token)
+			out = append(out, ",")
+			pos++
+
+		case '"', '\'':
+			// String literal: scan to the matching quote and include it.
+			end := pos + 1
+			for end < n && line[end] != ch {
+				end++
+			}
+			if end < n {
+				end++
+			}
+			out = append(out, line[pos:end])
+			pos = end
+
+		default:
+			// Word: ch is not a terminator, so at least one byte is taken.
+			end := pos + 1
+			for end < n && !endsWord(line[end]) {
+				end++
+			}
+			out = append(out, line[pos:end])
+			pos = end
+		}
+	}
+	return out
+}
+
+// captureExpression captures an expression from line tokens.
+// If this is the last marker in the pattern, captures all remaining tokens.
+// Otherwise, captures until the next keyword in the pattern.
+func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string {
+	if *li >= len(lineWords) {
+		return ""
+	}
+
+	// Find next literal keyword in pattern to use as delimiter
+	delimWord := ""
+	for pi := nextPi; pi < len(patternWords); pi++ {
+		pw := patternWords[pi]
+		if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
+			delimWord = pw
+			break
+		}
+	}
+
+	if delimWord != "" {
+		// Capture until delimiter keyword
+		var parts []string
+		for *li < len(lineWords) {
+			if matchWord(lineWords[*li], delimWord, caseSens) {
+				break
+			}
+			parts = append(parts, lineWords[*li])
+			*li++
+		}
+		return strings.Join(parts, " ")
+	}
+
+	// No delimiter: if last marker, capture all remaining tokens
+	if nextPi >= len(patternWords) {
+		rest := strings.Join(lineWords[*li:], " ")
+		*li = len(lineWords)
+		return rest
+	}
+
+	// Single token capture (between markers)
+	tok := lineWords[*li]
+	*li++
+	return tok
+}