// five/compiler/pp/command.go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.

// #command / #translate implementation for Five preprocessor.
//
// Harbour PP syntax:
//   #command PATTERN => RESULT
//   #translate PATTERN => RESULT
//   #xcommand PATTERN => RESULT   (case-sensitive)
//   #xtranslate PATTERN => RESULT (case-sensitive)
//
// Pattern markers:
//   <x>       — match any expression (regular match)
//   <!x!>     — match single identifier only (restricted match)
//   <x,...>   — match comma-separated list
//   <*x*>     — match rest of line (wild match)
//   <x:a,b,c> — match one of listed words (list match)
//   [...]     — optional clause
//
// Result markers:
//   <x>       — substitute matched text
//   <(x)>     — stringify (wrap in quotes)
//   <{x}>     — blockify (wrap in {|| })
//   #<x>      — dumb stringify
//   <.x.>     — logify (.T. if matched, .F. if not)
//
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
package pp

import (
	"strings"
)

// Rule is one parsed #command or #translate directive: the pattern to look
// for in a source line and the result text that replaces a matching line.
type Rule struct {
	// Pattern is the raw pattern text (left of "=>", trimmed).
	Pattern string

	// Result is the raw result text (right of "=>").
	Result string

	// IsCommand distinguishes #command (true) from #translate (false).
	IsCommand bool

	// CaseSens requests case-sensitive keyword comparison
	// (#xcommand/#xtranslate).
	CaseSens bool

	// Keyword is the first keyword of the pattern, used as a fast
	// pre-check before full matching. Empty means no pre-check.
	Keyword string

	// Markers holds the markers parsed out of Pattern.
	Markers []Marker

	// ResultTmpl is the result template whose marker references are
	// substituted when the rule is applied.
	ResultTmpl string
}

// Marker describes one pattern marker such as <x>, <!x!>, <x,...>, <*x*>
// or <x:a,b,c>.
type Marker struct {
	// Name is the marker name, i.e. the key under which matched text is
	// captured.
	Name string

	// Type is the marker kind.
	Type MarkerType

	// ListValues holds the allowed words of a <x:a,b,c> marker; it is
	// unused for the other kinds.
	ListValues []string
}

// MarkerType enumerates the kinds of pattern marker.
type MarkerType int

// Marker kinds. The numeric values follow declaration order, starting at 0.
const (
	// MarkerRegular is <x>: any expression.
	MarkerRegular MarkerType = iota

	// MarkerRestricted is <!x!>: identifier only.
	MarkerRestricted

	// MarkerList is <x,...>: comma-separated list.
	MarkerList

	// MarkerWild is <*x*>: rest of line.
	MarkerWild

	// MarkerWordList is <x:a,b,c>: one of the listed words.
	MarkerWordList
)

// ParseRule turns the text of a #command/#translate directive into a Rule.
//
// directive must have the form "PATTERN => RESULT"; it is split at the first
// "=>" and both halves are trimmed. When there is no "=>" the function
// returns nil. isCommand and caseSens are stored on the rule unchanged.
func ParseRule(directive string, isCommand, caseSens bool) *Rule {
	arrow := strings.Index(directive, "=>")
	if arrow < 0 {
		return nil
	}

	pattern := strings.TrimSpace(directive[:arrow])

	// Line-continuation handling: every " ;" in the result is dropped.
	result := strings.TrimSpace(directive[arrow+len("=>"):])
	result = strings.ReplaceAll(result, " ;", "")

	rule := &Rule{
		Pattern:    pattern,
		Result:     result,
		IsCommand:  isCommand,
		CaseSens:   caseSens,
		ResultTmpl: result,
		Markers:    parseMarkers(pattern),
	}

	// Dispatch keyword: take the first whitespace-delimited token of the
	// pattern, peel off leading "<"/"[" and trailing ">"/"]", and cut at
	// the first "(" so `MAKE_TEST( <obj>, <v> )` keys on `MAKE_TEST`,
	// the same shape firstToken produces for a source line. Tokens that
	// still contain one of "!*,:" are not used as a keyword.
	//
	// NOTE(review): a pattern that starts with a plain marker such as
	// `<x>` ends up with Keyword "x" — confirm that is intended.
	if fields := strings.Fields(pattern); len(fields) > 0 {
		key := strings.TrimRight(strings.TrimLeft(fields[0], "<["), ">]")
		if paren := strings.IndexByte(key, '('); paren >= 0 {
			key = key[:paren]
		}
		if strings.IndexAny(key, "!*,:") < 0 {
			rule.Keyword = key
		}
	}

	return rule
}

// parseMarkers collects every <...> marker in pattern, in order of
// appearance. Markers whose parsed name is empty are dropped, and scanning
// stops at the first '<' that has no closing '>'.
func parseMarkers(pattern string) []Marker {
	var found []Marker
	rest := pattern
	for {
		open := strings.IndexByte(rest, '<')
		if open < 0 {
			return found
		}
		rest = rest[open+1:]

		closing := strings.IndexByte(rest, '>')
		if closing < 0 {
			return found
		}
		if mk := parseOneMarker(rest[:closing]); mk.Name != "" {
			found = append(found, mk)
		}
		rest = rest[closing+1:]
	}
}

// parseOneMarker classifies the text found between '<' and '>' of a pattern
// marker and returns the corresponding Marker.
//
// inner is trimmed of surrounding whitespace and then tested, in this order:
//
//	!name!     -> MarkerRestricted
//	*name*     -> MarkerWild
//	name,...   -> MarkerList
//	name:a,b,c -> MarkerWordList (ListValues holds the trimmed words)
//	name       -> MarkerRegular
//
// The returned Name may be empty (for example for "!!" or a lone "!");
// callers that need a usable name must check for that.
func parseOneMarker(inner string) Marker {
	inner = strings.TrimSpace(inner)

	// The delimiters are removed with TrimPrefix/TrimSuffix rather than by
	// slicing inner[1:len(inner)-1]: for a lone "!" or "*" the prefix and
	// suffix are the same byte and the slice expression would panic.
	switch {
	case strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!"):
		name := strings.TrimSuffix(strings.TrimPrefix(inner, "!"), "!")
		return Marker{Name: name, Type: MarkerRestricted}

	case strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*"):
		name := strings.TrimSuffix(strings.TrimPrefix(inner, "*"), "*")
		return Marker{Name: name, Type: MarkerWild}

	case strings.HasSuffix(inner, ",..."):
		return Marker{Name: strings.TrimSuffix(inner, ",..."), Type: MarkerList}
	}

	// <name:a,b,c> — the colon must follow at least one name character.
	if idx := strings.IndexByte(inner, ':'); idx > 0 {
		vals := strings.Split(inner[idx+1:], ",")
		for i := range vals {
			vals[i] = strings.TrimSpace(vals[i])
		}
		return Marker{Name: inner[:idx], Type: MarkerWordList, ListValues: vals}
	}

	// <name> — regular
	return Marker{Name: inner, Type: MarkerRegular}
}

// --- Rule matching and application ---

// MatchLine applies the rule to one source line.
//
// It returns the substituted result text and true when the line matches,
// or ("", false) when it does not. Blank lines never match.
func (r *Rule) MatchLine(line string) (string, bool) {
	src := strings.TrimSpace(line)
	if src == "" {
		return "", false
	}

	// Cheap rejection: when the rule has a keyword, the line's first token
	// must equal it (honouring the rule's case sensitivity).
	if r.Keyword != "" && !matchWord(firstToken(src), r.Keyword, r.CaseSens) {
		return "", false
	}

	caps := r.matchPattern(src)
	if caps == nil {
		return "", false
	}
	return r.applyResult(caps), true
}

// matchPattern attempts to match the rule's pattern against a source line.
//
// It returns a map from marker name to captured text, or nil when the line
// does not match. Pattern and line are tokenized with tokenizePattern and
// tokenizeLine and walked in parallel:
//
//   - a literal pattern word must equal the current line word;
//   - a marker captures line words according to its type (regular and
//     restricted markers share the same capture logic);
//   - an optional clause "[ ... ]" is skipped as a whole, it is never
//     matched against the line.
//
// When the line runs out before the pattern does, the rest of the pattern may
// only consist of markers and optional clauses; a leftover literal word
// outside an optional clause means the line lacks a required keyword and the
// match fails.
func (r *Rule) matchPattern(line string) map[string]string {
	captures := make(map[string]string)

	patternWords := tokenizePattern(r.Pattern)
	lineWords := tokenizeLine(line)

	// isMarker reports whether a pattern token has the <...> shape.
	isMarker := func(tok string) bool {
		return strings.HasPrefix(tok, "<") && strings.HasSuffix(tok, ">")
	}

	// skipOptional takes the index of a "[" token and returns the index just
	// past its matching "]" (or len(patternWords) if it is never closed).
	skipOptional := func(open int) int {
		depth := 1
		next := open + 1
		for next < len(patternWords) && depth > 0 {
			switch patternWords[next] {
			case "[":
				depth++
			case "]":
				depth--
			}
			next++
		}
		return next
	}

	pi, li := 0, 0
	for pi < len(patternWords) && li < len(lineWords) {
		pw := patternWords[pi]

		switch {
		case isMarker(pw):
			m := parseOneMarker(pw[1 : len(pw)-1])

			switch m.Type {
			case MarkerWild:
				// Capture rest of line.
				captures[m.Name] = strings.Join(lineWords[li:], " ")
				li = len(lineWords)
				pi++

			case MarkerList:
				// Capture items until the line word equals the pattern
				// token that follows the marker (or the line ends).
				var items []string
				for li < len(lineWords) {
					if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
						break
					}
					items = append(items, lineWords[li])
					li++
				}
				captures[m.Name] = strings.Join(items, " ")
				pi++

			case MarkerWordList:
				// The current line word must be one of the listed words.
				matched := false
				for _, allowed := range m.ListValues {
					if matchWord(lineWords[li], allowed, r.CaseSens) {
						matched = true
						break
					}
				}
				if !matched {
					return nil
				}
				captures[m.Name] = lineWords[li]
				li++
				pi++

			default:
				// Regular or restricted: capture one token or expression.
				captures[m.Name] = captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens)
				pi++
			}

		case pw == "[":
			// Optional clause — skip to just past the matching "]".
			pi = skipOptional(pi)

		case pw == "]":
			pi++

		default:
			// Literal keyword — must match.
			if !matchWord(lineWords[li], pw, r.CaseSens) {
				return nil
			}
			li++
			pi++
		}
	}

	// The line may be exhausted while pattern tokens remain. Markers and
	// whole optional clauses may go unmatched, but a literal word is a
	// required keyword the line does not contain.
	for pi < len(patternWords) {
		pw := patternWords[pi]
		switch {
		case pw == "[":
			pi = skipOptional(pi)
		case pw == "]" || isMarker(pw):
			pi++
		default:
			return nil
		}
	}

	// For #command with no markers and no optional clauses:
	// all line tokens must be consumed for a match.
	if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 &&
		!strings.Contains(r.Pattern, "[") {
		return nil
	}

	return captures
}

// applyResult substitutes captured values into the result template and
// returns the resulting text.
//
// For every captured name the following result markers are replaced:
//
//	<name>    the captured text itself
//	#<name>   the captured text wrapped in double quotes (dumb stringify)
//	<(name)>  the captured text wrapped in double quotes (stringify)
//	<"name">  the captured text wrapped in double quotes (stringify)
//	<{name}>  {|| text} (blockify); empty when the capture is empty
//	<.name.>  .T. when the capture is non-empty, .F. otherwise (logify)
//
// Marker references that remain afterwards are removed by
// cleanUnreferencedMarkers.
func (r *Rule) applyResult(captures map[string]string) string {
	// All replacements are done in one left-to-right pass. Separate
	// ReplaceAll calls would let "<name>" rewrite the tail of "#<name>"
	// before the stringify form is seen, and would substitute again inside
	// text that an earlier replacement inserted, in a map-order-dependent
	// way.
	pairs := make([]string, 0, 12*len(captures))
	for name, val := range captures {
		quoted := `"` + val + `"`

		logical, block := ".F.", ""
		if val != "" {
			logical = ".T."
			block = "{|| " + val + "}"
		}

		pairs = append(pairs,
			"#<"+name+">", quoted,
			"<"+name+">", val,
			"<("+name+")>", quoted,
			`<"`+name+`">`, quoted,
			"<{"+name+"}>", block,
			"<."+name+".>", logical,
		)
	}

	result := strings.NewReplacer(pairs...).Replace(r.ResultTmpl)

	// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
	return cleanUnreferencedMarkers(result)
}

// cleanUnreferencedMarkers returns s with every leftover marker reference
// removed. A reference is whatever findMarkerEnd accepts (<name>, <(name)>,
// <.name.>, <"name">), optionally preceded by '#'; in that case the '#' is
// removed too. Any other text, including a '<' that does not start a
// marker, is copied through unchanged.
func cleanUnreferencedMarkers(s string) string {
	var kept strings.Builder
	for pos := 0; pos < len(s); {
		// Locate the '<' of a candidate marker at this position, if any.
		markerAt := -1
		switch {
		case s[pos] == '<':
			markerAt = pos
		case s[pos] == '#' && pos+1 < len(s) && s[pos+1] == '<':
			markerAt = pos + 1
		}

		if markerAt >= 0 {
			if next := findMarkerEnd(s, markerAt); next > 0 {
				pos = next // drop the whole reference
				continue
			}
		}

		kept.WriteByte(s[pos])
		pos++
	}
	return kept.String()
}

// findMarkerEnd reports where the marker reference starting at s[start]
// ends. It returns the index just past the closing '>', or 0 when s[start]
// does not begin a reference.
//
// Accepted shape: '<', at most one of '(' '.' '"', an identifier (letter or
// underscore first, then letters, digits or underscores), any run of
// ')' '.' '"' ',' ' ' characters, and finally '>'.
func findMarkerEnd(s string, start int) int {
	n := len(s)
	if start >= n || s[start] != '<' {
		return 0
	}

	isIdentStart := func(b byte) bool {
		return b == '_' || ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z')
	}
	isIdentPart := func(b byte) bool {
		return isIdentStart(b) || ('0' <= b && b <= '9')
	}
	isTrailer := func(b byte) bool {
		switch b {
		case ')', '.', '"', ',', ' ':
			return true
		}
		return false
	}

	pos := start + 1

	// Optional single opening decoration.
	if pos < n {
		switch s[pos] {
		case '(', '.', '"':
			pos++
		}
	}

	// The name itself.
	if pos >= n || !isIdentStart(s[pos]) {
		return 0
	}
	for pos < n && isIdentPart(s[pos]) {
		pos++
	}

	// Closing decoration, e.g. ")", ".", "\"" or ",...".
	for pos < n && isTrailer(s[pos]) {
		pos++
	}

	if pos < n && s[pos] == '>' {
		return pos + 1
	}
	return 0
}

// --- Helpers ---

// firstToken returns the leading part of s up to, but not including, the
// first space, tab or '('. When s contains none of them, s is returned
// whole.
func firstToken(s string) string {
	for pos := 0; pos < len(s); pos++ {
		switch s[pos] {
		case ' ', '\t', '(':
			return s[:pos]
		}
	}
	return s
}

// matchWord reports whether a line word equals a pattern word. With caseSens
// the comparison is exact; otherwise it uses strings.EqualFold.
func matchWord(lineWord, patternWord string, caseSens bool) bool {
	if !caseSens {
		return strings.EqualFold(lineWord, patternWord)
	}
	return lineWord == patternWord
}

// tokenizePattern splits a rule pattern into tokens.
//
// Tokens are separated by spaces and tabs. A marker "<...>" (from '<' to the
// next '>', spaces included) is one token, "[" and "]" are always tokens of
// their own, and everything else is split into plain words that end at
// whitespace, '<', '[' or ']'.
//
// A '<' with no '>' anywhere after it is not a marker; it is taken as the
// first character of a plain word, so malformed patterns still tokenize.
func tokenizePattern(pattern string) []string {
	var tokens []string
	i := 0
	for i < len(pattern) {
		c := pattern[i]
		switch {
		case c == ' ' || c == '\t':
			i++

		case c == '[' || c == ']':
			tokens = append(tokens, string(c))
			i++

		default:
			if c == '<' {
				if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
					tokens = append(tokens, pattern[i:i+end+1])
					i += end + 1
					continue
				}
			}

			// Plain word. The first byte is consumed unconditionally so
			// that an unterminated '<' cannot stall the scan.
			start := i
			i++
			for i < len(pattern) && strings.IndexByte(" \t<[]", pattern[i]) < 0 {
				i++
			}
			tokens = append(tokens, pattern[start:i])
		}
	}
	return tokens
}

// tokenizeLine splits a source line into tokens.
//
// Spaces and tabs separate tokens. A quoted string ('...' or "...") is one
// token including its quotes; an unterminated string runs to the end of the
// line. A comma is always a token of its own. Any other run of characters
// up to the next space, tab, comma or quote is a word. Parentheses get no
// special treatment and stay part of the word they occur in.
func tokenizeLine(line string) []string {
	var out []string
	n := len(line)

	endsWord := func(b byte) bool {
		switch b {
		case ' ', '\t', ',', '"', '\'':
			return true
		}
		return false
	}

	for pos := 0; pos < n; {
		ch := line[pos]
		switch ch {
		case ' ', '\t':
			pos++

		case ',':
			out = append(out, ",")
			pos++

		case '"', '\'':
			// String literal: scan to the matching quote.
			stop := pos + 1
			for stop < n && line[stop] != ch {
				stop++
			}
			if stop < n {
				stop++ // include the closing quote
			}
			out = append(out, line[pos:stop])
			pos = stop

		default:
			stop := pos + 1
			for stop < n && !endsWord(line[stop]) {
				stop++
			}
			out = append(out, line[pos:stop])
			pos = stop
		}
	}
	return out
}

// captureExpression captures the text for a regular or restricted marker.
//
// lineWords[*li:] is the unread part of the line and patternWords[nextPi:]
// the part of the pattern after the marker. *li is advanced past whatever
// is captured. Three cases:
//
//   - the rest of the pattern contains a literal word (anything that is not
//     "[" or "]" and does not start with "<"): line words are captured up
//     to, not including, the first one equal to that word;
//   - the marker is the last pattern token: all remaining line words are
//     captured;
//   - otherwise exactly one line word is captured.
//
// Captured words are joined with single spaces. When no line words are
// left, "" is returned and *li is unchanged.
func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string {
	from := *li
	if from >= len(lineWords) {
		return ""
	}

	// The first literal word after the marker acts as the stop word.
	stopWord := ""
	for k := nextPi; k < len(patternWords); k++ {
		cand := patternWords[k]
		if cand == "[" || cand == "]" || strings.HasPrefix(cand, "<") {
			continue
		}
		stopWord = cand
		break
	}

	switch {
	case stopWord != "":
		to := from
		for to < len(lineWords) && !matchWord(lineWords[to], stopWord, caseSens) {
			to++
		}
		*li = to
		return strings.Join(lineWords[from:to], " ")

	case nextPi >= len(patternWords):
		// Last marker of the pattern: take everything that is left.
		*li = len(lineWords)
		return strings.Join(lineWords[from:], " ")

	default:
		// More markers follow but no literal: take a single word.
		*li = from + 1
		return lineWords[from]
	}
}