Real Harbour headers write parameterised commands with no space between the keyword and its opening paren: #xcommand MAKE_TEST( <obj>, <v> ) => ... ParseRule stored the rule keyword as `MAKE_TEST(` (stripping only <>, [] marker wrappers), but firstToken normalised source lines by stopping the first-word scan at `(` — so `MAKE_TEST( o, 42 )` produced `MAKE_TEST` for the lookup. The two strings didn't match and the fast-path keyword check rejected every invocation, leaving the macro unexpanded and the call site as a bare undeclared identifier. Trim everything from the first `(` onward during keyword extraction so both halves agree on the dispatch key. The marker tokens inside the parens are still parsed normally by parseMarkers / matchPattern. Verified with /tmp/test_xcmd2.prg (`MAKE_TEST( o, 99 )` expands and dispatches to the object's :hVar access). FiveSql2 43/43, Harbour compat 56/56, Go test ALL PASS. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
547 lines
13 KiB
Go
547 lines
13 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.

// #command / #translate implementation for Five preprocessor.
//
// Harbour PP syntax:
//   #command    PATTERN => RESULT
//   #translate  PATTERN => RESULT
//   #xcommand   PATTERN => RESULT  (case-sensitive)
//   #xtranslate PATTERN => RESULT  (case-sensitive)
//
// Pattern markers:
//   <x>        — match any expression (regular match)
//   <!x!>      — match single identifier only (restricted match)
//   <x,...>    — match comma-separated list
//   <*x*>      — match rest of line (wild match)
//   <x:a,b,c>  — match one of listed words (list match)
//   [...]      — optional clause
//
// Result markers:
//   <x>        — substitute matched text
//   <(x)>      — stringify (wrap in quotes)
//   <{x}>      — blockify (wrap in {|| })
//   #<x>       — dumb stringify
//   <.x.>      — logify (.T. if matched, .F. if not)
//
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
|
|
package pp
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// Rule represents a single #command or #translate rule.
|
|
type Rule struct {
|
|
Pattern string // raw pattern text
|
|
Result string // raw result text
|
|
IsCommand bool // #command vs #translate
|
|
CaseSens bool // #xcommand/#xtranslate = case sensitive
|
|
Keyword string // first keyword (for fast matching)
|
|
Markers []Marker // parsed pattern markers
|
|
ResultTmpl string // result template with marker references
|
|
}
|
|
|
|
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
|
|
type Marker struct {
|
|
Name string // marker name
|
|
Type MarkerType
|
|
ListValues []string // for <x:a,b,c> — allowed values
|
|
}
|
|
|
|
// MarkerType identifies the kind of a pattern marker.
type MarkerType int

// The marker kinds, numbered from zero in declaration order.
const (
	// MarkerRegular is <x>: any expression.
	MarkerRegular MarkerType = iota
	// MarkerRestricted is <!x!>: identifier only.
	MarkerRestricted
	// MarkerList is <x,...>: comma-separated list.
	MarkerList
	// MarkerWild is <*x*>: rest of line.
	MarkerWild
	// MarkerWordList is <x:a,b,c>: one of the listed words.
	MarkerWordList
)
|
|
|
|
// ParseRule parses a #command/#translate directive into a Rule.
|
|
func ParseRule(directive string, isCommand, caseSens bool) *Rule {
|
|
// Split on =>
|
|
parts := strings.SplitN(directive, "=>", 2)
|
|
if len(parts) != 2 {
|
|
return nil
|
|
}
|
|
|
|
pattern := strings.TrimSpace(parts[0])
|
|
result := strings.TrimSpace(parts[1])
|
|
|
|
// Handle line continuation (;)
|
|
result = strings.ReplaceAll(result, " ;", "")
|
|
|
|
rule := &Rule{
|
|
Pattern: pattern,
|
|
Result: result,
|
|
IsCommand: isCommand,
|
|
CaseSens: caseSens,
|
|
ResultTmpl: result,
|
|
}
|
|
|
|
// Extract first keyword for fast matching. The first whitespace-
|
|
// delimited token of the pattern becomes the dispatch key; we
|
|
// strip marker wrappers and any trailing `(` so a pattern like
|
|
// `MAKE_TEST( <obj>, <v> )` hashes on `MAKE_TEST`, matching how
|
|
// firstToken normalises source lines.
|
|
words := strings.Fields(pattern)
|
|
if len(words) > 0 {
|
|
kw := words[0]
|
|
kw = strings.TrimLeft(kw, "<[")
|
|
kw = strings.TrimRight(kw, ">]")
|
|
if idx := strings.IndexByte(kw, '('); idx >= 0 {
|
|
kw = kw[:idx]
|
|
}
|
|
if !strings.ContainsAny(kw, "!*,:") {
|
|
rule.Keyword = kw
|
|
}
|
|
}
|
|
|
|
// Parse markers from pattern
|
|
rule.Markers = parseMarkers(pattern)
|
|
|
|
return rule
|
|
}
|
|
|
|
// parseMarkers extracts all <...> markers from a pattern.
|
|
func parseMarkers(pattern string) []Marker {
|
|
var markers []Marker
|
|
i := 0
|
|
for i < len(pattern) {
|
|
if pattern[i] == '<' {
|
|
end := strings.IndexByte(pattern[i:], '>')
|
|
if end < 0 {
|
|
break
|
|
}
|
|
inner := pattern[i+1 : i+end]
|
|
m := parseOneMarker(inner)
|
|
if m.Name != "" {
|
|
markers = append(markers, m)
|
|
}
|
|
i += end + 1
|
|
} else {
|
|
i++
|
|
}
|
|
}
|
|
return markers
|
|
}
|
|
|
|
func parseOneMarker(inner string) Marker {
|
|
inner = strings.TrimSpace(inner)
|
|
|
|
// <!name!> — restricted
|
|
if strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") {
|
|
return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted}
|
|
}
|
|
|
|
// <*name*> — wild
|
|
if strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") {
|
|
return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild}
|
|
}
|
|
|
|
// <name,...> — comma list
|
|
if strings.HasSuffix(inner, ",...") {
|
|
return Marker{Name: inner[:len(inner)-4], Type: MarkerList}
|
|
}
|
|
|
|
// <name:a,b,c> — word list
|
|
if idx := strings.IndexByte(inner, ':'); idx > 0 {
|
|
name := inner[:idx]
|
|
vals := strings.Split(inner[idx+1:], ",")
|
|
for i := range vals {
|
|
vals[i] = strings.TrimSpace(vals[i])
|
|
}
|
|
return Marker{Name: name, Type: MarkerWordList, ListValues: vals}
|
|
}
|
|
|
|
// <name> — regular
|
|
return Marker{Name: inner, Type: MarkerRegular}
|
|
}
|
|
|
|
// --- Rule matching and application ---
|
|
|
|
// MatchLine checks if a source line matches this rule and returns the substituted result.
|
|
// Returns ("", false) if no match.
|
|
func (r *Rule) MatchLine(line string) (string, bool) {
|
|
trimmed := strings.TrimSpace(line)
|
|
if trimmed == "" {
|
|
return "", false
|
|
}
|
|
|
|
// Fast keyword check
|
|
if r.Keyword != "" {
|
|
firstWord := firstToken(trimmed)
|
|
if r.CaseSens {
|
|
if firstWord != r.Keyword {
|
|
return "", false
|
|
}
|
|
} else {
|
|
if !strings.EqualFold(firstWord, r.Keyword) {
|
|
return "", false
|
|
}
|
|
}
|
|
}
|
|
|
|
// Try to match pattern against line
|
|
captures := r.matchPattern(trimmed)
|
|
if captures == nil {
|
|
return "", false
|
|
}
|
|
|
|
// Apply result template
|
|
result := r.applyResult(captures)
|
|
return result, true
|
|
}
|
|
|
|
// matchPattern attempts to match the pattern against a line.
|
|
// Returns captured values map, or nil if no match.
|
|
func (r *Rule) matchPattern(line string) map[string]string {
|
|
captures := make(map[string]string)
|
|
|
|
patternWords := tokenizePattern(r.Pattern)
|
|
lineWords := tokenizeLine(line)
|
|
|
|
pi, li := 0, 0
|
|
for pi < len(patternWords) && li < len(lineWords) {
|
|
pw := patternWords[pi]
|
|
|
|
// Marker?
|
|
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
|
|
inner := pw[1 : len(pw)-1]
|
|
m := parseOneMarker(inner)
|
|
|
|
switch m.Type {
|
|
case MarkerWild:
|
|
// Capture rest of line
|
|
rest := strings.Join(lineWords[li:], " ")
|
|
captures[m.Name] = rest
|
|
li = len(lineWords)
|
|
pi++
|
|
|
|
case MarkerList:
|
|
// Capture comma-separated items until next keyword
|
|
var items []string
|
|
for li < len(lineWords) {
|
|
if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
|
|
break
|
|
}
|
|
items = append(items, lineWords[li])
|
|
li++
|
|
}
|
|
captures[m.Name] = strings.Join(items, " ")
|
|
pi++
|
|
|
|
case MarkerWordList:
|
|
// Match one of listed words
|
|
matched := false
|
|
for _, allowed := range m.ListValues {
|
|
if r.CaseSens {
|
|
if lineWords[li] == allowed {
|
|
matched = true
|
|
break
|
|
}
|
|
} else if strings.EqualFold(lineWords[li], allowed) {
|
|
matched = true
|
|
break
|
|
}
|
|
}
|
|
if !matched {
|
|
return nil
|
|
}
|
|
captures[m.Name] = lineWords[li]
|
|
li++
|
|
pi++
|
|
|
|
default:
|
|
// Regular or restricted: capture one token or expression
|
|
captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens)
|
|
captures[m.Name] = captured
|
|
pi++
|
|
}
|
|
} else if pw == "[" {
|
|
// Optional clause — skip to matching ]
|
|
depth := 1
|
|
pi++
|
|
for pi < len(patternWords) && depth > 0 {
|
|
if patternWords[pi] == "[" {
|
|
depth++
|
|
} else if patternWords[pi] == "]" {
|
|
depth--
|
|
}
|
|
pi++
|
|
}
|
|
} else if pw == "]" {
|
|
pi++
|
|
} else {
|
|
// Literal keyword — must match
|
|
if !matchWord(lineWords[li], pw, r.CaseSens) {
|
|
return nil
|
|
}
|
|
li++
|
|
pi++
|
|
}
|
|
}
|
|
|
|
// Skip remaining optional markers in pattern
|
|
for pi < len(patternWords) {
|
|
pw := patternWords[pi]
|
|
if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) {
|
|
pi++
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
|
|
// For #command with no markers and no optional clauses:
|
|
// all line tokens must be consumed for a match
|
|
if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 &&
|
|
!strings.Contains(r.Pattern, "[") {
|
|
return nil
|
|
}
|
|
|
|
return captures
|
|
}
|
|
|
|
// applyResult substitutes captured values into the result template.
|
|
func (r *Rule) applyResult(captures map[string]string) string {
|
|
result := r.ResultTmpl
|
|
|
|
for name, val := range captures {
|
|
// <name> — direct substitution
|
|
result = strings.ReplaceAll(result, "<"+name+">", val)
|
|
// <(name)> — stringify
|
|
result = strings.ReplaceAll(result, "<("+name+")>", `"`+val+`"`)
|
|
// <.name.> — logify
|
|
if val != "" {
|
|
result = strings.ReplaceAll(result, "<."+name+".>", ".T.")
|
|
} else {
|
|
result = strings.ReplaceAll(result, "<."+name+".>", ".F.")
|
|
}
|
|
// #<name> — dumb stringify
|
|
result = strings.ReplaceAll(result, "#<"+name+">", `"`+val+`"`)
|
|
}
|
|
|
|
// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
|
|
result = cleanUnreferencedMarkers(result)
|
|
|
|
return result
|
|
}
|
|
|
|
// cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references.
|
|
// Only removes well-formed PP marker references, not comparison operators.
|
|
func cleanUnreferencedMarkers(s string) string {
|
|
// Match patterns like <identifier>, <(identifier)>, <.identifier.>, #<identifier>
|
|
var out strings.Builder
|
|
i := 0
|
|
for i < len(s) {
|
|
removed := false
|
|
// #<name>
|
|
if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' {
|
|
if end := findMarkerEnd(s, i+1); end > 0 {
|
|
i = end
|
|
removed = true
|
|
}
|
|
}
|
|
// <name>, <(name)>, <.name.>, <"name">
|
|
if !removed && s[i] == '<' {
|
|
if end := findMarkerEnd(s, i); end > 0 {
|
|
i = end
|
|
removed = true
|
|
}
|
|
}
|
|
if !removed {
|
|
out.WriteByte(s[i])
|
|
i++
|
|
}
|
|
}
|
|
return out.String()
|
|
}
|
|
|
|
// findMarkerEnd reports whether a PP marker reference begins at s[start].
//
// A marker is '<', an optional one-byte prefix ('(', '.', '"' or '{'), an
// identifier (letter or underscore, then letters, digits or underscores),
// any run of suffix bytes (')', '.', '"', '}', ',' or space) and a closing
// '>'. It returns the index just past the '>' when s[start:] has that shape,
// and 0 otherwise (including when start is out of range).
func findMarkerEnd(s string, start int) int {
	if start < 0 || start >= len(s) || s[start] != '<' {
		return 0
	}

	isLetter := func(c byte) bool {
		return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_'
	}
	isDigit := func(c byte) bool {
		return c >= '0' && c <= '9'
	}
	isSuffix := func(c byte) bool {
		switch c {
		case ')', '.', '"', '}', ',', ' ':
			return true
		}
		return false
	}

	i := start + 1

	// Optional prefix: ( stringify, . logify, " stringify, { blockify.
	if i < len(s) {
		switch s[i] {
		case '(', '.', '"', '{':
			i++
		}
	}

	// The name must start like an identifier. This is what keeps comparison
	// operators such as "a < b" or "<>" from being taken for markers.
	if i >= len(s) || !isLetter(s[i]) {
		return 0
	}
	for i < len(s) && (isLetter(s[i]) || isDigit(s[i])) {
		i++
	}

	// Optional suffix: ) . " } or the ",..." of a list marker.
	for i < len(s) && isSuffix(s[i]) {
		i++
	}

	if i < len(s) && s[i] == '>' {
		return i + 1
	}
	return 0
}
|
|
|
|
// --- Helpers ---
|
|
|
|
// firstToken returns the leading part of s up to, but not including, the
// first space, tab or '('. When s contains none of these, s is returned
// whole.
func firstToken(s string) string {
	if cut := strings.IndexAny(s, " \t("); cut >= 0 {
		return s[:cut]
	}
	return s
}
|
|
|
|
// matchWord reports whether a line token equals a pattern word: an exact
// comparison when caseSens is true, a case-insensitive one otherwise.
func matchWord(lineWord, patternWord string, caseSens bool) bool {
	if !caseSens {
		return strings.EqualFold(lineWord, patternWord)
	}
	return lineWord == patternWord
}
|
|
|
|
// tokenizePattern splits a pattern into tokens.
//
// Spaces and tabs separate tokens. A '<' together with everything up to the
// next '>' is kept as a single marker token, "[" and "]" are always tokens
// of their own, and any other run of bytes up to the next space, tab, '<',
// '[' or ']' is a literal word.
//
// A '<' with no later '>' cannot open a marker; it starts a literal word
// instead (so "a < b" yields "a", "<", "b"). Previously such a '<' was never
// consumed and the function looped forever.
func tokenizePattern(pattern string) []string {
	// endsWord reports whether c terminates a literal word.
	endsWord := func(c byte) bool {
		switch c {
		case ' ', '\t', '<', '[', ']':
			return true
		}
		return false
	}

	var tokens []string
	i := 0
	for i < len(pattern) {
		switch c := pattern[i]; c {
		case ' ', '\t':
			i++

		case '[':
			tokens = append(tokens, "[")
			i++

		case ']':
			tokens = append(tokens, "]")
			i++

		default:
			if c == '<' {
				if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
					tokens = append(tokens, pattern[i:i+end+1])
					i += end + 1
					continue
				}
			}

			// Literal word. The first byte is consumed unconditionally so
			// that an unmatched '<' still makes progress.
			start := i
			i++
			for i < len(pattern) && !endsWord(pattern[i]) {
				i++
			}
			tokens = append(tokens, pattern[start:i])
		}
	}
	return tokens
}
|
|
|
|
// tokenizeLine splits a source line into tokens.
//
// Spaces and tabs separate tokens and every comma is a token of its own. A
// string literal opened by " or ' is kept as one token, quotes included,
// running to the matching quote or to the end of the line when it is never
// closed. Any other run of bytes, up to the next space, tab, comma or quote,
// is a word; parentheses get no special treatment and stay part of the word
// they touch.
func tokenizeLine(line string) []string {
	// endsWord reports whether b terminates a plain word.
	endsWord := func(b byte) bool {
		switch b {
		case ' ', '\t', ',', '"', '\'':
			return true
		}
		return false
	}

	var out []string
	n := len(line)
	for pos := 0; pos < n; {
		switch ch := line[pos]; ch {
		case ' ', '\t':
			pos++

		case ',':
			out = append(out, ",")
			pos++

		case '"', '\'':
			// Scan to the closing quote and include it when present.
			end := pos + 1
			for end < n && line[end] != ch {
				end++
			}
			if end < n {
				end++
			}
			out = append(out, line[pos:end])
			pos = end

		default:
			end := pos + 1
			for end < n && !endsWord(line[end]) {
				end++
			}
			out = append(out, line[pos:end])
			pos = end
		}
	}
	return out
}
|
|
|
|
// captureExpression captures an expression from line tokens.
|
|
// If this is the last marker in the pattern, captures all remaining tokens.
|
|
// Otherwise, captures until the next keyword in the pattern.
|
|
func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string {
|
|
if *li >= len(lineWords) {
|
|
return ""
|
|
}
|
|
|
|
// Find next literal keyword in pattern to use as delimiter
|
|
delimWord := ""
|
|
for pi := nextPi; pi < len(patternWords); pi++ {
|
|
pw := patternWords[pi]
|
|
if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
|
|
delimWord = pw
|
|
break
|
|
}
|
|
}
|
|
|
|
if delimWord != "" {
|
|
// Capture until delimiter keyword
|
|
var parts []string
|
|
for *li < len(lineWords) {
|
|
if matchWord(lineWords[*li], delimWord, caseSens) {
|
|
break
|
|
}
|
|
parts = append(parts, lineWords[*li])
|
|
*li++
|
|
}
|
|
return strings.Join(parts, " ")
|
|
}
|
|
|
|
// No delimiter: if last marker, capture all remaining tokens
|
|
if nextPi >= len(patternWords) {
|
|
rest := strings.Join(lineWords[*li:], " ")
|
|
*li = len(lineWords)
|
|
return rest
|
|
}
|
|
|
|
// Single token capture (between markers)
|
|
tok := lineWords[*li]
|
|
*li++
|
|
return tok
|
|
}
|