// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// #command / #translate implementation for Five preprocessor.
//
// Harbour PP syntax:
//   #command    PATTERN => RESULT
//   #translate  PATTERN => RESULT
//   #xcommand   PATTERN => RESULT  (case-sensitive)
//   #xtranslate PATTERN => RESULT  (case-sensitive)
//
// Pattern markers:
//   <x>       — match any expression (regular match)
//   <!x!>     — match single identifier only (restricted match)
//   <x,...>   — match comma-separated list
//   <*x*>     — match rest of line (wild match)
//   <x:a,b>   — match one of listed words (list match)
//   [...]     — optional clause
//
// Result markers:
//   <x>       — substitute matched text
//   <(x)>     — stringify (wrap in quotes)
//   <{x}>     — blockify (wrap in {|| })
//   #<x>      — dumb stringify
//   <.x.>     — logify (.T. if matched, .F. if not)
//
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c

package pp

import (
	"strings"
)

// Rule represents a single #command or #translate rule.
type Rule struct {
	Pattern    string   // raw pattern text
	Result     string   // raw result text
	IsCommand  bool     // #command vs #translate
	CaseSens   bool     // #xcommand/#xtranslate = case sensitive
	Keyword    string   // first keyword (for fast matching)
	Markers    []Marker // parsed pattern markers
	ResultTmpl string   // result template with marker references
}

// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
type Marker struct {
	Name       string // marker name
	Type       MarkerType
	ListValues []string // for <x:a,b> — allowed values
}

// MarkerType identifies which kind of pattern marker a Marker is.
type MarkerType int

const (
	MarkerRegular    MarkerType = iota // <x> — any expression
	MarkerRestricted                   // <!x!> — identifier only
	MarkerList                         // <x,...> — comma-separated list
	MarkerWild                         // <*x*> — rest of line
	MarkerWordList                     // <x:a,b> — one of listed words
)

// ParseRule parses a #command/#translate directive into a Rule.
func ParseRule(directive string, isCommand, caseSens bool) *Rule { // Split on => parts := strings.SplitN(directive, "=>", 2) if len(parts) != 2 { return nil } pattern := strings.TrimSpace(parts[0]) result := strings.TrimSpace(parts[1]) // Handle line continuation (;) result = strings.ReplaceAll(result, " ;", "") rule := &Rule{ Pattern: pattern, Result: result, IsCommand: isCommand, CaseSens: caseSens, ResultTmpl: result, } // Extract first keyword for fast matching words := strings.Fields(pattern) if len(words) > 0 { kw := words[0] // Remove marker brackets kw = strings.TrimLeft(kw, "<[") kw = strings.TrimRight(kw, ">]") if !strings.ContainsAny(kw, "!*,:") { rule.Keyword = kw } } // Parse markers from pattern rule.Markers = parseMarkers(pattern) return rule } // parseMarkers extracts all <...> markers from a pattern. func parseMarkers(pattern string) []Marker { var markers []Marker i := 0 for i < len(pattern) { if pattern[i] == '<' { end := strings.IndexByte(pattern[i:], '>') if end < 0 { break } inner := pattern[i+1 : i+end] m := parseOneMarker(inner) if m.Name != "" { markers = append(markers, m) } i += end + 1 } else { i++ } } return markers } func parseOneMarker(inner string) Marker { inner = strings.TrimSpace(inner) // — restricted if strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") { return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted} } // <*name*> — wild if strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") { return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild} } // — comma list if strings.HasSuffix(inner, ",...") { return Marker{Name: inner[:len(inner)-4], Type: MarkerList} } // — word list if idx := strings.IndexByte(inner, ':'); idx > 0 { name := inner[:idx] vals := strings.Split(inner[idx+1:], ",") for i := range vals { vals[i] = strings.TrimSpace(vals[i]) } return Marker{Name: name, Type: MarkerWordList, ListValues: vals} } // — regular return Marker{Name: inner, Type: MarkerRegular} } // --- Rule 
matching and application --- // MatchLine checks if a source line matches this rule and returns the substituted result. // Returns ("", false) if no match. func (r *Rule) MatchLine(line string) (string, bool) { trimmed := strings.TrimSpace(line) if trimmed == "" { return "", false } // Fast keyword check if r.Keyword != "" { firstWord := firstToken(trimmed) if r.CaseSens { if firstWord != r.Keyword { return "", false } } else { if !strings.EqualFold(firstWord, r.Keyword) { return "", false } } } // Try to match pattern against line captures := r.matchPattern(trimmed) if captures == nil { return "", false } // Apply result template result := r.applyResult(captures) return result, true } // matchPattern attempts to match the pattern against a line. // Returns captured values map, or nil if no match. func (r *Rule) matchPattern(line string) map[string]string { captures := make(map[string]string) patternWords := tokenizePattern(r.Pattern) lineWords := tokenizeLine(line) pi, li := 0, 0 for pi < len(patternWords) && li < len(lineWords) { pw := patternWords[pi] // Marker? 
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { inner := pw[1 : len(pw)-1] m := parseOneMarker(inner) switch m.Type { case MarkerWild: // Capture rest of line rest := strings.Join(lineWords[li:], " ") captures[m.Name] = rest li = len(lineWords) pi++ case MarkerList: // Capture comma-separated items until next keyword var items []string for li < len(lineWords) { if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) { break } items = append(items, lineWords[li]) li++ } captures[m.Name] = strings.Join(items, " ") pi++ case MarkerWordList: // Match one of listed words matched := false for _, allowed := range m.ListValues { if r.CaseSens { if lineWords[li] == allowed { matched = true break } } else if strings.EqualFold(lineWords[li], allowed) { matched = true break } } if !matched { return nil } captures[m.Name] = lineWords[li] li++ pi++ default: // Regular or restricted: capture one token or expression captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens) captures[m.Name] = captured pi++ } } else if pw == "[" { // Optional clause — skip to matching ] depth := 1 pi++ for pi < len(patternWords) && depth > 0 { if patternWords[pi] == "[" { depth++ } else if patternWords[pi] == "]" { depth-- } pi++ } } else if pw == "]" { pi++ } else { // Literal keyword — must match if !matchWord(lineWords[li], pw, r.CaseSens) { return nil } li++ pi++ } } // Skip remaining optional markers in pattern for pi < len(patternWords) { pw := patternWords[pi] if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) { pi++ } else { break } } // For #command with no markers and no optional clauses: // all line tokens must be consumed for a match if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 && !strings.Contains(r.Pattern, "[") { return nil } return captures } // applyResult substitutes captured values into the result template. 
func (r *Rule) applyResult(captures map[string]string) string { result := r.ResultTmpl for name, val := range captures { // — direct substitution result = strings.ReplaceAll(result, "<"+name+">", val) // <(name)> — stringify result = strings.ReplaceAll(result, "<("+name+")>", `"`+val+`"`) // <.name.> — logify if val != "" { result = strings.ReplaceAll(result, "<."+name+".>", ".T.") } else { result = strings.ReplaceAll(result, "<."+name+".>", ".F.") } // # — dumb stringify result = strings.ReplaceAll(result, "#<"+name+">", `"`+val+`"`) } // Clean up unreferenced markers: , <(name)>, <.name.>, #, <"name"> result = cleanUnreferencedMarkers(result) return result } // cleanUnreferencedMarkers removes any remaining , <(name)>, <.name.>, # references. // Only removes well-formed PP marker references, not comparison operators. func cleanUnreferencedMarkers(s string) string { // Match patterns like , <(identifier)>, <.identifier.>, # var out strings.Builder i := 0 for i < len(s) { removed := false // # if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' { if end := findMarkerEnd(s, i+1); end > 0 { i = end removed = true } } // , <(name)>, <.name.>, <"name"> if !removed && s[i] == '<' { if end := findMarkerEnd(s, i); end > 0 { i = end removed = true } } if !removed { out.WriteByte(s[i]) i++ } } return out.String() } // findMarkerEnd checks if s[start] begins a PP marker and returns end position, or 0. func findMarkerEnd(s string, start int) int { if start >= len(s) || s[start] != '<' { return 0 } i := start + 1 // Skip optional ( or . prefix if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"') { i++ } // Must start with letter or underscore (identifier) if i >= len(s) || !(s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] == '_') { return 0 } // Consume identifier for i < len(s) && (s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] >= '0' && s[i] <= '9' || s[i] == '_') { i++ } // Skip optional ) or . or " or ,... 
suffix for i < len(s) && (s[i] == ')' || s[i] == '.' || s[i] == '"' || s[i] == ',' || s[i] == ' ') { i++ } if i < len(s) && s[i] == '>' { return i + 1 } return 0 } // --- Helpers --- func firstToken(s string) string { for i, c := range s { if c == ' ' || c == '\t' || c == '(' { return s[:i] } } return s } func matchWord(lineWord, patternWord string, caseSens bool) bool { if caseSens { return lineWord == patternWord } return strings.EqualFold(lineWord, patternWord) } // tokenizePattern splits a pattern into words, keeping markers as single tokens. func tokenizePattern(pattern string) []string { var tokens []string i := 0 for i < len(pattern) { // Skip whitespace for i < len(pattern) && (pattern[i] == ' ' || pattern[i] == '\t') { i++ } if i >= len(pattern) { break } if pattern[i] == '<' { // Find matching > end := strings.IndexByte(pattern[i:], '>') if end >= 0 { tokens = append(tokens, pattern[i:i+end+1]) i += end + 1 continue } } if pattern[i] == '[' { tokens = append(tokens, "[") i++ continue } if pattern[i] == ']' { tokens = append(tokens, "]") i++ continue } // Regular word start := i for i < len(pattern) && pattern[i] != ' ' && pattern[i] != '\t' && pattern[i] != '<' && pattern[i] != '[' && pattern[i] != ']' { i++ } if i > start { tokens = append(tokens, pattern[start:i]) } } return tokens } // tokenizeLine splits a source line into words (keeping strings and parens together). 
func tokenizeLine(line string) []string { var tokens []string i := 0 for i < len(line) { for i < len(line) && (line[i] == ' ' || line[i] == '\t') { i++ } if i >= len(line) { break } // String literal if line[i] == '"' || line[i] == '\'' { quote := line[i] start := i i++ for i < len(line) && line[i] != quote { i++ } if i < len(line) { i++ } tokens = append(tokens, line[start:i]) continue } // Comma (standalone token) if line[i] == ',' { tokens = append(tokens, ",") i++ continue } // Word start := i for i < len(line) && line[i] != ' ' && line[i] != '\t' && line[i] != ',' { if line[i] == '"' || line[i] == '\'' { break } i++ } if i > start { tokens = append(tokens, line[start:i]) } } return tokens } // captureExpression captures an expression from line tokens. // If this is the last marker in the pattern, captures all remaining tokens. // Otherwise, captures until the next keyword in the pattern. func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string { if *li >= len(lineWords) { return "" } // Find next literal keyword in pattern to use as delimiter delimWord := "" for pi := nextPi; pi < len(patternWords); pi++ { pw := patternWords[pi] if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" { delimWord = pw break } } if delimWord != "" { // Capture until delimiter keyword var parts []string for *li < len(lineWords) { if matchWord(lineWords[*li], delimWord, caseSens) { break } parts = append(parts, lineWords[*li]) *li++ } return strings.Join(parts, " ") } // No delimiter: if last marker, capture all remaining tokens if nextPi >= len(patternWords) { rest := strings.Join(lineWords[*li:], " ") *li = len(lineWords) return rest } // Single token capture (between markers) tok := lineWords[*li] *li++ return tok }