// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.

// #command / #translate implementation for Five preprocessor.
//
// Harbour PP syntax:
//
//	#command    PATTERN => RESULT
//	#translate  PATTERN => RESULT
//	#xcommand   PATTERN => RESULT  (case-sensitive)
//	#xtranslate PATTERN => RESULT  (case-sensitive)
//
// Pattern markers:
//
//	<x>      — match any expression (regular match)
//	<!x!>    — match single identifier only (restricted match)
//	<x,...>  — match comma-separated list
//	<*x*>    — match rest of line (wild match)
//	<x:a,b>  — match one of listed words (list match)
//	[...]    — optional clause
//
// Result markers:
//
//	<x>      — substitute matched text
//	<(x)>    — stringify (wrap in quotes)
//	<{x}>    — blockify (wrap in {|| })
//	#<x>     — dumb stringify
//	<.x.>    — logify (.T. if matched, .F. if not)
//
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c
package pp

import (
	"strings"
)

// Rule represents a single #command or #translate rule.
type Rule struct {
	Pattern    string   // raw pattern text (left of "=>")
	Result     string   // raw result text (right of "=>")
	IsCommand  bool     // #command vs #translate
	CaseSens   bool     // #xcommand/#xtranslate = case sensitive
	Keyword    string   // first keyword (for fast matching); empty disables the fast check
	Markers    []Marker // parsed pattern markers
	ResultTmpl string   // result template with marker references
}

// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
type Marker struct {
	Name       string     // marker name
	Type       MarkerType // kind of match this marker performs
	ListValues []string   // for <x:a,b> — allowed values
}

// MarkerType enumerates the kinds of pattern marker.
type MarkerType int

const (
	MarkerRegular    MarkerType = iota // <x>     — any expression
	MarkerRestricted                   // <!x!>   — identifier only
	MarkerList                         // <x,...> — comma-separated list
	MarkerWild                         // <*x*>   — rest of line
	MarkerWordList                     // <x:a,b> — one of listed words
)

// ParseRule parses a #command/#translate directive into a Rule.
func ParseRule(directive string, isCommand, caseSens bool) *Rule { // Split on => parts := strings.SplitN(directive, "=>", 2) if len(parts) != 2 { return nil } pattern := strings.TrimSpace(parts[0]) result := strings.TrimSpace(parts[1]) // Handle line continuation (;) result = strings.ReplaceAll(result, " ;", "") rule := &Rule{ Pattern: pattern, Result: result, IsCommand: isCommand, CaseSens: caseSens, ResultTmpl: result, } // Extract first keyword for fast matching. The first whitespace- // delimited token of the pattern becomes the dispatch key; we // strip marker wrappers and any trailing `(` so a pattern like // `MAKE_TEST( , )` hashes on `MAKE_TEST`, matching how // firstToken normalises source lines. words := strings.Fields(pattern) if len(words) > 0 { kw := words[0] kw = strings.TrimLeft(kw, "<[") kw = strings.TrimRight(kw, ">]") if idx := strings.IndexByte(kw, '('); idx >= 0 { kw = kw[:idx] } if !strings.ContainsAny(kw, "!*,:") { rule.Keyword = kw } } // Parse markers from pattern rule.Markers = parseMarkers(pattern) return rule } // parseMarkers extracts all <...> markers from a pattern. 
func parseMarkers(pattern string) []Marker { var markers []Marker i := 0 for i < len(pattern) { if pattern[i] == '<' { end := strings.IndexByte(pattern[i:], '>') if end < 0 { break } inner := pattern[i+1 : i+end] m := parseOneMarker(inner) if m.Name != "" { markers = append(markers, m) } i += end + 1 } else { i++ } } return markers } func parseOneMarker(inner string) Marker { inner = strings.TrimSpace(inner) // — restricted if strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") { return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted} } // <*name*> — wild if strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") { return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild} } // — comma list if strings.HasSuffix(inner, ",...") { return Marker{Name: inner[:len(inner)-4], Type: MarkerList} } // — word list if idx := strings.IndexByte(inner, ':'); idx > 0 { name := inner[:idx] vals := strings.Split(inner[idx+1:], ",") for i := range vals { vals[i] = strings.TrimSpace(vals[i]) } return Marker{Name: name, Type: MarkerWordList, ListValues: vals} } // — regular return Marker{Name: inner, Type: MarkerRegular} } // --- Rule matching and application --- // MatchLine checks if a source line matches this rule and returns the substituted result. // Returns ("", false) if no match. func (r *Rule) MatchLine(line string) (string, bool) { trimmed := strings.TrimSpace(line) if trimmed == "" { return "", false } // Fast keyword check if r.Keyword != "" { firstWord := firstToken(trimmed) if r.CaseSens { if firstWord != r.Keyword { return "", false } } else { if !strings.EqualFold(firstWord, r.Keyword) { return "", false } } } // Try to match pattern against line captures := r.matchPattern(trimmed) if captures == nil { return "", false } // Apply result template result := r.applyResult(captures) return result, true } // matchPattern attempts to match the pattern against a line. // Returns captured values map, or nil if no match. 
func (r *Rule) matchPattern(line string) map[string]string { captures := make(map[string]string) patternWords := tokenizePattern(r.Pattern) lineWords := tokenizeLine(line) pi, li := 0, 0 for pi < len(patternWords) && li < len(lineWords) { pw := patternWords[pi] // Marker? if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { inner := pw[1 : len(pw)-1] m := parseOneMarker(inner) switch m.Type { case MarkerWild: // Capture rest of line rest := strings.Join(lineWords[li:], " ") captures[m.Name] = rest li = len(lineWords) pi++ case MarkerList: // Capture a comma-separated list until the next literal // pattern token. Paren-balanced so nested `(`/`[`/`{` // don't let an inner `)` terminate the capture. Commas // at the top level are preserved verbatim in the // captured string so the `` substitution in the // result template reproduces the argument list as-is. var parts []string depth := 0 delim := "" if pi+1 < len(patternWords) { delim = patternWords[pi+1] } for li < len(lineWords) { w := lineWords[li] if depth == 0 && delim != "" && matchWord(w, delim, r.CaseSens) { break } switch w { case "(", "[", "{": depth++ case ")", "]", "}": if depth > 0 { depth-- } } parts = append(parts, w) li++ } captures[m.Name] = strings.Join(parts, " ") pi++ case MarkerWordList: // Match one of listed words matched := false for _, allowed := range m.ListValues { if r.CaseSens { if lineWords[li] == allowed { matched = true break } } else if strings.EqualFold(lineWords[li], allowed) { matched = true break } } if !matched { return nil } captures[m.Name] = lineWords[li] li++ pi++ default: // Regular or restricted: capture one token or expression captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens) captures[m.Name] = captured pi++ } } else if pw == "[" { // Optional clause — skip to matching ] depth := 1 pi++ for pi < len(patternWords) && depth > 0 { if patternWords[pi] == "[" { depth++ } else if patternWords[pi] == "]" { depth-- } pi++ } } else if pw == "]" { 
pi++ } else { // Literal keyword — must match if !matchWord(lineWords[li], pw, r.CaseSens) { return nil } li++ pi++ } } // Skip remaining optional markers in pattern for pi < len(patternWords) { pw := patternWords[pi] if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) { pi++ } else { break } } // For #command with no markers and no optional clauses: // all line tokens must be consumed for a match if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 && !strings.Contains(r.Pattern, "[") { return nil } return captures } // ppQuote wraps a captured value in a PRG string literal, picking a // delimiter that doesn't collide with characters already inside. Harbour // # stringify takes the raw source text of the argument and must // produce a legal PRG string — if the capture is `"world"`, the result // can't just be `""world""`. Preference order matches Harbour: // double-quotes first, then single-quotes, then bracket literals. func ppQuote(val string) string { if !strings.ContainsRune(val, '"') { return `"` + val + `"` } if !strings.ContainsRune(val, '\'') { return "'" + val + "'" } if !strings.ContainsRune(val, '[') && !strings.ContainsRune(val, ']') { return "[" + val + "]" } // Fallback: double-quote with embedded quotes dropped. Pathological // input only; Harbour itself refuses to handle this cleanly. return `"` + strings.ReplaceAll(val, `"`, "") + `"` } // applyResult substitutes captured values into the result template. // Order matters — the compound forms (`#`, `<(z)>`, `<.z.>`, `<"z">`) // all contain the bare `` token, so the bare substitution has to run // LAST. Previously `` was replaced first and left a stray `#` / `(` / // `.` / `"` behind, producing bogus lines like `? #hello` that the // lexer then choked on with ILLEGAL token errors. 
func (r *Rule) applyResult(captures map[string]string) string { result := r.ResultTmpl for name, val := range captures { quoted := ppQuote(val) // # — dumb stringify (always quote). result = strings.ReplaceAll(result, "#<"+name+">", quoted) // <"name"> — explicit stringify. result = strings.ReplaceAll(result, `<"`+name+`">`, quoted) // <(name)> — smart stringify: already a string literal → keep; // otherwise quote. `val` comes straight from the capture, so // trim and check for surrounding quotes. trim := strings.TrimSpace(val) smart := quoted if n := len(trim); n >= 2 && ((trim[0] == '"' && trim[n-1] == '"') || (trim[0] == '\'' && trim[n-1] == '\'') || (trim[0] == '[' && trim[n-1] == ']')) { smart = trim } result = strings.ReplaceAll(result, "<("+name+")>", smart) // <.name.> — logify (empty → .F., else .T.) if val != "" { result = strings.ReplaceAll(result, "<."+name+".>", ".T.") } else { result = strings.ReplaceAll(result, "<."+name+".>", ".F.") } // — bare substitution (must be LAST, after all wrappers). result = strings.ReplaceAll(result, "<"+name+">", val) } // Clean up unreferenced markers: , <(name)>, <.name.>, #, <"name"> result = cleanUnreferencedMarkers(result) return result } // cleanUnreferencedMarkers removes any remaining , <(name)>, <.name.>, # references. // Only removes well-formed PP marker references, not comparison operators. func cleanUnreferencedMarkers(s string) string { // Match patterns like , <(identifier)>, <.identifier.>, # var out strings.Builder i := 0 for i < len(s) { removed := false // # if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' { if end := findMarkerEnd(s, i+1); end > 0 { i = end removed = true } } // , <(name)>, <.name.>, <"name"> if !removed && s[i] == '<' { if end := findMarkerEnd(s, i); end > 0 { i = end removed = true } } if !removed { out.WriteByte(s[i]) i++ } } return out.String() } // findMarkerEnd checks if s[start] begins a PP marker and returns end position, or 0. 
//
// A marker here is '<', an optional single '(' / '.' / '"' prefix, an
// identifier ([A-Za-z_][A-Za-z0-9_]*), any run of ')' '.' '"' ',' ' '
// suffix bytes, and a closing '>'. The returned end position is the index
// just past that '>'.
func findMarkerEnd(s string, start int) int {
	if start >= len(s) || s[start] != '<' {
		return 0
	}
	i := start + 1

	// Skip optional ( or . or " prefix.
	if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"') {
		i++
	}

	// Must start with a letter or underscore (identifier).
	if i >= len(s) || !(s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] == '_') {
		return 0
	}

	// Consume the identifier.
	for i < len(s) && (s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] >= '0' && s[i] <= '9' || s[i] == '_') {
		i++
	}

	// Skip optional ) or . or " or ,... suffix.
	for i < len(s) && (s[i] == ')' || s[i] == '.' || s[i] == '"' || s[i] == ',' || s[i] == ' ') {
		i++
	}

	if i < len(s) && s[i] == '>' {
		return i + 1
	}
	return 0
}

// --- Helpers ---

// firstToken returns the part of s before its first space, tab or '(';
// s itself when none of those occurs.
func firstToken(s string) string {
	if cut := strings.IndexAny(s, " \t("); cut >= 0 {
		return s[:cut]
	}
	return s
}

// matchWord compares a line token with a pattern token, exactly when
// caseSens is set and case-insensitively otherwise.
func matchWord(lineWord, patternWord string, caseSens bool) bool {
	if caseSens {
		return lineWord == patternWord
	}
	return strings.EqualFold(lineWord, patternWord)
}

// tokenizePattern splits a pattern into words, keeping markers as single tokens.
// Parens and commas are emitted as their own tokens so `DUMB(<z>)` and
// `DUMB( <z> )` tokenise identically — matching what tokenizeLine does
// on call sites. Without this, `_DUMB_(a)` (no space) stored as a
// single word would never align with the pattern's `DUMB( <a>, <b>, <c> )`
// tokens.
//
// A '<' with no closing '>' anywhere after it is not a marker; it is kept
// as the first byte of an ordinary word (so `<` or `<=` become literal
// tokens). The scanner always consumes at least one byte per token, which
// guarantees termination on such input.
func tokenizePattern(pattern string) []string {
	// Bytes that end a literal word.
	const wordStops = " \t<[](),"

	var tokens []string
	i := 0
	for i < len(pattern) {
		c := pattern[i]
		switch c {
		case ' ', '\t':
			i++
			continue
		case '[', ']', '(', ')', ',':
			tokens = append(tokens, string(c))
			i++
			continue
		case '<':
			if end := strings.IndexByte(pattern[i:], '>'); end >= 0 {
				tokens = append(tokens, pattern[i:i+end+1])
				i += end + 1
				continue
			}
			// Unterminated '<': handled below as a literal word.
		}

		// Regular word — stop at space/tab/marker/bracket/paren/comma.
		// The first byte is consumed unconditionally: it is either an
		// ordinary word byte or an unterminated '<'.
		start := i
		i++
		for i < len(pattern) && strings.IndexByte(wordStops, pattern[i]) < 0 {
			i++
		}
		tokens = append(tokens, pattern[start:i])
	}
	return tokens
}

// tokenizeLine splits a source line into words matching the rules used
// by tokenizePattern: string literals stay intact, commas/parens/brackets
// emit as standalone tokens so a call like `DUMB(hello)` tokenises as
// `DUMB`, `(`, `hello`, `)` — aligning with the pattern side.
// A string literal runs from its opening quote to the next identical
// quote, or to the end of the line when it is never closed.
func tokenizeLine(line string) []string {
	// Bytes that end a word.
	const wordStops = " \t,()[]\"'"

	var tokens []string
	i := 0
	for i < len(line) {
		c := line[i]
		switch c {
		case ' ', '\t':
			i++

		case '"', '\'':
			// String literal, kept as one token including its quotes.
			start := i
			i++
			for i < len(line) && line[i] != c {
				i++
			}
			if i < len(line) {
				i++ // closing quote
			}
			tokens = append(tokens, line[start:i])

		case ',', '(', ')', '[', ']':
			tokens = append(tokens, string(c))
			i++

		default:
			// Word — stop at whitespace, brackets, parens, comma, quotes.
			start := i
			for i < len(line) && strings.IndexByte(wordStops, line[i]) < 0 {
				i++
			}
			tokens = append(tokens, line[start:i])
		}
	}
	return tokens
}

// captureExpression captures an expression from line tokens.
// If this is the last marker in the pattern, captures all remaining tokens.
// Otherwise, captures until the next keyword in the pattern.
func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string { if *li >= len(lineWords) { return "" } // Find next literal keyword in pattern to use as delimiter delimWord := "" for pi := nextPi; pi < len(patternWords); pi++ { pw := patternWords[pi] if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" { delimWord = pw break } } if delimWord != "" { // Capture until the delimiter, paren-balancing so nested // parens/brackets/braces inside the expression don't falsely // terminate the capture. Harbour's own PP does the same — // `_REGULAR_(&(a))` must capture `&(a)` (incl. inner parens) // and leave the outer `)` for the pattern's own delimiter. var parts []string depth := 0 for *li < len(lineWords) { w := lineWords[*li] if depth == 0 && matchWord(w, delimWord, caseSens) { break } switch w { case "(", "[", "{": depth++ case ")", "]", "}": if depth > 0 { depth-- } } parts = append(parts, w) *li++ } return strings.Join(parts, " ") } // No delimiter: if last marker, capture all remaining tokens if nextPi >= len(patternWords) { rest := strings.Join(lineWords[*li:], " ") *li = len(lineWords) return rest } // Single token capture (between markers) tok := lineWords[*li] *li++ return tok }