// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// #command / #translate implementation for Five preprocessor.
//
// Harbour PP syntax:
//   #command    PATTERN => RESULT
//   #translate  PATTERN => RESULT
//   #xcommand   PATTERN => RESULT  (case-sensitive)
//   #xtranslate PATTERN => RESULT  (case-sensitive)
//
// Pattern markers:
//   <x>       — match any expression (regular match)
//   <!x!>     — match single identifier only (restricted match)
//   <x,...>   — match comma-separated list
//   <*x*>     — match rest of line (wild match)
//   <x:a,b>   — match one of listed words (list match)
//   [...]     — optional clause
//
// Result markers:
//   <x>       — substitute matched text
//   <(x)>     — stringify (wrap in quotes)
//   <{x}>     — blockify (wrap in {|| })
//   #<x>      — dumb stringify
//   <.x.>     — logify (.T. if matched, .F. if not)
//
// Reference: /mnt/d/harbour-core/src/pp/ppcore.c

package pp

import (
	"strings"
)

// Rule represents a single #command or #translate rule.
type Rule struct {
	Pattern    string   // raw pattern text
	Result     string   // raw result text
	IsCommand  bool     // #command vs #translate
	CaseSens   bool     // #xcommand/#xtranslate = case sensitive
	Keyword    string   // first keyword (for fast matching)
	Markers    []Marker // parsed pattern markers
	ResultTmpl string   // result template with marker references
}

// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
type Marker struct {
	Name       string     // marker name
	Type       MarkerType // which kind of match the marker performs
	ListValues []string   // for <x:a,b> — allowed values
}

// MarkerType identifies the kind of a pattern marker.
type MarkerType int

const (
	MarkerRegular    MarkerType = iota // <x> — any expression
	MarkerRestricted                   // <!x!> — identifier only
	MarkerList                         // <x,...> — comma-separated list
	MarkerWild                         // <*x*> — rest of line
	MarkerWordList                     // <x:a,b> — one of listed words
)

// ParseRule parses a #command/#translate directive into a Rule.
func ParseRule(directive string, isCommand, caseSens bool) *Rule { // Split on => parts := strings.SplitN(directive, "=>", 2) if len(parts) != 2 { return nil } pattern := strings.TrimSpace(parts[0]) result := strings.TrimSpace(parts[1]) // Earlier versions stripped every ` ;` as Harbour line-continuation. // That also destroyed in-line PRG statement separators — `IF x == // NIL ; x := y ; ENDIF` lost all its semicolons. Line-continuation // joining is the preprocessor's job (processLines), not this rule // parser's. Keep the semicolons as-is. rule := &Rule{ Pattern: pattern, Result: result, IsCommand: isCommand, CaseSens: caseSens, ResultTmpl: result, } // Extract first keyword for fast matching. The first whitespace- // delimited token of the pattern becomes the dispatch key; we // strip marker wrappers and any trailing `(` so a pattern like // `MAKE_TEST( , )` hashes on `MAKE_TEST`, matching how // firstToken normalises source lines. words := strings.Fields(pattern) if len(words) > 0 { kw := words[0] kw = strings.TrimLeft(kw, "<[") kw = strings.TrimRight(kw, ">]") if idx := strings.IndexByte(kw, '('); idx >= 0 { kw = kw[:idx] } if !strings.ContainsAny(kw, "!*,:") { rule.Keyword = kw } } // Parse markers from pattern rule.Markers = parseMarkers(pattern) return rule } // parseMarkers extracts all <...> markers from a pattern. 
func parseMarkers(pattern string) []Marker { var markers []Marker i := 0 for i < len(pattern) { if pattern[i] == '<' { end := strings.IndexByte(pattern[i:], '>') if end < 0 { break } inner := pattern[i+1 : i+end] m := parseOneMarker(inner) if m.Name != "" { markers = append(markers, m) } i += end + 1 } else { i++ } } return markers } func parseOneMarker(inner string) Marker { inner = strings.TrimSpace(inner) // — restricted if strings.HasPrefix(inner, "!") && strings.HasSuffix(inner, "!") { return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRestricted} } // <*name*> — wild if strings.HasPrefix(inner, "*") && strings.HasSuffix(inner, "*") { return Marker{Name: inner[1 : len(inner)-1], Type: MarkerWild} } // — comma list if strings.HasSuffix(inner, ",...") { return Marker{Name: inner[:len(inner)-4], Type: MarkerList} } // — word list if idx := strings.IndexByte(inner, ':'); idx > 0 { name := inner[:idx] vals := strings.Split(inner[idx+1:], ",") for i := range vals { vals[i] = strings.TrimSpace(vals[i]) } return Marker{Name: name, Type: MarkerWordList, ListValues: vals} } // — regular return Marker{Name: inner, Type: MarkerRegular} } // --- Rule matching and application --- // MatchLine checks if a source line matches this rule and returns the substituted result. // Returns ("", false) if no match. func (r *Rule) MatchLine(line string) (string, bool) { trimmed := strings.TrimSpace(line) if trimmed == "" { return "", false } // Fast keyword check if r.Keyword != "" { firstWord := firstToken(trimmed) if r.CaseSens { if firstWord != r.Keyword { return "", false } } else { if !strings.EqualFold(firstWord, r.Keyword) { return "", false } } } // Try to match pattern against line captures := r.matchPattern(trimmed) if captures == nil { return "", false } // Apply result template result := r.applyResult(captures) return result, true } // matchPattern attempts to match the pattern against a line. // Returns captured values map, or nil if no match. 
func (r *Rule) matchPattern(line string) map[string]string { captures := make(map[string]string) patternWords := tokenizePattern(r.Pattern) lineWords := tokenizeLine(line) pi, li := 0, 0 for pi < len(patternWords) && li < len(lineWords) { pw := patternWords[pi] // Marker? if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { inner := pw[1 : len(pw)-1] m := parseOneMarker(inner) switch m.Type { case MarkerWild: // Capture rest of line rest := strings.Join(lineWords[li:], " ") captures[m.Name] = rest li = len(lineWords) pi++ case MarkerList: // Capture a comma-separated list until the next literal // pattern token. Paren-balanced so nested `(`/`[`/`{` // don't let an inner `)` terminate the capture. Commas // at the top level are preserved verbatim in the // captured string so the `` substitution in the // result template reproduces the argument list as-is. var parts []string depth := 0 delim := "" if pi+1 < len(patternWords) { delim = patternWords[pi+1] } for li < len(lineWords) { w := lineWords[li] if depth == 0 && delim != "" && matchWord(w, delim, r.CaseSens) { break } switch w { case "(", "[", "{": depth++ case ")", "]", "}": if depth > 0 { depth-- } } parts = append(parts, w) li++ } captures[m.Name] = strings.Join(parts, " ") pi++ case MarkerWordList: // Match one of listed words matched := false for _, allowed := range m.ListValues { if r.CaseSens { if lineWords[li] == allowed { matched = true break } } else if strings.EqualFold(lineWords[li], allowed) { matched = true break } } if !matched { return nil } captures[m.Name] = lineWords[li] li++ pi++ default: // Regular or restricted: capture one token or expression captured := captureExpression(lineWords, &li, patternWords, pi+1, r.CaseSens) captures[m.Name] = captured pi++ } } else if pw == "[" { // Optional, possibly-repeating sub-pattern. 
Try matching the // bracketed body repeatedly against the remaining line; each // successful iteration appends its marker captures under the // same name with a \x01 separator. Used by Harbour forms // like `DEFAULT TO [, TO ]` where the // trailing bracket repeats for each additional pair. depth := 1 bodyStart := pi + 1 bodyEnd := bodyStart for bodyEnd < len(patternWords) && depth > 0 { if patternWords[bodyEnd] == "[" { depth++ } else if patternWords[bodyEnd] == "]" { depth-- if depth == 0 { break } } bodyEnd++ } body := patternWords[bodyStart:bodyEnd] for li < len(lineWords) { snapshotLi := li iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens) if !ok { li = snapshotLi break } for k, v := range iterCaps { if prev, hit := captures[k]; hit && prev != "" { captures[k] = prev + "\x01" + v } else { captures[k] = v } } li = newLi if li == snapshotLi { break // no progress — avoid infinite loop } } pi = bodyEnd + 1 // past ] } else if pw == "]" { pi++ } else { // Literal keyword — must match if !matchWord(lineWords[li], pw, r.CaseSens) { return nil } li++ pi++ } } // Skip remaining optional markers in pattern for pi < len(patternWords) { pw := patternWords[pi] if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) { pi++ } else { break } } // For #command with no markers and no optional clauses: // all line tokens must be consumed for a match if r.IsCommand && li < len(lineWords) && len(r.Markers) == 0 && !strings.Contains(r.Pattern, "[") { return nil } return captures } // matchSegment tries to match a bracketed sub-pattern against a slice // of the line tokens starting at startLi. Returns per-iteration // captures and the new line position on success. The segment cannot // contain nested `[...]` — callers of the optional-repeat logic // flatten one level at a time. // // A "mini-matcher" that mirrors the main loop for MarkerRegular and // literal keywords. 
MarkerList and MarkerWild inside `[...]` would // need additional plumbing; defer those until real patterns need them. func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[string]string, int, bool) { caps := make(map[string]string) li := startLi // When the segment starts with a literal (e.g. `,` in // `[, TO ]`), treat that literal as the natural boundary // between iterations. Used as the delimiter for a trailing marker // that would otherwise gobble the rest of the line. repeatBoundary := "" if len(segment) > 0 && !strings.HasPrefix(segment[0], "<") && segment[0] != "[" && segment[0] != "]" { repeatBoundary = segment[0] } for pi := 0; pi < len(segment); pi++ { pw := segment[pi] if li >= len(lineWords) { return nil, startLi, false } if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { inner := pw[1 : len(pw)-1] m := parseOneMarker(inner) if m.Type != MarkerRegular && m.Type != MarkerRestricted { return nil, startLi, false } // Build a pseudo-pattern tail so captureExpression picks the // right delimiter. If there's a next literal inside `segment`, // use it; otherwise fall back to the repeat boundary so the // capture stops before the next iteration starts. tail := segment[pi+1:] if !hasLiteralAfter(tail) && repeatBoundary != "" { tail = []string{repeatBoundary} } captured := captureExpression(lineWords, &li, tail, 0, caseSens) caps[m.Name] = captured continue } if !matchWord(lineWords[li], pw, caseSens) { return nil, startLi, false } li++ } return caps, li, true } // hasLiteralAfter reports whether a pattern slice contains any literal // keyword token (non-marker, non-bracket) — used to decide whether a // marker's capture has a real delimiter or needs a synthetic one. 
func hasLiteralAfter(segment []string) bool { for _, pw := range segment { if pw == "[" || pw == "]" || pw == "" { continue } if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { continue } return true } return false } // ppQuote wraps a captured value in a PRG string literal, picking a // delimiter that doesn't collide with characters already inside. Harbour // # stringify takes the raw source text of the argument and must // produce a legal PRG string — if the capture is `"world"`, the result // can't just be `""world""`. Preference order matches Harbour: // double-quotes first, then single-quotes, then bracket literals. func ppQuote(val string) string { if !strings.ContainsRune(val, '"') { return `"` + val + `"` } if !strings.ContainsRune(val, '\'') { return "'" + val + "'" } if !strings.ContainsRune(val, '[') && !strings.ContainsRune(val, ']') { return "[" + val + "]" } // Fallback: double-quote with embedded quotes dropped. Pathological // input only; Harbour itself refuses to handle this cleanly. return `"` + strings.ReplaceAll(val, `"`, "") + `"` } // applyResult substitutes captured values into the result template. // Order matters — the compound forms (`#`, `<(z)>`, `<.z.>`, `<"z">`) // all contain the bare `` token, so the bare substitution has to run // LAST. Previously `` was replaced first and left a stray `#` / `(` / // `.` / `"` behind, producing bogus lines like `? #hello` that the // lexer then choked on with ILLEGAL token errors. func (r *Rule) applyResult(captures map[string]string) string { result := r.ResultTmpl // Expand optional-repeat `[ ... ]` segments in the template. If any // marker inside a bracketed section was multi-captured during the // pattern match (values joined with \x01), emit the body once per // iteration with per-iter values. If no markers inside are multi- // captured, the bracket body is included once with whatever single // captures apply (the required-or-absent case). 
result = expandOptionalRepeat(result, captures) for name, val := range captures { // Multi-capture markers are consumed by expandOptionalRepeat; // the bare substitution for the joined form would produce // garbage (values separated by \x01). Skip them here and let // any remaining bare `` fall through to the cleanup. if strings.ContainsRune(val, '\x01') { continue } quoted := ppQuote(val) // # — dumb stringify (always quote). result = strings.ReplaceAll(result, "#<"+name+">", quoted) // <"name"> — explicit stringify. result = strings.ReplaceAll(result, `<"`+name+`">`, quoted) // <(name)> — smart stringify: already a string literal → keep; // otherwise quote. `val` comes straight from the capture, so // trim and check for surrounding quotes. trim := strings.TrimSpace(val) smart := quoted if n := len(trim); n >= 2 && ((trim[0] == '"' && trim[n-1] == '"') || (trim[0] == '\'' && trim[n-1] == '\'') || (trim[0] == '[' && trim[n-1] == ']')) { smart = trim } result = strings.ReplaceAll(result, "<("+name+")>", smart) // <.name.> — logify (empty → .F., else .T.) if val != "" { result = strings.ReplaceAll(result, "<."+name+".>", ".T.") } else { result = strings.ReplaceAll(result, "<."+name+".>", ".F.") } // — bare substitution (must be LAST, after all wrappers). result = strings.ReplaceAll(result, "<"+name+">", val) } // Clean up unreferenced markers: , <(name)>, <.name.>, #, <"name"> result = cleanUnreferencedMarkers(result) return result } // expandOptionalRepeat walks a result template and rewrites each top- // level `[ ... ]` block by examining the captures referenced inside: // // - If any referenced marker has multiple captured iterations // (values joined with \x01), emit the body N times, substituting // the i-th iteration's value for each such marker and dropping // single-valued markers into each iteration unchanged. // - If no referenced marker is multi-captured BUT the single // captures include non-empty values, emit the body once. // - Otherwise drop the block. 
// // Nested brackets are not supported — Harbour uses a single level of // `[...]` for the common repeat form. Callers that need deeper nesting // can fall back to writing out separate #xcommand rules. func expandOptionalRepeat(template string, captures map[string]string) string { var out strings.Builder i := 0 for i < len(template) { if template[i] == '[' { // Find matching top-level ']'. Skip over quoted strings // and nested brackets inside PP markers like `<.x.>`. depth := 1 j := i + 1 for j < len(template) && depth > 0 { switch template[j] { case '[': // Inside a marker `<...>` the `[` is just text; // only count top-level brackets. if inMarker(template, j) { j++ continue } depth++ case ']': if inMarker(template, j) { j++ continue } depth-- if depth == 0 { body := template[i+1 : j] out.WriteString(expandBracketBody(body, captures)) i = j + 1 goto next } } j++ } // Unmatched [ — copy literally. out.WriteByte(template[i]) i++ next: continue } out.WriteByte(template[i]) i++ } return out.String() } // inMarker reports whether position `p` in s is inside a PP marker // reference like `<.x.>` / `<"x">` / `<(x)>` — where `[` and `]` are // ordinary text, not template delimiters. func inMarker(s string, p int) bool { // Look backward for `<` not preceded by a marker-terminator. for k := p - 1; k >= 0; k-- { c := s[k] if c == '>' { return false } if c == '<' { // Scan forward from `<` to see if we're still inside. for m := k + 1; m < len(s) && m <= p; m++ { if s[m] == '>' { return false } } return true } } return false } // expandBracketBody returns the optional-repeat body expanded once per // iteration of its multi-captured markers. See expandOptionalRepeat. func expandBracketBody(body string, captures map[string]string) string { // Find marker names referenced inside the body. 
refs := referencedMarkers(body) iters := 1 hasMulti := false for _, name := range refs { if val, ok := captures[name]; ok && strings.ContainsRune(val, '\x01') { n := strings.Count(val, "\x01") + 1 if n > iters { iters = n } hasMulti = true } } if !hasMulti { // No multi-capture — include body once if any referenced marker // has a (single) capture; otherwise drop. anyPresent := false for _, name := range refs { if _, ok := captures[name]; ok { anyPresent = true break } } if !anyPresent { return "" } return body } // Pre-split each multi-captured referent into a per-iteration list. parts := make(map[string][]string, len(refs)) for _, name := range refs { if val, ok := captures[name]; ok { parts[name] = strings.Split(val, "\x01") } } var out strings.Builder for iter := 0; iter < iters; iter++ { piece := body for name, vals := range parts { var v string if iter < len(vals) { v = vals[iter] } quoted := ppQuote(v) piece = strings.ReplaceAll(piece, "#<"+name+">", quoted) piece = strings.ReplaceAll(piece, `<"`+name+`">`, quoted) piece = strings.ReplaceAll(piece, "<("+name+")>", quoted) if v != "" { piece = strings.ReplaceAll(piece, "<."+name+".>", ".T.") } else { piece = strings.ReplaceAll(piece, "<."+name+".>", ".F.") } piece = strings.ReplaceAll(piece, "<"+name+">", v) } out.WriteString(piece) } return out.String() } // referencedMarkers extracts marker names referenced inside a template // fragment. Handles ``, `<(name)>`, `<.name.>`, `<"name">`, and // `#` forms. func referencedMarkers(s string) []string { seen := map[string]bool{} var out []string i := 0 for i < len(s) { if s[i] == '<' { j := i + 1 // Skip leading punctuation forms: (name), .name., "name". for j < len(s) && (s[j] == '(' || s[j] == '.' 
|| s[j] == '"') { j++ } start := j for j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z') || (s[j] >= '0' && s[j] <= '9')) { j++ } if j > start { name := s[start:j] if !seen[name] { seen[name] = true out = append(out, name) } } i = j continue } i++ } return out } // cleanUnreferencedMarkers removes any remaining , <(name)>, <.name.>, # references. // Only removes well-formed PP marker references, not comparison operators. func cleanUnreferencedMarkers(s string) string { // Match patterns like , <(identifier)>, <.identifier.>, # var out strings.Builder i := 0 for i < len(s) { removed := false // # if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' { if end := findMarkerEnd(s, i+1); end > 0 { i = end removed = true } } // , <(name)>, <.name.>, <"name"> if !removed && s[i] == '<' { if end := findMarkerEnd(s, i); end > 0 { i = end removed = true } } if !removed { out.WriteByte(s[i]) i++ } } return out.String() } // findMarkerEnd checks if s[start] begins a PP marker and returns end position, or 0. func findMarkerEnd(s string, start int) int { if start >= len(s) || s[start] != '<' { return 0 } i := start + 1 // Skip optional ( or . prefix if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"') { i++ } // Must start with letter or underscore (identifier) if i >= len(s) || !(s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] == '_') { return 0 } // Consume identifier for i < len(s) && (s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] >= '0' && s[i] <= '9' || s[i] == '_') { i++ } // Skip optional ) or . or " or ,... suffix for i < len(s) && (s[i] == ')' || s[i] == '.' 
|| s[i] == '"' || s[i] == ',' || s[i] == ' ') { i++ } if i < len(s) && s[i] == '>' { return i + 1 } return 0 } // --- Helpers --- func firstToken(s string) string { for i, c := range s { if c == ' ' || c == '\t' || c == '(' { return s[:i] } } return s } func matchWord(lineWord, patternWord string, caseSens bool) bool { if caseSens { return lineWord == patternWord } return strings.EqualFold(lineWord, patternWord) } // tokenizePattern splits a pattern into words, keeping markers as single tokens. // Parens and commas are emitted as their own tokens so `DUMB()` and // `DUMB( )` tokenise identically — matching what tokenizeLine does // on call sites. Without this, `_DUMB_(a)` (no space) stored as a // single word would never align with the pattern's `DUMB( , , )` // tokens. func tokenizePattern(pattern string) []string { var tokens []string i := 0 for i < len(pattern) { for i < len(pattern) && (pattern[i] == ' ' || pattern[i] == '\t') { i++ } if i >= len(pattern) { break } if pattern[i] == '<' { end := strings.IndexByte(pattern[i:], '>') if end >= 0 { tokens = append(tokens, pattern[i:i+end+1]) i += end + 1 continue } } switch pattern[i] { case '[', ']', '(', ')', ',': tokens = append(tokens, string(pattern[i])) i++ continue } // Regular word — stop at space/tab/marker/bracket/paren/comma. start := i for i < len(pattern) { c := pattern[i] if c == ' ' || c == '\t' || c == '<' || c == '[' || c == ']' || c == '(' || c == ')' || c == ',' { break } i++ } if i > start { tokens = append(tokens, pattern[start:i]) } } return tokens } // tokenizeLine splits a source line into words matching the rules used // by tokenizePattern: string literals stay intact, commas/parens/brackets // emit as standalone tokens so a call like `DUMB(hello)` tokenises as // `DUMB`, `(`, `hello`, `)` — aligning with the pattern side. 
func tokenizeLine(line string) []string { var tokens []string i := 0 for i < len(line) { for i < len(line) && (line[i] == ' ' || line[i] == '\t') { i++ } if i >= len(line) { break } // String literal if line[i] == '"' || line[i] == '\'' { quote := line[i] start := i i++ for i < len(line) && line[i] != quote { i++ } if i < len(line) { i++ } tokens = append(tokens, line[start:i]) continue } switch line[i] { case ',', '(', ')', '[', ']': tokens = append(tokens, string(line[i])) i++ continue } // Word — stop at whitespace, brackets, parens, comma, quotes. start := i for i < len(line) { c := line[i] if c == ' ' || c == '\t' || c == ',' || c == '(' || c == ')' || c == '[' || c == ']' || c == '"' || c == '\'' { break } i++ } if i > start { tokens = append(tokens, line[start:i]) } } return tokens } // captureExpression captures an expression from line tokens. // If this is the last marker in the pattern, captures all remaining tokens. // Otherwise, captures until the next keyword in the pattern. func captureExpression(lineWords []string, li *int, patternWords []string, nextPi int, caseSens bool) string { if *li >= len(lineWords) { return "" } // Find next literal keyword in pattern to use as delimiter delimWord := "" for pi := nextPi; pi < len(patternWords); pi++ { pw := patternWords[pi] if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" { delimWord = pw break } } if delimWord != "" { // Capture until the delimiter, paren-balancing so nested // parens/brackets/braces inside the expression don't falsely // terminate the capture. Harbour's own PP does the same — // `_REGULAR_(&(a))` must capture `&(a)` (incl. inner parens) // and leave the outer `)` for the pattern's own delimiter. 
var parts []string depth := 0 for *li < len(lineWords) { w := lineWords[*li] if depth == 0 && matchWord(w, delimWord, caseSens) { break } switch w { case "(", "[", "{": depth++ case ")", "]", "}": if depth > 0 { depth-- } } parts = append(parts, w) *li++ } return strings.Join(parts, " ") } // No delimiter: if last marker, capture all remaining tokens if nextPi >= len(patternWords) { rest := strings.Join(lineWords[*li:], " ") *li = len(lineWords) return rest } // Single token capture (between markers) tok := lineWords[*li] *li++ return tok }