diff --git a/compiler/pp/command.go b/compiler/pp/command.go index 745aa46..d8665b9 100644 --- a/compiler/pp/command.go +++ b/compiler/pp/command.go @@ -70,8 +70,11 @@ func ParseRule(directive string, isCommand, caseSens bool) *Rule { pattern := strings.TrimSpace(parts[0]) result := strings.TrimSpace(parts[1]) - // Handle line continuation (;) - result = strings.ReplaceAll(result, " ;", "") + // Earlier versions stripped every ` ;` as Harbour line-continuation. + // That also destroyed in-line PRG statement separators — `IF x == + // NIL ; x := y ; ENDIF` lost all its semicolons. Line-continuation + // joining is the preprocessor's job (processLines), not this rule + // parser's. Keep the semicolons as-is. rule := &Rule{ Pattern: pattern, @@ -280,17 +283,47 @@ func (r *Rule) matchPattern(line string) map[string]string { pi++ } } else if pw == "[" { - // Optional clause — skip to matching ] + // Optional, possibly-repeating sub-pattern. Try matching the + // bracketed body repeatedly against the remaining line; each + // successful iteration appends its marker captures under the + // same name with a \x01 separator. Used by Harbour forms + // like `DEFAULT <v1> TO <x1> [, <vn> TO <xn> ]` where the + // trailing bracket repeats for each additional pair.
depth := 1 - pi++ - for pi < len(patternWords) && depth > 0 { - if patternWords[pi] == "[" { + bodyStart := pi + 1 + bodyEnd := bodyStart + for bodyEnd < len(patternWords) && depth > 0 { + if patternWords[bodyEnd] == "[" { depth++ - } else if patternWords[pi] == "]" { + } else if patternWords[bodyEnd] == "]" { depth-- + if depth == 0 { + break + } } - pi++ + bodyEnd++ } + body := patternWords[bodyStart:bodyEnd] + for li < len(lineWords) { + snapshotLi := li + iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens) + if !ok { + li = snapshotLi + break + } + for k, v := range iterCaps { + if prev, hit := captures[k]; hit && prev != "" { + captures[k] = prev + "\x01" + v + } else { + captures[k] = v + } + } + li = newLi + if li == snapshotLi { + break // no progress — avoid infinite loop + } + } + pi = bodyEnd + 1 // past ] } else if pw == "]" { pi++ } else { @@ -323,6 +356,76 @@ func (r *Rule) matchPattern(line string) map[string]string { return captures } +// matchSegment tries to match a bracketed sub-pattern against a slice +// of the line tokens starting at startLi. Returns per-iteration +// captures and the new line position on success. The segment cannot +// contain nested `[...]` — callers of the optional-repeat logic +// flatten one level at a time. +// +// A "mini-matcher" that mirrors the main loop for MarkerRegular and +// literal keywords. MarkerList and MarkerWild inside `[...]` would +// need additional plumbing; defer those until real patterns need them. +func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[string]string, int, bool) { + caps := make(map[string]string) + li := startLi + + // When the segment starts with a literal (e.g. `,` in + // `[, <vn> TO <xn> ]`), treat that literal as the natural boundary + // between iterations. Used as the delimiter for a trailing marker + // that would otherwise gobble the rest of the line.
+ repeatBoundary := "" + if len(segment) > 0 && !strings.HasPrefix(segment[0], "<") && + segment[0] != "[" && segment[0] != "]" { + repeatBoundary = segment[0] + } + + for pi := 0; pi < len(segment); pi++ { + pw := segment[pi] + if li >= len(lineWords) { + return nil, startLi, false + } + if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { + inner := pw[1 : len(pw)-1] + m := parseOneMarker(inner) + if m.Type != MarkerRegular && m.Type != MarkerRestricted { + return nil, startLi, false + } + // Build a pseudo-pattern tail so captureExpression picks the + // right delimiter. If there's a next literal inside `segment`, + // use it; otherwise fall back to the repeat boundary so the + // capture stops before the next iteration starts. + tail := segment[pi+1:] + if !hasLiteralAfter(tail) && repeatBoundary != "" { + tail = []string{repeatBoundary} + } + captured := captureExpression(lineWords, &li, tail, 0, caseSens) + caps[m.Name] = captured + continue + } + if !matchWord(lineWords[li], pw, caseSens) { + return nil, startLi, false + } + li++ + } + return caps, li, true +} + +// hasLiteralAfter reports whether a pattern slice contains any literal +// keyword token (non-marker, non-bracket) — used to decide whether a +// marker's capture has a real delimiter or needs a synthetic one. +func hasLiteralAfter(segment []string) bool { + for _, pw := range segment { + if pw == "[" || pw == "]" || pw == "" { + continue + } + if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { + continue + } + return true + } + return false +} + // ppQuote wraps a captured value in a PRG string literal, picking a // delimiter that doesn't collide with characters already inside. Harbour // # stringify takes the raw source text of the argument and must @@ -353,7 +456,22 @@ func ppQuote(val string) string { func (r *Rule) applyResult(captures map[string]string) string { result := r.ResultTmpl + // Expand optional-repeat `[ ... ]` segments in the template. 
If any + // marker inside a bracketed section was multi-captured during the + // pattern match (values joined with \x01), emit the body once per + // iteration with per-iter values. If no markers inside are multi- + // captured, the bracket body is included once with whatever single + // captures apply (the required-or-absent case). + result = expandOptionalRepeat(result, captures) + for name, val := range captures { + // Multi-capture markers are consumed by expandOptionalRepeat; + // the bare substitution for the joined form would produce + // garbage (values separated by \x01). Skip them here and let + // any remaining bare `<name>` fall through to the cleanup. + if strings.ContainsRune(val, '\x01') { + continue + } quoted := ppQuote(val) // #<name> — dumb stringify (always quote). result = strings.ReplaceAll(result, "#<"+name+">", quoted) @@ -387,6 +505,187 @@ func (r *Rule) applyResult(captures map[string]string) string { return result } +// expandOptionalRepeat walks a result template and rewrites each top- +// level `[ ... ]` block by examining the captures referenced inside: +// +// - If any referenced marker has multiple captured iterations +// (values joined with \x01), emit the body N times, substituting +// the i-th iteration's value for each such marker and dropping +// single-valued markers into each iteration unchanged. +// - If no referenced marker is multi-captured BUT the single +// captures include non-empty values, emit the body once. +// - Otherwise drop the block. +// +// Nested brackets are not supported — Harbour uses a single level of +// `[...]` for the common repeat form. Callers that need deeper nesting +// can fall back to writing out separate #xcommand rules. +func expandOptionalRepeat(template string, captures map[string]string) string { + var out strings.Builder + i := 0 + for i < len(template) { + if template[i] == '[' { + // Find matching top-level ']'. Skip over quoted strings + // and nested brackets inside PP markers like `<.x.>`.
+ depth := 1 + j := i + 1 + for j < len(template) && depth > 0 { + switch template[j] { + case '[': + // Inside a marker `<...>` the `[` is just text; + // only count top-level brackets. + if inMarker(template, j) { + j++ + continue + } + depth++ + case ']': + if inMarker(template, j) { + j++ + continue + } + depth-- + if depth == 0 { + body := template[i+1 : j] + out.WriteString(expandBracketBody(body, captures)) + i = j + 1 + goto next + } + } + j++ + } + // Unmatched [ — copy literally. + out.WriteByte(template[i]) + i++ + next: + continue + } + out.WriteByte(template[i]) + i++ + } + return out.String() +} + +// inMarker reports whether position `p` in s is inside a PP marker +// reference like `<.x.>` / `<"x">` / `<(x)>` — where `[` and `]` are +// ordinary text, not template delimiters. +func inMarker(s string, p int) bool { + // Look backward for `<` not preceded by a marker-terminator. + for k := p - 1; k >= 0; k-- { + c := s[k] + if c == '>' { + return false + } + if c == '<' { + // Scan forward from `<` to see if we're still inside. + for m := k + 1; m < len(s) && m <= p; m++ { + if s[m] == '>' { + return false + } + } + return true + } + } + return false +} + +// expandBracketBody returns the optional-repeat body expanded once per +// iteration of its multi-captured markers. See expandOptionalRepeat. +func expandBracketBody(body string, captures map[string]string) string { + // Find marker names referenced inside the body. + refs := referencedMarkers(body) + iters := 1 + hasMulti := false + for _, name := range refs { + if val, ok := captures[name]; ok && strings.ContainsRune(val, '\x01') { + n := strings.Count(val, "\x01") + 1 + if n > iters { + iters = n + } + hasMulti = true + } + } + if !hasMulti { + // No multi-capture — include body once if any referenced marker + // has a (single) capture; otherwise drop. 
+ anyPresent := false + for _, name := range refs { + if _, ok := captures[name]; ok { + anyPresent = true + break + } + } + if !anyPresent { + return "" + } + return body + } + + // Pre-split each multi-captured referent into a per-iteration list. + parts := make(map[string][]string, len(refs)) + for _, name := range refs { + if val, ok := captures[name]; ok { + parts[name] = strings.Split(val, "\x01") + } + } + + var out strings.Builder + for iter := 0; iter < iters; iter++ { + piece := body + for name, vals := range parts { + var v string + if iter < len(vals) { + v = vals[iter] + } + quoted := ppQuote(v) + piece = strings.ReplaceAll(piece, "#<"+name+">", quoted) + piece = strings.ReplaceAll(piece, `<"`+name+`">`, quoted) + piece = strings.ReplaceAll(piece, "<("+name+")>", quoted) + if v != "" { + piece = strings.ReplaceAll(piece, "<."+name+".>", ".T.") + } else { + piece = strings.ReplaceAll(piece, "<."+name+".>", ".F.") + } + piece = strings.ReplaceAll(piece, "<"+name+">", v) + } + out.WriteString(piece) + } + return out.String() +} + +// referencedMarkers extracts marker names referenced inside a template +// fragment. Handles `<name>`, `<(name)>`, `<.name.>`, `<"name">`, and +// `#<name>` forms. +func referencedMarkers(s string) []string { + seen := map[string]bool{} + var out []string + i := 0 + for i < len(s) { + if s[i] == '<' { + j := i + 1 + // Skip leading punctuation forms: (name), .name., "name". + for j < len(s) && (s[j] == '(' || s[j] == '.' || s[j] == '"') { + j++ + } + start := j + for j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || + (s[j] >= 'A' && s[j] <= 'Z') || (s[j] >= '0' && s[j] <= '9')) { + j++ + } + if j > start { + name := s[start:j] + if !seen[name] { + seen[name] = true + out = append(out, name) + } + } + i = j + continue + } + i++ + } + return out +} + // cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references. // Only removes well-formed PP marker references, not comparison operators.
func cleanUnreferencedMarkers(s string) string { diff --git a/include/common.ch b/include/common.ch index 22a2d61..9fe6706 100644 --- a/include/common.ch +++ b/include/common.ch @@ -5,14 +5,11 @@ * ISNUMBER / ISCHARACTER / ISLOGICAL / ISDATE / ISBLOCK / ISMEMO / * ISOBJECT. Five registers those as direct RTL symbols in * hbrtl/register.go (each points at the same Go function as its - * HB_IS* twin), so they work without any preprocessor translation - * and without this include. + * HB_IS* twin), so they work without any preprocessor translation. * - * This header stays as a stub so `#include "common.ch"` in ported - * Harbour code doesn't error. The DEFAULT / UPDATE #xcommand forms - * from Harbour's common.ch are not yet supported — use explicit - * `IF xVar == NIL ; xVar := default ; ENDIF` until the preprocessor's - * #xcommand marker handling is extended. + * The DEFAULT / UPDATE #xcommand forms use Five's optional-repeat + * PP support — `DEFAULT a TO 1, b TO 2, c TO 3` expands into three + * `IF x == NIL ; x := v ; ENDIF` statements. */ #ifndef HB_COMMON_CH_ @@ -23,4 +20,8 @@ #define YES .T. #define NO .F. +#xcommand DEFAULT <v1> TO <x1> [, <vn> TO <xn> ] => IF <v1> == NIL ; <v1> := <x1> ; ENDIF [; IF <vn> == NIL ; <vn> := <xn> ; ENDIF ] + +#command UPDATE <v1> IF <exp> TO <v2> => IF <exp> ; <v1> := <v2> ; ENDIF + #endif