feat(pp): optional-repeat [...] blocks — DEFAULT / UPDATE from common.ch
Harbour's `#xcommand DEFAULT <v1> TO <x1> [, <vn> TO <xn>] => ...` uses an optional, repeatable trailing `[...]` block to accept any number of `var TO default` pairs on a single line. Five's PP skipped bracket bodies during pattern matching and treated them as no-ops in result templates, so DEFAULT a TO 10, b TO 20, c TO 30 expanded (at best) the first pair and dropped the rest — and common.ch itself was documented as "not yet supported". Three concrete changes: 1. matchPattern now matches the `[...]` body repeatedly against remaining line tokens via a new matchSegment helper. Each successful iteration appends captures for the interior markers under the same name, joined with a \x01 sentinel. 2. matchSegment, when capturing the last marker in a body with no following literal, uses the body's opening literal (e.g. the `,` in `[, <vn> TO <xn>]`) as the iteration boundary. Otherwise captureExpression would greedily eat the rest of the line and collapse every remaining pair into one capture. 3. applyResult's new expandOptionalRepeat walks the result template for top-level `[...]` blocks. When a referenced marker is multi-captured it emits the body N times (substituting per-iter value); when it's single-captured it emits the body once; otherwise drops the block. A separate referencedMarkers scanner and an inMarker guard keep literal `[` / `]` inside PP markers (like `<.x.>`) from being mistaken for bracket delimiters. Side fix: ParseRule previously stripped every ` ;` as a Harbour line-continuation marker, but that also destroyed in-line PRG statement separators in result templates. Line joining is the preprocessor's job upstream — keep semicolons intact here. common.ch now ships real DEFAULT and UPDATE rules (`#xcommand DEFAULT`, `#command UPDATE`). Verified 1-, 2-, and 3-pair DEFAULT expansion plus `common.ch` inclusion from user code. FiveSql2 43/43, Harbour compat 56/56, Go test ALL PASS. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -70,8 +70,11 @@ func ParseRule(directive string, isCommand, caseSens bool) *Rule {
|
|||||||
pattern := strings.TrimSpace(parts[0])
|
pattern := strings.TrimSpace(parts[0])
|
||||||
result := strings.TrimSpace(parts[1])
|
result := strings.TrimSpace(parts[1])
|
||||||
|
|
||||||
// Handle line continuation (;)
|
// Earlier versions stripped every ` ;` as Harbour line-continuation.
|
||||||
result = strings.ReplaceAll(result, " ;", "")
|
// That also destroyed in-line PRG statement separators — `IF x ==
|
||||||
|
// NIL ; x := y ; ENDIF` lost all its semicolons. Line-continuation
|
||||||
|
// joining is the preprocessor's job (processLines), not this rule
|
||||||
|
// parser's. Keep the semicolons as-is.
|
||||||
|
|
||||||
rule := &Rule{
|
rule := &Rule{
|
||||||
Pattern: pattern,
|
Pattern: pattern,
|
||||||
@@ -280,17 +283,47 @@ func (r *Rule) matchPattern(line string) map[string]string {
|
|||||||
pi++
|
pi++
|
||||||
}
|
}
|
||||||
} else if pw == "[" {
|
} else if pw == "[" {
|
||||||
// Optional clause — skip to matching ]
|
// Optional, possibly-repeating sub-pattern. Try matching the
|
||||||
|
// bracketed body repeatedly against the remaining line; each
|
||||||
|
// successful iteration appends its marker captures under the
|
||||||
|
// same name with a \x01 separator. Used by Harbour forms
|
||||||
|
// like `DEFAULT <v1> TO <x1> [, <vn> TO <xn> ]` where the
|
||||||
|
// trailing bracket repeats for each additional pair.
|
||||||
depth := 1
|
depth := 1
|
||||||
pi++
|
bodyStart := pi + 1
|
||||||
for pi < len(patternWords) && depth > 0 {
|
bodyEnd := bodyStart
|
||||||
if patternWords[pi] == "[" {
|
for bodyEnd < len(patternWords) && depth > 0 {
|
||||||
|
if patternWords[bodyEnd] == "[" {
|
||||||
depth++
|
depth++
|
||||||
} else if patternWords[pi] == "]" {
|
} else if patternWords[bodyEnd] == "]" {
|
||||||
depth--
|
depth--
|
||||||
|
if depth == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pi++
|
bodyEnd++
|
||||||
}
|
}
|
||||||
|
body := patternWords[bodyStart:bodyEnd]
|
||||||
|
for li < len(lineWords) {
|
||||||
|
snapshotLi := li
|
||||||
|
iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens)
|
||||||
|
if !ok {
|
||||||
|
li = snapshotLi
|
||||||
|
break
|
||||||
|
}
|
||||||
|
for k, v := range iterCaps {
|
||||||
|
if prev, hit := captures[k]; hit && prev != "" {
|
||||||
|
captures[k] = prev + "\x01" + v
|
||||||
|
} else {
|
||||||
|
captures[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
li = newLi
|
||||||
|
if li == snapshotLi {
|
||||||
|
break // no progress — avoid infinite loop
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pi = bodyEnd + 1 // past ]
|
||||||
} else if pw == "]" {
|
} else if pw == "]" {
|
||||||
pi++
|
pi++
|
||||||
} else {
|
} else {
|
||||||
@@ -323,6 +356,76 @@ func (r *Rule) matchPattern(line string) map[string]string {
|
|||||||
return captures
|
return captures
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// matchSegment tries to match a bracketed sub-pattern against a slice
|
||||||
|
// of the line tokens starting at startLi. Returns per-iteration
|
||||||
|
// captures and the new line position on success. The segment cannot
|
||||||
|
// contain nested `[...]` — callers of the optional-repeat logic
|
||||||
|
// flatten one level at a time.
|
||||||
|
//
|
||||||
|
// A "mini-matcher" that mirrors the main loop for MarkerRegular and
|
||||||
|
// literal keywords. MarkerList and MarkerWild inside `[...]` would
|
||||||
|
// need additional plumbing; defer those until real patterns need them.
|
||||||
|
func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[string]string, int, bool) {
|
||||||
|
caps := make(map[string]string)
|
||||||
|
li := startLi
|
||||||
|
|
||||||
|
// When the segment starts with a literal (e.g. `,` in
|
||||||
|
// `[, <vn> TO <xn>]`), treat that literal as the natural boundary
|
||||||
|
// between iterations. Used as the delimiter for a trailing marker
|
||||||
|
// that would otherwise gobble the rest of the line.
|
||||||
|
repeatBoundary := ""
|
||||||
|
if len(segment) > 0 && !strings.HasPrefix(segment[0], "<") &&
|
||||||
|
segment[0] != "[" && segment[0] != "]" {
|
||||||
|
repeatBoundary = segment[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
for pi := 0; pi < len(segment); pi++ {
|
||||||
|
pw := segment[pi]
|
||||||
|
if li >= len(lineWords) {
|
||||||
|
return nil, startLi, false
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
|
||||||
|
inner := pw[1 : len(pw)-1]
|
||||||
|
m := parseOneMarker(inner)
|
||||||
|
if m.Type != MarkerRegular && m.Type != MarkerRestricted {
|
||||||
|
return nil, startLi, false
|
||||||
|
}
|
||||||
|
// Build a pseudo-pattern tail so captureExpression picks the
|
||||||
|
// right delimiter. If there's a next literal inside `segment`,
|
||||||
|
// use it; otherwise fall back to the repeat boundary so the
|
||||||
|
// capture stops before the next iteration starts.
|
||||||
|
tail := segment[pi+1:]
|
||||||
|
if !hasLiteralAfter(tail) && repeatBoundary != "" {
|
||||||
|
tail = []string{repeatBoundary}
|
||||||
|
}
|
||||||
|
captured := captureExpression(lineWords, &li, tail, 0, caseSens)
|
||||||
|
caps[m.Name] = captured
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !matchWord(lineWords[li], pw, caseSens) {
|
||||||
|
return nil, startLi, false
|
||||||
|
}
|
||||||
|
li++
|
||||||
|
}
|
||||||
|
return caps, li, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasLiteralAfter reports whether segment holds at least one literal
// keyword token. Empty tokens, the bracket tokens `[` and `]`, and
// `<...>` markers are not literals. Callers use the answer to decide
// whether a marker capture has a real delimiter or needs a synthetic one.
func hasLiteralAfter(segment []string) bool {
	for i := range segment {
		switch tok := segment[i]; {
		case tok == "", tok == "[", tok == "]":
			// Structural or empty token — keep looking.
		case strings.HasPrefix(tok, "<") && strings.HasSuffix(tok, ">"):
			// Marker — keep looking.
		default:
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
// ppQuote wraps a captured value in a PRG string literal, picking a
|
// ppQuote wraps a captured value in a PRG string literal, picking a
|
||||||
// delimiter that doesn't collide with characters already inside. Harbour
|
// delimiter that doesn't collide with characters already inside. Harbour
|
||||||
// #<name> stringify takes the raw source text of the argument and must
|
// #<name> stringify takes the raw source text of the argument and must
|
||||||
@@ -353,7 +456,22 @@ func ppQuote(val string) string {
|
|||||||
func (r *Rule) applyResult(captures map[string]string) string {
|
func (r *Rule) applyResult(captures map[string]string) string {
|
||||||
result := r.ResultTmpl
|
result := r.ResultTmpl
|
||||||
|
|
||||||
|
// Expand optional-repeat `[ ... ]` segments in the template. If any
|
||||||
|
// marker inside a bracketed section was multi-captured during the
|
||||||
|
// pattern match (values joined with \x01), emit the body once per
|
||||||
|
// iteration with per-iter values. If no markers inside are multi-
|
||||||
|
// captured, the bracket body is included once with whatever single
|
||||||
|
// captures apply (the required-or-absent case).
|
||||||
|
result = expandOptionalRepeat(result, captures)
|
||||||
|
|
||||||
for name, val := range captures {
|
for name, val := range captures {
|
||||||
|
// Multi-capture markers are consumed by expandOptionalRepeat;
|
||||||
|
// the bare substitution for the joined form would produce
|
||||||
|
// garbage (values separated by \x01). Skip them here and let
|
||||||
|
// any remaining bare `<name>` fall through to the cleanup.
|
||||||
|
if strings.ContainsRune(val, '\x01') {
|
||||||
|
continue
|
||||||
|
}
|
||||||
quoted := ppQuote(val)
|
quoted := ppQuote(val)
|
||||||
// #<name> — dumb stringify (always quote).
|
// #<name> — dumb stringify (always quote).
|
||||||
result = strings.ReplaceAll(result, "#<"+name+">", quoted)
|
result = strings.ReplaceAll(result, "#<"+name+">", quoted)
|
||||||
@@ -387,6 +505,187 @@ func (r *Rule) applyResult(captures map[string]string) string {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// expandOptionalRepeat walks a result template and rewrites each top-
|
||||||
|
// level `[ ... ]` block by examining the captures referenced inside:
|
||||||
|
//
|
||||||
|
// - If any referenced marker has multiple captured iterations
|
||||||
|
// (values joined with \x01), emit the body N times, substituting
|
||||||
|
// the i-th iteration's value for each such marker and dropping
|
||||||
|
// single-valued markers into each iteration unchanged.
|
||||||
|
// - If no referenced marker is multi-captured BUT the single
|
||||||
|
// captures include non-empty values, emit the body once.
|
||||||
|
// - Otherwise drop the block.
|
||||||
|
//
|
||||||
|
// Nested brackets are not supported — Harbour uses a single level of
|
||||||
|
// `[...]` for the common repeat form. Callers that need deeper nesting
|
||||||
|
// can fall back to writing out separate #xcommand rules.
|
||||||
|
func expandOptionalRepeat(template string, captures map[string]string) string {
|
||||||
|
var out strings.Builder
|
||||||
|
i := 0
|
||||||
|
for i < len(template) {
|
||||||
|
if template[i] == '[' {
|
||||||
|
// Find matching top-level ']'. Skip over quoted strings
|
||||||
|
// and nested brackets inside PP markers like `<.x.>`.
|
||||||
|
depth := 1
|
||||||
|
j := i + 1
|
||||||
|
for j < len(template) && depth > 0 {
|
||||||
|
switch template[j] {
|
||||||
|
case '[':
|
||||||
|
// Inside a marker `<...>` the `[` is just text;
|
||||||
|
// only count top-level brackets.
|
||||||
|
if inMarker(template, j) {
|
||||||
|
j++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
depth++
|
||||||
|
case ']':
|
||||||
|
if inMarker(template, j) {
|
||||||
|
j++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
depth--
|
||||||
|
if depth == 0 {
|
||||||
|
body := template[i+1 : j]
|
||||||
|
out.WriteString(expandBracketBody(body, captures))
|
||||||
|
i = j + 1
|
||||||
|
goto next
|
||||||
|
}
|
||||||
|
}
|
||||||
|
j++
|
||||||
|
}
|
||||||
|
// Unmatched [ — copy literally.
|
||||||
|
out.WriteByte(template[i])
|
||||||
|
i++
|
||||||
|
next:
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out.WriteByte(template[i])
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
return out.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// inMarker reports whether index p of s lies inside an unterminated
// `<...` span, i.e. the nearest angle bracket to the left of p is a `<`
// and s[p] is not itself the closing `>`. The optional-repeat expander
// uses it to treat `[` and `]` inside such a span as ordinary text.
//
// The check is purely lexical: any `<` (including a comparison
// operator) that is not followed by `>` before p opens a span.
func inMarker(s string, p int) bool {
	for k := p - 1; k >= 0; k-- {
		switch s[k] {
		case '>':
			// A closed span sits between p and any earlier `<`.
			return false
		case '<':
			// Nothing between k and p is `>` (the walk would have
			// stopped there), so only s[p] itself can close the span.
			return p >= len(s) || s[p] != '>'
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// expandBracketBody returns the optional-repeat body expanded once per
|
||||||
|
// iteration of its multi-captured markers. See expandOptionalRepeat.
|
||||||
|
func expandBracketBody(body string, captures map[string]string) string {
|
||||||
|
// Find marker names referenced inside the body.
|
||||||
|
refs := referencedMarkers(body)
|
||||||
|
iters := 1
|
||||||
|
hasMulti := false
|
||||||
|
for _, name := range refs {
|
||||||
|
if val, ok := captures[name]; ok && strings.ContainsRune(val, '\x01') {
|
||||||
|
n := strings.Count(val, "\x01") + 1
|
||||||
|
if n > iters {
|
||||||
|
iters = n
|
||||||
|
}
|
||||||
|
hasMulti = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !hasMulti {
|
||||||
|
// No multi-capture — include body once if any referenced marker
|
||||||
|
// has a (single) capture; otherwise drop.
|
||||||
|
anyPresent := false
|
||||||
|
for _, name := range refs {
|
||||||
|
if _, ok := captures[name]; ok {
|
||||||
|
anyPresent = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !anyPresent {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pre-split each multi-captured referent into a per-iteration list.
|
||||||
|
parts := make(map[string][]string, len(refs))
|
||||||
|
for _, name := range refs {
|
||||||
|
if val, ok := captures[name]; ok {
|
||||||
|
parts[name] = strings.Split(val, "\x01")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var out strings.Builder
|
||||||
|
for iter := 0; iter < iters; iter++ {
|
||||||
|
piece := body
|
||||||
|
for name, vals := range parts {
|
||||||
|
var v string
|
||||||
|
if iter < len(vals) {
|
||||||
|
v = vals[iter]
|
||||||
|
}
|
||||||
|
quoted := ppQuote(v)
|
||||||
|
piece = strings.ReplaceAll(piece, "#<"+name+">", quoted)
|
||||||
|
piece = strings.ReplaceAll(piece, `<"`+name+`">`, quoted)
|
||||||
|
piece = strings.ReplaceAll(piece, "<("+name+")>", quoted)
|
||||||
|
if v != "" {
|
||||||
|
piece = strings.ReplaceAll(piece, "<."+name+".>", ".T.")
|
||||||
|
} else {
|
||||||
|
piece = strings.ReplaceAll(piece, "<."+name+".>", ".F.")
|
||||||
|
}
|
||||||
|
piece = strings.ReplaceAll(piece, "<"+name+">", v)
|
||||||
|
}
|
||||||
|
out.WriteString(piece)
|
||||||
|
}
|
||||||
|
return out.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// referencedMarkers lists the distinct marker names that follow a `<`
// in s, in order of first appearance. After the `<`, any run of `(`,
// `.` or `"` is skipped, then the longest run of ASCII letters, digits
// and underscores is taken as the name; this covers `<name>`,
// `<(name)>`, `<.name.>`, `<"name">` and `#<name>`. The closing `>` is
// not verified. Returns nil when no name is found.
func referencedMarkers(s string) []string {
	isDecoration := func(c byte) bool {
		return c == '(' || c == '.' || c == '"'
	}
	isIdent := func(c byte) bool {
		switch {
		case c == '_':
			return true
		case 'a' <= c && c <= 'z':
			return true
		case 'A' <= c && c <= 'Z':
			return true
		case '0' <= c && c <= '9':
			return true
		}
		return false
	}

	var names []string
	known := make(map[string]struct{})
	for pos := 0; pos < len(s); {
		if s[pos] != '<' {
			pos++
			continue
		}

		cur := pos + 1
		for cur < len(s) && isDecoration(s[cur]) {
			cur++
		}
		begin := cur
		for cur < len(s) && isIdent(s[cur]) {
			cur++
		}

		if cur > begin {
			name := s[begin:cur]
			if _, dup := known[name]; !dup {
				known[name] = struct{}{}
				names = append(names, name)
			}
		}
		// Resume right after whatever was consumed (always past the `<`).
		pos = cur
	}
	return names
}
|
||||||
|
|
||||||
// cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references.
|
// cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references.
|
||||||
// Only removes well-formed PP marker references, not comparison operators.
|
// Only removes well-formed PP marker references, not comparison operators.
|
||||||
func cleanUnreferencedMarkers(s string) string {
|
func cleanUnreferencedMarkers(s string) string {
|
||||||
|
|||||||
@@ -5,14 +5,11 @@
|
|||||||
* ISNUMBER / ISCHARACTER / ISLOGICAL / ISDATE / ISBLOCK / ISMEMO /
|
* ISNUMBER / ISCHARACTER / ISLOGICAL / ISDATE / ISBLOCK / ISMEMO /
|
||||||
* ISOBJECT. Five registers those as direct RTL symbols in
|
* ISOBJECT. Five registers those as direct RTL symbols in
|
||||||
* hbrtl/register.go (each points at the same Go function as its
|
* hbrtl/register.go (each points at the same Go function as its
|
||||||
* HB_IS* twin), so they work without any preprocessor translation
|
* HB_IS* twin), so they work without any preprocessor translation.
|
||||||
* and without this include.
|
|
||||||
*
|
*
|
||||||
* This header stays as a stub so `#include "common.ch"` in ported
|
* The DEFAULT / UPDATE #xcommand forms use Five's optional-repeat
|
||||||
* Harbour code doesn't error. The DEFAULT / UPDATE #xcommand forms
|
* PP support — `DEFAULT a TO 1, b TO 2, c TO 3` expands into three
|
||||||
* from Harbour's common.ch are not yet supported — use explicit
|
* `IF x == NIL ; x := v ; ENDIF` statements.
|
||||||
* `IF xVar == NIL ; xVar := default ; ENDIF` until the preprocessor's
|
|
||||||
* #xcommand marker handling is extended.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef HB_COMMON_CH_
|
#ifndef HB_COMMON_CH_
|
||||||
@@ -23,4 +20,8 @@
|
|||||||
#define YES .T.
|
#define YES .T.
|
||||||
#define NO .F.
|
#define NO .F.
|
||||||
|
|
||||||
|
#xcommand DEFAULT <v1> TO <x1> [, <vn> TO <xn>] => IF <v1> == NIL ; <v1> := <x1> ; ENDIF [; IF <vn> == NIL ; <vn> := <xn> ; ENDIF ]
|
||||||
|
|
||||||
|
#command UPDATE <v1> IF <exp> TO <v2> => IF <exp> ; <v1> := <v2> ; ENDIF
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user