feat(pp): COPY TO via std.ch + four PP completeness fixes

`COPY TO <file> [FIELDS <list>] [FOR ...] [WHILE ...] [NEXT ...] [RECORD ...] [REST] [ALL]` reaches the parser as a plain function call to a new RTL primitive __dbCopy (rtlDbCopy in hbrtl/database.go).

Implementation: project the field list (case-insensitive name match against the source's structure, full copy when omitted), dbCreate the target file with that struct, open it under a temp alias, walk the source under dbEval-style FOR/WHILE/NEXT/RECORD/REST bounds, and GetValue/Append/PutValue per record into the target. SDF / DELIMITED variants stay parser no-ops until those backends arrive.

Wiring up COPY surfaced four longstanding gaps in the PP that had to be fixed for the rule to even reach the runtime:

* The `<(name)>` *pattern* marker was treated as a regular `<name>` with the parens baked into the captured key, so the matching result substitution `<(name)>` couldn't find it. parseOneMarker now strips the parens at parse time so capture key and result marker share the bare name. The smart-stringify result behavior is unchanged.
* matchSegment (the optional-clause matcher) bailed on every non-Regular marker. `[FIELDS <fields,...>]` therefore failed to match at all and the fields list arrived empty in the result template. matchSegment now handles MarkerList with paren-balanced capture and segment+outer literal stop boundaries.
* captureExpression only used the first literal in the pattern tail as a stop boundary. With std.ch's chain of optional clauses (`[TO <(f)>] [FIELDS ...] [FOR ...] [WHILE ...] ...`) the file-name marker was happy to gobble a trailing FOR clause when FIELDS was absent. It now stops at *any* of the remaining pattern literals.
* `<(name)>` smart-stringify on a list-typed capture wrapped the whole comma-joined string in one set of quotes — `{ "a , b" }` — instead of `{ "a", "b" }`. New helper quoteListElements splits on top-level commas (paren / bracket / brace / string-balanced) and quotes each element. applyResult now consults the rule's marker table to know which captures came from `<name,...>`.

Parser cleanup: COPY removed from the IDENT-statement no-op switch in both parseIdentStmt and parseExprStmt.

Gates green:

    go test ./...      : PASS
    FiveSql2 SQL:1999  : 43/43
    Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 15:00:18 +09:00
parent c2e7f7ea27
commit e961660f61
5 changed files with 357 additions and 23 deletions
--- a/compiler/pp/command.go
+++ b/compiler/pp/command.go
@@ -159,6 +159,16 @@ func parseOneMarker(inner string) Marker {
 		return Marker{Name: name, Type: MarkerWordList, ListValues: vals}
 	}

+	// <(name)> — extended-expression marker. In Harbour PP this captures
+	// a file-name-like extended expression and the matching result token
+	// `<(name)>` smart-stringifies it (already-quoted → keep, identifier
+	// → quote). Strip the parens so captures are stored under the bare
+	// name; result substitution then matches both `<(name)>` and `<name>`
+	// via the existing path.
+	if strings.HasPrefix(inner, "(") && strings.HasSuffix(inner, ")") {
+		return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRegular}
+	}
+
 	// <name> — regular
 	return Marker{Name: inner, Type: MarkerRegular}
 }
@@ -384,9 +394,9 @@ func (r *Rule) matchPattern(line string) map[string]string {
 // contain nested `[...]` — callers of the optional-repeat logic
 // flatten one level at a time.
 //
-// A "mini-matcher" that mirrors the main loop for MarkerRegular and
-// literal keywords. MarkerList and MarkerWild inside `[...]` would
-// need additional plumbing; defer those until real patterns need them.
+// A "mini-matcher" that mirrors the main loop for MarkerRegular,
+// MarkerRestricted, and MarkerList plus literal keywords. Any other
+// marker type (e.g. MarkerWild) returns (nil, startLi, false).
 func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outerTail []string) (map[string]string, int, bool) {
 	caps := make(map[string]string)
 	li := startLi
@@ -409,21 +419,70 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
 		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
 			inner := pw[1 : len(pw)-1]
 			m := parseOneMarker(inner)
-			if m.Type != MarkerRegular && m.Type != MarkerRestricted {
+			switch m.Type {
+			case MarkerList:
+				// Capture comma-separated tokens until we hit the
+				// segment's next literal, an outer literal, or the end
+				// of the line. Paren-balanced so `f(a,b)` inside the
+				// list doesn't terminate prematurely. Mirrors the main
+				// matchPattern's MarkerList branch.
+				stop := map[string]struct{}{}
+				for _, w := range segment[pi+1:] {
+					if w != "" && w != "[" && w != "]" &&
+						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
+						stop[strings.ToUpper(w)] = struct{}{}
+					}
+				}
+				for _, w := range outerTail {
+					if w != "" && w != "[" && w != "]" &&
+						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
+						stop[strings.ToUpper(w)] = struct{}{}
+					}
+				}
+				var parts []string
+				depth := 0
+				for li < len(lineWords) {
+					w := lineWords[li]
+					if depth == 0 {
+						key := w
+						if !caseSens {
+							key = strings.ToUpper(w)
+						}
+						if _, hit := stop[key]; hit {
+							break
+						}
+					}
+					switch w {
+					case "(", "[", "{":
+						depth++
+					case ")", "]", "}":
+						if depth > 0 {
+							depth--
+						}
+					}
+					parts = append(parts, w)
+					li++
+				}
+				caps[m.Name] = strings.Join(parts, " ")
+				continue
+			case MarkerRegular, MarkerRestricted:
+				// fall through to capture-one-expression below
+			default:
 				return nil, startLi, false
 			}
 			// Build a pseudo-pattern tail so captureExpression picks the
-			// right delimiter. Priority:
-			//   1. Next literal inside the same segment.
-			//   2. First literal in the outer-pattern tail — this is what
-			//      stops `[TO <v>] [FOR <for>]` from letting `<v>` swallow
-			//      the FOR clause.
+			// right delimiters. Priority:
+			//   1. Next literals inside the same segment.
+			//   2. Every literal in the outer-pattern tail — this is
+			//      what stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from
+			//      letting `<(f)>` swallow a trailing FOR/WHILE/NEXT
+			//      clause that happened to be present.
 			//   3. Repeat boundary (the segment's leading literal) so a
 			//      multi-iteration capture stops before the next iter.
 			tail := segment[pi+1:]
 			if !hasLiteralAfter(tail) {
-				if outerLit := firstLiteral(outerTail); outerLit != "" {
-					tail = []string{outerLit}
+				if hasLiteralAfter(outerTail) {
+					tail = outerTail
 				} else if repeatBoundary != "" {
 					tail = []string{repeatBoundary}
 				}
@@ -472,6 +531,72 @@ func hasLiteralAfter(segment []string) bool {
 	return false
 }

+// quoteListElements smart-stringifies a list-style capture. val is cut
+// at its top-level commas by splitTopLevelCommas; each piece is
+// trimmed, blank pieces are dropped, and the rest are re-joined with
+// ", ". A piece already wrapped in "...", '...' or [...] is emitted
+// verbatim; anything else goes through ppQuote. Used by `<(name)>` when
+// `name` is a `<name,...>` capture: `{ <(fields)> }` → `{ "a", "b" }`.
+func quoteListElements(val string) string {
+	parts := splitTopLevelCommas(val)
+	if len(parts) == 0 { // defensive: the splitter always appends a final piece
+		return ""
+	}
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		t := strings.TrimSpace(p)
+		if t == "" { // blank piece, e.g. between `a,,b` or after a trailing comma
+			continue
+		}
+		// First and last byte form a "…", '…' or […] pair — keep verbatim.
+		if n := len(t); n >= 2 &&
+			((t[0] == '"' && t[n-1] == '"') ||
+				(t[0] == '\'' && t[n-1] == '\'') ||
+				(t[0] == '[' && t[n-1] == ']')) {
+			out = append(out, t)
+			continue
+		}
+		out = append(out, ppQuote(t)) // bare token: let ppQuote pick the delimiter
+	}
+	return strings.Join(out, ", ") // "" when every piece was blank
+}
+
+// splitTopLevelCommas splits s at every comma that sits outside (), [],
+// {} and outside a "..." / '...' string, so commas nested in captured
+// PRG expressions survive. Pieces are untrimmed; at least one is returned.
+func splitTopLevelCommas(s string) []string {
+	var parts []string
+	depth := 0 // count of (, [, { opened and not yet closed
+	start := 0 // byte offset where the current piece begins
+	inStr := byte(0) // active quote byte; 0 while outside a string
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if inStr != 0 { // inside a string: only the matching quote ends it
+			if c == inStr {
+				inStr = 0
+			}
+			continue
+		}
+		switch c {
+		case '"', '\'':
+			inStr = c
+		case '(', '[', '{':
+			depth++
+		case ')', ']', '}': // closers are not matched by kind, only counted
+			if depth > 0 { // ignore a stray closer instead of going negative
+				depth--
+			}
+		case ',':
+			if depth == 0 {
+				parts = append(parts, s[start:i])
+				start = i + 1
+			}
+		}
+	}
+	parts = append(parts, s[start:]) // tail after the last top-level comma
+	return parts
+}
+
 // ppQuote wraps a captured value in a PRG string literal, picking a
 // delimiter that doesn't collide with characters already inside. Harbour
 // #<name> stringify takes the raw source text of the argument and must
@@ -510,6 +635,16 @@ func (r *Rule) applyResult(captures map[string]string) string {
 	// captures apply (the required-or-absent case).
 	result = expandOptionalRepeat(result, captures)

+	// Marker-name → list flag, so the smart-stringify branch below can
+	// emit per-element quoting (`{ "a", "b" }`) for list captures
+	// instead of treating the comma-joined string as one literal.
+	isList := make(map[string]bool, len(r.Markers))
+	for _, m := range r.Markers {
+		if m.Type == MarkerList {
+			isList[m.Name] = true
+		}
+	}
+
 	for name, val := range captures {
 		// Multi-capture markers are consumed by expandOptionalRepeat;
 		// the bare substitution for the joined form would produce
@@ -524,8 +659,9 @@ func (r *Rule) applyResult(captures map[string]string) string {
 		// <"name"> — explicit stringify.
 		result = strings.ReplaceAll(result, `<"`+name+`">`, quoted)
 		// <(name)> — smart stringify: already a string literal → keep;
-		// otherwise quote. `val` comes straight from the capture, so
-		// trim and check for surrounding quotes.
+		// list capture → quote each comma-separated element; otherwise
+		// quote whole. `val` comes straight from the capture, so trim
+		// and check for surrounding quotes.
 		trim := strings.TrimSpace(val)
 		smart := quoted
 		if n := len(trim); n >= 2 &&
@@ -533,6 +669,8 @@ func (r *Rule) applyResult(captures map[string]string) string {
 				(trim[0] == '\'' && trim[n-1] == '\'') ||
 				(trim[0] == '[' && trim[n-1] == ']')) {
 			smart = trim
+		} else if isList[name] {
+			smart = quoteListElements(val)
 		}
 		result = strings.ReplaceAll(result, "<("+name+")>", smart)
 		// <.name.> — logify (empty → .F., else .T.)
@@ -963,18 +1101,28 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
 		return ""
 	}

-	// Find next literal keyword in pattern to use as delimiter
-	delimWord := ""
+	// Collect every literal-keyword delimiter that follows in the
+	// pattern, not just the first. Optional clauses in std.ch sit
+	// next to one another (`[TO <(f)>] [FIELDS <fields,...>]
+	// [FOR <for>] [WHILE <while>] ...`), so the file-name marker
+	// must stop at TO's *successor* — but we don't know which
+	// successor will actually be present in the input. Stopping on
+	// any of them keeps `<(f)>` from swallowing a trailing
+	// `FOR x > 5` clause.
+	var delims []string
 	for pi := nextPi; pi < len(patternWords); pi++ {
 		pw := patternWords[pi]
-		if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
-			delimWord = pw
-			break
+		if pw == "" || pw == "[" || pw == "]" {
+			continue
 		}
+		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
+			continue
+		}
+		delims = append(delims, pw)
 	}

-	if delimWord != "" {
-		// Capture until the delimiter, paren-balancing so nested
+	if len(delims) > 0 {
+		// Capture until any delimiter is hit, paren-balancing so nested
 		// parens/brackets/braces inside the expression don't falsely
 		// terminate the capture. Harbour's own PP does the same —
 		// `_REGULAR_(&(a))` must capture `&(a)` (incl. inner parens)
@@ -983,8 +1131,17 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
 		depth := 0
 		for *li < len(lineWords) {
 			w := lineWords[*li]
-			if depth == 0 && matchWord(w, delimWord, caseSens) {
-				break
+			if depth == 0 {
+				stop := false
+				for _, d := range delims {
+					if matchWord(w, d, caseSens) {
+						stop = true
+						break
+					}
+				}
+				if stop {
+					break
+				}
 			}
 			switch w {
 			case "(", "[", "{":