From e961660f6135fdcd83fb73bf0de16ad57d7ad1e6 Mon Sep 17 00:00:00 2001
From: CharlesKWON <charleskwonohjun@gmail.com>
Date: Thu, 30 Apr 2026 15:00:18 +0900
Subject: [PATCH] feat(pp): COPY TO via std.ch + four PP completeness fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`COPY TO <file> [FIELDS <list>] [FOR ...] [WHILE ...] [NEXT ...]
[RECORD ...] [REST] [ALL]` reaches the parser as a plain function
call to a new RTL primitive __dbCopy (rtlDbCopy in hbrtl/database.go).

Implementation: project the field list (case-insensitive name match
against the source's structure, full copy when omitted), dbCreate the
target file with that struct, open it under a temp alias, walk the
source under dbEval-style FOR/WHILE/NEXT/RECORD/REST bounds, and
GetValue/Append/PutValue per record into the target. SDF / DELIMITED
variants stay parser no-ops until those backends arrive.

Wiring up COPY surfaced four longstanding gaps in the PP that had to
be fixed for the rule to even reach the runtime:

  * `<(name)>` *pattern* marker was treated as a regular `<name>`
    with the parens baked into the captured key, so the matching
    result substitution `<(name)>` couldn't find it. parseOneMarker
    now strips the parens at parse time so capture key and result
    marker share the bare name. The smart-stringify result behavior
    is unchanged.
  * matchSegment (the optional-clause matcher) bailed on every
    non-Regular marker. `[FIELDS <fields,...>]` therefore failed to
    match at all and the fields list arrived empty in the result
    template. matchSegment now handles MarkerList with paren-balanced
    capture and segment+outer literal stop boundaries.
  * captureExpression only used the first literal in the pattern
    tail as a stop boundary. With std.ch's chain of optional
    clauses (`[TO <(f)>] [FIELDS ...] [FOR ...] [WHILE ...] ...`)
    the file-name marker was happy to gobble a trailing FOR clause
    when FIELDS was absent. It now stops at *any* of the remaining
    pattern literals.
  * `<(name)>` smart-stringify on a list-typed capture wrapped the
    whole comma-joined string in one set of quotes — `{ "a , b" }` —
    instead of `{ "a", "b" }`. New helper quoteListElements splits on
    top-level commas (paren / bracket / brace / string-balanced) and
    quotes each element. applyResult now consults the rule's marker
    table to know which captures came from `<name,...>`.

Parser cleanup: COPY removed from the IDENT-statement no-op switch in
parseIdentStmt (the only parser.go change in this patch).

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 compiler/parser/parser.go |   2 +-
 compiler/pp/command.go    | 201 +++++++++++++++++++++++++++++++++-----
 compiler/pp/std.ch        |  11 +++
 hbrtl/database.go         | 165 +++++++++++++++++++++++++++++++
 hbrtl/register.go         |   1 +
 5 files changed, 357 insertions(+), 23 deletions(-)
diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go
index ccec679..27b482f 100644
--- a/compiler/parser/parser.go
+++ b/compiler/parser/parser.go
@@ -1155,7 +1155,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
 	// rewritten by compiler/pp/std.ch into function calls before the
 	// parser sees them.
 	switch upper {
-	case "COPY", "SORT", "TOTAL", "UPDATE",
+	case "SORT", "TOTAL", "UPDATE",
 		"LABEL", "REPORT", "ACCEPT", "INPUT",
 		"JOIN", "RELEASE", "SAVE", "RESTORE",
 		"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
diff --git a/compiler/pp/command.go b/compiler/pp/command.go
index ce2c177..0f30120 100644
--- a/compiler/pp/command.go
+++ b/compiler/pp/command.go
@@ -159,6 +159,16 @@ func parseOneMarker(inner string) Marker {
 		return Marker{Name: name, Type: MarkerWordList, ListValues: vals}
 	}
 
+	// <(name)> — extended-expression marker. In Harbour PP this captures
+	// a file-name-like extended expression and the matching result token
+	// `<(name)>` smart-stringifies it (already-quoted → keep, identifier
+	// → quote). Strip the parens so captures are stored under the bare
+	// name; result substitution then matches both `<(name)>` and `<name>`
+	// via the existing path.
+	if strings.HasPrefix(inner, "(") && strings.HasSuffix(inner, ")") {
+		return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRegular}
+	}
+
 	// <name> — regular
 	return Marker{Name: inner, Type: MarkerRegular}
 }
@@ -384,9 +394,9 @@ func (r *Rule) matchPattern(line string) map[string]string {
 // contain nested `[...]` — callers of the optional-repeat logic
 // flatten one level at a time.
 //
-// A "mini-matcher" that mirrors the main loop for MarkerRegular and
-// literal keywords. MarkerList and MarkerWild inside `[...]` would
-// need additional plumbing; defer those until real patterns need them.
+// A "mini-matcher" that mirrors the main loop for MarkerRegular,
+// MarkerRestricted, and MarkerList plus literal keywords. MarkerWild
+// inside `[...]` is rare and still defers to the main matcher.
 func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outerTail []string) (map[string]string, int, bool) {
 	caps := make(map[string]string)
 	li := startLi
@@ -409,21 +419,70 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
 		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
 			inner := pw[1 : len(pw)-1]
 			m := parseOneMarker(inner)
-			if m.Type != MarkerRegular && m.Type != MarkerRestricted {
+			switch m.Type {
+			case MarkerList:
+				// Capture comma-separated tokens until we hit the
+				// segment's next literal, an outer literal, or the end
+				// of the line. Paren-balanced so `f(a,b)` inside the
+				// list doesn't terminate prematurely. Mirrors the main
+				// matchPattern's MarkerList branch.
+				stop := map[string]struct{}{}
+				for _, w := range segment[pi+1:] {
+					if w != "" && w != "[" && w != "]" &&
+						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
+						stop[strings.ToUpper(w)] = struct{}{}
+					}
+				}
+				for _, w := range outerTail {
+					if w != "" && w != "[" && w != "]" &&
+						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
+						stop[strings.ToUpper(w)] = struct{}{}
+					}
+				}
+				var parts []string
+				depth := 0
+				for li < len(lineWords) {
+					w := lineWords[li]
+					if depth == 0 {
+						key := w
+						if !caseSens {
+							key = strings.ToUpper(w)
+						}
+						if _, hit := stop[key]; hit {
+							break
+						}
+					}
+					switch w {
+					case "(", "[", "{":
+						depth++
+					case ")", "]", "}":
+						if depth > 0 {
+							depth--
+						}
+					}
+					parts = append(parts, w)
+					li++
+				}
+				caps[m.Name] = strings.Join(parts, " ")
+				continue
+			case MarkerRegular, MarkerRestricted:
+				// fall through to capture-one-expression below
+			default:
 				return nil, startLi, false
 			}
 			// Build a pseudo-pattern tail so captureExpression picks the
-			// right delimiter. Priority:
-			//   1. Next literal inside the same segment.
-			//   2. First literal in the outer-pattern tail — this is what
-			//      stops `[TO <v>] [FOR <for>]` from letting `<v>` swallow
-			//      the FOR clause.
+			// right delimiters. Priority:
+			//   1. Next literals inside the same segment.
+			//   2. Every literal in the outer-pattern tail — this is
+			//      what stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from
+			//      letting `<(f)>` swallow a trailing FOR/WHILE/NEXT
+			//      clause that happened to be present.
 			//   3. Repeat boundary (the segment's leading literal) so a
 			//      multi-iteration capture stops before the next iter.
 			tail := segment[pi+1:]
 			if !hasLiteralAfter(tail) {
-				if outerLit := firstLiteral(outerTail); outerLit != "" {
-					tail = []string{outerLit}
+				if hasLiteralAfter(outerTail) {
+					tail = outerTail
 				} else if repeatBoundary != "" {
 					tail = []string{repeatBoundary}
 				}
@@ -472,6 +531,72 @@ func hasLiteralAfter(segment []string) bool {
 	return false
 }
 
+// quoteListElements smart-stringifies a list-style capture: val is
+// split by splitTopLevelCommas and each trimmed, non-empty element is
+// quoted with ppQuote, then the elements are re-joined with ", ".
+// Elements already delimited as "...", '...' or [...] are kept as-is
+// so `"a", "b"` round-trips intact. Used by `<(name)>` substitution
+// for list captures: `{ <(fields)> }` expands to `{ "a", "b", "c" }`.
+func quoteListElements(val string) string {
+	parts := splitTopLevelCommas(val)
+	if len(parts) == 0 {
+		return ""
+	}
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		t := strings.TrimSpace(p)
+		if t == "" {
+			continue
+		}
+		// Already delimited as "...", '...' or [...] — keep verbatim.
+		if n := len(t); n >= 2 &&
+			((t[0] == '"' && t[n-1] == '"') ||
+				(t[0] == '\'' && t[n-1] == '\'') ||
+				(t[0] == '[' && t[n-1] == ']')) {
+			out = append(out, t)
+			continue
+		}
+		out = append(out, ppQuote(t))
+	}
+	return strings.Join(out, ", ")
+}
+
+// splitTopLevelCommas splits s on commas that sit outside (), [], {}
+// and outside "..." / '...' strings. Elements are returned untrimmed;
+// the result always has at least one entry (s itself when no split).
+func splitTopLevelCommas(s string) []string {
+	var parts []string
+	depth := 0
+	start := 0
+	inStr := byte(0)
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if inStr != 0 {
+			if c == inStr {
+				inStr = 0
+			}
+			continue
+		}
+		switch c {
+		case '"', '\'':
+			inStr = c
+		case '(', '[', '{':
+			depth++
+		case ')', ']', '}':
+			if depth > 0 {
+				depth--
+			}
+		case ',':
+			if depth == 0 {
+				parts = append(parts, s[start:i])
+				start = i + 1
+			}
+		}
+	}
+	parts = append(parts, s[start:])
+	return parts
+}
+
 // ppQuote wraps a captured value in a PRG string literal, picking a
 // delimiter that doesn't collide with characters already inside. Harbour
 // #<name> stringify takes the raw source text of the argument and must
@@ -510,6 +635,16 @@ func (r *Rule) applyResult(captures map[string]string) string {
 	// captures apply (the required-or-absent case).
 	result = expandOptionalRepeat(result, captures)
 
+	// Marker-name → list flag, so the smart-stringify branch below can
+	// emit per-element quoting (`{ "a", "b" }`) for list captures
+	// instead of treating the comma-joined string as one literal.
+	isList := make(map[string]bool, len(r.Markers))
+	for _, m := range r.Markers {
+		if m.Type == MarkerList {
+			isList[m.Name] = true
+		}
+	}
+
 	for name, val := range captures {
 		// Multi-capture markers are consumed by expandOptionalRepeat;
 		// the bare substitution for the joined form would produce
@@ -524,8 +659,9 @@ func (r *Rule) applyResult(captures map[string]string) string {
 		// <"name"> — explicit stringify.
 		result = strings.ReplaceAll(result, `<"`+name+`">`, quoted)
 		// <(name)> — smart stringify: already a string literal → keep;
-		// otherwise quote. `val` comes straight from the capture, so
-		// trim and check for surrounding quotes.
+		// list capture → quote each comma-separated element; otherwise
+		// quote whole. `val` comes straight from the capture, so trim
+		// and check for surrounding quotes.
 		trim := strings.TrimSpace(val)
 		smart := quoted
 		if n := len(trim); n >= 2 &&
@@ -533,6 +669,8 @@ func (r *Rule) applyResult(captures map[string]string) string {
 				(trim[0] == '\'' && trim[n-1] == '\'') ||
 				(trim[0] == '[' && trim[n-1] == ']')) {
 			smart = trim
+		} else if isList[name] {
+			smart = quoteListElements(val)
 		}
 		result = strings.ReplaceAll(result, "<("+name+")>", smart)
 		// <.name.> — logify (empty → .F., else .T.)
@@ -963,18 +1101,28 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
 		return ""
 	}
 
-	// Find next literal keyword in pattern to use as delimiter
-	delimWord := ""
+	// Collect every literal-keyword delimiter that follows in the
+	// pattern, not just the first. Optional clauses in std.ch sit
+	// next to one another (`[TO <(f)>] [FIELDS <fields,...>]
+	// [FOR <for>] [WHILE <while>] ...`), so the file-name marker
+	// must stop at TO's *successor* — but we don't know which
+	// successor will actually be present in the input. Stopping on
+	// any of them keeps `<(f)>` from swallowing a trailing
+	// `FOR x > 5` clause.
+	var delims []string
 	for pi := nextPi; pi < len(patternWords); pi++ {
 		pw := patternWords[pi]
-		if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
-			delimWord = pw
-			break
+		if pw == "" || pw == "[" || pw == "]" {
+			continue
 		}
+		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
+			continue
+		}
+		delims = append(delims, pw)
 	}
 
-	if delimWord != "" {
-		// Capture until the delimiter, paren-balancing so nested
+	if len(delims) > 0 {
+		// Capture until any delimiter is hit, paren-balancing so nested
 		// parens/brackets/braces inside the expression don't falsely
 		// terminate the capture. Harbour's own PP does the same —
 		// `_REGULAR_(&(a))` must capture `&(a)` (incl. inner parens)
@@ -983,8 +1131,17 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
 		depth := 0
 		for *li < len(lineWords) {
 			w := lineWords[*li]
-			if depth == 0 && matchWord(w, delimWord, caseSens) {
-				break
+			if depth == 0 {
+				stop := false
+				for _, d := range delims {
+					if matchWord(w, d, caseSens) {
+						stop = true
+						break
+					}
+				}
+				if stop {
+					break
+				}
 			}
 			switch w {
 			case "(", "[", "{":
diff --git a/compiler/pp/std.ch b/compiler/pp/std.ch
index 52f30d0..f5ccd01 100644
--- a/compiler/pp/std.ch
+++ b/compiler/pp/std.ch
@@ -62,6 +62,17 @@
          <v> := __dbAverage( <{x}>, ;
                              <{for}>, <{while}>, <next>, <rec>, <.rest.> )
 
+/* --- bulk record export ---
+   COPY TO copies visible records of the current workarea into a fresh
+   DBF by rewriting to __dbCopy(). FIELDS/FOR/WHILE/NEXT/RECORD/REST work
+   as in Harbour; ALL is accepted and passes nothing. SDF and DELIMITED
+   variants stay as silent no-ops in the parser until their backends land. */
+#command COPY [TO <(f)>] [FIELDS <fields,...>] ;
+              [FOR <for>] [WHILE <while>] [NEXT <next>] ;
+              [RECORD <rec>] [<rest:REST>] [ALL] => ;
+         __dbCopy( <(f)>, { <(fields)> }, ;
+                   <{for}>, <{while}>, <next>, <rec>, <.rest.> )
+
 /* --- bulk maintenance --- */
 #command REINDEX                         => DbReindex()
 #command PACK                            => DbPack()
diff --git a/hbrtl/database.go b/hbrtl/database.go
index dce125d..631089a 100644
--- a/hbrtl/database.go
+++ b/hbrtl/database.go
@@ -8,6 +8,8 @@
 package hbrtl
 
 import (
+	"strings"
+
 	"five/hbrt"
 	"five/hbrdd"
 	"five/hbrdd/dbf"
@@ -766,6 +768,169 @@ func rtlDbAverage(t *hbrt.Thread) {
 	t.RetDouble(sum/float64(n), 10, 2)
 }
 
+// rtlDbCopy implements __dbCopy(cFile, aFields, bFor, bWhile, nNext,
+// xRec, lRest) — copy records from the current workarea into a
+// freshly created DBF. Returns .F. when there is no current workarea,
+// no file name, no field to copy, or the target cannot be created or
+// opened; .T. otherwise. Field projection: an empty/missing aFields
+// copies the whole structure; otherwise only fields whose names match
+// (case-insensitive) are carried over. Used by `COPY TO <f> [FIELDS]
+// [FOR] [WHILE] [NEXT] [RECORD] [REST] [ALL]` in std.ch.
+//
+// Scope follows Harbour's hb_waEval: RECORD copies only that record,
+// NEXT n visits n records starting at the current one, WHILE and REST
+// start at the current record, and only the default (ALL) scope
+// rewinds to the top. The source is repositioned only after the
+// target is open, so a failed create leaves the record pointer alone.
+//
+// Harbour's __dbCopy also accepts cRDD / nConnection / cCodepage / xDelim
+// (params 8..11). Five only supports DBFNTX→DBFNTX for now; SDF/DELIMITED
+// copies stay parser no-ops until that backend lands.
+func rtlDbCopy(t *hbrt.Thread) {
+	nParams := t.ParamCount()
+	t.Frame(nParams, 0)
+	defer t.EndProcFast()
+
+	wam := getWA(t)
+	if wam == nil {
+		t.RetBool(false)
+		return
+	}
+	srcArea := wam.Current()
+	if srcArea == nil {
+		t.RetBool(false)
+		return
+	}
+
+	if nParams < 1 || t.Local(1).IsNil() || t.Local(1).AsString() == "" {
+		t.RetBool(false)
+		return
+	}
+	cFile := t.Local(1).AsString()
+
+	// Field projection. std.ch passes `{ <(fields)> }`, so each entry is
+	// already a string; upper-case it for the case-insensitive match.
+	var srcIdx []int
+	var dstFields []hbrdd.FieldInfo
+	wanted := map[string]struct{}{}
+	useAll := true
+	if nParams >= 2 && t.Local(2).IsArray() {
+		if arr := t.Local(2).AsArray(); arr != nil && len(arr.Items) > 0 {
+			useAll = false
+			for _, it := range arr.Items {
+				if s := strings.ToUpper(strings.TrimSpace(it.AsString())); s != "" {
+					wanted[s] = struct{}{}
+				}
+			}
+		}
+	}
+	for i, n := 0, srcArea.FieldCount(); i < n; i++ {
+		fi := srcArea.GetFieldInfo(i)
+		if _, ok := wanted[strings.ToUpper(fi.Name)]; useAll || ok {
+			srcIdx = append(srcIdx, i)
+			dstFields = append(dstFields, fi)
+		}
+	}
+	if len(dstFields) == 0 {
+		// Nothing to copy — no source field matched the FIELDS list.
+		t.RetBool(false)
+		return
+	}
+
+	// Scope arguments. hasNext/hasRec record whether the clause was given
+	// at all, so NEXT <= 0 copies nothing instead of meaning "no limit".
+	var bFor, bWhile hbrt.Value
+	if nParams >= 3 {
+		bFor = t.Local(3)
+	}
+	if nParams >= 4 {
+		bWhile = t.Local(4)
+	}
+	hasNext := nParams >= 5 && !t.Local(5).IsNil()
+	nLeft := 0
+	if hasNext {
+		nLeft = t.Local(5).AsInt()
+	}
+	hasRec := nParams >= 6 && !t.Local(6).IsNil()
+	lRest := nParams >= 7 && !t.Local(7).IsNil() && t.Local(7).AsBool()
+
+	// Create + open the destination under a temp alias so it cannot clash
+	// with an alias the caller derived from the file's basename.
+	drv, err := hbrdd.GetDriver("DBFNTX")
+	if err != nil {
+		t.RetBool(false)
+		return
+	}
+	if _, err := drv.Create(hbrdd.CreateParams{Path: cFile, Fields: dstFields}); err != nil {
+		t.RetBool(false)
+		return
+	}
+	srcSel := wam.CurrentNum()
+	dstSel, err := wam.Open("DBFNTX", cFile, "__copytmp", false, false)
+	if err != nil {
+		wam.SelectByNum(srcSel)
+		t.RetBool(false)
+		return
+	}
+	dstArea := wam.AreaAt(dstSel)
+	wam.SelectByNum(srcSel)
+	if dstArea == nil {
+		t.RetBool(false)
+		return
+	}
+	// Deferred so the temp alias is released, and the source reselected,
+	// even if a FOR/WHILE block panics mid-scan.
+	defer func() {
+		wam.SelectByNum(dstSel)
+		wam.Close()
+		wam.SelectByNum(srcSel)
+	}()
+
+	// Position the source according to the scope.
+	switch {
+	case hasRec:
+		srcArea.GoTo(uint32(t.Local(6).AsInt()))
+	case !hasNext && !bWhile.IsBlock() && !lRest:
+		srcArea.GoTop()
+	}
+
+	vals := make([]hbrt.Value, len(srcIdx))
+	for (!hasNext || nLeft > 0) && !srcArea.EOF() {
+		if bWhile.IsBlock() {
+			t.PendingParams2(0)
+			bWhile.AsBlock().Fn(t)
+			if !t.GetRetValue().AsBool() {
+				break
+			}
+		}
+		emit := true
+		if bFor.IsBlock() {
+			t.PendingParams2(0)
+			bFor.AsBlock().Fn(t)
+			emit = t.GetRetValue().AsBool()
+		}
+		if emit {
+			for i, idx := range srcIdx {
+				vals[i], _ = srcArea.GetValue(idx) // read errors ignored (best effort)
+			}
+			wam.SelectByNum(dstSel)
+			dstArea.Append()
+			for i, v := range vals {
+				dstArea.PutValue(i, v)
+			}
+			wam.SelectByNum(srcSel)
+		}
+		// RECORD is one record; NEXT ends on its last record, unskipped.
+		nLeft--
+		if hasRec || (hasNext && nLeft < 1) {
+			break
+		}
+		srcArea.Skip(1)
+	}
+
+	t.RetBool(true)
+}
+
 // --- DBSETFILTER / DBCLEARFILTER / DBFILTER ---
 
 // DBSETFILTER(bCondition [, cCondition])
diff --git a/hbrtl/register.go b/hbrtl/register.go
index 0cef4bb..8eefdf1 100644
--- a/hbrtl/register.go
+++ b/hbrtl/register.go
@@ -199,6 +199,7 @@ func RegisterRTL(vm *hbrt.VM) {
 		hbrt.Sym("__DBLOCATE", hbrt.FsPublic, rtlDbLocate),
 		hbrt.Sym("__DBCONTINUE", hbrt.FsPublic, rtlDbContinue),
 		hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage),
+		hbrt.Sym("__DBCOPY", hbrt.FsPublic, rtlDbCopy),
 		hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter),
 		hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter),
 		hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),