feat(pp): COPY TO via std.ch + four PP completeness fixes

`COPY TO <file> [FIELDS <list>] [FOR ...] [WHILE ...] [NEXT ...]
[RECORD ...] [REST] [ALL]` is now translated by a std.ch rule, so it
reaches the parser as a plain function call to a new RTL primitive,
__dbCopy (rtlDbCopy in hbrtl/database.go).

Implementation: project the field list (case-insensitive name match
against the source's structure, full copy when omitted), dbCreate the
target file with that struct, open it under a temp alias, walk the
source under dbEval-style FOR/WHILE/NEXT/RECORD/REST bounds, and
GetValue/Append/PutValue per record into the target. SDF / DELIMITED
variants stay parser no-ops until those backends arrive.

Wiring up COPY surfaced four longstanding gaps in the PP that had to
be fixed for the rule to even reach the runtime:

  * `<(name)>` *pattern* marker was treated as a regular `<name>`
    with the parens baked into the captured key, so the matching
    result substitution `<(name)>` couldn't find it. parseOneMarker
    now strips the parens at parse time so capture key and result
    marker share the bare name. The smart-stringify result behavior
    is unchanged.
  * matchSegment (the optional-clause matcher) bailed on every
    non-Regular marker. `[FIELDS <fields,...>]` therefore failed to
    match at all and the fields list arrived empty in the result
    template. matchSegment now handles MarkerList with paren-balanced
    capture and segment+outer literal stop boundaries.
  * captureExpression only used the first literal in the pattern
    tail as a stop boundary. With std.ch's chain of optional
    clauses (`[TO <(f)>] [FIELDS ...] [FOR ...] [WHILE ...] ...`)
    the file-name marker was happy to gobble a trailing FOR clause
    when FIELDS was absent. It now stops at *any* of the remaining
    pattern literals.
  * `<(name)>` smart-stringify on a list-typed capture wrapped the
    whole comma-joined string in one set of quotes — `{ "a , b" }` —
    instead of `{ "a", "b" }`. New helper quoteListElements splits on
    top-level commas (paren / bracket / brace / string-balanced) and
    quotes each element. applyResult now consults the rule's marker
    table to know which captures came from `<name,...>`.

Parser cleanup: COPY removed from the IDENT-statement no-op switch in
both parseIdentStmt and parseExprStmt.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 15:00:18 +09:00
parent c2e7f7ea27
commit e961660f61
5 changed files with 357 additions and 23 deletions

View File

@@ -159,6 +159,16 @@ func parseOneMarker(inner string) Marker {
return Marker{Name: name, Type: MarkerWordList, ListValues: vals}
}
// <(name)> — extended-expression marker. In Harbour PP this captures
// a file-name-like extended expression and the matching result token
// `<(name)>` smart-stringifies it (already-quoted → keep, identifier
// → quote). Strip the parens so captures are stored under the bare
// name; result substitution then matches both `<(name)>` and `<name>`
// via the existing path.
if strings.HasPrefix(inner, "(") && strings.HasSuffix(inner, ")") {
return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRegular}
}
// <name> — regular
return Marker{Name: inner, Type: MarkerRegular}
}
@@ -384,9 +394,9 @@ func (r *Rule) matchPattern(line string) map[string]string {
// contain nested `[...]` — callers of the optional-repeat logic
// flatten one level at a time.
//
// A "mini-matcher" that mirrors the main loop for MarkerRegular and
// literal keywords. MarkerList and MarkerWild inside `[...]` would
// need additional plumbing; defer those until real patterns need them.
// A "mini-matcher" that mirrors the main loop for MarkerRegular,
// MarkerRestricted, and MarkerList plus literal keywords. Any other
// marker type inside `[...]` (e.g. MarkerWild, which is rare there) is
// not handled here: the segment is reported as not matching.
func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outerTail []string) (map[string]string, int, bool) {
caps := make(map[string]string)
li := startLi
@@ -409,21 +419,70 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
inner := pw[1 : len(pw)-1]
m := parseOneMarker(inner)
if m.Type != MarkerRegular && m.Type != MarkerRestricted {
switch m.Type {
case MarkerList:
// Capture comma-separated tokens until we hit the
// segment's next literal, an outer literal, or the end
// of the line. Paren-balanced so `f(a,b)` inside the
// list doesn't terminate prematurely. Mirrors the main
// matchPattern's MarkerList branch.
stop := map[string]struct{}{}
for _, w := range segment[pi+1:] {
if w != "" && w != "[" && w != "]" &&
!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
stop[strings.ToUpper(w)] = struct{}{}
}
}
for _, w := range outerTail {
if w != "" && w != "[" && w != "]" &&
!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
stop[strings.ToUpper(w)] = struct{}{}
}
}
var parts []string
depth := 0
for li < len(lineWords) {
w := lineWords[li]
if depth == 0 {
key := w
if !caseSens {
key = strings.ToUpper(w)
}
if _, hit := stop[key]; hit {
break
}
}
switch w {
case "(", "[", "{":
depth++
case ")", "]", "}":
if depth > 0 {
depth--
}
}
parts = append(parts, w)
li++
}
caps[m.Name] = strings.Join(parts, " ")
continue
case MarkerRegular, MarkerRestricted:
// fall through to capture-one-expression below
default:
return nil, startLi, false
}
// Build a pseudo-pattern tail so captureExpression picks the
// right delimiter. Priority:
// 1. Next literal inside the same segment.
// 2. First literal in the outer-pattern tail — this is what
// stops `[TO <v>] [FOR <for>]` from letting `<v>` swallow
// the FOR clause.
// right delimiters. Priority:
// 1. Next literals inside the same segment.
// 2. Every literal in the outer-pattern tail — this is
// what stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from
// letting `<(f)>` swallow a trailing FOR/WHILE/NEXT
// clause that happened to be present.
// 3. Repeat boundary (the segment's leading literal) so a
// multi-iteration capture stops before the next iter.
tail := segment[pi+1:]
if !hasLiteralAfter(tail) {
if outerLit := firstLiteral(outerTail); outerLit != "" {
tail = []string{outerLit}
if hasLiteralAfter(outerTail) {
tail = outerTail
} else if repeatBoundary != "" {
tail = []string{repeatBoundary}
}
@@ -472,6 +531,72 @@ func hasLiteralAfter(segment []string) bool {
return false
}
// quoteListElements renders a list-style capture as a comma-separated
// sequence of string literals. The input is cut at top-level commas
// (via splitTopLevelCommas), each piece is whitespace-trimmed, and
// empty pieces are dropped. A piece that already looks like a string
// literal — wrapped in "...", '...', or [...] — is emitted untouched;
// every other piece goes through ppQuote. The results are joined with
// ", ". This backs `<(name)>` substitution for captures that came from
// a `<name,...>` marker, so `{ <(fields)> }` can expand to
// `{ "a", "b", "c" }`.
func quoteListElements(val string) string {
	elems := splitTopLevelCommas(val)
	quoted := make([]string, 0, len(elems))
	for _, raw := range elems {
		item := strings.TrimSpace(raw)
		if item == "" {
			// Blank element (e.g. from a doubled or trailing comma).
			continue
		}
		if len(item) >= 2 {
			first, last := item[0], item[len(item)-1]
			alreadyLiteral := (first == '"' && last == '"') ||
				(first == '\'' && last == '\'') ||
				(first == '[' && last == ']')
			if alreadyLiteral {
				// Keep existing literals verbatim so they round-trip.
				quoted = append(quoted, item)
				continue
			}
		}
		quoted = append(quoted, ppQuote(item))
	}
	return strings.Join(quoted, ", ")
}
// splitTopLevelCommas cuts s at every comma that sits outside any
// (), [], or {} group and outside any "..." / '...' string. The pieces
// are returned in order, without the separating commas and without any
// trimming; the result always holds at least one element (the whole of
// s when no top-level comma exists). A closing bracket with no matching
// opener is ignored rather than driving the nesting level negative, and
// an unterminated string swallows the rest of the input.
func splitTopLevelCommas(s string) []string {
	var (
		pieces  []string
		nesting int  // current ()/[]/{} nesting level
		from    int  // start offset of the piece being scanned
		quote   byte // active string delimiter, 0 when outside a string
	)
	for idx := 0; idx < len(s); idx++ {
		ch := s[idx]
		switch {
		case quote != 0:
			// Inside a string: only the matching delimiter matters.
			if ch == quote {
				quote = 0
			}
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '(' || ch == '[' || ch == '{':
			nesting++
		case ch == ')' || ch == ']' || ch == '}':
			if nesting > 0 {
				nesting--
			}
		case ch == ',' && nesting == 0:
			pieces = append(pieces, s[from:idx])
			from = idx + 1
		}
	}
	return append(pieces, s[from:])
}
// ppQuote wraps a captured value in a PRG string literal, picking a
// delimiter that doesn't collide with characters already inside. Harbour
// #<name> stringify takes the raw source text of the argument and must
@@ -510,6 +635,16 @@ func (r *Rule) applyResult(captures map[string]string) string {
// captures apply (the required-or-absent case).
result = expandOptionalRepeat(result, captures)
// Marker-name → list flag, so the smart-stringify branch below can
// emit per-element quoting (`{ "a", "b" }`) for list captures
// instead of treating the comma-joined string as one literal.
isList := make(map[string]bool, len(r.Markers))
for _, m := range r.Markers {
if m.Type == MarkerList {
isList[m.Name] = true
}
}
for name, val := range captures {
// Multi-capture markers are consumed by expandOptionalRepeat;
// the bare substitution for the joined form would produce
@@ -524,8 +659,9 @@ func (r *Rule) applyResult(captures map[string]string) string {
// <"name"> — explicit stringify.
result = strings.ReplaceAll(result, `<"`+name+`">`, quoted)
// <(name)> — smart stringify: already a string literal → keep;
// otherwise quote. `val` comes straight from the capture, so
// trim and check for surrounding quotes.
// list capture → quote each comma-separated element; otherwise
// quote whole. `val` comes straight from the capture, so trim
// and check for surrounding quotes.
trim := strings.TrimSpace(val)
smart := quoted
if n := len(trim); n >= 2 &&
@@ -533,6 +669,8 @@ func (r *Rule) applyResult(captures map[string]string) string {
(trim[0] == '\'' && trim[n-1] == '\'') ||
(trim[0] == '[' && trim[n-1] == ']')) {
smart = trim
} else if isList[name] {
smart = quoteListElements(val)
}
result = strings.ReplaceAll(result, "<("+name+")>", smart)
// <.name.> — logify (empty → .F., else .T.)
@@ -963,18 +1101,28 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
return ""
}
// Find next literal keyword in pattern to use as delimiter
delimWord := ""
// Collect every literal-keyword delimiter that follows in the
// pattern, not just the first. Optional clauses in std.ch sit
// next to one another (`[TO <(f)>] [FIELDS <fields,...>]
// [FOR <for>] [WHILE <while>] ...`), so the file-name marker
// must stop at TO's *successor* — but we don't know which
// successor will actually be present in the input. Stopping on
// any of them keeps `<(f)>` from swallowing a trailing
// `FOR x > 5` clause.
var delims []string
for pi := nextPi; pi < len(patternWords); pi++ {
pw := patternWords[pi]
if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" {
delimWord = pw
break
if pw == "" || pw == "[" || pw == "]" {
continue
}
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
continue
}
delims = append(delims, pw)
}
if delimWord != "" {
// Capture until the delimiter, paren-balancing so nested
if len(delims) > 0 {
// Capture until any delimiter is hit, paren-balancing so nested
// parens/brackets/braces inside the expression don't falsely
// terminate the capture. Harbour's own PP does the same —
// `_REGULAR_(&(a))` must capture `&(a)` (incl. inner parens)
@@ -983,8 +1131,17 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
depth := 0
for *li < len(lineWords) {
w := lineWords[*li]
if depth == 0 && matchWord(w, delimWord, caseSens) {
break
if depth == 0 {
stop := false
for _, d := range delims {
if matchWord(w, d, caseSens) {
stop = true
break
}
}
if stop {
break
}
}
switch w {
case "(", "[", "{":