From e961660f6135fdcd83fb73bf0de16ad57d7ad1e6 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Thu, 30 Apr 2026 15:00:18 +0900 Subject: [PATCH] feat(pp): COPY TO via std.ch + four PP completeness fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `COPY TO <file> [FIELDS <fields,...>] [FOR ...] [WHILE ...] [NEXT ...] [RECORD ...] [REST] [ALL]` reaches the parser as a plain function call to a new RTL primitive __dbCopy (rtlDbCopy in hbrtl/database.go). Implementation: project the field list (case-insensitive name match against the source's structure, full copy when omitted), dbCreate the target file with that struct, open it under a temp alias, walk the source under dbEval-style FOR/WHILE/NEXT/RECORD/REST bounds, and GetValue/Append/PutValue per record into the target. SDF / DELIMITED variants stay parser no-ops until those backends arrive. Wiring up COPY surfaced four longstanding gaps in the PP that had to be fixed for the rule to even reach the runtime: * `<(name)>` *pattern* marker was treated as a regular `<name>` with the parens baked into the captured key, so the matching result substitution `<(name)>` couldn't find it. parseOneMarker now strips the parens at parse time so capture key and result marker share the bare name. The smart-stringify result behavior is unchanged. * matchSegment (the optional-clause matcher) bailed on every non-Regular marker. `[FIELDS <fields,...>]` therefore failed to match at all and the fields list arrived empty in the result template. matchSegment now handles MarkerList with paren-balanced capture and segment+outer literal stop boundaries. * captureExpression only used the first literal in the pattern tail as a stop boundary. With std.ch's chain of optional clauses (`[TO <(f)>] [FIELDS ...] [FOR ...] [WHILE ...] ...`) the file-name marker was happy to gobble a trailing FOR clause when FIELDS was absent. It now stops at *any* of the remaining pattern literals. 
* `<(name)>` smart-stringify on a list-typed capture wrapped the whole comma-joined string in one set of quotes — `{ "a , b" }` — instead of `{ "a", "b" }`. New helper quoteListElements splits on top-level commas (paren / bracket / brace / string-balanced) and quotes each element. applyResult now consults the rule's marker table to know which captures came from `<name,...>`. Parser cleanup: COPY removed from the IDENT-statement no-op switch in both parseIdentStmt and parseExprStmt. Gates green: go test ./... : PASS FiveSql2 SQL:1999 : 43/43 Harbour compat : 56/56 Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/parser/parser.go | 2 +- compiler/pp/command.go | 201 +++++++++++++++++++++++++++++++++----- compiler/pp/std.ch | 11 +++ hbrtl/database.go | 165 +++++++++++++++++++++++++++++++ hbrtl/register.go | 1 + 5 files changed, 357 insertions(+), 23 deletions(-) diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go index ccec679..27b482f 100644 --- a/compiler/parser/parser.go +++ b/compiler/parser/parser.go @@ -1155,7 +1155,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt { // rewritten by compiler/pp/std.ch into function calls before the // parser sees them. switch upper { - case "COPY", "SORT", "TOTAL", "UPDATE", + case "SORT", "TOTAL", "UPDATE", "LABEL", "REPORT", "ACCEPT", "INPUT", "JOIN", "RELEASE", "SAVE", "RESTORE", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", diff --git a/compiler/pp/command.go b/compiler/pp/command.go index ce2c177..0f30120 100644 --- a/compiler/pp/command.go +++ b/compiler/pp/command.go @@ -159,6 +159,16 @@ func parseOneMarker(inner string) Marker { return Marker{Name: name, Type: MarkerWordList, ListValues: vals} } + // <(name)> — extended-expression marker. In Harbour PP this captures + // a file-name-like extended expression and the matching result token + // `<(name)>` smart-stringifies it (already-quoted → keep, identifier + // → quote). 
Strip the parens so captures are stored under the bare + // name; result substitution then matches both `<(name)>` and `` + // via the existing path. + if strings.HasPrefix(inner, "(") && strings.HasSuffix(inner, ")") { + return Marker{Name: inner[1 : len(inner)-1], Type: MarkerRegular} + } + // — regular return Marker{Name: inner, Type: MarkerRegular} } @@ -384,9 +394,9 @@ func (r *Rule) matchPattern(line string) map[string]string { // contain nested `[...]` — callers of the optional-repeat logic // flatten one level at a time. // -// A "mini-matcher" that mirrors the main loop for MarkerRegular and -// literal keywords. MarkerList and MarkerWild inside `[...]` would -// need additional plumbing; defer those until real patterns need them. +// A "mini-matcher" that mirrors the main loop for MarkerRegular, +// MarkerRestricted, and MarkerList plus literal keywords. MarkerWild +// inside `[...]` is rare and still defers to the main matcher. func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outerTail []string) (map[string]string, int, bool) { caps := make(map[string]string) li := startLi @@ -409,21 +419,70 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { inner := pw[1 : len(pw)-1] m := parseOneMarker(inner) - if m.Type != MarkerRegular && m.Type != MarkerRestricted { + switch m.Type { + case MarkerList: + // Capture comma-separated tokens until we hit the + // segment's next literal, an outer literal, or the end + // of the line. Paren-balanced so `f(a,b)` inside the + // list doesn't terminate prematurely. Mirrors the main + // matchPattern's MarkerList branch. 
+ stop := map[string]struct{}{} + for _, w := range segment[pi+1:] { + if w != "" && w != "[" && w != "]" && + !(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) { + stop[strings.ToUpper(w)] = struct{}{} + } + } + for _, w := range outerTail { + if w != "" && w != "[" && w != "]" && + !(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) { + stop[strings.ToUpper(w)] = struct{}{} + } + } + var parts []string + depth := 0 + for li < len(lineWords) { + w := lineWords[li] + if depth == 0 { + key := w + if !caseSens { + key = strings.ToUpper(w) + } + if _, hit := stop[key]; hit { + break + } + } + switch w { + case "(", "[", "{": + depth++ + case ")", "]", "}": + if depth > 0 { + depth-- + } + } + parts = append(parts, w) + li++ + } + caps[m.Name] = strings.Join(parts, " ") + continue + case MarkerRegular, MarkerRestricted: + // fall through to capture-one-expression below + default: return nil, startLi, false } // Build a pseudo-pattern tail so captureExpression picks the - // right delimiter. Priority: - // 1. Next literal inside the same segment. - // 2. First literal in the outer-pattern tail — this is what - // stops `[TO ] [FOR ]` from letting `` swallow - // the FOR clause. + // right delimiters. Priority: + // 1. Next literals inside the same segment. + // 2. Every literal in the outer-pattern tail — this is + // what stops `[TO <(f)>] [FIELDS ...] [FOR ...]` from + // letting `<(f)>` swallow a trailing FOR/WHILE/NEXT + // clause that happened to be present. // 3. Repeat boundary (the segment's leading literal) so a // multi-iteration capture stops before the next iter. 
tail := segment[pi+1:] if !hasLiteralAfter(tail) { - if outerLit := firstLiteral(outerTail); outerLit != "" { - tail = []string{outerLit} + if hasLiteralAfter(outerTail) { + tail = outerTail } else if repeatBoundary != "" { tail = []string{repeatBoundary} } @@ -472,6 +531,72 @@ func hasLiteralAfter(segment []string) bool { return false } +// quoteListElements smart-stringifies a list-style capture: split val +// on top-level commas (paren / bracket / brace balanced) and emit each +// element quoted. Already-quoted elements are kept as-is so a literal +// like `"a", "b"` round-trips intact. Used by `<(name)>` substitution +// when `name` came from a `` marker — Harbour's std.ch idiom +// for `{ <(fields)> }` to expand to `{ "a", "b", "c" }`. +func quoteListElements(val string) string { + parts := splitTopLevelCommas(val) + if len(parts) == 0 { + return "" + } + out := make([]string, 0, len(parts)) + for _, p := range parts { + t := strings.TrimSpace(p) + if t == "" { + continue + } + // Already a string literal — keep verbatim. + if n := len(t); n >= 2 && + ((t[0] == '"' && t[n-1] == '"') || + (t[0] == '\'' && t[n-1] == '\'') || + (t[0] == '[' && t[n-1] == ']')) { + out = append(out, t) + continue + } + out = append(out, ppQuote(t)) + } + return strings.Join(out, ", ") +} + +// splitTopLevelCommas splits s on commas that are not nested inside +// (), [], or {}. Strings ("..." / '...') are skipped to avoid breaking +// captured PRG expressions. 
+func splitTopLevelCommas(s string) []string { + var parts []string + depth := 0 + start := 0 + inStr := byte(0) + for i := 0; i < len(s); i++ { + c := s[i] + if inStr != 0 { + if c == inStr { + inStr = 0 + } + continue + } + switch c { + case '"', '\'': + inStr = c + case '(', '[', '{': + depth++ + case ')', ']', '}': + if depth > 0 { + depth-- + } + case ',': + if depth == 0 { + parts = append(parts, s[start:i]) + start = i + 1 + } + } + } + parts = append(parts, s[start:]) + return parts +} + // ppQuote wraps a captured value in a PRG string literal, picking a // delimiter that doesn't collide with characters already inside. Harbour // # stringify takes the raw source text of the argument and must @@ -510,6 +635,16 @@ func (r *Rule) applyResult(captures map[string]string) string { // captures apply (the required-or-absent case). result = expandOptionalRepeat(result, captures) + // Marker-name → list flag, so the smart-stringify branch below can + // emit per-element quoting (`{ "a", "b" }`) for list captures + // instead of treating the comma-joined string as one literal. + isList := make(map[string]bool, len(r.Markers)) + for _, m := range r.Markers { + if m.Type == MarkerList { + isList[m.Name] = true + } + } + for name, val := range captures { // Multi-capture markers are consumed by expandOptionalRepeat; // the bare substitution for the joined form would produce @@ -524,8 +659,9 @@ func (r *Rule) applyResult(captures map[string]string) string { // <"name"> — explicit stringify. result = strings.ReplaceAll(result, `<"`+name+`">`, quoted) // <(name)> — smart stringify: already a string literal → keep; - // otherwise quote. `val` comes straight from the capture, so - // trim and check for surrounding quotes. + // list capture → quote each comma-separated element; otherwise + // quote whole. `val` comes straight from the capture, so trim + // and check for surrounding quotes. 
trim := strings.TrimSpace(val) smart := quoted if n := len(trim); n >= 2 && @@ -533,6 +669,8 @@ func (r *Rule) applyResult(captures map[string]string) string { (trim[0] == '\'' && trim[n-1] == '\'') || (trim[0] == '[' && trim[n-1] == ']')) { smart = trim + } else if isList[name] { + smart = quoteListElements(val) } result = strings.ReplaceAll(result, "<("+name+")>", smart) // <.name.> — logify (empty → .F., else .T.) @@ -963,18 +1101,28 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP return "" } - // Find next literal keyword in pattern to use as delimiter - delimWord := "" + // Collect every literal-keyword delimiter that follows in the + // pattern, not just the first. Optional clauses in std.ch sit + // next to one another (`[TO <(f)>] [FIELDS ] + // [FOR ] [WHILE ] ...`), so the file-name marker + // must stop at TO's *successor* — but we don't know which + // successor will actually be present in the input. Stopping on + // any of them keeps `<(f)>` from swallowing a trailing + // `FOR x > 5` clause. + var delims []string for pi := nextPi; pi < len(patternWords); pi++ { pw := patternWords[pi] - if !strings.HasPrefix(pw, "<") && pw != "[" && pw != "]" { - delimWord = pw - break + if pw == "" || pw == "[" || pw == "]" { + continue } + if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") { + continue + } + delims = append(delims, pw) } - if delimWord != "" { - // Capture until the delimiter, paren-balancing so nested + if len(delims) > 0 { + // Capture until any delimiter is hit, paren-balancing so nested // parens/brackets/braces inside the expression don't falsely // terminate the capture. Harbour's own PP does the same — // `_REGULAR_(&(a))` must capture `&(a)` (incl. 
inner parens) @@ -983,8 +1131,17 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP depth := 0 for *li < len(lineWords) { w := lineWords[*li] - if depth == 0 && matchWord(w, delimWord, caseSens) { - break + if depth == 0 { + stop := false + for _, d := range delims { + if matchWord(w, d, caseSens) { + stop = true + break + } + } + if stop { + break + } } switch w { case "(", "[", "{": diff --git a/compiler/pp/std.ch b/compiler/pp/std.ch index 52f30d0..f5ccd01 100644 --- a/compiler/pp/std.ch +++ b/compiler/pp/std.ch @@ -62,6 +62,17 @@ := __dbAverage( <{x}>, ; <{for}>, <{while}>, , , <.rest.> ) +/* --- bulk record export --- + COPY TO copies visible records of the current workarea into a fresh + DBF. FIELDS/FOR/WHILE/NEXT/RECORD/REST work as in Harbour. SDF and + DELIMITED variants stay as silent no-ops in the parser until their + backends land. */ +#command COPY [TO <(f)>] [FIELDS ] ; + [FOR ] [WHILE ] [NEXT ] ; + [RECORD ] [] [ALL] => ; + __dbCopy( <(f)>, { <(fields)> }, ; + <{for}>, <{while}>, , , <.rest.> ) + /* --- bulk maintenance --- */ #command REINDEX => DbReindex() #command PACK => DbPack() diff --git a/hbrtl/database.go b/hbrtl/database.go index dce125d..631089a 100644 --- a/hbrtl/database.go +++ b/hbrtl/database.go @@ -8,6 +8,8 @@ package hbrtl import ( + "strings" + "five/hbrt" "five/hbrdd" "five/hbrdd/dbf" @@ -766,6 +768,169 @@ func rtlDbAverage(t *hbrt.Thread) { t.RetDouble(sum/float64(n), 10, 2) } +// rtlDbCopy implements __dbCopy(cFile, aFields, bFor, bWhile, nNext, +// xRec, lRest) — copy visible records from the current workarea into a +// freshly created DBF. Field projection: an empty/missing aFields +// copies the whole structure; otherwise only fields whose names match +// (case-insensitive) are carried over. Used by `COPY TO [FIELDS] +// [FOR] [WHILE] [NEXT] [RECORD] [REST] [ALL]` in std.ch. +// +// Harbour's __dbCopy also accepts cRDD / nConnection / cCodepage / xDelim +// (params 8..11). 
Five only supports DBFNTX→DBFNTX for now; SDF/DELIMITED +// copies stay parser no-ops until that backend lands. +func rtlDbCopy(t *hbrt.Thread) { + nParams := t.ParamCount() + t.Frame(nParams, 0) + defer t.EndProcFast() + + wam := getWA(t) + if wam == nil { + t.RetBool(false) + return + } + srcArea := wam.Current() + if srcArea == nil { + t.RetBool(false) + return + } + + if nParams < 1 || t.Local(1).IsNil() { + t.RetBool(false) + return + } + cFile := t.Local(1).AsString() + if cFile == "" { + t.RetBool(false) + return + } + + // Field projection. Harbour passes `{ <(fields)> }` so each entry + // is a string literal already; uppercase for case-insensitive + // matching against the source's field names. + var srcIdx []int + var dstFields []hbrdd.FieldInfo + nSrcFields := srcArea.FieldCount() + useAll := true + if nParams >= 2 && t.Local(2).IsArray() { + arr := t.Local(2).AsArray() + if arr != nil && len(arr.Items) > 0 { + useAll = false + wanted := make(map[string]struct{}, len(arr.Items)) + for _, it := range arr.Items { + s := strings.ToUpper(strings.TrimSpace(it.AsString())) + if s != "" { + wanted[s] = struct{}{} + } + } + for i := 0; i < nSrcFields; i++ { + fi := srcArea.GetFieldInfo(i) + if _, ok := wanted[strings.ToUpper(fi.Name)]; ok { + srcIdx = append(srcIdx, i) + dstFields = append(dstFields, fi) + } + } + } + } + if useAll { + srcIdx = make([]int, nSrcFields) + dstFields = make([]hbrdd.FieldInfo, nSrcFields) + for i := 0; i < nSrcFields; i++ { + srcIdx[i] = i + dstFields[i] = srcArea.GetFieldInfo(i) + } + } + if len(dstFields) == 0 { + // Nothing to copy — empty FIELDS list with no matches. + t.RetBool(false) + return + } + + // Loop bounds — same shape as dbEval. 
+ var bFor, bWhile hbrt.Value + if nParams >= 3 { + bFor = t.Local(3) + } + if nParams >= 4 { + bWhile = t.Local(4) + } + nCount := -1 + if nParams >= 5 && !t.Local(5).IsNil() { + nCount = t.Local(5).AsInt() + } + if nParams >= 6 && !t.Local(6).IsNil() { + srcArea.GoTo(uint32(t.Local(6).AsInt())) + } + lRest := false + if nParams >= 7 && !t.Local(7).IsNil() { + lRest = t.Local(7).AsBool() + } + if !lRest && (nParams < 6 || t.Local(6).IsNil()) { + srcArea.GoTop() + } + + // Create + open the destination. Use a temp alias so we don't + // clash with whatever the caller may have open under a name + // matching the file's basename. + drv, err := hbrdd.GetDriver("DBFNTX") + if err != nil { + t.RetBool(false) + return + } + if _, err := drv.Create(hbrdd.CreateParams{Path: cFile, Fields: dstFields}); err != nil { + t.RetBool(false) + return + } + srcSel := wam.CurrentNum() + dstSel, err := wam.Open("DBFNTX", cFile, "__copytmp", false, false) + if err != nil { + t.RetBool(false) + return + } + dstArea := wam.AreaAt(dstSel) + wam.SelectByNum(srcSel) + + scanned := 0 + for !srcArea.EOF() { + if nCount >= 0 && scanned >= nCount { + break + } + if bWhile.IsBlock() { + t.PendingParams2(0) + bWhile.AsBlock().Fn(t) + if !t.GetRetValue().AsBool() { + break + } + } + emit := true + if bFor.IsBlock() { + t.PendingParams2(0) + bFor.AsBlock().Fn(t) + emit = t.GetRetValue().AsBool() + } + if emit { + vals := make([]hbrt.Value, len(srcIdx)) + for i, idx := range srcIdx { + v, _ := srcArea.GetValue(idx) + vals[i] = v + } + wam.SelectByNum(dstSel) + dstArea.Append() + for i, v := range vals { + dstArea.PutValue(i, v) + } + wam.SelectByNum(srcSel) + } + srcArea.Skip(1) + scanned++ + } + + // Close the destination, leaving the source selected as on entry. 
+ wam.SelectByNum(dstSel) + wam.Close() + wam.SelectByNum(srcSel) + t.RetBool(true) +} + // --- DBSETFILTER / DBCLEARFILTER / DBFILTER --- // DBSETFILTER(bCondition [, cCondition]) diff --git a/hbrtl/register.go b/hbrtl/register.go index 0cef4bb..8eefdf1 100644 --- a/hbrtl/register.go +++ b/hbrtl/register.go @@ -199,6 +199,7 @@ func RegisterRTL(vm *hbrt.VM) { hbrt.Sym("__DBLOCATE", hbrt.FsPublic, rtlDbLocate), hbrt.Sym("__DBCONTINUE", hbrt.FsPublic, rtlDbContinue), hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage), + hbrt.Sym("__DBCOPY", hbrt.FsPublic, rtlDbCopy), hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter), hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter), hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),