feat(pp): LIST / DISPLAY via std.ch + four PP completeness fixes

`LIST [<fields>] [OFF] [FOR ...] [WHILE ...] [NEXT ...] [RECORD ...] [REST] [ALL]` and `DISPLAY [<fields>] [OFF] [FOR ...] ... [ALL]` reach the parser as plain function calls to a new RTL primitive __dbList (rtlDbList in hbrtl/database.go).

Implementation: walk the workarea under dbEval-style FOR/WHILE/NEXT/RECORD/REST bounds. For each visible record, evaluate each column block and emit the rendered values via valueToDisplay (the same formatter QOut already uses). Empty fields list defaults to "all fields". OFF suppresses the record-number prefix. LIST always emits the full filtered range; DISPLAY without ALL emits only the current record (encoded as nCount=1). TO PRINTER / TO FILE clauses are not yet wired through — for now everything goes to stdout.

Wiring up LIST/DISPLAY surfaced four further gaps in PP that were silently masking bugs in any rule with multiple word-list / list / optional clauses chained together:

* matchSegment refused MarkerWordList inside `[...]`. The LIST rule's `[<off:OFF>]` clause therefore never set the off capture, and `<.off.>` substituted to nothing instead of .T./.F. matchSegment now matches WordList markers the same way the top-level matcher does.
* `<v,...>` and `<(f)>` capture stop boundaries didn't include the values of following MarkerWordList markers. For `[<v,...>] [<off:OFF>] [<all:ALL>]` against `LIST id, name OFF`, the v list would happily eat OFF. New addStopFrom helper contributes both literal keywords and word-list values; both matchSegment's MarkerList branch and captureExpression now use it.
* Optional-repeat loop in matchPattern merged a no-progress iteration's empty capture into the running multi-capture string (with the `\x01` separator) before the no-progress break check fired. So a successful first iteration's value got contaminated and the substitution loop then skipped it as multi-capture garbage. The merge now happens after the progress check.
* Unreferenced `<.name.>` markers (optional clauses that didn't match in the input) were getting cleaned up to empty by the generic marker scrubber instead of the .F. sentinel Harbour's std.ch expects. New replaceUnreferencedLogify pass mirrors the existing replaceUnreferencedBlockify and runs just before the cleanup.

Parser cleanup: LIST and DISPLAY removed from the IDENT-statement no-op switch in both parseIdentStmt and parseExprStmt.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 15:19:36 +09:00
parent 6dbc34b34b
commit 1cc2d94927
5 changed files with 254 additions and 30 deletions
--- a/compiler/parser/parser.go
+++ b/compiler/parser/parser.go
@@ -1159,7 +1159,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
 		"LABEL", "REPORT", "ACCEPT", "INPUT",
 		"JOIN", "RELEASE", "SAVE", "RESTORE",
 		"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
-		"WITH", "CLEAR", "DISPLAY", "LIST":
+		"WITH", "CLEAR":
 		p.advance()
 		for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
 			p.advance()
--- a/compiler/pp/command.go
+++ b/compiler/pp/command.go
@@ -328,6 +328,16 @@ func (r *Rule) matchPattern(line string) map[string]string {
 					li = snapshotLi
 					break
 				}
+				// No-progress matches can happen when the body is just
+				// a list/regular marker that immediately hits a stop
+				// boundary on this iteration — its captured value is
+				// empty. Don't merge those into captures, otherwise an
+				// earlier successful iteration's value gets contaminated
+				// with the `\x01`-separator form and the result-template
+				// substitution skips it as multi-capture garbage.
+				if newLi == snapshotLi {
+					break
+				}
 				for k, v := range iterCaps {
 					if prev, hit := captures[k]; hit && prev != "" {
 						captures[k] = prev + "\x01" + v
@@ -336,9 +346,6 @@ func (r *Rule) matchPattern(line string) map[string]string {
 					}
 				}
 				li = newLi
-				if li == snapshotLi {
-					break // no progress — avoid infinite loop
-				}
 			}
 			pi = bodyEnd + 1 // past ]
 		} else if pw == "]" {
@@ -420,25 +427,41 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
 			inner := pw[1 : len(pw)-1]
 			m := parseOneMarker(inner)
 			switch m.Type {
+			case MarkerWordList:
+				// Match one of the listed words. If the current line
+				// token isn't in the allowed set, the segment fails to
+				// match — same behavior as the top-level matcher.
+				w := lineWords[li]
+				matched := false
+				for _, allowed := range m.ListValues {
+					if caseSens {
+						if w == allowed {
+							matched = true
+							break
+						}
+					} else if strings.EqualFold(w, allowed) {
+						matched = true
+						break
+					}
+				}
+				if !matched {
+					return nil, startLi, false
+				}
+				caps[m.Name] = w
+				li++
+				continue
 			case MarkerList:
 				// Capture comma-separated tokens until we hit the
-				// segment's next literal, an outer literal, or the end
-				// of the line. Paren-balanced so `f(a,b)` inside the
-				// list doesn't terminate prematurely. Mirrors the main
-				// matchPattern's MarkerList branch.
+				// segment's next literal, an outer literal, or one of
+				// the limited values of a following MarkerWordList
+				// (e.g. `<off:OFF>` — OFF is the only token that can
+				// match it, so the list before it must stop at OFF).
+				// Paren-balanced so `f(a,b)` inside the list doesn't
+				// terminate prematurely. Mirrors the main matchPattern's
+				// MarkerList branch.
 				stop := map[string]struct{}{}
-				for _, w := range segment[pi+1:] {
-					if w != "" && w != "[" && w != "]" &&
-						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
-						stop[strings.ToUpper(w)] = struct{}{}
-					}
-				}
-				for _, w := range outerTail {
-					if w != "" && w != "[" && w != "]" &&
-						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
-						stop[strings.ToUpper(w)] = struct{}{}
-					}
-				}
+				addStopFrom(stop, segment[pi+1:])
+				addStopFrom(stop, outerTail)
 				var parts []string
 				depth := 0
 				for li < len(lineWords) {
@@ -499,6 +522,30 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
 	return caps, li, true
 }

+// addStopFrom merges into `stop` every token that could legally match
+// the next position in `pw`: bare literals AND each value of any
+// MarkerWordList (`<name:A,B,C>`) since those markers can match only
+// their listed words. Used so a preceding list/regular capture knows
+// to stop before any of them. Always uppercased — the caller decides
+// whether to do a case-insensitive lookup.
+func addStopFrom(stop map[string]struct{}, pw []string) {
+	for _, w := range pw {
+		if w == "" || w == "[" || w == "]" {
+			continue
+		}
+		if strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">") {
+			inner := w[1 : len(w)-1]
+			if m := parseOneMarker(inner); m.Type == MarkerWordList {
+				for _, v := range m.ListValues {
+					stop[strings.ToUpper(v)] = struct{}{}
+				}
+			}
+			continue
+		}
+		stop[strings.ToUpper(w)] = struct{}{}
+	}
+}
+
 // firstLiteral returns the first non-marker, non-bracket token in pw,
 // or "" if none. Used to give matchSegment a stop-boundary drawn from
 // the outer pattern when its body ends in a regular marker.
@@ -710,12 +757,44 @@ func (r *Rule) applyResult(captures map[string]string) string {
 	// (matches Harbour: empty FOR/WHILE → NIL → bypass the condition).
 	result = replaceUnreferencedBlockify(result)

+	// Same idea for `<.name.>`: a missing marker logifies to .F.,
+	// matching Harbour's behavior of "absent optional clause => .F."
+	// for OFF / ALL / REST / etc.
+	result = replaceUnreferencedLogify(result)
+
 	// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
 	result = cleanUnreferencedMarkers(result)

 	return result
 }

+// replaceUnreferencedLogify rewrites every remaining `<.ident.>` to
+// `.F.` — the absent-optional-clause sentinel that matches Harbour's
+// std.ch convention.
+func replaceUnreferencedLogify(s string) string {
+	var out strings.Builder
+	i := 0
+	for i < len(s) {
+		if i+2 < len(s) && s[i] == '<' && s[i+1] == '.' {
+			j := i + 2
+			if j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z')) {
+				j++
+				for j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z') || (s[j] >= '0' && s[j] <= '9')) {
+					j++
+				}
+				if j+1 < len(s) && s[j] == '.' && s[j+1] == '>' {
+					out.WriteString(".F.")
+					i = j + 2
+					continue
+				}
+			}
+		}
+		out.WriteByte(s[i])
+		i++
+	}
+	return out.String()
+}
+
 // replaceUnreferencedBlockify rewrites every remaining `<{ident}>` to
 // NIL. Run after the main substitution loop, before the generic
 // unreferenced-marker cleanup.
@@ -1121,17 +1200,14 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
 	// must stop at TO's *successor* — but we don't know which
 	// successor will actually be present in the input. Stopping on
 	// any of them keeps `<(f)>` from swallowing a trailing
-	// `FOR x > 5` clause.
+	// `FOR x > 5` clause. MarkerWordList values count too — a
+	// `<off:OFF>` marker can only match the word OFF, so prior
+	// captures must stop at it.
+	stopSet := map[string]struct{}{}
+	addStopFrom(stopSet, patternWords[nextPi:])
 	var delims []string
-	for pi := nextPi; pi < len(patternWords); pi++ {
-		pw := patternWords[pi]
-		if pw == "" || pw == "[" || pw == "]" {
-			continue
-		}
-		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
-			continue
-		}
-		delims = append(delims, pw)
+	for k := range stopSet {
+		delims = append(delims, k)
 	}

 	if len(delims) > 0 {
--- a/compiler/pp/std.ch
+++ b/compiler/pp/std.ch
@@ -82,6 +82,23 @@
         __dbSort( <(f)>, { <(fields)> }, ;
                   <{for}>, <{while}>, <next>, <rec>, <.rest.> )

+/* --- console output ---
+   LIST emits every record matching the filter; DISPLAY without ALL
+   shows just the current record. Both share __dbList — lAll
+   distinguishes them. TO PRINTER / TO FILE clauses are not part of
+   these rules yet; stdout is the only sink for now. */
+#command LIST [<v,...>] [<off:OFF>] ;
+              [FOR <for>] [WHILE <while>] [NEXT <next>] ;
+              [RECORD <rec>] [<rest:REST>] [ALL] => ;
+         __dbList( <.off.>, { <{v}> }, .T., ;
+                   <{for}>, <{while}>, <next>, <rec>, <.rest.> )
+
+#command DISPLAY [<v,...>] [<off:OFF>] ;
+                 [FOR <for>] [WHILE <while>] [NEXT <next>] ;
+                 [RECORD <rec>] [<rest:REST>] [<all:ALL>] => ;
+         __dbList( <.off.>, { <{v}> }, <.all.>, ;
+                   <{for}>, <{while}>, <next>, <rec>, <.rest.> )
+
 /* --- bulk maintenance --- */
 #command REINDEX                         => DbReindex()
 #command PACK                            => DbPack()
--- a/hbrtl/database.go
+++ b/hbrtl/database.go
@@ -8,6 +8,7 @@
 package hbrtl

 import (
+	"fmt"
 	"strings"

 	"five/hbrt"
@@ -1117,6 +1118,135 @@ func rtlDbSort(t *hbrt.Thread) {
 	t.RetBool(true)
 }

+// rtlDbList implements __dbList(lOff, aBlocks, lAll, bFor, bWhile,
+// nNext, nRec, lRest, lPrn, cFile) — output visible records to
+// stdout. aBlocks is an array of column-evaluation code blocks (one
+// per LIST / DISPLAY column expression); non-block items are
+// skipped, and if no block remains every field of the current
+// workarea is emitted. OFF (lOff) drops the record-number prefix.
+//
+// Used by both `LIST [<v,...>]` and `DISPLAY [<v,...>]` in std.ch.
+// lAll distinguishes them: LIST always passes .T. (all matching
+// records); DISPLAY passes .T. only for `DISPLAY ALL`, otherwise .F.
+// (scan a single record unless NEXT is given). lPrn / cFile are
+// never read: TO PRINTER / TO FILE output still goes to stdout.
+// NOTE(review): nRec only positions the scan, which then continues
+// to EOF when lAll is set, and NEXT still rewinds via GoTop unless
+// REST is given — confirm against Harbour's dbEval semantics.
+func rtlDbList(t *hbrt.Thread) {
+	nParams := t.ParamCount()
+	t.Frame(nParams, 0)
+	defer t.EndProcFast()
+
+	wam := getWA(t)
+	if wam == nil {
+		t.RetNil()
+		return
+	}
+	srcArea := wam.Current()
+	if srcArea == nil {
+		t.RetNil()
+		return
+	}
+
+	lOff := false
+	if nParams >= 1 && !t.Local(1).IsNil() {
+		lOff = t.Local(1).AsBool()
+	}
+
+	// Decode column blocks. Empty / `{ NIL }` → fall back to "all fields".
+	var blocks []hbrt.Value
+	useAllFields := true
+	if nParams >= 2 && t.Local(2).IsArray() {
+		arr := t.Local(2).AsArray()
+		if arr != nil {
+			for _, it := range arr.Items {
+				if it.IsBlock() {
+					blocks = append(blocks, it)
+					useAllFields = false
+				}
+			}
+		}
+	}
+
+	lAll := true
+	if nParams >= 3 && !t.Local(3).IsNil() {
+		lAll = t.Local(3).AsBool()
+	}
+
+	// Loop bounds — same shape as dbEval.
+	var bFor, bWhile hbrt.Value
+	if nParams >= 4 {
+		bFor = t.Local(4)
+	}
+	if nParams >= 5 {
+		bWhile = t.Local(5)
+	}
+	nCount := -1
+	if nParams >= 6 && !t.Local(6).IsNil() {
+		nCount = t.Local(6).AsInt()
+	}
+	if nParams >= 7 && !t.Local(7).IsNil() {
+		srcArea.GoTo(uint32(t.Local(7).AsInt()))
+	}
+	lRest := false
+	if nParams >= 8 && !t.Local(8).IsNil() {
+		lRest = t.Local(8).AsBool()
+	}
+	// DISPLAY without ALL emits exactly one record; LIST always emits
+	// the full filtered range. Encode the difference by clamping
+	// nCount to 1 when lAll is false and no explicit NEXT was given.
+	if !lAll && nCount < 0 {
+		nCount = 1
+	}
+	if !lRest && lAll && (nParams < 7 || t.Local(7).IsNil()) {
+		srcArea.GoTop()
+	}
+
+	nFields := srcArea.FieldCount()
+	scanned := 0
+	for !srcArea.EOF() {
+		if nCount >= 0 && scanned >= nCount {
+			break
+		}
+		if bWhile.IsBlock() {
+			t.PendingParams2(0)
+			bWhile.AsBlock().Fn(t)
+			if !t.GetRetValue().AsBool() {
+				break
+			}
+		}
+		emit := true
+		if bFor.IsBlock() {
+			t.PendingParams2(0)
+			bFor.AsBlock().Fn(t)
+			emit = t.GetRetValue().AsBool()
+		}
+		if emit {
+			parts := []string{}
+			if !lOff {
+				parts = append(parts, fmt.Sprintf("%6d", srcArea.RecNo()))
+			}
+			if useAllFields {
+				for i := 0; i < nFields; i++ {
+					v, _ := srcArea.GetValue(i)
+					parts = append(parts, valueToDisplay(v))
+				}
+			} else {
+				for _, blk := range blocks {
+					t.PendingParams2(0)
+					blk.AsBlock().Fn(t)
+					parts = append(parts, valueToDisplay(t.GetRetValue()))
+				}
+			}
+			fmt.Print("\r\n" + strings.Join(parts, " "))
+		}
+		srcArea.Skip(1)
+		scanned++
+	}
+	t.RetNil()
+}
+
 // stableSort is a tiny insertion sort for small N (typical DBF SORT
 // targets are interactive datasets). Avoids a sort import dependency.
 func stableSort(rows [][]hbrt.Value, less func(i, j int) bool) {
--- a/hbrtl/register.go
+++ b/hbrtl/register.go
@@ -201,6 +201,7 @@ func RegisterRTL(vm *hbrt.VM) {
 		hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage),
 		hbrt.Sym("__DBCOPY", hbrt.FsPublic, rtlDbCopy),
 		hbrt.Sym("__DBSORT", hbrt.FsPublic, rtlDbSort),
+		hbrt.Sym("__DBLIST", hbrt.FsPublic, rtlDbList),
 		hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter),
 		hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter),
 		hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),