From 1cc2d949275456393ffbc8eee17342a7c69185b5 Mon Sep 17 00:00:00 2001
From: CharlesKWON <charleskwonohjun@gmail.com>
Date: Thu, 30 Apr 2026 15:19:36 +0900
Subject: [PATCH] feat(pp): LIST / DISPLAY via std.ch + four PP completeness
 fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`LIST [<fields>] [OFF] [FOR ...] [WHILE ...] [NEXT ...] [RECORD ...]
[REST] [ALL]` and `DISPLAY [<fields>] [OFF] [FOR ...] ... [ALL]`
reach the parser as plain function calls to a new RTL primitive
__dbList (rtlDbList in hbrtl/database.go).

Implementation: walk the workarea under dbEval-style FOR/WHILE/NEXT/
RECORD/REST bounds. For each visible record, evaluate each column
block and emit the rendered values via valueToDisplay (the same
formatter QOut already uses). Empty fields list defaults to
"all fields". OFF suppresses the record-number prefix.
LIST always emits the full filtered range; DISPLAY without ALL emits
only the current record (encoded as nCount=1). TO PRINTER / TO FILE
clauses are not yet wired through — for now everything goes to
stdout.

Wiring up LIST/DISPLAY surfaced four further gaps in PP that were
silently masking bugs in any rule with multiple word-list / list /
optional clauses chained together:

  * matchSegment refused MarkerWordList inside `[...]`. The LIST
    rule's `[<off:OFF>]` clause therefore never set the off
    capture, and `<.off.>` substituted to nothing instead of .T./.F.
    matchSegment now matches WordList markers the same way the
    top-level matcher does.

  * `<v,...>` and `<(f)>` capture stop boundaries didn't include the
    values of following MarkerWordList markers. For
    `[<v,...>] [<off:OFF>] [<all:ALL>]` against `LIST id, name OFF`,
    the v list would happily eat OFF. New addStopFrom helper
    contributes both literal keywords and word-list values; both
    matchSegment's MarkerList branch and captureExpression now use
    it.

  * Optional-repeat loop in matchPattern merged a no-progress
    iteration's empty capture into the running multi-capture string
    (with the `\x01` separator) before the no-progress break check
    fired. So a successful first iteration's value got contaminated
    and the substitution loop then skipped it as multi-capture
    garbage. The merge now happens after the progress check.

  * Unreferenced `<.name.>` markers (optional clauses that didn't
    match in the input) were getting cleaned up to empty by the
    generic marker scrubber instead of the .F. sentinel Harbour's
    std.ch expects. New replaceUnreferencedLogify pass mirrors the
    existing replaceUnreferencedBlockify and runs just before the
    cleanup.

Parser cleanup: LIST and DISPLAY removed from the IDENT-statement
no-op switch in parseIdentStmt (the only parser hunk in this patch).

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 compiler/parser/parser.go |   2 +-
 compiler/pp/command.go    | 134 +++++++++++++++++++++++++++++---------
 compiler/pp/std.ch        |  17 +++++
 hbrtl/database.go         | 130 ++++++++++++++++++++++++++++++++++++
 hbrtl/register.go         |   1 +
 5 files changed, 254 insertions(+), 30 deletions(-)
diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go
index 104f12c..717cfd0 100644
--- a/compiler/parser/parser.go
+++ b/compiler/parser/parser.go
@@ -1159,7 +1159,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
 		"LABEL", "REPORT", "ACCEPT", "INPUT",
 		"JOIN", "RELEASE", "SAVE", "RESTORE",
 		"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
-		"WITH", "CLEAR", "DISPLAY", "LIST":
+		"WITH", "CLEAR":
 		p.advance()
 		for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
 			p.advance()
diff --git a/compiler/pp/command.go b/compiler/pp/command.go
index 3199e17..882c88f 100644
--- a/compiler/pp/command.go
+++ b/compiler/pp/command.go
@@ -328,6 +328,16 @@ func (r *Rule) matchPattern(line string) map[string]string {
 					li = snapshotLi
 					break
 				}
+				// No-progress matches can happen when the body is just
+				// a list/regular marker that immediately hits a stop
+				// boundary on this iteration — its captured value is
+				// empty. Don't merge those into captures, otherwise an
+				// earlier successful iteration's value gets contaminated
+				// with the `\x01`-separator form and the result-template
+				// substitution skips it as multi-capture garbage.
+				if newLi == snapshotLi {
+					break
+				}
 				for k, v := range iterCaps {
 					if prev, hit := captures[k]; hit && prev != "" {
 						captures[k] = prev + "\x01" + v
@@ -336,9 +346,6 @@ func (r *Rule) matchPattern(line string) map[string]string {
 					}
 				}
 				li = newLi
-				if li == snapshotLi {
-					break // no progress — avoid infinite loop
-				}
 			}
 			pi = bodyEnd + 1 // past ]
 		} else if pw == "]" {
@@ -420,25 +427,41 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
 			inner := pw[1 : len(pw)-1]
 			m := parseOneMarker(inner)
 			switch m.Type {
+			case MarkerWordList:
+				// Match one of the listed words. If the current line
+				// token isn't in the allowed set, the segment fails to
+				// match — same behavior as the top-level matcher.
+				w := lineWords[li]
+				matched := false
+				for _, allowed := range m.ListValues {
+					if caseSens {
+						if w == allowed {
+							matched = true
+							break
+						}
+					} else if strings.EqualFold(w, allowed) {
+						matched = true
+						break
+					}
+				}
+				if !matched {
+					return nil, startLi, false
+				}
+				caps[m.Name] = w
+				li++
+				continue
 			case MarkerList:
 				// Capture comma-separated tokens until we hit the
-				// segment's next literal, an outer literal, or the end
-				// of the line. Paren-balanced so `f(a,b)` inside the
-				// list doesn't terminate prematurely. Mirrors the main
-				// matchPattern's MarkerList branch.
+				// segment's next literal, an outer literal, or one of
+				// the limited values of a following MarkerWordList
+				// (e.g. `<off:OFF>` — OFF is the only token that can
+				// match it, so the list before it must stop at OFF).
+				// Paren-balanced so `f(a,b)` inside the list doesn't
+				// terminate prematurely. Mirrors the main matchPattern's
+				// MarkerList branch.
 				stop := map[string]struct{}{}
-				for _, w := range segment[pi+1:] {
-					if w != "" && w != "[" && w != "]" &&
-						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
-						stop[strings.ToUpper(w)] = struct{}{}
-					}
-				}
-				for _, w := range outerTail {
-					if w != "" && w != "[" && w != "]" &&
-						!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
-						stop[strings.ToUpper(w)] = struct{}{}
-					}
-				}
+				addStopFrom(stop, segment[pi+1:])
+				addStopFrom(stop, outerTail)
 				var parts []string
 				depth := 0
 				for li < len(lineWords) {
@@ -499,6 +522,30 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
 	return caps, li, true
 }
 
+// addStopFrom adds to `stop` the upper-cased form of every token in
+// `pw` that a later pattern position can only match literally: each
+// bare literal word and each listed value of a MarkerWordList marker
+// (`<name:A,B,C>`). Empty tokens, `[` / `]` and every other marker
+// kind add nothing. Preceding list / expression captures use the set
+// to know where to stop. Keys are always stored upper-cased.
+func addStopFrom(stop map[string]struct{}, pw []string) {
+	for _, w := range pw {
+		if w == "" || w == "[" || w == "]" {
+			continue
+		}
+		if strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">") {
+			inner := w[1 : len(w)-1] // marker text without the angle brackets
+			if m := parseOneMarker(inner); m.Type == MarkerWordList {
+				for _, v := range m.ListValues {
+					stop[strings.ToUpper(v)] = struct{}{}
+				}
+			}
+			continue // any other marker kind contributes no stop word
+		}
+		stop[strings.ToUpper(w)] = struct{}{}
+	}
+}
+
 // firstLiteral returns the first non-marker, non-bracket token in pw,
 // or "" if none. Used to give matchSegment a stop-boundary drawn from
 // the outer pattern when its body ends in a regular marker.
@@ -710,12 +757,44 @@ func (r *Rule) applyResult(captures map[string]string) string {
 	// (matches Harbour: empty FOR/WHILE → NIL → bypass the condition).
 	result = replaceUnreferencedBlockify(result)
 
+	// Same idea for `<.name.>`: a missing marker logifies to .F.,
+	// matching Harbour's behavior of "absent optional clause => .F."
+	// for OFF / ALL / REST / etc.
+	result = replaceUnreferencedLogify(result)
+
 	// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
 	result = cleanUnreferencedMarkers(result)
 
 	return result
 }
 
+// replaceUnreferencedLogify returns s with every `<.ident.>` marker
+// (ident = ASCII letter or `_`, then letters, digits or `_`) replaced
+// by `.F.`, the value Harbour's std.ch expects for an absent clause.
+func replaceUnreferencedLogify(s string) string {
+	var out strings.Builder
+	i := 0
+	for i < len(s) {
+		if i+2 < len(s) && s[i] == '<' && s[i+1] == '.' { // candidate "<." opener
+			j := i + 2
+			if j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z')) {
+				j++
+				for j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z') || (s[j] >= '0' && s[j] <= '9')) {
+					j++
+				}
+				if j+1 < len(s) && s[j] == '.' && s[j+1] == '>' { // closing ".>"
+					out.WriteString(".F.")
+					i = j + 2
+					continue
+				}
+			}
+		}
+		out.WriteByte(s[i]) // not a complete marker: copy the byte unchanged
+		i++
+	}
+	return out.String()
+}
+
 // replaceUnreferencedBlockify rewrites every remaining `<{ident}>` to
 // NIL. Run after the main substitution loop, before the generic
 // unreferenced-marker cleanup.
@@ -1121,17 +1200,14 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
 	// must stop at TO's *successor* — but we don't know which
 	// successor will actually be present in the input. Stopping on
 	// any of them keeps `<(f)>` from swallowing a trailing
-	// `FOR x > 5` clause.
+	// `FOR x > 5` clause. MarkerWordList values count too — a
+	// `<off:OFF>` marker can only match the word OFF, so prior
+	// captures must stop at it.
+	stopSet := map[string]struct{}{}
+	addStopFrom(stopSet, patternWords[nextPi:])
 	var delims []string
-	for pi := nextPi; pi < len(patternWords); pi++ {
-		pw := patternWords[pi]
-		if pw == "" || pw == "[" || pw == "]" {
-			continue
-		}
-		if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
-			continue
-		}
-		delims = append(delims, pw)
+	for k := range stopSet {
+		delims = append(delims, k)
 	}
 
 	if len(delims) > 0 {
diff --git a/compiler/pp/std.ch b/compiler/pp/std.ch
index d1831fe..c8fb4b1 100644
--- a/compiler/pp/std.ch
+++ b/compiler/pp/std.ch
@@ -82,6 +82,23 @@
          __dbSort( <(f)>, { <(fields)> }, ;
                    <{for}>, <{while}>, <next>, <rec>, <.rest.> )
 
+/* --- console output ---
+   LIST emits every record matching the filter; DISPLAY without ALL
+   shows just the current record. Both expand to __dbList; its third
+   argument (lAll) tells them apart. The rules below have no
+   TO PRINTER / TO FILE clause yet, so output always goes to stdout. */
+#command LIST [<v,...>] [<off:OFF>] ;
+              [FOR <for>] [WHILE <while>] [NEXT <next>] ;
+              [RECORD <rec>] [<rest:REST>] [ALL] => ;
+         __dbList( <.off.>, { <{v}> }, .T., ;
+                   <{for}>, <{while}>, <next>, <rec>, <.rest.> )
+
+#command DISPLAY [<v,...>] [<off:OFF>] ;
+                 [FOR <for>] [WHILE <while>] [NEXT <next>] ;
+                 [RECORD <rec>] [<rest:REST>] [<all:ALL>] => ;
+         __dbList( <.off.>, { <{v}> }, <.all.>, ;
+                   <{for}>, <{while}>, <next>, <rec>, <.rest.> )
+
 /* --- bulk maintenance --- */
 #command REINDEX                         => DbReindex()
 #command PACK                            => DbPack()
diff --git a/hbrtl/database.go b/hbrtl/database.go
index 3aaa950..95dcca4 100644
--- a/hbrtl/database.go
+++ b/hbrtl/database.go
@@ -8,6 +8,7 @@
 package hbrtl
 
 import (
+	"fmt"
 	"strings"
 
 	"five/hbrt"
@@ -1117,6 +1118,135 @@ func rtlDbSort(t *hbrt.Thread) {
 	t.RetBool(true)
 }
 
+// rtlDbList implements __dbList(lOff, aBlocks, lAll, bFor, bWhile,
+// nNext, nRec, lRest, lPrn, cFile): it prints records of the current
+// workarea to stdout, each as a "\r\n"-prefixed line of space-separated
+// columns, and returns NIL. Used by LIST and DISPLAY in std.ch.
+//
+//   - lOff: .T. drops the record-number prefix.
+//   - aBlocks: column code blocks; non-block items are skipped, and
+//     when no block is left every field of the workarea is printed.
+//   - lAll: .F. (plain DISPLAY) limits the scan to the current record
+//     unless NEXT or RECORD is given; .T. or NIL scans the whole scope.
+//   - bFor filters records; bWhile ends the scan when it yields .F.
+//   - lPrn / cFile (TO PRINTER / TO FILE) are never read: output
+//     always goes to stdout.
+//
+// Scope follows the xBase rules Harbour's dbEval applies: RECORD n
+// visits only record n; NEXT n visits n records from the current one;
+// WHILE and REST start at the current record; anything else starts at
+// the top. When a record limit ends the scan the pointer stays on the
+// last visited record, so a plain DISPLAY does not move it.
+func rtlDbList(t *hbrt.Thread) {
+	nParams := t.ParamCount()
+	t.Frame(nParams, 0)
+	defer t.EndProcFast()
+
+	wam := getWA(t)
+	if wam == nil {
+		t.RetNil()
+		return
+	}
+	srcArea := wam.Current()
+	if srcArea == nil {
+		t.RetNil()
+		return
+	}
+
+	// given reports whether argument i was passed and is not NIL.
+	given := func(i int) bool { return nParams >= i && !t.Local(i).IsNil() }
+	lOff := given(1) && t.Local(1).AsBool()
+	lAll := !given(3) || t.Local(3).AsBool()
+	lRest := given(8) && t.Local(8).AsBool()
+
+	// Keep only real code blocks; with none left (`{ NIL }` or an empty
+	// array) the loop below prints every field instead.
+	var blocks []hbrt.Value
+	if nParams >= 2 && t.Local(2).IsArray() {
+		if arr := t.Local(2).AsArray(); arr != nil {
+			for _, it := range arr.Items {
+				if it.IsBlock() {
+					blocks = append(blocks, it)
+				}
+			}
+		}
+	}
+
+	var bFor, bWhile hbrt.Value
+	if nParams >= 4 {
+		bFor = t.Local(4)
+	}
+	if nParams >= 5 {
+		bWhile = t.Local(5)
+	}
+
+	// remaining counts the records still to visit; -1 means no limit.
+	remaining := -1
+	switch {
+	case given(7):
+		// RECORD n: exactly that record, whatever else was given.
+		srcArea.GoTo(uint32(t.Local(7).AsInt()))
+		remaining = 1
+	case given(6):
+		// NEXT n: n records from the current one; n <= 0 visits none.
+		if remaining = t.Local(6).AsInt(); remaining < 0 {
+			remaining = 0
+		}
+	case !lAll:
+		// DISPLAY without ALL: the current record only.
+		remaining = 1
+	case bWhile.IsBlock() || lRest:
+		// WHILE / REST continue from the current record.
+	default:
+		srcArea.GoTop()
+	}
+
+	nFields := srcArea.FieldCount()
+	for remaining != 0 && !srcArea.EOF() {
+		if bWhile.IsBlock() {
+			t.PendingParams2(0)
+			bWhile.AsBlock().Fn(t)
+			if !t.GetRetValue().AsBool() {
+				break
+			}
+		}
+		emit := true
+		if bFor.IsBlock() {
+			t.PendingParams2(0)
+			bFor.AsBlock().Fn(t)
+			emit = t.GetRetValue().AsBool()
+		}
+		if emit {
+			var parts []string
+			if !lOff {
+				parts = append(parts, fmt.Sprintf("%6d", srcArea.RecNo()))
+			}
+			if len(blocks) == 0 {
+				for i := 0; i < nFields; i++ {
+					v, _ := srcArea.GetValue(i)
+					parts = append(parts, valueToDisplay(v))
+				}
+			} else {
+				for _, blk := range blocks {
+					t.PendingParams2(0)
+					blk.AsBlock().Fn(t)
+					parts = append(parts, valueToDisplay(t.GetRetValue()))
+				}
+			}
+			fmt.Print("\r\n" + strings.Join(parts, " "))
+		}
+		// Stop before skipping once the limit is reached, so the record
+		// pointer rests on the last visited record.
+		if remaining > 0 {
+			if remaining--; remaining == 0 {
+				break
+			}
+		}
+		srcArea.Skip(1)
+	}
+	t.RetNil()
+}
+
 // stableSort is a tiny insertion sort for small N (typical DBF SORT
 // targets are interactive datasets). Avoids a sort import dependency.
 func stableSort(rows [][]hbrt.Value, less func(i, j int) bool) {
diff --git a/hbrtl/register.go b/hbrtl/register.go
index bc21db8..212f552 100644
--- a/hbrtl/register.go
+++ b/hbrtl/register.go
@@ -201,6 +201,7 @@ func RegisterRTL(vm *hbrt.VM) {
 		hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage),
 		hbrt.Sym("__DBCOPY", hbrt.FsPublic, rtlDbCopy),
 		hbrt.Sym("__DBSORT", hbrt.FsPublic, rtlDbSort),
+		hbrt.Sym("__DBLIST", hbrt.FsPublic, rtlDbList),
 		hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter),
 		hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter),
 		hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),