feat(pp): LIST / DISPLAY via std.ch + four PP completeness fixes

`LIST [<fields>] [OFF] [FOR ...] [WHILE ...] [NEXT ...] [RECORD ...]
[REST] [ALL]` and `DISPLAY [<fields>] [OFF] [FOR ...] ... [ALL]`
reach the parser as plain function calls to a new RTL primitive
__dbList (rtlDbList in hbrtl/database.go).

Implementation: walk the workarea under dbEval-style FOR/WHILE/NEXT/
RECORD/REST bounds. For each visible record, evaluate each column
block and emit the rendered values via valueToDisplay (the same
formatter QOut already uses). Empty fields list defaults to
"all fields". OFF suppresses the record-number prefix.
LIST always emits the full filtered range; DISPLAY without ALL emits
only the current record (encoded as nCount=1). TO PRINTER / TO FILE
clauses are not yet wired through — for now everything goes to
stdout.

Wiring up LIST/DISPLAY surfaced four further gaps in PP that were
silently masking bugs in any rule with multiple word-list / list /
optional clauses chained together:

  * matchSegment refused MarkerWordList inside `[...]`. The LIST
    rule's `[<off:OFF>]` clause therefore never set the off
    capture, and `<.off.>` substituted to nothing instead of .T./.F.
    matchSegment now matches WordList markers the same way the
    top-level matcher does.

  * `<v,...>` and `<(f)>` capture stop boundaries didn't include the
    values of following MarkerWordList markers. For
    `[<v,...>] [<off:OFF>] [<all:ALL>]` against `LIST id, name OFF`,
    the v list would happily eat OFF. New addStopFrom helper
    contributes both literal keywords and word-list values; both
    matchSegment's MarkerList branch and captureExpression now use
    it.

  * Optional-repeat loop in matchPattern merged a no-progress
    iteration's empty capture into the running multi-capture string
    (with the `\x01` separator) before the no-progress break check
    fired. So a successful first iteration's value got contaminated
    and the substitution loop then skipped it as multi-capture
    garbage. The merge now happens after the progress check.

  * Unreferenced `<.name.>` markers (optional clauses that didn't
    match in the input) were getting cleaned up to empty by the
    generic marker scrubber instead of the .F. sentinel Harbour's
    std.ch expects. New replaceUnreferencedLogify pass mirrors the
    existing replaceUnreferencedBlockify and runs just before the
    cleanup.

Parser cleanup: LIST and DISPLAY removed from the IDENT-statement
no-op switch in both parseIdentStmt and parseExprStmt.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 15:19:36 +09:00
parent 6dbc34b34b
commit 1cc2d94927
5 changed files with 254 additions and 30 deletions

View File

@@ -1159,7 +1159,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
"LABEL", "REPORT", "ACCEPT", "INPUT",
"JOIN", "RELEASE", "SAVE", "RESTORE",
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
"WITH", "CLEAR", "DISPLAY", "LIST":
"WITH", "CLEAR":
p.advance()
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
p.advance()

View File

@@ -328,6 +328,16 @@ func (r *Rule) matchPattern(line string) map[string]string {
li = snapshotLi
break
}
// No-progress matches can happen when the body is just
// a list/regular marker that immediately hits a stop
// boundary on this iteration — its captured value is
// empty. Don't merge those into captures, otherwise an
// earlier successful iteration's value gets contaminated
// with the `\x01`-separator form and the result-template
// substitution skips it as multi-capture garbage.
if newLi == snapshotLi {
break
}
for k, v := range iterCaps {
if prev, hit := captures[k]; hit && prev != "" {
captures[k] = prev + "\x01" + v
@@ -336,9 +346,6 @@ func (r *Rule) matchPattern(line string) map[string]string {
}
}
li = newLi
if li == snapshotLi {
break // no progress — avoid infinite loop
}
}
pi = bodyEnd + 1 // past ]
} else if pw == "]" {
@@ -420,25 +427,41 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
inner := pw[1 : len(pw)-1]
m := parseOneMarker(inner)
switch m.Type {
case MarkerWordList:
// Match one of the listed words. If the current line
// token isn't in the allowed set, the segment fails to
// match — same behavior as the top-level matcher.
w := lineWords[li]
matched := false
for _, allowed := range m.ListValues {
if caseSens {
if w == allowed {
matched = true
break
}
} else if strings.EqualFold(w, allowed) {
matched = true
break
}
}
if !matched {
return nil, startLi, false
}
caps[m.Name] = w
li++
continue
case MarkerList:
// Capture comma-separated tokens until we hit the
// segment's next literal, an outer literal, or the end
// of the line. Paren-balanced so `f(a,b)` inside the
// list doesn't terminate prematurely. Mirrors the main
// matchPattern's MarkerList branch.
// segment's next literal, an outer literal, or one of
// the limited values of a following MarkerWordList
// (e.g. `<off:OFF>` — OFF is the only token that can
// match it, so the list before it must stop at OFF).
// Paren-balanced so `f(a,b)` inside the list doesn't
// terminate prematurely. Mirrors the main matchPattern's
// MarkerList branch.
stop := map[string]struct{}{}
for _, w := range segment[pi+1:] {
if w != "" && w != "[" && w != "]" &&
!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
stop[strings.ToUpper(w)] = struct{}{}
}
}
for _, w := range outerTail {
if w != "" && w != "[" && w != "]" &&
!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
stop[strings.ToUpper(w)] = struct{}{}
}
}
addStopFrom(stop, segment[pi+1:])
addStopFrom(stop, outerTail)
var parts []string
depth := 0
for li < len(lineWords) {
@@ -499,6 +522,30 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
return caps, li, true
}
// addStopFrom collects, into `stop`, every word that a later pattern
// position in `pw` is able to consume, so that a list or regular
// capture placed before those positions knows where it has to end.
//
// Two kinds of pattern tokens contribute:
//   - plain literals (anything that is not a bracket and not a marker);
//   - word-list markers (`<name:A,B,C>`), which add each listed value
//     because such a marker matches nothing but its own words.
//
// All other markers, the `[` / `]` brackets and empty tokens add
// nothing. Keys are stored uppercased; how the lookup is folded is up
// to the caller.
func addStopFrom(stop map[string]struct{}, pw []string) {
	for _, tok := range pw {
		switch {
		case tok == "", tok == "[", tok == "]":
			// Structural token — never a stop word.
		case strings.HasPrefix(tok, "<") && strings.HasSuffix(tok, ">"):
			marker := parseOneMarker(tok[1 : len(tok)-1])
			if marker.Type != MarkerWordList {
				// Open-ended marker: it can match anything, so it
				// gives no usable boundary.
				continue
			}
			for _, word := range marker.ListValues {
				stop[strings.ToUpper(word)] = struct{}{}
			}
		default:
			stop[strings.ToUpper(tok)] = struct{}{}
		}
	}
}
// firstLiteral returns the first non-marker, non-bracket token in pw,
// or "" if none. Used to give matchSegment a stop-boundary drawn from
// the outer pattern when its body ends in a regular marker.
@@ -710,12 +757,44 @@ func (r *Rule) applyResult(captures map[string]string) string {
// (matches Harbour: empty FOR/WHILE → NIL → bypass the condition).
result = replaceUnreferencedBlockify(result)
// Same idea for `<.name.>`: a missing marker logifies to .F.,
// matching Harbour's behavior of "absent optional clause => .F."
// for OFF / ALL / REST / etc.
result = replaceUnreferencedLogify(result)
// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
result = cleanUnreferencedMarkers(result)
return result
}
// replaceUnreferencedLogify returns s with every `<.ident.>` marker
// replaced by the literal `.F.`. A marker is `<.`, then an identifier
// (an ASCII letter or underscore followed by any number of ASCII
// letters, digits or underscores), then `.>`. Anything that does not
// have exactly that shape is copied through unchanged.
func replaceUnreferencedLogify(s string) string {
	n := len(s)

	isIdentStart := func(c byte) bool {
		return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
	}
	isIdentPart := func(c byte) bool {
		return isIdentStart(c) || ('0' <= c && c <= '9')
	}

	// markerEnd reports the index just past a `<.ident.>` marker that
	// begins at start, or -1 when no such marker begins there.
	markerEnd := func(start int) int {
		if start+2 >= n || s[start] != '<' || s[start+1] != '.' {
			return -1
		}
		k := start + 2
		if !isIdentStart(s[k]) {
			return -1
		}
		for k++; k < n && isIdentPart(s[k]); k++ {
		}
		if k+1 >= n || s[k] != '.' || s[k+1] != '>' {
			return -1
		}
		return k + 2
	}

	var b strings.Builder
	for pos := 0; pos < n; {
		if end := markerEnd(pos); end >= 0 {
			b.WriteString(".F.")
			pos = end
			continue
		}
		b.WriteByte(s[pos])
		pos++
	}
	return b.String()
}
// replaceUnreferencedBlockify rewrites every remaining `<{ident}>` to
// NIL. Run after the main substitution loop, before the generic
// unreferenced-marker cleanup.
@@ -1121,17 +1200,14 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
// must stop at TO's *successor* — but we don't know which
// successor will actually be present in the input. Stopping on
// any of them keeps `<(f)>` from swallowing a trailing
// `FOR x > 5` clause.
// `FOR x > 5` clause. MarkerWordList values count too — a
// `<off:OFF>` marker can only match the word OFF, so prior
// captures must stop at it.
stopSet := map[string]struct{}{}
addStopFrom(stopSet, patternWords[nextPi:])
var delims []string
for pi := nextPi; pi < len(patternWords); pi++ {
pw := patternWords[pi]
if pw == "" || pw == "[" || pw == "]" {
continue
}
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
continue
}
delims = append(delims, pw)
for k := range stopSet {
delims = append(delims, k)
}
if len(delims) > 0 {

View File

@@ -82,6 +82,23 @@
__dbSort( <(f)>, { <(fields)> }, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
/* --- console output ---
LIST emits every record matching the filter; DISPLAY without ALL
shows just the current record. Both expand to __dbList; its third
argument (lAll) distinguishes them. TO PRINTER / TO FILE clauses are
not part of these rules yet; stdout is the only sink for now. */
#command LIST [<v,...>] [<off:OFF>] ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
[RECORD <rec>] [<rest:REST>] [ALL] => ;
__dbList( <.off.>, { <{v}> }, .T., ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
#command DISPLAY [<v,...>] [<off:OFF>] ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
[RECORD <rec>] [<rest:REST>] [<all:ALL>] => ;
__dbList( <.off.>, { <{v}> }, <.all.>, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
/* --- bulk maintenance --- */
#command REINDEX => DbReindex()
#command PACK => DbPack()

View File

@@ -8,6 +8,7 @@
package hbrtl
import (
"fmt"
"strings"
"five/hbrt"
@@ -1117,6 +1118,135 @@ func rtlDbSort(t *hbrt.Thread) {
t.RetBool(true)
}
// rtlDbList implements __dbList(lOff, aBlocks, lAll, bFor, bWhile,
// nNext, nRec, lRest, lPrn, cFile): it writes the selected records of
// the current workarea to stdout, one line per record. It backs both
// the LIST and the DISPLAY command rules in std.ch.
//
// Arguments (all optional; missing or NIL means "not given"):
//
//	lOff     .T. suppresses the record-number prefix.
//	aBlocks  column code blocks; when the array holds no block (empty,
//	         or only NIL placeholders) every field is printed instead.
//	lAll     defaults to .T.; LIST always passes .T., DISPLAY passes
//	         .T. only for DISPLAY ALL.
//	bFor     per-record filter; records it rejects are skipped.
//	bWhile   the scan stops at the first record it rejects.
//	nNext    number of records to scan, starting at the current one.
//	nRec     record number to position on; only that record is scanned.
//	lRest    .T. scans from the current record instead of from the top.
//	lPrn, cFile  accepted but ignored; output always goes to stdout.
//
// Scope resolution follows the Clipper/Harbour __dbList + dbEval rules,
// in this order of precedence:
//
//  1. RECORD n          -> go to n, scan that single record.
//  2. NEXT n            -> scan n records from the current position
//     (n <= 0 scans nothing).
//  3. no scope at all and lAll is .F. (plain DISPLAY)
//     -> scan just the current record.
//  4. WHILE or REST     -> scan from the current position to EOF.
//  5. otherwise         -> rewind to the top and scan to EOF.
//
// FOR never changes where the scan starts or how many records are
// visited; it only decides which visited records are printed.
//
// The function always returns NIL to the caller. If there is no
// workarea manager or no current workarea it returns without output.
func rtlDbList(t *hbrt.Thread) {
	nParams := t.ParamCount()
	t.Frame(nParams, 0)
	defer t.EndProcFast()

	wam := getWA(t)
	if wam == nil {
		t.RetNil()
		return
	}
	srcArea := wam.Current()
	if srcArea == nil {
		t.RetNil()
		return
	}

	// given reports whether positional argument i was passed and is
	// not NIL.
	given := func(i int) bool {
		return nParams >= i && !t.Local(i).IsNil()
	}

	lOff := given(1) && t.Local(1).AsBool()

	// Decode column blocks. Empty / `{ NIL }` falls back to "all fields".
	var blocks []hbrt.Value
	if nParams >= 2 && t.Local(2).IsArray() {
		if arr := t.Local(2).AsArray(); arr != nil {
			for _, it := range arr.Items {
				if it.IsBlock() {
					blocks = append(blocks, it)
				}
			}
		}
	}
	useAllFields := len(blocks) == 0

	lAll := !given(3) || t.Local(3).AsBool()

	var bFor, bWhile hbrt.Value
	if nParams >= 4 {
		bFor = t.Local(4)
	}
	if nParams >= 5 {
		bWhile = t.Local(5)
	}
	hasFor := bFor.IsBlock()
	hasWhile := bWhile.IsBlock()

	hasNext := given(6)
	hasRec := given(7)
	lRest := given(8) && t.Local(8).AsBool()

	// Resolve the scan scope. limit < 0 means "no record-count bound".
	limit := -1
	switch {
	case hasRec:
		// RECORD n names exactly one record; it must not run on to EOF.
		srcArea.GoTo(uint32(t.Local(7).AsInt()))
		limit = 1
	case hasNext:
		// NEXT n counts from the current record, so no rewind here.
		limit = t.Local(6).AsInt()
		if limit < 0 {
			limit = 0 // keep negatives from aliasing the "unbounded" sentinel
		}
	case !lAll && !hasFor && !hasWhile && !lRest:
		// Plain DISPLAY: only the current record.
		limit = 1
	case !hasWhile && !lRest:
		// Default scope is ALL: start from the first record.
		srcArea.GoTop()
	}

	// evalCond runs a zero-argument condition block and returns its
	// result as a bool.
	evalCond := func(b hbrt.Value) bool {
		t.PendingParams2(0)
		b.AsBlock().Fn(t)
		return t.GetRetValue().AsBool()
	}

	nFields := srcArea.FieldCount()
	for scanned := 0; !srcArea.EOF(); scanned++ {
		if limit >= 0 && scanned >= limit {
			break
		}
		if hasWhile && !evalCond(bWhile) {
			break
		}
		if !hasFor || evalCond(bFor) {
			var parts []string
			if !lOff {
				parts = append(parts, fmt.Sprintf("%6d", srcArea.RecNo()))
			}
			if useAllFields {
				for i := 0; i < nFields; i++ {
					// Second result deliberately ignored: whatever value
					// comes back is rendered, so one unreadable field
					// does not abort the listing.
					v, _ := srcArea.GetValue(i)
					parts = append(parts, valueToDisplay(v))
				}
			} else {
				for _, blk := range blocks {
					t.PendingParams2(0)
					blk.AsBlock().Fn(t)
					parts = append(parts, valueToDisplay(t.GetRetValue()))
				}
			}
			// Newline goes first, so each record starts on a fresh line.
			fmt.Print("\r\n" + strings.Join(parts, " "))
		}
		srcArea.Skip(1)
	}
	t.RetNil()
}
// stableSort is a tiny insertion sort for small N (typical DBF SORT
// targets are interactive datasets). Avoids a sort import dependency.
func stableSort(rows [][]hbrt.Value, less func(i, j int) bool) {

View File

@@ -201,6 +201,7 @@ func RegisterRTL(vm *hbrt.VM) {
hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage),
hbrt.Sym("__DBCOPY", hbrt.FsPublic, rtlDbCopy),
hbrt.Sym("__DBSORT", hbrt.FsPublic, rtlDbSort),
hbrt.Sym("__DBLIST", hbrt.FsPublic, rtlDbList),
hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter),
hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter),
hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),