feat(pp): LIST / DISPLAY via std.ch + four PP completeness fixes
`LIST [<fields>] [OFF] [FOR ...] [WHILE ...] [NEXT ...] [RECORD ...]
[REST] [ALL]` and `DISPLAY [<fields>] [OFF] [FOR ...] ... [ALL]`
reach the parser as plain function calls to a new RTL primitive
__dbList (rtlDbList in hbrtl/database.go).
Implementation: walk the workarea under dbEval-style FOR/WHILE/NEXT/
RECORD/REST bounds. For each visible record, evaluate each column
block and emit the rendered values via valueToDisplay (the same
formatter QOut already uses). Empty fields list defaults to
"all fields". OFF suppresses the record-number prefix.
LIST always emits the full filtered range; DISPLAY without ALL emits
only the current record (encoded as nCount=1). TO PRINTER / TO FILE
clauses are not yet wired through — for now everything goes to
stdout.
Wiring up LIST/DISPLAY surfaced four further gaps in PP that were
silently masking bugs in any rule with multiple word-list / list /
optional clauses chained together:
* matchSegment refused MarkerWordList inside `[...]`. The LIST
rule's `[<off:OFF>]` clause therefore never set the off
capture, and `<.off.>` substituted to nothing instead of .T./.F.
matchSegment now matches WordList markers the same way the
top-level matcher does.
* `<v,...>` and `<(f)>` capture stop boundaries didn't include the
values of following MarkerWordList markers. For
`[<v,...>] [<off:OFF>] [<all:ALL>]` against `LIST id, name OFF`,
the v list would happily eat OFF. New addStopFrom helper
contributes both literal keywords and word-list values; both
matchSegment's MarkerList branch and captureExpression now use
it.
* Optional-repeat loop in matchPattern merged a no-progress
iteration's empty capture into the running multi-capture string
(with the `\x01` separator) before the no-progress break check
fired. So a successful first iteration's value got contaminated
and the substitution loop then skipped it as multi-capture
garbage. The merge now happens after the progress check.
* Unreferenced `<.name.>` markers (optional clauses that didn't
match in the input) were getting cleaned up to empty by the
generic marker scrubber instead of the .F. sentinel Harbour's
std.ch expects. New replaceUnreferencedLogify pass mirrors the
existing replaceUnreferencedBlockify and runs just before the
cleanup.
Parser cleanup: LIST and DISPLAY removed from the IDENT-statement
no-op switch in both parseIdentStmt and parseExprStmt.
Gates green:
go test ./... : PASS
FiveSql2 SQL:1999 : 43/43
Harbour compat : 56/56
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1159,7 +1159,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
|
||||
"LABEL", "REPORT", "ACCEPT", "INPUT",
|
||||
"JOIN", "RELEASE", "SAVE", "RESTORE",
|
||||
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
|
||||
"WITH", "CLEAR", "DISPLAY", "LIST":
|
||||
"WITH", "CLEAR":
|
||||
p.advance()
|
||||
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
|
||||
p.advance()
|
||||
|
||||
@@ -328,6 +328,16 @@ func (r *Rule) matchPattern(line string) map[string]string {
|
||||
li = snapshotLi
|
||||
break
|
||||
}
|
||||
// No-progress matches can happen when the body is just
|
||||
// a list/regular marker that immediately hits a stop
|
||||
// boundary on this iteration — its captured value is
|
||||
// empty. Don't merge those into captures, otherwise an
|
||||
// earlier successful iteration's value gets contaminated
|
||||
// with the `\x01`-separator form and the result-template
|
||||
// substitution skips it as multi-capture garbage.
|
||||
if newLi == snapshotLi {
|
||||
break
|
||||
}
|
||||
for k, v := range iterCaps {
|
||||
if prev, hit := captures[k]; hit && prev != "" {
|
||||
captures[k] = prev + "\x01" + v
|
||||
@@ -336,9 +346,6 @@ func (r *Rule) matchPattern(line string) map[string]string {
|
||||
}
|
||||
}
|
||||
li = newLi
|
||||
if li == snapshotLi {
|
||||
break // no progress — avoid infinite loop
|
||||
}
|
||||
}
|
||||
pi = bodyEnd + 1 // past ]
|
||||
} else if pw == "]" {
|
||||
@@ -420,25 +427,41 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
|
||||
inner := pw[1 : len(pw)-1]
|
||||
m := parseOneMarker(inner)
|
||||
switch m.Type {
|
||||
case MarkerWordList:
|
||||
// Match one of the listed words. If the current line
|
||||
// token isn't in the allowed set, the segment fails to
|
||||
// match — same behavior as the top-level matcher.
|
||||
w := lineWords[li]
|
||||
matched := false
|
||||
for _, allowed := range m.ListValues {
|
||||
if caseSens {
|
||||
if w == allowed {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
} else if strings.EqualFold(w, allowed) {
|
||||
matched = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !matched {
|
||||
return nil, startLi, false
|
||||
}
|
||||
caps[m.Name] = w
|
||||
li++
|
||||
continue
|
||||
case MarkerList:
|
||||
// Capture comma-separated tokens until we hit the
|
||||
// segment's next literal, an outer literal, or the end
|
||||
// of the line. Paren-balanced so `f(a,b)` inside the
|
||||
// list doesn't terminate prematurely. Mirrors the main
|
||||
// matchPattern's MarkerList branch.
|
||||
// segment's next literal, an outer literal, or one of
|
||||
// the limited values of a following MarkerWordList
|
||||
// (e.g. `<off:OFF>` — OFF is the only token that can
|
||||
// match it, so the list before it must stop at OFF).
|
||||
// Paren-balanced so `f(a,b)` inside the list doesn't
|
||||
// terminate prematurely. Mirrors the main matchPattern's
|
||||
// MarkerList branch.
|
||||
stop := map[string]struct{}{}
|
||||
for _, w := range segment[pi+1:] {
|
||||
if w != "" && w != "[" && w != "]" &&
|
||||
!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
|
||||
stop[strings.ToUpper(w)] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, w := range outerTail {
|
||||
if w != "" && w != "[" && w != "]" &&
|
||||
!(strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">")) {
|
||||
stop[strings.ToUpper(w)] = struct{}{}
|
||||
}
|
||||
}
|
||||
addStopFrom(stop, segment[pi+1:])
|
||||
addStopFrom(stop, outerTail)
|
||||
var parts []string
|
||||
depth := 0
|
||||
for li < len(lineWords) {
|
||||
@@ -499,6 +522,30 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outer
|
||||
return caps, li, true
|
||||
}
|
||||
|
||||
// addStopFrom merges into `stop` every token that could legally match
|
||||
// the next position in `pw`: bare literals AND each value of any
|
||||
// MarkerWordList (`<name:A,B,C>`) since those markers can match only
|
||||
// their listed words. Used so a preceding list/regular capture knows
|
||||
// to stop before any of them. Always uppercased — the caller decides
|
||||
// whether to do a case-insensitive lookup.
|
||||
func addStopFrom(stop map[string]struct{}, pw []string) {
|
||||
for _, w := range pw {
|
||||
if w == "" || w == "[" || w == "]" {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">") {
|
||||
inner := w[1 : len(w)-1]
|
||||
if m := parseOneMarker(inner); m.Type == MarkerWordList {
|
||||
for _, v := range m.ListValues {
|
||||
stop[strings.ToUpper(v)] = struct{}{}
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
stop[strings.ToUpper(w)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// firstLiteral returns the first non-marker, non-bracket token in pw,
|
||||
// or "" if none. Used to give matchSegment a stop-boundary drawn from
|
||||
// the outer pattern when its body ends in a regular marker.
|
||||
@@ -710,12 +757,44 @@ func (r *Rule) applyResult(captures map[string]string) string {
|
||||
// (matches Harbour: empty FOR/WHILE → NIL → bypass the condition).
|
||||
result = replaceUnreferencedBlockify(result)
|
||||
|
||||
// Same idea for `<.name.>`: a missing marker logifies to .F.,
|
||||
// matching Harbour's behavior of "absent optional clause => .F."
|
||||
// for OFF / ALL / REST / etc.
|
||||
result = replaceUnreferencedLogify(result)
|
||||
|
||||
// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
|
||||
result = cleanUnreferencedMarkers(result)
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// replaceUnreferencedLogify returns s with every `<.ident.>` marker
// replaced by the literal `.F.`. An ident starts with an ASCII letter
// or underscore and continues with ASCII letters, digits or
// underscores; anything that does not match that exact shape is copied
// through byte for byte.
func replaceUnreferencedLogify(s string) string {
	isIdentStart := func(c byte) bool {
		return c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
	}
	isIdentPart := func(c byte) bool {
		return isIdentStart(c) || ('0' <= c && c <= '9')
	}

	// markerEnd reports the index just past a `<.ident.>` marker that
	// begins at pos, or -1 when no such marker starts there.
	markerEnd := func(pos int) int {
		if !strings.HasPrefix(s[pos:], "<.") {
			return -1
		}
		end := pos + 2
		if end >= len(s) || !isIdentStart(s[end]) {
			return -1
		}
		for end < len(s) && isIdentPart(s[end]) {
			end++
		}
		if !strings.HasPrefix(s[end:], ".>") {
			return -1
		}
		return end + 2
	}

	var b strings.Builder
	for pos := 0; pos < len(s); {
		if next := markerEnd(pos); next >= 0 {
			b.WriteString(".F.")
			pos = next
			continue
		}
		b.WriteByte(s[pos])
		pos++
	}
	return b.String()
}
|
||||
|
||||
// replaceUnreferencedBlockify rewrites every remaining `<{ident}>` to
|
||||
// NIL. Run after the main substitution loop, before the generic
|
||||
// unreferenced-marker cleanup.
|
||||
@@ -1121,17 +1200,14 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
|
||||
// must stop at TO's *successor* — but we don't know which
|
||||
// successor will actually be present in the input. Stopping on
|
||||
// any of them keeps `<(f)>` from swallowing a trailing
|
||||
// `FOR x > 5` clause.
|
||||
// `FOR x > 5` clause. MarkerWordList values count too — a
|
||||
// `<off:OFF>` marker can only match the word OFF, so prior
|
||||
// captures must stop at it.
|
||||
stopSet := map[string]struct{}{}
|
||||
addStopFrom(stopSet, patternWords[nextPi:])
|
||||
var delims []string
|
||||
for pi := nextPi; pi < len(patternWords); pi++ {
|
||||
pw := patternWords[pi]
|
||||
if pw == "" || pw == "[" || pw == "]" {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">") {
|
||||
continue
|
||||
}
|
||||
delims = append(delims, pw)
|
||||
for k := range stopSet {
|
||||
delims = append(delims, k)
|
||||
}
|
||||
|
||||
if len(delims) > 0 {
|
||||
|
||||
@@ -82,6 +82,23 @@
|
||||
__dbSort( <(f)>, { <(fields)> }, ;
|
||||
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
|
||||
|
||||
/* --- console output ---
|
||||
LIST emits every record matching the filter; DISPLAY without ALL
|
||||
shows just the current record. Both share __dbList — lAll
|
||||
distinguishes them. TO PRINTER / TO FILE are not matched yet;
|
||||
stdout is the only sink for now. */
|
||||
/* LIST: rewritten to a __dbList() call whose third argument is fixed
   at .T.; the trailing ALL keyword is matched but not captured. */
#command LIST [<v,...>] [<off:OFF>] ;
|
||||
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
|
||||
[RECORD <rec>] [<rest:REST>] [ALL] => ;
|
||||
__dbList( <.off.>, { <{v}> }, .T., ;
|
||||
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
|
||||
|
||||
/* DISPLAY: rewritten to a __dbList() call whose third argument is
   <.all.>, so it reflects whether the ALL keyword was present. */
#command DISPLAY [<v,...>] [<off:OFF>] ;
|
||||
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
|
||||
[RECORD <rec>] [<rest:REST>] [<all:ALL>] => ;
|
||||
__dbList( <.off.>, { <{v}> }, <.all.>, ;
|
||||
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
|
||||
|
||||
/* --- bulk maintenance --- */
|
||||
#command REINDEX => DbReindex()
|
||||
#command PACK => DbPack()
|
||||
|
||||
Reference in New Issue
Block a user