feat(pp): Phase B — COUNT / SUM / AVERAGE via std.ch

Three xBase analytical commands that were silent no-ops in the
parser now execute as Harbour-style PP rewrites:

  COUNT [TO <v>]   [FOR <for>] [WHILE <while>] ... -> dbEval()
  SUM <x> TO <v>   [FOR <for>] [WHILE <while>] ... -> dbEval()
  AVERAGE <x> TO <v> [FOR ...]                     -> __dbAverage()

COUNT and SUM expand to a `<v> := 0 ; dbEval( {|| ... } )` pair
matching harbour-core/include/std.ch verbatim. AVERAGE delegates to
a new RTL function rtlDbAverage (sum + count + divide; returns 0 on
empty match) — the chained-private-variable trick Harbour uses to
keep AVERAGE inline doesn't translate cleanly through Five's PP.

Wiring up these rules surfaced four PP issues that had to be fixed
for the rewrite to even reach the parser:

  * Result template did not implement <{name}> blockify, so a rule
    body like `{|| x := x + <x> }, <{for}>` left the literal text
    `<{for}>` in the output. Added blockify substitution: a captured
    value becomes `{|| <captured> }`, a missing one becomes NIL.
  * findMarkerEnd did not recognise `{`/`}` so unreferenced
    blockify markers were not cleaned up either. Added `{`/`}` to
    its prefix/suffix sets.
  * Optional-clause matching had no view of the outer pattern, so a
    regular marker at the end of `[TO <v>]` would swallow the rest
    of the line — `COUNT TO n FOR x>5` captured `<v>` as
    "n FOR x>5". matchSegment now takes outerTail and stops at its
    first literal.
  * `#command` directives could not span multiple physical lines.
    A trailing `;` is harbour-core's line-continuation marker for
    std.ch and now joins the next line into the directive before
    parsing.

Parser cleanup: COUNT, SUM, AVERAGE removed from the IDENT-statement
no-op switch in parseIdentStmt + parseExprStmt. The remaining xBase
verbs (COPY, SORT, TOTAL, JOIN, LIST, DISPLAY, LABEL, REPORT, ...)
stay in the parser until their RTL backends arrive.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 14:11:20 +09:00
parent c4f85f494c
commit c2e7f7ea27
6 changed files with 209 additions and 13 deletions

View File

@@ -1155,7 +1155,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
// rewritten by compiler/pp/std.ch into function calls before the
// parser sees them.
switch upper {
case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE",
case "COPY", "SORT", "TOTAL", "UPDATE",
"LABEL", "REPORT", "ACCEPT", "INPUT",
"JOIN", "RELEASE", "SAVE", "RESTORE",
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",

View File

@@ -304,9 +304,16 @@ func (r *Rule) matchPattern(line string) map[string]string {
bodyEnd++
}
body := patternWords[bodyStart:bodyEnd]
// Outer-pattern tail (everything after the matching `]`) is
// needed so a regular marker at the end of `body` knows where
// to stop capturing. Without this, `[TO <v>] [FOR <for>]`
// against `TO n FOR age >= 30` would let `<v>` swallow the
// rest of the line because `body` itself has no literal that
// follows the marker.
outerTail := patternWords[bodyEnd+1:]
for li < len(lineWords) {
snapshotLi := li
iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens)
iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens, outerTail)
if !ok {
li = snapshotLi
break
@@ -380,7 +387,7 @@ func (r *Rule) matchPattern(line string) map[string]string {
// A "mini-matcher" that mirrors the main loop for MarkerRegular and
// literal keywords. MarkerList and MarkerWild inside `[...]` would
// need additional plumbing; defer those until real patterns need them.
func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[string]string, int, bool) {
func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outerTail []string) (map[string]string, int, bool) {
caps := make(map[string]string)
li := startLi
@@ -406,12 +413,20 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[
return nil, startLi, false
}
// Build a pseudo-pattern tail so captureExpression picks the
// right delimiter. If there's a next literal inside `segment`,
// use it; otherwise fall back to the repeat boundary so the
// capture stops before the next iteration starts.
// right delimiter. Priority:
// 1. Next literal inside the same segment.
// 2. First literal in the outer-pattern tail — this is what
// stops `[TO <v>] [FOR <for>]` from letting `<v>` swallow
// the FOR clause.
// 3. Repeat boundary (the segment's leading literal) so a
// multi-iteration capture stops before the next iter.
tail := segment[pi+1:]
if !hasLiteralAfter(tail) && repeatBoundary != "" {
tail = []string{repeatBoundary}
if !hasLiteralAfter(tail) {
if outerLit := firstLiteral(outerTail); outerLit != "" {
tail = []string{outerLit}
} else if repeatBoundary != "" {
tail = []string{repeatBoundary}
}
}
captured := captureExpression(lineWords, &li, tail, 0, caseSens)
caps[m.Name] = captured
@@ -425,6 +440,22 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[
return caps, li, true
}
// firstLiteral walks pw in order and yields the first token that is a
// plain keyword: not empty, not an optional-clause bracket, and not a
// `<...>` match marker. It yields "" when pw holds no such token.
// matchSegment uses the result as a stop boundary taken from the outer
// pattern when an optional body ends in a regular marker.
func firstLiteral(pw []string) string {
	for _, tok := range pw {
		switch {
		case tok == "", tok == "[", tok == "]":
			// Structural or empty token: carries no keyword.
		case strings.HasPrefix(tok, "<") && strings.HasSuffix(tok, ">"):
			// Match marker such as <v> or <rest:REST>.
		default:
			return tok
		}
	}
	return ""
}
// hasLiteralAfter reports whether a pattern slice contains any literal
// keyword token (non-marker, non-bracket) — used to decide whether a
// marker's capture has a real delimiter or needs a synthetic one.
@@ -510,16 +541,58 @@ func (r *Rule) applyResult(captures map[string]string) string {
} else {
result = strings.ReplaceAll(result, "<."+name+".>", ".F.")
}
// <{name}> — blockify: wrap captured expression in {|| ... }.
// Empty capture → NIL so the call site sees a nil block, matching
// how Harbour's std.ch expects __dbLocate / dbEval to interpret a
// missing FOR/WHILE clause.
if val != "" {
result = strings.ReplaceAll(result, "<{"+name+"}>", "{|| "+val+" }")
} else {
result = strings.ReplaceAll(result, "<{"+name+"}>", "NIL")
}
// <name> — bare substitution (must be LAST, after all wrappers).
result = strings.ReplaceAll(result, "<"+name+">", val)
}
// Any `<{name}>` still in the template means `name` was never
// captured — emit NIL so call sites see a missing block argument
// (matches Harbour: empty FOR/WHILE → NIL → bypass the condition).
result = replaceUnreferencedBlockify(result)
// Clean up unreferenced markers: <name>, <(name)>, <.name.>, #<name>, <"name">
result = cleanUnreferencedMarkers(result)
return result
}
// replaceUnreferencedBlockify returns s with every `<{ident}>` marker
// replaced by the text NIL, where ident is an ASCII identifier (a
// letter or underscore followed by letters, digits or underscores).
// Anything that does not form a complete marker is copied through
// unchanged. It is meant to run after the per-capture substitution
// loop and before the generic unreferenced-marker cleanup.
func replaceUnreferencedBlockify(s string) string {
	isIdentStart := func(c byte) bool {
		return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
	}
	isIdentPart := func(c byte) bool {
		return isIdentStart(c) || (c >= '0' && c <= '9')
	}
	// markerEnd reports the index just past a `<{ident}>` marker that
	// begins at pos, or -1 when no complete marker starts there.
	markerEnd := func(pos int) int {
		if !strings.HasPrefix(s[pos:], "<{") {
			return -1
		}
		end := pos + 2
		if end >= len(s) || !isIdentStart(s[end]) {
			return -1
		}
		for end++; end < len(s) && isIdentPart(s[end]); end++ {
		}
		if !strings.HasPrefix(s[end:], "}>") {
			return -1
		}
		return end + 2
	}

	var b strings.Builder
	for pos := 0; pos < len(s); {
		if end := markerEnd(pos); end >= 0 {
			b.WriteString("NIL")
			pos = end
			continue
		}
		b.WriteByte(s[pos])
		pos++
	}
	return b.String()
}
// expandOptionalRepeat walks a result template and rewrites each top-
// level `[ ... ]` block by examining the captures referenced inside:
//
@@ -737,8 +810,9 @@ func findMarkerEnd(s string, start int) int {
return 0
}
i := start + 1
// Skip optional ( or . prefix
if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"') {
// Skip optional ( or . or " or { prefix (smart-stringify, logify,
// stringify, blockify respectively)
if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"' || s[i] == '{') {
i++
}
// Must start with letter or underscore (identifier)
@@ -749,8 +823,8 @@ func findMarkerEnd(s string, start int) int {
for i < len(s) && (s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] >= '0' && s[i] <= '9' || s[i] == '_') {
i++
}
// Skip optional ) or . or " or ,... suffix
for i < len(s) && (s[i] == ')' || s[i] == '.' || s[i] == '"' || s[i] == ',' || s[i] == ' ') {
// Skip optional ) or . or " or } or , suffix
for i < len(s) && (s[i] == ')' || s[i] == '.' || s[i] == '"' || s[i] == '}' || s[i] == ',' || s[i] == ' ') {
i++
}
if i < len(s) && s[i] == '>' {

View File

@@ -97,7 +97,19 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
dumpStartLine := 0 // 1-based line where BEGINDUMP appeared
var dumpLines []string // accumulate Go code lines
for i, line := range lines {
for i := 0; i < len(lines); i++ {
line := lines[i]
// `#command`/`#translate` directives that end with a trailing `;`
// continue on the next physical line — this is how harbour-core
// formats its std.ch rules. Join the continuation here so the
// directive parser sees one logical line. Only `#`-directives
// participate; user code uses `;` differently.
if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") {
for strings.HasSuffix(strings.TrimRight(line, " \t"), ";") && i+1 < len(lines) {
line = strings.TrimRight(line, " \t;") + " " + strings.TrimSpace(lines[i+1])
i++
}
}
// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks)
if inPragmaDump {
trimCheck := strings.TrimSpace(line)

View File

@@ -40,6 +40,28 @@
#command CONTINUE => __dbContinue()
/* --- analytical (COUNT/SUM via dbEval; AVERAGE via __dbAverage) ---
These mirror Harbour's std.ch but use single-value forms. Multi-
expression SUM/AVERAGE (`SUM x, y TO sx, sy`) use optional-repeat
syntax in Harbour and can be added here once a real test exercises
the more elaborate form. */
#command COUNT [TO <v>] [FOR <for>] [WHILE <while>] ;
[NEXT <next>] [RECORD <rec>] [<rest:REST>] [ALL] => ;
<v> := 0 ; dbEval( {|| <v> := <v> + 1 }, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
#command SUM <x> TO <v> ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
[RECORD <rec>] [<rest:REST>] [ALL] => ;
<v> := 0 ; dbEval( {|| <v> := <v> + <x> }, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
#command AVERAGE <x> TO <v> ;
[FOR <for>] [WHILE <while>] [NEXT <next>] ;
[RECORD <rec>] [<rest:REST>] [ALL] => ;
<v> := __dbAverage( <{x}>, ;
<{for}>, <{while}>, <next>, <rec>, <.rest.> )
/* --- bulk maintenance --- */
#command REINDEX => DbReindex()
#command PACK => DbPack()

View File

@@ -679,6 +679,93 @@ func rtlDbContinue(t *hbrt.Thread) {
t.RetBool(found)
}
// rtlDbAverage implements __dbAverage(bExpr, bFor, bWhile, nNext, nRec,
// lRest): it evaluates bExpr on every record in scope that passes the
// optional WHILE/FOR blocks and returns the arithmetic mean of the
// results. It returns 0 when no record qualifies, when there is no
// current work area, or when bExpr is not a code block, so the caller
// never sees a divide-by-zero.
//
// Record scope follows Harbour's dbEval:
//   - RECORD n: position on record n and visit only that record.
//   - NEXT n: visit at most n records starting at the current one;
//     n <= 0 visits nothing.
//   - WHILE or REST: start at the current record.
//   - otherwise: start at the top of the work area.
//
// When a RECORD/NEXT limit ends the scan, the record pointer is left on
// the last record visited rather than one past it.
//
// Used by `AVERAGE <x> TO <v>` in std.ch.
func rtlDbAverage(t *hbrt.Thread) {
	nParams := t.ParamCount()
	t.Frame(nParams, 0)
	defer t.EndProcFast()

	wam := getWA(t)
	if wam == nil {
		t.RetDouble(0, 10, 2)
		return
	}
	area := wam.Current()
	if area == nil {
		t.RetDouble(0, 10, 2)
		return
	}

	bExpr := t.Local(1)
	if !bExpr.IsBlock() {
		t.RetDouble(0, 10, 2)
		return
	}

	var bFor, bWhile hbrt.Value
	if nParams >= 2 {
		bFor = t.Local(2)
	}
	if nParams >= 3 {
		bWhile = t.Local(3)
	}

	hasNext := nParams >= 4 && !t.Local(4).IsNil()
	hasRec := nParams >= 5 && !t.Local(5).IsNil()
	lRest := nParams >= 6 && !t.Local(6).IsNil() && t.Local(6).AsBool()

	// Establish the starting position and the record limit. RECORD wins
	// over NEXT; either one (as well as WHILE and REST) suppresses the
	// implicit rewind to the first record.
	limit := 0
	limited := false
	switch {
	case hasRec:
		area.GoTo(uint32(t.Local(5).AsInt()))
		limit, limited = 1, true
	case hasNext:
		limit, limited = t.Local(4).AsInt(), true
	case !lRest && !bWhile.IsBlock():
		area.GoTop()
	}

	sum := 0.0
	n := 0
	visited := 0
	for !area.EOF() {
		// Covers NEXT 0 / negative NEXT: nothing is visited.
		if limited && visited >= limit {
			break
		}

		// WHILE: the first failing record ends the scan.
		if bWhile.IsBlock() {
			t.PendingParams2(0)
			bWhile.AsBlock().Fn(t)
			if !t.GetRetValue().AsBool() {
				break
			}
		}

		// FOR: a failing record is skipped but the scan continues.
		include := true
		if bFor.IsBlock() {
			t.PendingParams2(0)
			bFor.AsBlock().Fn(t)
			include = t.GetRetValue().AsBool()
		}
		if include {
			t.PendingParams2(0)
			bExpr.AsBlock().Fn(t)
			sum += t.GetRetValue().AsNumDouble()
			n++
		}

		visited++
		if limited && visited >= limit {
			// Stop before skipping so the pointer stays on the last
			// record in scope.
			break
		}
		area.Skip(1)
	}

	if n == 0 {
		t.RetDouble(0, 10, 2)
		return
	}
	t.RetDouble(sum/float64(n), 10, 2)
}
// --- DBSETFILTER / DBCLEARFILTER / DBFILTER ---
// DBSETFILTER(bCondition [, cCondition])

View File

@@ -198,6 +198,7 @@ func RegisterRTL(vm *hbrt.VM) {
hbrt.Sym("DBLOCATE", hbrt.FsPublic, rtlDbLocate),
hbrt.Sym("__DBLOCATE", hbrt.FsPublic, rtlDbLocate),
hbrt.Sym("__DBCONTINUE", hbrt.FsPublic, rtlDbContinue),
hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage),
hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter),
hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter),
hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),