diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go index a32f385..ccec679 100644 --- a/compiler/parser/parser.go +++ b/compiler/parser/parser.go @@ -1155,7 +1155,7 @@ func (p *Parser) parseIdentStmt() ast.Stmt { // rewritten by compiler/pp/std.ch into function calls before the // parser sees them. switch upper { - case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE", + case "COPY", "SORT", "TOTAL", "UPDATE", "LABEL", "REPORT", "ACCEPT", "INPUT", "JOIN", "RELEASE", "SAVE", "RESTORE", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", diff --git a/compiler/pp/command.go b/compiler/pp/command.go index 4078ce6..ce2c177 100644 --- a/compiler/pp/command.go +++ b/compiler/pp/command.go @@ -304,9 +304,16 @@ func (r *Rule) matchPattern(line string) map[string]string { bodyEnd++ } body := patternWords[bodyStart:bodyEnd] + // Outer-pattern tail (everything after the matching `]`) is + // needed so a regular marker at the end of `body` knows where + // to stop capturing. Without this, `[TO ] [FOR ]` + // against `TO n FOR age >= 30` would let `` swallow the + // rest of the line because `body` itself has no literal that + // follows the marker. + outerTail := patternWords[bodyEnd+1:] for li < len(lineWords) { snapshotLi := li - iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens) + iterCaps, newLi, ok := matchSegment(body, lineWords, li, r.CaseSens, outerTail) if !ok { li = snapshotLi break @@ -380,7 +387,7 @@ func (r *Rule) matchPattern(line string) map[string]string { // A "mini-matcher" that mirrors the main loop for MarkerRegular and // literal keywords. MarkerList and MarkerWild inside `[...]` would // need additional plumbing; defer those until real patterns need them. -func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[string]string, int, bool) { +func matchSegment(segment, lineWords []string, startLi int, caseSens bool, outerTail []string) (map[string]string, int, bool) { caps := make(map[string]string) li := startLi @@ -406,12 +413,20 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[ return nil, startLi, false } // Build a pseudo-pattern tail so captureExpression picks the - // right delimiter. If there's a next literal inside `segment`, - // use it; otherwise fall back to the repeat boundary so the - // capture stops before the next iteration starts. + // right delimiter. Priority: + // 1. Next literal inside the same segment. + // 2. First literal in the outer-pattern tail — this is what + // stops `[TO ] [FOR ]` from letting `` swallow + // the FOR clause. + // 3. Repeat boundary (the segment's leading literal) so a + // multi-iteration capture stops before the next iter. tail := segment[pi+1:] - if !hasLiteralAfter(tail) && repeatBoundary != "" { - tail = []string{repeatBoundary} + if !hasLiteralAfter(tail) { + if outerLit := firstLiteral(outerTail); outerLit != "" { + tail = []string{outerLit} + } else if repeatBoundary != "" { + tail = []string{repeatBoundary} + } } captured := captureExpression(lineWords, &li, tail, 0, caseSens) caps[m.Name] = captured @@ -425,6 +440,22 @@ func matchSegment(segment, lineWords []string, startLi int, caseSens bool) (map[ return caps, li, true } +// firstLiteral returns the first non-marker, non-bracket token in pw, +// or "" if none. Used to give matchSegment a stop-boundary drawn from +// the outer pattern when its body ends in a regular marker. +func firstLiteral(pw []string) string { + for _, w := range pw { + if w == "[" || w == "]" || w == "" { + continue + } + if strings.HasPrefix(w, "<") && strings.HasSuffix(w, ">") { + continue + } + return w + } + return "" +} + // hasLiteralAfter reports whether a pattern slice contains any literal // keyword token (non-marker, non-bracket) — used to decide whether a // marker's capture has a real delimiter or needs a synthetic one. @@ -510,16 +541,58 @@ func (r *Rule) applyResult(captures map[string]string) string { } else { result = strings.ReplaceAll(result, "<."+name+".>", ".F.") } + // <{name}> — blockify: wrap captured expression in {|| ... }. + // Empty capture → NIL so the call site sees a nil block, matching + // how Harbour's std.ch expects __dbLocate / dbEval to interpret a + // missing FOR/WHILE clause. + if val != "" { + result = strings.ReplaceAll(result, "<{"+name+"}>", "{|| "+val+" }") + } else { + result = strings.ReplaceAll(result, "<{"+name+"}>", "NIL") + } // — bare substitution (must be LAST, after all wrappers). result = strings.ReplaceAll(result, "<"+name+">", val) } + // Any `<{name}>` still in the template means `name` was never + // captured — emit NIL so call sites see a missing block argument + // (matches Harbour: empty FOR/WHILE → NIL → bypass the condition). + result = replaceUnreferencedBlockify(result) + // Clean up unreferenced markers: , <(name)>, <.name.>, #, <"name"> result = cleanUnreferencedMarkers(result) return result } +// replaceUnreferencedBlockify rewrites every remaining `<{ident}>` to +// NIL. Run after the main substitution loop, before the generic +// unreferenced-marker cleanup. +func replaceUnreferencedBlockify(s string) string { + var out strings.Builder + i := 0 + for i < len(s) { + if i+2 < len(s) && s[i] == '<' && s[i+1] == '{' { + j := i + 2 + // Identifier + if j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z')) { + j++ + for j < len(s) && (s[j] == '_' || (s[j] >= 'a' && s[j] <= 'z') || (s[j] >= 'A' && s[j] <= 'Z') || (s[j] >= '0' && s[j] <= '9')) { + j++ + } + if j+1 < len(s) && s[j] == '}' && s[j+1] == '>' { + out.WriteString("NIL") + i = j + 2 + continue + } + } + } + out.WriteByte(s[i]) + i++ + } + return out.String() +} + // expandOptionalRepeat walks a result template and rewrites each top- // level `[ ... ]` block by examining the captures referenced inside: // @@ -737,8 +810,9 @@ func findMarkerEnd(s string, start int) int { return 0 } i := start + 1 - // Skip optional ( or . prefix - if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"') { + // Skip optional ( or . or " or { prefix (smart-stringify, logify, + // stringify, blockify respectively) + if i < len(s) && (s[i] == '(' || s[i] == '.' || s[i] == '"' || s[i] == '{') { i++ } // Must start with letter or underscore (identifier) @@ -749,8 +823,8 @@ func findMarkerEnd(s string, start int) int { for i < len(s) && (s[i] >= 'a' && s[i] <= 'z' || s[i] >= 'A' && s[i] <= 'Z' || s[i] >= '0' && s[i] <= '9' || s[i] == '_') { i++ } - // Skip optional ) or . or " or ,... suffix - for i < len(s) && (s[i] == ')' || s[i] == '.' || s[i] == '"' || s[i] == ',' || s[i] == ' ') { + // Skip optional ) or . or " or } or , suffix + for i < len(s) && (s[i] == ')' || s[i] == '.' || s[i] == '"' || s[i] == '}' || s[i] == ',' || s[i] == ' ') { i++ } if i < len(s) && s[i] == '>' { diff --git a/compiler/pp/pp.go b/compiler/pp/pp.go index f4ad75e..767f2fa 100644 --- a/compiler/pp/pp.go +++ b/compiler/pp/pp.go @@ -97,7 +97,19 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string dumpStartLine := 0 // 1-based line where BEGINDUMP appeared var dumpLines []string // accumulate Go code lines - for i, line := range lines { + for i := 0; i < len(lines); i++ { + line := lines[i] + // `#command`/`#translate` directives that end with a trailing `;` + // continue on the next physical line — this is how harbour-core + // formats its std.ch rules. Join the continuation here so the + // directive parser sees one logical line. Only `#`-directives + // participate; user code uses `;` differently. + if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") { + for strings.HasSuffix(strings.TrimRight(line, " \t"), ";") && i+1 < len(lines) { + line = strings.TrimRight(line, " \t;") + " " + strings.TrimSpace(lines[i+1]) + i++ + } + } // Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks) if inPragmaDump { trimCheck := strings.TrimSpace(line) diff --git a/compiler/pp/std.ch b/compiler/pp/std.ch index 7d85c96..52f30d0 100644 --- a/compiler/pp/std.ch +++ b/compiler/pp/std.ch @@ -40,6 +40,28 @@ #command CONTINUE => __dbContinue() +/* --- analytical (no extra RTL — just dbEval) --- + These mirror Harbour's std.ch but use single-value forms. Multi- + expression SUM/AVERAGE (`SUM x, y TO sx, sy`) use optional-repeat + syntax in Harbour and can be added here once a real test exercises + the more elaborate form. */ +#command COUNT [TO ] [FOR ] [WHILE ] ; + [NEXT ] [RECORD ] [] [ALL] => ; + := 0 ; dbEval( {|| := + 1 }, ; + <{for}>, <{while}>, , , <.rest.> ) + +#command SUM TO ; + [FOR ] [WHILE ] [NEXT ] ; + [RECORD ] [] [ALL] => ; + := 0 ; dbEval( {|| := + }, ; + <{for}>, <{while}>, , , <.rest.> ) + +#command AVERAGE TO ; + [FOR ] [WHILE ] [NEXT ] ; + [RECORD ] [] [ALL] => ; + := __dbAverage( <{x}>, ; + <{for}>, <{while}>, , , <.rest.> ) + /* --- bulk maintenance --- */ #command REINDEX => DbReindex() #command PACK => DbPack() diff --git a/hbrtl/database.go b/hbrtl/database.go index 376804a..dce125d 100644 --- a/hbrtl/database.go +++ b/hbrtl/database.go @@ -679,6 +679,93 @@ func rtlDbContinue(t *hbrt.Thread) { t.RetBool(found) } +// rtlDbAverage implements __dbAverage(bExpr, bFor, bWhile, nNext, nRec, +// lRest) — sum the expression over visible records and return the +// arithmetic mean. Returns 0 when the loop visits no records (mirrors +// Harbour's idiom of avoiding a divide-by-zero in the expansion). +// +// Used by `AVERAGE TO ` in std.ch. +func rtlDbAverage(t *hbrt.Thread) { + nParams := t.ParamCount() + t.Frame(nParams, 0) + defer t.EndProcFast() + + wam := getWA(t) + if wam == nil { + t.RetDouble(0, 10, 2) + return + } + area := wam.Current() + if area == nil { + t.RetDouble(0, 10, 2) + return + } + + bExpr := t.Local(1) + if !bExpr.IsBlock() { + t.RetDouble(0, 10, 2) + return + } + var bFor, bWhile hbrt.Value + if nParams >= 2 { + bFor = t.Local(2) + } + if nParams >= 3 { + bWhile = t.Local(3) + } + nCount := -1 + if nParams >= 4 && !t.Local(4).IsNil() { + nCount = t.Local(4).AsInt() + } + if nParams >= 5 && !t.Local(5).IsNil() { + area.GoTo(uint32(t.Local(5).AsInt())) + } + lRest := false + if nParams >= 6 && !t.Local(6).IsNil() { + lRest = t.Local(6).AsBool() + } + if !lRest && (nParams < 5 || t.Local(5).IsNil()) { + area.GoTop() + } + + sum := 0.0 + n := 0 + scanned := 0 + for !area.EOF() { + if nCount >= 0 && scanned >= nCount { + break + } + // WHILE + if bWhile.IsBlock() { + t.PendingParams2(0) + bWhile.AsBlock().Fn(t) + if !t.GetRetValue().AsBool() { + break + } + } + // FOR + eval := true + if bFor.IsBlock() { + t.PendingParams2(0) + bFor.AsBlock().Fn(t) + eval = t.GetRetValue().AsBool() + } + if eval { + t.PendingParams2(0) + bExpr.AsBlock().Fn(t) + sum += t.GetRetValue().AsNumDouble() + n++ + } + area.Skip(1) + scanned++ + } + if n == 0 { + t.RetDouble(0, 10, 2) + return + } + t.RetDouble(sum/float64(n), 10, 2) +} + // --- DBSETFILTER / DBCLEARFILTER / DBFILTER --- // DBSETFILTER(bCondition [, cCondition]) diff --git a/hbrtl/register.go b/hbrtl/register.go index 9a7945f..0cef4bb 100644 --- a/hbrtl/register.go +++ b/hbrtl/register.go @@ -198,6 +198,7 @@ func RegisterRTL(vm *hbrt.VM) { hbrt.Sym("DBLOCATE", hbrt.FsPublic, rtlDbLocate), hbrt.Sym("__DBLOCATE", hbrt.FsPublic, rtlDbLocate), hbrt.Sym("__DBCONTINUE", hbrt.FsPublic, rtlDbContinue), + hbrt.Sym("__DBAVERAGE", hbrt.FsPublic, rtlDbAverage), hbrt.Sym("DBSETFILTER", hbrt.FsPublic, rtlDbSetFilter), hbrt.Sym("DBCLEARFILTER", hbrt.FsPublic, rtlDbClearFilter), hbrt.Sym("DBFILTER", hbrt.FsPublic, rtlDbFilter),