feat(pp): Phase A — preprocessor std.ch as single source of truth

Introduce compiler/pp/std.ch with 19 #command rules so that ERASE,
RENAME, DELETE FILE, CLOSE [<a>|ALL|DATABASES], COMMIT, UNLOCK,
LOCATE/CONTINUE, REINDEX, PACK, ZAP, KEYBOARD, RUN, MENU TO, and
CLEAR GETS reach the parser pre-rewritten as plain function calls.
Embedded into the compiler binary via //go:embed so it auto-loads
without an explicit #include in user code, exactly the way Harbour
auto-loads its std.ch.

This is a pure dispatch move, not a behavior change for the
already-working forms: the same RTL functions in Five get called.
But it does fix three regressions that the parser was masking:

  * ERASE / RENAME / DELETE FILE used to be silent no-ops — the
    parser swallowed the entire line and returned NIL. They now
    actually delete/rename files (FErase / FRename).
  * CLOSE <alias> used to silently ignore the alias and close the
    current area. It now switches to the named area first
    (<a>->( DbCloseArea() )).
  * Two latent #command matcher bugs that surfaced while wiring
    std.ch up:
      - bare `CLOSE` would match rule `CLOSE ALL` because the tail
        of the pattern wasn't checked for unconsumed literals.
      - bare `CLOSE` would match rule `CLOSE <a>` because all
        unconsumed pattern markers were unconditionally treated as
        optional. They are only optional when nested inside `[...]`.

Parser cleanup: parseIdentStmt + parseExprStmt no longer hardcode
ERASE / RENAME / RUN / KEYBOARD / REINDEX / LOCATE / CONTINUE /
COMMIT / CLOSE — the rewriter handles them. Other xBase verbs
(COPY / SORT / COUNT / SUM / AVERAGE / TOTAL / JOIN / LIST /
DISPLAY / LABEL / REPORT / DIR ...) still no-op in the parser
because their RTL backends aren't implemented yet — once the
backends land they move into std.ch the same way.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 12:03:30 +09:00
parent f4ed42556b
commit c4f85f494c
4 changed files with 109 additions and 56 deletions

View File

@@ -1148,13 +1148,18 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
return p.parseWithTimeout()
}
// xBase commands that consume entire line
// xBase commands that consume entire line. These are silent no-ops
// for now — they have no RTL backend, so std.ch deliberately omits
// rules for them. ERASE / RENAME / LOCATE / CONTINUE / COMMIT /
// CLOSE / REINDEX / PACK / ZAP / UNLOCK / KEYBOARD / RUN are now
// rewritten by compiler/pp/std.ch into function calls before the
// parser sees them.
switch upper {
case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE",
"LABEL", "REPORT", "ACCEPT", "INPUT", "LOCATE", "CONTINUE",
"JOIN", "RELEASE", "SAVE", "RESTORE", "ERASE", "RENAME",
"RUN", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
"WITH", "KEYBOARD", "CLEAR", "DISPLAY", "LIST", "REINDEX":
"LABEL", "REPORT", "ACCEPT", "INPUT",
"JOIN", "RELEASE", "SAVE", "RESTORE",
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
"WITH", "CLEAR", "DISPLAY", "LIST":
p.advance()
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
p.advance()
@@ -1162,13 +1167,6 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
p.expectEndOfStmt()
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
case "COMMIT":
p.advance()
p.expectEndOfStmt()
return &ast.ExprStmt{X: &ast.CallExpr{
Func: &ast.IdentExpr{Name: "DbCommit"},
}}
case "FIVE_GODUMP__":
// GoDump is a Decl, wrap as ExprStmt for statement context
p.advance() // consume FIVE_GODUMP__
@@ -1211,19 +1209,10 @@ func (p *Parser) parseExprStmt() ast.Stmt {
if p.current.Kind == token.IDENT && p.currentUpper() == "TRY" {
return p.parseTryCatch()
}
// CLOSE [DATABASES|ALL] — close work areas
if p.current.Kind == token.IDENT && p.currentUpper() == "CLOSE" {
p.advance()
// Skip optional DATABASES/ALL keyword
if p.current.Kind == token.IDENT {
p.advance()
}
p.expectEndOfStmt()
return &ast.ExprStmt{X: &ast.CallExpr{
Func: &ast.IdentExpr{Name: "DbCloseArea"},
}}
}
// xBase commands that consume entire line (COPY, SORT, COUNT, SUM, etc.)
// xBase commands that consume entire line — duplicate of the switch
// in parseIdentStmt(). The keyword set is kept in sync; std.ch covers
// ERASE/RENAME/LOCATE/CONTINUE/COMMIT/CLOSE/REINDEX/PACK/ZAP/UNLOCK/
// KEYBOARD/RUN, so they're absent here.
if p.current.Kind == token.IDENT {
// WITH TIMEOUT n / body / ENDWITH
if p.currentUpper() == "WITH" &&
@@ -1232,10 +1221,10 @@ func (p *Parser) parseExprStmt() ast.Stmt {
}
switch p.currentUpper() {
case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE",
"LABEL", "REPORT", "ACCEPT", "INPUT", "LOCATE", "CONTINUE",
"JOIN", "RELEASE", "SAVE", "RESTORE", "ERASE", "RENAME",
"RUN", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
"WITH", "KEYBOARD", "CLEAR", "DISPLAY", "LIST", "REINDEX":
"LABEL", "REPORT", "ACCEPT", "INPUT",
"JOIN", "RELEASE", "SAVE", "RESTORE",
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
"WITH", "CLEAR", "DISPLAY", "LIST":
// Consume entire line — these are complex multi-word commands
p.advance()
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
@@ -1246,14 +1235,6 @@ func (p *Parser) parseExprStmt() ast.Stmt {
}
}
// COMMIT — flush work area
if p.current.Kind == token.IDENT && p.currentUpper() == "COMMIT" {
p.advance()
p.expectEndOfStmt()
return &ast.ExprStmt{X: &ast.CallExpr{
Func: &ast.IdentExpr{Name: "DbCommit"},
}}
}
expr := p.parseExpr()
// ch <- value (channel send)

View File

@@ -336,14 +336,29 @@ func (r *Rule) matchPattern(line string) map[string]string {
}
}
// Skip remaining optional markers in pattern
// Walk any tail of the pattern that wasn't matched against the
// line. We accept it only if everything that remains is *optional*
// — i.e. a `[...]` block (which by definition can be absent) or
// markers/literals that are nested inside one. A bare `<a>` or a
// literal token outside of brackets is required, so encountering
// one means the pattern isn't satisfied: bare `CLOSE` must not
// match rule `CLOSE <a>`.
depth := 0
for pi < len(patternWords) {
pw := patternWords[pi]
if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) {
pi++
} else {
break
switch {
case pw == "[":
depth++
case pw == "]":
if depth > 0 {
depth--
}
default:
if depth == 0 {
return nil
}
}
pi++
}
// For #command with no markers and no optional clauses:

View File

@@ -16,12 +16,21 @@
package pp
import (
_ "embed"
"fmt"
"os"
"path/filepath"
"strings"
)
// embeddedStdCh holds the text of std.ch (compiler/pp/std.ch, the file
// that sits next to this source) baked into the compiler binary so
// xBase commands like ERASE, RENAME, COMMIT, LOCATE, ... reach the
// parser already rewritten as plain function calls. Equivalent to
// Harbour's auto-included std.ch. addStdRules feeds this string
// through processLines to register the #command rules.
//
//go:embed std.ch
var embeddedStdCh string
// Preprocessor processes source code before lexing.
type Preprocessor struct {
defines map[string]string // #define name → value
@@ -43,21 +52,15 @@ func New() *Preprocessor {
return pp
}
// addStdRules registers built-in #command rules equivalent to Harbour's std.ch.
// addStdRules registers built-in #command rules from the embedded
// std.ch file. processLines walks the directives and stores #command
// entries in pp.commands as a side effect; we discard its output.
//
// Anything not safely expressible as a #command (e.g. parser-handled
// constructs like @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT) is left
// to the parser.
func (pp *Preprocessor) addStdRules() {
stdCommands := []string{
// MENU TO
`MENU TO <var> => <var> := __MenuTo(<var>)`,
// CLEAR GETS
`CLEAR GETS => GetList := {}`,
// Note: @ SAY, @ GET, @ PROMPT, READ are handled by the parser directly.
// @ PROMPT rules removed — parser handles them with proper token parsing.
}
for _, cmd := range stdCommands {
if rule := ParseRule(cmd, true, false); rule != nil {
pp.commands = append(pp.commands, rule)
}
}
pp.processLines("std.ch", embeddedStdCh, 0)
}
// AddIncludeDir adds a directory to search for #include files.

54
compiler/pp/std.ch Normal file
View File

@@ -0,0 +1,54 @@
/*
* std.ch — Five standard preprocessor rules
*
* Equivalent to harbour-core/include/std.ch. Translates xBase legacy
* commands into function calls so the parser does not have to know
* about them. Auto-loaded by compiler/pp at startup.
*
* Phase A: only rules whose backend RTL function already exists in
* Five. Rules whose backend is not yet implemented (COPY, SORT,
* COUNT, SUM, AVERAGE, TOTAL, JOIN, LIST, DISPLAY, LABEL, REPORT,
* DIR) are deliberately NOT included here — the parser still handles
* them as silent no-ops until their RTL backend lands.
*
* Marker notation used by the rules below, as defined by the
* Clipper/Harbour preprocessor. NOTE(review): confirm that Five's
* matcher implements each of these the same way.
*   <x>       regular match marker / regular result marker
*   <(x)>     extended-expression match marker; as a result marker it
*             stringifies the argument unless it is parenthesised
*   <*x*>     wild match marker (takes the rest of the line)
*   <x:WORD>  restricted match marker (matches only the listed word)
*   <{x}>     blockify result marker (wraps the expression in a block)
*   <.x.>     logify result marker (.T. if the marker matched)
*   [ ... ]   optional clause
*
* Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
* All rights reserved.
*/
/* --- file system --- */
/* ERASE and DELETE FILE are synonyms: both expand to FErase(). */
#command ERASE <(f)> => FErase(<(f)>)
#command DELETE FILE <(f)> => FErase(<(f)>)
#command RENAME <(s)> TO <(d)> => FRename(<(s)>, <(d)>)
/* --- workarea lifecycle ---
Order matters: literal-keyword forms first, then bare CLOSE,
then the alias-form last so it doesn't shadow the others. */
#command CLOSE ALL => DbCloseAll()
#command CLOSE DATABASES => DbCloseAll()
#command CLOSE => DbCloseArea()
#command CLOSE <a> => <a>->( DbCloseArea() )
/* --- record state --- */
/* NOTE(review): Harbour's std.ch maps UNLOCK => dbUnlock() and
UNLOCK ALL => dbUnlockAll(); confirm that the DbRUnlock()/DbUnlock()
mapping used here is intentional. */
#command COMMIT => DbCommit()
#command UNLOCK ALL => DbUnlock()
#command UNLOCK => DbRUnlock()
/* --- record search --- */
/* A trailing ';' continues the rule on the next line; every clause of
LOCATE is optional, so a bare LOCATE also matches this rule. */
#command LOCATE [FOR <for>] [WHILE <while>] ;
[NEXT <next>] [RECORD <rec>] [<rest:REST>] [ALL] => ;
__dbLocate(<{for}>, <{while}>, <next>, <rec>, <.rest.>)
#command CONTINUE => __dbContinue()
/* --- bulk maintenance --- */
#command REINDEX => DbReindex()
#command PACK => DbPack()
#command ZAP => DbZap()
/* --- input / shell --- */
/* RUN captures everything after the keyword and hands it to hb_Run(). */
#command KEYBOARD <text> => Keyboard(<text>)
#command RUN <*cmd*> => hb_Run(<(cmd)>)
/* --- legacy GET system --- */
#command MENU TO <var> => <var> := __MenuTo(<var>)
#command CLEAR GETS => GetList := {}