feat(pp): Phase A — preprocessor std.ch as single source of truth
Introduce compiler/pp/std.ch with 19 #command rules so that ERASE,
RENAME, DELETE FILE, CLOSE [<a>|ALL|DATABASES], COMMIT, UNLOCK,
LOCATE/CONTINUE, REINDEX, PACK, ZAP, KEYBOARD, RUN, MENU TO, and
CLEAR GETS reach the parser pre-rewritten as plain function calls.
Embedded into the compiler binary via //go:embed so it auto-loads
without an explicit #include in user code, exactly the way Harbour
auto-loads its std.ch.
This is a pure dispatch move, not a behavior change for the
already-working forms: the same Five RTL functions get called.
But it does fix three regressions that the parser was masking:
* ERASE / RENAME / DELETE FILE used to be silent no-ops — the
parser swallowed the entire line and returned NIL. They now
actually delete/rename files (FErase / FRename).
* CLOSE <alias> used to silently ignore the alias and close the
current area. It now switches to the named area first
(<a>->( DbCloseArea() )).
* Two latent #command matcher bugs that surfaced while wiring
std.ch up:
- bare `CLOSE` would match rule `CLOSE ALL` because the tail
of the pattern wasn't checked for unconsumed literals.
- bare `CLOSE` would match rule `CLOSE <a>` because all
unconsumed pattern markers were unconditionally treated as
optional. They are only optional when nested inside `[...]`.
Parser cleanup: parseIdentStmt + parseExprStmt no longer hardcode
ERASE / RENAME / RUN / KEYBOARD / REINDEX / LOCATE / CONTINUE /
COMMIT / CLOSE — the rewriter handles them. Other xBase verbs
(COPY / SORT / COUNT / SUM / AVERAGE / TOTAL / JOIN / LIST /
DISPLAY / LABEL / REPORT / DIR ...) still no-op in the parser
because their RTL backends aren't implemented yet — once the
backends land they move into std.ch the same way.
Gates green:
go test ./... : PASS
FiveSql2 SQL:1999 : 43/43
Harbour compat : 56/56
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1148,13 +1148,18 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
|
||||
return p.parseWithTimeout()
|
||||
}
|
||||
|
||||
// xBase commands that consume entire line
|
||||
// xBase commands that consume entire line. These are silent no-ops
|
||||
// for now — they have no RTL backend, so std.ch deliberately omits
|
||||
// rules for them. ERASE / RENAME / LOCATE / CONTINUE / COMMIT /
|
||||
// CLOSE / REINDEX / PACK / ZAP / UNLOCK / KEYBOARD / RUN are now
|
||||
// rewritten by compiler/pp/std.ch into function calls before the
|
||||
// parser sees them.
|
||||
switch upper {
|
||||
case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE",
|
||||
"LABEL", "REPORT", "ACCEPT", "INPUT", "LOCATE", "CONTINUE",
|
||||
"JOIN", "RELEASE", "SAVE", "RESTORE", "ERASE", "RENAME",
|
||||
"RUN", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
|
||||
"WITH", "KEYBOARD", "CLEAR", "DISPLAY", "LIST", "REINDEX":
|
||||
"LABEL", "REPORT", "ACCEPT", "INPUT",
|
||||
"JOIN", "RELEASE", "SAVE", "RESTORE",
|
||||
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
|
||||
"WITH", "CLEAR", "DISPLAY", "LIST":
|
||||
p.advance()
|
||||
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
|
||||
p.advance()
|
||||
@@ -1162,13 +1167,6 @@ func (p *Parser) parseIdentStmt() ast.Stmt {
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}}
|
||||
|
||||
case "COMMIT":
|
||||
p.advance()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.CallExpr{
|
||||
Func: &ast.IdentExpr{Name: "DbCommit"},
|
||||
}}
|
||||
|
||||
case "FIVE_GODUMP__":
|
||||
// GoDump is a Decl, wrap as ExprStmt for statement context
|
||||
p.advance() // consume FIVE_GODUMP__
|
||||
@@ -1211,19 +1209,10 @@ func (p *Parser) parseExprStmt() ast.Stmt {
|
||||
if p.current.Kind == token.IDENT && p.currentUpper() == "TRY" {
|
||||
return p.parseTryCatch()
|
||||
}
|
||||
// CLOSE [DATABASES|ALL] — close work areas
|
||||
if p.current.Kind == token.IDENT && p.currentUpper() == "CLOSE" {
|
||||
p.advance()
|
||||
// Skip optional DATABASES/ALL keyword
|
||||
if p.current.Kind == token.IDENT {
|
||||
p.advance()
|
||||
}
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.CallExpr{
|
||||
Func: &ast.IdentExpr{Name: "DbCloseArea"},
|
||||
}}
|
||||
}
|
||||
// xBase commands that consume entire line (COPY, SORT, COUNT, SUM, etc.)
|
||||
// xBase commands that consume entire line — duplicate of the switch
|
||||
// in parseIdentStmt(). The keyword set is kept in sync; std.ch covers
|
||||
// ERASE/RENAME/LOCATE/CONTINUE/COMMIT/CLOSE/REINDEX/PACK/ZAP/UNLOCK/
|
||||
// KEYBOARD/RUN, so they're absent here.
|
||||
if p.current.Kind == token.IDENT {
|
||||
// WITH TIMEOUT n / body / ENDWITH
|
||||
if p.currentUpper() == "WITH" &&
|
||||
@@ -1232,10 +1221,10 @@ func (p *Parser) parseExprStmt() ast.Stmt {
|
||||
}
|
||||
switch p.currentUpper() {
|
||||
case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE",
|
||||
"LABEL", "REPORT", "ACCEPT", "INPUT", "LOCATE", "CONTINUE",
|
||||
"JOIN", "RELEASE", "SAVE", "RESTORE", "ERASE", "RENAME",
|
||||
"RUN", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
|
||||
"WITH", "KEYBOARD", "CLEAR", "DISPLAY", "LIST", "REINDEX":
|
||||
"LABEL", "REPORT", "ACCEPT", "INPUT",
|
||||
"JOIN", "RELEASE", "SAVE", "RESTORE",
|
||||
"DIR", "STORE", "NOTE", "TEXT", "ENDTEXT",
|
||||
"WITH", "CLEAR", "DISPLAY", "LIST":
|
||||
// Consume entire line — these are complex multi-word commands
|
||||
p.advance()
|
||||
for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
|
||||
@@ -1246,14 +1235,6 @@ func (p *Parser) parseExprStmt() ast.Stmt {
|
||||
}
|
||||
}
|
||||
|
||||
// COMMIT — flush work area
|
||||
if p.current.Kind == token.IDENT && p.currentUpper() == "COMMIT" {
|
||||
p.advance()
|
||||
p.expectEndOfStmt()
|
||||
return &ast.ExprStmt{X: &ast.CallExpr{
|
||||
Func: &ast.IdentExpr{Name: "DbCommit"},
|
||||
}}
|
||||
}
|
||||
expr := p.parseExpr()
|
||||
|
||||
// ch <- value (channel send)
|
||||
|
||||
@@ -336,14 +336,29 @@ func (r *Rule) matchPattern(line string) map[string]string {
|
||||
}
|
||||
}
|
||||
|
||||
// Skip remaining optional markers in pattern
|
||||
// Walk any tail of the pattern that wasn't matched against the
|
||||
// line. We accept it only if everything that remains is *optional*
|
||||
// — i.e. a `[...]` block (which by definition can be absent) or
|
||||
// markers/literals that are nested inside one. A bare `<a>` or a
|
||||
// literal token outside of brackets is required, so encountering
|
||||
// one means the pattern isn't satisfied: bare `CLOSE` must not
|
||||
// match rule `CLOSE <a>`.
|
||||
depth := 0
|
||||
for pi < len(patternWords) {
|
||||
pw := patternWords[pi]
|
||||
if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) {
|
||||
pi++
|
||||
} else {
|
||||
break
|
||||
switch {
|
||||
case pw == "[":
|
||||
depth++
|
||||
case pw == "]":
|
||||
if depth > 0 {
|
||||
depth--
|
||||
}
|
||||
default:
|
||||
if depth == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
pi++
|
||||
}
|
||||
|
||||
// For #command with no markers and no optional clauses:
|
||||
|
||||
@@ -16,12 +16,21 @@
|
||||
package pp
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// embeddedStdCh is include/std.ch baked into the compiler binary so
|
||||
// xBase commands like ERASE, RENAME, COMMIT, LOCATE, ... reach the
|
||||
// parser already rewritten as plain function calls. Equivalent to
|
||||
// Harbour's auto-included std.ch.
|
||||
//
|
||||
//go:embed std.ch
|
||||
var embeddedStdCh string
|
||||
|
||||
// Preprocessor processes source code before lexing.
|
||||
type Preprocessor struct {
|
||||
defines map[string]string // #define name → value
|
||||
@@ -43,21 +52,15 @@ func New() *Preprocessor {
|
||||
return pp
|
||||
}
|
||||
|
||||
// addStdRules registers built-in #command rules equivalent to Harbour's std.ch.
|
||||
// addStdRules registers built-in #command rules from the embedded
|
||||
// std.ch file. processLines walks the directives and stores #command
|
||||
// entries in pp.commands as a side effect; we discard its output.
|
||||
//
|
||||
// Anything not safely expressible as a #command (e.g. parser-handled
|
||||
// constructs like @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT) is left
|
||||
// to the parser.
|
||||
func (pp *Preprocessor) addStdRules() {
|
||||
stdCommands := []string{
|
||||
// MENU TO
|
||||
`MENU TO <var> => <var> := __MenuTo(<var>)`,
|
||||
// CLEAR GETS
|
||||
`CLEAR GETS => GetList := {}`,
|
||||
// Note: @ SAY, @ GET, @ PROMPT, READ are handled by the parser directly.
|
||||
// @ PROMPT rules removed — parser handles them with proper token parsing.
|
||||
}
|
||||
for _, cmd := range stdCommands {
|
||||
if rule := ParseRule(cmd, true, false); rule != nil {
|
||||
pp.commands = append(pp.commands, rule)
|
||||
}
|
||||
}
|
||||
pp.processLines("std.ch", embeddedStdCh, 0)
|
||||
}
|
||||
|
||||
// AddIncludeDir adds a directory to search for #include files.
|
||||
|
||||
54
compiler/pp/std.ch
Normal file
54
compiler/pp/std.ch
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* std.ch — Five standard preprocessor rules
|
||||
*
|
||||
* Equivalent to harbour-core/include/std.ch. Translates xBase legacy
|
||||
* commands into function calls so the parser does not have to know
|
||||
* about them. Auto-loaded by compiler/pp at startup.
|
||||
*
|
||||
* Phase A: only rules whose backend RTL function already exists in
|
||||
* Five. Rules whose backend is not yet implemented (COPY, SORT,
|
||||
* COUNT, SUM, AVERAGE, TOTAL, JOIN, LIST, DISPLAY, LABEL, REPORT,
|
||||
* DIR) are deliberately NOT included here — the parser still handles
|
||||
* them as silent no-ops until their RTL backend lands.
|
||||
*
|
||||
* Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||||
* All rights reserved.
|
||||
*/
|
||||
|
||||
/* --- file system --- */
|
||||
#command ERASE <(f)> => FErase(<(f)>)
|
||||
#command DELETE FILE <(f)> => FErase(<(f)>)
|
||||
#command RENAME <(s)> TO <(d)> => FRename(<(s)>, <(d)>)
|
||||
|
||||
/* --- workarea lifecycle ---
|
||||
Order matters: literal-keyword forms first, then bare CLOSE,
|
||||
then the alias-form last so it doesn't shadow the others. */
|
||||
#command CLOSE ALL => DbCloseAll()
|
||||
#command CLOSE DATABASES => DbCloseAll()
|
||||
#command CLOSE => DbCloseArea()
|
||||
#command CLOSE <a> => <a>->( DbCloseArea() )
|
||||
|
||||
/* --- record state --- */
|
||||
#command COMMIT => DbCommit()
|
||||
#command UNLOCK ALL => DbUnlock()
|
||||
#command UNLOCK => DbRUnlock()
|
||||
|
||||
/* --- record search --- */
|
||||
#command LOCATE [FOR <for>] [WHILE <while>] ;
|
||||
[NEXT <next>] [RECORD <rec>] [<rest:REST>] [ALL] => ;
|
||||
__dbLocate(<{for}>, <{while}>, <next>, <rec>, <.rest.>)
|
||||
|
||||
#command CONTINUE => __dbContinue()
|
||||
|
||||
/* --- bulk maintenance --- */
|
||||
#command REINDEX => DbReindex()
|
||||
#command PACK => DbPack()
|
||||
#command ZAP => DbZap()
|
||||
|
||||
/* --- input / shell --- */
|
||||
#command KEYBOARD <text> => Keyboard(<text>)
|
||||
#command RUN <*cmd*> => hb_Run(<(cmd)>)
|
||||
|
||||
/* --- legacy GET system --- */
|
||||
#command MENU TO <var> => <var> := __MenuTo(<var>)
|
||||
#command CLEAR GETS => GetList := {}
|
||||
Reference in New Issue
Block a user