From c4f85f494c9d1a9c68cbd0cbc1296bceab8c5d20 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Thu, 30 Apr 2026 12:03:30 +0900 Subject: [PATCH] =?UTF-8?q?feat(pp):=20Phase=20A=20=E2=80=94=20preprocesso?= =?UTF-8?q?r=20std.ch=20as=20single=20source=20of=20truth?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce compiler/pp/std.ch with 19 #command rules so that ERASE, RENAME, DELETE FILE, CLOSE [<alias>|ALL|DATABASES], COMMIT, UNLOCK, LOCATE/CONTINUE, REINDEX, PACK, ZAP, KEYBOARD, RUN, MENU TO, and CLEAR GETS reach the parser pre-rewritten as plain function calls. Embedded into the compiler binary via //go:embed so it auto-loads without an explicit #include in user code, exactly the way Harbour auto-loads its std.ch. This is a pure dispatch move, not a behavior change for the already-working forms: the same Five RTL functions get called. But it does fix three regressions that the parser was masking: * ERASE / RENAME / DELETE FILE used to be silent no-ops — the parser swallowed the entire line and returned NIL. They now actually delete/rename files (FErase / FRename). * CLOSE <alias> used to silently ignore the alias and close the current area. It now switches to the named area first (<alias>->( DbCloseArea() )). * Two latent #command matcher bugs that surfaced while wiring std.ch up: - bare `CLOSE` would match rule `CLOSE ALL` because the tail of the pattern wasn't checked for unconsumed literals. - bare `CLOSE` would match rule `CLOSE <alias>` because all unconsumed pattern markers were unconditionally treated as optional. They are only optional when nested inside `[...]`. Parser cleanup: parseIdentStmt + parseExprStmt no longer hardcode ERASE / RENAME / RUN / KEYBOARD / REINDEX / LOCATE / CONTINUE / COMMIT / CLOSE — the rewriter handles them. Other xBase verbs (COPY / SORT / COUNT / SUM / AVERAGE / TOTAL / JOIN / LIST / DISPLAY / LABEL / REPORT / DIR ...) 
still no-op in the parser because their RTL backends aren't implemented yet — once the backends land they move into std.ch the same way. Gates green: go test ./... : PASS FiveSql2 SQL:1999 : 43/43 Harbour compat : 56/56 Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/parser/parser.go | 55 +++++++++++++-------------------------- compiler/pp/command.go | 25 ++++++++++++++---- compiler/pp/pp.go | 31 ++++++++++++---------- compiler/pp/std.ch | 54 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 56 deletions(-) create mode 100644 compiler/pp/std.ch diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go index 7438b48..a32f385 100644 --- a/compiler/parser/parser.go +++ b/compiler/parser/parser.go @@ -1148,13 +1148,18 @@ func (p *Parser) parseIdentStmt() ast.Stmt { return p.parseWithTimeout() } - // xBase commands that consume entire line + // xBase commands that consume entire line. These are silent no-ops + // for now — they have no RTL backend, so std.ch deliberately omits + // rules for them. ERASE / RENAME / LOCATE / CONTINUE / COMMIT / + // CLOSE / REINDEX / PACK / ZAP / UNLOCK / KEYBOARD / RUN are now + // rewritten by compiler/pp/std.ch into function calls before the + // parser sees them. 
switch upper { case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE", - "LABEL", "REPORT", "ACCEPT", "INPUT", "LOCATE", "CONTINUE", - "JOIN", "RELEASE", "SAVE", "RESTORE", "ERASE", "RENAME", - "RUN", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", - "WITH", "KEYBOARD", "CLEAR", "DISPLAY", "LIST", "REINDEX": + "LABEL", "REPORT", "ACCEPT", "INPUT", + "JOIN", "RELEASE", "SAVE", "RESTORE", + "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", + "WITH", "CLEAR", "DISPLAY", "LIST": p.advance() for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF { p.advance() @@ -1162,13 +1167,6 @@ func (p *Parser) parseIdentStmt() ast.Stmt { p.expectEndOfStmt() return &ast.ExprStmt{X: &ast.LiteralExpr{Kind: token.NIL_LIT, Value: "NIL"}} - case "COMMIT": - p.advance() - p.expectEndOfStmt() - return &ast.ExprStmt{X: &ast.CallExpr{ - Func: &ast.IdentExpr{Name: "DbCommit"}, - }} - case "FIVE_GODUMP__": // GoDump is a Decl, wrap as ExprStmt for statement context p.advance() // consume FIVE_GODUMP__ @@ -1211,19 +1209,10 @@ func (p *Parser) parseExprStmt() ast.Stmt { if p.current.Kind == token.IDENT && p.currentUpper() == "TRY" { return p.parseTryCatch() } - // CLOSE [DATABASES|ALL] — close work areas - if p.current.Kind == token.IDENT && p.currentUpper() == "CLOSE" { - p.advance() - // Skip optional DATABASES/ALL keyword - if p.current.Kind == token.IDENT { - p.advance() - } - p.expectEndOfStmt() - return &ast.ExprStmt{X: &ast.CallExpr{ - Func: &ast.IdentExpr{Name: "DbCloseArea"}, - }} - } - // xBase commands that consume entire line (COPY, SORT, COUNT, SUM, etc.) + // xBase commands that consume entire line — duplicate of the switch + // in parseIdentStmt(). The keyword set is kept in sync; std.ch covers + // ERASE/RENAME/LOCATE/CONTINUE/COMMIT/CLOSE/REINDEX/PACK/ZAP/UNLOCK/ + // KEYBOARD/RUN, so they're absent here. 
if p.current.Kind == token.IDENT { // WITH TIMEOUT n / body / ENDWITH if p.currentUpper() == "WITH" && @@ -1232,10 +1221,10 @@ func (p *Parser) parseExprStmt() ast.Stmt { } switch p.currentUpper() { case "COPY", "SORT", "COUNT", "SUM", "AVERAGE", "TOTAL", "UPDATE", - "LABEL", "REPORT", "ACCEPT", "INPUT", "LOCATE", "CONTINUE", - "JOIN", "RELEASE", "SAVE", "RESTORE", "ERASE", "RENAME", - "RUN", "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", - "WITH", "KEYBOARD", "CLEAR", "DISPLAY", "LIST", "REINDEX": + "LABEL", "REPORT", "ACCEPT", "INPUT", + "JOIN", "RELEASE", "SAVE", "RESTORE", + "DIR", "STORE", "NOTE", "TEXT", "ENDTEXT", + "WITH", "CLEAR", "DISPLAY", "LIST": // Consume entire line — these are complex multi-word commands p.advance() for p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF { @@ -1246,14 +1235,6 @@ func (p *Parser) parseExprStmt() ast.Stmt { } } - // COMMIT — flush work area - if p.current.Kind == token.IDENT && p.currentUpper() == "COMMIT" { - p.advance() - p.expectEndOfStmt() - return &ast.ExprStmt{X: &ast.CallExpr{ - Func: &ast.IdentExpr{Name: "DbCommit"}, - }} - } expr := p.parseExpr() // ch <- value (channel send) diff --git a/compiler/pp/command.go b/compiler/pp/command.go index d8665b9..4078ce6 100644 --- a/compiler/pp/command.go +++ b/compiler/pp/command.go @@ -336,14 +336,29 @@ func (r *Rule) matchPattern(line string) map[string]string { } } - // Skip remaining optional markers in pattern + // Walk any tail of the pattern that wasn't matched against the + // line. We accept it only if everything that remains is *optional* + // — i.e. a `[...]` block (which by definition can be absent) or + // markers/literals that are nested inside one. A bare `<marker>` or a + // literal token outside of brackets is required, so encountering + // one means the pattern isn't satisfied: bare `CLOSE` must not + // match rule `CLOSE <alias>`. 
+ depth := 0 for pi < len(patternWords) { pw := patternWords[pi] - if pw == "[" || pw == "]" || (strings.HasPrefix(pw, "<") && strings.HasSuffix(pw, ">")) { - pi++ - } else { - break + switch { + case pw == "[": + depth++ + case pw == "]": + if depth > 0 { + depth-- + } + default: + if depth == 0 { + return nil + } } + pi++ } // For #command with no markers and no optional clauses: diff --git a/compiler/pp/pp.go b/compiler/pp/pp.go index a7b7492..f4ad75e 100644 --- a/compiler/pp/pp.go +++ b/compiler/pp/pp.go @@ -16,12 +16,21 @@ package pp import ( + _ "embed" "fmt" "os" "path/filepath" "strings" ) +// embeddedStdCh is compiler/pp/std.ch baked into the compiler binary so +// xBase commands like ERASE, RENAME, COMMIT, LOCATE, ... reach the +// parser already rewritten as plain function calls. Equivalent to +// Harbour's auto-included std.ch. +// +//go:embed std.ch +var embeddedStdCh string + // Preprocessor processes source code before lexing. type Preprocessor struct { defines map[string]string // #define name → value @@ -43,21 +52,15 @@ func New() *Preprocessor { return pp } -// addStdRules registers built-in #command rules equivalent to Harbour's std.ch. +// addStdRules registers built-in #command rules from the embedded +// std.ch file. processLines walks the directives and stores #command +// entries in pp.commands as a side effect; we discard its output. +// +// Anything not safely expressible as a #command (e.g. parser-handled +// constructs like @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT) is left +// to the parser. func (pp *Preprocessor) addStdRules() { - stdCommands := []string{ - // MENU TO - `MENU TO => := __MenuTo()`, - // CLEAR GETS - `CLEAR GETS => GetList := {}`, - // Note: @ SAY, @ GET, @ PROMPT, READ are handled by the parser directly. - // @ PROMPT rules removed — parser handles them with proper token parsing. 
- } - for _, cmd := range stdCommands { - if rule := ParseRule(cmd, true, false); rule != nil { - pp.commands = append(pp.commands, rule) - } - } + pp.processLines("std.ch", embeddedStdCh, 0) } // AddIncludeDir adds a directory to search for #include files. diff --git a/compiler/pp/std.ch b/compiler/pp/std.ch new file mode 100644 index 0000000..7d85c96 --- /dev/null +++ b/compiler/pp/std.ch @@ -0,0 +1,54 @@ +/* + * std.ch — Five standard preprocessor rules + * + * Equivalent to harbour-core/include/std.ch. Translates xBase legacy + * commands into function calls so the parser does not have to know + * about them. Auto-loaded by compiler/pp at startup. + * + * Phase A: only rules whose backend RTL function already exists in + * Five. Rules whose backend is not yet implemented (COPY, SORT, + * COUNT, SUM, AVERAGE, TOTAL, JOIN, LIST, DISPLAY, LABEL, REPORT, + * DIR) are deliberately NOT included here — the parser still handles + * them as silent no-ops until their RTL backend lands. + * + * Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) + * All rights reserved. + */ + +/* --- file system --- */ +#command ERASE <(f)> => FErase(<(f)>) +#command DELETE FILE <(f)> => FErase(<(f)>) +#command RENAME <(s)> TO <(d)> => FRename(<(s)>, <(d)>) + +/* --- workarea lifecycle --- + Order matters: literal-keyword forms first, then bare CLOSE, + then the alias-form last so it doesn't shadow the others. 
*/ +#command CLOSE ALL => DbCloseAll() +#command CLOSE DATABASES => DbCloseAll() +#command CLOSE => DbCloseArea() +#command CLOSE => ->( DbCloseArea() ) + +/* --- record state --- */ +#command COMMIT => DbCommit() +#command UNLOCK ALL => DbUnlock() +#command UNLOCK => DbRUnlock() + +/* --- record search --- */ +#command LOCATE [FOR ] [WHILE ] ; + [NEXT ] [RECORD ] [] [ALL] => ; + __dbLocate(<{for}>, <{while}>, , , <.rest.>) + +#command CONTINUE => __dbContinue() + +/* --- bulk maintenance --- */ +#command REINDEX => DbReindex() +#command PACK => DbPack() +#command ZAP => DbZap() + +/* --- input / shell --- */ +#command KEYBOARD => Keyboard() +#command RUN <*cmd*> => hb_Run(<(cmd)>) + +/* --- legacy GET system --- */ +#command MENU TO => := __MenuTo() +#command CLEAR GETS => GetList := {}