feat(parser+pp): USE with macros and paren-balanced PP capture

Two related fixes for Harbour's data-driven `USE &cFile ALIAS &cAlias
INDEX &cNdx` idiom — common in any app that dispatches table names
at runtime.

Parser (compiler/parser/parser.go parseUse):
- `USE &cFile` / `USE &(expr)` previously triggered a
  skipToEndOfLine short-circuit, emitting an empty UseCmd (equivalent
  to bare USE = close current area). Now parseMacro runs and the
  MacroExpr becomes the File node, so codegen emits MacroPush +
  dbUseArea.
- `ALIAS &cAlias` / `ALIAS &a.1` similarly parsed the macro but
  discarded its result; the parser now stores it in UseCmd.AliasExpr
  so codegen evaluates the alias at runtime. Both the IDENT-path
  ("ALIAS") and keyword-path (token.ALIAS) handlers are fixed.

PP (compiler/pp/command.go):
- captureExpression and the MarkerList branch now paren-balance
  `(`/`[`/`{` so nested grouping inside a macro argument doesn't let
  an inner `)` terminate the capture. Example:
      _REGULAR_(&(a))
  previously captured `&(a` (missing inner `)`) and left the outer
  `)` dangling, producing parse errors in the expanded output.
- MarkerList capture still joins tokens with " " for raw `<z>`
  substitution — comma tokens stay in the stream, so `s(<z>)`
  re-emits them as argument separators and the list expands cleanly.

Bench: harbour-core/tests/pp.prg goes from 2 errors to 0 for the
realistic `USE &macro` / `&(expr)` patterns. The parse errors that
remain on line 70 come from a pathological `_REGULAR_L` list that
includes `&a.  [2]` (space between the macro's terminating dot and an
array index) — the PP expands it correctly, but Five's lexer rejects
the expanded result. That form doesn't occur in real code.

/tmp/test_use_macro.prg — all four patterns (`USE &f`, `USE &f ALIAS
&f`, `USE &f ALIAS &f INDEX &i`, dot-terminated) now compile. FiveSql2
43/43, Harbour compat 56/56, Go test ALL PASS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-18 17:38:15 +09:00
parent e9522772a7
commit 85002df6b9
2 changed files with 52 additions and 20 deletions

View File

@@ -1723,17 +1723,14 @@ func (p *Parser) parseUse() *ast.UseCmd {
// USE without args = close
if p.current.Kind != token.NEWLINE && p.current.Kind != token.EOF {
// If file starts with macro &, skip entire USE to EOL (complex macro syntax)
// `USE &cFile` / `USE &(expr)` — macro expression yields the
// filename at runtime. Harbour uses this heavily for data-
// driven apps (USE &cTable INDEX &cIndex ...).
if p.at(token.AMPERSAND) {
p.skipToEndOfLine()
p.expectEndOfStmt()
return &ast.UseCmd{UsePos: pos}
}
// Bare ident as filename: USE myfile / USE myfile.dbf / USE myfile NEW
// In Harbour, USE <name> treats name as a filename string, not a variable.
// Only use parseExpr for parenthesized (USE (expr)) or string literal (USE "file").
if p.at(token.IDENT) {
// Check if it's a bare filename (ident optionally followed by .ext)
file = p.parseMacro()
} else if p.at(token.IDENT) {
// Bare ident as filename: USE myfile / USE myfile.dbf / USE myfile NEW
// In Harbour, USE <name> treats name as a filename string, not a variable.
name := p.advance().Literal
if p.at(token.DOT) && (p.peekAt(1) == token.IDENT || p.peekAt(1) == token.INT) {
p.advance() // skip DOT
@@ -1764,7 +1761,9 @@ func (p *Parser) parseUse() *ast.UseCmd {
if upper == "ALIAS" {
p.advance()
if p.at(token.AMPERSAND) {
p.parseMacro() // macro alias — skip
// `ALIAS &cAlias` / `&cAlias.1` — compute the alias
// name at runtime via macro evaluation.
aliasExprNode = p.parseMacro()
} else if p.at(token.LPAREN) {
// ALIAS ( expr ) — parenthesized alias expression (runtime)
p.advance() // skip (
@@ -1803,7 +1802,7 @@ func (p *Parser) parseUse() *ast.UseCmd {
if p.current.Kind == token.ALIAS {
p.advance()
if p.at(token.AMPERSAND) {
p.parseMacro()
aliasExprNode = p.parseMacro()
} else if p.at(token.LPAREN) {
// ALIAS ( expr ) — parenthesized alias expression
p.advance()

View File

@@ -221,16 +221,35 @@ func (r *Rule) matchPattern(line string) map[string]string {
pi++
case MarkerList:
// Capture comma-separated items until next keyword
var items []string
// Capture a comma-separated list until the next literal
// pattern token. Paren-balanced so nested `(`/`[`/`{`
// don't let an inner `)` terminate the capture. Commas
// at the top level are preserved verbatim in the
// captured string so the `<z>` substitution in the
// result template reproduces the argument list as-is.
var parts []string
depth := 0
delim := ""
if pi+1 < len(patternWords) {
delim = patternWords[pi+1]
}
for li < len(lineWords) {
if pi+1 < len(patternWords) && matchWord(lineWords[li], patternWords[pi+1], r.CaseSens) {
w := lineWords[li]
if depth == 0 && delim != "" && matchWord(w, delim, r.CaseSens) {
break
}
items = append(items, lineWords[li])
switch w {
case "(", "[", "{":
depth++
case ")", "]", "}":
if depth > 0 {
depth--
}
}
parts = append(parts, w)
li++
}
captures[m.Name] = strings.Join(items, " ")
captures[m.Name] = strings.Join(parts, " ")
pi++
case MarkerWordList:
@@ -567,13 +586,27 @@ func captureExpression(lineWords []string, li *int, patternWords []string, nextP
}
if delimWord != "" {
// Capture until delimiter keyword
// Capture until the delimiter, paren-balancing so nested
// parens/brackets/braces inside the expression don't falsely
// terminate the capture. Harbour's own PP does the same —
// `_REGULAR_(&(a))` must capture `&(a)` (incl. inner parens)
// and leave the outer `)` for the pattern's own delimiter.
var parts []string
depth := 0
for *li < len(lineWords) {
if matchWord(lineWords[*li], delimWord, caseSens) {
w := lineWords[*li]
if depth == 0 && matchWord(w, delimWord, caseSens) {
break
}
parts = append(parts, lineWords[*li])
switch w {
case "(", "[", "{":
depth++
case ")", "]", "}":
if depth > 0 {
depth--
}
}
parts = append(parts, w)
*li++
}
return strings.Join(parts, " ")