fix(pp): codeblock-in-macro + multi-line ;-continuation for #command

Fixes three silent miscompiles in the preprocessor that were
masking real bugs in Harbour-style PRG code.

1. Brace tokenizer (compiler/pp/command.go)

`{` and `}` now tokenize as standalone separator tokens. The
matcher previously only split on `,()[]"'` etc., so a codeblock
literal `{|| ... }` in a macro argument became the tokens `{||`,
`""`, `}`. The capture-depth tracker only matched exact `{`/`}`,
so `{||` was invisible as an opener while the standalone `}`
wrongly decremented depth — `TEST_LINE( o:VarPut({|| "" }) )`
truncated mid-argument and the parser later choked at the inner
`}` with `expected ), got } "}"`.

Fix: add `{` and `}` to tokenizeLine's separator set. Now
`{|| ... }` lexes as `{`, `||`, `""`, `}` and balances cleanly.

2. ;-continuation join for non-`#` lines (compiler/pp/pp.go)

The existing line-joiner only collapsed trailing `;` continuations
on `#`-prefixed directives. Plain source code using the same
convention — e.g. Harbour's TEST macro:

   TEST t004 STATIC s_once := NIL, S_C ;
             INIT hb_threadOnce( @s_once, {|| ... } ) ;
             CODE x := S_C

was processed one physical line at a time, so the TEST pattern
never matched the full logical statement. The first row passed
through unrewritten, fell through to the parser as an expression,
and gengo silently absorbed it as part of the *previous*
function's body. Six TEST macros' STATIC declarations all ended
up tagged with t003's function name, producing duplicate
`static_T003_S_ONCE` decls and a Go compile failure.

Fix: add the same trailing-`;` join logic to user code, with
blank-line fillers inserted post-join so source line numbers in
parser errors still align with the original file.

3. Block-comment-aware continuation join (compiler/pp/pp.go)

Inline `/* ... */` at the end of a continuation row hid the
trailing `;` from the joiner's HasSuffix check. The fix calls
stripBlockComments on the next-line peek before testing for `;`,
so chains like

   AAdd( aResult, { cChildBase, ;
                    aRefs[ "fk" ][ j ][ 1 ], ;     /* child col */
                    aRefs[ "fk" ][ j ][ 3 ], ;     /* parent col */
                    ...

keep folding instead of stopping after one row and leaving a
dangling `,` at end of line.

Results
-------
Harbour-core compat sweep: 25/30 → 28/30 (remaining lnlenli1 +
keywords are //NOTEST stress files, intentionally unbalanced).
All 6 release gates green: go test ./..., FiveSql2 43/43,
Harbour compat 56/56, std.ch 17/17, FRB 7/7, examples 65/71.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 05:28:54 +09:00
parent ce7b067785
commit c5dd74c044
2 changed files with 81 additions and 3 deletions

View File

@@ -1326,18 +1326,23 @@ func tokenizeLine(line string) []string {
}
switch line[i] {
case ',', '(', ')', '[', ']':
case ',', '(', ')', '[', ']', '{', '}':
tokens = append(tokens, string(line[i]))
i++
continue
}
// Word — stop at whitespace, brackets, parens, comma, quotes.
// Word — stop at whitespace, brackets, parens, braces, comma, quotes.
// Braces split out so codeblock literals `{|| ... }` and array
// literals `{1, 2}` balance correctly during capture: without this
// `{||` fuses into one word that fails the depth-tracker's exact
// `{` match, while a trailing `}` token (alone before `)`) does
// match `case "}":` and falsely decrements depth.
start := i
for i < len(line) {
c := line[i]
if c == ' ' || c == '\t' || c == ',' || c == '(' || c == ')' ||
c == '[' || c == ']' || c == '"' || c == '\'' {
c == '[' || c == ']' || c == '{' || c == '}' || c == '"' || c == '\'' {
break
}
i++

View File

@@ -212,6 +212,51 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
continue // skip lines in inactive #ifdef sections
}
// `;`-continuation in user code. Join physical lines ending in
// a top-level `;` (paren/string-balanced) so multi-line
// `#command` invocations like
// TEST t004 STATIC s_once := NIL ;
// INIT ... ;
// CODE x := S_C
// match the rule pattern as a single logical line. Without this
// the pattern only sees the first physical line, fails to match,
// and the residual `TEST t004 STATIC ...` falls through to the
// parser as a bare expression — silently merged into the
// previous function's body, producing duplicate static decls
// tagged with the wrong function name. Insert blank fillers
// for each consumed line so post-PP source line numbers still
// align with the original file for error reporting.
consumedFiller := 0
for i+1 < len(lines) {
t := stripTrailingLineComment(strings.TrimRight(line, " \t"))
t = strings.TrimRight(t, " \t")
if !strings.HasSuffix(t, ";") {
break
}
// Strip block comments from the next line *the same way*
// the main loop will. Without this, an inline `/* ... */`
// at the end of a continuation row hides the trailing
// `;` from our HasSuffix check below — the joined chain
// truncates after just one row, leaving a dangling comma
// at end of line that the parser later mis-reports.
rawNext := lines[i+1]
strippedNext := stripBlockComments(rawNext, &inBlockComment)
nextTrim := strings.TrimSpace(strippedNext)
if strings.HasPrefix(nextTrim, "#") {
inBlockComment = false // not actually consumed
break
}
// Don't fold across an unterminated `/*` — the rest of
// the file would be treated as code by the join.
if inBlockComment {
inBlockComment = false
break
}
line = strings.TrimSuffix(t, ";") + " " + nextTrim
i++
consumedFiller++
}
// Apply #command/#translate rules
if len(pp.commands) > 0 || len(pp.translates) > 0 {
line = pp.applyRules(line)
@@ -223,6 +268,9 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
}
result = append(result, line)
for k := 0; k < consumedFiller; k++ {
result = append(result, "")
}
}
if len(ifStack) > 0 {
@@ -465,6 +513,31 @@ func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
return ""
}
// stripTrailingLineComment cuts s at the first `//` that is not inside a
// single- or double-quoted string literal and returns the text before it.
// When no such `//` exists (including when it only occurs inside a
// literal, or after an unterminated quote), s is returned unchanged.
// Block comments are not examined here; they are handled by
// stripBlockComments before continuation joining.
func stripTrailingLineComment(s string) string {
	// quote holds the delimiter of the literal currently being scanned,
	// or 0 while the scan is outside any literal.
	var quote byte
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case quote != 0:
			// Inside a literal: only the matching delimiter closes it,
			// so a `//` in here is never treated as a comment start.
			if ch == quote {
				quote = 0
			}
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '/' && pos+1 < len(s) && s[pos+1] == '/':
			return s[:pos]
		}
	}
	return s
}
// hasTopLevelSemi reports whether s contains a `;` outside of any
// string literal or paren/bracket/brace nesting. Used by applyRules
// to decide whether a line carries multiple PRG statements.