From c5dd74c0441d19a8f1dfa745917031d5b1a97181 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Wed, 13 May 2026 05:28:54 +0900 Subject: [PATCH] fix(pp): codeblock-in-macro + multi-line ;-continuation for #command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three silent-miscompile fixes in the preprocessor that were masking real bugs in Harbour-style PRG. 1. Brace tokenizer (compiler/pp/command.go) `{` and `}` now tokenize as standalone separator tokens. The matcher previously only split on `,()[]"'` etc., so a codeblock literal `{|| ... }` in a macro argument became the tokens `{||`, `""`, `}`. The capture-depth tracker only matched exact `{`/`}`, so `{||` was invisible as an opener while the standalone `}` wrongly decremented depth — `TEST_LINE( o:VarPut({|| "" }) )` truncated mid-argument and the parser later choked at the inner `}` with `expected ), got } "}"`. Fix: add `{` and `}` to tokenizeLine's separator set. Now `{|| ... }` lexes as `{`, `||`, `""`, `}` and balances cleanly. 2. ;-continuation join for non-`#` lines (compiler/pp/pp.go) The existing line-joiner only collapsed trailing `;` continuations on `#`-prefixed directives. Plain source code using the same convention — e.g. Harbour's TEST macro: TEST t004 STATIC s_once := NIL, S_C ; INIT hb_threadOnce( @s_once, {|| ... } ) ; CODE x := S_C was processed one physical line at a time, so the TEST pattern never matched the full logical statement. The first row passed through unrewritten, fell through to the parser as an expression, and gengo silently absorbed it as part of the *previous* function's body. Six TEST macros' STATIC declarations all ended up tagged with t003's function name, producing duplicate `static_T003_S_ONCE` decls and a Go compile failure. Fix: add the same trailing-`;` join logic to user code, with blank-line fillers inserted post-join so source line numbers in parser errors still align with the original file. 3. 
Block-comment-aware continuation join Inline `/* ... */` at the end of a continuation row hid the trailing `;` from the joiner's HasSuffix check. The fix calls stripBlockComments on the next-line peek before testing for `;`, so chains like AAdd( aResult, { cChildBase, ; aRefs[ "fk" ][ j ][ 1 ], ; /* child col */ aRefs[ "fk" ][ j ][ 3 ], ; /* parent col */ ... keep folding instead of stopping after one row and leaving a dangling `,` at end of line. Results ------- Harbour-core compat sweep: 25/30 → 28/30 (remaining lnlenli1 + keywords are //NOTEST stress files, intentionally unbalanced). All 6 release gates green: go test ./..., FiveSql2 43/43, Harbour compat 56/56, std.ch 17/17, FRB 7/7, examples 65/71. Co-Authored-By: Claude Opus 4.7 (1M context) --- compiler/pp/command.go | 11 +++++-- compiler/pp/pp.go | 73 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/compiler/pp/command.go b/compiler/pp/command.go index 5b91d94..ecf28c6 100644 --- a/compiler/pp/command.go +++ b/compiler/pp/command.go @@ -1326,18 +1326,23 @@ func tokenizeLine(line string) []string { } switch line[i] { - case ',', '(', ')', '[', ']': + case ',', '(', ')', '[', ']', '{', '}': tokens = append(tokens, string(line[i])) i++ continue } - // Word — stop at whitespace, brackets, parens, comma, quotes. + // Word — stop at whitespace, brackets, parens, braces, comma, quotes. + // Braces split out so codeblock literals `{|| ... }` and array + // literals `{1, 2}` balance correctly during capture: without this + // `{||` fuses into one word that fails the depth-tracker's exact + // `{` match, while a trailing `}` token (alone before `)`) does + // match `case "}":` and falsely decrements depth. 
start := i for i < len(line) { c := line[i] if c == ' ' || c == '\t' || c == ',' || c == '(' || c == ')' || - c == '[' || c == ']' || c == '"' || c == '\'' { + c == '[' || c == ']' || c == '{' || c == '}' || c == '"' || c == '\'' { break } i++ diff --git a/compiler/pp/pp.go b/compiler/pp/pp.go index ba5d062..d689899 100644 --- a/compiler/pp/pp.go +++ b/compiler/pp/pp.go @@ -212,6 +212,51 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string continue // skip lines in inactive #ifdef sections } + // `;`-continuation in user code. Join physical lines ending in + // a trailing `;` (after any `//` comment is removed) so multi-line + // `#command` invocations like + // TEST t004 STATIC s_once := NIL ; + // INIT ... ; + // CODE x := S_C + // match the rule pattern as a single logical line. Without this + // the pattern only sees the first physical line, fails to match, + // and the residual `TEST t004 STATIC ...` falls through to the + // parser as a bare expression — silently merged into the + // previous function's body, producing duplicate static decls + // tagged with the wrong function name. Insert blank fillers + // for each consumed line so post-PP source line numbers still + // align with the original file for error reporting. + consumedFiller := 0 + for i+1 < len(lines) { + t := stripTrailingLineComment(strings.TrimRight(line, " \t")) + t = strings.TrimRight(t, " \t") + if !strings.HasSuffix(t, ";") { + break + } + // Strip block comments from the next line *the same way* + // the main loop will. Without this, an inline `/* ... */` + // at the end of a continuation row hides the trailing + // `;` from the HasSuffix check on the next pass — the joined chain + // truncates after just one row, leaving a dangling comma + // at end of line that the parser later mis-reports.
+ rawNext := lines[i+1] + strippedNext := stripBlockComments(rawNext, &inBlockComment) + nextTrim := strings.TrimSpace(strippedNext) + if strings.HasPrefix(nextTrim, "#") { + inBlockComment = false // not actually consumed + break + } + // Don't fold across an unterminated `/*` — the rest of + // the file would be treated as code by the join. + if inBlockComment { + inBlockComment = false + break + } + line = strings.TrimSuffix(t, ";") + " " + nextTrim + i++ + consumedFiller++ + } + // Apply #command/#translate rules if len(pp.commands) > 0 || len(pp.translates) > 0 { line = pp.applyRules(line) @@ -223,6 +268,9 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string } result = append(result, line) + for k := 0; k < consumedFiller; k++ { + result = append(result, "") + } } if len(ifStack) > 0 { @@ -465,6 +513,31 @@ func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string { return "" } +// stripTrailingLineComment cuts s at the first `//` found outside a +// `"`- or `'`-quoted string (quotes have no escape handling) and returns s +// unchanged when none is found; `/* ... */` comments are not recognized here. +func stripTrailingLineComment(s string) string { + inStr := byte(0) + for i := 0; i < len(s); i++ { + c := s[i] + if inStr != 0 { + if c == inStr { + inStr = 0 + } + continue + } + switch c { + case '"', '\'': + inStr = c + case '/': + if i+1 < len(s) && s[i+1] == '/' { + return s[:i] + } + } + } + return s +} + // hasTopLevelSemi reports whether s contains a `;` outside of any // string literal or paren/bracket/brace nesting. Used by applyRules // to decide whether a line carries multiple PRG statements.