From 5514780b110708fbe20a927e67cff575193da454 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Sat, 18 Apr 2026 17:53:44 +0900 Subject: [PATCH] feat(pp): detect Harbour inline C in #pragma BEGINDUMP and fail fast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Harbour's #pragma BEGINDUMP ... #pragma ENDDUMP blocks carry C source that the Harbour toolchain embeds verbatim. Five takes the same directive but targets Go — any `.prg` ported from Harbour that ships inline C gets its C shoveled into the Go codegen pipeline and fails with opaque errors like "invalid character U+0023 '#'" from the Go compiler, dozens of lines downstream of the actual cause. Detect the C shape at PP time and report a clear, actionable error: pp: file.prg:N: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP. looksLikeInlineC uses conservative signals that don't false-positive on legitimate inline Go (which calls `hbrt.HB_FUNC("NAME", fn)` with a package prefix and a quoted string, distinct from C's bare `HB_FUNC(NAME)` macro). Signals: - `#include <...>` / `#include "..."` — unambiguous C preprocessor - line-starting `HB_FUNC(` / `HB_FUNC_STATIC(` — C FFI macro - `typedef ` / `struct ` / `int main(` / `void main(` at line start main.go now aborts the build when PP returns errors (previously printed but continued — same behavior the parser already had for its own errors). Keeps build output short: one pp line + one summary line, no gengo noise. Verified: - harbour-core/tests/inline_c.prg → clean PP error, exit 1 - examples/godump_demo.prg (legitimate inline Go) → passes PP (hits a separate pre-existing gengo import-ordering bug, not related to this change) FiveSql2 43/43, Harbour compat 56/56, Go test ALL PASS. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/five/main.go | 15 ++++++++++ compiler/pp/pp.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/cmd/five/main.go b/cmd/five/main.go index 8a096a7..3483034 100644 --- a/cmd/five/main.go +++ b/cmd/five/main.go @@ -326,6 +326,9 @@ func parsePRGFile(prgFile string) *ast.File { for _, e := range ppErrors { fmt.Fprintf(os.Stderr, "pp: %s\n", e) } + if len(ppErrors) > 0 { + fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile)) + } file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps) if len(errs) > 0 { @@ -371,6 +374,9 @@ func compilePRGMode(prgFile string, isLibrary bool) string { for _, e := range ppErrors { fmt.Fprintf(os.Stderr, "pp: %s\n", e) } + if len(ppErrors) > 0 { + fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile)) + } file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps) if len(errs) > 0 { @@ -429,6 +435,9 @@ func compilePRG(prgFile string) string { for _, e := range ppErrors { fmt.Fprintf(os.Stderr, "pp: %s\n", e) } + if len(ppErrors) > 0 { + fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile)) + } // Phase 2: Parse file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps) @@ -543,6 +552,9 @@ func buildFRB(prgFile, outputFile string) { for _, e := range ppErrors { fmt.Fprintf(os.Stderr, "pp: %s\n", e) } + if len(ppErrors) > 0 { + fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile)) + } // Phase 2: Parse file, parseErrors := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps) @@ -657,6 +669,9 @@ func debugPRG(prgFile string) { for _, e := range ppErrors { fmt.Fprintf(os.Stderr, "pp: %s\n", e) } + if len(ppErrors) > 0 { + fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile)) + } // Phase 2: Parse file, parseErrors := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps) 
diff --git a/compiler/pp/pp.go b/compiler/pp/pp.go index 2fa9f8d..a7b7492 100644 --- a/compiler/pp/pp.go +++ b/compiler/pp/pp.go @@ -91,6 +91,7 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string active := true inBlockComment := false // track multi-line /* */ comments inPragmaDump := false // track #pragma BEGINDUMP ... ENDDUMP + dumpStartLine := 0 // 1-based line where BEGINDUMP appeared var dumpLines []string // accumulate Go code lines for i, line := range lines { @@ -101,7 +102,26 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#")) if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") { inPragmaDump = false - pp.GoDumps = append(pp.GoDumps, strings.Join(dumpLines, "\n")) + body := strings.Join(dumpLines, "\n") + // Five's inline dumps are Go, not C. Harbour's own + // #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC, + // #include <hbapi.h> etc.), so `.prg` files ported + // from Harbour will attempt to shove C through Five's + // Go-emit pipeline and fail with cryptic errors like + // "invalid character U+0023 '#'". Detect the C shape + // and report a clear, actionable error up front. + if looksLikeInlineC(body) { + pp.errors = append(pp.errors, fmt.Sprintf( + "%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.", + filename, dumpStartLine)) + // Emit a syntactically invalid line so the parser + // also fails at the expected position rather than + // the build silently continuing. 
+ result = append(result, "__FIVE_INLINE_C_ERROR__") + dumpLines = nil + continue + } + pp.GoDumps = append(pp.GoDumps, body) dumpLines = nil result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1)) continue @@ -148,6 +168,7 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string upperDir := strings.ToUpper(directive) if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") { inPragmaDump = true + dumpStartLine = i + 1 // 1-based for error reporting dumpLines = nil result = append(result, "") continue @@ -551,3 +572,52 @@ func replaceWord(line, old, new string) string { func isWordChar(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' } + +// looksLikeInlineC heuristically detects Harbour-style inline C inside +// a #pragma BEGINDUMP block. Any ONE strong signal triggers detection +// so the user sees an early, readable error rather than an obscure Go +// syntax complaint far downstream. +// +// Signals (any match; each is a prefix test on a whitespace-trimmed line): +// - `HB_FUNC(` / `HB_FUNC_STATIC(` / `HB_FUNC_TRANSLATE(` — Harbour's C FFI macro +// - `#include <` or `#include "` — C preprocessor include +// - `int main(` / `void main(` — C entry point +// - `typedef ` / `struct ` — C declarations +// +// Harbour C API calls (`hb_ret*(`, `hb_param*(`, ...) and `#define` +// macros are NOT checked, so a block containing only those is not +// flagged. Matching is exact: variant spellings such as `HB_FUNC (` +// or `#include<` do not match. An empty or whitespace-only body +// returns false. +func looksLikeInlineC(body string) bool { + // Quick-reject: empty body. + trimmed := strings.TrimSpace(body) + if trimmed == "" { + return false + } + + for _, line := range strings.Split(body, "\n") { + l := strings.TrimSpace(line) + // #include <hbapi.h> / #include "hbapi.h" — unambiguous C preprocessor. + // Go doesn't use #include at all. 
+ if strings.HasPrefix(l, "#include <") || strings.HasPrefix(l, `#include "`) { + return true + } + // A line starting with bare `HB_FUNC(` is the Harbour C FFI macro. + // The Go-side counterpart, `hbrt.HB_FUNC("NAME", fn)`, carries a + // package prefix, so it does not start with `HB_FUNC(` and is not + // matched. Only the line prefix is tested, not the argument. + if strings.HasPrefix(l, "HB_FUNC(") || + strings.HasPrefix(l, "HB_FUNC_STATIC(") || + strings.HasPrefix(l, "HB_FUNC_TRANSLATE(") { + return true + } + // C declarations and entry points at line start that have no Go analogue. + if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") || + strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") { + return true + } + } + + return false +}