feat(pp): detect Harbour inline C in #pragma BEGINDUMP and fail fast

Harbour's #pragma BEGINDUMP ... #pragma ENDDUMP blocks carry C source
that the Harbour toolchain embeds verbatim. Five takes the same
directive but targets Go — any `.prg` ported from Harbour that ships
inline C gets its C shoveled into the Go codegen pipeline and fails
with opaque errors like "invalid character U+0023 '#'" from the Go
compiler, dozens of lines downstream of the actual cause.

Detect the C shape at PP time and report a clear, actionable error:

  pp: file.prg:N: #pragma BEGINDUMP contains C code — Five accepts
  inline Go only. Port the block to Go (or use an RTL function),
  then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.

looksLikeInlineC uses conservative signals that don't false-positive
on legitimate inline Go (which calls `hbrt.HB_FUNC("NAME", fn)` with
a package prefix and a quoted string, distinct from C's bare
`HB_FUNC(NAME)` macro). Signals:

  - `#include <...>` / `#include "..."` — unambiguous C preprocessor
  - line-starting `HB_FUNC(` / `HB_FUNC_STATIC(` /
    `HB_FUNC_TRANSLATE(` — C FFI macro
  - `typedef ` / `struct ` / `int main(` / `void main(` at line start

main.go now aborts the build when PP returns errors, matching what it
already does for the parser's own errors (previously PP errors were
printed but the build continued). Keeps build output short: one pp
line + one summary line, no gengo noise.

Verified:
  - harbour-core/tests/inline_c.prg → clean PP error, exit 1
  - examples/godump_demo.prg (legitimate inline Go) → passes PP
    (hits a separate pre-existing gengo import-ordering bug, not
    related to this change)

FiveSql2 43/43, Harbour compat 56/56, Go test ALL PASS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-18 17:53:44 +09:00
parent 85002df6b9
commit 5514780b11
2 changed files with 86 additions and 1 deletions

View File

@@ -326,6 +326,9 @@ func parsePRGFile(prgFile string) *ast.File {
for _, e := range ppErrors {
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
}
if len(ppErrors) > 0 {
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
}
file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
if len(errs) > 0 {
@@ -371,6 +374,9 @@ func compilePRGMode(prgFile string, isLibrary bool) string {
for _, e := range ppErrors {
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
}
if len(ppErrors) > 0 {
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
}
file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
if len(errs) > 0 {
@@ -429,6 +435,9 @@ func compilePRG(prgFile string) string {
for _, e := range ppErrors {
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
}
if len(ppErrors) > 0 {
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
}
// Phase 2: Parse
file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
@@ -543,6 +552,9 @@ func buildFRB(prgFile, outputFile string) {
for _, e := range ppErrors {
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
}
if len(ppErrors) > 0 {
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
}
// Phase 2: Parse
file, parseErrors := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
@@ -657,6 +669,9 @@ func debugPRG(prgFile string) {
for _, e := range ppErrors {
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
}
if len(ppErrors) > 0 {
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
}
// Phase 2: Parse
file, parseErrors := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)

View File

@@ -91,6 +91,7 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
active := true
inBlockComment := false // track multi-line /* */ comments
inPragmaDump := false // track #pragma BEGINDUMP ... ENDDUMP
dumpStartLine := 0 // 1-based line where BEGINDUMP appeared
var dumpLines []string // accumulate Go code lines
for i, line := range lines {
@@ -101,7 +102,26 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
inPragmaDump = false
pp.GoDumps = append(pp.GoDumps, strings.Join(dumpLines, "\n"))
body := strings.Join(dumpLines, "\n")
// Five's inline dumps are Go, not C. Harbour's own
// #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC,
// #include <stdio.h> etc.), so `.prg` files ported
// from Harbour will attempt to shove C through Five's
// Go-emit pipeline and fail with cryptic errors like
// "invalid character U+0023 '#'". Detect the C shape
// and report a clear, actionable error up front.
if looksLikeInlineC(body) {
pp.errors = append(pp.errors, fmt.Sprintf(
"%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.",
filename, dumpStartLine))
// Emit a syntactically invalid line so the parser
// also fails at the expected position rather than
// the build silently continuing.
result = append(result, "__FIVE_INLINE_C_ERROR__")
dumpLines = nil
continue
}
pp.GoDumps = append(pp.GoDumps, body)
dumpLines = nil
result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
continue
@@ -148,6 +168,7 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
upperDir := strings.ToUpper(directive)
if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
inPragmaDump = true
dumpStartLine = i + 1 // 1-based for error reporting
dumpLines = nil
result = append(result, "")
continue
@@ -551,3 +572,52 @@ func replaceWord(line, old, new string) string {
// isWordChar reports whether c is an ASCII letter, an ASCII digit, or
// an underscore.
func isWordChar(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return c == '_'
}
// looksLikeInlineC heuristically detects Harbour-style inline C inside
// a #pragma BEGINDUMP block. Any ONE strong signal triggers detection
// so the user sees an early, readable error rather than an obscure Go
// syntax complaint far downstream.
//
// Signals (any match):
// - `HB_FUNC(` / `HB_FUNC_STATIC(` / `HB_FUNC_TRANSLATE(` — Harbour's C FFI macro
// - `hb_ret*(` / `hb_param*(` / `hb_stor*(` / `hb_itemNew(` — Harbour C API
// - `#include <` or `#include "` — C preprocessor include
// - `#define <ident>(` followed by typed arg list — C-style macro
// - bare `int main(` / `void main(` — C entry point
// - `typedef ` / `struct ` at line start — C declarations
//
// Go programs can use `import`, `package`, `func`, `var`, `:=` — none
// of which overlap with these C signatures, so false positives are
// unlikely.
func looksLikeInlineC(body string) bool {
// Quick-reject: empty body.
trimmed := strings.TrimSpace(body)
if trimmed == "" {
return false
}
for _, line := range strings.Split(body, "\n") {
l := strings.TrimSpace(line)
// #include <stdio.h> / "hbapi.h" — unambiguous C preprocessor.
// Go doesn't use #include at all.
if strings.HasPrefix(l, "#include <") || strings.HasPrefix(l, `#include "`) {
return true
}
// Bare `HB_FUNC( NAME )` with an unquoted identifier is the
// Harbour C FFI macro. The Go-side counterpart is
// `hbrt.HB_FUNC("NAME", fn)` — lowercase package prefix and a
// quoted string. Match the C form strictly.
if strings.HasPrefix(l, "HB_FUNC(") ||
strings.HasPrefix(l, "HB_FUNC_STATIC(") ||
strings.HasPrefix(l, "HB_FUNC_TRANSLATE(") {
return true
}
// C declarations at line start that have no Go analogue.
if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") ||
strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") {
return true
}
}
return false
}