feat(pp): detect Harbour inline C in #pragma BEGINDUMP and fail fast
Harbour's #pragma BEGINDUMP ... #pragma ENDDUMP blocks carry C source
that the Harbour toolchain embeds verbatim. Five takes the same
directive but targets Go — any `.prg` ported from Harbour that ships
inline C gets its C shoveled into the Go codegen pipeline and fails
with opaque errors like "invalid character U+0023 '#'" from the Go
compiler, dozens of lines downstream of the actual cause.
Detect the C shape at PP time and report a clear, actionable error:
pp: file.prg:N: #pragma BEGINDUMP contains C code — Five accepts
inline Go only. Port the block to Go (or use an RTL function),
then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.
looksLikeInlineC uses conservative signals that don't false-positive
on legitimate inline Go (which calls `hbrt.HB_FUNC("NAME", fn)` with
a package prefix and a quoted string, distinct from C's bare
`HB_FUNC(NAME)` macro). Signals:
- `#include <...>` / `#include "..."` — unambiguous C preprocessor
- line-starting `HB_FUNC(` / `HB_FUNC_STATIC(` — C FFI macro
- `typedef ` / `struct ` / `int main(` / `void main(` at line start
main.go now aborts the build when PP returns errors, matching what it
already does for parser errors (previously PP errors were printed but
the build continued). This keeps build output short: one pp line + one
summary line, no gengo noise.
Verified:
- harbour-core/tests/inline_c.prg → clean PP error, exit 1
- examples/godump_demo.prg (legitimate inline Go) → passes PP
(hits a separate pre-existing gengo import-ordering bug, not
related to this change)
FiveSql2 43/43, Harbour compat 56/56, Go test ALL PASS.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -326,6 +326,9 @@ func parsePRGFile(prgFile string) *ast.File {
|
||||
for _, e := range ppErrors {
|
||||
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
|
||||
}
|
||||
if len(ppErrors) > 0 {
|
||||
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
|
||||
}
|
||||
|
||||
file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
|
||||
if len(errs) > 0 {
|
||||
@@ -371,6 +374,9 @@ func compilePRGMode(prgFile string, isLibrary bool) string {
|
||||
for _, e := range ppErrors {
|
||||
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
|
||||
}
|
||||
if len(ppErrors) > 0 {
|
||||
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
|
||||
}
|
||||
|
||||
file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
|
||||
if len(errs) > 0 {
|
||||
@@ -429,6 +435,9 @@ func compilePRG(prgFile string) string {
|
||||
for _, e := range ppErrors {
|
||||
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
|
||||
}
|
||||
if len(ppErrors) > 0 {
|
||||
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
|
||||
}
|
||||
|
||||
// Phase 2: Parse
|
||||
file, errs := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
|
||||
@@ -543,6 +552,9 @@ func buildFRB(prgFile, outputFile string) {
|
||||
for _, e := range ppErrors {
|
||||
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
|
||||
}
|
||||
if len(ppErrors) > 0 {
|
||||
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
|
||||
}
|
||||
|
||||
// Phase 2: Parse
|
||||
file, parseErrors := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
|
||||
@@ -657,6 +669,9 @@ func debugPRG(prgFile string) {
|
||||
for _, e := range ppErrors {
|
||||
fmt.Fprintf(os.Stderr, "pp: %s\n", e)
|
||||
}
|
||||
if len(ppErrors) > 0 {
|
||||
fatal(fmt.Sprintf("%d preprocessor error(s) in %s", len(ppErrors), prgFile))
|
||||
}
|
||||
|
||||
// Phase 2: Parse
|
||||
file, parseErrors := parser.ParseWithGoDumps(prgFile, processed, pre.GoDumps)
|
||||
|
||||
@@ -91,6 +91,7 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
|
||||
active := true
|
||||
inBlockComment := false // track multi-line /* */ comments
|
||||
inPragmaDump := false // track #pragma BEGINDUMP ... ENDDUMP
|
||||
dumpStartLine := 0 // 1-based line where BEGINDUMP appeared
|
||||
var dumpLines []string // accumulate Go code lines
|
||||
|
||||
for i, line := range lines {
|
||||
@@ -101,7 +102,26 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
|
||||
dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
|
||||
if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
|
||||
inPragmaDump = false
|
||||
pp.GoDumps = append(pp.GoDumps, strings.Join(dumpLines, "\n"))
|
||||
body := strings.Join(dumpLines, "\n")
|
||||
// Five's inline dumps are Go, not C. Harbour's own
|
||||
// #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC,
|
||||
// #include <stdio.h> etc.), so `.prg` files ported
|
||||
// from Harbour will attempt to shove C through Five's
|
||||
// Go-emit pipeline and fail with cryptic errors like
|
||||
// "invalid character U+0023 '#'". Detect the C shape
|
||||
// and report a clear, actionable error up front.
|
||||
if looksLikeInlineC(body) {
|
||||
pp.errors = append(pp.errors, fmt.Sprintf(
|
||||
"%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.",
|
||||
filename, dumpStartLine))
|
||||
// Emit a syntactically invalid line so the parser
|
||||
// also fails at the expected position rather than
|
||||
// the build silently continuing.
|
||||
result = append(result, "__FIVE_INLINE_C_ERROR__")
|
||||
dumpLines = nil
|
||||
continue
|
||||
}
|
||||
pp.GoDumps = append(pp.GoDumps, body)
|
||||
dumpLines = nil
|
||||
result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
|
||||
continue
|
||||
@@ -148,6 +168,7 @@ func (pp *Preprocessor) processLines(filename, source string, depth int) string
|
||||
upperDir := strings.ToUpper(directive)
|
||||
if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
|
||||
inPragmaDump = true
|
||||
dumpStartLine = i + 1 // 1-based for error reporting
|
||||
dumpLines = nil
|
||||
result = append(result, "")
|
||||
continue
|
||||
@@ -551,3 +572,52 @@ func replaceWord(line, old, new string) string {
|
||||
// isWordChar reports whether c is an ASCII letter (a-z, A-Z), an ASCII
// digit (0-9), or an underscore.
func isWordChar(c byte) bool {
|
||||
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_'
|
||||
}
|
||||
|
||||
// looksLikeInlineC reports whether body, the text collected from a
// #pragma BEGINDUMP block, looks like Harbour-style inline C rather than
// inline Go. It is a heuristic: any ONE signal below triggers detection so
// the user sees an early, readable error rather than an obscure Go syntax
// complaint far downstream.
//
// Every line is examined after trimming surrounding whitespace (which also
// drops a trailing '\r' from CRLF input). Signals (any match):
//   - a C `#include` directive: `#`, optional blanks, `include`, optional
//     blanks, then `<` or `"`. This covers `#include <stdio.h>`,
//     `#include "hbapi.h"`, `#include<stdio.h>` and `# include <x.h>`.
//   - `HB_FUNC`, `HB_FUNC_STATIC` or `HB_FUNC_TRANSLATE` at line start,
//     followed by optional blanks and `(` — Harbour's C FFI macros. The
//     Go-side counterpart `hbrt.HB_FUNC("NAME", fn)` carries a package
//     prefix, so it never starts a line with the bare macro name.
//   - `typedef ` / `struct ` / `int main(` / `void main(` at line start —
//     C declarations and entry points.
//
// Only these signals are checked. Harbour C API calls such as `hb_retc(`
// and `#define` macros are not signals on their own. An empty or
// whitespace-only body is never reported as C.
func looksLikeInlineC(body string) bool {
	for _, line := range strings.Split(body, "\n") {
		l := strings.TrimSpace(line)
		if l == "" {
			continue
		}

		// Preprocessor directive. C allows blanks between '#' and the
		// directive name and none before the header name, so match
		// token-wise instead of on one exact spelling.
		if strings.HasPrefix(l, "#") {
			directive := strings.TrimLeft(l[1:], " \t")
			if strings.HasPrefix(directive, "include") {
				header := strings.TrimLeft(directive[len("include"):], " \t")
				if strings.HasPrefix(header, "<") || strings.HasPrefix(header, `"`) {
					return true
				}
			}
			// No other signal can start with '#'.
			continue
		}

		// Bare Harbour FFI macro at line start. The blank before '(' is
		// optional in C, so `HB_FUNC (NAME)` must match as well.
		for _, macro := range []string{"HB_FUNC", "HB_FUNC_STATIC", "HB_FUNC_TRANSLATE"} {
			if !strings.HasPrefix(l, macro) {
				continue
			}
			if strings.HasPrefix(strings.TrimLeft(l[len(macro):], " \t"), "(") {
				return true
			}
		}

		// C declarations at line start that have no Go analogue.
		if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") ||
			strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") {
			return true
		}
	}

	return false
}
|
||||
|
||||
Reference in New Issue
Block a user