Files
five/compiler/pp/pp.go
CharlesKWON a8f6e53785 fix(pp): // line comment containing /* no longer eats subsequent lines
stripBlockComments scanned each line for a /* block-comment opener
while tracking string literals but had no notion of // or && line
comments. A line like `// see app/api/*.prg` would open a block
comment from /*.prg that ran until EOF or the next */, silently
dropping every FUNCTION declaration in between. The compiled file
ended up with an empty symbols slice, and callers in other files
panicked at runtime with "no function symbol for call".

Hit while writing app/lib/text.prg in solmade — its `// build's
\`app/api/*.prg\` glob doesn't pick it up` line dropped all three
of QueryParamRaw / UrlDecodeBytes / IsAllDigits.

Fix: detect // and && line-comment markers before the /* check.
When one is seen, copy the rest of the line through verbatim (the
lexer and #command machinery still need it) and stop scanning so
the embedded /* can't open a block comment.

Two regression tests cover both markers. Full mandatory test suite
(go test ./..., FiveSql2 43/43, compat 56/56, std.ch 17/17, FRB 7/7,
pgserver 11/11) still passes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-29 08:47:55 +09:00

913 lines
27 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Preprocessor for Five — handles #include, #define, #ifdef/#endif.
// Harbour: /mnt/d/harbour-core/src/pp/ppcore.c (6383 lines)
//
// Five PP is simplified but covers the essential directives:
// #include "file.ch" — file inclusion
// #define NAME VALUE — simple text substitution
// #undef NAME — remove definition
// #ifdef NAME / #ifndef NAME / #else / #endif — conditional compilation
// #pragma — compiler hints
//
// #command/#translate is supported via command.go (pattern matching + substitution).
// Five also handles CLASS syntax natively in the parser.
package pp
import (
	_ "embed"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
// embeddedStdCh holds the text of std.ch, compiled into the binary by
// the go:embed directive below. addStdRules runs it through the
// preprocessor so xBase commands like ERASE, RENAME, COMMIT, LOCATE, ...
// reach the parser already rewritten as plain function calls.
// Equivalent to Harbour's auto-included std.ch.
//
//go:embed std.ch
var embeddedStdCh string
// Preprocessor rewrites Five source text before it reaches the lexer.
// It carries the state that directives accumulate: simple #define
// substitutions, #include search paths, #command/#translate rules, the
// diagnostics returned by Process, and the Go code captured from
// #pragma BEGINDUMP blocks.
type Preprocessor struct {
defines map[string]string // #define name → replacement text ("" for flag-only defines)
includeDirs []string // extra directories searched by #include, in the order they were added
included map[string]bool // include names currently being resolved (circular-inclusion guard)
commands []*Rule // rules registered by #command / #xcommand
translates []*Rule // rules registered by #translate / #xtranslate
errors []string // diagnostics collected while processing; cleared at the start of Process
GoDumps []string // bodies of #pragma BEGINDUMP ... ENDDUMP blocks, in order of appearance
}
// New returns a Preprocessor whose #define and include-tracking tables
// are empty and whose rule lists already contain the built-in std.ch
// rules.
func New() *Preprocessor {
	pp := new(Preprocessor)
	pp.defines = map[string]string{}
	pp.included = map[string]bool{}
	pp.addStdRules()
	return pp
}
// addStdRules loads the built-in rules by running the embedded std.ch
// text through processLines. Only the side effects matter here (the
// directives found in std.ch are registered on pp); the rewritten text
// that processLines returns is thrown away.
//
// Constructs that cannot be expressed safely as a #command (for
// example @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT) are not part of
// std.ch and stay the parser's responsibility.
func (pp *Preprocessor) addStdRules() {
	const stdName = "std.ch"
	_ = pp.processLines(stdName, embeddedStdCh, 0)
}
// AddIncludeDir appends dir to the list of directories that #include
// searches. Directories are consulted in the order they were added.
func (pp *Preprocessor) AddIncludeDir(dir string) {
	dirs := append(pp.includeDirs, dir)
	pp.includeDirs = dirs
}
// Define registers name as if the source contained `#define name value`.
// An existing definition of the same name is overwritten. An empty value
// makes name a flag-only define.
func (pp *Preprocessor) Define(name, value string) {
	table := pp.defines
	table[name] = value
}
// Process runs the preprocessor over source and returns the rewritten
// text together with the diagnostics produced by this call. filename is
// used in diagnostics and as the base for resolving relative #include
// paths. Diagnostics left over from an earlier call are discarded first.
func (pp *Preprocessor) Process(filename, source string) (string, []string) {
	pp.errors = nil // diagnostics are reported per call
	rewritten := pp.processLines(filename, source, 0)
	return rewritten, pp.errors
}
// maxIncludeDepth bounds #include nesting; processLines refuses to go
// deeper and reports an error instead.
const maxIncludeDepth = 20

// processLines runs the preprocessor over source one physical line at a
// time and returns the rewritten text joined with "\n".
//
// For every line it, in order:
//  1. joins `;`-continued `#` directives into one logical directive;
//  2. while inside #pragma BEGINDUMP, collects lines verbatim until
//     #pragma ENDDUMP and stores the block in pp.GoDumps;
//  3. removes /* ... */ comments, including ones spanning lines;
//  4. handles conditional directives and, in active sections, the
//     remaining directives via handleDirective;
//  5. joins `;`-continued user code, then applies #command/#translate
//     rules and #define substitutions.
//
// filename is used for diagnostics and passed on to handleDirective;
// depth is the current #include nesting level. Problems are appended to
// pp.errors rather than returned.
//
// NOTE(review): #pragma BEGINDUMP is detected before the active-section
// check, so a dump inside an inactive #ifdef is still collected — confirm
// whether that is intended.
func (pp *Preprocessor) processLines(filename, source string, depth int) string {
	if depth > maxIncludeDepth {
		pp.errors = append(pp.errors, fmt.Sprintf("%s: #include depth exceeded (max %d)", filename, maxIncludeDepth))
		return source
	}
	lines := strings.Split(source, "\n")
	var result []string
	var ifStack []bool // true = active section, false = skipping
	active := true
	inBlockComment := false // track multi-line /* */ comments
	inPragmaDump := false   // track #pragma BEGINDUMP ... ENDDUMP
	dumpStartLine := 0      // 1-based line where BEGINDUMP appeared
	var dumpLines []string  // accumulate Go code lines
	for i := 0; i < len(lines); i++ {
		line := lines[i]
		// `#command`/`#translate` directives that end with a trailing `;`
		// continue on the next physical line — this is how harbour-core
		// formats its std.ch rules. Strip exactly one trailing `;` per
		// iteration so Harbour's `;;` convention ("literal `;` plus
		// continuation") survives: the inner `;` ends up as part of the
		// joined directive, the outer one drives the continuation.
		// Only `#`-directives participate; user code uses `;` differently.
		if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") {
			for i+1 < len(lines) {
				trimmed := strings.TrimRight(line, " \t")
				if !strings.HasSuffix(trimmed, ";") {
					break
				}
				line = strings.TrimSuffix(trimmed, ";") + " " + strings.TrimSpace(lines[i+1])
				i++
			}
		}
		// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks)
		if inPragmaDump {
			trimCheck := strings.TrimSpace(line)
			if strings.HasPrefix(trimCheck, "#") {
				dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
				if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
					inPragmaDump = false
					body := strings.Join(dumpLines, "\n")
					// Five's inline dumps are Go, not C. Harbour's own
					// #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC,
					// #include <stdio.h> etc.), so `.prg` files ported
					// from Harbour will attempt to shove C through Five's
					// Go-emit pipeline and fail with cryptic errors like
					// "invalid character U+0023 '#'". Detect the C shape
					// and report a clear, actionable error up front.
					if looksLikeInlineC(body) {
						pp.errors = append(pp.errors, fmt.Sprintf(
							"%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.",
							filename, dumpStartLine))
						// Emit a syntactically invalid line so the parser
						// also fails at the expected position rather than
						// the build silently continuing.
						result = append(result, "__FIVE_INLINE_C_ERROR__")
						dumpLines = nil
						continue
					}
					pp.GoDumps = append(pp.GoDumps, body)
					dumpLines = nil
					result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
					continue
				}
			}
			dumpLines = append(dumpLines, line)
			result = append(result, "") // blank out for line counting
			continue
		}
		trimmed := strings.TrimSpace(line)
		// Handle multi-line block comments
		if inBlockComment {
			if idx := strings.Index(line, "*/"); idx >= 0 {
				inBlockComment = false
				line = line[idx+2:] // keep content after */
				trimmed = strings.TrimSpace(line)
				if trimmed == "" {
					result = append(result, "")
					continue
				}
			} else {
				result = append(result, "") // blank out comment lines
				continue
			}
		}
		// Strip block comments within a single line and detect opening /*
		line = stripBlockComments(line, &inBlockComment)
		trimmed = strings.TrimSpace(line)
		// Check if in active section
		if len(ifStack) > 0 {
			active = ifStack[len(ifStack)-1]
		} else {
			active = true
		}
		// Preprocessor directives (always processed regardless of active state)
		if strings.HasPrefix(trimmed, "#") {
			directive := strings.TrimPrefix(trimmed, "#")
			directive = strings.TrimSpace(directive)
			// Detect #pragma BEGINDUMP
			upperDir := strings.ToUpper(directive)
			if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
				inPragmaDump = true
				dumpStartLine = i + 1 // 1-based for error reporting
				dumpLines = nil
				result = append(result, "")
				continue
			}
			if pp.handleConditional(directive, &ifStack, active) {
				continue
			}
			if !active {
				continue // skip non-conditional directives in inactive sections
			}
			if pp.handleDirective(filename, directive, depth, &result, i+1) {
				continue
			}
		}
		if !active {
			continue // skip lines in inactive #ifdef sections
		}
		// `;`-continuation in user code. Join physical lines ending in
		// a top-level `;` (paren/string-balanced) so multi-line
		// `#command` invocations like
		//   TEST t004 STATIC s_once := NIL ;
		//      INIT ... ;
		//      CODE x := S_C
		// match the rule pattern as a single logical line. Without this
		// the pattern only sees the first physical line, fails to match,
		// and the residual `TEST t004 STATIC ...` falls through to the
		// parser as a bare expression — silently merged into the
		// previous function's body, producing duplicate static decls
		// tagged with the wrong function name. Insert blank fillers
		// for each consumed line so post-PP source line numbers still
		// align with the original file for error reporting.
		consumedFiller := 0
		for i+1 < len(lines) {
			t := stripTrailingLineComment(strings.TrimRight(line, " \t"))
			t = strings.TrimRight(t, " \t")
			if !strings.HasSuffix(t, ";") {
				break
			}
			// Strip block comments from the next line *the same way*
			// the main loop will. Without this, an inline `/* ... */`
			// at the end of a continuation row hides the trailing
			// `;` from our HasSuffix check below — the joined chain
			// truncates after just one row, leaving a dangling comma
			// at end of line that the parser later mis-reports.
			rawNext := lines[i+1]
			strippedNext := stripBlockComments(rawNext, &inBlockComment)
			nextTrim := strings.TrimSpace(strippedNext)
			if strings.HasPrefix(nextTrim, "#") {
				inBlockComment = false // not actually consumed
				break
			}
			// Don't fold across an unterminated `/*` — the rest of
			// the file would be treated as code by the join.
			if inBlockComment {
				inBlockComment = false
				break
			}
			line = strings.TrimSuffix(t, ";") + " " + nextTrim
			i++
			consumedFiller++
		}
		// Apply #command/#translate rules
		if len(pp.commands) > 0 || len(pp.translates) > 0 {
			line = pp.applyRules(line)
		}
		// Apply #define substitutions
		if len(pp.defines) > 0 {
			line = pp.applyDefines(line)
		}
		result = append(result, line)
		for k := 0; k < consumedFiller; k++ {
			result = append(result, "")
		}
	}
	// A BEGINDUMP that never met its ENDDUMP has swallowed every line up
	// to end of file; report it instead of silently dropping that code.
	if inPragmaDump {
		pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: unterminated #pragma BEGINDUMP (missing #pragma ENDDUMP)", filename, dumpStartLine))
	}
	if len(ifStack) > 0 {
		pp.errors = append(pp.errors, fmt.Sprintf("%s: unterminated #ifdef/#ifndef", filename))
	}
	return strings.Join(result, "\n")
}
// handleConditional processes the conditional-compilation directives
// #ifdef, #ifndef, #if, #else and #endif.
//
// directive is the directive text without the leading `#`. ifStack is
// the stack of open conditionals (true = section is being compiled);
// it is pushed, flipped or popped in place. active tells whether the
// enclosing section is currently being compiled, so a conditional
// opened inside a skipped section stays skipped.
//
// The keyword and its operand are read as whitespace-delimited tokens
// after a trailing `//` comment has been cut off, so
// `#ifdef NAME // why` and tab-separated forms resolve to the intended
// name.
//
// #if is simplified: `1` and `.T.` are true, everything else (including
// `0`, `.F.` and any expression) is false.
//
// Returns true if the directive was a conditional and has been handled.
// Unbalanced #else / #endif are accepted and ignored.
func (pp *Preprocessor) handleConditional(directive string, ifStack *[]bool, active bool) bool {
	text := directive
	if at := strings.Index(text, "//"); at >= 0 {
		text = text[:at] // trailing line comment is not part of the directive
	}
	text = strings.TrimSpace(text)
	words := strings.Fields(text)
	if len(words) == 0 {
		return false
	}
	stack := *ifStack
	switch keyword := strings.ToUpper(words[0]); keyword {
	case "IFDEF", "IFNDEF":
		// A missing name is looked up as "" so the matching #endif
		// still finds its partner on the stack.
		name := ""
		if len(words) > 1 {
			name = words[1]
		}
		_, wanted := pp.defines[name]
		if keyword == "IFNDEF" {
			wanted = !wanted
		}
		*ifStack = append(stack, wanted && active)
		return true
	case "IF":
		expr := strings.TrimSpace(text[len(words[0]):])
		// Unknown expressions default to false (conservative).
		truthy := expr == "1" || strings.EqualFold(expr, ".T.")
		*ifStack = append(stack, truthy && active)
		return true
	case "ELSE":
		if n := len(stack); n > 0 {
			// The #else branch can only become active when the section
			// enclosing the whole conditional is itself active.
			parentActive := n == 1 || stack[n-2]
			stack[n-1] = !stack[n-1] && parentActive
		}
		return true
	case "ENDIF":
		if n := len(stack); n > 0 {
			*ifStack = stack[:n-1]
		}
		return true
	}
	return false
}
// handleDirective processes the non-conditional directives: #include,
// #define, #undef, #pragma, #warning/#error/#stdout and the
// #command/#translate family.
//
// directive is the directive text without the leading `#`; filename and
// lineNo are used in diagnostics; depth is the current #include nesting
// level. Lines that the directive contributes to the output (included
// text, pass-through comments) are appended to *result.
//
// Returns true when the directive was recognised and consumed, false
// when it is not one of the directives handled here.
func (pp *Preprocessor) handleDirective(filename, directive string, depth int, result *[]string, lineNo int) bool {
	upper := strings.ToUpper(directive)
	// #include "file" or #include <file>
	if strings.HasPrefix(upper, "INCLUDE ") {
		rest := strings.TrimSpace(directive[8:])
		inclFile := pp.extractIncludeFile(rest)
		if inclFile == "" {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #include", filename, lineNo))
			return true
		}
		content := pp.resolveInclude(filename, inclFile)
		if content == "" {
			// Not found — not an error for Five (some .ch files are optional)
			*result = append(*result, fmt.Sprintf("// #include %q — not found (skipped)", inclFile))
			return true
		}
		// Keep the file marked as "being included" while its content is
		// processed. resolveInclude drops its own mark as soon as it
		// returns, which is before any nested #include runs, so without
		// this a file that includes itself (directly or through another
		// header) recurses until maxIncludeDepth. With the mark in place
		// the nested lookup returns "" and the include is skipped.
		pp.included[inclFile] = true
		processed := pp.processLines(inclFile, content, depth+1)
		delete(pp.included, inclFile)
		*result = append(*result, strings.Split(processed, "\n")...)
		return true
	}
	// #define NAME [VALUE]
	if strings.HasPrefix(upper, "DEFINE ") {
		rest := strings.TrimSpace(directive[7:])
		// Detect function-like macro: #define NAME( params ) body
		// For now, skip these (don't register as simple text substitution)
		if idx := strings.IndexByte(rest, '('); idx > 0 && idx < strings.IndexAny(rest+" ", " \t") {
			// Function-like macro — not yet supported, skip
			return true
		}
		// The name ends at the first blank or tab.
		name, value := rest, ""
		if cut := strings.IndexAny(rest, " \t"); cut >= 0 {
			name = rest[:cut]
			value = stripDefineComment(strings.TrimSpace(rest[cut+1:]))
		}
		if name == "" {
			return true // nothing to register
		}
		pp.defines[name] = value
		return true
	}
	// #undef NAME
	if strings.HasPrefix(upper, "UNDEF ") {
		name := strings.TrimSpace(directive[6:])
		delete(pp.defines, name)
		return true
	}
	// #pragma — just pass through as comment
	if strings.HasPrefix(upper, "PRAGMA ") {
		*result = append(*result, "// "+directive)
		return true
	}
	// #warning, #error, #stdout — skip (emit as comment)
	if strings.HasPrefix(upper, "WARNING") || strings.HasPrefix(upper, "ERROR") || strings.HasPrefix(upper, "STDOUT") {
		*result = append(*result, "// #"+directive)
		return true
	}
	// #command / #translate — parse and store rules. Warnings attached
	// to the parsed rule (e.g. a result-template marker that does not
	// resolve to a pattern marker, such as case-sensitive `<For>` vs
	// `<for>` in an #xcommand) are surfaced via pp.errors with the
	// directive's filename:line so the user can find the typo instead
	// of getting empty / mangled expansions at every call site.
	registerRule := func(r *Rule, store *[]*Rule) {
		if r == nil {
			return
		}
		*store = append(*store, r)
		for _, w := range r.Warnings {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: #command: %s", filename, lineNo, w))
		}
	}
	if strings.HasPrefix(upper, "COMMAND ") {
		registerRule(ParseRule(directive[8:], true, false), &pp.commands)
		return true
	}
	if strings.HasPrefix(upper, "TRANSLATE ") {
		registerRule(ParseRule(directive[10:], false, false), &pp.translates)
		return true
	}
	if strings.HasPrefix(upper, "XCOMMAND ") {
		registerRule(ParseRule(directive[9:], true, true), &pp.commands)
		return true
	}
	if strings.HasPrefix(upper, "XTRANSLATE ") {
		registerRule(ParseRule(directive[11:], false, true), &pp.translates)
		return true
	}
	return false
}

// stripDefineComment cuts a trailing `//`, `&&` or `/*` comment off the
// replacement text of a #define and trims the result. Comment markers
// inside a '...' or "..." string literal are part of the value and are
// left alone, so `"app/api/*.prg"` and `"http://host"` survive intact.
func stripDefineComment(value string) string {
	quote := byte(0)
	for i := 0; i < len(value); i++ {
		c := value[i]
		if quote != 0 {
			if c == quote {
				quote = 0
			}
			continue
		}
		if c == '"' || c == '\'' {
			quote = c
			continue
		}
		if i+1 >= len(value) {
			break
		}
		next := value[i+1]
		if (c == '/' && (next == '/' || next == '*')) || (c == '&' && next == '&') {
			return strings.TrimSpace(value[:i])
		}
	}
	return value
}
// extractIncludeFile returns the file name from the operand of an
// #include directive.
//
// For `"file"` and `<file>` the name is the text between the opening
// delimiter and the first matching closing delimiter; anything after
// it (typically a trailing `//` or `&&` comment) is ignored. An operand
// without delimiters, or with an opening delimiter that is never
// closed, is returned as-is (trimmed) and treated as a bare file name.
func (pp *Preprocessor) extractIncludeFile(s string) string {
	s = strings.TrimSpace(s)
	if len(s) < 2 {
		return s
	}
	var closer byte
	switch s[0] {
	case '"':
		closer = '"'
	case '<':
		closer = '>'
	default:
		return s // bare filename
	}
	if end := strings.IndexByte(s[1:], closer); end >= 0 {
		return s[1 : 1+end]
	}
	return s // unterminated delimiter: keep the text as a bare filename
}
// resolveInclude locates inclFile and returns its content, or "" when
// the file cannot be read from any candidate location.
//
// Candidate locations, tried in this order:
//  1. the directory of currentFile (skipped when currentFile is "");
//  2. each directory registered with AddIncludeDir, in insertion order.
//
// While the lookup runs, inclFile is flagged in pp.included; a call for
// a name that is already flagged returns "" immediately. The flag set
// here is removed again when the function returns.
func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
	if pp.included[inclFile] {
		return ""
	}
	pp.included[inclFile] = true
	defer delete(pp.included, inclFile)

	candidates := make([]string, 0, len(pp.includeDirs)+1)
	if currentFile != "" {
		candidates = append(candidates, filepath.Join(filepath.Dir(currentFile), inclFile))
	}
	for _, dir := range pp.includeDirs {
		candidates = append(candidates, filepath.Join(dir, inclFile))
	}

	for _, candidate := range candidates {
		if data, err := os.ReadFile(candidate); err == nil {
			return string(data)
		}
	}
	return ""
}
// stripTrailingLineComment removes a trailing line comment from s and
// returns what precedes it (whitespace before the marker is kept).
//
// Both line-comment markers that stripBlockComments recognises are
// handled: `//` and the xBase-style `&&`. A marker inside a '...' or
// "..." string literal is not a comment. Block comments are already
// handled by stripBlockComments before continuation joining.
func stripTrailingLineComment(s string) string {
	quote := byte(0)
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case (c == '/' || c == '&') && i+1 < len(s) && s[i+1] == c:
			// `//` or `&&`: everything from here on is comment text.
			return s[:i]
		}
	}
	return s
}
// hasTopLevelSemi reports whether s contains a `;` that is neither
// inside a '...' / "..." string literal nor nested in (), [] or {}.
// applyRules uses it to decide whether a line carries several PRG
// statements. Unbalanced closing brackets are ignored rather than
// driving the nesting level negative.
func hasTopLevelSemi(s string) bool {
	var quote byte
	nesting := 0
	for _, ch := range []byte(s) {
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			}
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '(' || ch == '[' || ch == '{':
			nesting++
		case ch == ')' || ch == ']' || ch == '}':
			if nesting > 0 {
				nesting--
			}
		case ch == ';' && nesting == 0:
			return true
		}
	}
	return false
}
// splitTopLevelSemi cuts s at every `;` that sits outside string
// literals and outside (), [] and {} nesting, and returns the pieces
// without the separators. The result always has at least one element;
// a trailing `;` produces a final empty piece, so joining the pieces
// with ";" reproduces s and the separator's role as a line-continuation
// marker is not lost.
func splitTopLevelSemi(s string) []string {
	var (
		pieces  []string
		quote   byte
		nesting int
		from    int
	)
	for at, n := 0, len(s); at < n; at++ {
		ch := s[at]
		if quote != 0 {
			if ch == quote {
				quote = 0
			}
			continue
		}
		switch {
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '(' || ch == '[' || ch == '{':
			nesting++
		case ch == ')' || ch == ']' || ch == '}':
			if nesting > 0 {
				nesting--
			}
		case ch == ';' && nesting == 0:
			pieces = append(pieces, s[from:at])
			from = at + 1
		}
	}
	return append(pieces, s[from:])
}
// applyRules rewrites line using the registered #command and
// #translate rules and returns the result. Blank lines and lines that
// start with `//` are returned untouched.
//
// A line holding several `;`-separated statements (`dbCommit(); CLOSE
// ALL`) is split at top-level `;` and every statement is rewritten on
// its own, then the pieces are rejoined with `;`. Otherwise only the
// first statement would be rewritten and later ones — such as the
// `CLOSE ALL` above — would reach the parser as plain identifiers.
//
// For a single statement, #command rules are tried first, then
// #translate rules; the first rule whose MatchLine accepts the trimmed
// text wins and its output replaces the statement, keeping the line's
// leading blanks and tabs. If no rule matches, line is returned
// unchanged.
func (pp *Preprocessor) applyRules(line string) string {
	body := strings.TrimSpace(line)
	if body == "" || strings.HasPrefix(body, "//") {
		return line
	}

	if hasTopLevelSemi(body) {
		if segments := splitTopLevelSemi(line); len(segments) > 1 {
			for idx := range segments {
				segments[idx] = pp.applyRules(segments[idx])
			}
			return strings.Join(segments, ";")
		}
	}

	for _, rules := range [][]*Rule{pp.commands, pp.translates} {
		for _, rule := range rules {
			rewritten, ok := rule.MatchLine(body)
			if !ok {
				continue
			}
			// Whatever TrimLeft removed is the indentation to keep.
			indent := strings.TrimSuffix(line, strings.TrimLeft(line, " \t"))
			return indent + rewritten
		}
	}
	return line
}
// stripBlockComments returns line with every complete /* ... */ comment
// replaced by a single space. When a /* has no closing */ on the same
// line, the text before it is returned and *inBlock is set to true so
// the caller can skip the following lines; *inBlock is never cleared
// here.
//
// Text inside '...' and "..." string literals is copied as-is. A `//`
// or `&&` line comment ends the scan: the rest of the line is copied
// through verbatim, so a `/*` inside it (e.g. `// see app/api/*.prg`)
// cannot open a block comment that swallows subsequent lines.
func stripBlockComments(line string, inBlock *bool) string {
	var kept strings.Builder
	quote := byte(0)
	n := len(line)
	for pos := 0; pos < n; {
		ch := line[pos]
		if quote != 0 {
			if ch == quote {
				quote = 0
			}
			kept.WriteByte(ch)
			pos++
			continue
		}
		if ch == '"' || ch == '\'' {
			quote = ch
			kept.WriteByte(ch)
			pos++
			continue
		}

		pair := ""
		if pos+1 < n {
			pair = line[pos : pos+2]
		}
		switch pair {
		case "//", "&&":
			// Line comment: keep it, but do not look inside it.
			kept.WriteString(line[pos:])
			return kept.String()
		case "/*":
			closeAt := strings.Index(line[pos+2:], "*/")
			if closeAt < 0 {
				*inBlock = true
				return kept.String() // rest of the line is comment
			}
			kept.WriteByte(' ') // the comment still separates tokens
			pos += 2 + closeAt + 2
		default:
			kept.WriteByte(ch)
			pos++
		}
	}
	return kept.String()
}
// applyDefines substitutes #define names in line with their replacement
// text and returns the result. Flag-only defines (empty value) are not
// substituted. Replacement is whole-word and skips string literals (see
// replaceWord); it is not full macro expansion.
//
// Names are applied in sorted order. Ranging over the map directly
// would apply them in Go's randomised iteration order, and because each
// substitution works on the output of the previous one, a define whose
// value mentions another define would expand differently from run to
// run. With a fixed order the output is reproducible; a replacement
// text is expanded further only by names that sort after the one that
// introduced it.
func (pp *Preprocessor) applyDefines(line string) string {
	names := make([]string, 0, len(pp.defines))
	for name, value := range pp.defines {
		if value == "" {
			continue // flag-only define, no substitution
		}
		names = append(names, name)
	}
	sort.Strings(names)
	for _, name := range names {
		line = replaceWord(line, name, pp.defines[name])
	}
	return line
}
// replaceWord returns line with every whole-word occurrence of old
// replaced by new. An occurrence counts as a whole word when the bytes
// directly before and after it are not word characters (see
// isWordChar) or it touches the start/end of the line. Text inside
// '...' and "..." string literals is copied unchanged.
//
// An empty old matches nothing and line is returned as-is; without that
// guard the scan would insert new at a word boundary without ever
// advancing and loop forever.
func replaceWord(line, old, new string) string {
	if old == "" || !strings.Contains(line, old) {
		return line
	}
	var out strings.Builder
	quote := byte(0)
	for i := 0; i < len(line); {
		c := line[i]
		switch {
		case quote != 0:
			// Inside a string literal: copy through, watch for its end.
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case strings.HasPrefix(line[i:], old):
			end := i + len(old)
			startsWord := i == 0 || !isWordChar(line[i-1])
			endsWord := end >= len(line) || !isWordChar(line[end])
			if startsWord && endsWord {
				out.WriteString(new)
				i = end
				continue
			}
		}
		out.WriteByte(c)
		i++
	}
	return out.String()
}
// isWordChar reports whether c can be part of an identifier: an ASCII
// letter, an ASCII digit or an underscore.
func isWordChar(c byte) bool {
	switch {
	case c == '_':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	return false
}
// looksLikeInlineC heuristically detects Harbour-style inline C in the
// body of a #pragma BEGINDUMP block, so the user gets an early,
// readable error instead of an obscure Go syntax complaint far
// downstream. An empty or all-whitespace body is never C.
//
// Each line is trimmed and checked on its own; a single hit is enough.
// A line counts as C when it
//   - starts with `#include <` or `#include "` (C preprocessor include);
//   - is a function-like `#define NAME(` (no blank between name and `(`);
//   - starts with `HB_FUNC(`, `HB_FUNC_STATIC(` or `HB_FUNC_TRANSLATE(`,
//     Harbour's C FFI macros (the Go-side counterpart is written
//     `hbrt.HB_FUNC("NAME", fn)` and does not start the line this way);
//   - starts with `extern "C"`;
//   - starts with `typedef `, `struct `, `int main(` or `void main(`;
//   - opens a C function declaration according to isCReturnTypeDecl;
//   - starts with `hb_ret` followed by letters and `(` — hb_retc,
//     hb_retni, hb_retnl, ... — Harbour's C-side return setters.
func looksLikeInlineC(body string) bool {
	if strings.TrimSpace(body) == "" {
		return false
	}
	// Line prefixes that only ever open a C line.
	cOnlyPrefixes := [...]string{
		"#include <", `#include "`,
		"HB_FUNC(", "HB_FUNC_STATIC(", "HB_FUNC_TRANSLATE(",
		`extern "C"`,
		"typedef ", "struct ", "int main(", "void main(",
	}
	for _, raw := range strings.Split(body, "\n") {
		l := strings.TrimSpace(raw)
		for _, prefix := range cOnlyPrefixes {
			if strings.HasPrefix(l, prefix) {
				return true
			}
		}
		// `#define FOO(x) ...`: the first delimiter after the name must
		// be `(`, not a blank.
		if macro := strings.TrimPrefix(l, "#define "); macro != l {
			macro = strings.TrimSpace(macro)
			if at := strings.IndexAny(macro, " \t("); at > 0 && macro[at] == '(' {
				return true
			}
		}
		if isCReturnTypeDecl(l) {
			return true
		}
		// hb_ret<letters>( — the part between `hb_ret` and `(` must be
		// non-empty and purely alphabetic.
		if tail := strings.TrimPrefix(l, "hb_ret"); tail != l {
			if at := strings.IndexByte(tail, '('); at > 0 && allLetters(tail[:at]) {
				return true
			}
		}
	}
	return false
}
// isCReturnTypeDecl reports whether the (already trimmed) line l opens
// a C function declaration such as `int foo(`, `static char* bar(`,
// `unsigned int baz(` or `uint32_t crc(`.
//
// The text before the first `(` is split into words, with pointer
// stars treated as blanks so `char* f`, `char *f` and `char * f` all
// read the same. The last word must be an identifier shorter than 50
// bytes; every word before it must be a C storage/qualifier keyword
// (static, const, extern, inline) or a C type word, and at least one
// type word must be present. Type words are the basic C types, size_t /
// ssize_t, and any `int…_t` / `uint…_t` name.
//
// Requiring a separate type word in front of the name keeps Go code out:
// `func name() int {` starts with `func`, and a conversion such as
// `uintptr(p)` or `float64(x)` has no word before the name.
func isCReturnTypeDecl(l string) bool {
	paren := strings.IndexByte(l, '(')
	if paren <= 0 {
		return false
	}
	words := strings.Fields(strings.ReplaceAll(l[:paren], "*", " "))
	if len(words) < 2 {
		return false // need at least "<type> <name>"
	}
	name := words[len(words)-1]
	if len(name) >= 50 || !allIdentChars(name) {
		return false
	}
	sawType := false
	for _, w := range words[:len(words)-1] {
		switch w {
		case "static", "const", "extern", "inline":
			// Qualifiers alone do not make a declaration.
		case "int", "void", "char", "long", "short", "double", "float",
			"unsigned", "signed", "size_t", "ssize_t", "uint":
			sawType = true
		default:
			// Fixed-width and pointer-sized integers: int8_t, uint32_t,
			// intptr_t, uint_fast16_t, ...
			isStdint := strings.HasSuffix(w, "_t") &&
				(strings.HasPrefix(w, "int") || strings.HasPrefix(w, "uint"))
			if !isStdint {
				return false
			}
			sawType = true
		}
	}
	return sawType
}
// allLetters reports whether s is non-empty and consists only of ASCII
// letters.
func allLetters(s string) bool {
	if s == "" {
		return false
	}
	for _, r := range s {
		switch {
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z':
			// still a letter, keep going
		default:
			return false
		}
	}
	return true
}
// allIdentChars reports whether s is a non-empty ASCII identifier:
// letters and underscores anywhere, digits anywhere except the first
// position.
func allIdentChars(s string) bool {
	if len(s) == 0 {
		return false
	}
	for pos, r := range s {
		isAlpha := r == '_' || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
		isDigit := '0' <= r && r <= '9'
		if !isAlpha && !(isDigit && pos > 0) {
			return false
		}
	}
	return true
}