Files
five/compiler/pp/pp.go
CharlesKWON 2008266da7 feat(pp,rtl): Tier 2 audit followups — JOIN hash + PP validation + C heuristic
Three medium-priority audit items in one commit, each independently
revertible.

  * **#18 JOIN hash-join fast path.** New std.ch shape:
        JOIN WITH <alias> TO <file> [FIELDS ...] ON <mfield> = <dfield>
    expands to a 6-arg __dbJoin call with the master/detail key
    field names. Runtime detects the extra args, builds an O(M)
    hash over the detail's key column, then probes per master row
    for O(N+M) total — vs the FOR form's O(N*M). For 1k×1k that's
    2k vs 1M operations; the gap widens with N. The original FOR
    form is unchanged and stays the fallback for arbitrary
    predicates. New helper dbHashKey type-tags the key string so
    `1` (numeric), `"1"` (string), and `.T.` (logical) don't
    collide in the bucket map.

  * **#38 PP rule result-marker validation.** ParseRule now walks
    the result template after parseMarkers and warns about every
    `<name>` (or `<(name)>` / `<.name.>` / `<{name}>` / `#<name>`
    / `<"name">`) that doesn't match a pattern marker. Warnings
    flow into pp.errors via handleDirective with the directive's
    filename:line, so a typo'd `<NaMe>` in an `#xcommand`
    case-sensitive rule fails the build with a clear diagnostic
    instead of silently producing broken expansions.

  * **#44 looksLikeInlineC heuristic strengthened.** Catches more
    of the common Harbour-PRG-with-C-inline-block shapes that
    used to fall through and produce cryptic Go-side errors:
    function-like #define, `extern "C"` linkage blocks, C return-
    type declarations (`int foo(`, `static char* bar(`), and the
    hb_ret*() helper family used by Harbour's C FFI return
    setters. Two small predicate helpers (allLetters,
    allIdentChars) keep the C-vs-Go disambiguation tight enough
    that legit Go code (`func name() int { ... }`) doesn't trip.

  * **#28 LIST/DISPLAY pagination** — explicitly deferred. Proper
    pagination requires interactive terminal handling (Inkey(0)
    for the keypress) which would hang in CI / batch mode. Will
    revisit when an interactive terminal layer needs it for
    other reasons.

Test fixtures: tests/std_ch/test_join_hash.prg verifies the new
ON-form path produces the same output as the FOR form would.
std.ch runner now stands at 16/16.

Other gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56
  std.ch suite       : 16/16
  FRB suite          : 7/7

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 19:21:19 +09:00

822 lines
24 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Preprocessor for Five — handles #include, #define, #ifdef/#endif.
// Harbour: /mnt/d/harbour-core/src/pp/ppcore.c (6383 lines)
//
// Five PP is simplified but covers the essential directives:
// #include "file.ch" — file inclusion
// #define NAME VALUE — simple text substitution
// #undef NAME — remove definition
// #ifdef NAME / #ifndef NAME / #else / #endif — conditional compilation
// #pragma — compiler hints
//
// #command/#translate are supported via command.go (pattern matching + substitution).
// Five also handles CLASS syntax natively in the parser.
package pp
import (
	_ "embed"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
// embeddedStdCh is include/std.ch baked into the compiler binary so
// xBase commands like ERASE, RENAME, COMMIT, LOCATE, ... reach the
// parser already rewritten as plain function calls. Equivalent to
// Harbour's auto-included std.ch.
//
//go:embed std.ch
var embeddedStdCh string
// Preprocessor processes source code before lexing.
//
// It holds every piece of state that outlives a single line: the #define
// table, the #include search path, the rule lists built from
// #command/#translate directives, the diagnostics gathered so far and the
// bodies of any #pragma BEGINDUMP blocks.
type Preprocessor struct {
defines map[string]string // #define name → replacement text ("" for flag-only defines)
includeDirs []string // search paths for #include, tried in the order they were added
included map[string]bool // include names currently being resolved (circular-inclusion guard)
commands []*Rule // rules registered by #command / #xcommand
translates []*Rule // rules registered by #translate / #xtranslate
errors []string // diagnostics accumulated while processing; handed back by Process
GoDumps []string // collected #pragma BEGINDUMP Go code blocks, indexed by the FIVE_GODUMP__ marker
}
// New returns a Preprocessor whose define table and include guard are
// empty and whose rule lists already contain the embedded std.ch rules.
func New() *Preprocessor {
	pp := new(Preprocessor)
	pp.defines = map[string]string{}
	pp.included = map[string]bool{}
	pp.addStdRules()
	return pp
}
// addStdRules loads the built-in rules by running the embedded std.ch
// text through processLines. Directives found there update the
// preprocessor's state (for example pp.commands) as a side effect.
//
// Constructs that cannot be expressed safely as a #command (e.g.
// parser-handled ones like @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT) are
// left to the parser.
func (pp *Preprocessor) addStdRules() {
	// Only the side effects matter; the expanded text is thrown away.
	_ = pp.processLines("std.ch", embeddedStdCh, 0)
}
// AddIncludeDir appends dir to the list of directories searched when an
// #include file is resolved. Directories are tried in insertion order.
func (pp *Preprocessor) AddIncludeDir(dir string) {
	dirs := append(pp.includeDirs, dir)
	pp.includeDirs = dirs
}
// Define registers name as a macro with the given replacement value,
// overwriting any earlier definition of the same name. It has the same
// effect on the define table as a `#define name value` directive.
func (pp *Preprocessor) Define(name, value string) {
	table := pp.defines
	table[name] = value
}
// Process preprocesses source, resolving #include, #define and the other
// supported directives, and returns the expanded text together with the
// diagnostics produced during this call.
//
// The error list is cleared at the start of every call; defines and rules
// collected by earlier calls are kept.
func (pp *Preprocessor) Process(filename, source string) (string, []string) {
	pp.errors = nil
	expanded := pp.processLines(filename, source, 0)
	return expanded, pp.errors
}
// maxIncludeDepth bounds how deeply #include directives may nest before
// processLines gives up and reports an error.
const maxIncludeDepth = 20

// processLines preprocesses one unit of source text (a top-level file or
// the content of an #include) and returns the expanded text.
//
// filename is used only for diagnostics and for resolving nested
// includes; depth is the current #include nesting level. Diagnostics are
// appended to pp.errors.
//
// For every physical line it, in order:
//  1. joins `;`-continued directive lines into one logical directive,
//  2. collects the body of a #pragma BEGINDUMP ... ENDDUMP block,
//  3. removes /* */ comments (single- and multi-line),
//  4. evaluates conditional directives and skips inactive sections,
//  5. dispatches the remaining directives to handleDirective,
//  6. applies #command/#translate rules and then #define substitution.
func (pp *Preprocessor) processLines(filename, source string, depth int) string {
	if depth > maxIncludeDepth {
		pp.errors = append(pp.errors, fmt.Sprintf("%s: #include depth exceeded (max %d)", filename, maxIncludeDepth))
		return source
	}

	lines := strings.Split(source, "\n")
	var result []string
	var ifStack []bool // true = active section, false = skipping
	active := true
	inBlockComment := false // inside a multi-line /* */ comment
	inPragmaDump := false   // between #pragma BEGINDUMP and ENDDUMP
	dumpDiscard := false    // the open dump sits in an inactive #if section
	dumpStartLine := 0      // 1-based line where BEGINDUMP appeared
	var dumpLines []string  // accumulated inline Go code

	for i := 0; i < len(lines); i++ {
		line := lines[i]

		// `#command`/`#translate` directives that end with a trailing `;`
		// continue on the next physical line — this is how harbour-core
		// formats its std.ch rules. Strip exactly one trailing `;` per
		// iteration so Harbour's `;;` convention ("literal `;` plus
		// continuation") survives: the inner `;` ends up as part of the
		// joined directive, the outer one drives the continuation.
		// Only `#`-directives participate; user code uses `;` differently.
		// A trailing `\r` is ignored when looking for the `;` so files
		// with CRLF line endings continue exactly like LF files.
		if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") {
			for i+1 < len(lines) {
				trimmed := strings.TrimRight(line, " \t\r")
				if !strings.HasSuffix(trimmed, ";") {
					break
				}
				line = strings.TrimSuffix(trimmed, ";") + " " + strings.TrimSpace(lines[i+1])
				i++
			}
		}

		// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks).
		// While a dump is open only the ENDDUMP pragma is interpreted.
		if inPragmaDump {
			trimCheck := strings.TrimSpace(line)
			if strings.HasPrefix(trimCheck, "#") {
				dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
				if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
					inPragmaDump = false
					if dumpDiscard {
						// The whole dump was conditionally excluded.
						dumpDiscard = false
						dumpLines = nil
						continue
					}
					body := strings.Join(dumpLines, "\n")
					// Five's inline dumps are Go, not C. Harbour's own
					// #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC,
					// #include <stdio.h> etc.), so `.prg` files ported
					// from Harbour will attempt to shove C through Five's
					// Go-emit pipeline and fail with cryptic errors like
					// "invalid character U+0023 '#'". Detect the C shape
					// and report a clear, actionable error up front.
					if looksLikeInlineC(body) {
						pp.errors = append(pp.errors, fmt.Sprintf(
							"%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.",
							filename, dumpStartLine))
						// Emit a syntactically invalid line so the parser
						// also fails at the expected position rather than
						// the build silently continuing.
						result = append(result, "__FIVE_INLINE_C_ERROR__")
						dumpLines = nil
						continue
					}
					pp.GoDumps = append(pp.GoDumps, body)
					dumpLines = nil
					result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
					continue
				}
			}
			if dumpDiscard {
				continue // inactive section: drop the line like any other
			}
			dumpLines = append(dumpLines, line)
			result = append(result, "") // blank out for line counting
			continue
		}

		trimmed := strings.TrimSpace(line)

		// Handle multi-line block comments
		if inBlockComment {
			if idx := strings.Index(line, "*/"); idx >= 0 {
				inBlockComment = false
				line = line[idx+2:] // keep content after */
				trimmed = strings.TrimSpace(line)
				if trimmed == "" {
					result = append(result, "")
					continue
				}
			} else {
				result = append(result, "") // blank out comment lines
				continue
			}
		}

		// Strip block comments within a single line and detect opening /*
		line = stripBlockComments(line, &inBlockComment)
		trimmed = strings.TrimSpace(line)

		// Check if in active section
		if len(ifStack) > 0 {
			active = ifStack[len(ifStack)-1]
		} else {
			active = true
		}

		// Preprocessor directives. Conditionals are always evaluated so
		// the #if stack stays balanced; everything else only takes effect
		// in an active section.
		if strings.HasPrefix(trimmed, "#") {
			directive := strings.TrimPrefix(trimmed, "#")
			directive = strings.TrimSpace(directive)

			// Detect #pragma BEGINDUMP. The block is always consumed up
			// to ENDDUMP (its body may contain `#` lines that must not be
			// read as directives), but it is only kept when the
			// surrounding section is active.
			upperDir := strings.ToUpper(directive)
			if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
				inPragmaDump = true
				dumpDiscard = !active
				dumpStartLine = i + 1 // 1-based for error reporting
				dumpLines = nil
				if active {
					result = append(result, "")
				}
				continue
			}

			if pp.handleConditional(directive, &ifStack, active) {
				continue
			}
			if !active {
				continue // skip non-conditional directives in inactive sections
			}
			if pp.handleDirective(filename, directive, depth, &result, i+1) {
				continue
			}
		}

		if !active {
			continue // skip lines in inactive #ifdef sections
		}

		// Apply #command/#translate rules
		if len(pp.commands) > 0 || len(pp.translates) > 0 {
			line = pp.applyRules(line)
		}

		// Apply #define substitutions
		if len(pp.defines) > 0 {
			line = pp.applyDefines(line)
		}

		result = append(result, line)
	}

	// A dump that never reached ENDDUMP would otherwise vanish silently.
	if inPragmaDump && !dumpDiscard {
		pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: unterminated #pragma BEGINDUMP", filename, dumpStartLine))
	}
	if len(ifStack) > 0 {
		pp.errors = append(pp.errors, fmt.Sprintf("%s: unterminated #ifdef/#ifndef", filename))
	}
	return strings.Join(result, "\n")
}
// handleConditional processes #ifdef, #ifndef, #if, #else and #endif.
//
// directive is the directive text without the leading `#`. ifStack is the
// stack of enclosing conditional states (true = section is emitted) and
// is updated in place. active says whether the section containing the
// directive is currently being emitted; a newly opened section is only
// active when its own condition holds and active is true.
//
// The keyword may be followed by a space or a tab, and trailing text after
// a macro name (for example a `// comment`) is ignored. #if understands
// only the literals 1 / .T. (true) and 0 / .F. (false); any other
// expression is treated as false. #elif is not supported.
//
// It returns true if the directive was a conditional and has been
// handled, false otherwise (including #ifdef/#ifndef with no macro name).
func (pp *Preprocessor) handleConditional(directive string, ifStack *[]bool, active bool) bool {
	directive = strings.TrimSpace(directive)
	keyword, arg := directive, ""
	if cut := strings.IndexAny(directive, " \t"); cut >= 0 {
		keyword, arg = directive[:cut], strings.TrimSpace(directive[cut+1:])
	}
	stack := *ifStack

	switch strings.ToUpper(keyword) {
	case "IFDEF", "IFNDEF":
		if arg == "" {
			return false // no macro name: not a usable conditional
		}
		// Only the first token names the macro; anything after it
		// (typically a trailing comment) is ignored.
		name := arg
		if end := strings.IndexAny(name, " \t/"); end >= 0 {
			name = name[:end]
		}
		_, holds := pp.defines[name]
		if strings.EqualFold(keyword, "IFNDEF") {
			holds = !holds
		}
		*ifStack = append(stack, holds && active)
		return true

	case "IF":
		expr := arg
		if c := strings.Index(expr, "//"); c >= 0 {
			expr = strings.TrimSpace(expr[:c])
		}
		// Unknown expressions default to false (conservative).
		holds := expr == "1" || expr == ".T."
		*ifStack = append(stack, holds && active)
		return true

	case "ELSE":
		if n := len(stack); n > 0 {
			// The #else branch can only become active when the section
			// enclosing the whole conditional is active.
			parentActive := n == 1 || stack[n-2]
			stack[n-1] = !stack[n-1] && parentActive
		}
		return true

	case "ENDIF":
		if n := len(stack); n > 0 {
			*ifStack = stack[:n-1]
		}
		return true
	}
	return false
}
// handleDirective processes the non-conditional directives: #include,
// #define, #undef, #pragma, #warning/#error/#stdout and the
// #command/#translate family.
//
// directive is the directive text without the leading `#`; filename and
// lineNo locate it for diagnostics; depth is the current #include nesting
// level; result receives any lines the directive contributes to the
// output. It returns true when the directive was recognised and consumed,
// false when the caller should treat the line as ordinary source.
func (pp *Preprocessor) handleDirective(filename, directive string, depth int, result *[]string, lineNo int) bool {
	upper := strings.ToUpper(directive)

	// #include "file" or #include <file>
	if strings.HasPrefix(upper, "INCLUDE ") {
		rest := strings.TrimSpace(directive[8:])
		inclFile := pp.extractIncludeFile(rest)
		if inclFile == "" {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #include", filename, lineNo))
			return true
		}
		// A file that is still being expanded further up the include
		// chain must not be expanded again.
		if pp.included[inclFile] {
			*result = append(*result, fmt.Sprintf("// #include %q — circular inclusion (skipped)", inclFile))
			return true
		}
		content := pp.resolveInclude(filename, inclFile)
		if content == "" {
			// Not found — not an error for Five (some .ch files are optional)
			*result = append(*result, fmt.Sprintf("// #include %q — not found (skipped)", inclFile))
			return true
		}
		// Process included content recursively. The guard is held for
		// the whole nested expansion: resolveInclude releases its own
		// mark as soon as it returns, which is before the nested
		// directives are seen.
		pp.included[inclFile] = true
		processed := pp.processLines(inclFile, content, depth+1)
		delete(pp.included, inclFile)
		*result = append(*result, strings.Split(processed, "\n")...)
		return true
	}

	// #define NAME [VALUE]
	if strings.HasPrefix(upper, "DEFINE ") {
		rest := strings.TrimSpace(directive[7:])
		// Detect function-like macro: #define NAME( params ) body
		// For now, skip these (don't register as simple text substitution)
		if idx := strings.IndexByte(rest, '('); idx > 0 && idx < strings.IndexAny(rest+" ", " \t") {
			// Function-like macro — not yet supported, skip
			return true
		}
		// The name ends at the first blank; header files commonly align
		// values with tabs, so both separators are accepted.
		name, value := rest, ""
		if sep := strings.IndexAny(rest, " \t"); sep >= 0 {
			name = rest[:sep]
			value = strings.TrimSpace(rest[sep+1:])
		}
		// Strip a trailing // or /* comment from the value, ignoring
		// comment openers that sit inside a string literal. Each literal
		// is closed only by the quote character that opened it.
		quote := byte(0)
		for k := 0; k+1 < len(value); k++ {
			c := value[k]
			if quote != 0 {
				if c == quote {
					quote = 0
				}
				continue
			}
			if c == '"' || c == '\'' {
				quote = c
				continue
			}
			if c == '/' && (value[k+1] == '/' || value[k+1] == '*') {
				value = strings.TrimSpace(value[:k])
				break
			}
		}
		pp.defines[name] = value
		return true
	}

	// #undef NAME
	if strings.HasPrefix(upper, "UNDEF ") {
		name := strings.TrimSpace(directive[6:])
		delete(pp.defines, name)
		return true
	}

	// #pragma — just pass through as comment
	if strings.HasPrefix(upper, "PRAGMA ") {
		*result = append(*result, "// "+directive)
		return true
	}

	// #warning, #error, #stdout — skip (emit as comment)
	if strings.HasPrefix(upper, "WARNING") || strings.HasPrefix(upper, "ERROR") || strings.HasPrefix(upper, "STDOUT") {
		*result = append(*result, "// #"+directive)
		return true
	}

	// #command / #translate — parse and store rules. ParseRule
	// validates that result-template marker references resolve to a
	// pattern marker; any unresolved name comes back as a warning, which
	// is surfaced via pp.errors with the directive's filename:line so
	// the user can find the typo (e.g. case-sensitive `<For>` vs
	// `<for>` in an #xcommand). Without surfacing, the broken
	// expansion silently produced empty / mangled output at every
	// call site.
	registerRule := func(r *Rule, store *[]*Rule) {
		if r == nil {
			return
		}
		*store = append(*store, r)
		for _, w := range r.Warnings {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: #command: %s", filename, lineNo, w))
		}
	}
	if strings.HasPrefix(upper, "COMMAND ") {
		registerRule(ParseRule(directive[8:], true, false), &pp.commands)
		return true
	}
	if strings.HasPrefix(upper, "TRANSLATE ") {
		registerRule(ParseRule(directive[10:], false, false), &pp.translates)
		return true
	}
	if strings.HasPrefix(upper, "XCOMMAND ") {
		registerRule(ParseRule(directive[9:], true, true), &pp.commands)
		return true
	}
	if strings.HasPrefix(upper, "XTRANSLATE ") {
		registerRule(ParseRule(directive[11:], false, true), &pp.translates)
		return true
	}

	return false
}
// extractIncludeFile returns the file name from the operand of an
// #include directive: `"file"`, `<file>` or a bare name.
//
// For the delimited forms the name ends at the first closing delimiter,
// so anything after it (such as a trailing `// comment`) is ignored. An
// operand with no matching closing delimiter, or with no delimiters at
// all, is returned unchanged apart from surrounding whitespace.
func (pp *Preprocessor) extractIncludeFile(s string) string {
	s = strings.TrimSpace(s)
	if len(s) < 2 {
		return s
	}

	var closer byte
	switch s[0] {
	case '"':
		closer = '"'
	case '<':
		closer = '>'
	default:
		return s // bare filename
	}

	if end := strings.IndexByte(s[1:], closer); end >= 0 {
		return s[1 : 1+end]
	}
	return s
}
// resolveInclude locates inclFile and returns its content, or "" when the
// file cannot be read from any candidate location or is already marked in
// pp.included.
//
// Candidates are tried in this order:
//  1. the directory of currentFile (when currentFile is non-empty),
//  2. each directory registered with AddIncludeDir, in insertion order.
//
// The pp.included mark set here is keyed by the include name as written
// and is removed again when this function returns.
func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
	guardKey := inclFile
	if pp.included[guardKey] {
		return ""
	}
	pp.included[guardKey] = true
	defer delete(pp.included, guardKey)

	candidates := make([]string, 0, len(pp.includeDirs)+1)
	if currentFile != "" {
		candidates = append(candidates, filepath.Join(filepath.Dir(currentFile), inclFile))
	}
	for _, dir := range pp.includeDirs {
		candidates = append(candidates, filepath.Join(dir, inclFile))
	}

	// The first readable candidate wins.
	for _, candidate := range candidates {
		if data, err := os.ReadFile(candidate); err == nil {
			return string(data)
		}
	}
	return ""
}
// hasTopLevelSemi reports whether s contains a `;` that is neither inside
// a quoted string nor nested within parentheses, brackets or braces.
// applyRules uses it to decide whether a line carries several PRG
// statements.
func hasTopLevelSemi(s string) bool {
	var quote byte // delimiter of the open string literal, 0 when outside one
	nesting := 0
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		if quote != 0 {
			if ch == quote {
				quote = 0
			}
			continue
		}
		if ch == '"' || ch == '\'' {
			quote = ch
		} else if ch == '(' || ch == '[' || ch == '{' {
			nesting++
		} else if ch == ')' || ch == ']' || ch == '}' {
			// Unbalanced closers never drive the depth below zero.
			if nesting > 0 {
				nesting--
			}
		} else if ch == ';' && nesting == 0 {
			return true
		}
	}
	return false
}
// splitTopLevelSemi cuts s at every `;` that is outside string literals
// and outside paren/bracket/brace nesting, returning the pieces without
// the separators. A trailing `;` yields a final empty piece, so joining
// the result with ";" reproduces s and keeps the separator's meaning for
// line continuation.
func splitTopLevelSemi(s string) []string {
	var (
		pieces  []string
		quote   byte // delimiter of the open string literal, 0 when outside one
		nesting int
		from    int // start of the piece currently being scanned
	)
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		if quote != 0 {
			if ch == quote {
				quote = 0
			}
			continue
		}
		if ch == '"' || ch == '\'' {
			quote = ch
			continue
		}
		if ch == '(' || ch == '[' || ch == '{' {
			nesting++
			continue
		}
		if ch == ')' || ch == ']' || ch == '}' {
			if nesting > 0 {
				nesting--
			}
			continue
		}
		if ch == ';' && nesting == 0 {
			pieces = append(pieces, s[from:pos])
			from = pos + 1
		}
	}
	return append(pieces, s[from:])
}
// applyRules rewrites line using the registered #command and #translate
// rules and returns the result; a line no rule matches is returned
// unchanged. Blank lines and lines that start with `//` are never
// rewritten.
//
// A line holding several `;`-separated statements (`dbCommit(); CLOSE
// ALL`) is split at its top-level semicolons and every statement is
// matched on its own, then the pieces are joined with `;` again.
// Otherwise only the first statement would be rewritten and the rest
// would reach the parser as plain identifiers.
//
// For a single statement the #command rules are tried first, then the
// #translate rules, each via Rule.MatchLine on the trimmed statement. The
// first rule that matches wins and the line's leading whitespace is put
// back in front of its expansion.
func (pp *Preprocessor) applyRules(line string) string {
	stmt := strings.TrimSpace(line)
	if stmt == "" || strings.HasPrefix(stmt, "//") {
		return line
	}

	if hasTopLevelSemi(stmt) {
		if segments := splitTopLevelSemi(line); len(segments) > 1 {
			for k := range segments {
				segments[k] = pp.applyRules(segments[k])
			}
			return strings.Join(segments, ";")
		}
	}

	for _, ruleSet := range [][]*Rule{pp.commands, pp.translates} {
		for _, rule := range ruleSet {
			if expanded, ok := rule.MatchLine(stmt); ok {
				lead := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
				return lead + expanded
			}
		}
	}
	return line
}
// stripBlockComments removes /* ... */ comments from a single line and
// returns what is left. Each removed comment is replaced by one space.
//
// If a /* has no closing */ on the same line, the text from the /* onward
// is dropped and *inBlock is set to true so the caller knows the comment
// continues on the following lines. *inBlock is never reset here.
//
// Comment openers inside string literals ("..." or '...') are left
// alone. The same goes for anything after a `//` line comment: the rest
// of the line is copied through verbatim, so a stray `/*` in a line
// comment cannot open a block comment.
func stripBlockComments(line string, inBlock *bool) string {
	var out strings.Builder
	quote := byte(0) // delimiter of the open string literal, 0 when outside one

	for i := 0; i < len(line); {
		c := line[i]

		// Inside a literal everything is copied until the closing quote.
		if quote != 0 {
			if c == quote {
				quote = 0
			}
			out.WriteByte(c)
			i++
			continue
		}
		if c == '"' || c == '\'' {
			quote = c
			out.WriteByte(c)
			i++
			continue
		}

		if c == '/' && i+1 < len(line) {
			switch line[i+1] {
			case '/':
				// Line comment: nothing after it is code.
				out.WriteString(line[i:])
				return out.String()
			case '*':
				end := strings.Index(line[i+2:], "*/")
				if end < 0 {
					*inBlock = true
					return out.String() // rest of line is comment
				}
				out.WriteByte(' ') // replace comment with space
				i += 2 + end + 2   // skip past */
				continue
			}
		}

		out.WriteByte(c)
		i++
	}
	return out.String()
}
// applyDefines substitutes #define macros in line and returns the result.
//
// This is plain whole-word replacement outside string literals, not full
// macro expansion. Macros are applied once each, in ascending name order,
// so the output is the same on every run even when one macro's value
// mentions another; a value is only expanded further by macros whose
// names sort after the one that introduced it.
//
// Flag-only defines (empty value) and an empty macro name are skipped.
func (pp *Preprocessor) applyDefines(line string) string {
	names := make([]string, 0, len(pp.defines))
	for name, value := range pp.defines {
		if name == "" || value == "" {
			continue // nothing to substitute
		}
		names = append(names, name)
	}
	// Map iteration order is unspecified; sort for reproducible output.
	sort.Strings(names)

	for _, name := range names {
		line = replaceWord(line, name, pp.defines[name])
	}
	return line
}
// replaceWord returns line with every whole-word occurrence of old
// replaced by new. An occurrence counts as a whole word when the
// characters on both sides (if any) are not word characters as defined by
// isWordChar. Text inside string literals ("..." or '...') is never
// touched, and the replacement text itself is not rescanned.
//
// An empty old matches nothing and line is returned unchanged; without
// that guard the scan would never advance past an empty match.
func replaceWord(line, old, new string) string {
	if old == "" || !strings.Contains(line, old) {
		return line
	}

	var result strings.Builder
	inString := byte(0) // delimiter of the open string literal, 0 when outside one
	i := 0
	for i < len(line) {
		c := line[i]

		// Track string literals and copy their content verbatim.
		if inString != 0 {
			if c == inString {
				inString = 0
			}
			result.WriteByte(c)
			i++
			continue
		}
		if c == '"' || c == '\'' {
			inString = c
			result.WriteByte(c)
			i++
			continue
		}

		// Check for a match bounded by non-word characters.
		if strings.HasPrefix(line[i:], old) {
			end := i + len(old)
			before := i == 0 || !isWordChar(line[i-1])
			after := end >= len(line) || !isWordChar(line[end])
			if before && after {
				result.WriteString(new)
				i = end
				continue
			}
		}

		result.WriteByte(c)
		i++
	}
	return result.String()
}
// isWordChar reports whether c can be part of an identifier: an ASCII
// letter, an ASCII digit or an underscore.
func isWordChar(c byte) bool {
	switch {
	case c == '_':
		return true
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
// looksLikeInlineC heuristically detects Harbour-style inline C inside
// a #pragma BEGINDUMP block. Any ONE strong signal triggers detection
// so the user sees an early, readable error rather than an obscure Go
// syntax complaint far downstream.
//
// Every signal is tested against the start of a whitespace-trimmed line:
//   - `#include <` or `#include "` — C preprocessor include
//   - `#define NAME(` with no blank before the paren — function-like macro
//   - `HB_FUNC(` / `HB_FUNC_STATIC(` / `HB_FUNC_TRANSLATE(` — Harbour's C FFI macro
//   - `extern "C"` — C / C++ linkage block
//   - `typedef ` / `struct ` / `int main(` / `void main(` — C declarations
//   - a C return-type declaration as recognised by isCReturnTypeDecl
//   - a call statement from the Harbour C API families `hb_ret*(`,
//     `hb_par*(` (which includes `hb_param`), `hb_stor*(` and `hb_item*(`,
//     where `*` is zero or more letters
//
// An empty or all-whitespace body is never reported as C.
func looksLikeInlineC(body string) bool {
	if strings.TrimSpace(body) == "" {
		return false
	}

	// Line prefixes that have no Go analogue. The Go-side counterpart of
	// the FFI macro is `hbrt.HB_FUNC("NAME", fn)`, which starts with a
	// lowercase package prefix and so does not match the bare C form.
	cOnlyPrefixes := []string{
		"#include <", `#include "`,
		"HB_FUNC(", "HB_FUNC_STATIC(", "HB_FUNC_TRANSLATE(",
		`extern "C"`,
		"typedef ", "struct ", "int main(", "void main(",
	}
	// Harbour C API name families (return setters, parameter getters,
	// by-reference stores, item handling).
	apiFamilies := []string{"hb_ret", "hb_par", "hb_stor", "hb_item"}

	for _, raw := range strings.Split(body, "\n") {
		l := strings.TrimSpace(raw)

		for _, p := range cOnlyPrefixes {
			if strings.HasPrefix(l, p) {
				return true
			}
		}

		// Function-like #define: the `(` follows the macro name with no
		// blank in between (`#define FOO(x) ...`).
		if strings.HasPrefix(l, "#define ") {
			rest := strings.TrimSpace(l[8:])
			if i := strings.IndexAny(rest, " \t("); i > 0 && rest[i] == '(' {
				return true
			}
		}

		// C return-type declarations: `int name(`, `char *name(`, etc.
		if isCReturnTypeDecl(l) {
			return true
		}

		// Harbour C API call statements such as hb_retc(...), hb_ret(),
		// hb_storni(...), hb_itemRelease(...). The text between the
		// family prefix and `(` must be letters only, so a Go identifier
		// like `hb_ret_val` or an assignment does not trip the check.
		for _, family := range apiFamilies {
			if !strings.HasPrefix(l, family) {
				continue
			}
			tail := l[len(family):]
			if open := strings.IndexByte(tail, '('); open >= 0 {
				if suffix := tail[:open]; suffix == "" || allLetters(suffix) {
					return true
				}
			}
		}
	}
	return false
}
// isCReturnTypeDecl reports whether the line opens a C function
// declaration such as `int foo(`, `static char* bar(`,
// `unsigned long count(` or `uint32_t crc(`.
//
// The text before the first `(` must consist of, in any order, storage or
// type qualifiers (static, extern, inline, const, volatile, register) and
// C type words (int, void, char, long, short, double, float, unsigned,
// signed, size_t, ssize_t, uint and the intN_t/uintN_t style names),
// optionally followed by `*`, and must end in an identifier that names
// the function. At least one type word is required.
//
// Go code does not have this shape: `func name() int {` starts with
// `func`, and a conversion such as `uintptr(x)` or `int(x)` has no
// separate name before the parenthesis.
func isCReturnTypeDecl(l string) bool {
	open := strings.IndexByte(l, '(')
	if open <= 0 {
		return false
	}

	// Make every `*` its own token so `char* bar` and `char *bar` read
	// the same.
	tokens := strings.Fields(strings.ReplaceAll(l[:open], "*", " * "))
	if len(tokens) < 2 {
		return false // a lone word before `(` is a call or conversion
	}
	if name := tokens[len(tokens)-1]; !allIdentChars(name) {
		return false
	}

	sawType := false
	for _, tok := range tokens[:len(tokens)-1] {
		switch tok {
		case "static", "extern", "inline", "const", "volatile", "register":
			// qualifiers carry no type information on their own
		case "int", "void", "char", "long", "short", "double", "float",
			"unsigned", "signed", "size_t", "ssize_t", "uint":
			sawType = true
		case "*":
			if !sawType {
				return false // pointer declarator before any type
			}
		default:
			// Fixed-width and pointer-sized integer typedefs:
			// uint8_t, int64_t, uintptr_t, ...
			isStdInt := strings.HasSuffix(tok, "_t") &&
				(strings.HasPrefix(tok, "uint") || strings.HasPrefix(tok, "int"))
			if !isStdInt {
				return false
			}
			sawType = true
		}
	}
	return sawType
}
// allLetters reports whether s is non-empty and made up solely of ASCII
// letters.
func allLetters(s string) bool {
	if len(s) == 0 {
		return false
	}
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		lower := 'a' <= ch && ch <= 'z'
		upper := 'A' <= ch && ch <= 'Z'
		if !lower && !upper {
			return false
		}
	}
	return true
}
// allIdentChars reports whether s is a non-empty ASCII identifier:
// letters and underscores anywhere, digits anywhere except the first
// position.
func allIdentChars(s string) bool {
	if len(s) == 0 {
		return false
	}
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		letterLike := ch == '_' || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
		digit := '0' <= ch && ch <= '9'
		if !letterLike && !(digit && pos > 0) {
			return false
		}
	}
	return true
}