Fix three silent miscompiles in the preprocessor that were
masking real bugs in Harbour-style PRG.
1. Brace tokenizer (compiler/pp/command.go)
`{` and `}` now tokenize as standalone separator tokens. The
matcher previously only split on `,()[]"'` etc., so a codeblock
literal `{|| ... }` in a macro argument became the tokens `{||`,
`""`, `}`. The capture-depth tracker only matched exact `{`/`}`,
so `{||` was invisible as an opener while the standalone `}`
wrongly decremented depth — `TEST_LINE( o:VarPut({|| "" }) )`
truncated mid-argument and the parser later choked at the inner
`}` with `expected ), got } "}"`.
Fix: add `{` and `}` to tokenizeLine's separator set. Now
`{|| ... }` lexes as `{`, `||`, `""`, `}` and balances cleanly.
2. ;-continuation join for non-`#` lines (compiler/pp/pp.go)
The existing line-joiner only collapsed trailing `;` continuations
on `#`-prefixed directives. Plain source code using the same
convention — e.g. Harbour's TEST macro:
TEST t004 STATIC s_once := NIL, S_C ;
INIT hb_threadOnce( @s_once, {|| ... } ) ;
CODE x := S_C
was processed one physical line at a time, so the TEST pattern
never matched the full logical statement. The first row passed
through unrewritten, fell through to the parser as an expression,
and gengo silently absorbed it as part of the *previous*
function's body. Six TEST macros' STATIC declarations all ended
up tagged with t003's function name, producing duplicate
`static_T003_S_ONCE` decls and a Go compile failure.
Fix: add the same trailing-`;` join logic to user code, with
blank-line fillers inserted post-join so source line numbers in
parser errors still align with the original file.
3. Block-comment-aware continuation join
Inline `/* ... */` at the end of a continuation row hid the
trailing `;` from the joiner's HasSuffix check. The fix calls
stripBlockComments on the next-line peek before testing for `;`,
so chains like
AAdd( aResult, { cChildBase, ;
aRefs[ "fk" ][ j ][ 1 ], ; /* child col */
aRefs[ "fk" ][ j ][ 3 ], ; /* parent col */
...
keep folding instead of stopping after one row and leaving a
dangling `,` at end of line.
Results
-------
Harbour-core compat sweep: 25/30 → 28/30 (remaining lnlenli1 +
keywords are //NOTEST stress files, intentionally unbalanced).
All 6 release gates green: go test ./..., FiveSql2 43/43,
Harbour compat 56/56, std.ch 17/17, FRB 7/7, examples 65/71.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
895 lines
26 KiB
Go
895 lines
26 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// Preprocessor for Five — handles #include, #define, #ifdef/#endif.
|
|
// Harbour: /mnt/d/harbour-core/src/pp/ppcore.c (6383 lines)
|
|
//
|
|
// Five PP is simplified but covers the essential directives:
|
|
// #include "file.ch" — file inclusion
|
|
// #define NAME VALUE — simple text substitution
|
|
// #undef NAME — remove definition
|
|
// #ifdef NAME / #ifndef NAME / #else / #endif — conditional compilation
|
|
// #pragma — compiler hints
|
|
//
|
|
// #command/#translate are supported via command.go (pattern matching + substitution).
|
|
// Five also handles CLASS syntax natively in the parser.
|
|
package pp
|
|
|
|
import (
	_ "embed"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
)
|
|
|
|
// embeddedStdCh is include/std.ch baked into the compiler binary so
|
|
// xBase commands like ERASE, RENAME, COMMIT, LOCATE, ... reach the
|
|
// parser already rewritten as plain function calls. Equivalent to
|
|
// Harbour's auto-included std.ch.
|
|
//
|
|
//go:embed std.ch
|
|
var embeddedStdCh string
|
|
|
|
// Preprocessor processes source code before lexing.
|
|
type Preprocessor struct {
|
|
defines map[string]string // #define name → value
|
|
includeDirs []string // search paths for #include
|
|
included map[string]bool // prevent circular inclusion
|
|
commands []*Rule // #command rules
|
|
translates []*Rule // #translate rules
|
|
errors []string
|
|
GoDumps []string // collected #pragma BEGINDUMP Go code blocks
|
|
}
|
|
|
|
// New creates a new Preprocessor.
|
|
func New() *Preprocessor {
|
|
pp := &Preprocessor{
|
|
defines: make(map[string]string),
|
|
included: make(map[string]bool),
|
|
}
|
|
pp.addStdRules()
|
|
return pp
|
|
}
|
|
|
|
// addStdRules registers built-in #command rules from the embedded
|
|
// std.ch file. processLines walks the directives and stores #command
|
|
// entries in pp.commands as a side effect; we discard its output.
|
|
//
|
|
// Anything not safely expressible as a #command (e.g. parser-handled
|
|
// constructs like @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT) is left
|
|
// to the parser.
|
|
func (pp *Preprocessor) addStdRules() {
|
|
pp.processLines("std.ch", embeddedStdCh, 0)
|
|
}
|
|
|
|
// AddIncludeDir adds a directory to search for #include files.
|
|
func (pp *Preprocessor) AddIncludeDir(dir string) {
|
|
pp.includeDirs = append(pp.includeDirs, dir)
|
|
}
|
|
|
|
// Define adds a #define.
|
|
func (pp *Preprocessor) Define(name, value string) {
|
|
pp.defines[name] = value
|
|
}
|
|
|
|
// Process preprocesses the source code, resolving #include and #define.
|
|
func (pp *Preprocessor) Process(filename, source string) (string, []string) {
|
|
pp.errors = nil
|
|
result := pp.processLines(filename, source, 0)
|
|
return result, pp.errors
|
|
}
|
|
|
|
const maxIncludeDepth = 20
|
|
|
|
func (pp *Preprocessor) processLines(filename, source string, depth int) string {
|
|
if depth > maxIncludeDepth {
|
|
pp.errors = append(pp.errors, fmt.Sprintf("%s: #include depth exceeded (max %d)", filename, maxIncludeDepth))
|
|
return source
|
|
}
|
|
|
|
lines := strings.Split(source, "\n")
|
|
var result []string
|
|
var ifStack []bool // true = active section, false = skipping
|
|
active := true
|
|
inBlockComment := false // track multi-line /* */ comments
|
|
inPragmaDump := false // track #pragma BEGINDUMP ... ENDDUMP
|
|
dumpStartLine := 0 // 1-based line where BEGINDUMP appeared
|
|
var dumpLines []string // accumulate Go code lines
|
|
|
|
for i := 0; i < len(lines); i++ {
|
|
line := lines[i]
|
|
// `#command`/`#translate` directives that end with a trailing `;`
|
|
// continue on the next physical line — this is how harbour-core
|
|
// formats its std.ch rules. Strip exactly one trailing `;` per
|
|
// iteration so Harbour's `;;` convention ("literal `;` plus
|
|
// continuation") survives: the inner `;` ends up as part of the
|
|
// joined directive, the outer one drives the continuation.
|
|
// Only `#`-directives participate; user code uses `;` differently.
|
|
if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") {
|
|
for i+1 < len(lines) {
|
|
trimmed := strings.TrimRight(line, " \t")
|
|
if !strings.HasSuffix(trimmed, ";") {
|
|
break
|
|
}
|
|
line = strings.TrimSuffix(trimmed, ";") + " " + strings.TrimSpace(lines[i+1])
|
|
i++
|
|
}
|
|
}
|
|
// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks)
|
|
if inPragmaDump {
|
|
trimCheck := strings.TrimSpace(line)
|
|
if strings.HasPrefix(trimCheck, "#") {
|
|
dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
|
|
if strings.HasPrefix(strings.ToUpper(dir), "PRAGMA ") && strings.Contains(strings.ToUpper(dir), "ENDDUMP") {
|
|
inPragmaDump = false
|
|
body := strings.Join(dumpLines, "\n")
|
|
// Five's inline dumps are Go, not C. Harbour's own
|
|
// #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC,
|
|
// #include <stdio.h> etc.), so `.prg` files ported
|
|
// from Harbour will attempt to shove C through Five's
|
|
// Go-emit pipeline and fail with cryptic errors like
|
|
// "invalid character U+0023 '#'". Detect the C shape
|
|
// and report a clear, actionable error up front.
|
|
if looksLikeInlineC(body) {
|
|
pp.errors = append(pp.errors, fmt.Sprintf(
|
|
"%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.",
|
|
filename, dumpStartLine))
|
|
// Emit a syntactically invalid line so the parser
|
|
// also fails at the expected position rather than
|
|
// the build silently continuing.
|
|
result = append(result, "__FIVE_INLINE_C_ERROR__")
|
|
dumpLines = nil
|
|
continue
|
|
}
|
|
pp.GoDumps = append(pp.GoDumps, body)
|
|
dumpLines = nil
|
|
result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
|
|
continue
|
|
}
|
|
}
|
|
dumpLines = append(dumpLines, line)
|
|
result = append(result, "") // blank out for line counting
|
|
continue
|
|
}
|
|
trimmed := strings.TrimSpace(line)
|
|
|
|
// Handle multi-line block comments
|
|
if inBlockComment {
|
|
if idx := strings.Index(line, "*/"); idx >= 0 {
|
|
inBlockComment = false
|
|
line = line[idx+2:] // keep content after */
|
|
trimmed = strings.TrimSpace(line)
|
|
if trimmed == "" {
|
|
result = append(result, "")
|
|
continue
|
|
}
|
|
} else {
|
|
result = append(result, "") // blank out comment lines
|
|
continue
|
|
}
|
|
}
|
|
// Strip block comments within a single line and detect opening /*
|
|
line = stripBlockComments(line, &inBlockComment)
|
|
trimmed = strings.TrimSpace(line)
|
|
|
|
// Check if in active section
|
|
if len(ifStack) > 0 {
|
|
active = ifStack[len(ifStack)-1]
|
|
} else {
|
|
active = true
|
|
}
|
|
|
|
// Preprocessor directives (always processed regardless of active state)
|
|
if strings.HasPrefix(trimmed, "#") {
|
|
directive := strings.TrimPrefix(trimmed, "#")
|
|
directive = strings.TrimSpace(directive)
|
|
|
|
// Detect #pragma BEGINDUMP
|
|
upperDir := strings.ToUpper(directive)
|
|
if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
|
|
inPragmaDump = true
|
|
dumpStartLine = i + 1 // 1-based for error reporting
|
|
dumpLines = nil
|
|
result = append(result, "")
|
|
continue
|
|
}
|
|
|
|
if pp.handleConditional(directive, &ifStack, active) {
|
|
continue
|
|
}
|
|
|
|
if !active {
|
|
continue // skip non-conditional directives in inactive sections
|
|
}
|
|
|
|
if pp.handleDirective(filename, directive, depth, &result, i+1) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
if !active {
|
|
continue // skip lines in inactive #ifdef sections
|
|
}
|
|
|
|
// `;`-continuation in user code. Join physical lines ending in
|
|
// a top-level `;` (paren/string-balanced) so multi-line
|
|
// `#command` invocations like
|
|
// TEST t004 STATIC s_once := NIL ;
|
|
// INIT ... ;
|
|
// CODE x := S_C
|
|
// match the rule pattern as a single logical line. Without this
|
|
// the pattern only sees the first physical line, fails to match,
|
|
// and the residual `TEST t004 STATIC ...` falls through to the
|
|
// parser as a bare expression — silently merged into the
|
|
// previous function's body, producing duplicate static decls
|
|
// tagged with the wrong function name. Insert blank fillers
|
|
// for each consumed line so post-PP source line numbers still
|
|
// align with the original file for error reporting.
|
|
consumedFiller := 0
|
|
for i+1 < len(lines) {
|
|
t := stripTrailingLineComment(strings.TrimRight(line, " \t"))
|
|
t = strings.TrimRight(t, " \t")
|
|
if !strings.HasSuffix(t, ";") {
|
|
break
|
|
}
|
|
// Strip block comments from the next line *the same way*
|
|
// the main loop will. Without this, an inline `/* ... */`
|
|
// at the end of a continuation row hides the trailing
|
|
// `;` from our HasSuffix check below — the joined chain
|
|
// truncates after just one row, leaving a dangling comma
|
|
// at end of line that the parser later mis-reports.
|
|
rawNext := lines[i+1]
|
|
strippedNext := stripBlockComments(rawNext, &inBlockComment)
|
|
nextTrim := strings.TrimSpace(strippedNext)
|
|
if strings.HasPrefix(nextTrim, "#") {
|
|
inBlockComment = false // not actually consumed
|
|
break
|
|
}
|
|
// Don't fold across an unterminated `/*` — the rest of
|
|
// the file would be treated as code by the join.
|
|
if inBlockComment {
|
|
inBlockComment = false
|
|
break
|
|
}
|
|
line = strings.TrimSuffix(t, ";") + " " + nextTrim
|
|
i++
|
|
consumedFiller++
|
|
}
|
|
|
|
// Apply #command/#translate rules
|
|
if len(pp.commands) > 0 || len(pp.translates) > 0 {
|
|
line = pp.applyRules(line)
|
|
}
|
|
|
|
// Apply #define substitutions
|
|
if len(pp.defines) > 0 {
|
|
line = pp.applyDefines(line)
|
|
}
|
|
|
|
result = append(result, line)
|
|
for k := 0; k < consumedFiller; k++ {
|
|
result = append(result, "")
|
|
}
|
|
}
|
|
|
|
if len(ifStack) > 0 {
|
|
pp.errors = append(pp.errors, fmt.Sprintf("%s: unterminated #ifdef/#ifndef", filename))
|
|
}
|
|
|
|
return strings.Join(result, "\n")
|
|
}
|
|
|
|
// handleConditional processes #ifdef, #ifndef, #else, #endif.
|
|
// Returns true if the line was a conditional directive.
|
|
func (pp *Preprocessor) handleConditional(directive string, ifStack *[]bool, active bool) bool {
|
|
upper := strings.ToUpper(directive)
|
|
|
|
if strings.HasPrefix(upper, "IFDEF ") {
|
|
name := strings.TrimSpace(directive[6:])
|
|
_, defined := pp.defines[name]
|
|
*ifStack = append(*ifStack, defined && active)
|
|
return true
|
|
}
|
|
|
|
if strings.HasPrefix(upper, "IFNDEF ") {
|
|
name := strings.TrimSpace(directive[7:])
|
|
_, defined := pp.defines[name]
|
|
*ifStack = append(*ifStack, !defined && active)
|
|
return true
|
|
}
|
|
|
|
// #if expr — simplified: support #if 0 (always false), #if 1 (always true),
|
|
// and #if __pragma(...) (treat as false for compatibility)
|
|
if strings.HasPrefix(upper, "IF ") || upper == "IF" {
|
|
rest := strings.TrimSpace(directive[2:])
|
|
val := false
|
|
if rest == "1" || rest == ".T." {
|
|
val = true
|
|
} else if rest == "0" || rest == ".F." {
|
|
val = false
|
|
} else {
|
|
// Unknown expression — default to false (conservative)
|
|
val = false
|
|
}
|
|
*ifStack = append(*ifStack, val && active)
|
|
return true
|
|
}
|
|
|
|
// #else — may have trailing comment
|
|
if upper == "ELSE" || strings.HasPrefix(upper, "ELSE ") || strings.HasPrefix(upper, "ELSE\t") {
|
|
if len(*ifStack) > 0 {
|
|
// Flip the top of stack (only if parent was active)
|
|
parentActive := true
|
|
if len(*ifStack) > 1 {
|
|
parentActive = (*ifStack)[len(*ifStack)-2]
|
|
}
|
|
(*ifStack)[len(*ifStack)-1] = !(*ifStack)[len(*ifStack)-1] && parentActive
|
|
}
|
|
return true
|
|
}
|
|
|
|
// #endif — may have trailing comment: #endif /* COMMENT */
|
|
stripped := strings.TrimSpace(upper)
|
|
if idx := strings.Index(stripped, " "); idx > 0 {
|
|
stripped = stripped[:idx]
|
|
}
|
|
if idx := strings.Index(stripped, "\t"); idx > 0 {
|
|
stripped = stripped[:idx]
|
|
}
|
|
if stripped == "ENDIF" {
|
|
if len(*ifStack) > 0 {
|
|
*ifStack = (*ifStack)[:len(*ifStack)-1]
|
|
}
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// handleDirective processes non-conditional directives.
|
|
func (pp *Preprocessor) handleDirective(filename, directive string, depth int, result *[]string, lineNo int) bool {
|
|
upper := strings.ToUpper(directive)
|
|
|
|
// #include "file" or #include <file>
|
|
if strings.HasPrefix(upper, "INCLUDE ") {
|
|
rest := strings.TrimSpace(directive[8:])
|
|
inclFile := pp.extractIncludeFile(rest)
|
|
if inclFile == "" {
|
|
pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #include", filename, lineNo))
|
|
return true
|
|
}
|
|
|
|
content := pp.resolveInclude(filename, inclFile)
|
|
if content == "" {
|
|
// Not found — not an error for Five (some .ch files are optional)
|
|
*result = append(*result, fmt.Sprintf("// #include %q — not found (skipped)", inclFile))
|
|
return true
|
|
}
|
|
|
|
// Process included content recursively
|
|
processed := pp.processLines(inclFile, content, depth+1)
|
|
*result = append(*result, strings.Split(processed, "\n")...)
|
|
return true
|
|
}
|
|
|
|
// #define NAME [VALUE]
|
|
if strings.HasPrefix(upper, "DEFINE ") {
|
|
rest := strings.TrimSpace(directive[7:])
|
|
// Detect function-like macro: #define NAME( params ) body
|
|
// For now, skip these (don't register as simple text substitution)
|
|
if idx := strings.IndexByte(rest, '('); idx > 0 && idx < strings.IndexAny(rest+" ", " \t") {
|
|
// Function-like macro — not yet supported, skip
|
|
return true
|
|
}
|
|
parts := strings.SplitN(rest, " ", 2)
|
|
name := parts[0]
|
|
value := ""
|
|
if len(parts) > 1 {
|
|
value = strings.TrimSpace(parts[1])
|
|
}
|
|
// Strip trailing // comment and /* */ comment from value
|
|
if idx := strings.Index(value, "//"); idx >= 0 {
|
|
// Make sure // is not inside a string literal
|
|
inStr := false
|
|
for i := 0; i < idx; i++ {
|
|
if value[i] == '"' || value[i] == '\'' {
|
|
inStr = !inStr
|
|
}
|
|
}
|
|
if !inStr {
|
|
value = strings.TrimSpace(value[:idx])
|
|
}
|
|
}
|
|
if idx := strings.Index(value, "/*"); idx >= 0 {
|
|
value = strings.TrimSpace(value[:idx])
|
|
}
|
|
pp.defines[name] = value
|
|
return true
|
|
}
|
|
|
|
// #undef NAME
|
|
if strings.HasPrefix(upper, "UNDEF ") {
|
|
name := strings.TrimSpace(directive[6:])
|
|
delete(pp.defines, name)
|
|
return true
|
|
}
|
|
|
|
// #pragma — just pass through as comment
|
|
if strings.HasPrefix(upper, "PRAGMA ") {
|
|
*result = append(*result, "// "+directive)
|
|
return true
|
|
}
|
|
// #warning, #error, #stdout — skip (emit as comment)
|
|
if strings.HasPrefix(upper, "WARNING") || strings.HasPrefix(upper, "ERROR") || strings.HasPrefix(upper, "STDOUT") {
|
|
*result = append(*result, "// #"+directive)
|
|
return true
|
|
}
|
|
|
|
// #command / #translate — parse and store rules. ParseRule now
|
|
// validates that result-template marker references resolve to a
|
|
// pattern marker; any unresolved name flows back as a warning
|
|
// surfaced via pp.errors with the directive's filename:line so
|
|
// the user can find the typo (e.g. case-sensitive `<For>` vs
|
|
// `<for>` in an #xcommand). Without surfacing, the broken
|
|
// expansion silently produced empty / mangled output at every
|
|
// call site.
|
|
registerRule := func(r *Rule, store *[]*Rule) {
|
|
if r == nil {
|
|
return
|
|
}
|
|
*store = append(*store, r)
|
|
for _, w := range r.Warnings {
|
|
pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: #command: %s", filename, lineNo, w))
|
|
}
|
|
}
|
|
if strings.HasPrefix(upper, "COMMAND ") {
|
|
registerRule(ParseRule(directive[8:], true, false), &pp.commands)
|
|
return true
|
|
}
|
|
if strings.HasPrefix(upper, "TRANSLATE ") {
|
|
registerRule(ParseRule(directive[10:], false, false), &pp.translates)
|
|
return true
|
|
}
|
|
if strings.HasPrefix(upper, "XCOMMAND ") {
|
|
registerRule(ParseRule(directive[9:], true, true), &pp.commands)
|
|
return true
|
|
}
|
|
if strings.HasPrefix(upper, "XTRANSLATE ") {
|
|
registerRule(ParseRule(directive[11:], false, true), &pp.translates)
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// extractIncludeFile gets the filename from #include "file" or #include <file>
|
|
func (pp *Preprocessor) extractIncludeFile(s string) string {
|
|
s = strings.TrimSpace(s)
|
|
if len(s) >= 2 {
|
|
if (s[0] == '"' && s[len(s)-1] == '"') || (s[0] == '<' && s[len(s)-1] == '>') {
|
|
return s[1 : len(s)-1]
|
|
}
|
|
}
|
|
return s // bare filename
|
|
}
|
|
|
|
// resolveInclude searches for an include file and returns its content.
|
|
func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
|
|
// Prevent circular inclusion
|
|
absKey := inclFile
|
|
if pp.included[absKey] {
|
|
return ""
|
|
}
|
|
pp.included[absKey] = true
|
|
defer func() { delete(pp.included, absKey) }()
|
|
|
|
// Search order:
|
|
// 1. Relative to current file
|
|
// 2. Include directories
|
|
// 3. Harbour include dir (for hbclass.ch etc.)
|
|
|
|
searchPaths := []string{}
|
|
|
|
// Relative to current file
|
|
if currentFile != "" {
|
|
dir := filepath.Dir(currentFile)
|
|
searchPaths = append(searchPaths, filepath.Join(dir, inclFile))
|
|
}
|
|
|
|
// Include directories
|
|
for _, dir := range pp.includeDirs {
|
|
searchPaths = append(searchPaths, filepath.Join(dir, inclFile))
|
|
}
|
|
|
|
// Try each path
|
|
for _, path := range searchPaths {
|
|
data, err := os.ReadFile(path)
|
|
if err == nil {
|
|
return string(data)
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// stripTrailingLineComment cuts s at the first `//` that lies outside a
// single- or double-quoted string literal and returns what precedes it.
// When there is no such `//`, s is returned unchanged. Whitespace before
// the comment is kept.
func stripTrailingLineComment(s string) string {
	var quote byte // active string delimiter, 0 when outside a literal
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			}
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '/' && strings.HasPrefix(s[pos:], "//"):
			return s[:pos]
		}
	}
	return s
}
|
|
|
|
// hasTopLevelSemi reports whether s contains a `;` that is neither inside
// a quoted string literal nor nested within (), [] or {}. Unmatched
// closing brackets are ignored rather than driving the nesting negative.
func hasTopLevelSemi(s string) bool {
	var quote byte // active string delimiter, 0 when outside a literal
	nesting := 0
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		if quote != 0 {
			if ch == quote {
				quote = 0
			}
			continue
		}
		switch {
		case ch == '"' || ch == '\'':
			quote = ch
		case strings.IndexByte("([{", ch) >= 0:
			nesting++
		case strings.IndexByte(")]}", ch) >= 0:
			if nesting > 0 {
				nesting--
			}
		case ch == ';' && nesting == 0:
			return true
		}
	}
	return false
}
|
|
|
|
// splitTopLevelSemi cuts s at every `;` that is neither inside a quoted
// string literal nor nested within (), [] or {}. The separators
// themselves are dropped. The result always has at least one element,
// and a trailing `;` yields a final empty element, so joining the pieces
// with ";" reproduces s.
func splitTopLevelSemi(s string) []string {
	var (
		segments []string
		quote    byte // active string delimiter, 0 when outside a literal
		nesting  int
		begin    int // start of the segment currently being scanned
	)
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		if quote != 0 {
			if ch == quote {
				quote = 0
			}
			continue
		}
		switch {
		case ch == '"' || ch == '\'':
			quote = ch
		case strings.IndexByte("([{", ch) >= 0:
			nesting++
		case strings.IndexByte(")]}", ch) >= 0:
			if nesting > 0 {
				nesting--
			}
		case ch == ';' && nesting == 0:
			segments = append(segments, s[begin:pos])
			begin = pos + 1
		}
	}
	return append(segments, s[begin:])
}
|
|
|
|
// applyRules applies #command and #translate rules to a line.
|
|
// #command rules are tried first (they match complete statements).
|
|
// #translate rules are tried on any part of a line.
|
|
//
|
|
// `;`-separated statements share a line in PRG (`dbCommit(); CLOSE
|
|
// ALL`); each sub-statement is matched against the rule list
|
|
// independently. Without this, only the first statement on the line
|
|
// would have rules applied, and subsequent ones would reach the
|
|
// parser unrewritten — `CLOSE ALL` after a semicolon used to fall
|
|
// through to the parser as IDENT tokens, blowing up at runtime
|
|
// when "CLOSE" tried to dispatch as a function name.
|
|
func (pp *Preprocessor) applyRules(line string) string {
|
|
trimmed := strings.TrimSpace(line)
|
|
if trimmed == "" || strings.HasPrefix(trimmed, "//") {
|
|
return line
|
|
}
|
|
|
|
// Multi-statement line: split on top-level `;` (paren / string
|
|
// balanced), apply rules to each segment, rejoin.
|
|
if hasTopLevelSemi(trimmed) {
|
|
parts := splitTopLevelSemi(line)
|
|
if len(parts) > 1 {
|
|
out := make([]string, len(parts))
|
|
for i, p := range parts {
|
|
out[i] = pp.applyRules(p)
|
|
}
|
|
return strings.Join(out, ";")
|
|
}
|
|
}
|
|
|
|
// Try #command rules (match from start of line)
|
|
for _, rule := range pp.commands {
|
|
if result, ok := rule.MatchLine(trimmed); ok {
|
|
// Preserve leading whitespace
|
|
indent := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
|
|
return indent + result
|
|
}
|
|
}
|
|
|
|
// Try #translate rules (can match substrings)
|
|
for _, rule := range pp.translates {
|
|
if result, ok := rule.MatchLine(trimmed); ok {
|
|
indent := line[:len(line)-len(strings.TrimLeft(line, " \t"))]
|
|
return indent + result
|
|
}
|
|
}
|
|
|
|
return line
|
|
}
|
|
|
|
// stripBlockComments returns line with every complete `/* ... */`
// comment replaced by a single space. Comment markers inside single- or
// double-quoted string literals are left alone.
//
// When a `/*` has no closing `*/` on the same line, the rest of the line
// is dropped and *inBlock is set to true so the caller can skip the
// following lines. *inBlock is never reset to false here.
//
// Scanning stops at a `//` line comment: the comment is copied through
// unchanged, so a `/*` mentioned inside it does not open a block comment.
func stripBlockComments(line string, inBlock *bool) string {
	var out strings.Builder
	out.Grow(len(line))

	var quote byte // active string delimiter, 0 when outside a literal
	for i := 0; i < len(line); {
		c := line[i]

		if quote != 0 {
			if c == quote {
				quote = 0
			}
			out.WriteByte(c)
			i++
			continue
		}

		switch {
		case c == '"' || c == '\'':
			quote = c

		case c == '/' && i+1 < len(line) && line[i+1] == '/':
			// Line comment: everything up to end of line is comment text.
			out.WriteString(line[i:])
			return out.String()

		case c == '/' && i+1 < len(line) && line[i+1] == '*':
			end := strings.Index(line[i+2:], "*/")
			if end < 0 {
				*inBlock = true
				return out.String() // rest of line is comment
			}
			out.WriteByte(' ') // keep the tokens on either side apart
			i += 2 + end + 2   // skip past the closing */
			continue
		}

		out.WriteByte(c)
		i++
	}
	return out.String()
}
|
|
|
|
// applyDefines substitutes #define macros in a line.
|
|
// Simple word-boundary replacement (not full macro expansion).
|
|
func (pp *Preprocessor) applyDefines(line string) string {
|
|
for name, value := range pp.defines {
|
|
if value == "" {
|
|
continue // flag-only define, no substitution
|
|
}
|
|
// Simple word replacement (not inside strings)
|
|
line = replaceWord(line, name, value)
|
|
}
|
|
return line
|
|
}
|
|
|
|
// replaceWord replaces whole-word occurrences of old with new,
|
|
// avoiding replacements inside string literals.
|
|
func replaceWord(line, old, new string) string {
|
|
if !strings.Contains(line, old) {
|
|
return line
|
|
}
|
|
|
|
var result strings.Builder
|
|
inString := byte(0)
|
|
i := 0
|
|
|
|
for i < len(line) {
|
|
// Track string literals
|
|
if inString == 0 && (line[i] == '"' || line[i] == '\'') {
|
|
inString = line[i]
|
|
result.WriteByte(line[i])
|
|
i++
|
|
continue
|
|
}
|
|
if inString != 0 && line[i] == inString {
|
|
inString = 0
|
|
result.WriteByte(line[i])
|
|
i++
|
|
continue
|
|
}
|
|
if inString != 0 {
|
|
result.WriteByte(line[i])
|
|
i++
|
|
continue
|
|
}
|
|
|
|
// Check for word match
|
|
if i+len(old) <= len(line) && line[i:i+len(old)] == old {
|
|
// Check word boundaries
|
|
before := i == 0 || !isWordChar(line[i-1])
|
|
after := i+len(old) >= len(line) || !isWordChar(line[i+len(old)])
|
|
if before && after {
|
|
result.WriteString(new)
|
|
i += len(old)
|
|
continue
|
|
}
|
|
}
|
|
|
|
result.WriteByte(line[i])
|
|
i++
|
|
}
|
|
|
|
return result.String()
|
|
}
|
|
|
|
// isWordChar reports whether c is an ASCII letter, an ASCII digit or an
// underscore.
func isWordChar(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	return c == '_'
}
|
|
|
|
// looksLikeInlineC heuristically detects Harbour-style inline C inside
|
|
// a #pragma BEGINDUMP block. Any ONE strong signal triggers detection
|
|
// so the user sees an early, readable error rather than an obscure Go
|
|
// syntax complaint far downstream.
|
|
//
|
|
// Signals (any match):
|
|
// - `HB_FUNC(` / `HB_FUNC_STATIC(` / `HB_FUNC_TRANSLATE(` — Harbour's C FFI macro
|
|
// - `hb_ret*(` / `hb_param*(` / `hb_stor*(` / `hb_itemNew(` — Harbour C API
|
|
// - `#include <` or `#include "` — C preprocessor include
|
|
// - `#define <ident>(` followed by typed arg list — C-style macro
|
|
// - bare `int main(` / `void main(` — C entry point
|
|
// - `typedef ` / `struct ` at line start — C declarations
|
|
//
|
|
// Go programs can use `import`, `package`, `func`, `var`, `:=` — none
|
|
// of which overlap with these C signatures, so false positives are
|
|
// unlikely.
|
|
func looksLikeInlineC(body string) bool {
|
|
// Quick-reject: empty body.
|
|
trimmed := strings.TrimSpace(body)
|
|
if trimmed == "" {
|
|
return false
|
|
}
|
|
|
|
for _, line := range strings.Split(body, "\n") {
|
|
l := strings.TrimSpace(line)
|
|
// #include <stdio.h> / "hbapi.h" — unambiguous C preprocessor.
|
|
// Go doesn't use #include at all.
|
|
if strings.HasPrefix(l, "#include <") || strings.HasPrefix(l, `#include "`) {
|
|
return true
|
|
}
|
|
// Function-like #define is C-only — Go uses const / generics.
|
|
// `#define FOO(x) ...`
|
|
if strings.HasPrefix(l, "#define ") {
|
|
// Find the name and check for `(` immediately after with
|
|
// no space (function-like macro signature).
|
|
rest := strings.TrimSpace(l[8:])
|
|
if i := strings.IndexAny(rest, " \t("); i > 0 && i < len(rest) && rest[i] == '(' {
|
|
return true
|
|
}
|
|
}
|
|
// Bare `HB_FUNC( NAME )` with an unquoted identifier is the
|
|
// Harbour C FFI macro. The Go-side counterpart is
|
|
// `hbrt.HB_FUNC("NAME", fn)` — lowercase package prefix and a
|
|
// quoted string. Match the C form strictly.
|
|
if strings.HasPrefix(l, "HB_FUNC(") ||
|
|
strings.HasPrefix(l, "HB_FUNC_STATIC(") ||
|
|
strings.HasPrefix(l, "HB_FUNC_TRANSLATE(") {
|
|
return true
|
|
}
|
|
// `extern "C"` — C / C++ linkage block, never Go.
|
|
if strings.HasPrefix(l, `extern "C"`) {
|
|
return true
|
|
}
|
|
// C declarations at line start that have no Go analogue.
|
|
if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") ||
|
|
strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") {
|
|
return true
|
|
}
|
|
// C return-type declarations: `int name(`, `char *name(`, etc.
|
|
// Matching exactly `<C-type> <ident>(` keeps us off Go's
|
|
// `func name(` (which starts with `func`, not a type word)
|
|
// and Go variable declarations (which use `:=` or `var`).
|
|
if isCReturnTypeDecl(l) {
|
|
return true
|
|
}
|
|
// hb_ret*(...) helpers — Harbour's C-side return setters.
|
|
// hb_retc / hb_retni / hb_retnl / hb_retd / hb_retl / hb_retptr
|
|
if strings.HasPrefix(l, "hb_ret") {
|
|
rest := l[6:]
|
|
if i := strings.IndexByte(rest, '('); i >= 0 {
|
|
name := rest[:i]
|
|
if name != "" && allLetters(name) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// isCReturnTypeDecl reports whether l opens a C function declaration or
// definition such as `int foo(`, `static char* bar(`, `unsigned int n(`
// or `uint32_t crc(`.
//
// The text before the first `(` must be: optional storage/qualifier words
// (static, extern, inline, const, volatile), at least one C type word
// (a built-in type, size_t/ssize_t, uint, or a fixed-width [u]intN_t),
// optional `*` pointer markers anywhere among them, and finally a single
// identifier naming the function. Any other word makes the line a
// non-match, so Go code such as `func name() int {`, `x := int(y)` or
// `uintptr(p)` is not flagged.
func isCReturnTypeDecl(l string) bool {
	open := strings.IndexByte(l, '(')
	if open <= 0 {
		return false
	}

	// Separate pointer stars from their neighbours so `char*bar`,
	// `char *bar` and `char* bar` all split into the same words.
	words := strings.Fields(strings.ReplaceAll(l[:open], "*", " * "))
	if len(words) < 2 {
		return false // need at least a type and a name
	}

	isIdent := func(s string) bool {
		for i := 0; i < len(s); i++ {
			c := s[i]
			letter := c == '_' || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')
			digit := '0' <= c && c <= '9'
			if !letter && !(digit && i > 0) {
				return false
			}
		}
		return s != ""
	}

	// isFixedWidth matches int8_t, uint32_t, int64_t, ...
	isFixedWidth := func(w string) bool {
		w = strings.TrimPrefix(w, "u")
		if !strings.HasPrefix(w, "int") || !strings.HasSuffix(w, "_t") {
			return false
		}
		digits := w[len("int") : len(w)-len("_t")]
		if digits == "" {
			return false
		}
		for i := 0; i < len(digits); i++ {
			if digits[i] < '0' || digits[i] > '9' {
				return false
			}
		}
		return true
	}

	if !isIdent(words[len(words)-1]) {
		return false
	}

	sawType := false
	for _, w := range words[:len(words)-1] {
		switch w {
		case "*", "static", "extern", "inline", "const", "volatile":
			// Qualifiers and pointer markers do not make a declaration
			// on their own.
		case "int", "void", "char", "long", "short", "double", "float",
			"unsigned", "signed", "size_t", "ssize_t", "uint":
			sawType = true
		default:
			if !isFixedWidth(w) {
				return false
			}
			sawType = true
		}
	}
	return sawType
}
|
|
|
|
// allLetters reports whether s is non-empty and consists only of ASCII
// letters.
func allLetters(s string) bool {
	if s == "" {
		return false
	}
	notLetter := func(r rune) bool {
		return !(('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z'))
	}
	return strings.IndexFunc(s, notLetter) < 0
}
|
|
|
|
// allIdentChars reports whether s is a non-empty ASCII identifier:
// letters and underscores anywhere, digits anywhere except the first
// position.
func allIdentChars(s string) bool {
	if s == "" {
		return false
	}
	for pos := 0; pos < len(s); pos++ {
		b := s[pos]
		letter := b == '_' || ('a' <= b && b <= 'z') || ('A' <= b && b <= 'Z')
		digit := '0' <= b && b <= '9'
		if !letter && !(digit && pos > 0) {
			return false
		}
	}
	return true
}
|