Files
five/compiler/pp/pp.go
CharlesKWON 000500e034 fix(pp,parser,gengo): pre-release blocker round (Wave 1)
Seven audit-driven blockers landed together because they're tangled:

  * MENU TO removed from std.ch — the rule expanded to a call to a
    nonexistent __MenuTo() RTL symbol, so any user code with `MENU
    TO choice` compiled clean and panicked at runtime. Behavior
    pre-this-round was a parser silent no-op, which is at least
    consistent. Restore that until @ PROMPT (the companion command)
    actually lands.

  * COUNT now requires `TO <var>`. The earlier `[TO <v>]` optional
    bracket was a Harbour-pattern transcription error: the result
    template references `<v>` unconditionally, so a bare `COUNT`
    expanded to ungrammatical ` := 0 ; dbEval(...)` and the
    PRG parser rejected it. Match Harbour's std.ch which makes TO
    mandatory.

  * UPDATE FROM ... REPLACE now requires `FROM`/`ON`/`REPLACE` all
    three. Same root cause as COUNT: the result template uses
    `<key>`, `<f1>`, `<x1>` unconditionally; missing any of them
    produced broken syntax. Tightened to fail loudly rather than
    silently mis-expand.

  * CLOSE <unknown_alias> no longer closes the *current* workarea.
    SelectByAlias was a silent no-op when the alias was missing,
    leaving WASaveAndSelectAlias to evaluate the inner DbCloseArea()
    against the originally-selected WA — a real data-loss footgun.
    SelectByAlias now returns bool; WASaveAndSelectAlias switches to
    the no-area sentinel (0) on miss so the inner expression's
    Current() returns nil and short-circuits.

  * SUM <x1>, <xN> TO <v1>, <vN> — multi-pair form supported.
    Required two pieces:

       1. matchSegment's regular-marker stop-boundary now combines
          outerTail literals AND the segment's repeat boundary so
          `[, <xN>]` doesn't let `<xN>` swallow past the next ','.

       2. **Five parser miscompiled comma-separated expressions in
          code blocks.** `{|| e1, e2, e3 }` kept only the last expr
          and threw away earlier ones at *AST level*, so all their
          side effects vanished. New SeqExpr AST node + emitter
          (emit each, pop intermediate results) + folding/walk
          updates fix the underlying bug, which also unbreaks any
          other block that relied on comma sequencing.

  * pp.go's `;` continuation joiner now strips exactly one trailing
    `;` per iteration, preserving Harbour's `;;` convention (literal
    `;` followed by a continuation marker). Without this the SUM
    rule's chained `<v1> :=[ <vN> :=] 0 ; ; dbEval(...)` collapsed
    to a missing statement separator.

  * parseExprStmt's xBase fallback switch is back in sync with
    parseIdentStmt — COPY/SORT/COUNT/SUM/AVERAGE/TOTAL/UPDATE/JOIN/
    DISPLAY/LIST removed (std.ch handles all of them now). Leaving
    them in the fallback masked typos as silent no-ops.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 07:45:20 +09:00

645 lines
19 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// Preprocessor for Five — handles #include, #define, #ifdef/#endif.
// Harbour: /mnt/d/harbour-core/src/pp/ppcore.c (6383 lines)
//
// Five PP is simplified but covers the essential directives:
// #include "file.ch" — file inclusion
// #define NAME VALUE — simple text substitution
// #undef NAME — remove definition
// #ifdef NAME / #ifndef NAME / #else / #endif — conditional compilation
// #pragma — compiler hints
//
// #command/#translate is supported via command.go (pattern matching + substitution).
// Five also handles CLASS syntax natively in the parser.
package pp
import (
_ "embed"
"fmt"
"os"
"path/filepath"
"strings"
)
// embeddedStdCh is include/std.ch baked into the compiler binary so
// xBase commands like ERASE, RENAME, COMMIT, LOCATE, ... reach the
// parser already rewritten as plain function calls. Equivalent to
// Harbour's auto-included std.ch.
//
//go:embed std.ch
var embeddedStdCh string
// Preprocessor processes source code before lexing.
//
// It carries the state that accumulates while directives are handled:
// the #define table, the #include search path, the #command/#translate
// rule lists, the diagnostics of the current run, and any inline Go
// blocks captured from #pragma BEGINDUMP sections.
type Preprocessor struct {
defines map[string]string // #define name → replacement text ("" for flag-only defines)
includeDirs []string // search paths for #include, tried in the order they were added
included map[string]bool // include names currently being resolved (set and cleared by resolveInclude)
commands []*Rule // rules registered by #command and #xcommand, in registration order
translates []*Rule // rules registered by #translate and #xtranslate, in registration order
errors []string // diagnostics collected while processing; reset at the start of Process
GoDumps []string // collected #pragma BEGINDUMP Go code blocks
}
// New returns a Preprocessor with empty define and include-tracking
// tables and with the rules from the embedded std.ch already registered.
func New() *Preprocessor {
	pp := new(Preprocessor)
	pp.defines = map[string]string{}
	pp.included = map[string]bool{}
	pp.addStdRules()
	return pp
}
// addStdRules loads the built-in rules by running the embedded std.ch
// text through processLines. Directive handling stores each #command
// entry in pp.commands as a side effect, so the preprocessed text that
// comes back is not needed and is deliberately thrown away.
//
// Constructs that cannot be expressed safely as a #command (for example
// parser-handled ones such as @ SAY/GET, READ, TRY/CATCH, WITH TIMEOUT)
// are not covered here and are left to the parser.
func (pp *Preprocessor) addStdRules() {
	_ = pp.processLines("std.ch", embeddedStdCh, 0)
}
// AddIncludeDir appends dir to the list of directories searched when
// resolving #include files. Directories are tried in the order added.
func (pp *Preprocessor) AddIncludeDir(dir string) {
	dirs := pp.includeDirs
	pp.includeDirs = append(dirs, dir)
}
// Define registers name as a macro with the given replacement value,
// exactly as a `#define name value` line would. An existing definition
// of the same name is overwritten.
func (pp *Preprocessor) Define(name, value string) {
	table := pp.defines
	table[name] = value
}
// Process runs the preprocessor over source and returns the rewritten
// text together with the diagnostics produced during this call.
//
// Only the error list is reset on entry; defines, rules and collected
// GoDumps carry over from earlier calls on the same Preprocessor.
func (pp *Preprocessor) Process(filename, source string) (string, []string) {
	pp.errors = nil
	// Read pp.errors only after processLines has finished appending to it.
	output := pp.processLines(filename, source, 0)
	diagnostics := pp.errors
	return output, diagnostics
}
// maxIncludeDepth bounds #include nesting; processLines refuses to go
// deeper and reports an error instead.
const maxIncludeDepth = 20

// processLines preprocesses source line by line and returns the
// resulting text joined with "\n".
//
// filename is used for diagnostics and as the base for relative
// #include lookups; depth is the current #include nesting level. When
// depth exceeds maxIncludeDepth an error is recorded and source is
// returned unprocessed.
//
// For every line, in order:
//  1. a `#` directive ending in `;` is joined with the following line(s);
//  2. lines inside #pragma BEGINDUMP ... ENDDUMP are collected verbatim;
//  3. /* ... */ comments are removed (possibly spanning lines);
//  4. directives are dispatched to handleConditional / handleDirective;
//  5. remaining lines in active sections get #command/#translate rules
//     and then #define substitutions applied.
//
// Errors are appended to pp.errors; completed Go dumps are appended to
// pp.GoDumps and replaced in the output by a `FIVE_GODUMP__ <index>` line.
func (pp *Preprocessor) processLines(filename, source string, depth int) string {
	if depth > maxIncludeDepth {
		pp.errors = append(pp.errors, fmt.Sprintf("%s: #include depth exceeded (max %d)", filename, maxIncludeDepth))
		return source
	}

	lines := strings.Split(source, "\n")
	var result []string
	var ifStack []bool // true = active section, false = skipping
	active := true

	inBlockComment := false // inside a multi-line /* */ comment
	inPragmaDump := false   // inside #pragma BEGINDUMP ... ENDDUMP
	dumpActive := false     // whether the current dump began in an active section
	dumpStartLine := 0      // 1-based line where BEGINDUMP appeared
	var dumpLines []string  // accumulated Go code lines of the current dump

	for i := 0; i < len(lines); i++ {
		line := lines[i]

		// `#command`/`#translate` directives that end with a trailing `;`
		// continue on the next physical line — this is how harbour-core
		// formats its std.ch rules. Strip exactly one trailing `;` per
		// iteration so Harbour's `;;` convention ("literal `;` plus
		// continuation") survives: the inner `;` ends up as part of the
		// joined directive, the outer one drives the continuation.
		// Only `#`-directives participate; user code uses `;` differently.
		if t := strings.TrimSpace(line); strings.HasPrefix(t, "#") {
			for i+1 < len(lines) {
				trimmed := strings.TrimRight(line, " \t")
				if !strings.HasSuffix(trimmed, ";") {
					break
				}
				line = strings.TrimSuffix(trimmed, ";") + " " + strings.TrimSpace(lines[i+1])
				i++
			}
		}

		// Handle #pragma BEGINDUMP ... ENDDUMP (inline Go code blocks).
		if inPragmaDump {
			trimCheck := strings.TrimSpace(line)
			if strings.HasPrefix(trimCheck, "#") {
				dir := strings.TrimSpace(strings.TrimPrefix(trimCheck, "#"))
				upperDir := strings.ToUpper(dir)
				if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "ENDDUMP") {
					inPragmaDump = false
					body := strings.Join(dumpLines, "\n")
					dumpLines = nil

					// A dump that started inside an inactive #ifdef section
					// is conditional-compiled out: register nothing and
					// emit nothing, like any other inactive text.
					if !dumpActive {
						continue
					}

					// Five's inline dumps are Go, not C. Harbour's own
					// #pragma BEGINDUMP convention is C (hb_ret*, HB_FUNC,
					// #include <stdio.h> etc.), so `.prg` files ported
					// from Harbour will attempt to shove C through Five's
					// Go-emit pipeline and fail with cryptic errors like
					// "invalid character U+0023 '#'". Detect the C shape
					// and report a clear, actionable error up front.
					if looksLikeInlineC(body) {
						pp.errors = append(pp.errors, fmt.Sprintf(
							"%s:%d: #pragma BEGINDUMP contains C code — Five accepts inline Go only. Port the block to Go (or use an RTL function), then wrap in #pragma BEGINDUMP ... #pragma ENDDUMP.",
							filename, dumpStartLine))
						// Emit a syntactically invalid line so the parser
						// also fails at the expected position rather than
						// the build silently continuing.
						result = append(result, "__FIVE_INLINE_C_ERROR__")
						continue
					}

					pp.GoDumps = append(pp.GoDumps, body)
					result = append(result, fmt.Sprintf("FIVE_GODUMP__ %d", len(pp.GoDumps)-1))
					continue
				}
			}
			// Dump body line. The extent of an inactive dump is still
			// tracked (so `#` lines inside it never reach the conditional
			// handling) but its content is dropped.
			if dumpActive {
				dumpLines = append(dumpLines, line)
				result = append(result, "") // blank out for line counting
			}
			continue
		}

		// Handle multi-line block comments.
		if inBlockComment {
			idx := strings.Index(line, "*/")
			if idx < 0 {
				result = append(result, "") // blank out comment lines
				continue
			}
			inBlockComment = false
			line = line[idx+2:] // keep content after */
			if strings.TrimSpace(line) == "" {
				result = append(result, "")
				continue
			}
		}

		// Strip block comments within a single line and detect opening /*.
		line = stripBlockComments(line, &inBlockComment)
		trimmed := strings.TrimSpace(line)

		// The innermost conditional decides whether this line is active.
		if len(ifStack) > 0 {
			active = ifStack[len(ifStack)-1]
		} else {
			active = true
		}

		// Preprocessor directives. Conditionals and dump boundaries are
		// always tracked, even in inactive sections, so nesting stays
		// balanced.
		if strings.HasPrefix(trimmed, "#") {
			directive := strings.TrimSpace(strings.TrimPrefix(trimmed, "#"))

			// Detect #pragma BEGINDUMP.
			upperDir := strings.ToUpper(directive)
			if strings.HasPrefix(upperDir, "PRAGMA ") && strings.Contains(upperDir, "BEGINDUMP") {
				inPragmaDump = true
				dumpActive = active
				dumpStartLine = i + 1 // 1-based for error reporting
				dumpLines = nil
				if active {
					result = append(result, "")
				}
				continue
			}

			if pp.handleConditional(directive, &ifStack, active) {
				continue
			}
			if !active {
				continue // skip non-conditional directives in inactive sections
			}
			if pp.handleDirective(filename, directive, depth, &result, i+1) {
				continue
			}
			// Unrecognised directive: falls through and is emitted as text.
		}

		if !active {
			continue // skip lines in inactive #ifdef sections
		}

		// Apply #command/#translate rules.
		if len(pp.commands) > 0 || len(pp.translates) > 0 {
			line = pp.applyRules(line)
		}
		// Apply #define substitutions.
		if len(pp.defines) > 0 {
			line = pp.applyDefines(line)
		}
		result = append(result, line)
	}

	if len(ifStack) > 0 {
		pp.errors = append(pp.errors, fmt.Sprintf("%s: unterminated #ifdef/#ifndef", filename))
	}
	return strings.Join(result, "\n")
}
// handleConditional handles the conditional-compilation directives
// #ifdef, #ifndef, #if, #else and #endif.
//
// directive is the directive text without the leading '#'. ifStack is
// the stack of section states (true = active) and is pushed, flipped or
// popped in place; active is the state of the section the directive
// appears in. The return value reports whether directive was one of the
// conditional directives.
//
// #if is deliberately minimal: only the literals 1 and .T. count as
// true; 0, .F. and every other expression count as false.
func (pp *Preprocessor) handleConditional(directive string, ifStack *[]bool, active bool) bool {
	upper := strings.ToUpper(directive)

	// A newly opened section can only be active if its parent is.
	push := func(cond bool) {
		*ifStack = append(*ifStack, cond && active)
	}

	switch {
	case strings.HasPrefix(upper, "IFDEF "):
		_, defined := pp.defines[strings.TrimSpace(directive[6:])]
		push(defined)
		return true

	case strings.HasPrefix(upper, "IFNDEF "):
		_, defined := pp.defines[strings.TrimSpace(directive[7:])]
		push(!defined)
		return true

	case upper == "IF", strings.HasPrefix(upper, "IF "):
		expr := strings.TrimSpace(directive[2:])
		push(expr == "1" || expr == ".T.")
		return true

	case upper == "ELSE", strings.HasPrefix(upper, "ELSE "), strings.HasPrefix(upper, "ELSE\t"):
		// #else may carry a trailing comment. Flip the innermost section,
		// but never activate it when the enclosing section is inactive.
		stack := *ifStack
		if n := len(stack); n > 0 {
			parentActive := n == 1 || stack[n-2]
			stack[n-1] = !stack[n-1] && parentActive
		}
		return true
	}

	// #endif may carry a trailing comment (#endif /* COMMENT */), so
	// compare only the first whitespace-delimited word.
	keyword := strings.TrimSpace(upper)
	if cut := strings.IndexAny(keyword, " \t"); cut > 0 {
		keyword = keyword[:cut]
	}
	if keyword != "ENDIF" {
		return false
	}
	if n := len(*ifStack); n > 0 {
		*ifStack = (*ifStack)[:n-1]
	}
	return true
}
// handleDirective processes the non-conditional directives: #include,
// #define, #undef, #pragma, #warning/#error/#stdout and the
// #command/#translate family.
//
// directive is the directive text without the leading '#'. Output lines
// produced by the directive (included content, pass-through comments)
// are appended to *result; lineNo is the 1-based source line used in
// diagnostics; depth is the current #include nesting level. It returns
// true when the directive was recognised and consumed.
func (pp *Preprocessor) handleDirective(filename, directive string, depth int, result *[]string, lineNo int) bool {
	upper := strings.ToUpper(directive)

	switch {
	// #include "file" or #include <file>
	case strings.HasPrefix(upper, "INCLUDE "):
		rest := strings.TrimSpace(directive[8:])
		inclFile := pp.extractIncludeFile(rest)
		if inclFile == "" {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #include", filename, lineNo))
			return true
		}
		content := pp.resolveInclude(filename, inclFile)
		if content == "" {
			// Not found — not an error for Five (some .ch files are optional).
			*result = append(*result, fmt.Sprintf("// #include %q — not found (skipped)", inclFile))
			return true
		}
		// Process included content recursively.
		processed := pp.processLines(inclFile, content, depth+1)
		*result = append(*result, strings.Split(processed, "\n")...)
		return true

	// #define NAME [VALUE]
	case strings.HasPrefix(upper, "DEFINE "):
		rest := strings.TrimSpace(directive[7:])
		if rest == "" {
			pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: invalid #define", filename, lineNo))
			return true
		}
		// Function-like macro (#define NAME( params ) body): '(' directly
		// follows the name. Not yet supported, so it is skipped rather
		// than registered as a plain text substitution.
		if idx := strings.IndexByte(rest, '('); idx > 0 && idx < strings.IndexAny(rest+" ", " \t") {
			return true
		}
		// The name ends at the first space OR tab; splitting on space
		// alone would fold a tab-separated value into the name.
		name, value := rest, ""
		if cut := strings.IndexAny(rest, " \t"); cut >= 0 {
			name, value = rest[:cut], strings.TrimSpace(rest[cut+1:])
		}
		pp.defines[name] = trimDefineComment(value)
		return true

	// #undef NAME
	case strings.HasPrefix(upper, "UNDEF "):
		delete(pp.defines, strings.TrimSpace(directive[6:]))
		return true

	// #pragma — just pass through as comment
	case strings.HasPrefix(upper, "PRAGMA "):
		*result = append(*result, "// "+directive)
		return true

	// #warning, #error, #stdout — skip (emit as comment)
	case strings.HasPrefix(upper, "WARNING"),
		strings.HasPrefix(upper, "ERROR"),
		strings.HasPrefix(upper, "STDOUT"):
		*result = append(*result, "// #"+directive)
		return true

	// #command / #translate and their x-variants — parse and store the
	// rule. A rule that ParseRule rejects (nil) is dropped.
	case strings.HasPrefix(upper, "COMMAND "):
		if rule := ParseRule(directive[8:], true, false); rule != nil {
			pp.commands = append(pp.commands, rule)
		}
		return true

	case strings.HasPrefix(upper, "TRANSLATE "):
		if rule := ParseRule(directive[10:], false, false); rule != nil {
			pp.translates = append(pp.translates, rule)
		}
		return true

	case strings.HasPrefix(upper, "XCOMMAND "):
		if rule := ParseRule(directive[9:], true, true); rule != nil {
			pp.commands = append(pp.commands, rule)
		}
		return true

	case strings.HasPrefix(upper, "XTRANSLATE "):
		if rule := ParseRule(directive[11:], false, true); rule != nil {
			pp.translates = append(pp.translates, rule)
		}
		return true
	}

	return false
}

// trimDefineComment removes a trailing `//` or `/*` comment from a
// #define value. Comment markers inside a quoted string ('...' or
// "...") are part of the value and are left alone.
func trimDefineComment(value string) string {
	var quote byte // active string delimiter, 0 when outside a string
	for i := 0; i < len(value); i++ {
		c := value[i]
		switch {
		case quote != 0:
			if c == quote {
				quote = 0
			}
		case c == '"' || c == '\'':
			quote = c
		case c == '/' && i+1 < len(value) && (value[i+1] == '/' || value[i+1] == '*'):
			return strings.TrimSpace(value[:i])
		}
	}
	return value
}
// extractIncludeFile returns the file name from the argument of an
// #include directive. Surrounding "..." or <...> delimiters are removed;
// anything else is treated as a bare file name and returned trimmed.
func (pp *Preprocessor) extractIncludeFile(s string) string {
	name := strings.TrimSpace(s)
	if len(name) < 2 {
		return name // too short to carry a delimiter pair
	}
	first, last := name[0], name[len(name)-1]
	quoted := first == '"' && last == '"'
	angled := first == '<' && last == '>'
	if quoted || angled {
		return name[1 : len(name)-1]
	}
	return name
}
// resolveInclude looks for inclFile on disk and returns its content, or
// "" when no candidate path can be read.
//
// Candidate paths are tried in this order:
//  1. the directory of currentFile (when currentFile is non-empty);
//  2. each directory registered with AddIncludeDir, in order.
//
// While the lookup runs, inclFile is marked in pp.included and a nested
// request for the same name yields "". The mark is keyed by the name as
// written (not a resolved path) and is removed again when this function
// returns.
func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
	if pp.included[inclFile] {
		return ""
	}
	pp.included[inclFile] = true
	defer delete(pp.included, inclFile)

	candidates := make([]string, 0, len(pp.includeDirs)+1)
	if currentFile != "" {
		candidates = append(candidates, filepath.Join(filepath.Dir(currentFile), inclFile))
	}
	for _, dir := range pp.includeDirs {
		candidates = append(candidates, filepath.Join(dir, inclFile))
	}

	// The first readable candidate wins; unreadable ones are skipped.
	for _, candidate := range candidates {
		if data, err := os.ReadFile(candidate); err == nil {
			return string(data)
		}
	}
	return ""
}
// applyRules rewrites line using the first matching rule.
//
// Blank lines and lines that start with `//` are returned untouched.
// Otherwise the whitespace-trimmed line is offered to every #command
// rule, then to every #translate rule, in registration order; the first
// rule whose MatchLine succeeds supplies the replacement, which is
// re-indented with the original leading spaces/tabs. If nothing matches
// the line is returned unchanged.
func (pp *Preprocessor) applyRules(line string) string {
	stmt := strings.TrimSpace(line)
	if stmt == "" || strings.HasPrefix(stmt, "//") {
		return line
	}

	// Commands take precedence over translates.
	for _, rules := range [][]*Rule{pp.commands, pp.translates} {
		for _, rule := range rules {
			expanded, ok := rule.MatchLine(stmt)
			if !ok {
				continue
			}
			// Preserve leading whitespace.
			indentLen := len(line) - len(strings.TrimLeft(line, " \t"))
			return line[:indentLen] + expanded
		}
	}
	return line
}
// stripBlockComments removes /* ... */ comments from a single line.
//
// Each comment that closes on the same line is replaced by one space.
// If a /* has no closing */ on this line, the text before it is
// returned and *inBlock is set to true so the caller can skip the
// following lines until the comment ends.
//
// Comment markers inside '...' or "..." string literals are left alone.
// A `//` line comment hides everything after it: the rest of the line is
// copied through verbatim and a /* inside it does not open a block
// comment.
func stripBlockComments(line string, inBlock *bool) string {
	var out strings.Builder
	var quote byte // active string delimiter, 0 when outside a string

	for i := 0; i < len(line); {
		c := line[i]
		switch {
		case quote != 0:
			// Inside a string literal: copy through, watching for its end.
			if c == quote {
				quote = 0
			}

		case c == '"' || c == '\'':
			quote = c

		case c == '/' && i+1 < len(line) && line[i+1] == '/':
			// Line comment: nothing after this point can start a block.
			out.WriteString(line[i:])
			return out.String()

		case c == '/' && i+1 < len(line) && line[i+1] == '*':
			end := strings.Index(line[i+2:], "*/")
			if end < 0 {
				*inBlock = true
				return out.String() // rest of line is comment
			}
			out.WriteByte(' ') // replace comment with space
			i += 2 + end + 2   // skip past */
			continue
		}
		out.WriteByte(c)
		i++
	}
	return out.String()
}
// applyDefines substitutes every value-carrying #define into line by
// whole-word replacement outside string literals (see replaceWord).
// Flag-only defines (empty value) are never substituted.
//
// This is plain textual replacement, not full macro expansion. The
// defines are visited in Go map iteration order, so when one define's
// value mentions another define's name the order in which the two are
// applied is unspecified.
func (pp *Preprocessor) applyDefines(line string) string {
	out := line
	for name := range pp.defines {
		if value := pp.defines[name]; value != "" {
			out = replaceWord(out, name, value)
		}
	}
	return out
}
// replaceWord replaces whole-word occurrences of old with new in line,
// skipping anything inside '...' or "..." string literals.
//
// A match counts as a whole word when the bytes immediately before and
// after it are not word characters (see isWordChar) or lie outside the
// line. The replacement text is not rescanned. An empty old never
// matches: line is returned unchanged.
func replaceWord(line, old, new string) string {
	// An empty name would match at every position without consuming any
	// input, so the scan below would never advance.
	if old == "" || !strings.Contains(line, old) {
		return line
	}

	var out strings.Builder
	out.Grow(len(line))
	var quote byte // active string delimiter, 0 when outside a string

	for i := 0; i < len(line); {
		c := line[i]
		switch {
		case quote != 0:
			// Inside a string literal: copy through, watching for its end.
			if c == quote {
				quote = 0
			}

		case c == '"' || c == '\'':
			quote = c

		case strings.HasPrefix(line[i:], old) &&
			(i == 0 || !isWordChar(line[i-1])) &&
			(i+len(old) == len(line) || !isWordChar(line[i+len(old)])):
			out.WriteString(new)
			i += len(old)
			continue
		}
		out.WriteByte(c)
		i++
	}
	return out.String()
}

// isWordChar reports whether c can be part of an identifier: an ASCII
// letter, an ASCII digit, or an underscore.
func isWordChar(c byte) bool {
	switch {
	case c >= 'a' && c <= 'z', c >= 'A' && c <= 'Z', c >= '0' && c <= '9', c == '_':
		return true
	}
	return false
}
// looksLikeInlineC heuristically detects Harbour-style inline C inside
// a #pragma BEGINDUMP block, so the user gets an early, readable error
// instead of an obscure Go syntax complaint far downstream.
//
// The body is scanned line by line; after trimming surrounding
// whitespace, a line that STARTS with any of the following marks the
// whole body as C:
//   - `#include <` or `#include "` — C preprocessor include; Go has no
//     #include at all
//   - `HB_FUNC(`, `HB_FUNC_STATIC(`, `HB_FUNC_TRANSLATE(` — Harbour's C
//     FFI macros. The Go-side counterpart is written
//     `hbrt.HB_FUNC("NAME", fn)` and therefore does not start the line
//     with the bare macro name
//   - `typedef `, `struct `, `int main(`, `void main(` — C declarations
//     with no Go analogue at line start
//
// An empty or whitespace-only body is never reported as C.
func looksLikeInlineC(body string) bool {
	cLineStarts := [...]string{
		"#include <", `#include "`,
		"HB_FUNC(", "HB_FUNC_STATIC(", "HB_FUNC_TRANSLATE(",
		"typedef ", "struct ", "int main(", "void main(",
	}
	for _, raw := range strings.Split(body, "\n") {
		stmt := strings.TrimSpace(raw)
		for _, start := range cLineStarts {
			if strings.HasPrefix(stmt, start) {
				return true
			}
		}
	}
	return false
}