feat(pp,rtl): Tier 2 audit followups — JOIN hash + PP validation + C heuristic
Three medium-priority audit items in one commit, each independently
revertible.
* **#18 JOIN hash-join fast path.** New std.ch shape:
JOIN WITH <alias> TO <file> [FIELDS ...] ON <mfield> = <dfield>
expands to a 6-arg __dbJoin call with the master/detail key
field names. Runtime detects the extra args, builds an O(M)
hash over the detail's key column, then probes per master row
for O(N+M) total — vs the FOR form's O(N*M). For 1k×1k that's
2k vs 1M operations; the gap widens with N. The original FOR
form is unchanged and stays the fallback for arbitrary
predicates. New helper dbHashKey type-tags the key string so
`1` (numeric), `"1"` (string), and `.T.` (logical) don't
collide in the bucket map.
* **#38 PP rule result-marker validation.** ParseRule now walks
the result template after parseMarkers and warns about every
`<name>` (or `<(name)>` / `<.name.>` / `<{name}>` / `#<name>`
/ `<"name">`) that doesn't match a pattern marker. Warnings
flow into pp.errors via handleDirective with the directive's
filename:line, so a typo'd `<NaMe>` in an `#xcommand`
case-sensitive rule fails the build with a clear diagnostic
instead of silently producing broken expansions.
* **#44 looksLikeInlineC heuristic strengthened.** Catches more
of the common Harbour-PRG-with-C-inline-block shapes that
used to fall through and produce cryptic Go-side errors:
function-like #define, `extern "C"` linkage blocks, C return-
type declarations (`int foo(`, `static char* bar(`), and the
hb_ret*() helper family used by Harbour's C FFI return
setters. Two small predicate helpers (allLetters,
allIdentChars) keep the C-vs-Go disambiguation tight enough
that legit Go code (`func name() int { ... }`) doesn't trip.
* **#28 LIST/DISPLAY pagination** — explicitly deferred. Proper
pagination requires interactive terminal handling (Inkey(0)
for the keypress) which would hang in CI / batch mode. Will
revisit when an interactive terminal layer needs it for
other reasons.
Test fixtures: tests/std_ch/test_join_hash.prg verifies the new
ON-form path produces the same output as the FOR form would.
std.ch runner now stands at 16/16.
Other gates green:
go test ./... : PASS
FiveSql2 SQL:1999 : 43/43
Harbour compat : 56/56
std.ch suite : 16/16
FRB suite : 7/7
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -378,29 +378,37 @@ func (pp *Preprocessor) handleDirective(filename, directive string, depth int, r
|
||||
return true
|
||||
}
|
||||
|
||||
// #command / #translate — parse and store rules
|
||||
if strings.HasPrefix(upper, "COMMAND ") {
|
||||
if rule := ParseRule(directive[8:], true, false); rule != nil {
|
||||
pp.commands = append(pp.commands, rule)
|
||||
// #command / #translate — parse and store rules. ParseRule now
|
||||
// validates that result-template marker references resolve to a
|
||||
// pattern marker; any unresolved name flows back as a warning
|
||||
// surfaced via pp.errors with the directive's filename:line so
|
||||
// the user can find the typo (e.g. case-sensitive `<For>` vs
|
||||
// `<for>` in an #xcommand). Without surfacing, the broken
|
||||
// expansion silently produced empty / mangled output at every
|
||||
// call site.
|
||||
registerRule := func(r *Rule, store *[]*Rule) {
|
||||
if r == nil {
|
||||
return
|
||||
}
|
||||
*store = append(*store, r)
|
||||
for _, w := range r.Warnings {
|
||||
pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: #command: %s", filename, lineNo, w))
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(upper, "COMMAND ") {
|
||||
registerRule(ParseRule(directive[8:], true, false), &pp.commands)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(upper, "TRANSLATE ") {
|
||||
if rule := ParseRule(directive[10:], false, false); rule != nil {
|
||||
pp.translates = append(pp.translates, rule)
|
||||
}
|
||||
registerRule(ParseRule(directive[10:], false, false), &pp.translates)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(upper, "XCOMMAND ") {
|
||||
if rule := ParseRule(directive[9:], true, true); rule != nil {
|
||||
pp.commands = append(pp.commands, rule)
|
||||
}
|
||||
registerRule(ParseRule(directive[9:], true, true), &pp.commands)
|
||||
return true
|
||||
}
|
||||
if strings.HasPrefix(upper, "XTRANSLATE ") {
|
||||
if rule := ParseRule(directive[11:], false, true); rule != nil {
|
||||
pp.translates = append(pp.translates, rule)
|
||||
}
|
||||
registerRule(ParseRule(directive[11:], false, true), &pp.translates)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -714,6 +722,16 @@ func looksLikeInlineC(body string) bool {
|
||||
if strings.HasPrefix(l, "#include <") || strings.HasPrefix(l, `#include "`) {
|
||||
return true
|
||||
}
|
||||
// Function-like #define is C-only — Go uses const / generics.
|
||||
// `#define FOO(x) ...`
|
||||
if strings.HasPrefix(l, "#define ") {
|
||||
// Find the name and check for `(` immediately after with
|
||||
// no space (function-like macro signature).
|
||||
rest := strings.TrimSpace(l[8:])
|
||||
if i := strings.IndexAny(rest, " \t("); i > 0 && i < len(rest) && rest[i] == '(' {
|
||||
return true
|
||||
}
|
||||
}
|
||||
// Bare `HB_FUNC( NAME )` with an unquoted identifier is the
|
||||
// Harbour C FFI macro. The Go-side counterpart is
|
||||
// `hbrt.HB_FUNC("NAME", fn)` — lowercase package prefix and a
|
||||
@@ -723,12 +741,81 @@ func looksLikeInlineC(body string) bool {
|
||||
strings.HasPrefix(l, "HB_FUNC_TRANSLATE(") {
|
||||
return true
|
||||
}
|
||||
// `extern "C"` — C / C++ linkage block, never Go.
|
||||
if strings.HasPrefix(l, `extern "C"`) {
|
||||
return true
|
||||
}
|
||||
// C declarations at line start that have no Go analogue.
|
||||
if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") ||
|
||||
strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") {
|
||||
return true
|
||||
}
|
||||
// C return-type declarations: `int name(`, `char *name(`, etc.
|
||||
// Matching exactly `<C-type> <ident>(` keeps us off Go's
|
||||
// `func name(` (which starts with `func`, not a type word)
|
||||
// and Go variable declarations (which use `:=` or `var`).
|
||||
if isCReturnTypeDecl(l) {
|
||||
return true
|
||||
}
|
||||
// hb_ret*(...) helpers — Harbour's C-side return setters.
|
||||
// hb_retc / hb_retni / hb_retnl / hb_retd / hb_retl / hb_retptr
|
||||
if strings.HasPrefix(l, "hb_ret") {
|
||||
rest := l[6:]
|
||||
if i := strings.IndexByte(rest, '('); i >= 0 {
|
||||
name := rest[:i]
|
||||
if name != "" && allLetters(name) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// isCReturnTypeDecl reports whether the line opens a C function
|
||||
// declaration like `int foo(` / `static char* bar(`. We match a
|
||||
// short prefix list of C-only types so a Go declaration like
|
||||
// `func name() int { ... }` doesn't trip this.
|
||||
func isCReturnTypeDecl(l string) bool {
|
||||
cTypePrefixes := []string{
|
||||
"int ", "void ", "char ", "long ", "short ", "double ", "float ",
|
||||
"unsigned ", "signed ", "size_t ", "ssize_t ", "uint",
|
||||
"static int ", "static void ", "static char ", "static long ",
|
||||
}
|
||||
for _, p := range cTypePrefixes {
|
||||
if strings.HasPrefix(l, p) {
|
||||
rest := strings.TrimLeft(l[len(p):], " \t*")
|
||||
// rest should now start with an identifier followed by `(`.
|
||||
if i := strings.IndexByte(rest, '('); i > 0 && i < 50 {
|
||||
name := rest[:i]
|
||||
if allIdentChars(name) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// allLetters reports whether s is non-empty and consists solely of
// ASCII letters (a-z, A-Z). Any other byte, including every byte of a
// multi-byte UTF-8 sequence, makes it return false.
func allLetters(s string) bool {
	if len(s) == 0 {
		return false
	}
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		lower := 'a' <= ch && ch <= 'z'
		upper := 'A' <= ch && ch <= 'Z'
		if !lower && !upper {
			return false
		}
	}
	return true
}
|
||||
|
||||
// allIdentChars reports whether s is a non-empty ASCII identifier:
// letters and underscores anywhere, digits anywhere except the first
// position. Any other byte (including non-ASCII) makes it return false.
func allIdentChars(s string) bool {
	if len(s) == 0 {
		return false
	}
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case ch == '_', 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
			// Always valid.
		case pos > 0 && '0' <= ch && ch <= '9':
			// Digits are valid once the identifier has started.
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
Reference in New Issue
Block a user