feat(pp,rtl): Tier 2 audit followups — JOIN hash + PP validation + C heuristic

Three medium-priority audit items in one commit, each independently
revertible.

  * **#18 JOIN hash-join fast path.** New std.ch shape:
        JOIN WITH <alias> TO <file> [FIELDS ...] ON <mfield> = <dfield>
    expands to a 6-arg __dbJoin call with the master/detail key
    field names. Runtime detects the extra args, builds an O(M)
    hash over the detail's key column, then probes per master row
    for O(N+M) total — vs the FOR form's O(N*M). For 1k×1k that's
    2k vs 1M operations; the gap widens with N. The original FOR
    form is unchanged and stays the fallback for arbitrary
    predicates. New helper dbHashKey type-tags the key string so
    `1` (numeric), `"1"` (string), and `.T.` (logical) don't
    collide in the bucket map.

  * **#38 PP rule result-marker validation.** ParseRule now walks
    the result template after parseMarkers and warns about every
    `<name>` (or `<(name)>` / `<.name.>` / `<{name}>` / `#<name>`
    / `<"name">`) that doesn't match a pattern marker. Warnings
    flow into pp.errors via handleDirective with the directive's
    filename:line, so a typo'd `<NaMe>` in an `#xcommand`
    case-sensitive rule fails the build with a clear diagnostic
    instead of silently producing broken expansions.

  * **#44 looksLikeInlineC heuristic strengthened.** Catches more
    of the common Harbour-PRG-with-C-inline-block shapes that
    used to fall through and produce cryptic Go-side errors:
    function-like #define, `extern "C"` linkage blocks, C return-
    type declarations (`int foo(`, `static char* bar(`), and the
    hb_ret*() family of return-value setters in Harbour's C
    FFI. Two small predicate helpers (allLetters,
    allIdentChars) keep the C-vs-Go disambiguation tight enough
    that legit Go code (`func name() int { ... }`) doesn't trip.

  * **#28 LIST/DISPLAY pagination** — explicitly deferred. Proper
    pagination requires interactive terminal handling (Inkey(0)
    for the keypress) which would hang in CI / batch mode. Will
    revisit when an interactive terminal layer needs it for
    other reasons.

Test fixtures: tests/std_ch/test_join_hash.prg verifies the new
ON-form path produces the same output as the FOR form would.
std.ch runner now stands at 16/16.

Other gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56
  std.ch suite       : 16/16
  FRB suite          : 7/7

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 19:21:19 +09:00
parent 29ca02e1bc
commit 2008266da7
6 changed files with 379 additions and 14 deletions

View File

@@ -28,6 +28,7 @@
package pp
import (
"fmt"
"strings"
)
@@ -40,6 +41,13 @@ type Rule struct {
Keyword string // first keyword (for fast matching)
Markers []Marker // parsed pattern markers
ResultTmpl string // result template with marker references
// Warnings collected during ParseRule. Currently only one source:
// result-template markers that reference a name absent from the
// pattern. Caller can surface these to the user — a typo'd
// `<For>` instead of `<for>` used to silently produce broken
// expansion output.
Warnings []string
}
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
@@ -105,9 +113,86 @@ func ParseRule(directive string, isCommand, caseSens bool) *Rule {
// Parse markers from pattern
rule.Markers = parseMarkers(pattern)
// Validate result-template marker references. Each `<name>`
// (and its smart-stringify / blockify / logify / dumb-stringify
// variants) must reference a name declared in the pattern.
// Catches typos like `<For>` vs `<for>` (case-sensitive
// xcommand) before they silently produce broken output at
// expansion time.
rule.Warnings = validateResultMarkers(pattern, result, rule.Markers, caseSens)
return rule
}
// validateResultMarkers checks every marker reference found in the
// result template against the names declared by the pattern markers
// and returns one human-readable warning per distinct undeclared name.
//
// pattern is only quoted in the warning text; result is the template
// that gets scanned; markers are the parsed pattern markers whose Name
// fields form the set of declared names. When caseSens is false both
// declared and referenced names are upper-cased before comparison.
//
// A nil slice is returned when the pattern declares no markers at all
// (keyword-only rule) or when every reference resolves. The caller
// decides whether the warnings are surfaced or dropped.
func validateResultMarkers(pattern, result string, markers []Marker, caseSens bool) []string {
	// normalize yields the lookup key for a marker name, folding case
	// only for case-insensitive rules.
	normalize := func(name string) string {
		if caseSens {
			return name
		}
		return strings.ToUpper(name)
	}

	known := make(map[string]bool, len(markers))
	for _, m := range markers {
		known[normalize(m.Name)] = true
	}
	if len(known) == 0 {
		// Keyword-only rule: there is nothing to validate against.
		return nil
	}

	var out []string
	reported := map[string]bool{} // keys already warned about, so each name is listed once
	for pos := 0; pos < len(result); {
		// A reference opens either with '<' directly or with "#<"
		// (in which case the '<' sits one byte further on).
		open := pos
		switch {
		case result[pos] == '<':
			// open already points at the '<'.
		case result[pos] == '#' && pos+1 < len(result) && result[pos+1] == '<':
			open = pos + 1
		default:
			pos++
			continue
		}

		// findMarkerEnd reports 0 when no marker ends here; treat the
		// byte as ordinary text and move on.
		stop := findMarkerEnd(result, open)
		if stop == 0 {
			pos++
			continue
		}

		// Take the text between the opening '<' and the last byte
		// before stop, then peel the wrapper characters of the
		// <(name)>, <.name.>, <"name">, <{name}> forms. Trailing
		// blanks are dropped together with the closing wrappers.
		name := result[open+1 : stop-1]
		name = strings.TrimLeft(name, `(."{`)
		name = strings.TrimRight(name, `)."} `)

		key := normalize(name)
		if key != "" && !known[key] && !reported[key] {
			reported[key] = true
			out = append(out,
				fmt.Sprintf("result-template marker <%s> not declared in pattern: %q",
					name, pattern))
		}
		pos = stop
	}
	return out
}
// parseMarkers extracts all <...> markers from a pattern.
func parseMarkers(pattern string) []Marker {
var markers []Marker