feat(pp,rtl): Tier 2 audit followups — JOIN hash + PP validation + C heuristic
Three medium-priority audit items in one commit, each independently
revertible.
* **#18 JOIN hash-join fast path.** New std.ch shape:
JOIN WITH <alias> TO <file> [FIELDS ...] ON <mfield> = <dfield>
expands to a 6-arg __dbJoin call with the master/detail key
field names. Runtime detects the extra args, builds an O(M)
hash over the detail's key column, then probes per master row
for O(N+M) total — vs the FOR form's O(N*M). For 1k×1k that's
2k vs 1M operations; the gap widens with N. The original FOR
form is unchanged and stays the fallback for arbitrary
predicates. New helper dbHashKey type-tags the key string so
`1` (numeric), `"1"` (string), and `.T.` (logical) don't
collide in the bucket map.
* **#38 PP rule result-marker validation.** ParseRule now walks
the result template after parseMarkers and warns about every
`<name>` (or `<(name)>` / `<.name.>` / `<{name}>` / `#<name>`
/ `<"name">`) that doesn't match a pattern marker. Warnings
flow into pp.errors via handleDirective with the directive's
filename:line, so a typo'd `<NaMe>` in a case-sensitive
`#xcommand` rule fails the build with a clear diagnostic
instead of silently producing broken expansions.
* **#44 looksLikeInlineC heuristic strengthened.** Catches more
of the common Harbour-PRG-with-C-inline-block shapes that
used to fall through and produce cryptic Go-side errors:
function-like #define, `extern "C"` linkage blocks, C return-
type declarations (`int foo(`, `static char* bar(`), and the
hb_ret*() family of return-value setters in Harbour's C
FFI. Two small predicate helpers (allLetters,
allIdentChars) keep the C-vs-Go disambiguation tight enough
that legit Go code (`func name() int { ... }`) doesn't trip.
* **#28 LIST/DISPLAY pagination** — explicitly deferred. Proper
pagination requires interactive terminal handling (Inkey(0)
for the keypress) which would hang in CI / batch mode. Will
revisit when an interactive terminal layer needs it for
other reasons.
Test fixtures: tests/std_ch/test_join_hash.prg verifies the new
ON-form path produces the same output as the FOR form would.
std.ch runner now stands at 16/16.
Other gates green:
go test ./... : PASS
FiveSql2 SQL:1999 : 43/43
Harbour compat : 56/56
std.ch suite : 16/16
FRB suite : 7/7
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,7 @@
|
||||
package pp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -40,6 +41,13 @@ type Rule struct {
|
||||
Keyword string // first keyword (for fast matching)
|
||||
Markers []Marker // parsed pattern markers
|
||||
ResultTmpl string // result template with marker references
|
||||
|
||||
// Warnings collected during ParseRule. Currently only one source:
|
||||
// result-template markers that reference a name absent from the
|
||||
// pattern. Caller can surface these to the user — a typo'd
|
||||
// `<For>` instead of `<for>` used to silently produce broken
|
||||
// expansion output.
|
||||
Warnings []string
|
||||
}
|
||||
|
||||
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
|
||||
@@ -105,9 +113,86 @@ func ParseRule(directive string, isCommand, caseSens bool) *Rule {
|
||||
// Parse markers from pattern
|
||||
rule.Markers = parseMarkers(pattern)
|
||||
|
||||
// Validate result-template marker references. Each `<name>`
|
||||
// (and its smart-stringify / blockify / logify / dumb-stringify
|
||||
// variants) must reference a name declared in the pattern.
|
||||
// Catches typos like `<For>` vs `<for>` (case-sensitive
|
||||
// xcommand) before they silently produce broken output at
|
||||
// expansion time.
|
||||
rule.Warnings = validateResultMarkers(pattern, result, rule.Markers, caseSens)
|
||||
|
||||
return rule
|
||||
}
|
||||
|
||||
// validateResultMarkers scans the result template for marker
|
||||
// references and reports any name not declared in the pattern.
|
||||
// Result returned as a slice of human-readable warning strings —
|
||||
// caller decides whether to surface or ignore.
|
||||
func validateResultMarkers(pattern, result string, markers []Marker, caseSens bool) []string {
|
||||
declared := make(map[string]bool, len(markers))
|
||||
for _, m := range markers {
|
||||
key := m.Name
|
||||
if !caseSens {
|
||||
key = strings.ToUpper(key)
|
||||
}
|
||||
declared[key] = true
|
||||
}
|
||||
if len(declared) == 0 {
|
||||
// Nothing to validate against — rule is keyword-only.
|
||||
return nil
|
||||
}
|
||||
|
||||
var warnings []string
|
||||
seen := map[string]bool{}
|
||||
i := 0
|
||||
for i < len(result) {
|
||||
// Marker shapes recognised here mirror applyResult's loop:
|
||||
// <name>, <(name)>, <.name.>, <{name}>, <"name">, #<name>.
|
||||
// findMarkerEnd already understands all of them — we just
|
||||
// need the inner identifier.
|
||||
if result[i] != '<' && !(result[i] == '#' && i+1 < len(result) && result[i+1] == '<') {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
start := i
|
||||
if result[i] == '#' {
|
||||
start = i + 1
|
||||
}
|
||||
end := findMarkerEnd(result, start)
|
||||
if end == 0 {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
// Extract identifier between the wrappers.
|
||||
inner := result[start+1 : end-1]
|
||||
// Strip prefix `(`, `.`, `"`, `{`
|
||||
for len(inner) > 0 && (inner[0] == '(' || inner[0] == '.' || inner[0] == '"' || inner[0] == '{') {
|
||||
inner = inner[1:]
|
||||
}
|
||||
// Strip suffix `)`, `.`, `"`, `}`
|
||||
for len(inner) > 0 {
|
||||
c := inner[len(inner)-1]
|
||||
if c == ')' || c == '.' || c == '"' || c == '}' || c == ' ' {
|
||||
inner = inner[:len(inner)-1]
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
key := inner
|
||||
if !caseSens {
|
||||
key = strings.ToUpper(key)
|
||||
}
|
||||
if key != "" && !declared[key] && !seen[key] {
|
||||
seen[key] = true
|
||||
warnings = append(warnings,
|
||||
fmt.Sprintf("result-template marker <%s> not declared in pattern: %q",
|
||||
inner, pattern))
|
||||
}
|
||||
i = end
|
||||
}
|
||||
return warnings
|
||||
}
|
||||
|
||||
// parseMarkers extracts all <...> markers from a pattern.
|
||||
func parseMarkers(pattern string) []Marker {
|
||||
var markers []Marker
|
||||
|
||||
Reference in New Issue
Block a user