feat(pp,rtl): Tier 2 audit followups — JOIN hash + PP validation + C heuristic
Three medium-priority audit items in one commit, each independently
revertible.
* **#18 JOIN hash-join fast path.** New std.ch shape:
JOIN WITH <alias> TO <file> [FIELDS ...] ON <mfield> = <dfield>
expands to a 6-arg __dbJoin call with the master/detail key
field names. Runtime detects the extra args, builds an O(M)
hash over the detail's key column, then probes per master row
for O(N+M) total — vs the FOR form's O(N*M). For 1k×1k that's
2k vs 1M operations; the gap widens with N. The original FOR
form is unchanged and stays the fallback for arbitrary
predicates. New helper dbHashKey type-tags the key string so
`1` (numeric), `"1"` (string), and `.T.` (logical) don't
collide in the bucket map.
* **#38 PP rule result-marker validation.** ParseRule now walks
the result template after parseMarkers and warns about every
`<name>` (or `<(name)>` / `<.name.>` / `<{name}>` / `#<name>`
/ `<"name">`) that doesn't match a pattern marker. Warnings
flow into pp.errors via handleDirective with the directive's
filename:line, so a typo'd `<NaMe>` in an `#xcommand`
case-sensitive rule fails the build with a clear diagnostic
instead of silently producing broken expansions.
* **#44 looksLikeInlineC heuristic strengthened.** Catches more
of the common Harbour-PRG-with-C-inline-block shapes that
used to fall through and produce cryptic Go-side errors:
function-like #define, `extern "C"` linkage blocks, C return-
type declarations (`int foo(`, `static char* bar(`), and the
hb_ret*() helper family used by Harbour's C FFI return
setters. Two small predicate helpers (allLetters,
allIdentChars) keep the C-vs-Go disambiguation tight enough
that legit Go code (`func name() int { ... }`) doesn't trip.
* **#28 LIST/DISPLAY pagination** — explicitly deferred. Proper
pagination requires interactive terminal handling (Inkey(0)
for the keypress) which would hang in CI / batch mode. Will
revisit when an interactive terminal layer needs it for
other reasons.
Test fixtures: tests/std_ch/test_join_hash.prg verifies the new
ON-form path produces the same output as the FOR form would.
std.ch runner now stands at 16/16.
Other gates green:
go test ./... : PASS
FiveSql2 SQL:1999 : 43/43
Harbour compat : 56/56
std.ch suite : 16/16
FRB suite : 7/7
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -28,6 +28,7 @@
|
|||||||
package pp
|
package pp
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -40,6 +41,13 @@ type Rule struct {
|
|||||||
Keyword string // first keyword (for fast matching)
|
Keyword string // first keyword (for fast matching)
|
||||||
Markers []Marker // parsed pattern markers
|
Markers []Marker // parsed pattern markers
|
||||||
ResultTmpl string // result template with marker references
|
ResultTmpl string // result template with marker references
|
||||||
|
|
||||||
|
// Warnings collected during ParseRule. Currently only one source:
|
||||||
|
// result-template markers that reference a name absent from the
|
||||||
|
// pattern. Caller can surface these to the user — a typo'd
|
||||||
|
// `<For>` instead of `<for>` used to silently produce broken
|
||||||
|
// expansion output.
|
||||||
|
Warnings []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
|
// Marker represents a pattern marker like <x>, <!x!>, <x,...>, <*x*>.
|
||||||
@@ -105,9 +113,86 @@ func ParseRule(directive string, isCommand, caseSens bool) *Rule {
|
|||||||
// Parse markers from pattern
|
// Parse markers from pattern
|
||||||
rule.Markers = parseMarkers(pattern)
|
rule.Markers = parseMarkers(pattern)
|
||||||
|
|
||||||
|
// Validate result-template marker references. Each `<name>`
|
||||||
|
// (and its smart-stringify / blockify / logify / dumb-stringify
|
||||||
|
// variants) must reference a name declared in the pattern.
|
||||||
|
// Catches typos like `<For>` vs `<for>` (case-sensitive
|
||||||
|
// xcommand) before they silently produce broken output at
|
||||||
|
// expansion time.
|
||||||
|
rule.Warnings = validateResultMarkers(pattern, result, rule.Markers, caseSens)
|
||||||
|
|
||||||
return rule
|
return rule
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// validateResultMarkers scans the result template for marker
|
||||||
|
// references and reports any name not declared in the pattern.
|
||||||
|
// Result returned as a slice of human-readable warning strings —
|
||||||
|
// caller decides whether to surface or ignore.
|
||||||
|
func validateResultMarkers(pattern, result string, markers []Marker, caseSens bool) []string {
|
||||||
|
declared := make(map[string]bool, len(markers))
|
||||||
|
for _, m := range markers {
|
||||||
|
key := m.Name
|
||||||
|
if !caseSens {
|
||||||
|
key = strings.ToUpper(key)
|
||||||
|
}
|
||||||
|
declared[key] = true
|
||||||
|
}
|
||||||
|
if len(declared) == 0 {
|
||||||
|
// Nothing to validate against — rule is keyword-only.
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var warnings []string
|
||||||
|
seen := map[string]bool{}
|
||||||
|
i := 0
|
||||||
|
for i < len(result) {
|
||||||
|
// Marker shapes recognised here mirror applyResult's loop:
|
||||||
|
// <name>, <(name)>, <.name.>, <{name}>, <"name">, #<name>.
|
||||||
|
// findMarkerEnd already understands all of them — we just
|
||||||
|
// need the inner identifier.
|
||||||
|
if result[i] != '<' && !(result[i] == '#' && i+1 < len(result) && result[i+1] == '<') {
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
start := i
|
||||||
|
if result[i] == '#' {
|
||||||
|
start = i + 1
|
||||||
|
}
|
||||||
|
end := findMarkerEnd(result, start)
|
||||||
|
if end == 0 {
|
||||||
|
i++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Extract identifier between the wrappers.
|
||||||
|
inner := result[start+1 : end-1]
|
||||||
|
// Strip prefix `(`, `.`, `"`, `{`
|
||||||
|
for len(inner) > 0 && (inner[0] == '(' || inner[0] == '.' || inner[0] == '"' || inner[0] == '{') {
|
||||||
|
inner = inner[1:]
|
||||||
|
}
|
||||||
|
// Strip suffix `)`, `.`, `"`, `}`
|
||||||
|
for len(inner) > 0 {
|
||||||
|
c := inner[len(inner)-1]
|
||||||
|
if c == ')' || c == '.' || c == '"' || c == '}' || c == ' ' {
|
||||||
|
inner = inner[:len(inner)-1]
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
key := inner
|
||||||
|
if !caseSens {
|
||||||
|
key = strings.ToUpper(key)
|
||||||
|
}
|
||||||
|
if key != "" && !declared[key] && !seen[key] {
|
||||||
|
seen[key] = true
|
||||||
|
warnings = append(warnings,
|
||||||
|
fmt.Sprintf("result-template marker <%s> not declared in pattern: %q",
|
||||||
|
inner, pattern))
|
||||||
|
}
|
||||||
|
i = end
|
||||||
|
}
|
||||||
|
return warnings
|
||||||
|
}
|
||||||
|
|
||||||
// parseMarkers extracts all <...> markers from a pattern.
|
// parseMarkers extracts all <...> markers from a pattern.
|
||||||
func parseMarkers(pattern string) []Marker {
|
func parseMarkers(pattern string) []Marker {
|
||||||
var markers []Marker
|
var markers []Marker
|
||||||
|
|||||||
@@ -378,29 +378,37 @@ func (pp *Preprocessor) handleDirective(filename, directive string, depth int, r
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
// #command / #translate — parse and store rules
|
// #command / #translate — parse and store rules. ParseRule now
|
||||||
if strings.HasPrefix(upper, "COMMAND ") {
|
// validates that result-template marker references resolve to a
|
||||||
if rule := ParseRule(directive[8:], true, false); rule != nil {
|
// pattern marker; any unresolved name flows back as a warning
|
||||||
pp.commands = append(pp.commands, rule)
|
// surfaced via pp.errors with the directive's filename:line so
|
||||||
|
// the user can find the typo (e.g. case-sensitive `<For>` vs
|
||||||
|
// `<for>` in an #xcommand). Without surfacing, the broken
|
||||||
|
// expansion silently produced empty / mangled output at every
|
||||||
|
// call site.
|
||||||
|
registerRule := func(r *Rule, store *[]*Rule) {
|
||||||
|
if r == nil {
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
*store = append(*store, r)
|
||||||
|
for _, w := range r.Warnings {
|
||||||
|
pp.errors = append(pp.errors, fmt.Sprintf("%s:%d: #command: %s", filename, lineNo, w))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(upper, "COMMAND ") {
|
||||||
|
registerRule(ParseRule(directive[8:], true, false), &pp.commands)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(upper, "TRANSLATE ") {
|
if strings.HasPrefix(upper, "TRANSLATE ") {
|
||||||
if rule := ParseRule(directive[10:], false, false); rule != nil {
|
registerRule(ParseRule(directive[10:], false, false), &pp.translates)
|
||||||
pp.translates = append(pp.translates, rule)
|
|
||||||
}
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(upper, "XCOMMAND ") {
|
if strings.HasPrefix(upper, "XCOMMAND ") {
|
||||||
if rule := ParseRule(directive[9:], true, true); rule != nil {
|
registerRule(ParseRule(directive[9:], true, true), &pp.commands)
|
||||||
pp.commands = append(pp.commands, rule)
|
|
||||||
}
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if strings.HasPrefix(upper, "XTRANSLATE ") {
|
if strings.HasPrefix(upper, "XTRANSLATE ") {
|
||||||
if rule := ParseRule(directive[11:], false, true); rule != nil {
|
registerRule(ParseRule(directive[11:], false, true), &pp.translates)
|
||||||
pp.translates = append(pp.translates, rule)
|
|
||||||
}
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -714,6 +722,16 @@ func looksLikeInlineC(body string) bool {
|
|||||||
if strings.HasPrefix(l, "#include <") || strings.HasPrefix(l, `#include "`) {
|
if strings.HasPrefix(l, "#include <") || strings.HasPrefix(l, `#include "`) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// Function-like #define is C-only — Go uses const / generics.
|
||||||
|
// `#define FOO(x) ...`
|
||||||
|
if strings.HasPrefix(l, "#define ") {
|
||||||
|
// Find the name and check for `(` immediately after with
|
||||||
|
// no space (function-like macro signature).
|
||||||
|
rest := strings.TrimSpace(l[8:])
|
||||||
|
if i := strings.IndexAny(rest, " \t("); i > 0 && i < len(rest) && rest[i] == '(' {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
// Bare `HB_FUNC( NAME )` with an unquoted identifier is the
|
// Bare `HB_FUNC( NAME )` with an unquoted identifier is the
|
||||||
// Harbour C FFI macro. The Go-side counterpart is
|
// Harbour C FFI macro. The Go-side counterpart is
|
||||||
// `hbrt.HB_FUNC("NAME", fn)` — lowercase package prefix and a
|
// `hbrt.HB_FUNC("NAME", fn)` — lowercase package prefix and a
|
||||||
@@ -723,12 +741,81 @@ func looksLikeInlineC(body string) bool {
|
|||||||
strings.HasPrefix(l, "HB_FUNC_TRANSLATE(") {
|
strings.HasPrefix(l, "HB_FUNC_TRANSLATE(") {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// `extern "C"` — C / C++ linkage block, never Go.
|
||||||
|
if strings.HasPrefix(l, `extern "C"`) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
// C declarations at line start that have no Go analogue.
|
// C declarations at line start that have no Go analogue.
|
||||||
if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") ||
|
if strings.HasPrefix(l, "typedef ") || strings.HasPrefix(l, "struct ") ||
|
||||||
strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") {
|
strings.HasPrefix(l, "int main(") || strings.HasPrefix(l, "void main(") {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
// C return-type declarations: `int name(`, `char *name(`, etc.
|
||||||
|
// Matching exactly `<C-type> <ident>(` keeps us off Go's
|
||||||
|
// `func name(` (which starts with `func`, not a type word)
|
||||||
|
// and Go variable declarations (which use `:=` or `var`).
|
||||||
|
if isCReturnTypeDecl(l) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// hb_ret*(...) helpers — Harbour's C-side return setters.
|
||||||
|
// hb_retc / hb_retni / hb_retnl / hb_retd / hb_retl / hb_retptr
|
||||||
|
if strings.HasPrefix(l, "hb_ret") {
|
||||||
|
rest := l[6:]
|
||||||
|
if i := strings.IndexByte(rest, '('); i >= 0 {
|
||||||
|
name := rest[:i]
|
||||||
|
if name != "" && allLetters(name) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isCReturnTypeDecl reports whether the line opens a C function
|
||||||
|
// declaration like `int foo(` / `static char* bar(`. We match a
|
||||||
|
// short prefix list of C-only types so a Go declaration like
|
||||||
|
// `func name() int { ... }` doesn't trip this.
|
||||||
|
func isCReturnTypeDecl(l string) bool {
|
||||||
|
cTypePrefixes := []string{
|
||||||
|
"int ", "void ", "char ", "long ", "short ", "double ", "float ",
|
||||||
|
"unsigned ", "signed ", "size_t ", "ssize_t ", "uint",
|
||||||
|
"static int ", "static void ", "static char ", "static long ",
|
||||||
|
}
|
||||||
|
for _, p := range cTypePrefixes {
|
||||||
|
if strings.HasPrefix(l, p) {
|
||||||
|
rest := strings.TrimLeft(l[len(p):], " \t*")
|
||||||
|
// rest should now start with an identifier followed by `(`.
|
||||||
|
if i := strings.IndexByte(rest, '('); i > 0 && i < 50 {
|
||||||
|
name := rest[:i]
|
||||||
|
if allIdentChars(name) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func allLetters(s string) bool {
|
||||||
|
for _, c := range s {
|
||||||
|
if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return s != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func allIdentChars(s string) bool {
|
||||||
|
for i, c := range s {
|
||||||
|
if c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if i > 0 && c >= '0' && c <= '9' {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return s != ""
|
||||||
|
}
|
||||||
|
|||||||
@@ -154,7 +154,26 @@
|
|||||||
|
|
||||||
/* JOIN merges the current ("master") workarea with the named
|
/* JOIN merges the current ("master") workarea with the named
|
||||||
detail alias into a fresh DBF, emitting one output row per
|
detail alias into a fresh DBF, emitting one output row per
|
||||||
master/detail pair where FOR evaluates true. */
|
master/detail pair where FOR evaluates true.
|
||||||
|
|
||||||
|
The ON form takes the equality-key field names directly and
|
||||||
|
activates a hash-join fast path: build a hash over the detail's
|
||||||
|
key column once, then probe per master row — O(N+M) total instead
|
||||||
|
of the FOR form's O(N*M) nested-loop. Use it whenever the
|
||||||
|
join predicate is a simple `master.k = detail.k` equality. The
|
||||||
|
FOR form remains available for arbitrary predicates. Order
|
||||||
|
matters: the ON rule is more specific so it wins.
|
||||||
|
|
||||||
|
Note for callers: ON expects bare field names, not expressions.
|
||||||
|
Five doesn't auto-resolve bare identifiers to fields, but std.ch
|
||||||
|
passes them as quoted strings via <(mfield)> / <(dfield)> so the
|
||||||
|
PP captures the field names verbatim — runtime-side __dbJoin
|
||||||
|
does its own field lookup. */
|
||||||
|
#command JOIN WITH <(alias)> TO <(f)> [FIELDS <fields,...>] ;
|
||||||
|
ON <mfield> = <dfield> => ;
|
||||||
|
__dbJoin( <(alias)>, <(f)>, { <(fields)> }, NIL, ;
|
||||||
|
<(mfield)>, <(dfield)> )
|
||||||
|
|
||||||
#command JOIN [WITH <(alias)>] [TO <(f)>] [FIELDS <fields,...>] ;
|
#command JOIN [WITH <(alias)>] [TO <(f)>] [FIELDS <fields,...>] ;
|
||||||
[FOR <for>] => ;
|
[FOR <for>] => ;
|
||||||
__dbJoin( <(alias)>, <(f)>, { <(fields)> }, <{for}> )
|
__dbJoin( <(alias)>, <(f)>, { <(fields)> }, <{for}> )
|
||||||
|
|||||||
@@ -786,6 +786,33 @@ func nextTmpAlias(prefix string) string {
|
|||||||
return fmt.Sprintf("%s_%d", prefix, n)
|
return fmt.Sprintf("%s_%d", prefix, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// dbHashKey turns a workarea field value into a hash-table key
|
||||||
|
// string. Numeric / Date / Logical / String types are encoded with a
|
||||||
|
// distinct one-byte tag prefix so values that happen to share the
|
||||||
|
// same string form across types ("1" vs 1 vs .T.) don't collide.
|
||||||
|
// NIL is its own bucket — Harbour's `==` says NIL never equals
|
||||||
|
// anything, but for join semantics we treat NIL keys as a single
|
||||||
|
// bucket so the user can still JOIN over rows with missing keys
|
||||||
|
// when both sides are NIL.
|
||||||
|
func dbHashKey(v hbrt.Value) string {
|
||||||
|
switch {
|
||||||
|
case v.IsNil():
|
||||||
|
return "\x00"
|
||||||
|
case v.IsNumeric():
|
||||||
|
return fmt.Sprintf("N\x01%g", v.AsNumDouble())
|
||||||
|
case v.IsLogical():
|
||||||
|
if v.AsBool() {
|
||||||
|
return "L\x01T"
|
||||||
|
}
|
||||||
|
return "L\x01F"
|
||||||
|
case v.IsDate():
|
||||||
|
return fmt.Sprintf("D\x01%d", v.AsJulian())
|
||||||
|
case v.IsString():
|
||||||
|
return "S\x01" + strings.TrimRight(v.AsString(), " ")
|
||||||
|
}
|
||||||
|
return "?\x01" + v.AsString()
|
||||||
|
}
|
||||||
|
|
||||||
// rtlDbNotImpl raises a runtime error explaining which xBase clause
|
// rtlDbNotImpl raises a runtime error explaining which xBase clause
|
||||||
// the user invoked that Five doesn't yet implement. std.ch routes
|
// the user invoked that Five doesn't yet implement. std.ch routes
|
||||||
// SDF / DELIMITED / TO PRINTER / TO FILE variants here so they fail
|
// SDF / DELIMITED / TO PRINTER / TO FILE variants here so they fail
|
||||||
@@ -1780,6 +1807,100 @@ func rtlDbJoin(t *hbrt.Thread) {
|
|||||||
}
|
}
|
||||||
dstArea := wam.AreaAt(dstSel)
|
dstArea := wam.AreaAt(dstSel)
|
||||||
|
|
||||||
|
// Hash-join fast path. When the caller passes master and detail
|
||||||
|
// key field names (params 5 + 6), build a hash table over the
|
||||||
|
// detail in O(M), then scan master in O(N) and probe — total
|
||||||
|
// O(N+M) instead of the nested-loop's O(N*M). For 1k×1k that's
|
||||||
|
// 2k vs 1M operations, and the gap widens fast.
|
||||||
|
masterKeyName := ""
|
||||||
|
detailKeyName := ""
|
||||||
|
if nParams >= 5 && t.Local(5).IsString() {
|
||||||
|
masterKeyName = strings.ToUpper(strings.TrimSpace(t.Local(5).AsString()))
|
||||||
|
}
|
||||||
|
if nParams >= 6 && t.Local(6).IsString() {
|
||||||
|
detailKeyName = strings.ToUpper(strings.TrimSpace(t.Local(6).AsString()))
|
||||||
|
}
|
||||||
|
if masterKeyName != "" && detailKeyName != "" {
|
||||||
|
mkIdx, dkIdx := -1, -1
|
||||||
|
for i := 0; i < master.FieldCount(); i++ {
|
||||||
|
if strings.EqualFold(master.GetFieldInfo(i).Name, masterKeyName) {
|
||||||
|
mkIdx = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i := 0; i < detail.FieldCount(); i++ {
|
||||||
|
if strings.EqualFold(detail.GetFieldInfo(i).Name, detailKeyName) {
|
||||||
|
dkIdx = i
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if mkIdx >= 0 && dkIdx >= 0 {
|
||||||
|
// Build detail hash: key string → list of cached field rows.
|
||||||
|
// We capture each detail row's wanted-field VALUES (not just
|
||||||
|
// rec numbers) so we don't have to re-select the detail area
|
||||||
|
// per probe — saves the WA-switch round trip and keeps the
|
||||||
|
// inner loop tight.
|
||||||
|
type detailRow struct {
|
||||||
|
vals []hbrt.Value
|
||||||
|
}
|
||||||
|
buckets := make(map[string][]detailRow, 1024)
|
||||||
|
wam.SelectByNum(detailSel)
|
||||||
|
detail.GoTop()
|
||||||
|
for !detail.EOF() {
|
||||||
|
k, _ := detail.GetValue(dkIdx)
|
||||||
|
key := dbHashKey(k)
|
||||||
|
row := detailRow{vals: make([]hbrt.Value, 0, len(srcRefs))}
|
||||||
|
for _, r := range srcRefs {
|
||||||
|
if r.isMaster {
|
||||||
|
row.vals = append(row.vals, hbrt.MakeNil()) // master fills later
|
||||||
|
} else {
|
||||||
|
v, _ := detail.GetValue(r.idx)
|
||||||
|
row.vals = append(row.vals, v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buckets[key] = append(buckets[key], row)
|
||||||
|
detail.Skip(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan master, probe detail buckets.
|
||||||
|
wam.SelectByNum(masterSel)
|
||||||
|
master.GoTop()
|
||||||
|
for !master.EOF() {
|
||||||
|
mk, _ := master.GetValue(mkIdx)
|
||||||
|
key := dbHashKey(mk)
|
||||||
|
rows, hit := buckets[key]
|
||||||
|
if hit {
|
||||||
|
// Cache master-side values for this row once.
|
||||||
|
mvals := make([]hbrt.Value, len(srcRefs))
|
||||||
|
for k, r := range srcRefs {
|
||||||
|
if r.isMaster {
|
||||||
|
v, _ := master.GetValue(r.idx)
|
||||||
|
mvals[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wam.SelectByNum(dstSel)
|
||||||
|
for _, drow := range rows {
|
||||||
|
dstArea.Append()
|
||||||
|
for k, r := range srcRefs {
|
||||||
|
if r.isMaster {
|
||||||
|
dstArea.PutValue(k, mvals[k])
|
||||||
|
} else {
|
||||||
|
dstArea.PutValue(k, drow.vals[k])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wam.SelectByNum(masterSel)
|
||||||
|
}
|
||||||
|
master.Skip(1)
|
||||||
|
}
|
||||||
|
goto closeDst
|
||||||
|
}
|
||||||
|
// Key names didn't resolve — fall through to nested-loop with
|
||||||
|
// (likely empty) bFor. User typo is reported as a no-result
|
||||||
|
// JOIN rather than crash; the destination DBF still gets
|
||||||
|
// created (matches Harbour: NO ROWS != error).
|
||||||
|
}
|
||||||
|
|
||||||
wam.SelectByNum(masterSel)
|
wam.SelectByNum(masterSel)
|
||||||
master.GoTop()
|
master.GoTop()
|
||||||
for !master.EOF() {
|
for !master.EOF() {
|
||||||
@@ -1818,6 +1939,8 @@ func rtlDbJoin(t *hbrt.Thread) {
|
|||||||
master.Skip(1)
|
master.Skip(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
closeDst:
|
||||||
|
|
||||||
wam.SelectByNum(dstSel)
|
wam.SelectByNum(dstSel)
|
||||||
closeErr := wam.Close()
|
closeErr := wam.Close()
|
||||||
wam.SelectByNum(masterSel)
|
wam.SelectByNum(masterSel)
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ TESTS=(
|
|||||||
test_unsupported
|
test_unsupported
|
||||||
test_block_comma
|
test_block_comma
|
||||||
test_compound_lhs
|
test_compound_lhs
|
||||||
|
test_join_hash
|
||||||
)
|
)
|
||||||
|
|
||||||
work="$(mktemp -d)"
|
work="$(mktemp -d)"
|
||||||
|
|||||||
50
tests/std_ch/test_join_hash.prg
Normal file
50
tests/std_ch/test_join_hash.prg
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
/* JOIN ... ON hash-join fast path. */
|
||||||
|
|
||||||
|
PROCEDURE Main()
|
||||||
|
LOCAL n
|
||||||
|
|
||||||
|
FErase("cust.dbf"); FErase("ord.dbf"); FErase("out.dbf")
|
||||||
|
|
||||||
|
/* Customers (master) */
|
||||||
|
dbCreate("cust.dbf", { {"CID","N",4,0}, {"CNAME","C",12,0} })
|
||||||
|
USE cust.dbf NEW EXCLUSIVE ALIAS cu
|
||||||
|
dbAppend(); FieldPut(1, 1); FieldPut(2, "Alice")
|
||||||
|
dbAppend(); FieldPut(1, 2); FieldPut(2, "Bob")
|
||||||
|
dbAppend(); FieldPut(1, 3); FieldPut(2, "Carol")
|
||||||
|
dbCommit()
|
||||||
|
dbCloseArea()
|
||||||
|
|
||||||
|
/* Orders (detail) */
|
||||||
|
dbCreate("ord.dbf", { {"OID","N",4,0}, {"CID","N",4,0}, {"AMT","N",8,2} })
|
||||||
|
USE ord.dbf NEW EXCLUSIVE ALIAS od
|
||||||
|
dbAppend(); FieldPut(1, 100); FieldPut(2, 1); FieldPut(3, 50)
|
||||||
|
dbAppend(); FieldPut(1, 101); FieldPut(2, 1); FieldPut(3, 30)
|
||||||
|
dbAppend(); FieldPut(1, 102); FieldPut(2, 2); FieldPut(3, 200)
|
||||||
|
dbAppend(); FieldPut(1, 103); FieldPut(2, 4); FieldPut(3, 99) /* no-match */
|
||||||
|
dbCommit()
|
||||||
|
dbCloseArea()
|
||||||
|
|
||||||
|
USE cust.dbf NEW EXCLUSIVE ALIAS cu
|
||||||
|
USE ord.dbf NEW EXCLUSIVE ALIAS od
|
||||||
|
SELECT cu
|
||||||
|
|
||||||
|
/* Hash-join form */
|
||||||
|
JOIN WITH od TO out.dbf FIELDS cid, cname, oid, amt ON cid = cid
|
||||||
|
|
||||||
|
SELECT cu ; dbCloseArea()
|
||||||
|
SELECT od ; dbCloseArea()
|
||||||
|
|
||||||
|
USE out.dbf NEW EXCLUSIVE
|
||||||
|
COUNT TO n
|
||||||
|
? "Hash-joined rows =", n, "(expect 3)"
|
||||||
|
dbGoTop()
|
||||||
|
DO WHILE !Eof()
|
||||||
|
? " ", FieldGet(1), AllTrim(FieldGet(2)), FieldGet(3), FieldGet(4)
|
||||||
|
dbSkip()
|
||||||
|
ENDDO
|
||||||
|
? "(expect three rows: Alice/100/50, Alice/101/30, Bob/102/200)"
|
||||||
|
dbCloseArea()
|
||||||
|
|
||||||
|
FErase("cust.dbf"); FErase("ord.dbf"); FErase("out.dbf")
|
||||||
|
? "DONE"
|
||||||
|
RETURN
|
||||||
Reference in New Issue
Block a user