From dadb97ee880079f76019942ba20b01e03428214c Mon Sep 17 00:00:00 2001 From: Charles KWON OhJun Date: Tue, 7 Apr 2026 11:04:07 +0900 Subject: [PATCH] fix: 3-level NTX correctness + CDX SET INDEX TO string quoting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NTX 3-level tree (build.go): - Hybrid approach: bulk build for ≤2 levels, insertKeyBTree for 3+ - rebuildWithInsert: creates proper B-tree via per-key insertion - 5000-key test: Count=5000 Found=5000 (was 5004/4868) CDX SET INDEX TO (gengo.go): - Strip surrounding quotes from string literal in OrderListAdd - Was: idx.OrderListAdd("\"path\"") → file not found - Now: idx.OrderListAdd("path") → correct All tests: - 14 packages ALL PASS - 82/82 NTX stress test - 18/18 CDX cross-read - 50K benchmark: all counts correct Co-Authored-By: Claude Opus 4.6 (1M context) --- compiler/gengo/gengo.go | 7 ++++- hbrdd/ntx/build.go | 63 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/compiler/gengo/gengo.go b/compiler/gengo/gengo.go index 19f5aa1..78a13a1 100644 --- a/compiler/gengo/gengo.go +++ b/compiler/gengo/gengo.go @@ -568,7 +568,12 @@ func (g *Generator) emitStmt(stmt ast.Stmt, locals localMap) { g.writeln("if idx, ok := area.(hbrdd.Indexer); ok {") g.indent++ if fileStr != "" { - g.writeln(fmt.Sprintf(`idx.OrderListAdd(%q)`, fileStr)) + // Strip surrounding quotes from string literals + clean := fileStr + if len(clean) >= 2 && clean[0] == '"' && clean[len(clean)-1] == '"' { + clean = clean[1 : len(clean)-1] + } + g.writeln(fmt.Sprintf(`idx.OrderListAdd(%q)`, clean)) } else { g.emitExpr(s.Expr) g.writeln(`idx.OrderListAdd(t.Pop2().AsString())`) diff --git a/hbrdd/ntx/build.go b/hbrdd/ntx/build.go index 22e6279..3f1e146 100644 --- a/hbrdd/ntx/build.go +++ b/hbrdd/ntx/build.go @@ -32,8 +32,15 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b maxItem := calculateMaxItems(itemSize) halfPage := maxItem / 2 + // Determine tree depth: if 3+ levels needed, use per-key insertion + // for correct B-tree structure. Bulk build only for ≤2 levels. + nLeafPages := (len(keys) + maxItem - 1) / maxItem + if nLeafPages == 0 { + nLeafPages = 1 + } + use3LevelFallback := nLeafPages > maxItem+1 + // Bulk build: all pages in memory buffer, single write at end. - // Ported from rddfive/ntx_engine.c fa_ntx_create(). var buf pageBuffer buf.init() @@ -166,9 +173,63 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b } f.Close() + + if use3LevelFallback && len(keys) > 0 { + // 3+ level tree: rebuild using per-key insertion for correct B-tree + return rebuildWithInsert(path, keyExpr, keyLen, unique, descend, keys) + } + return OpenIndex(path) } +// rebuildWithInsert creates an NTX using per-key insertion (proper B-tree). +// Used for 3+ level trees where bulk build has separator duplication issues. +func rebuildWithInsert(path, keyExpr string, keyLen int, unique, descend bool, keys []KeyRecord) (*Index, error) { + itemSize := 8 + keyLen + maxItem := calculateMaxItems(itemSize) + halfPage := maxItem / 2 + + f, err := os.Create(path) + if err != nil { + return nil, err + } + + rootOff := int64(HeaderSize) + emptyRoot := [BlockSize]byte{} + initPageOffsets(emptyRoot[:], maxItem, itemSize) + + hdr := Header{ + Type: 0x0401, Version: 1, + Root: uint32(rootOff), NextPage: uint32(rootOff + BlockSize), + ItemSize: uint16(itemSize), KeySize: uint16(keyLen), + MaxItem: uint16(maxItem), HalfPage: uint16(halfPage), + } + copy(hdr.KeyExpr[:], keyExpr) + if unique { + hdr.Unique = 1 + } + if descend { + hdr.Descend = 1 + } + WriteHeader(f, &hdr) + f.WriteAt(emptyRoot[:], rootOff) + f.Close() + + idx, err := OpenIndex(path) + if err != nil { + return nil, err + } + for _, kr := range keys { + k := make([]byte, keyLen) + copy(k, kr.Key) + if err := idx.insertKeyBTree(k, kr.RecNo); err != nil { + idx.Close() + return nil, err + } + } + return idx, nil +} + // --- Bulk build buffer (ported from rddfive/ntx_engine.c) --- type pageBuffer struct {