From d2c17c7898d9bbdc9adfaf4986d5a52c38aa6e11 Mon Sep 17 00:00:00 2001 From: Charles KWON OhJun Date: Tue, 7 Apr 2026 07:49:31 +0900 Subject: [PATCH] =?UTF-8?q?refactor:=20NTX=20B-tree=20rewrite=20=E2=80=94?= =?UTF-8?q?=20proper=20insertion=20with=20page=20splitting?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major rewrite based on Harbour dbfntx1.c analysis: NTX B-tree traversal (ntx.go): - nextKey: rewritten to match hb_ntxTagNextKey exactly - Advance iKey, check right child, descend via goLeftmost - Walk up stack on page exhaustion, truncate stackLevel - prevKey: rewritten to match hb_ntxTagPrevKey - Check left child (only if iKey < keyCount), descend via goRightmost - Walk up stack for BOF detection - goRightmost: internal nodes get iKey=keyCount (rightmost child), leaf nodes get iKey=keyCount-1 (last key) — matches Harbour NTX B-tree build (build.go): - CreateIndex: proper B-tree insertion (insert keys one by one) - insertKeyBTree: search → insert at leaf → propagate splits up - pageInsertKey: Harbour-style offset swapping (not data moving) - pageSplit: collect all entries, split at midpoint, promote separator - Proper offset table initialization for all pages Unit tests: all 5 RDD packages PASS Stress test: partial progress (Seek issues with split pages) Co-Authored-By: Claude Opus 4.6 (1M context) --- hbrdd/ntx/build.go | 320 ++++++++++++++++++++++++++++++++++++++++----- hbrdd/ntx/ntx.go | 148 ++++++++++----------- 2 files changed, 363 insertions(+), 105 deletions(-) diff --git a/hbrdd/ntx/build.go b/hbrdd/ntx/build.go index ddb5509..6372c14 100644 --- a/hbrdd/ntx/build.go +++ b/hbrdd/ntx/build.go @@ -32,37 +32,26 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b maxItem := calculateMaxItems(itemSize) halfPage := maxItem / 2 - // Phase 1: Build leaf pages and assign file offsets immediately - var allPages []*buildPage - nextOffset := int64(HeaderSize) - - leafPages := buildLeafPages(keys, keyLen, itemSize, maxItem, &nextOffset) - allPages = append(allPages, leafPages...) - - if len(leafPages) == 0 { - pg := makeEmptyPage(keyLen, itemSize, maxItem, nextOffset) - nextOffset += BlockSize - allPages = append(allPages, pg) - leafPages = append(leafPages, pg) + // Write empty initial tree: header + one empty root page + // Initialize offset table for all maxItem+1 entry slots + rootOff := int64(HeaderSize) + emptyRoot := [BlockSize]byte{} + binary.LittleEndian.PutUint16(emptyRoot[0:2], 0) // 0 keys + dataStart := 2 + (maxItem+1)*2 + for i := 0; i <= maxItem; i++ { + entryOff := dataStart + i*itemSize + binary.LittleEndian.PutUint16(emptyRoot[2+i*2:4+i*2], uint16(entryOff)) } - // Phase 2: Build internal pages bottom-up - // Each level's pages have offsets already assigned, so children are resolvable. - currentLevel := leafPages - for len(currentLevel) > 1 { - parentLevel := buildInternalLevel(currentLevel, keyLen, itemSize, maxItem, &nextOffset) - allPages = append(allPages, parentLevel...) - currentLevel = parentLevel - } - - rootOffset := uint32(currentLevel[0].fileOffset) + rootOffset := uint32(rootOff) + nextPage := uint32(rootOff + BlockSize) // next free page after root // Write header hdr := Header{ Type: 0x0401, Version: 1, Root: rootOffset, - NextPage: uint32(nextOffset), + NextPage: nextPage, ItemSize: uint16(itemSize), KeySize: uint16(keyLen), KeyDec: 0, @@ -82,16 +71,30 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b return nil, err } - // Write all pages - for _, pg := range allPages { - if _, err := f.WriteAt(pg.data[:], pg.fileOffset); err != nil { - f.Close() - return nil, fmt.Errorf("write NTX page at %d: %w", pg.fileOffset, err) - } + // Write empty root page + if _, err := f.WriteAt(emptyRoot[:], rootOff); err != nil { + f.Close() + return nil, err } f.Close() - return OpenIndex(path) + + // Open and insert keys one by one (proper B-tree with page splits) + idx, err := OpenIndex(path) + if err != nil { + return nil, err + } + + for _, kr := range keys { + k := make([]byte, keyLen) + copy(k, kr.Key) + if err := idx.insertKeyBTree(k, kr.RecNo); err != nil { + idx.Close() + return nil, fmt.Errorf("insert key: %w", err) + } + } + + return idx, nil } // --- Internal build structures --- @@ -250,7 +253,262 @@ func encodeInternalPage(children []*buildPage, keyLen, itemSize, maxItem int, of return pg } -// --- Single key operations --- +// --- B-tree insertion --- + +// insertKeyBTree inserts a single key into the B-tree with proper page splitting. +// Harbour: hb_ntxTagKeyAdd in dbfntx1.c +func (idx *Index) insertKeyBTree(key []byte, recNo uint32) error { + // Search for insertion position + idx.stackLevel = 0 + pageOff := int64(idx.header.Root) + + for { + page, err := LoadPage(idx.file, pageOff) + if err != nil { + return err + } + + iKey := idx.insertSearch(page, key, recNo) + + if idx.stackLevel < StackSize { + idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOff, KeyIndex: iKey} + idx.stackLevel++ + } + + childOff := page.KeyChild(iKey) + if childOff == 0 { + break // at leaf + } + pageOff = int64(childOff) + } + + // Insert at leaf, propagate splits up + var promoteKey []byte + var promoteRecNo uint32 + var promoteChild uint32 + + for level := idx.stackLevel - 1; level >= 0; level-- { + page, err := LoadPage(idx.file, idx.stack[level].PageOffset) + if err != nil { + return err + } + iKey := idx.stack[level].KeyIndex + + var insertKey []byte + var insertRecNo uint32 + var insertChild uint32 + + if level == idx.stackLevel-1 { + // Leaf insertion + insertKey = key + insertRecNo = recNo + insertChild = 0 + } else { + // Promoted key from child split + insertKey = promoteKey + insertRecNo = promoteRecNo + insertChild = promoteChild + } + + if int(page.keyCount) < int(idx.header.MaxItem) { + // Page has room — insert directly + idx.pageInsertKey(page, iKey, insertKey, insertRecNo, insertChild) + page.writeTo(idx.file, idx.stack[level].PageOffset) + return nil + } + + // Page full — split + promoteKey, promoteRecNo, promoteChild, err = idx.pageSplit(page, iKey, insertKey, insertRecNo, insertChild, idx.stack[level].PageOffset) + if err != nil { + return err + } + } + + // Split propagated to root — create new root + newRootOff := int64(idx.header.NextPage) + idx.header.NextPage += uint32(BlockSize) + + newRoot := &Page{data: [BlockSize]byte{}, keyCount: 1} + maxItem := int(idx.header.MaxItem) + itemSize := int(idx.header.ItemSize) + dataStart := 2 + (maxItem+1)*2 + + binary.LittleEndian.PutUint16(newRoot.data[0:2], 1) + // Initialize offset table for all slots + for i := 0; i <= maxItem; i++ { + binary.LittleEndian.PutUint16(newRoot.data[2+i*2:4+i*2], uint16(dataStart+i*itemSize)) + } + + // Entry 0: left child = old root, separator + off0 := dataStart + binary.LittleEndian.PutUint16(newRoot.data[2:4], uint16(off0)) + binary.LittleEndian.PutUint32(newRoot.data[off0:off0+4], idx.header.Root) // old root + binary.LittleEndian.PutUint32(newRoot.data[off0+4:off0+8], promoteRecNo) + copy(newRoot.data[off0+8:off0+8+idx.keyLen], promoteKey) + + // Entry 1: right child = new page + off1 := dataStart + itemSize + binary.LittleEndian.PutUint16(newRoot.data[4:6], uint16(off1)) + binary.LittleEndian.PutUint32(newRoot.data[off1:off1+4], promoteChild) + + newRoot.writeTo(idx.file, newRootOff) + idx.header.Root = uint32(newRootOff) + + // Update header + f := idx.file + f.Seek(0, 0) + WriteHeader(f, &idx.header) + + return nil +} + +// insertSearch finds the insertion position in a page (binary search). +func (idx *Index) insertSearch(page *Page, key []byte, recNo uint32) int { + lo, hi := 0, int(page.keyCount)-1 + for lo <= hi { + mid := (lo + hi) / 2 + cmp := idx.compareKeys(key, page.KeyValue(mid, idx.keyLen)) + if cmp == 0 { + // Equal keys: sort by recNo + midRec := page.KeyRecNo(mid) + if recNo <= midRec { + hi = mid - 1 + } else { + lo = mid + 1 + } + } else if cmp < 0 { + hi = mid - 1 + } else { + lo = mid + 1 + } + } + return lo +} + +// pageInsertKey inserts a key at position iKey in a page. +// Harbour: hb_ntxPageKeyAdd — swaps offsets, writes key data at freed slot. +func (idx *Index) pageInsertKey(page *Page, iKey int, key []byte, recNo uint32, childPage uint32) { + kc := int(page.keyCount) + + // The offset at position kc+1 points to the next free data slot + freeOff := page.keyOffset(kc + 1) + + // Shift offset table right: move [iKey..kc] to [iKey+1..kc+1] + for i := kc + 1; i > iKey; i-- { + prev := page.keyOffset(i - 1) + binary.LittleEndian.PutUint16(page.data[2+i*2:4+i*2], prev) + } + // Put the free slot offset at position iKey + binary.LittleEndian.PutUint16(page.data[2+iKey*2:4+iKey*2], freeOff) + + // Write key data at the free offset + off := int(freeOff) + binary.LittleEndian.PutUint32(page.data[off:off+4], childPage) + binary.LittleEndian.PutUint32(page.data[off+4:off+8], recNo) + padKey := make([]byte, idx.keyLen) + for j := range padKey { + padKey[j] = ' ' + } + copy(padKey, key) + copy(page.data[off+8:off+8+idx.keyLen], padKey) + + page.keyCount++ + binary.LittleEndian.PutUint16(page.data[0:2], page.keyCount) +} + +// pageSplit splits a full page, inserts the new key, and returns the promoted separator. +func (idx *Index) pageSplit(page *Page, iKey int, key []byte, recNo uint32, childPage uint32, pageOff int64) ([]byte, uint32, uint32, error) { + maxItem := int(idx.header.MaxItem) + itemSize := int(idx.header.ItemSize) + + // Collect all keys + new key + type entry struct { + child uint32 + recNo uint32 + key []byte + } + allEntries := make([]entry, 0, int(page.keyCount)+1) + + for i := 0; i < int(page.keyCount); i++ { + if i == iKey { + allEntries = append(allEntries, entry{child: childPage, recNo: recNo, key: append([]byte{}, key...)}) + } + allEntries = append(allEntries, entry{ + child: page.KeyChild(i), + recNo: page.KeyRecNo(i), + key: append([]byte{}, page.KeyValue(i, idx.keyLen)...), + }) + } + if iKey == int(page.keyCount) { + allEntries = append(allEntries, entry{child: childPage, recNo: recNo, key: append([]byte{}, key...)}) + } + // Trailing child + trailingChild := page.KeyChild(int(page.keyCount)) + + total := len(allEntries) + mid := total / 2 + + // Left page (reuse original page) — clear and rebuild + dataStart := 2 + (maxItem+1)*2 + for j := range page.data { + page.data[j] = 0 + } + page.keyCount = 0 + binary.LittleEndian.PutUint16(page.data[0:2], 0) + for i := 0; i <= maxItem; i++ { + binary.LittleEndian.PutUint16(page.data[2+i*2:4+i*2], uint16(dataStart+i*itemSize)) + } + for i := 0; i < mid; i++ { + idx.pageInsertKey(page, i, allEntries[i].key, allEntries[i].recNo, allEntries[i].child) + } + // Set trailing child pointer + trailOff := int(page.keyOffset(mid)) + binary.LittleEndian.PutUint32(page.data[trailOff:trailOff+4], allEntries[mid].child) + page.writeTo(idx.file, pageOff) + + // Promoted separator + promKey := append([]byte{}, allEntries[mid].key...) + promRecNo := allEntries[mid].recNo + + // Right page (new page) — initialize offset table + rightOff := int64(idx.header.NextPage) + idx.header.NextPage += uint32(BlockSize) + + rightPage := &Page{data: [BlockSize]byte{}} + rightCount := total - mid - 1 + binary.LittleEndian.PutUint16(rightPage.data[0:2], uint16(rightCount)) + // Initialize offset table + for i := 0; i <= maxItem; i++ { + binary.LittleEndian.PutUint16(rightPage.data[2+i*2:4+i*2], uint16(dataStart+i*itemSize)) + } + rightPage.keyCount = 0 + for i := 0; i < rightCount; i++ { + srcIdx := mid + 1 + i + idx.pageInsertKey(rightPage, i, allEntries[srcIdx].key, allEntries[srcIdx].recNo, allEntries[srcIdx].child) + } + // Trailing child + rightTrailOff := int(rightPage.keyOffset(rightCount)) + if mid+1+rightCount < len(allEntries) { + binary.LittleEndian.PutUint32(rightPage.data[rightTrailOff:rightTrailOff+4], allEntries[mid+1+rightCount].child) + } else { + binary.LittleEndian.PutUint32(rightPage.data[rightTrailOff:rightTrailOff+4], trailingChild) + } + rightPage.writeTo(idx.file, rightOff) + + // Update header on disk + f := idx.file + f.Seek(0, 0) + WriteHeader(f, &idx.header) + + return promKey, promRecNo, uint32(rightOff), nil +} + +// writeTo writes a page to file at the given offset. +func (p *Page) writeTo(f *os.File, offset int64) { + f.WriteAt(p.data[:], offset) +} + +// --- Single key operations (legacy, uses rebuild) --- func (idx *Index) InsertKey(key []byte, recNo uint32) error { keys := idx.collectAllKeys() diff --git a/hbrdd/ntx/ntx.go b/hbrdd/ntx/ntx.go index 0f4107d..df74acc 100644 --- a/hbrdd/ntx/ntx.go +++ b/hbrdd/ntx/ntx.go @@ -358,89 +358,72 @@ func (idx *Index) compareKeys(key1, key2 []byte) int { // key[i] has left-child at KeyChild(i) and right-child at KeyChild(i+1). // After visiting key[i], the next key is the leftmost key in KeyChild(i+1), // or if no child, key[i+1] in same page, or walk up to parent. +// nextKey moves to the next key in the B-tree. +// Harbour: hb_ntxTagNextKey in dbfntx1.c:2387-2436 func (idx *Index) nextKey() bool { - if idx.stackLevel == 0 { + level := idx.stackLevel - 1 + if level < 0 { return false } - level := idx.stackLevel - 1 page, err := LoadPage(idx.file, idx.stack[level].PageOffset) if err != nil { return false } iKey := idx.stack[level].KeyIndex + var childOff uint32 - // Check right child of current key: KeyChild(iKey+1) - if iKey+1 <= int(page.keyCount) { - childOff := page.KeyChild(iKey + 1) - if childOff != 0 { - // Has right child — go to its leftmost leaf - idx.stack[level].KeyIndex = iKey + 1 - return idx.goLeftmost(int64(childOff)) - } + // Get right child of next position: KeyChild(iKey+1) + if iKey < int(page.keyCount) { + childOff = page.KeyChild(iKey + 1) } - // No right child — try next key in same page - if iKey+1 < int(page.keyCount) { + if childOff != 0 || iKey+1 < int(page.keyCount) { + // Advance to next key position idx.stack[level].KeyIndex = iKey + 1 + + if childOff != 0 { + // Has right child — descend to its leftmost leaf + return idx.goLeftmost(int64(childOff)) + } + // No child — next key is in same page (leaf) idx.curRecNo = page.KeyRecNo(iKey + 1) copy(idx.curKey, page.KeyValue(iKey+1, idx.keyLen)) return true } - // End of page — walk up the stack - // When ascending, stack[level].KeyIndex points to the child we descended into. - // The next unvisited key in the parent is at that same KeyIndex - // (it's the separator AFTER the child). But if we descended via KeyChild(iKey+1) - // at line 377 (setting KeyIndex=iKey+1), then on ascent that separator was already - // visited before descending. So we need to check if the key at KeyIndex has been - // visited (recNo matches curRecNo) and skip if so. - for level > 0 { - level-- + // Past end of page — walk up the stack to find ancestor with unvisited key + for level--; level >= 0; level-- { page, err = LoadPage(idx.file, idx.stack[level].PageOffset) if err != nil { return false } - ki := idx.stack[level].KeyIndex - if ki < int(page.keyCount) { - recNo := page.KeyRecNo(ki) - if recNo != 0 && recNo != idx.curRecNo { - // This key hasn't been visited yet - idx.stackLevel = level + 1 - idx.curRecNo = recNo - copy(idx.curKey, page.KeyValue(ki, idx.keyLen)) - return true - } - // Already visited — advance and try next - idx.stack[level].KeyIndex = ki + 1 - if ki+1 < int(page.keyCount) { - // Check right child first - childOff := page.KeyChild(ki + 1) - if childOff != 0 { - idx.stack[level].KeyIndex = ki + 1 - idx.stackLevel = level + 1 - return idx.goLeftmost(int64(childOff)) - } - idx.stackLevel = level + 1 - idx.curRecNo = page.KeyRecNo(ki + 1) - copy(idx.curKey, page.KeyValue(ki+1, idx.keyLen)) - return true - } + if idx.stack[level].KeyIndex < int(page.keyCount) { + break } } - return false // EOF + if level < 0 { + return false // EOF — exhausted entire tree + } + + // Found ancestor with unvisited key — truncate stack + idx.stackLevel = level + 1 + ki := idx.stack[level].KeyIndex + idx.curRecNo = page.KeyRecNo(ki) + copy(idx.curKey, page.KeyValue(ki, idx.keyLen)) + return true } // prevKey moves to the previous key in index order. -// Harbour: hb_ntxTagPrevKey in dbfntx1.c:2432 +// Harbour: hb_ntxTagPrevKey in dbfntx1.c:2441-2492 func (idx *Index) prevKey() bool { - if idx.stackLevel == 0 { + level := idx.stackLevel - 1 + if level < 0 { return false } - level := idx.stackLevel - 1 page, err := LoadPage(idx.file, idx.stack[level].PageOffset) if err != nil { return false @@ -448,37 +431,45 @@ func (idx *Index) prevKey() bool { iKey := idx.stack[level].KeyIndex - // Check child at current position - childOff := page.KeyChild(iKey) - if childOff != 0 { - return idx.goRightmost(int64(childOff)) + // Check left child at current position: KeyChild(iKey) + // Only if iKey < keyCount (iKey == keyCount is the trailing child slot, not a real key) + if iKey < int(page.keyCount) { + childOff := page.KeyChild(iKey) + if childOff != 0 { + // Has left child — descend to its rightmost leaf + return idx.goRightmost(int64(childOff)) + } } if iKey > 0 { - // Previous key in same page + // Previous key in same page (leaf) idx.stack[level].KeyIndex = iKey - 1 idx.curRecNo = page.KeyRecNo(iKey - 1) copy(idx.curKey, page.KeyValue(iKey-1, idx.keyLen)) return true } - // Walk up - for level > 0 { - level-- + // First key in page, no left child — walk up to find ancestor + for level--; level >= 0; level-- { page, err = LoadPage(idx.file, idx.stack[level].PageOffset) if err != nil { return false } if idx.stack[level].KeyIndex > 0 { idx.stack[level].KeyIndex-- - idx.stackLevel = level + 1 - idx.curRecNo = page.KeyRecNo(idx.stack[level].KeyIndex) - copy(idx.curKey, page.KeyValue(idx.stack[level].KeyIndex, idx.keyLen)) - return true + break } } - return false // BOF + if level < 0 { + return false // BOF + } + + idx.stackLevel = level + 1 + ki := idx.stack[level].KeyIndex + idx.curRecNo = page.KeyRecNo(ki) + copy(idx.curKey, page.KeyValue(ki, idx.keyLen)) + return true } // goLeftmost traverses to the leftmost (smallest) key from a page. @@ -509,6 +500,8 @@ func (idx *Index) goLeftmost(pageOffset int64) bool { } // goRightmost traverses to the rightmost (largest) key from a page. +// Harbour: hb_ntxPageBottomMove — internal nodes get ikey=keyCount (rightmost child), +// leaf nodes get ikey=keyCount-1 (last key). func (idx *Index) goRightmost(pageOffset int64) bool { for { page, err := LoadPage(idx.file, pageOffset) @@ -516,23 +509,30 @@ func (idx *Index) goRightmost(pageOffset int64) bool { return false } + // Try rightmost child (at keyCount position) + childOff := page.KeyChild(int(page.keyCount)) + if childOff != 0 { + // Internal node: set ikey to keyCount (rightmost child position) + if idx.stackLevel < StackSize { + idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: int(page.keyCount)} + idx.stackLevel++ + } + pageOffset = int64(childOff) + continue + } + + // Leaf: set ikey to last key lastKey := int(page.keyCount) - 1 if idx.stackLevel < StackSize { idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastKey} idx.stackLevel++ } - - // Try rightmost child (at keyCount position) - childOff := page.KeyChild(int(page.keyCount)) - if childOff == 0 { - if lastKey >= 0 { - idx.curRecNo = page.KeyRecNo(lastKey) - copy(idx.curKey, page.KeyValue(lastKey, idx.keyLen)) - return true - } - return false + if lastKey >= 0 { + idx.curRecNo = page.KeyRecNo(lastKey) + copy(idx.curKey, page.KeyValue(lastKey, idx.keyLen)) + return true } - pageOffset = int64(childOff) + return false } }