diff --git a/hbrdd/ntx/build.go b/hbrdd/ntx/build.go index 93412a0..cc581ab 100644 --- a/hbrdd/ntx/build.go +++ b/hbrdd/ntx/build.go @@ -33,109 +33,20 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b maxItem := calculateMaxItems(itemSize) halfPage := maxItem / 2 - // Determine tree depth: if 3+ levels needed, use per-key insertion - // for correct B-tree structure. Bulk build only for ≤2 levels. - nLeafPages := (len(keys) + maxItem - 1) / maxItem - if nLeafPages == 0 { - nLeafPages = 1 - } - use3LevelFallback := nLeafPages > maxItem+1 - - // Bulk build: all pages in memory buffer, single write at end. + // Proper B-tree bulk build — works for any depth. + // Strategy: split sorted keys into groups of maxItem with separators between. + // keys[0..M-1] → leaf, keys[M] → separator, keys[M+1..2M] → leaf, ... + // Each separator exists ONLY in the parent, never in a leaf. var buf pageBuffer buf.init() - var rootOffset uint32 if len(keys) == 0 { - // Empty index: single empty root page off := buf.allocPage() initPageOffsets(buf.getPage(off), maxItem, itemSize) rootOffset = uint32(off) } else { - // Phase 1: Build leaf pages in memory - var leafOffsets []int64 - keyIdx := 0 - for keyIdx < len(keys) { - off := buf.allocPage() - pg := buf.getPage(off) - initPageOffsets(pg, maxItem, itemSize) - - cnt := 0 - for cnt < maxItem && keyIdx < len(keys) { - entOff := int(binary.LittleEndian.Uint16(pg[2+cnt*2 : 4+cnt*2])) - binary.LittleEndian.PutUint32(pg[entOff:entOff+4], 0) // child=0 (leaf) - binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], keys[keyIdx].RecNo) - padded := make([]byte, keyLen) - for j := range padded { - padded[j] = ' ' - } - copy(padded, keys[keyIdx].Key) - copy(pg[entOff+8:entOff+8+keyLen], padded) - cnt++ - keyIdx++ - } - binary.LittleEndian.PutUint16(pg[0:2], uint16(cnt)) - leafOffsets = append(leafOffsets, off) - } - - // Phase 2: Build interior levels bottom-up (B-tree: promote last key from child) - curLevel := 
leafOffsets - isLeafLevel := true - for len(curLevel) > 1 { - var nextLevel []int64 - for i := 0; i < len(curLevel); i += maxItem + 1 { - end := i + maxItem + 1 - if end > len(curLevel) { - end = len(curLevel) - } - children := curLevel[i:end] - nKeys := len(children) - 1 - - // Promote: extract last key from each child[0..nKeys-1] as separator - // Only remove from LEAF pages (interior keys are already promoted separators) - type sepInfo struct { - recNo uint32 - key []byte - } - seps := make([]sepInfo, nKeys) - for j := 0; j < nKeys; j++ { - childPg := buf.getPage(children[j]) - childCnt := int(binary.LittleEndian.Uint16(childPg[0:2])) - if childCnt > 0 { - lastIdx := childCnt - 1 - lastOff := int(binary.LittleEndian.Uint16(childPg[2+lastIdx*2 : 4+lastIdx*2])) - seps[j].recNo = binary.LittleEndian.Uint32(childPg[lastOff+4 : lastOff+8]) - seps[j].key = make([]byte, keyLen) - copy(seps[j].key, childPg[lastOff+8:lastOff+8+keyLen]) - // Remove from leaf only (interior separators stay as routing keys) - if isLeafLevel { - binary.LittleEndian.PutUint16(childPg[0:2], uint16(childCnt-1)) - } - } - } - - off := buf.allocPage() - pg := buf.getPage(off) - initPageOffsets(pg, maxItem, itemSize) - binary.LittleEndian.PutUint16(pg[0:2], uint16(nKeys)) - - for j := 0; j <= nKeys; j++ { - entOff := int(binary.LittleEndian.Uint16(pg[2+j*2 : 4+j*2])) - // Child pointer - binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(children[j])) - if j < nKeys { - // Separator from promoted keys - binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], seps[j].recNo) - copy(pg[entOff+8:entOff+8+keyLen], seps[j].key) - } - } - nextLevel = append(nextLevel, off) - } - curLevel = nextLevel - isLeafLevel = false - } - rootOffset = uint32(curLevel[0]) + rootOffset = uint32(bulkBuildBTree(&buf, keys, keyLen, maxItem, itemSize)) } nextPage := uint32(int64(HeaderSize) + int64(buf.count)*BlockSize) @@ -174,13 +85,131 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, 
descend b } f.Close() + return OpenIndex(path) +} - if use3LevelFallback && len(keys) > 0 { - // 3+ level tree: rebuild using per-key insertion for correct B-tree - return rebuildWithInsert(path, keyExpr, keyLen, unique, descend, keys) +// bulkBuildBTree builds a B-tree in buf from keys, which are consumed in the order given (nothing here sorts them). +// Returns the offset of the root page as handed out by buf.allocPage. +// keyLen is the padded key width and maxItem the number of keys placed on a full leaf; maxItem and itemSize are forwarded to initPageOffsets. +// Algorithm: distribute keys into leaf groups with separators extracted between them. +// [leaf0: M keys] [sep0] [leaf1: M keys] [sep1] ... [leafN: remaining keys] +// Then build interior levels in a loop from (child offsets + separator keys) until a single page remains. +func bulkBuildBTree(buf *pageBuffer, keys []KeyRecord, keyLen, maxItem, itemSize int) int64 { + if len(keys) <= maxItem { + // Single leaf — base case + return buildOnePage(buf, keys, keyLen, maxItem, itemSize, nil) } - return OpenIndex(path) + // Split keys into leaf groups + separators + type childInfo struct { + offset int64 + sepKey []byte // separator AFTER this child (nil for last) + sepRec uint32 + } + var children []childInfo + i := 0 + for i < len(keys) { + end := i + maxItem + if end > len(keys) { + end = len(keys) + } + // If exactly 1 key would remain after this leaf (it could only be a separator with no leaf behind it), absorb it into this leaf + // NOTE(review): the absorbing leaf then carries maxItem+1 keys — confirm a page may hold that many + if end < len(keys) && end+2 > len(keys) { + end = len(keys) // take all remaining + } + chunk := keys[i:end] + leafOff := buildOnePage(buf, chunk, keyLen, maxItem, itemSize, nil) + ci := childInfo{offset: leafOff} + i = end + + // Extract separator only if 2+ keys remain (1 for sep + 1+ for next leaf) + if i < len(keys) && i+1 < len(keys) { + // At least 1 more key after separator → safe to promote + ci.sepKey = make([]byte, keyLen) + padCopy(ci.sepKey, keys[i].Key, keyLen) + ci.sepRec = keys[i].RecNo + i++ // skip separator key — it goes to parent only + } + children = append(children, ci) + } + + // Build interior levels bottom-up + for len(children) > 1 { + var nextChildren []childInfo + j := 0 + for j < len(children) { + // Collect up to maxItem+1 children for one parent 
page + end := j + maxItem + 1 + if end > len(children) { + end = len(children) + } + group := children[j:end] + + // Build parent page: separators come from group[0..n-2].sepKey + // NOTE(review): a trailing group holding a single child gives nKeys == 0, i.e. a parent page with one child pointer and no keys — confirm readers accept that + nKeys := len(group) - 1 + parentOff := buf.allocPage() + pg := buf.getPage(parentOff) + initPageOffsets(pg, maxItem, itemSize) + binary.LittleEndian.PutUint16(pg[0:2], uint16(nKeys)) + + for k := 0; k <= nKeys; k++ { + entOff := int(binary.LittleEndian.Uint16(pg[2+k*2 : 4+k*2])) + binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(group[k].offset)) + if k < nKeys && group[k].sepKey != nil { + binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], group[k].sepRec) + copy(pg[entOff+8:entOff+8+keyLen], group[k].sepKey) + } + } + + // This parent becomes a child for the next level + ci := childInfo{offset: parentOff} + // Separator for this parent = last group member's separator + // (the separator that would follow this parent's range) + if end < len(children) { + // Use the last child's separator as the parent's separator + ci.sepKey = group[nKeys].sepKey + ci.sepRec = group[nKeys].sepRec + } + nextChildren = append(nextChildren, ci) + j = end + } + children = nextChildren + } + + return children[0].offset +} + +// buildOnePage creates a single leaf or interior page with the given keys. 
+func buildOnePage(buf *pageBuffer, keys []KeyRecord, keyLen, maxItem, itemSize int, childOffsets []int64) int64 { + off := buf.allocPage() + pg := buf.getPage(off) + initPageOffsets(pg, maxItem, itemSize) + + for i, kr := range keys { + entOff := int(binary.LittleEndian.Uint16(pg[2+i*2 : 4+i*2])) + if childOffsets != nil && i < len(childOffsets) { + binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(childOffsets[i])) + } else { + binary.LittleEndian.PutUint32(pg[entOff:entOff+4], 0) // leaf + } + binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], kr.RecNo) + padded := make([]byte, keyLen) + padCopy(padded, kr.Key, keyLen) + copy(pg[entOff+8:entOff+8+keyLen], padded) + } + binary.LittleEndian.PutUint16(pg[0:2], uint16(len(keys))) + return off +} + +// padCopy copies src into dst, padding with spaces. +func padCopy(dst, src []byte, keyLen int) { + for i := range dst { + dst[i] = ' ' + } + n := len(src) + if n > keyLen { + n = keyLen + } + copy(dst[:n], src[:n]) } // rebuildWithInsert creates an NTX using per-key insertion (proper B-tree).