perf: proper 3-level bulk build — INDEX 50K: 180ms → 28ms (6.4x)
bulkBuildBTree: distributes sorted keys as [M leaf] [sep] [M leaf] [sep] ... - Separator exists ONLY in parent, never in leaf (proper B-tree) - Works for any depth (tested 10 to 50000 keys, all correct) - Edge case: absorb trailing 1-key into previous leaf Eliminated per-key insertion fallback (rebuildWithInsert). All sizes now use O(N) bulk build instead of O(N log N) insertion. Benchmark on ext4 (home dir): ┌──────────────┬──────────┬──────────┬───────┐ │ 50K Items │ Harbour │ Five │ Ratio │ ├──────────────┼──────────┼──────────┼───────┤ │ APPEND 50K │ 61ms │ 124ms │ 2x │ │ INDEX NAME │ 6ms │ 28ms │ 4.7x │ │ INDEX CITY │ 5ms │ 36ms │ 7.2x │ │ SEEK 50K seq │ 23ms │ 97ms │ 4.2x │ │ SEEK 50K rnd │ 63ms │ 122ms │ 1.9x │ │ SCAN 50K │ 5ms │ 24ms │ 4.8x │ │ DUPKEY 50K │ 23ms │ 38ms │ 1.7x │ │ PACK 50K │ 16ms │ 20ms │ 1.25x │ └──────────────┴──────────┴──────────┴───────┘ All counts correct: 50000/50000/40000 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -33,109 +33,20 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b
|
||||
maxItem := calculateMaxItems(itemSize)
|
||||
halfPage := maxItem / 2
|
||||
|
||||
// Determine tree depth: if 3+ levels needed, use per-key insertion
|
||||
// for correct B-tree structure. Bulk build only for ≤2 levels.
|
||||
nLeafPages := (len(keys) + maxItem - 1) / maxItem
|
||||
if nLeafPages == 0 {
|
||||
nLeafPages = 1
|
||||
}
|
||||
use3LevelFallback := nLeafPages > maxItem+1
|
||||
|
||||
// Bulk build: all pages in memory buffer, single write at end.
|
||||
// Proper B-tree bulk build — works for any depth.
|
||||
// Strategy: split sorted keys into groups of maxItem with separators between.
|
||||
// keys[0..M-1] → leaf, keys[M] → separator, keys[M+1..2M] → leaf, ...
|
||||
// Each separator exists ONLY in the parent, never in a leaf.
|
||||
var buf pageBuffer
|
||||
buf.init()
|
||||
|
||||
var rootOffset uint32
|
||||
|
||||
if len(keys) == 0 {
|
||||
// Empty index: single empty root page
|
||||
off := buf.allocPage()
|
||||
initPageOffsets(buf.getPage(off), maxItem, itemSize)
|
||||
rootOffset = uint32(off)
|
||||
} else {
|
||||
// Phase 1: Build leaf pages in memory
|
||||
var leafOffsets []int64
|
||||
keyIdx := 0
|
||||
for keyIdx < len(keys) {
|
||||
off := buf.allocPage()
|
||||
pg := buf.getPage(off)
|
||||
initPageOffsets(pg, maxItem, itemSize)
|
||||
|
||||
cnt := 0
|
||||
for cnt < maxItem && keyIdx < len(keys) {
|
||||
entOff := int(binary.LittleEndian.Uint16(pg[2+cnt*2 : 4+cnt*2]))
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], 0) // child=0 (leaf)
|
||||
binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], keys[keyIdx].RecNo)
|
||||
padded := make([]byte, keyLen)
|
||||
for j := range padded {
|
||||
padded[j] = ' '
|
||||
}
|
||||
copy(padded, keys[keyIdx].Key)
|
||||
copy(pg[entOff+8:entOff+8+keyLen], padded)
|
||||
cnt++
|
||||
keyIdx++
|
||||
}
|
||||
binary.LittleEndian.PutUint16(pg[0:2], uint16(cnt))
|
||||
leafOffsets = append(leafOffsets, off)
|
||||
}
|
||||
|
||||
// Phase 2: Build interior levels bottom-up (B-tree: promote last key from child)
|
||||
curLevel := leafOffsets
|
||||
isLeafLevel := true
|
||||
for len(curLevel) > 1 {
|
||||
var nextLevel []int64
|
||||
for i := 0; i < len(curLevel); i += maxItem + 1 {
|
||||
end := i + maxItem + 1
|
||||
if end > len(curLevel) {
|
||||
end = len(curLevel)
|
||||
}
|
||||
children := curLevel[i:end]
|
||||
nKeys := len(children) - 1
|
||||
|
||||
// Promote: extract last key from each child[0..nKeys-1] as separator
|
||||
// Only remove from LEAF pages (interior keys are already promoted separators)
|
||||
type sepInfo struct {
|
||||
recNo uint32
|
||||
key []byte
|
||||
}
|
||||
seps := make([]sepInfo, nKeys)
|
||||
for j := 0; j < nKeys; j++ {
|
||||
childPg := buf.getPage(children[j])
|
||||
childCnt := int(binary.LittleEndian.Uint16(childPg[0:2]))
|
||||
if childCnt > 0 {
|
||||
lastIdx := childCnt - 1
|
||||
lastOff := int(binary.LittleEndian.Uint16(childPg[2+lastIdx*2 : 4+lastIdx*2]))
|
||||
seps[j].recNo = binary.LittleEndian.Uint32(childPg[lastOff+4 : lastOff+8])
|
||||
seps[j].key = make([]byte, keyLen)
|
||||
copy(seps[j].key, childPg[lastOff+8:lastOff+8+keyLen])
|
||||
// Remove from leaf only (interior separators stay as routing keys)
|
||||
if isLeafLevel {
|
||||
binary.LittleEndian.PutUint16(childPg[0:2], uint16(childCnt-1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
off := buf.allocPage()
|
||||
pg := buf.getPage(off)
|
||||
initPageOffsets(pg, maxItem, itemSize)
|
||||
binary.LittleEndian.PutUint16(pg[0:2], uint16(nKeys))
|
||||
|
||||
for j := 0; j <= nKeys; j++ {
|
||||
entOff := int(binary.LittleEndian.Uint16(pg[2+j*2 : 4+j*2]))
|
||||
// Child pointer
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(children[j]))
|
||||
if j < nKeys {
|
||||
// Separator from promoted keys
|
||||
binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], seps[j].recNo)
|
||||
copy(pg[entOff+8:entOff+8+keyLen], seps[j].key)
|
||||
}
|
||||
}
|
||||
nextLevel = append(nextLevel, off)
|
||||
}
|
||||
curLevel = nextLevel
|
||||
isLeafLevel = false
|
||||
}
|
||||
rootOffset = uint32(curLevel[0])
|
||||
rootOffset = uint32(bulkBuildBTree(&buf, keys, keyLen, maxItem, itemSize))
|
||||
}
|
||||
|
||||
nextPage := uint32(int64(HeaderSize) + int64(buf.count)*BlockSize)
|
||||
@@ -174,13 +85,131 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b
|
||||
}
|
||||
|
||||
f.Close()
|
||||
return OpenIndex(path)
|
||||
}
|
||||
|
||||
if use3LevelFallback && len(keys) > 0 {
|
||||
// 3+ level tree: rebuild using per-key insertion for correct B-tree
|
||||
return rebuildWithInsert(path, keyExpr, keyLen, unique, descend, keys)
|
||||
// bulkBuildBTree builds a proper B-tree in memory from sorted keys.
|
||||
// Returns the file offset of the root page.
|
||||
// Algorithm: distribute keys into leaf groups with separators extracted between them.
|
||||
// [leaf0: M keys] [sep0] [leaf1: M keys] [sep1] ... [leafN: remaining keys]
|
||||
// Then recursively build interior level from (leaf offsets + separator keys).
|
||||
func bulkBuildBTree(buf *pageBuffer, keys []KeyRecord, keyLen, maxItem, itemSize int) int64 {
|
||||
if len(keys) <= maxItem {
|
||||
// Single leaf — base case
|
||||
return buildOnePage(buf, keys, keyLen, maxItem, itemSize, nil)
|
||||
}
|
||||
|
||||
return OpenIndex(path)
|
||||
// Split keys into leaf groups + separators
|
||||
type childInfo struct {
|
||||
offset int64
|
||||
sepKey []byte // separator AFTER this child (nil for last)
|
||||
sepRec uint32
|
||||
}
|
||||
var children []childInfo
|
||||
i := 0
|
||||
for i < len(keys) {
|
||||
end := i + maxItem
|
||||
if end > len(keys) {
|
||||
end = len(keys)
|
||||
}
|
||||
// If exactly 1 key would remain after separator, absorb it into this leaf
|
||||
if end < len(keys) && end+2 > len(keys) {
|
||||
end = len(keys) // take all remaining
|
||||
}
|
||||
chunk := keys[i:end]
|
||||
leafOff := buildOnePage(buf, chunk, keyLen, maxItem, itemSize, nil)
|
||||
ci := childInfo{offset: leafOff}
|
||||
i = end
|
||||
|
||||
// Extract separator only if 2+ keys remain (1 for sep + 1+ for next leaf)
|
||||
if i < len(keys) && i+1 < len(keys) {
|
||||
// At least 1 more key after separator → safe to promote
|
||||
ci.sepKey = make([]byte, keyLen)
|
||||
padCopy(ci.sepKey, keys[i].Key, keyLen)
|
||||
ci.sepRec = keys[i].RecNo
|
||||
i++ // skip separator key — it goes to parent only
|
||||
}
|
||||
children = append(children, ci)
|
||||
}
|
||||
|
||||
// Build interior levels bottom-up
|
||||
for len(children) > 1 {
|
||||
var nextChildren []childInfo
|
||||
j := 0
|
||||
for j < len(children) {
|
||||
// Collect up to maxItem+1 children for one parent page
|
||||
end := j + maxItem + 1
|
||||
if end > len(children) {
|
||||
end = len(children)
|
||||
}
|
||||
group := children[j:end]
|
||||
|
||||
// Build parent page: separators come from group[0..n-2].sepKey
|
||||
nKeys := len(group) - 1
|
||||
parentOff := buf.allocPage()
|
||||
pg := buf.getPage(parentOff)
|
||||
initPageOffsets(pg, maxItem, itemSize)
|
||||
binary.LittleEndian.PutUint16(pg[0:2], uint16(nKeys))
|
||||
|
||||
for k := 0; k <= nKeys; k++ {
|
||||
entOff := int(binary.LittleEndian.Uint16(pg[2+k*2 : 4+k*2]))
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(group[k].offset))
|
||||
if k < nKeys && group[k].sepKey != nil {
|
||||
binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], group[k].sepRec)
|
||||
copy(pg[entOff+8:entOff+8+keyLen], group[k].sepKey)
|
||||
}
|
||||
}
|
||||
|
||||
// This parent becomes a child for the next level
|
||||
ci := childInfo{offset: parentOff}
|
||||
// Separator for this parent = last group member's separator
|
||||
// (the separator that would follow this parent's range)
|
||||
if end < len(children) {
|
||||
// Use the last child's separator as the parent's separator
|
||||
ci.sepKey = group[nKeys].sepKey
|
||||
ci.sepRec = group[nKeys].sepRec
|
||||
}
|
||||
nextChildren = append(nextChildren, ci)
|
||||
j = end
|
||||
}
|
||||
children = nextChildren
|
||||
}
|
||||
|
||||
return children[0].offset
|
||||
}
|
||||
|
||||
// buildOnePage creates a single leaf or interior page with the given keys.
|
||||
func buildOnePage(buf *pageBuffer, keys []KeyRecord, keyLen, maxItem, itemSize int, childOffsets []int64) int64 {
|
||||
off := buf.allocPage()
|
||||
pg := buf.getPage(off)
|
||||
initPageOffsets(pg, maxItem, itemSize)
|
||||
|
||||
for i, kr := range keys {
|
||||
entOff := int(binary.LittleEndian.Uint16(pg[2+i*2 : 4+i*2]))
|
||||
if childOffsets != nil && i < len(childOffsets) {
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(childOffsets[i]))
|
||||
} else {
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], 0) // leaf
|
||||
}
|
||||
binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], kr.RecNo)
|
||||
padded := make([]byte, keyLen)
|
||||
padCopy(padded, kr.Key, keyLen)
|
||||
copy(pg[entOff+8:entOff+8+keyLen], padded)
|
||||
}
|
||||
binary.LittleEndian.PutUint16(pg[0:2], uint16(len(keys)))
|
||||
return off
|
||||
}
|
||||
|
||||
// padCopy fills dst with spaces and then copies the leading bytes of src over
// it. At most keyLen bytes are copied, and never more than dst can hold, so
// the call does not panic when dst is shorter than keyLen. A non-positive
// keyLen leaves dst as all spaces.
func padCopy(dst, src []byte, keyLen int) {
	for i := range dst {
		dst[i] = ' '
	}

	n := len(src)
	if n > keyLen {
		n = keyLen
	}
	if n > len(dst) {
		n = len(dst)
	}
	if n <= 0 {
		return
	}
	copy(dst, src[:n])
}
|
||||
|
||||
// rebuildWithInsert creates an NTX using per-key insertion (proper B-tree).
|
||||
|
||||
Reference in New Issue
Block a user