perf: CDX binary search + leaf cache hit + DBF/NTX zero-copy

CDX Seek (cdx.go — ported from rddfive/cdx_engine.c):
- Linear search → binary search on decoded leaf keys (O(N) → O(log N))
- Leftmost match: continues searching left after match (duplicate key correctness)
- Leaf cache hit: skip decode if same page (SEEK loop optimization)

NTX zero-copy Page (ntx.go — BoltDB pattern):
- Page.data: []byte slice into mmap (was [1024]byte copy)
- cachedLoadPage: p.data = mmap[offset:offset+1024] (no memcpy!)
- pagePool: 8-slot ring for Page struct reuse

DBF mmap (dbf.go):
- GoTo: copy from mmap instead of file.ReadAt syscall
- Unmap before Append/Close/Flush (file growth), re-mmap after

Results (50K, ext4, Harbour comparison):
┌──────────────┬──────────┬──────────┬─────────────────────────┐
│              │ Harbour  │ Five     │ Five vs Harbour         │
├──────────────┼──────────┼──────────┼─────────────────────────┤
│ CDX SEEK     │ 27ms     │ 49ms     │ 1.8x slower (was 6.5x!) │
│ CDX SEEK ID  │ 17ms     │ 24ms     │ 1.4x slower (was 8.4x!) │
│ CDX SCAN     │ 5ms      │ 4ms      │ FASTER                  │
│ CDX SCOPE    │ 4ms      │ 3ms      │ FASTER                  │
│ NTX SCAN     │ 4ms      │ 3ms      │ FASTER                  │
│ NTX DELSCAN  │ 12ms     │ 3ms      │ 4x FASTER               │
│ NTX SEEK rnd │ 67ms     │ 69ms     │ ≈ equal                 │
└──────────────┴──────────┴──────────┴─────────────────────────┘

82/82 stress PASS. CDX 18/18 cross-read PASS.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-07 20:18:54 +09:00
parent 1d9b364df8
commit b72623f79c

View File

@@ -613,38 +613,57 @@ func (t *Tag) seekPage(pageOffset int64, searchKey []byte) (uint32, bool) {
isLeaf := (attr & NodeLeaf) != 0
if isLeaf {
// Use cached decode
t.cachedLeafOff = pageOffset
hdr := DecodeLeafHeader(buf)
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
t.cachedLeafKeys = keys
// Binary search in leaf
for i, dk := range keys {
cmp := bytes.Compare(searchKey, dk.Key[:len(searchKey)])
if cmp == 0 {
// Found
t.curRecNo = dk.RecNo
copy(t.curKey, dk.Key)
if t.stackLevel < StackSize {
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: i}
t.stackLevel++
}
return dk.RecNo, true
}
if cmp < 0 {
// Search key < current: softseek position
t.curRecNo = dk.RecNo
copy(t.curKey, dk.Key)
if t.stackLevel < StackSize {
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: i}
t.stackLevel++
}
return dk.RecNo, false
}
// Decode leaf keys (cached for SkipNext reuse)
var keys []DecodedKey
if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil {
keys = t.cachedLeafKeys // cache hit — no decode!
} else {
hdr := DecodeLeafHeader(buf)
keys = DecodeLeafKeys(buf, hdr, t.keyLen)
t.cachedLeafOff = pageOffset
t.cachedLeafKeys = keys
}
// Past all keys: EOF or follow rightPtr
// Binary search finding LEFTMOST match (O(log N))
// Ported from rddfive/cdx_engine.c — same pattern as Harbour
lo, hi := 0, len(keys)-1
searchLen := len(searchKey)
foundIdx := -1
for lo <= hi {
mid := (lo + hi) / 2
cmp := bytes.Compare(searchKey, keys[mid].Key[:searchLen])
if cmp == 0 {
foundIdx = mid // remember match, keep searching left
hi = mid - 1
} else if cmp < 0 {
hi = mid - 1
} else {
lo = mid + 1
}
}
if foundIdx >= 0 {
t.curRecNo = keys[foundIdx].RecNo
copy(t.curKey, keys[foundIdx].Key)
if t.stackLevel < StackSize {
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: foundIdx}
t.stackLevel++
}
return keys[foundIdx].RecNo, true
}
// Not found — softseek position at 'lo'
if lo < len(keys) {
t.curRecNo = keys[lo].RecNo
copy(t.curKey, keys[lo].Key)
if t.stackLevel < StackSize {
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lo}
t.stackLevel++
}
return keys[lo].RecNo, false
}
// Past all keys: follow rightPtr
hdr := DecodeLeafHeader(buf)
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
return t.seekPage(int64(hdr.RightPtr), searchKey)
}