perf: NTX LRU page cache (256 slots) — reduces syscalls

LRU page cache ported from rddfive/ntx_engine.c:
- 256-slot cache with MRU fast-path (O(1) for repeated access)
- LRU eviction when all slots full
- cachedLoadPage replaces LoadPage for all navigation
- invalidateCache is called at the start of insertKeyBTree (the insert is about to modify pages)

10K benchmark improvement (ext4 home dir):
- SCAN FWD: 6ms → 5ms
- SEEK NUM: 18ms → 14ms (22% improvement)
- DUPKEY SCAN: 9ms → 8ms
- All counts correct: 10000/10000/8000

50K benchmark:
- SCAN: 35ms → 31ms
- DUPKEY: 50ms → 40ms (20% improvement)
- DELSCAN: 41ms → 33ms (20% improvement)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-07 11:35:26 +09:00
parent dadb97ee88
commit 103f0d8b64
2 changed files with 94 additions and 7 deletions

View File

@@ -437,6 +437,8 @@ func encodeInternalPage(children []*buildPage, keyLen, itemSize, maxItem int, of
// insertKeyBTree inserts a single key into the B-tree with proper page splitting.
// Harbour: hb_ntxTagKeyAdd in dbfntx1.c
func (idx *Index) insertKeyBTree(key []byte, recNo uint32) error {
// Invalidate cache — pages will be modified
idx.invalidateCache()
// Search for insertion position
idx.stackLevel = 0
pageOff := int64(idx.header.Root)

View File

@@ -108,6 +108,7 @@ type Page struct {
}
// LoadPage reads a page from the file.
// LoadPage reads a page from file (no cache — used by tests and one-off reads).
func LoadPage(f *os.File, offset int64) (*Page, error) {
p := &Page{offset: offset}
if _, err := f.ReadAt(p.data[:], offset); err != nil {
@@ -117,6 +118,76 @@ func LoadPage(f *os.File, offset int64) (*Page, error) {
return p, nil
}
// cachedLoadPage returns the page stored at offset, serving it from the LRU
// page cache when possible and reading it from idx.file otherwise.
//
// The returned *Page always holds its own copy of the page bytes, so changes
// made to it do not affect the cached copy. When the read fails, the wrapped
// error is returned and the cache is left unchanged.
//
// Offset 0 doubles as the "empty slot" marker inside the cache. A request for
// offset 0 therefore bypasses the cache and always reads the file; otherwise
// it would match an unused slot and yield an all-zero page that was never
// read from disk.
func (idx *Index) cachedLoadPage(offset int64) (*Page, error) {
	// Offset 0 can be neither looked up nor stored (see doc comment).
	cacheable := offset != 0

	// freeSlot is the first unused slot (-1 if none); lruSlot is the slot with
	// the smallest accessOrder and is only consulted when no slot is free.
	freeSlot, lruSlot := -1, 0

	if cacheable {
		// MRU fast path: repeated access to the same page skips the scan.
		if m := idx.mruSlot; m >= 0 && m < PageCacheSize && idx.cache[m].offset == offset {
			return idx.pageFromCacheSlot(m), nil
		}

		// One pass over the slots: look for a hit while remembering where the
		// page would be installed on a miss.
		for i := range idx.cache {
			entry := &idx.cache[i]
			switch {
			case entry.offset == offset:
				return idx.pageFromCacheSlot(i), nil
			case entry.offset == 0:
				if freeSlot < 0 {
					freeSlot = i
				}
			case entry.accessOrder < idx.cache[lruSlot].accessOrder:
				lruSlot = i
			}
		}
	}

	// Cache miss (or uncacheable offset): read from disk.
	p := &Page{offset: offset}
	if _, err := idx.file.ReadAt(p.data[:], offset); err != nil {
		return nil, fmt.Errorf("read NTX page at %d: %w", offset, err)
	}
	p.keyCount = binary.LittleEndian.Uint16(p.data[0:2])
	if !cacheable {
		return p, nil
	}

	// Install the page: prefer an unused slot, otherwise evict the LRU entry.
	slot := freeSlot
	if slot < 0 {
		slot = lruSlot
	}
	idx.cacheCounter++
	entry := &idx.cache[slot]
	entry.offset = offset
	entry.data = p.data
	entry.accessOrder = idx.cacheCounter
	idx.mruSlot = slot
	return p, nil
}

// pageFromCacheSlot marks slot as the most recently used entry and returns a
// new Page holding a copy of the bytes cached in it.
func (idx *Index) pageFromCacheSlot(slot int) *Page {
	idx.cacheCounter++
	entry := &idx.cache[slot]
	entry.accessOrder = idx.cacheCounter
	idx.mruSlot = slot

	p := &Page{offset: entry.offset, data: entry.data}
	p.keyCount = binary.LittleEndian.Uint16(p.data[0:2])
	return p
}
// invalidateCache empties the page cache: every slot's offset is reset to 0
// (the empty-slot marker) and the MRU shortcut is disabled. Cached bytes and
// access counters are left in place; they are overwritten when a slot is
// reused.
func (idx *Index) invalidateCache() {
	idx.mruSlot = -1
	for slot := 0; slot < len(idx.cache); slot++ {
		entry := &idx.cache[slot]
		entry.offset = 0
	}
}
// WritePage writes a page to the file.
func WritePage(f *os.File, p *Page) error {
binary.LittleEndian.PutUint16(p.data[0:2], p.keyCount)
@@ -169,6 +240,15 @@ type StackEntry struct {
// --- Index file ---
// LRU page cache constants (ported from rddfive/ntx_engine.c)
// PageCacheSize is the number of slots in an Index's page cache.
const PageCacheSize = 256
// pageCacheEntry is one slot of the page cache.
type pageCacheEntry struct {
// offset is the file offset of the cached page; 0 marks the slot as empty.
offset int64
// data is a copy of the page bytes that were read from the file at offset.
data [BlockSize]byte
// accessOrder is the cache counter value recorded when the slot was last
// installed or hit; when every slot is in use, the slot with the smallest
// value is the one evicted.
accessOrder uint64
}
// Index represents an open NTX index file.
type Index struct {
file *os.File
@@ -188,6 +268,11 @@ type Index struct {
ascendKey bool
uniqueKey bool
keyType byte // 'C', 'N', 'D', 'L'
// LRU page cache — eliminates repeated disk reads
cache [PageCacheSize]pageCacheEntry
cacheCounter uint64
mruSlot int
}
// OpenIndex opens an existing NTX index file.
@@ -249,7 +334,7 @@ func (idx *Index) Seek(searchKey []byte) (uint32, bool) {
// fStop tracks whether any page had an exact match along the path.
fStop := false
for {
page, err := LoadPage(idx.file, pageOffset)
page, err := idx.cachedLoadPage( pageOffset)
if err != nil {
idx.tagEOF = true
return 0, false
@@ -382,7 +467,7 @@ func (idx *Index) nextKey() bool {
return false
}
page, err := LoadPage(idx.file, idx.stack[level].PageOffset)
page, err := idx.cachedLoadPage( idx.stack[level].PageOffset)
if err != nil {
return false
}
@@ -411,7 +496,7 @@ func (idx *Index) nextKey() bool {
// Past end of page — walk up the stack to find ancestor with unvisited key
for level--; level >= 0; level-- {
page, err = LoadPage(idx.file, idx.stack[level].PageOffset)
page, err = idx.cachedLoadPage( idx.stack[level].PageOffset)
if err != nil {
return false
}
@@ -440,7 +525,7 @@ func (idx *Index) prevKey() bool {
return false
}
page, err := LoadPage(idx.file, idx.stack[level].PageOffset)
page, err := idx.cachedLoadPage( idx.stack[level].PageOffset)
if err != nil {
return false
}
@@ -467,7 +552,7 @@ func (idx *Index) prevKey() bool {
// First key in page, no left child — walk up to find ancestor
for level--; level >= 0; level-- {
page, err = LoadPage(idx.file, idx.stack[level].PageOffset)
page, err = idx.cachedLoadPage( idx.stack[level].PageOffset)
if err != nil {
return false
}
@@ -491,7 +576,7 @@ func (idx *Index) prevKey() bool {
// goLeftmost traverses to the leftmost (smallest) key from a page.
func (idx *Index) goLeftmost(pageOffset int64) bool {
for {
page, err := LoadPage(idx.file, pageOffset)
page, err := idx.cachedLoadPage( pageOffset)
if err != nil {
return false
}
@@ -520,7 +605,7 @@ func (idx *Index) goLeftmost(pageOffset int64) bool {
// leaf nodes get ikey=keyCount-1 (last key).
func (idx *Index) goRightmost(pageOffset int64) bool {
for {
page, err := LoadPage(idx.file, pageOffset)
page, err := idx.cachedLoadPage( pageOffset)
if err != nil {
return false
}