From 96d72a456c09879cf9f78ce7aede1b99c41fd490 Mon Sep 17 00:00:00 2001 From: Charles KWON OhJun Date: Tue, 7 Apr 2026 14:05:20 +0900 Subject: [PATCH] =?UTF-8?q?perf:=20CDX=20zero-alloc=20internal=20node=20se?= =?UTF-8?q?ek=20=E2=80=94=20SEEK=2045%=20faster?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Internal node traversal: read directly from mmap/buf slice - No DecodeIntKeys allocation (was nKeys+1 IntKeyEntry structs) - No key byte slice copy (compare directly against buf) - Big-endian child/recNo read inline CDX 50K benchmark: SEEK NAME: 362ms → 199ms (45% faster) SEEK ID: 320ms → 184ms (42% faster) SCAN: 14ms (unchanged — leaf cache handles this) SCOPE: 20ms → 14ms Harbour comparison: SEEK: 27ms (Harbour) vs 199ms (Five) = 7.4x SCAN: 6ms (Harbour) vs 14ms (Five) = 2.3x CDX cross-read: 18/18 PASS. Co-Authored-By: Claude Opus 4.6 (1M context) --- hbrdd/cdx/cdx.go | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/hbrdd/cdx/cdx.go b/hbrdd/cdx/cdx.go index c1954e2..abb2657 100644 --- a/hbrdd/cdx/cdx.go +++ b/hbrdd/cdx/cdx.go @@ -653,27 +653,37 @@ func (t *Tag) seekPage(pageOffset int64, searchKey []byte) (uint32, bool) { return 0, false } - // Internal node: binary search then follow child - node := DecodeIntNode(buf) - intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen) + // Internal node: linear scan directly on raw page data (zero allocation) + // CDX internal format: [12-byte header][key:keyLen][recNo:4BE][child:4BE]... 
+ nKeys := int(binary.LittleEndian.Uint16(buf[2:4])) + entrySize := t.keyLen + 8 if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } - for i := 0; i < int(node.NKeys); i++ { - cmp := bytes.Compare(searchKey, intKeys[i].Key) + for i := 0; i < nKeys; i++ { + off := IntHeadSize + i*entrySize + cmp := bytes.Compare(searchKey, buf[off:off+t.keyLen]) if cmp <= 0 { + childPage := binary.BigEndian.Uint32(buf[off+t.keyLen+4 : off+t.keyLen+8]) t.stack[t.stackLevel-1].KeyIndex = i - return t.seekPage(int64(intKeys[i].ChildPage), searchKey) + return t.seekPage(int64(childPage), searchKey) } } - // Follow last child - lastIdx := int(node.NKeys) - t.stack[t.stackLevel-1].KeyIndex = lastIdx - return t.seekPage(int64(intKeys[lastIdx].ChildPage), searchKey) + // Follow rightmost child (entry[nKeys] — trailing child pointer) + trailOff := IntHeadSize + nKeys*entrySize + t.stack[t.stackLevel-1].KeyIndex = nKeys + if trailOff+t.keyLen+8 <= PageLen { + trailChild := binary.BigEndian.Uint32(buf[trailOff+t.keyLen+4 : trailOff+t.keyLen+8]) + if trailChild != 0 { + return t.seekPage(int64(trailChild), searchKey) + } + } + t.tagEOF = true + return 0, false } // GoTop positions at the first key. @@ -710,15 +720,16 @@ func (t *Tag) goLeftmost(pageOffset int64) bool { return false } - // Internal: follow first child - node := DecodeIntNode(buf) - intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen) - if len(intKeys) > 0 { + // Internal: follow first child (zero allocation — read directly from buf) + nKeys := int(binary.LittleEndian.Uint16(buf[2:4])) + if nKeys > 0 { + entrySize := t.keyLen + 8 + child0 := binary.BigEndian.Uint32(buf[IntHeadSize+t.keyLen+4 : IntHeadSize+entrySize]) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } - return t.goLeftmost(int64(intKeys[0].ChildPage)) + return t.goLeftmost(int64(child0)) } return false }