perf: CGo review — slab alloc, compareKeys simplify, zero-alloc padCopy

From CGo expert review (verdict: stay pure Go, CGo would be slower):

CDX DecodeLeafKeys slab allocation (cdx.go):
- Single make() for all keys + prevKey (was 30+ allocs per page)
- Keys are sub-slices of the pre-allocated slab (no per-key allocation; all keys share one backing array)

NTX compareKeys simplified (ntx.go):
- bytes.Compare already returns normalized -1/0/+1
- Removed redundant normalization branches

NTX build.go zero-alloc:
- padCopy: copy, then space-fill only the tail (was: space-fill all of dst, then copy)
- setKeyEntry: write directly to page data (no temp buffer)

82/82 stress PASS. 14 packages ALL PASS.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-07 22:44:56 +09:00
parent 197720f869
commit 0102c3c94e
3 changed files with 16 additions and 32 deletions

View File

@@ -167,8 +167,10 @@ func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey {
dcMask := uint32((1 << uint(dupBits)) - 1)
tcMask := uint32((1 << uint(trlBits)) - 1)
// Slab allocation: one alloc for all keys (avoids 30+ allocations per page)
keys := make([]DecodedKey, nKeys)
prevKey := make([]byte, keyLen)
slab := make([]byte, nKeys*keyLen+keyLen) // +keyLen for prevKey
prevKey := slab[nKeys*keyLen:]
for j := range prevKey {
prevKey[j] = ' '
}
@@ -191,7 +193,7 @@ func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey {
newBytes := keyLen - dup - trl
key := make([]byte, keyLen)
key := slab[i*keyLen : (i+1)*keyLen]
if dup > 0 {
copy(key[:dup], prevKey[:dup])
}

View File

@@ -200,16 +200,12 @@ func buildOnePage(buf *pageBuffer, keys []KeyRecord, keyLen, maxItem, itemSize i
return off
}
// padCopy writes src into the first keyLen bytes of dst and fills the rest of
// that key field with spaces. It performs no allocation.
//
// The written region is dst[:keyLen], clamped to len(dst):
//   - if src is longer than the field, only the leading bytes that fit are
//     copied (src is truncated, never written past the field);
//   - if src is shorter, the remaining bytes of the field are set to ' ';
//   - bytes of dst beyond the field are left untouched;
//   - a non-positive keyLen (or empty dst) is a no-op.
func padCopy(dst, src []byte, keyLen int) {
	// Never index past the destination, even if the caller's keyLen is larger.
	if keyLen > len(dst) {
		keyLen = len(dst)
	}
	if keyLen <= 0 {
		return
	}

	// Restrict every write to the key field so an over-long src cannot spill
	// into whatever follows it in dst.
	field := dst[:keyLen]
	n := copy(field, src)
	for i := n; i < len(field); i++ {
		field[i] = ' '
	}
}
// rebuildWithInsert creates an NTX using per-key insertion (proper B-tree).
@@ -718,17 +714,16 @@ func (idx *Index) pageSplit(page *Page, iKey int, key []byte, recNo uint32, chil
return promKey, promRecNo, uint32(newPageOff), nil
}
// setKeyEntry writes a key entry at position i in a page.
// setKeyEntry writes a key entry at position i in a page. Zero extra allocation.
func (idx *Index) setKeyEntry(page *Page, i int, child uint32, recNo uint32, key []byte) {
off := int(page.keyOffset(i))
binary.LittleEndian.PutUint32(page.data[off:off+4], child)
binary.LittleEndian.PutUint32(page.data[off+4:off+8], recNo)
padKey := make([]byte, idx.keyLen)
for j := range padKey {
padKey[j] = ' '
dest := page.data[off+8 : off+8+idx.keyLen]
n := copy(dest, key)
for j := n; j < idx.keyLen; j++ {
dest[j] = ' '
}
copy(padKey, key)
copy(page.data[off+8:off+8+idx.keyLen], padKey)
}
// copyKeyEntry copies a full key entry (child+recNo+key) from src page position srcI to dst position dstI.

View File

@@ -420,24 +420,11 @@ func (idx *Index) pageKeyFind(page *Page, searchKey []byte, fNext bool, recNo ui
// compareKeys compares two key values.
// Harbour: hb_ntxValCompare in dbfntx1.c:679
//
// Returns -1, 0 or +1. bytes.Compare already yields exactly those values, so
// no extra normalization is applied.
//
// Only the common length decides the order when key1 is the shorter key: a
// key1 that is a prefix of key2 compares equal (0). When key1 is longer and
// the common prefix matches, key1 sorts after key2 (+1). Passing both slices
// to bytes.Compare untruncated would instead report -1 for the prefix case,
// so key2 is trimmed to len(key1) first.
func (idx *Index) compareKeys(key1, key2 []byte) int {
	if len(key1) < len(key2) {
		key2 = key2[:len(key1)]
	}
	return bytes.Compare(key1, key2)
}
// --- SKIP: navigate through index ---