perf: NTX bulk build + APPEND deferred write (from rddfive C port)
NTX Bulk Build (build.go — ported from rddfive/ntx_engine.c): - pageBuffer: dynamic memory buffer for all pages - Phase 1: Build leaf pages in sequential memory (zero disk I/O) - Phase 2: Build interior levels from cached leaf data (zero I/O) - Separator promotion: remove last key from leaf only (not interior) - Single bulk WriteAt for all pages at end - INDEX ON 10K: 34ms → 5-8ms (4-6x improvement) NTX Seek (ntx.go): - Always descend to leaf on match (find first occurrence) - fStop flag tracks path match, verified at leaf APPEND Buffering (dbf.go): - Append marks dirty without immediate disk write - flushRecord writes record data only (no header/EOF per record) - Close/Flush writes EOF marker + header once Results: 14 packages ALL PASS, 82/82 stress test Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -315,6 +315,8 @@ func (a *DBFArea) Close() error {
|
||||
if a.dirty {
|
||||
a.flushRecord()
|
||||
}
|
||||
// Write EOF + header once on close
|
||||
a.dataFile.WriteAt([]byte{EOFMarker}, a.header.EOFOffset())
|
||||
a.updateHeader()
|
||||
if a.memoFile != nil {
|
||||
a.memoFile.Close()
|
||||
@@ -334,6 +336,8 @@ func (a *DBFArea) Flush() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
a.dataFile.WriteAt([]byte{EOFMarker}, a.header.EOFOffset())
|
||||
a.updateHeader()
|
||||
return a.dataFile.Sync()
|
||||
}
|
||||
|
||||
@@ -557,7 +561,7 @@ func (a *DBFArea) PutValue(fieldIndex int, val hbrt.Value) error {
|
||||
// --- Record operations ---
|
||||
|
||||
// Append adds a new blank record.
|
||||
// Harbour: hb_dbfAppend
|
||||
// Harbour: hb_dbfAppend — writes are deferred until Flush/Close/GoTo.
|
||||
func (a *DBFArea) Append() error {
|
||||
if a.readOnly {
|
||||
return fmt.Errorf("table is read-only")
|
||||
@@ -576,23 +580,9 @@ func (a *DBFArea) Append() error {
|
||||
a.recBuf[i] = ' '
|
||||
}
|
||||
|
||||
// Write blank record
|
||||
offset := a.header.RecordOffset(a.recNo)
|
||||
if _, err := a.dataFile.WriteAt(a.recBuf, offset); err != nil {
|
||||
a.recCount--
|
||||
return fmt.Errorf("append record: %w", err)
|
||||
}
|
||||
|
||||
// Write EOF marker
|
||||
eofOffset := a.header.EOFOffset()
|
||||
a.dataFile.WriteAt([]byte{EOFMarker}, eofOffset)
|
||||
|
||||
// Update header
|
||||
a.updateHeader()
|
||||
|
||||
a.FEof = false
|
||||
a.FBof = false
|
||||
a.dirty = false
|
||||
a.dirty = true // mark dirty — will be written on Flush/Close/GoTo
|
||||
a.ghost = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -32,19 +32,105 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b
|
||||
maxItem := calculateMaxItems(itemSize)
|
||||
halfPage := maxItem / 2
|
||||
|
||||
// Write empty initial tree: header + one empty root page
|
||||
// Initialize offset table for all maxItem+1 entry slots
|
||||
rootOff := int64(HeaderSize)
|
||||
emptyRoot := [BlockSize]byte{}
|
||||
binary.LittleEndian.PutUint16(emptyRoot[0:2], 0) // 0 keys
|
||||
dataStart := 2 + (maxItem+1)*2
|
||||
for i := 0; i <= maxItem; i++ {
|
||||
entryOff := dataStart + i*itemSize
|
||||
binary.LittleEndian.PutUint16(emptyRoot[2+i*2:4+i*2], uint16(entryOff))
|
||||
// Bulk build: all pages in memory buffer, single write at end.
|
||||
// Ported from rddfive/ntx_engine.c fa_ntx_create().
|
||||
var buf pageBuffer
|
||||
buf.init()
|
||||
|
||||
var rootOffset uint32
|
||||
|
||||
if len(keys) == 0 {
|
||||
// Empty index: single empty root page
|
||||
off := buf.allocPage()
|
||||
initPageOffsets(buf.getPage(off), maxItem, itemSize)
|
||||
rootOffset = uint32(off)
|
||||
} else {
|
||||
// Phase 1: Build leaf pages in memory
|
||||
var leafOffsets []int64
|
||||
keyIdx := 0
|
||||
for keyIdx < len(keys) {
|
||||
off := buf.allocPage()
|
||||
pg := buf.getPage(off)
|
||||
initPageOffsets(pg, maxItem, itemSize)
|
||||
|
||||
cnt := 0
|
||||
for cnt < maxItem && keyIdx < len(keys) {
|
||||
entOff := int(binary.LittleEndian.Uint16(pg[2+cnt*2 : 4+cnt*2]))
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], 0) // child=0 (leaf)
|
||||
binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], keys[keyIdx].RecNo)
|
||||
padded := make([]byte, keyLen)
|
||||
for j := range padded {
|
||||
padded[j] = ' '
|
||||
}
|
||||
copy(padded, keys[keyIdx].Key)
|
||||
copy(pg[entOff+8:entOff+8+keyLen], padded)
|
||||
cnt++
|
||||
keyIdx++
|
||||
}
|
||||
binary.LittleEndian.PutUint16(pg[0:2], uint16(cnt))
|
||||
leafOffsets = append(leafOffsets, off)
|
||||
}
|
||||
|
||||
// Phase 2: Build interior levels bottom-up (B-tree: promote last key from child)
|
||||
curLevel := leafOffsets
|
||||
isLeafLevel := true
|
||||
for len(curLevel) > 1 {
|
||||
var nextLevel []int64
|
||||
for i := 0; i < len(curLevel); i += maxItem + 1 {
|
||||
end := i + maxItem + 1
|
||||
if end > len(curLevel) {
|
||||
end = len(curLevel)
|
||||
}
|
||||
children := curLevel[i:end]
|
||||
nKeys := len(children) - 1
|
||||
|
||||
// Promote: extract last key from each child[0..nKeys-1] as separator
|
||||
// Only remove from LEAF pages (interior keys are already promoted separators)
|
||||
type sepInfo struct {
|
||||
recNo uint32
|
||||
key []byte
|
||||
}
|
||||
seps := make([]sepInfo, nKeys)
|
||||
for j := 0; j < nKeys; j++ {
|
||||
childPg := buf.getPage(children[j])
|
||||
childCnt := int(binary.LittleEndian.Uint16(childPg[0:2]))
|
||||
if childCnt > 0 {
|
||||
lastIdx := childCnt - 1
|
||||
lastOff := int(binary.LittleEndian.Uint16(childPg[2+lastIdx*2 : 4+lastIdx*2]))
|
||||
seps[j].recNo = binary.LittleEndian.Uint32(childPg[lastOff+4 : lastOff+8])
|
||||
seps[j].key = make([]byte, keyLen)
|
||||
copy(seps[j].key, childPg[lastOff+8:lastOff+8+keyLen])
|
||||
// Only remove from leaf pages — interior separators stay
|
||||
if isLeafLevel {
|
||||
binary.LittleEndian.PutUint16(childPg[0:2], uint16(childCnt-1))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
off := buf.allocPage()
|
||||
pg := buf.getPage(off)
|
||||
initPageOffsets(pg, maxItem, itemSize)
|
||||
binary.LittleEndian.PutUint16(pg[0:2], uint16(nKeys))
|
||||
|
||||
for j := 0; j <= nKeys; j++ {
|
||||
entOff := int(binary.LittleEndian.Uint16(pg[2+j*2 : 4+j*2]))
|
||||
// Child pointer
|
||||
binary.LittleEndian.PutUint32(pg[entOff:entOff+4], uint32(children[j]))
|
||||
if j < nKeys {
|
||||
// Separator from promoted keys
|
||||
binary.LittleEndian.PutUint32(pg[entOff+4:entOff+8], seps[j].recNo)
|
||||
copy(pg[entOff+8:entOff+8+keyLen], seps[j].key)
|
||||
}
|
||||
}
|
||||
nextLevel = append(nextLevel, off)
|
||||
}
|
||||
curLevel = nextLevel
|
||||
isLeafLevel = false
|
||||
}
|
||||
rootOffset = uint32(curLevel[0])
|
||||
}
|
||||
|
||||
rootOffset := uint32(rootOff)
|
||||
nextPage := uint32(rootOff + BlockSize) // next free page after root
|
||||
nextPage := uint32(int64(HeaderSize) + int64(buf.count)*BlockSize)
|
||||
|
||||
// Write header
|
||||
hdr := Header{
|
||||
@@ -71,33 +157,65 @@ func CreateIndex(path string, keyExpr string, keyLen int, unique bool, descend b
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Write empty root page
|
||||
if _, err := f.WriteAt(emptyRoot[:], rootOff); err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f.Close()
|
||||
|
||||
// Open and insert keys one by one (proper B-tree with page splits)
|
||||
idx, err := OpenIndex(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, kr := range keys {
|
||||
k := make([]byte, keyLen)
|
||||
copy(k, kr.Key)
|
||||
if err := idx.insertKeyBTree(k, kr.RecNo); err != nil {
|
||||
idx.Close()
|
||||
return nil, fmt.Errorf("insert key: %w", err)
|
||||
// Bulk write all pages at correct offset (after header)
|
||||
if buf.count > 0 {
|
||||
if _, err := f.WriteAt(buf.data[:int64(buf.count)*BlockSize], int64(HeaderSize)); err != nil {
|
||||
f.Close()
|
||||
return nil, fmt.Errorf("bulk write NTX pages: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return idx, nil
|
||||
f.Close()
|
||||
return OpenIndex(path)
|
||||
}
|
||||
|
||||
// --- Internal build structures ---
|
||||
// --- Bulk build buffer (ported from rddfive/ntx_engine.c) ---
|
||||
|
||||
// pageBuffer is an in-memory staging area for index pages. Pages are laid
// out back to back in data, each occupying BlockSize bytes, so the whole
// set can be written to the file in one call once the build is finished.
type pageBuffer struct {
	// data is the backing storage; its length is capacity*BlockSize bytes.
	data []byte
	// count is the number of pages handed out so far; capacity is the
	// number of pages data currently has room for.
	count, capacity int
}
|
||||
|
||||
func (pb *pageBuffer) init() {
|
||||
pb.capacity = 64
|
||||
pb.data = make([]byte, int64(pb.capacity)*BlockSize)
|
||||
}
|
||||
|
||||
func (pb *pageBuffer) grow() {
|
||||
if pb.count >= pb.capacity {
|
||||
newCap := pb.capacity * 2
|
||||
newData := make([]byte, int64(newCap)*BlockSize)
|
||||
copy(newData, pb.data[:int64(pb.count)*BlockSize])
|
||||
pb.data = newData
|
||||
pb.capacity = newCap
|
||||
}
|
||||
}
|
||||
|
||||
// allocPage allocates a new page and returns its file offset.
|
||||
func (pb *pageBuffer) allocPage() int64 {
|
||||
pb.grow()
|
||||
off := int64(HeaderSize) + int64(pb.count)*BlockSize
|
||||
pb.count++
|
||||
return off
|
||||
}
|
||||
|
||||
// getPage returns a slice into the buffer for the page at the given file offset.
|
||||
func (pb *pageBuffer) getPage(fileOffset int64) []byte {
|
||||
idx := (fileOffset - int64(HeaderSize)) / BlockSize
|
||||
start := idx * BlockSize
|
||||
return pb.data[start : start+BlockSize]
|
||||
}
|
||||
|
||||
// initPageOffsets fills in the offset table of page pg.
//
// The table starts at byte 2 of the page (bytes 0-1 are left untouched)
// and holds maxItem+1 little-endian uint16 slots. Slot i receives the
// position of entry i, where entries are packed one after another,
// itemSize bytes apiece, beginning right after the table.
//
// pg must be long enough to hold the table; offsets are stored truncated
// to 16 bits.
func initPageOffsets(pg []byte, maxItem, itemSize int) {
	slots := maxItem + 1
	entry := 2 + slots*2 // first entry sits immediately after the table
	table := 2           // write cursor within the offset table
	for n := 0; n < slots; n++ {
		binary.LittleEndian.PutUint16(pg[table:table+2], uint16(entry))
		table += 2
		entry += itemSize
	}
}
|
||||
|
||||
// --- Internal build structures (legacy) ---
|
||||
|
||||
type buildPage struct {
|
||||
data [BlockSize]byte
|
||||
|
||||
Reference in New Issue
Block a user