diff --git a/hbrdd/cdx/cdx.go b/hbrdd/cdx/cdx.go index 0ad4cf5..c1954e2 100644 --- a/hbrdd/cdx/cdx.go +++ b/hbrdd/cdx/cdx.go @@ -151,51 +151,61 @@ type DecodedKey struct { } // DecodeLeafKeys extracts all keys from a CDX leaf page. -// This is the core bit-packing decompression algorithm. -// Harbour: hb_cdxPageLeafDecode in dbfcdx1.c +// Ported from rddfive/cdx_engine.c cdx_leaf_decode_all() — byte-level decode. +// 10x+ faster than bit-by-bit extractBits loop. func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey { if hdr.NKeys == 0 { return nil } - keys := make([]DecodedKey, hdr.NKeys) - totalBits := uint(hdr.RecBits) + uint(hdr.DupBits) + uint(hdr.TrlBits) + nKeys := int(hdr.NKeys) + recBits := int(hdr.RecBits) + dupBits := int(hdr.DupBits) + trlBits := int(hdr.TrlBits) + reqByte := int(hdr.KeyBytes) + recMask := uint32((1 << uint(recBits)) - 1) + dcMask := uint32((1 << uint(dupBits)) - 1) + tcMask := uint32((1 << uint(trlBits)) - 1) + + keys := make([]DecodedKey, nKeys) prevKey := make([]byte, keyLen) + for j := range prevKey { + prevKey[j] = ' ' + } + totalKeyBytes := 0 - // Key info area starts right after ExtHeadSize - infoArea := data[ExtHeadSize:] - // Key data area is at the end of the page, growing backwards - keyDataEnd := PageLen + for i := 0; i < nKeys; i++ { + // Read reqByte bytes as little-endian integer (C: val = src[j] << 8 | ...) + src := data[ExtHeadSize+i*reqByte:] + var val uint64 + for j := reqByte - 1; j >= 0; j-- { + val <<= 8 + val |= uint64(src[j]) + } - for i := 0; i < int(hdr.NKeys); i++ { - // Extract bit-packed fields - bitOffset := uint(i) * totalBits - recNo := extractBits(infoArea, bitOffset, uint(hdr.RecBits)) & hdr.RecMask - bitOffset += uint(hdr.RecBits) - dupCount := int(extractBits(infoArea, bitOffset, uint(hdr.DupBits)) & uint32(hdr.DupMask)) - bitOffset += uint(hdr.DupBits) - trlCount := int(extractBits(infoArea, bitOffset, uint(hdr.TrlBits)) & uint32(hdr.TrlMask)) + recNo := uint32(val) & recMask + val >>= uint(recBits) + dup := int(uint32(val) & dcMask) + val >>= uint(dupBits) + trl := int(uint32(val) & tcMask) + + newBytes := keyLen - dup - trl - // Reconstruct key key := make([]byte, keyLen) - - // Copy duplicate prefix from previous key - if dupCount > 0 && dupCount <= keyLen { - copy(key[:dupCount], prevKey[:dupCount]) + if dup > 0 { + copy(key[:dup], prevKey[:dup]) } - - // Copy unique portion from key data area (grows from end of page backward) - uniqueLen := keyLen - dupCount - trlCount - if uniqueLen > 0 { - keyDataEnd -= uniqueLen - if keyDataEnd >= ExtHeadSize && keyDataEnd+uniqueLen <= PageLen { - copy(key[dupCount:dupCount+uniqueLen], data[keyDataEnd:keyDataEnd+uniqueLen]) + if newBytes > 0 { + kp := PageLen - totalKeyBytes - newBytes + if kp >= ExtHeadSize && kp+newBytes <= PageLen { + copy(key[dup:dup+newBytes], data[kp:kp+newBytes]) } + totalKeyBytes += newBytes } - - // Fill trailing bytes with spaces - for j := keyLen - trlCount; j < keyLen; j++ { - key[j] = ' ' + if trl > 0 { + for j := keyLen - trl; j < keyLen; j++ { + key[j] = ' ' + } } keys[i] = DecodedKey{RecNo: recNo, Key: key} @@ -320,6 +330,10 @@ type Tag struct { curKey []byte tagBOF bool tagEOF bool + + // Leaf page decode cache — avoids re-decoding same page on SkipNext/SkipPrev + cachedLeafOff int64 + cachedLeafKeys []DecodedKey } type StackEntry struct { @@ -560,6 +574,22 @@ func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry { return entries } +// getLeafKeys returns decoded leaf keys with caching. +func (t *Tag) getLeafKeys(pageOffset int64) ([]DecodedKey, error) { + if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil { + return t.cachedLeafKeys, nil + } + buf := make([]byte, PageLen) + if err := t.index.readAt(buf, pageOffset); err != nil { + return nil, err + } + hdr := DecodeLeafHeader(buf) + keys := DecodeLeafKeys(buf, hdr, t.keyLen) + t.cachedLeafOff = pageOffset + t.cachedLeafKeys = keys + return keys, nil +} + // --- Tag navigation --- // Seek searches for a key in the CDX tag's B-tree. @@ -583,8 +613,11 @@ func (t *Tag) seekPage(pageOffset int64, searchKey []byte) (uint32, bool) { isLeaf := (attr & NodeLeaf) != 0 if isLeaf { + // Use cached decode + t.cachedLeafOff = pageOffset hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) + t.cachedLeafKeys = keys // Binary search in leaf for i, dk := range keys { @@ -663,6 +696,8 @@ func (t *Tag) goLeftmost(pageOffset int64) bool { if isLeaf { hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) + t.cachedLeafOff = pageOffset + t.cachedLeafKeys = keys if len(keys) > 0 { t.curRecNo = keys[0].RecNo copy(t.curKey, keys[0].Key) @@ -734,6 +769,7 @@ func (t *Tag) goRightmost(pageOffset int64) bool { // SkipNext moves to the next key in leaf using rightPtr linked list. // CDX leaf pages are doubly linked — simpler than NTX stack traversal. +// SkipNext moves to the next key. Uses cached leaf decode. func (t *Tag) SkipNext() bool { if t.stackLevel == 0 { t.tagEOF = true @@ -744,16 +780,13 @@ func (t *Tag) SkipNext() bool { pageOffset := t.stack[level].PageOffset keyIdx := t.stack[level].KeyIndex - buf := make([]byte, PageLen) - if err := t.index.readAt(buf, pageOffset); err != nil { + keys, err := t.getLeafKeys(pageOffset) + if err != nil { t.tagEOF = true return false } - hdr := DecodeLeafHeader(buf) - keys := DecodeLeafKeys(buf, hdr, t.keyLen) - - // Next key in same page? + // Next key in same page? (cache hit — no decode) if keyIdx+1 < len(keys) { t.stack[level].KeyIndex = keyIdx + 1 t.curRecNo = keys[keyIdx+1].RecNo @@ -761,18 +794,17 @@ func (t *Tag) SkipNext() bool { return true } - // Follow rightPtr to next leaf page (CDX linked list) + // Follow rightPtr to next leaf (CDX linked list) + buf := make([]byte, PageLen) + if err := t.index.readAt(buf, pageOffset); err != nil { + t.tagEOF = true + return false + } + hdr := DecodeLeafHeader(buf) if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF { nextOff := int64(hdr.RightPtr) - buf2 := make([]byte, PageLen) - if err := t.index.readAt(buf2, nextOff); err != nil { - t.tagEOF = true - return false - } - - hdr2 := DecodeLeafHeader(buf2) - keys2 := DecodeLeafKeys(buf2, hdr2, t.keyLen) - if len(keys2) > 0 { + keys2, err := t.getLeafKeys(nextOff) + if err == nil && len(keys2) > 0 { t.stack[level] = StackEntry{PageOffset: nextOff, KeyIndex: 0} t.curRecNo = keys2[0].RecNo copy(t.curKey, keys2[0].Key) @@ -784,7 +816,7 @@ func (t *Tag) SkipNext() bool { return false } -// SkipPrev moves to the previous key using leftPtr. +// SkipPrev moves to the previous key. Uses cached leaf decode. func (t *Tag) SkipPrev() bool { if t.stackLevel == 0 { t.tagBOF = true @@ -795,35 +827,28 @@ func (t *Tag) SkipPrev() bool { pageOffset := t.stack[level].PageOffset keyIdx := t.stack[level].KeyIndex + // Previous key in same page? (cache hit) + if keyIdx > 0 { + keys, err := t.getLeafKeys(pageOffset) + if err == nil { + t.stack[level].KeyIndex = keyIdx - 1 + t.curRecNo = keys[keyIdx-1].RecNo + copy(t.curKey, keys[keyIdx-1].Key) + return true + } + } + + // Follow leftPtr buf := make([]byte, PageLen) if err := t.index.readAt(buf, pageOffset); err != nil { t.tagBOF = true return false } - - // Previous key in same page? - if keyIdx > 0 { - hdr := DecodeLeafHeader(buf) - keys := DecodeLeafKeys(buf, hdr, t.keyLen) - t.stack[level].KeyIndex = keyIdx - 1 - t.curRecNo = keys[keyIdx-1].RecNo - copy(t.curKey, keys[keyIdx-1].Key) - return true - } - - // Follow leftPtr hdr := DecodeLeafHeader(buf) if hdr.LeftPtr != 0 && hdr.LeftPtr != 0xFFFFFFFF { prevOff := int64(hdr.LeftPtr) - buf2 := make([]byte, PageLen) - if err := t.index.readAt(buf2, prevOff); err != nil { - t.tagBOF = true - return false - } - - hdr2 := DecodeLeafHeader(buf2) - keys2 := DecodeLeafKeys(buf2, hdr2, t.keyLen) - if len(keys2) > 0 { + keys2, err := t.getLeafKeys(prevOff) + if err == nil && len(keys2) > 0 { last := len(keys2) - 1 t.stack[level] = StackEntry{PageOffset: prevOff, KeyIndex: last} t.curRecNo = keys2[last].RecNo