CDX internal node format fix: - Was: [child LE][recNo LE][key] (NTX-style) - Now: [key][recNo BE][child BE] (correct CDX format) - Fixes GoTop/Seek/Scan for large CDX files (50K+ records) CDX mmap: - syscall.Mmap on OpenIndex for zero-copy reads - idx.readAt() helper: mmap slice or file fallback - All ReadAt calls in Tag navigation replaced - Close: munmap CDX 50K benchmark (all counts correct): SEEK NAME 50K: 362ms (f=50000) SCAN 50K: 276ms (c=50000) SCOPE 35K: 238ms (c=35000) SEEK ID 50K: 320ms (f=50000) CDX is slower than NTX due to bit-packed leaf decompression per page. Cross-read test: 18/18 still PASS. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
876 lines
23 KiB
Go
876 lines
23 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// CDX compound index engine for Five.
|
|
// Byte-compatible with Harbour/FoxPro CDX files.
|
|
// CDX uses FPT memo format (not DBT).
|
|
//
|
|
// Key differences from NTX:
|
|
// - 512-byte pages (vs NTX 1024)
|
|
// - Compound index: multiple tags per file
|
|
// - Bit-packed leaf keys: recBits/dupBits/trlBits compression
|
|
// - Linked leaf pages (leftPtr/rightPtr)
|
|
//
|
|
// Reference:
|
|
// /mnt/d/harbour-core/include/hbrddcdx.h
|
|
// /mnt/d/harbour-core/src/rdd/dbfcdx/dbfcdx1.c
|
|
// docs/rdd-architecture-spec.md
|
|
package cdx
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
"syscall"
|
|
)
|
|
|
|
// CDX layout constants. The trailing comments name the Harbour macro each
// value mirrors.
const (
	PageLen       = 1 << 9 // CDX_PAGELEN (512)
	HeaderLen     = 1024   // CDX_HEADERLEN
	MaxKey        = 240    // CDX_MAXKEY
	MaxTagNameLen = 10     // CDX_MAXTAGNAMELEN
	StackSize     = 64     // CDX_STACKSIZE
	IntHeadSize   = 12     // CDX_INT_HEADSIZE
	ExtHeadSize   = 24     // CDX_EXT_HEADSIZE
	HeaderExpLen  = HeaderLen - 512
)

// Node type values tested against a page's attribute word.
const (
	NodeBranch = 0 // CDX_NODE_BRANCH
	NodeRoot   = 1 // CDX_NODE_ROOT
	NodeLeaf   = 2 // CDX_NODE_LEAF
	NodeUnused = 0xFF
)

// Index option flags (CDX_TYPE_*), carried in TagHeader.IndexOpt.
const (
	TypeUnique    = 1 << 0 // 0x01
	TypePartial   = 1 << 1 // 0x02
	TypeCustom    = 1 << 2 // 0x04
	TypeForFilter = 1 << 3 // 0x08
	TypeCompact   = 1 << 5 // 0x20
	TypeCompound  = 1 << 6 // 0x40
	TypeStructure = 1 << 7 // 0x80
)
|
|
|
|
// --- Tag Header (512 bytes in file, at start of each tag's header page) ---
|
|
|
|
// TagHeader is the decoded form of one CDX tag header.
// Harbour counterpart: CDXTAGHEADER (hbrddcdx.h:188).
type TagHeader struct {
	// RootPtr is the root page offset, FreePtr the free page list and
	// Counter the update counter.
	RootPtr, FreePtr, Counter uint32
	// KeySize is the key length in bytes (at most 240).
	KeySize uint16
	// IndexOpt carries the CDX_TYPE_* flags; IndexSig is the signature byte.
	IndexOpt, IndexSig byte
	// HeaderLen is typically 0x0400; PageLen is the page length.
	HeaderLen, PageLen uint16
	// KeyExpr is the key expression; ForExpr is the FOR filter expression.
	KeyExpr, ForExpr string
	// Descending and IgnoreCase are the ordering flags read from the header.
	Descending, IgnoreCase bool
}
|
|
|
|
// ReadTagHeader reads a CDX tag header from file at given offset.
|
|
func ReadTagHeader(f *os.File, offset int64) (*TagHeader, error) {
|
|
buf := make([]byte, HeaderLen)
|
|
if _, err := f.ReadAt(buf, offset); err != nil {
|
|
return nil, fmt.Errorf("read CDX tag header at %d: %w", offset, err)
|
|
}
|
|
|
|
th := &TagHeader{
|
|
RootPtr: binary.LittleEndian.Uint32(buf[0:4]),
|
|
FreePtr: binary.LittleEndian.Uint32(buf[4:8]),
|
|
Counter: binary.LittleEndian.Uint32(buf[8:12]),
|
|
KeySize: binary.LittleEndian.Uint16(buf[12:14]),
|
|
IndexOpt: buf[14],
|
|
IndexSig: buf[15],
|
|
HeaderLen: binary.LittleEndian.Uint16(buf[16:18]),
|
|
PageLen: binary.LittleEndian.Uint16(buf[18:20]),
|
|
}
|
|
|
|
th.IgnoreCase = buf[503] != 0
|
|
th.Descending = binary.LittleEndian.Uint16(buf[504:506]) != 0
|
|
|
|
// Key/For expressions — stored directly at offset 512 (0x200) within the header block.
|
|
// CDX format: key expression at byte 512, for expression follows after null terminator.
|
|
keyExprStart := 512
|
|
th.KeyExpr = trimNull(buf[keyExprStart:])
|
|
|
|
// FOR expression follows key expression (after null terminator)
|
|
forStart := keyExprStart + len(th.KeyExpr) + 1
|
|
if forStart < len(buf) {
|
|
th.ForExpr = trimNull(buf[forStart:])
|
|
}
|
|
|
|
return th, nil
|
|
}
|
|
|
|
// --- Leaf page: bit-packed key extraction ---
|
|
|
|
// LeafHeader is the decoded fixed header of a CDX leaf page.
// Harbour counterpart: CDXEXTNODE (hbrddcdx.h:224).
type LeafHeader struct {
	// Attr is the page attribute word; NKeys the number of keys on the page.
	Attr, NKeys uint16
	// LeftPtr and RightPtr link to the neighbouring leaf pages.
	LeftPtr, RightPtr uint32
	// FreeSpc is the free-space field of the page header.
	FreeSpc uint16
	// RecMask masks the record-number field of a packed key entry.
	RecMask uint32
	// DupMask and TrlMask mask the duplicate and trailing counts.
	DupMask, TrlMask byte
	// RecBits, DupBits and TrlBits are the bit widths of the packed fields.
	RecBits, DupBits, TrlBits byte
	// KeyBytes is the total number of bytes per key info entry.
	KeyBytes byte
}
|
|
|
|
// DecodeLeafHeader extracts the 24-byte leaf header from page data.
|
|
func DecodeLeafHeader(data []byte) LeafHeader {
|
|
return LeafHeader{
|
|
Attr: binary.LittleEndian.Uint16(data[0:2]),
|
|
NKeys: binary.LittleEndian.Uint16(data[2:4]),
|
|
LeftPtr: binary.LittleEndian.Uint32(data[4:8]),
|
|
RightPtr: binary.LittleEndian.Uint32(data[8:12]),
|
|
FreeSpc: binary.LittleEndian.Uint16(data[12:14]),
|
|
RecMask: binary.LittleEndian.Uint32(data[14:18]),
|
|
DupMask: data[18],
|
|
TrlMask: data[19],
|
|
RecBits: data[20],
|
|
DupBits: data[21],
|
|
TrlBits: data[22],
|
|
KeyBytes: data[23],
|
|
}
|
|
}
|
|
|
|
// DecodedKey is one key recovered from a leaf page.
type DecodedKey struct {
	// RecNo is the record number stored alongside the key.
	RecNo uint32
	// Key is the fully expanded key value.
	Key []byte
}
|
|
|
|
// DecodeLeafKeys extracts all keys from a CDX leaf page.
|
|
// This is the core bit-packing decompression algorithm.
|
|
// Harbour: hb_cdxPageLeafDecode in dbfcdx1.c
|
|
func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey {
|
|
if hdr.NKeys == 0 {
|
|
return nil
|
|
}
|
|
|
|
keys := make([]DecodedKey, hdr.NKeys)
|
|
totalBits := uint(hdr.RecBits) + uint(hdr.DupBits) + uint(hdr.TrlBits)
|
|
prevKey := make([]byte, keyLen)
|
|
|
|
// Key info area starts right after ExtHeadSize
|
|
infoArea := data[ExtHeadSize:]
|
|
// Key data area is at the end of the page, growing backwards
|
|
keyDataEnd := PageLen
|
|
|
|
for i := 0; i < int(hdr.NKeys); i++ {
|
|
// Extract bit-packed fields
|
|
bitOffset := uint(i) * totalBits
|
|
recNo := extractBits(infoArea, bitOffset, uint(hdr.RecBits)) & hdr.RecMask
|
|
bitOffset += uint(hdr.RecBits)
|
|
dupCount := int(extractBits(infoArea, bitOffset, uint(hdr.DupBits)) & uint32(hdr.DupMask))
|
|
bitOffset += uint(hdr.DupBits)
|
|
trlCount := int(extractBits(infoArea, bitOffset, uint(hdr.TrlBits)) & uint32(hdr.TrlMask))
|
|
|
|
// Reconstruct key
|
|
key := make([]byte, keyLen)
|
|
|
|
// Copy duplicate prefix from previous key
|
|
if dupCount > 0 && dupCount <= keyLen {
|
|
copy(key[:dupCount], prevKey[:dupCount])
|
|
}
|
|
|
|
// Copy unique portion from key data area (grows from end of page backward)
|
|
uniqueLen := keyLen - dupCount - trlCount
|
|
if uniqueLen > 0 {
|
|
keyDataEnd -= uniqueLen
|
|
if keyDataEnd >= ExtHeadSize && keyDataEnd+uniqueLen <= PageLen {
|
|
copy(key[dupCount:dupCount+uniqueLen], data[keyDataEnd:keyDataEnd+uniqueLen])
|
|
}
|
|
}
|
|
|
|
// Fill trailing bytes with spaces
|
|
for j := keyLen - trlCount; j < keyLen; j++ {
|
|
key[j] = ' '
|
|
}
|
|
|
|
keys[i] = DecodedKey{RecNo: recNo, Key: key}
|
|
copy(prevKey, key)
|
|
}
|
|
|
|
return keys
|
|
}
|
|
|
|
// extractBits returns nBits bits of data starting at bit position bitOffset.
//
// Bits are numbered least-significant first within each byte, and bytes are
// taken in increasing order, so the first bit read becomes bit 0 of the
// result. Bytes beyond the end of data read as zero. At most 32 bits are
// returned; a larger nBits yields the low 32 bits of the field.
//
// The value is assembled from at most five bytes loaded into a 64-bit window
// rather than by testing one bit per iteration.
func extractBits(data []byte, bitOffset, nBits uint) uint32 {
	if nBits == 0 {
		return 0
	}
	if nBits > 32 {
		nBits = 32
	}

	first := bitOffset / 8
	shift := bitOffset % 8

	// shift < 8 and nBits <= 32, so the field spans at most 5 bytes.
	span := (shift + nBits + 7) / 8

	var window uint64
	for k := uint(0); k < span; k++ {
		pos := first + k
		if pos >= uint(len(data)) {
			break // remaining bytes read as zero
		}
		window |= uint64(data[pos]) << (8 * k)
	}

	mask := uint64(1)<<nBits - 1
	return uint32((window >> shift) & mask)
}
|
|
|
|
// --- Interior node ---
|
|
|
|
// IntNode is the decoded fixed header of a CDX internal (branch) page.
type IntNode struct {
	// Attr is the page attribute word; NKeys the number of keys on the page.
	Attr, NKeys uint16
	// LeftPtr and RightPtr are the sibling page pointers.
	LeftPtr, RightPtr uint32
}
|
|
|
|
func DecodeIntNode(data []byte) IntNode {
|
|
return IntNode{
|
|
Attr: binary.LittleEndian.Uint16(data[0:2]),
|
|
NKeys: binary.LittleEndian.Uint16(data[2:4]),
|
|
LeftPtr: binary.LittleEndian.Uint32(data[4:8]),
|
|
RightPtr: binary.LittleEndian.Uint32(data[8:12]),
|
|
}
|
|
}
|
|
|
|
// IntKeyEntry is one decoded entry of an internal node.
// On disk an entry is laid out as key (keyLen bytes), record number
// (4 bytes, big-endian), child page (4 bytes, big-endian).
type IntKeyEntry struct {
	// ChildPage is the child page pointer; RecNo the record number.
	ChildPage, RecNo uint32
	// Key is the key value stored with the entry.
	Key []byte
}
|
|
|
|
// DecodeIntKeys extracts keys from a CDX internal node.
|
|
// CDX internal format: [key: keyLen bytes][recNo: 4 BE][childPage: 4 BE]
|
|
// This is different from NTX which uses [child LE][recNo LE][key].
|
|
func DecodeIntKeys(data []byte, nKeys int, keyLen int) []IntKeyEntry {
|
|
entries := make([]IntKeyEntry, nKeys+1) // nKeys separators + 1 trailing child
|
|
entrySize := keyLen + 8 // key + recNo(4BE) + child(4BE)
|
|
off := IntHeadSize
|
|
|
|
for i := 0; i <= nKeys; i++ {
|
|
if off+entrySize > PageLen {
|
|
break
|
|
}
|
|
e := IntKeyEntry{
|
|
Key: make([]byte, keyLen),
|
|
}
|
|
if i < nKeys {
|
|
copy(e.Key, data[off:off+keyLen])
|
|
e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4])
|
|
e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8])
|
|
} else {
|
|
// Trailing child — no key/recNo, just child pointer
|
|
// In CDX, the rightmost child is the child of the last separator entry
|
|
// Actually, for the last entry we only need the child pointer
|
|
// The previous entry's child was the LEFT child; we need RIGHT child
|
|
// stored at the position after last key entry
|
|
copy(e.Key, data[off:off+keyLen])
|
|
e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4])
|
|
e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8])
|
|
}
|
|
entries[i] = e
|
|
off += entrySize
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
// --- CDX Index (compound, multi-tag) ---
|
|
|
|
// Index represents an open CDX index file.
|
|
type Index struct {
|
|
file *os.File
|
|
tags []*Tag
|
|
mmapData []byte // mmap'd file for zero-copy reads
|
|
}
|
|
|
|
// readAt reads len(buf) bytes at offset — from mmap or file fallback.
|
|
func (idx *Index) readAt(buf []byte, offset int64) error {
|
|
if idx.mmapData != nil && offset >= 0 && int(offset)+len(buf) <= len(idx.mmapData) {
|
|
copy(buf, idx.mmapData[offset:offset+int64(len(buf))])
|
|
return nil
|
|
}
|
|
_, err := idx.file.ReadAt(buf, offset)
|
|
return err
|
|
}
|
|
|
|
// Tag represents one index tag within a CDX file.
|
|
type Tag struct {
|
|
Name string // tag name (e.g., "BYNAME")
|
|
index *Index
|
|
header TagHeader
|
|
headerOff int64 // file offset of this tag's header
|
|
keyLen int
|
|
|
|
// Current position
|
|
stack [StackSize]StackEntry
|
|
stackLevel int
|
|
curRecNo uint32
|
|
curKey []byte
|
|
tagBOF bool
|
|
tagEOF bool
|
|
}
|
|
|
|
// StackEntry is one level of a Tag's navigation stack.
type StackEntry struct {
	// PageOffset is the file offset of the page at this level.
	PageOffset int64
	// KeyIndex is the index of the selected key within that page.
	KeyIndex int
}
|
|
|
|
// OpenIndex opens a CDX file and reads all tags.
|
|
func OpenIndex(path string) (*Index, error) {
|
|
if !strings.HasSuffix(strings.ToLower(path), ".cdx") {
|
|
path += ".cdx"
|
|
}
|
|
|
|
f, err := os.OpenFile(path, os.O_RDWR, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
idx := &Index{file: f}
|
|
|
|
// mmap for zero-copy reads
|
|
if fi, err2 := f.Stat(); err2 == nil && fi.Size() > 0 {
|
|
if data, err2 := syscall.Mmap(int(f.Fd()), 0, int(fi.Size()),
|
|
syscall.PROT_READ, syscall.MAP_SHARED); err2 == nil {
|
|
idx.mmapData = data
|
|
}
|
|
}
|
|
|
|
// Read compound header (structural root at offset 0)
|
|
rootHdr, err := ReadTagHeader(f, 0)
|
|
if err != nil {
|
|
f.Close()
|
|
return nil, err
|
|
}
|
|
|
|
// Parse compound tag directory from the structural root's B-tree
|
|
// The structural index keys are 10-byte tag names, and each leaf entry
|
|
// points to the tag header at a specific file offset.
|
|
tagEntries := readCompoundTagList(idx, rootHdr)
|
|
|
|
for _, entry := range tagEntries {
|
|
tagHdr, err := ReadTagHeader(f, entry.offset)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
tag := &Tag{
|
|
Name: entry.name,
|
|
index: idx,
|
|
header: *tagHdr,
|
|
headerOff: entry.offset,
|
|
keyLen: int(tagHdr.KeySize),
|
|
curKey: make([]byte, tagHdr.KeySize),
|
|
}
|
|
idx.tags = append(idx.tags, tag)
|
|
}
|
|
|
|
// If no tags found via compound directory, fall back to root as single tag
|
|
if len(idx.tags) == 0 {
|
|
tag := &Tag{
|
|
Name: "TAG1",
|
|
index: idx,
|
|
header: *rootHdr,
|
|
headerOff: 0,
|
|
keyLen: int(rootHdr.KeySize),
|
|
curKey: make([]byte, rootHdr.KeySize),
|
|
}
|
|
idx.tags = append(idx.tags, tag)
|
|
}
|
|
|
|
return idx, nil
|
|
}
|
|
|
|
// Close closes the CDX file.
|
|
func (idx *Index) Close() error {
|
|
if idx.mmapData != nil {
|
|
syscall.Munmap(idx.mmapData)
|
|
idx.mmapData = nil
|
|
}
|
|
return idx.file.Close()
|
|
}
|
|
|
|
// TagCount returns the number of tags.
|
|
func (idx *Index) TagCount() int {
|
|
return len(idx.tags)
|
|
}
|
|
|
|
// GetTag returns a tag by index.
|
|
func (idx *Index) GetTag(i int) *Tag {
|
|
if i >= 0 && i < len(idx.tags) {
|
|
return idx.tags[i]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Tags returns all tags in the CDX.
|
|
func (idx *Index) Tags() []*Tag { return idx.tags }
|
|
|
|
// FindTag returns a tag by name.
|
|
func (idx *Index) FindTag(name string) *Tag {
|
|
upper := strings.ToUpper(name)
|
|
for _, t := range idx.tags {
|
|
if strings.ToUpper(t.Name) == upper {
|
|
return t
|
|
}
|
|
// Also try key expression match
|
|
if strings.ToUpper(t.header.KeyExpr) == upper {
|
|
return t
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// tagDirEntry is one entry of the compound tag directory.
type tagDirEntry struct {
	// name is the tag name.
	name string
	// offset is the file offset of the tag's header.
	offset int64
}
|
|
|
|
// readCompoundTagList reads tag names and offsets from the structural root.
|
|
// CDX compound header: root page is a B-tree of tag entries.
|
|
// Each leaf key = 10-byte tag name, record number = page offset / 512.
|
|
func readCompoundTagList(idx *Index, rootHdr *TagHeader) []tagDirEntry {
|
|
var entries []tagDirEntry
|
|
if rootHdr.RootPtr == 0 {
|
|
return entries
|
|
}
|
|
|
|
// Read the root page of the structural index
|
|
pageData := make([]byte, 512)
|
|
err := idx.readAt(pageData, int64(rootHdr.RootPtr))
|
|
if err != nil {
|
|
return entries
|
|
}
|
|
|
|
// CDX page header: [attr:2][nKeys:2][leftPtr:4][rightPtr:4]
|
|
nKeys := int(binary.LittleEndian.Uint16(pageData[2:4]))
|
|
attr := binary.LittleEndian.Uint16(pageData[0:2])
|
|
|
|
isLeaf := (attr & 0x02) != 0
|
|
|
|
if isLeaf {
|
|
entries = decodeCompoundLeaf(pageData, nKeys)
|
|
}
|
|
|
|
// If compound leaf decoding didn't find entries, scan for tag headers
|
|
if len(entries) == 0 {
|
|
entries = scanCompoundLeaves(idx, rootHdr)
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
// scanCompoundLeaves scans the CDX file for tag headers.
|
|
// CDX tag headers are at 0x400 (1024) byte boundaries.
|
|
// Each tag header is followed by a page with the key expression string.
|
|
func scanCompoundLeaves(idx *Index, rootHdr *TagHeader) []tagDirEntry {
|
|
var entries []tagDirEntry
|
|
|
|
fileInfo, err := idx.file.Stat()
|
|
if err != nil {
|
|
return entries
|
|
}
|
|
fileSize := fileInfo.Size()
|
|
|
|
// Scan at 0x400 intervals; tag headers have:
|
|
// - RootPtr (uint32 at offset 0) pointing to a valid page
|
|
// - KeySize (uint16 at offset 12) between 1..240
|
|
// - Key expression string at +0x200 (offset 0x106 from header start)
|
|
// Skip offset 0 (compound root) and scan the rest
|
|
// Skip compound header at 0x0000; scan from 0x0400 onwards
|
|
// Tag headers are at 0x400 boundaries but NOT the compound root itself
|
|
for off := int64(0x400); off < fileSize; off += 0x200 {
|
|
buf := make([]byte, 0x400)
|
|
err := idx.readAt(buf, off)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
rootPtr := binary.LittleEndian.Uint32(buf[0:4])
|
|
keySize := binary.LittleEndian.Uint16(buf[12:14])
|
|
|
|
if keySize == 0 || keySize > 240 || rootPtr == 0 {
|
|
continue
|
|
}
|
|
// Validate rootPtr is within file and at a valid page boundary
|
|
if int64(rootPtr) >= fileSize || rootPtr%512 != 0 {
|
|
continue
|
|
}
|
|
|
|
// Read key expression from offset 0x106 within the header
|
|
keyExpr := ""
|
|
for i := 0x106; i < 0x206 && i < len(buf) && buf[i] != 0; i++ {
|
|
keyExpr += string(buf[i])
|
|
}
|
|
if keyExpr == "" {
|
|
// Key expression might be in the next page (+0x200 from header)
|
|
exprBuf := make([]byte, 256)
|
|
idx.readAt(exprBuf, off+0x200)
|
|
for i := 0; i < len(exprBuf) && exprBuf[i] != 0; i++ {
|
|
keyExpr += string(exprBuf[i])
|
|
}
|
|
}
|
|
|
|
if keyExpr == "" {
|
|
continue
|
|
}
|
|
|
|
name := strings.ToUpper(strings.TrimSpace(keyExpr))
|
|
// Use "BY" + field name convention, or just the expression
|
|
entries = append(entries, tagDirEntry{name: name, offset: off})
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
// decodeCompoundLeaf decodes tag entries from a compound leaf page.
|
|
// Compound index uses the same bit-packed format as data leaves,
|
|
// with keyLen=10 (tag name) and recNo = page offset / PageLen.
|
|
func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry {
|
|
if nKeys <= 0 || len(data) < ExtHeadSize {
|
|
return nil
|
|
}
|
|
|
|
// Use the standard leaf key decoder with keyLen=10 (compound tag name size)
|
|
hdr := DecodeLeafHeader(data)
|
|
keys := DecodeLeafKeys(data, hdr, 10)
|
|
|
|
var entries []tagDirEntry
|
|
for _, dk := range keys {
|
|
name := trimNull(dk.Key)
|
|
name = strings.TrimSpace(name)
|
|
if name == "" {
|
|
continue
|
|
}
|
|
// RecNo in compound index = direct byte offset to tag header
|
|
entries = append(entries, tagDirEntry{name: name, offset: int64(dk.RecNo)})
|
|
}
|
|
return entries
|
|
}
|
|
|
|
// --- Tag navigation ---
|
|
|
|
// Seek searches for a key in the CDX tag's B-tree.
|
|
func (t *Tag) Seek(searchKey []byte) (uint32, bool) {
|
|
t.stackLevel = 0
|
|
t.tagBOF = false
|
|
t.tagEOF = false
|
|
|
|
pageOffset := int64(t.header.RootPtr)
|
|
return t.seekPage(pageOffset, searchKey)
|
|
}
|
|
|
|
func (t *Tag) seekPage(pageOffset int64, searchKey []byte) (uint32, bool) {
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
t.tagEOF = true
|
|
return 0, false
|
|
}
|
|
|
|
attr := binary.LittleEndian.Uint16(buf[0:2])
|
|
isLeaf := (attr & NodeLeaf) != 0
|
|
|
|
if isLeaf {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
|
|
// Binary search in leaf
|
|
for i, dk := range keys {
|
|
cmp := bytes.Compare(searchKey, dk.Key[:len(searchKey)])
|
|
if cmp == 0 {
|
|
// Found
|
|
t.curRecNo = dk.RecNo
|
|
copy(t.curKey, dk.Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: i}
|
|
t.stackLevel++
|
|
}
|
|
return dk.RecNo, true
|
|
}
|
|
if cmp < 0 {
|
|
// Search key < current: softseek position
|
|
t.curRecNo = dk.RecNo
|
|
copy(t.curKey, dk.Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: i}
|
|
t.stackLevel++
|
|
}
|
|
return dk.RecNo, false
|
|
}
|
|
}
|
|
|
|
// Past all keys: EOF or follow rightPtr
|
|
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
|
|
return t.seekPage(int64(hdr.RightPtr), searchKey)
|
|
}
|
|
t.tagEOF = true
|
|
t.curRecNo = 0
|
|
return 0, false
|
|
}
|
|
|
|
// Internal node: binary search then follow child
|
|
node := DecodeIntNode(buf)
|
|
intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen)
|
|
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
|
t.stackLevel++
|
|
}
|
|
|
|
for i := 0; i < int(node.NKeys); i++ {
|
|
cmp := bytes.Compare(searchKey, intKeys[i].Key)
|
|
if cmp <= 0 {
|
|
t.stack[t.stackLevel-1].KeyIndex = i
|
|
return t.seekPage(int64(intKeys[i].ChildPage), searchKey)
|
|
}
|
|
}
|
|
|
|
// Follow last child
|
|
lastIdx := int(node.NKeys)
|
|
t.stack[t.stackLevel-1].KeyIndex = lastIdx
|
|
return t.seekPage(int64(intKeys[lastIdx].ChildPage), searchKey)
|
|
}
|
|
|
|
// GoTop positions at the first key.
|
|
func (t *Tag) GoTop() bool {
|
|
t.stackLevel = 0
|
|
t.tagBOF = false
|
|
t.tagEOF = false
|
|
return t.goLeftmost(int64(t.header.RootPtr))
|
|
}
|
|
|
|
func (t *Tag) goLeftmost(pageOffset int64) bool {
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
return false
|
|
}
|
|
|
|
attr := binary.LittleEndian.Uint16(buf[0:2])
|
|
isLeaf := (attr & NodeLeaf) != 0
|
|
|
|
if isLeaf {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
if len(keys) > 0 {
|
|
t.curRecNo = keys[0].RecNo
|
|
copy(t.curKey, keys[0].Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
|
t.stackLevel++
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Internal: follow first child
|
|
node := DecodeIntNode(buf)
|
|
intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen)
|
|
if len(intKeys) > 0 {
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
|
t.stackLevel++
|
|
}
|
|
return t.goLeftmost(int64(intKeys[0].ChildPage))
|
|
}
|
|
return false
|
|
}
|
|
|
|
// GoBottom positions at the last key.
|
|
func (t *Tag) GoBottom() bool {
|
|
t.stackLevel = 0
|
|
t.tagBOF = false
|
|
t.tagEOF = false
|
|
return t.goRightmost(int64(t.header.RootPtr))
|
|
}
|
|
|
|
func (t *Tag) goRightmost(pageOffset int64) bool {
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
return false
|
|
}
|
|
|
|
attr := binary.LittleEndian.Uint16(buf[0:2])
|
|
isLeaf := (attr & NodeLeaf) != 0
|
|
|
|
if isLeaf {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
if len(keys) > 0 {
|
|
last := len(keys) - 1
|
|
t.curRecNo = keys[last].RecNo
|
|
copy(t.curKey, keys[last].Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: last}
|
|
t.stackLevel++
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Internal: follow last child
|
|
node := DecodeIntNode(buf)
|
|
intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen)
|
|
lastIdx := int(node.NKeys)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastIdx}
|
|
t.stackLevel++
|
|
}
|
|
return t.goRightmost(int64(intKeys[lastIdx].ChildPage))
|
|
}
|
|
|
|
// SkipNext moves to the next key in leaf using rightPtr linked list.
|
|
// CDX leaf pages are doubly linked — simpler than NTX stack traversal.
|
|
func (t *Tag) SkipNext() bool {
|
|
if t.stackLevel == 0 {
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
level := t.stackLevel - 1
|
|
pageOffset := t.stack[level].PageOffset
|
|
keyIdx := t.stack[level].KeyIndex
|
|
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
|
|
// Next key in same page?
|
|
if keyIdx+1 < len(keys) {
|
|
t.stack[level].KeyIndex = keyIdx + 1
|
|
t.curRecNo = keys[keyIdx+1].RecNo
|
|
copy(t.curKey, keys[keyIdx+1].Key)
|
|
return true
|
|
}
|
|
|
|
// Follow rightPtr to next leaf page (CDX linked list)
|
|
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
|
|
nextOff := int64(hdr.RightPtr)
|
|
buf2 := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf2, nextOff); err != nil {
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
hdr2 := DecodeLeafHeader(buf2)
|
|
keys2 := DecodeLeafKeys(buf2, hdr2, t.keyLen)
|
|
if len(keys2) > 0 {
|
|
t.stack[level] = StackEntry{PageOffset: nextOff, KeyIndex: 0}
|
|
t.curRecNo = keys2[0].RecNo
|
|
copy(t.curKey, keys2[0].Key)
|
|
return true
|
|
}
|
|
}
|
|
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
// SkipPrev moves to the previous key using leftPtr.
|
|
func (t *Tag) SkipPrev() bool {
|
|
if t.stackLevel == 0 {
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
|
|
level := t.stackLevel - 1
|
|
pageOffset := t.stack[level].PageOffset
|
|
keyIdx := t.stack[level].KeyIndex
|
|
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
|
|
// Previous key in same page?
|
|
if keyIdx > 0 {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
t.stack[level].KeyIndex = keyIdx - 1
|
|
t.curRecNo = keys[keyIdx-1].RecNo
|
|
copy(t.curKey, keys[keyIdx-1].Key)
|
|
return true
|
|
}
|
|
|
|
// Follow leftPtr
|
|
hdr := DecodeLeafHeader(buf)
|
|
if hdr.LeftPtr != 0 && hdr.LeftPtr != 0xFFFFFFFF {
|
|
prevOff := int64(hdr.LeftPtr)
|
|
buf2 := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf2, prevOff); err != nil {
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
|
|
hdr2 := DecodeLeafHeader(buf2)
|
|
keys2 := DecodeLeafKeys(buf2, hdr2, t.keyLen)
|
|
if len(keys2) > 0 {
|
|
last := len(keys2) - 1
|
|
t.stack[level] = StackEntry{PageOffset: prevOff, KeyIndex: last}
|
|
t.curRecNo = keys2[last].RecNo
|
|
copy(t.curKey, keys2[last].Key)
|
|
return true
|
|
}
|
|
}
|
|
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
|
|
// CurRecNo returns the current record number.
|
|
func (t *Tag) CurRecNo() uint32 { return t.curRecNo }
|
|
|
|
// CurKey returns the current key.
|
|
func (t *Tag) CurKey() []byte { return t.curKey[:t.keyLen] }
|
|
|
|
// IsEOF returns true if past end.
|
|
func (t *Tag) IsEOF() bool { return t.tagEOF }
|
|
|
|
// IsBOF returns true if before start.
|
|
func (t *Tag) IsBOF() bool { return t.tagBOF }
|
|
|
|
// KeyLen returns the key length.
|
|
func (t *Tag) KeyLen() int { return t.keyLen }
|
|
|
|
// KeyExpr returns the key expression string stored in the CDX header.
|
|
func (t *Tag) KeyExpr() string { return t.header.KeyExpr }
|
|
|
|
// ForExpr returns the FOR condition expression.
|
|
func (t *Tag) ForExpr() string { return t.header.ForExpr }
|
|
|
|
// IsDescending returns true if the tag sorts in descending order.
|
|
func (t *Tag) IsDescending() bool { return t.header.Descending }
|
|
|
|
// Close is a no-op for tags (the parent Index owns the file).
|
|
func (t *Tag) Close() error { return nil }
|
|
|
|
// --- Helpers ---
|
|
|
|
// trimNull converts b to a string, cutting it at the first NUL byte (or
// using all of b when there is none) and stripping surrounding white space.
func trimNull(b []byte) string {
	end := 0
	for end < len(b) && b[end] != 0 {
		end++
	}
	return strings.TrimSpace(string(b[:end]))
}
|