// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // CDX compound index engine for Five. // Byte-compatible with Harbour/FoxPro CDX files. // CDX uses FPT memo format (not DBT). // // Key differences from NTX: // - 512-byte pages (vs NTX 1024) // - Compound index: multiple tags per file // - Bit-packed leaf keys: recBits/dupBits/trlBits compression // - Linked leaf pages (leftPtr/rightPtr) // // Reference: // /mnt/d/harbour-core/include/hbrddcdx.h // /mnt/d/harbour-core/src/rdd/dbfcdx/dbfcdx1.c // docs/rdd-architecture-spec.md package cdx import ( "bytes" "encoding/binary" "fmt" "os" "sort" "strings" ) // CDX constants — matching Harbour. const ( PageLen = 512 // CDX_PAGELEN (1 << 9) HeaderLen = 1024 // CDX_HEADERLEN MaxKey = 240 // CDX_MAXKEY MaxTagNameLen = 10 // CDX_MAXTAGNAMELEN StackSize = 64 // CDX_STACKSIZE IntHeadSize = 12 // CDX_INT_HEADSIZE ExtHeadSize = 24 // CDX_EXT_HEADSIZE HeaderExpLen = HeaderLen - 512 // Node types NodeBranch = 0 // CDX_NODE_BRANCH NodeRoot = 1 // CDX_NODE_ROOT NodeLeaf = 2 // CDX_NODE_LEAF NodeUnused = 0xFF // Type flags TypeUnique = 0x01 TypePartial = 0x02 TypeCustom = 0x04 TypeForFilter = 0x08 TypeCompact = 0x20 TypeCompound = 0x40 TypeStructure = 0x80 ) // --- Tag Header (512 bytes in file, at start of each tag's header page) --- // TagHeader holds a CDX tag's metadata. // Harbour: CDXTAGHEADER in hbrddcdx.h:188 type TagHeader struct { RootPtr uint32 // root page offset FreePtr uint32 // free page list Counter uint32 // update counter KeySize uint16 // key length (max 240) IndexOpt byte // CDX_TYPE_* flags IndexSig byte // signature HeaderLen uint16 // 0x0400 typically PageLen uint16 // page length KeyExpr string // key expression ForExpr string // FOR filter expression Descending bool IgnoreCase bool } // ReadTagHeader reads a CDX tag header from file at given offset. func ReadTagHeader(f *os.File, offset int64) (*TagHeader, error) { buf := make([]byte, HeaderLen) if _, err := f.ReadAt(buf, offset); err != nil { return nil, fmt.Errorf("read CDX tag header at %d: %w", offset, err) } th := &TagHeader{ RootPtr: binary.LittleEndian.Uint32(buf[0:4]), FreePtr: binary.LittleEndian.Uint32(buf[4:8]), Counter: binary.LittleEndian.Uint32(buf[8:12]), KeySize: binary.LittleEndian.Uint16(buf[12:14]), IndexOpt: buf[14], IndexSig: buf[15], HeaderLen: binary.LittleEndian.Uint16(buf[16:18]), PageLen: binary.LittleEndian.Uint16(buf[18:20]), } th.IgnoreCase = buf[503] != 0 th.Descending = binary.LittleEndian.Uint16(buf[504:506]) != 0 // Key/For expressions — stored directly at offset 512 (0x200) within the header block. // CDX format: key expression at byte 512, for expression follows after null terminator. keyExprStart := 512 th.KeyExpr = trimNull(buf[keyExprStart:]) // FOR expression follows key expression (after null terminator) forStart := keyExprStart + len(th.KeyExpr) + 1 if forStart < len(buf) { th.ForExpr = trimNull(buf[forStart:]) } return th, nil } // --- Leaf page: bit-packed key extraction --- // LeafHeader holds decoded leaf page metadata. // Harbour: CDXEXTNODE in hbrddcdx.h:224 type LeafHeader struct { Attr uint16 NKeys uint16 LeftPtr uint32 RightPtr uint32 FreeSpc uint16 RecMask uint32 DupMask byte TrlMask byte RecBits byte DupBits byte TrlBits byte KeyBytes byte // total bytes per key info entry } // DecodeLeafHeader extracts the 24-byte leaf header from page data. func DecodeLeafHeader(data []byte) LeafHeader { return LeafHeader{ Attr: binary.LittleEndian.Uint16(data[0:2]), NKeys: binary.LittleEndian.Uint16(data[2:4]), LeftPtr: binary.LittleEndian.Uint32(data[4:8]), RightPtr: binary.LittleEndian.Uint32(data[8:12]), FreeSpc: binary.LittleEndian.Uint16(data[12:14]), RecMask: binary.LittleEndian.Uint32(data[14:18]), DupMask: data[18], TrlMask: data[19], RecBits: data[20], DupBits: data[21], TrlBits: data[22], KeyBytes: data[23], } } // DecodedKey holds a single decoded key from a leaf page. type DecodedKey struct { RecNo uint32 Key []byte } // DecodeLeafKeys extracts all keys from a CDX leaf page — convenience // wrapper that allocates fresh buffers. Hot call sites (per-tag seek // loops) should use DecodeLeafKeysInto to recycle storage. func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey { keys, _ := DecodeLeafKeysInto(data, hdr, keyLen, nil, nil) return keys } // DecodeLeafKeysInto is the allocation-aware variant. `slabReuse` and // `keysReuse` are previous buffers (may be nil on first call, or carry // stale data from a prior decode). Returns fresh `keys` and the backing // `slab` — both valid until the next call that reuses them. Ported // from rddfive/cdx_engine.c cdx_leaf_decode_all() — byte-level decode, // 10× faster than bit-by-bit extractBits loop. // // Reuse contract: caller must not retain pointers into an earlier // slab after passing it here. CDX.Tag's page cache already observes // this invariant because it overwrites cachedLeafKeys on miss. func DecodeLeafKeysInto(data []byte, hdr LeafHeader, keyLen int, slabReuse []byte, keysReuse []DecodedKey) ([]DecodedKey, []byte) { if hdr.NKeys == 0 { return nil, slabReuse } nKeys := int(hdr.NKeys) recBits := int(hdr.RecBits) dupBits := int(hdr.DupBits) trlBits := int(hdr.TrlBits) reqByte := int(hdr.KeyBytes) recMask := uint32((1 << uint(recBits)) - 1) dcMask := uint32((1 << uint(dupBits)) - 1) tcMask := uint32((1 << uint(trlBits)) - 1) // Reuse or grow the DecodedKey slice. var keys []DecodedKey if cap(keysReuse) >= nKeys { keys = keysReuse[:nKeys] } else { keys = make([]DecodedKey, nKeys) } // Reuse or grow the byte slab (one alloc replaces 30+ per page). needBytes := nKeys*keyLen + keyLen // +keyLen for prevKey var slab []byte if cap(slabReuse) >= needBytes { slab = slabReuse[:needBytes] } else { slab = make([]byte, needBytes) } prevKey := slab[nKeys*keyLen:] for j := range prevKey { prevKey[j] = ' ' } totalKeyBytes := 0 for i := 0; i < nKeys; i++ { // Read reqByte bytes as little-endian integer (C: val = src[j] << 8 | ...) src := data[ExtHeadSize+i*reqByte:] var val uint64 for j := reqByte - 1; j >= 0; j-- { val <<= 8 val |= uint64(src[j]) } recNo := uint32(val) & recMask val >>= uint(recBits) dup := int(uint32(val) & dcMask) val >>= uint(dupBits) trl := int(uint32(val) & tcMask) newBytes := keyLen - dup - trl key := slab[i*keyLen : (i+1)*keyLen] if dup > 0 { copy(key[:dup], prevKey[:dup]) } if newBytes > 0 { kp := PageLen - totalKeyBytes - newBytes if kp >= ExtHeadSize && kp+newBytes <= PageLen { copy(key[dup:dup+newBytes], data[kp:kp+newBytes]) } totalKeyBytes += newBytes } if trl > 0 { for j := keyLen - trl; j < keyLen; j++ { key[j] = ' ' } } keys[i] = DecodedKey{RecNo: recNo, Key: key} copy(prevKey, key) } return keys, slab } // extractBits extracts n bits from a byte array starting at bit offset. func extractBits(data []byte, bitOffset, nBits uint) uint32 { if nBits == 0 { return 0 } var result uint32 for i := uint(0); i < nBits; i++ { bytePos := (bitOffset + i) / 8 bitPos := (bitOffset + i) % 8 if int(bytePos) < len(data) { if data[bytePos]&(1< PageLen { break } e := IntKeyEntry{ Key: make([]byte, keyLen), } if i < nKeys { copy(e.Key, data[off:off+keyLen]) e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4]) e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8]) } else { // Trailing child — no key/recNo, just child pointer // In CDX, the rightmost child is the child of the last separator entry // Actually, for the last entry we only need the child pointer // The previous entry's child was the LEFT child; we need RIGHT child // stored at the position after last key entry copy(e.Key, data[off:off+keyLen]) e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4]) e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8]) } entries[i] = e off += entrySize } return entries } // --- CDX Index (compound, multi-tag) --- // Index represents an open CDX index file. type Index struct { file *os.File tags []*Tag mmapData []byte // mmap'd file for zero-copy reads } // readAt reads len(buf) bytes at offset — from mmap or file fallback. // Prefer pageSlice() on hot paths; this entry point stays for callers // that need a writable, caller-owned copy. func (idx *Index) readAt(buf []byte, offset int64) error { if idx.mmapData != nil && offset >= 0 && int(offset)+len(buf) <= len(idx.mmapData) { copy(buf, idx.mmapData[offset:offset+int64(len(buf))]) return nil } _, err := idx.file.ReadAt(buf, offset) return err } // pageSlice returns a read-only view of one B-tree page at offset — a // direct slice of mmap when possible (zero copy), else a read into the // caller's fallback buffer. Returns nil on read error. The returned // slice is valid until the index is remapped, closed, or until the // next fallbackBuf reuse — callers must not retain it across those // events. The seek loop uses this: same Tag.seekBuf gets handed back // for every internal-node visit, so the non-mmap path allocates once, // and the mmap path allocates nothing. func (idx *Index) pageSlice(offset int64, fallbackBuf []byte) []byte { if idx.mmapData != nil && offset >= 0 && int(offset)+PageLen <= len(idx.mmapData) { return idx.mmapData[offset : offset+PageLen] } if len(fallbackBuf) < PageLen { fallbackBuf = make([]byte, PageLen) } if _, err := idx.file.ReadAt(fallbackBuf[:PageLen], offset); err != nil { return nil } return fallbackBuf[:PageLen] } // Tag represents one index tag within a CDX file. type Tag struct { Name string // tag name (e.g., "BYNAME") index *Index header TagHeader headerOff int64 // file offset of this tag's header keyLen int // Current position stack [StackSize]StackEntry stackLevel int curRecNo uint32 curKey []byte tagBOF bool tagEOF bool // Leaf page decode cache — avoids re-decoding same page on SkipNext/SkipPrev cachedLeafOff int64 cachedLeafKeys []DecodedKey // Reusable backing storage for the key slab and DecodedKey slice. // cachedLeafKeys[i].Key aliases slices of cachedLeafSlab, so the // slab stays alive as long as cachedLeafKeys is in use. On cache // miss we hand both buffers back to DecodeLeafKeysInto which // reuses them if big enough — saving one alloc per leaf decode. cachedLeafSlab []byte // seekBuf is handed to Index.pageSlice as the fallback when mmap // isn't available (Windows, or file grown past mapped size). The // mmap path ignores it and returns a slice directly into mapped // memory — zero copy. Either way, allocating a single 512-byte // buffer per Tag (not per Seek) eliminates the per-seek alloc. seekBuf []byte } type StackEntry struct { PageOffset int64 KeyIndex int } // OpenIndex opens a CDX file and reads all tags. func OpenIndex(path string) (*Index, error) { if !strings.HasSuffix(strings.ToLower(path), ".cdx") { path += ".cdx" } f, err := os.OpenFile(path, os.O_RDWR, 0) if err != nil { return nil, err } idx := &Index{file: f} // mmap for zero-copy reads (platform-specific, no-op on Windows) if fi, err2 := f.Stat(); err2 == nil && fi.Size() > 0 { if data, err2 := mmapFile(f, int(fi.Size())); err2 == nil { idx.mmapData = data } } // Read compound header (structural root at offset 0) rootHdr, err := ReadTagHeader(f, 0) if err != nil { f.Close() return nil, err } // Parse compound tag directory from the structural root's B-tree // The structural index keys are 10-byte tag names, and each leaf entry // points to the tag header at a specific file offset. tagEntries := readCompoundTagList(idx, rootHdr) // Harbour orders tags by file offset (TagBlock) ascending, which // corresponds to creation order. The compound B-tree stores entries // alphabetically, so we must re-sort by offset to match Harbour. // See: hb_cdxIndexLoadAvailTags in dbfcdx1.c sort.Slice(tagEntries, func(i, j int) bool { return tagEntries[i].offset < tagEntries[j].offset }) for _, entry := range tagEntries { tagHdr, err := ReadTagHeader(f, entry.offset) if err != nil { continue } tag := &Tag{ Name: entry.name, index: idx, header: *tagHdr, headerOff: entry.offset, keyLen: int(tagHdr.KeySize), curKey: make([]byte, tagHdr.KeySize), } idx.tags = append(idx.tags, tag) } // If no tags found via compound directory, fall back to root as single tag if len(idx.tags) == 0 { tag := &Tag{ Name: "TAG1", index: idx, header: *rootHdr, headerOff: 0, keyLen: int(rootHdr.KeySize), curKey: make([]byte, rootHdr.KeySize), } idx.tags = append(idx.tags, tag) } return idx, nil } // Close closes the CDX file. func (idx *Index) Close() error { if idx.mmapData != nil { munmapFile(idx.mmapData) idx.mmapData = nil } return idx.file.Close() } // TagCount returns the number of tags. func (idx *Index) TagCount() int { return len(idx.tags) } // GetTag returns a tag by index. func (idx *Index) GetTag(i int) *Tag { if i >= 0 && i < len(idx.tags) { return idx.tags[i] } return nil } // Tags returns all tags in the CDX. func (idx *Index) Tags() []*Tag { return idx.tags } // FindTag returns a tag by name. func (idx *Index) FindTag(name string) *Tag { upper := strings.ToUpper(name) for _, t := range idx.tags { if strings.ToUpper(t.Name) == upper { return t } // Also try key expression match if strings.ToUpper(t.header.KeyExpr) == upper { return t } } return nil } // tagDirEntry is a compound tag directory entry. type tagDirEntry struct { name string offset int64 } // readCompoundTagList reads tag names and offsets from the structural root. // CDX compound header: root page is a B-tree of tag entries. // Each leaf key = 10-byte tag name, record number = page offset / 512. func readCompoundTagList(idx *Index, rootHdr *TagHeader) []tagDirEntry { var entries []tagDirEntry if rootHdr.RootPtr == 0 { return entries } // Read the root page of the structural index pageData := make([]byte, 512) err := idx.readAt(pageData, int64(rootHdr.RootPtr)) if err != nil { return entries } // CDX page header: [attr:2][nKeys:2][leftPtr:4][rightPtr:4] nKeys := int(binary.LittleEndian.Uint16(pageData[2:4])) attr := binary.LittleEndian.Uint16(pageData[0:2]) isLeaf := (attr & 0x02) != 0 if isLeaf { entries = decodeCompoundLeaf(pageData, nKeys) } // If compound leaf decoding didn't find entries, scan for tag headers if len(entries) == 0 { entries = scanCompoundLeaves(idx, rootHdr) } return entries } // scanCompoundLeaves scans the CDX file for tag headers. // CDX tag headers are at 0x400 (1024) byte boundaries. // Each tag header is followed by a page with the key expression string. func scanCompoundLeaves(idx *Index, rootHdr *TagHeader) []tagDirEntry { var entries []tagDirEntry fileInfo, err := idx.file.Stat() if err != nil { return entries } fileSize := fileInfo.Size() // Scan at 0x400 intervals; tag headers have: // - RootPtr (uint32 at offset 0) pointing to a valid page // - KeySize (uint16 at offset 12) between 1..240 // - Key expression string at +0x200 (offset 0x106 from header start) // Skip offset 0 (compound root) and scan the rest // Skip compound header at 0x0000; scan from 0x0400 onwards // Tag headers are at 0x400 boundaries but NOT the compound root itself for off := int64(0x400); off < fileSize; off += 0x200 { buf := make([]byte, 0x400) err := idx.readAt(buf, off) if err != nil { continue } rootPtr := binary.LittleEndian.Uint32(buf[0:4]) keySize := binary.LittleEndian.Uint16(buf[12:14]) if keySize == 0 || keySize > 240 || rootPtr == 0 { continue } // Validate rootPtr is within file and at a valid page boundary if int64(rootPtr) >= fileSize || rootPtr%512 != 0 { continue } // Read key expression from offset 0x106 within the header keyExpr := "" for i := 0x106; i < 0x206 && i < len(buf) && buf[i] != 0; i++ { keyExpr += string(buf[i]) } if keyExpr == "" { // Key expression might be in the next page (+0x200 from header) exprBuf := make([]byte, 256) idx.readAt(exprBuf, off+0x200) for i := 0; i < len(exprBuf) && exprBuf[i] != 0; i++ { keyExpr += string(exprBuf[i]) } } if keyExpr == "" { continue } name := strings.ToUpper(strings.TrimSpace(keyExpr)) // Use "BY" + field name convention, or just the expression entries = append(entries, tagDirEntry{name: name, offset: off}) } return entries } // decodeCompoundLeaf decodes tag entries from a compound leaf page. // Compound index uses the same bit-packed format as data leaves, // with keyLen=10 (tag name) and recNo = page offset / PageLen. func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry { if nKeys <= 0 || len(data) < ExtHeadSize { return nil } // Use the standard leaf key decoder with keyLen=10 (compound tag name size) hdr := DecodeLeafHeader(data) keys := DecodeLeafKeys(data, hdr, 10) var entries []tagDirEntry for _, dk := range keys { name := trimNull(dk.Key) name = strings.TrimSpace(name) if name == "" { continue } // RecNo in compound index = direct byte offset to tag header entries = append(entries, tagDirEntry{name: name, offset: int64(dk.RecNo)}) } return entries } // getLeafKeys returns decoded leaf keys with caching. On cache miss the // previous slab + key slice are recycled into DecodeLeafKeysInto so we // avoid a fresh alloc for every leaf traversed during a seek-heavy // workload (which is the whole point of caching them per-Tag). func (t *Tag) getLeafKeys(pageOffset int64) ([]DecodedKey, error) { if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil { return t.cachedLeafKeys, nil } buf := make([]byte, PageLen) if err := t.index.readAt(buf, pageOffset); err != nil { return nil, err } hdr := DecodeLeafHeader(buf) keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys t.cachedLeafSlab = slab return keys, nil } // --- Tag navigation --- // Seek searches for a key in the CDX tag's B-tree. // Seek searches the B-tree iteratively (no recursion, single buffer reuse). func (t *Tag) Seek(searchKey []byte) (uint32, bool) { t.stackLevel = 0 t.tagBOF = false t.tagEOF = false pageOffset := int64(t.header.RootPtr) // Reuse seekBuf across seeks so the non-mmap fallback path only // allocates once per Tag lifetime. With mmap, pageSlice returns a // view directly into mapped memory and seekBuf stays unused. if cap(t.seekBuf) < PageLen { t.seekBuf = make([]byte, PageLen) } entrySize := t.keyLen + 8 searchLen := len(searchKey) for { buf := t.index.pageSlice(pageOffset, t.seekBuf) if buf == nil { t.tagEOF = true return 0, false } attr := binary.LittleEndian.Uint16(buf[0:2]) if (attr & NodeLeaf) != 0 { // === LEAF NODE === var keys []DecodedKey if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil { keys = t.cachedLeafKeys } else { hdr := DecodeLeafHeader(buf) var slab []byte keys, slab = DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys t.cachedLeafSlab = slab } // Binary search — leftmost match lo, hi := 0, len(keys)-1 foundIdx := -1 for lo <= hi { mid := (lo + hi) / 2 cmp := bytes.Compare(searchKey, keys[mid].Key[:searchLen]) if cmp == 0 { foundIdx = mid hi = mid - 1 } else if cmp < 0 { hi = mid - 1 } else { lo = mid + 1 } } if foundIdx >= 0 { t.curRecNo = keys[foundIdx].RecNo copy(t.curKey, keys[foundIdx].Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: foundIdx} t.stackLevel++ } return keys[foundIdx].RecNo, true } if lo < len(keys) { t.curRecNo = keys[lo].RecNo copy(t.curKey, keys[lo].Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lo} t.stackLevel++ } return keys[lo].RecNo, false } // Past all keys: follow rightPtr hdr := DecodeLeafHeader(buf) if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF { pageOffset = int64(hdr.RightPtr) continue // iterate instead of recurse } t.tagEOF = true t.curRecNo = 0 return 0, false } // === INTERNAL NODE (inline binary search, zero alloc) === nKeys := int(binary.LittleEndian.Uint16(buf[2:4])) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } // Binary search on internal keys found := false lo, hi := 0, nKeys-1 for lo <= hi { mid := (lo + hi) / 2 off := IntHeadSize + mid*entrySize cmp := bytes.Compare(searchKey, buf[off:off+t.keyLen]) if cmp <= 0 { hi = mid - 1 } else { lo = mid + 1 } } // lo = insertion point; follow child at lo if lo < nKeys { off := IntHeadSize + lo*entrySize childPage := binary.BigEndian.Uint32(buf[off+t.keyLen+4 : off+t.keyLen+8]) t.stack[t.stackLevel-1].KeyIndex = lo if childPage != 0 { pageOffset = int64(childPage) found = true } } if !found { // Follow trailing child trailOff := IntHeadSize + nKeys*entrySize t.stack[t.stackLevel-1].KeyIndex = nKeys if trailOff+t.keyLen+8 <= PageLen { trailChild := binary.BigEndian.Uint32(buf[trailOff+t.keyLen+4 : trailOff+t.keyLen+8]) if trailChild != 0 { pageOffset = int64(trailChild) found = true } } } if !found { t.tagEOF = true return 0, false } // continue loop with new pageOffset } } // GoTop positions at the first key. func (t *Tag) GoTop() bool { t.stackLevel = 0 t.tagBOF = false t.tagEOF = false return t.goLeftmost(int64(t.header.RootPtr)) } func (t *Tag) goLeftmost(pageOffset int64) bool { buf := make([]byte, PageLen) if err := t.index.readAt(buf, pageOffset); err != nil { return false } attr := binary.LittleEndian.Uint16(buf[0:2]) isLeaf := (attr & NodeLeaf) != 0 if isLeaf { hdr := DecodeLeafHeader(buf) keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys t.cachedLeafSlab = slab if len(keys) > 0 { t.curRecNo = keys[0].RecNo copy(t.curKey, keys[0].Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } return true } return false } // Internal: follow first child (zero allocation — read directly from buf) nKeys := int(binary.LittleEndian.Uint16(buf[2:4])) if nKeys > 0 { entrySize := t.keyLen + 8 child0 := binary.BigEndian.Uint32(buf[IntHeadSize+t.keyLen+4 : IntHeadSize+entrySize]) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } return t.goLeftmost(int64(child0)) } return false } // GoBottom positions at the last key. func (t *Tag) GoBottom() bool { t.stackLevel = 0 t.tagBOF = false t.tagEOF = false return t.goRightmost(int64(t.header.RootPtr)) } func (t *Tag) goRightmost(pageOffset int64) bool { buf := make([]byte, PageLen) if err := t.index.readAt(buf, pageOffset); err != nil { return false } attr := binary.LittleEndian.Uint16(buf[0:2]) isLeaf := (attr & NodeLeaf) != 0 if isLeaf { hdr := DecodeLeafHeader(buf) keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys) t.cachedLeafOff = pageOffset t.cachedLeafKeys = keys t.cachedLeafSlab = slab if len(keys) > 0 { last := len(keys) - 1 t.curRecNo = keys[last].RecNo copy(t.curKey, keys[last].Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: last} t.stackLevel++ } return true } return false } // Internal: follow last child node := DecodeIntNode(buf) intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen) lastIdx := int(node.NKeys) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastIdx} t.stackLevel++ } return t.goRightmost(int64(intKeys[lastIdx].ChildPage)) } // SkipNext moves to the next key in leaf using rightPtr linked list. // CDX leaf pages are doubly linked — simpler than NTX stack traversal. // SkipNext moves to the next key. Uses cached leaf decode. func (t *Tag) SkipNext() bool { if t.stackLevel == 0 { t.tagEOF = true return false } level := t.stackLevel - 1 pageOffset := t.stack[level].PageOffset keyIdx := t.stack[level].KeyIndex keys, err := t.getLeafKeys(pageOffset) if err != nil { t.tagEOF = true return false } // Next key in same page? (cache hit — no decode) if keyIdx+1 < len(keys) { t.stack[level].KeyIndex = keyIdx + 1 t.curRecNo = keys[keyIdx+1].RecNo copy(t.curKey, keys[keyIdx+1].Key) return true } // Follow rightPtr to next leaf (CDX linked list) buf := make([]byte, PageLen) if err := t.index.readAt(buf, pageOffset); err != nil { t.tagEOF = true return false } hdr := DecodeLeafHeader(buf) if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF { nextOff := int64(hdr.RightPtr) keys2, err := t.getLeafKeys(nextOff) if err == nil && len(keys2) > 0 { t.stack[level] = StackEntry{PageOffset: nextOff, KeyIndex: 0} t.curRecNo = keys2[0].RecNo copy(t.curKey, keys2[0].Key) return true } } t.tagEOF = true return false } // SkipPrev moves to the previous key. Uses cached leaf decode. func (t *Tag) SkipPrev() bool { if t.stackLevel == 0 { t.tagBOF = true return false } level := t.stackLevel - 1 pageOffset := t.stack[level].PageOffset keyIdx := t.stack[level].KeyIndex // Previous key in same page? (cache hit) if keyIdx > 0 { keys, err := t.getLeafKeys(pageOffset) if err == nil { t.stack[level].KeyIndex = keyIdx - 1 t.curRecNo = keys[keyIdx-1].RecNo copy(t.curKey, keys[keyIdx-1].Key) return true } } // Follow leftPtr buf := make([]byte, PageLen) if err := t.index.readAt(buf, pageOffset); err != nil { t.tagBOF = true return false } hdr := DecodeLeafHeader(buf) if hdr.LeftPtr != 0 && hdr.LeftPtr != 0xFFFFFFFF { prevOff := int64(hdr.LeftPtr) keys2, err := t.getLeafKeys(prevOff) if err == nil && len(keys2) > 0 { last := len(keys2) - 1 t.stack[level] = StackEntry{PageOffset: prevOff, KeyIndex: last} t.curRecNo = keys2[last].RecNo copy(t.curKey, keys2[last].Key) return true } } t.tagBOF = true return false } // CurRecNo returns the current record number. func (t *Tag) CurRecNo() uint32 { return t.curRecNo } // CurKey returns the current key. func (t *Tag) CurKey() []byte { return t.curKey[:t.keyLen] } // IsEOF returns true if past end. func (t *Tag) IsEOF() bool { return t.tagEOF } // IsBOF returns true if before start. func (t *Tag) IsBOF() bool { return t.tagBOF } // KeyLen returns the key length. func (t *Tag) KeyLen() int { return t.keyLen } // KeyExpr returns the key expression string stored in the CDX header. func (t *Tag) KeyExpr() string { return t.header.KeyExpr } // ForExpr returns the FOR condition expression. func (t *Tag) ForExpr() string { return t.header.ForExpr } // IsDescending returns true if the tag sorts in descending order. func (t *Tag) IsDescending() bool { return t.header.Descending } // HeaderOffset returns the file offset of this tag's header block. func (t *Tag) HeaderOffset() int64 { return t.headerOff } // RootPtr returns the root page offset for this tag's B-tree. func (t *Tag) RootPtr() uint32 { return t.header.RootPtr } // Close is a no-op for tags (the parent Index owns the file). func (t *Tag) Close() error { return nil } // --- Helpers --- func trimNull(b []byte) string { for i, c := range b { if c == 0 { return strings.TrimSpace(string(b[:i])) } } return strings.TrimSpace(string(b)) }