All 3 remaining known constraints resolved. CLAUDE.md now shows zero.
1. CDX compound index WRITE support (was read-only)
New file: hbrdd/cdx/build.go (~400 LOC)
- CreateOrAddTag() builds Harbour-compatible CDX files
- Bit-packed leaf pages (RecBits/DupBits/TrlBits compression)
- Interior nodes with big-endian RecNo/ChildPage
- Compound root directory (structural B-tree of tag names)
- Append-safe: preserves existing tags when adding new ones
- Linked leaf pages (LeftPtr/RightPtr for sequential scan)
Pipeline: INDEX ON expr TAG tagname TO file
- ast.IndexCmd gains TagName field
- Parser captures TAG name (was discarded)
- gengo passes TagName to OrderCreateParams
- indexer.go routes to cdx.CreateOrAddTag when TAG specified
Verified: 3 tags (BYNAME/BYCITY/BYAGE), OrdSetFocus by name,
SEEK, GoTop/GoBottom, close+reopen with SET INDEX TO
2. {||} empty code block parsing in function arguments
Parser's parseArrayOrBlock() called parseExpr() unconditionally
after closing |, failing when body was empty ({||}).
Fix: check for RBRACE after closing | and emit NIL literal body.
{=>} empty hash already worked.
3. Semicolon IF...ENDIF — already worked (removed from constraints)
Tests:
go test ./... 14 packages ALL PASS
FiveSql2 43/43 100%
compat_harbour 51/51
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
959 lines
25 KiB
Go
959 lines
25 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// CDX compound index engine for Five.
|
|
// Byte-compatible with Harbour/FoxPro CDX files.
|
|
// CDX uses FPT memo format (not DBT).
|
|
//
|
|
// Key differences from NTX:
|
|
// - 512-byte pages (vs NTX 1024)
|
|
// - Compound index: multiple tags per file
|
|
// - Bit-packed leaf keys: recBits/dupBits/trlBits compression
|
|
// - Linked leaf pages (leftPtr/rightPtr)
|
|
//
|
|
// Reference:
|
|
// /mnt/d/harbour-core/include/hbrddcdx.h
|
|
// /mnt/d/harbour-core/src/rdd/dbfcdx/dbfcdx1.c
|
|
// docs/rdd-architecture-spec.md
|
|
package cdx
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
)
|
|
|
|
// Sizes and limits of the CDX on-disk format. The trailing comments give the
// name of the matching Harbour macro.
const (
	PageLen       = 1 << 9 // CDX_PAGELEN: 512 bytes
	HeaderLen     = 1024   // CDX_HEADERLEN
	MaxKey        = 240    // CDX_MAXKEY
	MaxTagNameLen = 10     // CDX_MAXTAGNAMELEN
	StackSize     = 64     // CDX_STACKSIZE
	IntHeadSize   = 12     // CDX_INT_HEADSIZE
	ExtHeadSize   = 24     // CDX_EXT_HEADSIZE

	// HeaderExpLen is the part of the header that follows its first 512 bytes.
	HeaderExpLen = HeaderLen - 512
)

// Node types.
const (
	NodeBranch = 0 // CDX_NODE_BRANCH
	NodeRoot   = 1 // CDX_NODE_ROOT
	NodeLeaf   = 2 // CDX_NODE_LEAF
	NodeUnused = 0xFF
)

// Type flags.
const (
	TypeUnique    = 0x01
	TypePartial   = 0x02
	TypeCustom    = 0x04
	TypeForFilter = 0x08
	TypeCompact   = 0x20
	TypeCompound  = 0x40
	TypeStructure = 0x80
)
|
|
|
|
// --- Tag Header (512 bytes in file, at start of each tag's header page) ---
|
|
|
|
// TagHeader is the decoded metadata of one CDX tag.
// Harbour: CDXTAGHEADER in hbrddcdx.h:188
type TagHeader struct {
	RootPtr   uint32 // offset of the root page
	FreePtr   uint32 // free page list
	Counter   uint32 // update counter
	KeySize   uint16 // key length in bytes (max 240)
	IndexOpt  byte   // CDX_TYPE_* option flags
	IndexSig  byte   // signature byte
	HeaderLen uint16 // header length as stored; typically 0x0400
	PageLen   uint16 // page length as stored

	KeyExpr, ForExpr string // key expression and FOR filter expression

	Descending, IgnoreCase bool
}
|
|
|
|
// ReadTagHeader reads a CDX tag header from file at given offset.
|
|
func ReadTagHeader(f *os.File, offset int64) (*TagHeader, error) {
|
|
buf := make([]byte, HeaderLen)
|
|
if _, err := f.ReadAt(buf, offset); err != nil {
|
|
return nil, fmt.Errorf("read CDX tag header at %d: %w", offset, err)
|
|
}
|
|
|
|
th := &TagHeader{
|
|
RootPtr: binary.LittleEndian.Uint32(buf[0:4]),
|
|
FreePtr: binary.LittleEndian.Uint32(buf[4:8]),
|
|
Counter: binary.LittleEndian.Uint32(buf[8:12]),
|
|
KeySize: binary.LittleEndian.Uint16(buf[12:14]),
|
|
IndexOpt: buf[14],
|
|
IndexSig: buf[15],
|
|
HeaderLen: binary.LittleEndian.Uint16(buf[16:18]),
|
|
PageLen: binary.LittleEndian.Uint16(buf[18:20]),
|
|
}
|
|
|
|
th.IgnoreCase = buf[503] != 0
|
|
th.Descending = binary.LittleEndian.Uint16(buf[504:506]) != 0
|
|
|
|
// Key/For expressions — stored directly at offset 512 (0x200) within the header block.
|
|
// CDX format: key expression at byte 512, for expression follows after null terminator.
|
|
keyExprStart := 512
|
|
th.KeyExpr = trimNull(buf[keyExprStart:])
|
|
|
|
// FOR expression follows key expression (after null terminator)
|
|
forStart := keyExprStart + len(th.KeyExpr) + 1
|
|
if forStart < len(buf) {
|
|
th.ForExpr = trimNull(buf[forStart:])
|
|
}
|
|
|
|
return th, nil
|
|
}
|
|
|
|
// --- Leaf page: bit-packed key extraction ---
|
|
|
|
// LeafHeader is the decoded fixed-size header found at the start of a leaf
// page. The fields are listed in the order DecodeLeafHeader reads them.
// Harbour: CDXEXTNODE in hbrddcdx.h:224
type LeafHeader struct {
	Attr, NKeys       uint16
	LeftPtr, RightPtr uint32
	FreeSpc           uint16
	RecMask           uint32
	DupMask, TrlMask  byte
	RecBits           byte
	DupBits           byte
	TrlBits           byte
	KeyBytes          byte // total bytes occupied by one packed key info entry
}
|
|
|
|
// DecodeLeafHeader extracts the 24-byte leaf header from page data.
|
|
func DecodeLeafHeader(data []byte) LeafHeader {
|
|
return LeafHeader{
|
|
Attr: binary.LittleEndian.Uint16(data[0:2]),
|
|
NKeys: binary.LittleEndian.Uint16(data[2:4]),
|
|
LeftPtr: binary.LittleEndian.Uint32(data[4:8]),
|
|
RightPtr: binary.LittleEndian.Uint32(data[8:12]),
|
|
FreeSpc: binary.LittleEndian.Uint16(data[12:14]),
|
|
RecMask: binary.LittleEndian.Uint32(data[14:18]),
|
|
DupMask: data[18],
|
|
TrlMask: data[19],
|
|
RecBits: data[20],
|
|
DupBits: data[21],
|
|
TrlBits: data[22],
|
|
KeyBytes: data[23],
|
|
}
|
|
}
|
|
|
|
// DecodedKey is one fully expanded entry of a leaf page: the record number
// together with the key bytes.
type DecodedKey struct {
	RecNo uint32 // record number unpacked from the entry
	Key   []byte // expanded key bytes
}
|
|
|
|
// DecodeLeafKeys extracts all keys from a CDX leaf page.
|
|
// Ported from rddfive/cdx_engine.c cdx_leaf_decode_all() — byte-level decode.
|
|
// 10x+ faster than bit-by-bit extractBits loop.
|
|
func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey {
|
|
if hdr.NKeys == 0 {
|
|
return nil
|
|
}
|
|
|
|
nKeys := int(hdr.NKeys)
|
|
recBits := int(hdr.RecBits)
|
|
dupBits := int(hdr.DupBits)
|
|
trlBits := int(hdr.TrlBits)
|
|
reqByte := int(hdr.KeyBytes)
|
|
recMask := uint32((1 << uint(recBits)) - 1)
|
|
dcMask := uint32((1 << uint(dupBits)) - 1)
|
|
tcMask := uint32((1 << uint(trlBits)) - 1)
|
|
|
|
// Slab allocation: one alloc for all keys (avoids 30+ allocations per page)
|
|
keys := make([]DecodedKey, nKeys)
|
|
slab := make([]byte, nKeys*keyLen+keyLen) // +keyLen for prevKey
|
|
prevKey := slab[nKeys*keyLen:]
|
|
for j := range prevKey {
|
|
prevKey[j] = ' '
|
|
}
|
|
totalKeyBytes := 0
|
|
|
|
for i := 0; i < nKeys; i++ {
|
|
// Read reqByte bytes as little-endian integer (C: val = src[j] << 8 | ...)
|
|
src := data[ExtHeadSize+i*reqByte:]
|
|
var val uint64
|
|
for j := reqByte - 1; j >= 0; j-- {
|
|
val <<= 8
|
|
val |= uint64(src[j])
|
|
}
|
|
|
|
recNo := uint32(val) & recMask
|
|
val >>= uint(recBits)
|
|
dup := int(uint32(val) & dcMask)
|
|
val >>= uint(dupBits)
|
|
trl := int(uint32(val) & tcMask)
|
|
|
|
newBytes := keyLen - dup - trl
|
|
|
|
key := slab[i*keyLen : (i+1)*keyLen]
|
|
if dup > 0 {
|
|
copy(key[:dup], prevKey[:dup])
|
|
}
|
|
if newBytes > 0 {
|
|
kp := PageLen - totalKeyBytes - newBytes
|
|
if kp >= ExtHeadSize && kp+newBytes <= PageLen {
|
|
copy(key[dup:dup+newBytes], data[kp:kp+newBytes])
|
|
}
|
|
totalKeyBytes += newBytes
|
|
}
|
|
if trl > 0 {
|
|
for j := keyLen - trl; j < keyLen; j++ {
|
|
key[j] = ' '
|
|
}
|
|
}
|
|
|
|
keys[i] = DecodedKey{RecNo: recNo, Key: key}
|
|
copy(prevKey, key)
|
|
}
|
|
|
|
return keys
|
|
}
|
|
|
|
// extractBits returns nBits bits of data starting at bit position bitOffset.
// Bit 0 is the least significant bit of data[0], and the first bit read
// becomes the least significant bit of the result. Bits that lie beyond the
// end of data are read as zero.
func extractBits(data []byte, bitOffset, nBits uint) uint32 {
	var out uint32
	for k := uint(0); k < nBits; k++ {
		pos := bitOffset + k
		idx := int(pos >> 3)
		if idx >= len(data) {
			continue
		}
		bit := (data[idx] >> (pos & 7)) & 1
		out |= uint32(bit) << k
	}
	return out
}
|
|
|
|
// --- Interior node ---
|
|
|
|
// IntNode is the decoded fixed header of an internal node page, in the order
// DecodeIntNode reads the fields.
type IntNode struct {
	Attr, NKeys       uint16
	LeftPtr, RightPtr uint32
}
|
|
|
|
func DecodeIntNode(data []byte) IntNode {
|
|
return IntNode{
|
|
Attr: binary.LittleEndian.Uint16(data[0:2]),
|
|
NKeys: binary.LittleEndian.Uint16(data[2:4]),
|
|
LeftPtr: binary.LittleEndian.Uint32(data[4:8]),
|
|
RightPtr: binary.LittleEndian.Uint32(data[8:12]),
|
|
}
|
|
}
|
|
|
|
// IntKeyEntry is one decoded entry of an internal node.
// DecodeIntKeys reads each entry from the page as the key bytes followed by
// the record number and the child page, both big-endian uint32.
type IntKeyEntry struct {
	ChildPage uint32 // child page value as stored in the entry
	RecNo     uint32 // record number stored with the key
	Key       []byte // key bytes
}
|
|
|
|
// DecodeIntKeys extracts keys from a CDX internal node.
|
|
// CDX internal format: [key: keyLen bytes][recNo: 4 BE][childPage: 4 BE]
|
|
// This is different from NTX which uses [child LE][recNo LE][key].
|
|
func DecodeIntKeys(data []byte, nKeys int, keyLen int) []IntKeyEntry {
|
|
entries := make([]IntKeyEntry, nKeys+1) // nKeys separators + 1 trailing child
|
|
entrySize := keyLen + 8 // key + recNo(4BE) + child(4BE)
|
|
off := IntHeadSize
|
|
|
|
for i := 0; i <= nKeys; i++ {
|
|
if off+entrySize > PageLen {
|
|
break
|
|
}
|
|
e := IntKeyEntry{
|
|
Key: make([]byte, keyLen),
|
|
}
|
|
if i < nKeys {
|
|
copy(e.Key, data[off:off+keyLen])
|
|
e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4])
|
|
e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8])
|
|
} else {
|
|
// Trailing child — no key/recNo, just child pointer
|
|
// In CDX, the rightmost child is the child of the last separator entry
|
|
// Actually, for the last entry we only need the child pointer
|
|
// The previous entry's child was the LEFT child; we need RIGHT child
|
|
// stored at the position after last key entry
|
|
copy(e.Key, data[off:off+keyLen])
|
|
e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4])
|
|
e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8])
|
|
}
|
|
entries[i] = e
|
|
off += entrySize
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
// --- CDX Index (compound, multi-tag) ---
|
|
|
|
// Index represents an open CDX index file.
|
|
type Index struct {
|
|
file *os.File
|
|
tags []*Tag
|
|
mmapData []byte // mmap'd file for zero-copy reads
|
|
}
|
|
|
|
// readAt reads len(buf) bytes at offset — from mmap or file fallback.
|
|
func (idx *Index) readAt(buf []byte, offset int64) error {
|
|
if idx.mmapData != nil && offset >= 0 && int(offset)+len(buf) <= len(idx.mmapData) {
|
|
copy(buf, idx.mmapData[offset:offset+int64(len(buf))])
|
|
return nil
|
|
}
|
|
_, err := idx.file.ReadAt(buf, offset)
|
|
return err
|
|
}
|
|
|
|
// Tag represents one index tag within a CDX file.
|
|
type Tag struct {
|
|
Name string // tag name (e.g., "BYNAME")
|
|
index *Index
|
|
header TagHeader
|
|
headerOff int64 // file offset of this tag's header
|
|
keyLen int
|
|
|
|
// Current position
|
|
stack [StackSize]StackEntry
|
|
stackLevel int
|
|
curRecNo uint32
|
|
curKey []byte
|
|
tagBOF bool
|
|
tagEOF bool
|
|
|
|
// Leaf page decode cache — avoids re-decoding same page on SkipNext/SkipPrev
|
|
cachedLeafOff int64
|
|
cachedLeafKeys []DecodedKey
|
|
}
|
|
|
|
// StackEntry records one page of a tag's navigation path and the index of the
// key selected within that page.
type StackEntry struct {
	PageOffset int64 // file offset of the page
	KeyIndex   int   // index of the selected key within the page
}
|
|
|
|
// OpenIndex opens a CDX file and reads all tags.
|
|
func OpenIndex(path string) (*Index, error) {
|
|
if !strings.HasSuffix(strings.ToLower(path), ".cdx") {
|
|
path += ".cdx"
|
|
}
|
|
|
|
f, err := os.OpenFile(path, os.O_RDWR, 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
idx := &Index{file: f}
|
|
|
|
// mmap for zero-copy reads (platform-specific, no-op on Windows)
|
|
if fi, err2 := f.Stat(); err2 == nil && fi.Size() > 0 {
|
|
if data, err2 := mmapFile(f, int(fi.Size())); err2 == nil {
|
|
idx.mmapData = data
|
|
}
|
|
}
|
|
|
|
// Read compound header (structural root at offset 0)
|
|
rootHdr, err := ReadTagHeader(f, 0)
|
|
if err != nil {
|
|
f.Close()
|
|
return nil, err
|
|
}
|
|
|
|
// Parse compound tag directory from the structural root's B-tree
|
|
// The structural index keys are 10-byte tag names, and each leaf entry
|
|
// points to the tag header at a specific file offset.
|
|
tagEntries := readCompoundTagList(idx, rootHdr)
|
|
|
|
// Harbour orders tags by file offset (TagBlock) ascending, which
|
|
// corresponds to creation order. The compound B-tree stores entries
|
|
// alphabetically, so we must re-sort by offset to match Harbour.
|
|
// See: hb_cdxIndexLoadAvailTags in dbfcdx1.c
|
|
sort.Slice(tagEntries, func(i, j int) bool {
|
|
return tagEntries[i].offset < tagEntries[j].offset
|
|
})
|
|
|
|
for _, entry := range tagEntries {
|
|
tagHdr, err := ReadTagHeader(f, entry.offset)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
tag := &Tag{
|
|
Name: entry.name,
|
|
index: idx,
|
|
header: *tagHdr,
|
|
headerOff: entry.offset,
|
|
keyLen: int(tagHdr.KeySize),
|
|
curKey: make([]byte, tagHdr.KeySize),
|
|
}
|
|
idx.tags = append(idx.tags, tag)
|
|
}
|
|
|
|
// If no tags found via compound directory, fall back to root as single tag
|
|
if len(idx.tags) == 0 {
|
|
tag := &Tag{
|
|
Name: "TAG1",
|
|
index: idx,
|
|
header: *rootHdr,
|
|
headerOff: 0,
|
|
keyLen: int(rootHdr.KeySize),
|
|
curKey: make([]byte, rootHdr.KeySize),
|
|
}
|
|
idx.tags = append(idx.tags, tag)
|
|
}
|
|
|
|
return idx, nil
|
|
}
|
|
|
|
// Close closes the CDX file.
|
|
func (idx *Index) Close() error {
|
|
if idx.mmapData != nil {
|
|
munmapFile(idx.mmapData)
|
|
idx.mmapData = nil
|
|
}
|
|
return idx.file.Close()
|
|
}
|
|
|
|
// TagCount returns the number of tags.
|
|
func (idx *Index) TagCount() int {
|
|
return len(idx.tags)
|
|
}
|
|
|
|
// GetTag returns a tag by index.
|
|
func (idx *Index) GetTag(i int) *Tag {
|
|
if i >= 0 && i < len(idx.tags) {
|
|
return idx.tags[i]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Tags returns all tags in the CDX.
|
|
func (idx *Index) Tags() []*Tag { return idx.tags }
|
|
|
|
// FindTag returns a tag by name.
|
|
func (idx *Index) FindTag(name string) *Tag {
|
|
upper := strings.ToUpper(name)
|
|
for _, t := range idx.tags {
|
|
if strings.ToUpper(t.Name) == upper {
|
|
return t
|
|
}
|
|
// Also try key expression match
|
|
if strings.ToUpper(t.header.KeyExpr) == upper {
|
|
return t
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// tagDirEntry is one entry of the compound tag directory: a tag name and the
// file offset of that tag's header.
type tagDirEntry struct {
	name   string // tag name
	offset int64  // file offset of the tag header
}
|
|
|
|
// readCompoundTagList reads tag names and offsets from the structural root.
|
|
// CDX compound header: root page is a B-tree of tag entries.
|
|
// Each leaf key = 10-byte tag name, record number = page offset / 512.
|
|
func readCompoundTagList(idx *Index, rootHdr *TagHeader) []tagDirEntry {
|
|
var entries []tagDirEntry
|
|
if rootHdr.RootPtr == 0 {
|
|
return entries
|
|
}
|
|
|
|
// Read the root page of the structural index
|
|
pageData := make([]byte, 512)
|
|
err := idx.readAt(pageData, int64(rootHdr.RootPtr))
|
|
if err != nil {
|
|
return entries
|
|
}
|
|
|
|
// CDX page header: [attr:2][nKeys:2][leftPtr:4][rightPtr:4]
|
|
nKeys := int(binary.LittleEndian.Uint16(pageData[2:4]))
|
|
attr := binary.LittleEndian.Uint16(pageData[0:2])
|
|
|
|
isLeaf := (attr & 0x02) != 0
|
|
|
|
if isLeaf {
|
|
entries = decodeCompoundLeaf(pageData, nKeys)
|
|
}
|
|
|
|
// If compound leaf decoding didn't find entries, scan for tag headers
|
|
if len(entries) == 0 {
|
|
entries = scanCompoundLeaves(idx, rootHdr)
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
// scanCompoundLeaves scans the CDX file for tag headers.
|
|
// CDX tag headers are at 0x400 (1024) byte boundaries.
|
|
// Each tag header is followed by a page with the key expression string.
|
|
func scanCompoundLeaves(idx *Index, rootHdr *TagHeader) []tagDirEntry {
|
|
var entries []tagDirEntry
|
|
|
|
fileInfo, err := idx.file.Stat()
|
|
if err != nil {
|
|
return entries
|
|
}
|
|
fileSize := fileInfo.Size()
|
|
|
|
// Scan at 0x400 intervals; tag headers have:
|
|
// - RootPtr (uint32 at offset 0) pointing to a valid page
|
|
// - KeySize (uint16 at offset 12) between 1..240
|
|
// - Key expression string at +0x200 (offset 0x106 from header start)
|
|
// Skip offset 0 (compound root) and scan the rest
|
|
// Skip compound header at 0x0000; scan from 0x0400 onwards
|
|
// Tag headers are at 0x400 boundaries but NOT the compound root itself
|
|
for off := int64(0x400); off < fileSize; off += 0x200 {
|
|
buf := make([]byte, 0x400)
|
|
err := idx.readAt(buf, off)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
rootPtr := binary.LittleEndian.Uint32(buf[0:4])
|
|
keySize := binary.LittleEndian.Uint16(buf[12:14])
|
|
|
|
if keySize == 0 || keySize > 240 || rootPtr == 0 {
|
|
continue
|
|
}
|
|
// Validate rootPtr is within file and at a valid page boundary
|
|
if int64(rootPtr) >= fileSize || rootPtr%512 != 0 {
|
|
continue
|
|
}
|
|
|
|
// Read key expression from offset 0x106 within the header
|
|
keyExpr := ""
|
|
for i := 0x106; i < 0x206 && i < len(buf) && buf[i] != 0; i++ {
|
|
keyExpr += string(buf[i])
|
|
}
|
|
if keyExpr == "" {
|
|
// Key expression might be in the next page (+0x200 from header)
|
|
exprBuf := make([]byte, 256)
|
|
idx.readAt(exprBuf, off+0x200)
|
|
for i := 0; i < len(exprBuf) && exprBuf[i] != 0; i++ {
|
|
keyExpr += string(exprBuf[i])
|
|
}
|
|
}
|
|
|
|
if keyExpr == "" {
|
|
continue
|
|
}
|
|
|
|
name := strings.ToUpper(strings.TrimSpace(keyExpr))
|
|
// Use "BY" + field name convention, or just the expression
|
|
entries = append(entries, tagDirEntry{name: name, offset: off})
|
|
}
|
|
|
|
return entries
|
|
}
|
|
|
|
// decodeCompoundLeaf decodes tag entries from a compound leaf page.
|
|
// Compound index uses the same bit-packed format as data leaves,
|
|
// with keyLen=10 (tag name) and recNo = page offset / PageLen.
|
|
func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry {
|
|
if nKeys <= 0 || len(data) < ExtHeadSize {
|
|
return nil
|
|
}
|
|
|
|
// Use the standard leaf key decoder with keyLen=10 (compound tag name size)
|
|
hdr := DecodeLeafHeader(data)
|
|
keys := DecodeLeafKeys(data, hdr, 10)
|
|
|
|
var entries []tagDirEntry
|
|
for _, dk := range keys {
|
|
name := trimNull(dk.Key)
|
|
name = strings.TrimSpace(name)
|
|
if name == "" {
|
|
continue
|
|
}
|
|
// RecNo in compound index = direct byte offset to tag header
|
|
entries = append(entries, tagDirEntry{name: name, offset: int64(dk.RecNo)})
|
|
}
|
|
return entries
|
|
}
|
|
|
|
// getLeafKeys returns decoded leaf keys with caching.
|
|
func (t *Tag) getLeafKeys(pageOffset int64) ([]DecodedKey, error) {
|
|
if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil {
|
|
return t.cachedLeafKeys, nil
|
|
}
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
return nil, err
|
|
}
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
t.cachedLeafOff = pageOffset
|
|
t.cachedLeafKeys = keys
|
|
return keys, nil
|
|
}
|
|
|
|
// --- Tag navigation ---
|
|
|
|
// Seek searches for a key in the CDX tag's B-tree.
|
|
// Seek searches the B-tree iteratively (no recursion, single buffer reuse).
|
|
func (t *Tag) Seek(searchKey []byte) (uint32, bool) {
|
|
t.stackLevel = 0
|
|
t.tagBOF = false
|
|
t.tagEOF = false
|
|
|
|
pageOffset := int64(t.header.RootPtr)
|
|
buf := make([]byte, PageLen) // single buffer reused across all levels
|
|
entrySize := t.keyLen + 8
|
|
searchLen := len(searchKey)
|
|
|
|
for {
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
t.tagEOF = true
|
|
return 0, false
|
|
}
|
|
|
|
attr := binary.LittleEndian.Uint16(buf[0:2])
|
|
if (attr & NodeLeaf) != 0 {
|
|
// === LEAF NODE ===
|
|
var keys []DecodedKey
|
|
if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil {
|
|
keys = t.cachedLeafKeys
|
|
} else {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys = DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
t.cachedLeafOff = pageOffset
|
|
t.cachedLeafKeys = keys
|
|
}
|
|
|
|
// Binary search — leftmost match
|
|
lo, hi := 0, len(keys)-1
|
|
foundIdx := -1
|
|
for lo <= hi {
|
|
mid := (lo + hi) / 2
|
|
cmp := bytes.Compare(searchKey, keys[mid].Key[:searchLen])
|
|
if cmp == 0 {
|
|
foundIdx = mid
|
|
hi = mid - 1
|
|
} else if cmp < 0 {
|
|
hi = mid - 1
|
|
} else {
|
|
lo = mid + 1
|
|
}
|
|
}
|
|
if foundIdx >= 0 {
|
|
t.curRecNo = keys[foundIdx].RecNo
|
|
copy(t.curKey, keys[foundIdx].Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: foundIdx}
|
|
t.stackLevel++
|
|
}
|
|
return keys[foundIdx].RecNo, true
|
|
}
|
|
if lo < len(keys) {
|
|
t.curRecNo = keys[lo].RecNo
|
|
copy(t.curKey, keys[lo].Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lo}
|
|
t.stackLevel++
|
|
}
|
|
return keys[lo].RecNo, false
|
|
}
|
|
// Past all keys: follow rightPtr
|
|
hdr := DecodeLeafHeader(buf)
|
|
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
|
|
pageOffset = int64(hdr.RightPtr)
|
|
continue // iterate instead of recurse
|
|
}
|
|
t.tagEOF = true
|
|
t.curRecNo = 0
|
|
return 0, false
|
|
}
|
|
|
|
// === INTERNAL NODE (inline binary search, zero alloc) ===
|
|
nKeys := int(binary.LittleEndian.Uint16(buf[2:4]))
|
|
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
|
t.stackLevel++
|
|
}
|
|
|
|
// Binary search on internal keys
|
|
found := false
|
|
lo, hi := 0, nKeys-1
|
|
for lo <= hi {
|
|
mid := (lo + hi) / 2
|
|
off := IntHeadSize + mid*entrySize
|
|
cmp := bytes.Compare(searchKey, buf[off:off+t.keyLen])
|
|
if cmp <= 0 {
|
|
hi = mid - 1
|
|
} else {
|
|
lo = mid + 1
|
|
}
|
|
}
|
|
// lo = insertion point; follow child at lo
|
|
if lo < nKeys {
|
|
off := IntHeadSize + lo*entrySize
|
|
childPage := binary.BigEndian.Uint32(buf[off+t.keyLen+4 : off+t.keyLen+8])
|
|
t.stack[t.stackLevel-1].KeyIndex = lo
|
|
if childPage != 0 {
|
|
pageOffset = int64(childPage)
|
|
found = true
|
|
}
|
|
}
|
|
if !found {
|
|
// Follow trailing child
|
|
trailOff := IntHeadSize + nKeys*entrySize
|
|
t.stack[t.stackLevel-1].KeyIndex = nKeys
|
|
if trailOff+t.keyLen+8 <= PageLen {
|
|
trailChild := binary.BigEndian.Uint32(buf[trailOff+t.keyLen+4 : trailOff+t.keyLen+8])
|
|
if trailChild != 0 {
|
|
pageOffset = int64(trailChild)
|
|
found = true
|
|
}
|
|
}
|
|
}
|
|
if !found {
|
|
t.tagEOF = true
|
|
return 0, false
|
|
}
|
|
// continue loop with new pageOffset
|
|
}
|
|
}
|
|
|
|
// GoTop positions at the first key.
|
|
func (t *Tag) GoTop() bool {
|
|
t.stackLevel = 0
|
|
t.tagBOF = false
|
|
t.tagEOF = false
|
|
return t.goLeftmost(int64(t.header.RootPtr))
|
|
}
|
|
|
|
func (t *Tag) goLeftmost(pageOffset int64) bool {
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
return false
|
|
}
|
|
|
|
attr := binary.LittleEndian.Uint16(buf[0:2])
|
|
isLeaf := (attr & NodeLeaf) != 0
|
|
|
|
if isLeaf {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
t.cachedLeafOff = pageOffset
|
|
t.cachedLeafKeys = keys
|
|
if len(keys) > 0 {
|
|
t.curRecNo = keys[0].RecNo
|
|
copy(t.curKey, keys[0].Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
|
t.stackLevel++
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Internal: follow first child (zero allocation — read directly from buf)
|
|
nKeys := int(binary.LittleEndian.Uint16(buf[2:4]))
|
|
if nKeys > 0 {
|
|
entrySize := t.keyLen + 8
|
|
child0 := binary.BigEndian.Uint32(buf[IntHeadSize+t.keyLen+4 : IntHeadSize+entrySize])
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
|
t.stackLevel++
|
|
}
|
|
return t.goLeftmost(int64(child0))
|
|
}
|
|
return false
|
|
}
|
|
|
|
// GoBottom positions at the last key.
|
|
func (t *Tag) GoBottom() bool {
|
|
t.stackLevel = 0
|
|
t.tagBOF = false
|
|
t.tagEOF = false
|
|
return t.goRightmost(int64(t.header.RootPtr))
|
|
}
|
|
|
|
func (t *Tag) goRightmost(pageOffset int64) bool {
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
return false
|
|
}
|
|
|
|
attr := binary.LittleEndian.Uint16(buf[0:2])
|
|
isLeaf := (attr & NodeLeaf) != 0
|
|
|
|
if isLeaf {
|
|
hdr := DecodeLeafHeader(buf)
|
|
keys := DecodeLeafKeys(buf, hdr, t.keyLen)
|
|
if len(keys) > 0 {
|
|
last := len(keys) - 1
|
|
t.curRecNo = keys[last].RecNo
|
|
copy(t.curKey, keys[last].Key)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: last}
|
|
t.stackLevel++
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Internal: follow last child
|
|
node := DecodeIntNode(buf)
|
|
intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen)
|
|
lastIdx := int(node.NKeys)
|
|
if t.stackLevel < StackSize {
|
|
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastIdx}
|
|
t.stackLevel++
|
|
}
|
|
return t.goRightmost(int64(intKeys[lastIdx].ChildPage))
|
|
}
|
|
|
|
// SkipNext moves to the next key in leaf using rightPtr linked list.
|
|
// CDX leaf pages are doubly linked — simpler than NTX stack traversal.
|
|
// SkipNext moves to the next key. Uses cached leaf decode.
|
|
func (t *Tag) SkipNext() bool {
|
|
if t.stackLevel == 0 {
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
level := t.stackLevel - 1
|
|
pageOffset := t.stack[level].PageOffset
|
|
keyIdx := t.stack[level].KeyIndex
|
|
|
|
keys, err := t.getLeafKeys(pageOffset)
|
|
if err != nil {
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
// Next key in same page? (cache hit — no decode)
|
|
if keyIdx+1 < len(keys) {
|
|
t.stack[level].KeyIndex = keyIdx + 1
|
|
t.curRecNo = keys[keyIdx+1].RecNo
|
|
copy(t.curKey, keys[keyIdx+1].Key)
|
|
return true
|
|
}
|
|
|
|
// Follow rightPtr to next leaf (CDX linked list)
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
hdr := DecodeLeafHeader(buf)
|
|
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
|
|
nextOff := int64(hdr.RightPtr)
|
|
keys2, err := t.getLeafKeys(nextOff)
|
|
if err == nil && len(keys2) > 0 {
|
|
t.stack[level] = StackEntry{PageOffset: nextOff, KeyIndex: 0}
|
|
t.curRecNo = keys2[0].RecNo
|
|
copy(t.curKey, keys2[0].Key)
|
|
return true
|
|
}
|
|
}
|
|
|
|
t.tagEOF = true
|
|
return false
|
|
}
|
|
|
|
// SkipPrev moves to the previous key. Uses cached leaf decode.
|
|
func (t *Tag) SkipPrev() bool {
|
|
if t.stackLevel == 0 {
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
|
|
level := t.stackLevel - 1
|
|
pageOffset := t.stack[level].PageOffset
|
|
keyIdx := t.stack[level].KeyIndex
|
|
|
|
// Previous key in same page? (cache hit)
|
|
if keyIdx > 0 {
|
|
keys, err := t.getLeafKeys(pageOffset)
|
|
if err == nil {
|
|
t.stack[level].KeyIndex = keyIdx - 1
|
|
t.curRecNo = keys[keyIdx-1].RecNo
|
|
copy(t.curKey, keys[keyIdx-1].Key)
|
|
return true
|
|
}
|
|
}
|
|
|
|
// Follow leftPtr
|
|
buf := make([]byte, PageLen)
|
|
if err := t.index.readAt(buf, pageOffset); err != nil {
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
hdr := DecodeLeafHeader(buf)
|
|
if hdr.LeftPtr != 0 && hdr.LeftPtr != 0xFFFFFFFF {
|
|
prevOff := int64(hdr.LeftPtr)
|
|
keys2, err := t.getLeafKeys(prevOff)
|
|
if err == nil && len(keys2) > 0 {
|
|
last := len(keys2) - 1
|
|
t.stack[level] = StackEntry{PageOffset: prevOff, KeyIndex: last}
|
|
t.curRecNo = keys2[last].RecNo
|
|
copy(t.curKey, keys2[last].Key)
|
|
return true
|
|
}
|
|
}
|
|
|
|
t.tagBOF = true
|
|
return false
|
|
}
|
|
|
|
// CurRecNo returns the current record number.
|
|
func (t *Tag) CurRecNo() uint32 { return t.curRecNo }
|
|
|
|
// CurKey returns the current key.
|
|
func (t *Tag) CurKey() []byte { return t.curKey[:t.keyLen] }
|
|
|
|
// IsEOF returns true if past end.
|
|
func (t *Tag) IsEOF() bool { return t.tagEOF }
|
|
|
|
// IsBOF returns true if before start.
|
|
func (t *Tag) IsBOF() bool { return t.tagBOF }
|
|
|
|
// KeyLen returns the key length.
|
|
func (t *Tag) KeyLen() int { return t.keyLen }
|
|
|
|
// KeyExpr returns the key expression string stored in the CDX header.
|
|
func (t *Tag) KeyExpr() string { return t.header.KeyExpr }
|
|
|
|
// ForExpr returns the FOR condition expression.
|
|
func (t *Tag) ForExpr() string { return t.header.ForExpr }
|
|
|
|
// IsDescending returns true if the tag sorts in descending order.
|
|
func (t *Tag) IsDescending() bool { return t.header.Descending }
|
|
|
|
// HeaderOffset returns the file offset of this tag's header block.
|
|
func (t *Tag) HeaderOffset() int64 { return t.headerOff }
|
|
|
|
// RootPtr returns the root page offset for this tag's B-tree.
|
|
func (t *Tag) RootPtr() uint32 { return t.header.RootPtr }
|
|
|
|
// Close is a no-op for tags (the parent Index owns the file).
|
|
func (t *Tag) Close() error { return nil }
|
|
|
|
// --- Helpers ---
|
|
|
|
// trimNull returns b up to (not including) its first zero byte, or all of b
// when it contains none, as a string with leading and trailing whitespace
// removed.
func trimNull(b []byte) string {
	end := 0
	for end < len(b) && b[end] != 0 {
		end++
	}
	return strings.TrimSpace(string(b[:end]))
}
|