Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2 SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved as a single checkpoint before refactoring the parser to delegate xBase command translation to the preprocessor. Highlights: FiveSql2 engine (_FiveSql2/src/) - prefix-glob index attach -> explicit convention (<table>_pk.ntx, <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop - DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt) - COUNT(DISTINCT col) parsed + aggregated via hSeen hash - UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent) - DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT) - Derived table FROM (SELECT...) + JOIN right-side derived - Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect - LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs) - DATE literal round-trip validation (Feb 29 non-leap rejected) - CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists - AlterTable type dispatcher comma-wrapped (1-char type "A" no longer matches CHARACTER) Compiler / runtime - gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity) - gengo split: emit_block.go, emit_stmt.go, folding.go extracted - parser/stmtreg.go nudges - hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*), windows debug stubs collapsed - thread/vm/value/class/pcinterp tightening from panic traces RDD layer (hbrdd/) - dbf: null bitmap support (null.go + null_test.go), mmap split (mmap_posix.go / mmap_windows.go), byte-level numeric parse - ntx/cdx: windows mmap parity - workarea + mem RDD: cross-area state-bleed fixes RTL (hbrtl/) - errorlog rewrite with platform-specific FD (errorlog_fd_unix / errorlog_fd_other) - sqlscan, sqlhelpers, indexrtl, datetime extensions Gates green at checkpoint: - go test ./... : PASS - FiveSql2 SQL:1999 : 43/43 - Harbour compat : 56/56 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1040 lines
29 KiB
Go
1040 lines
29 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||
// All rights reserved.
|
||
|
||
// CDX compound index engine for Five.
|
||
// Byte-compatible with Harbour/FoxPro CDX files.
|
||
// CDX uses FPT memo format (not DBT).
|
||
//
|
||
// Key differences from NTX:
|
||
// - 512-byte pages (vs NTX 1024)
|
||
// - Compound index: multiple tags per file
|
||
// - Bit-packed leaf keys: recBits/dupBits/trlBits compression
|
||
// - Linked leaf pages (leftPtr/rightPtr)
|
||
//
|
||
// Reference:
|
||
// /mnt/d/harbour-core/include/hbrddcdx.h
|
||
// /mnt/d/harbour-core/src/rdd/dbfcdx/dbfcdx1.c
|
||
// docs/rdd-architecture-spec.md
|
||
package cdx
|
||
|
||
import (
|
||
"bytes"
|
||
"encoding/binary"
|
||
"fmt"
|
||
"os"
|
||
"sort"
|
||
"strings"
|
||
)
|
||
|
||
// CDX constants — matching Harbour.
|
||
const (
|
||
PageLen = 512 // CDX_PAGELEN (1 << 9)
|
||
HeaderLen = 1024 // CDX_HEADERLEN
|
||
MaxKey = 240 // CDX_MAXKEY
|
||
MaxTagNameLen = 10 // CDX_MAXTAGNAMELEN
|
||
StackSize = 64 // CDX_STACKSIZE
|
||
IntHeadSize = 12 // CDX_INT_HEADSIZE
|
||
ExtHeadSize = 24 // CDX_EXT_HEADSIZE
|
||
HeaderExpLen = HeaderLen - 512
|
||
|
||
// Node types
|
||
NodeBranch = 0 // CDX_NODE_BRANCH
|
||
NodeRoot = 1 // CDX_NODE_ROOT
|
||
NodeLeaf = 2 // CDX_NODE_LEAF
|
||
NodeUnused = 0xFF
|
||
|
||
// Type flags
|
||
TypeUnique = 0x01
|
||
TypePartial = 0x02
|
||
TypeCustom = 0x04
|
||
TypeForFilter = 0x08
|
||
TypeCompact = 0x20
|
||
TypeCompound = 0x40
|
||
TypeStructure = 0x80
|
||
)
|
||
|
||
// --- Tag Header (512 bytes in file, at start of each tag's header page) ---
|
||
|
||
// TagHeader holds a CDX tag's metadata.
|
||
// Harbour: CDXTAGHEADER in hbrddcdx.h:188
|
||
type TagHeader struct {
|
||
RootPtr uint32 // root page offset
|
||
FreePtr uint32 // free page list
|
||
Counter uint32 // update counter
|
||
KeySize uint16 // key length (max 240)
|
||
IndexOpt byte // CDX_TYPE_* flags
|
||
IndexSig byte // signature
|
||
HeaderLen uint16 // 0x0400 typically
|
||
PageLen uint16 // page length
|
||
KeyExpr string // key expression
|
||
ForExpr string // FOR filter expression
|
||
Descending bool
|
||
IgnoreCase bool
|
||
}
|
||
|
||
// ReadTagHeader reads a CDX tag header from file at given offset.
|
||
func ReadTagHeader(f *os.File, offset int64) (*TagHeader, error) {
|
||
buf := make([]byte, HeaderLen)
|
||
if _, err := f.ReadAt(buf, offset); err != nil {
|
||
return nil, fmt.Errorf("read CDX tag header at %d: %w", offset, err)
|
||
}
|
||
|
||
th := &TagHeader{
|
||
RootPtr: binary.LittleEndian.Uint32(buf[0:4]),
|
||
FreePtr: binary.LittleEndian.Uint32(buf[4:8]),
|
||
Counter: binary.LittleEndian.Uint32(buf[8:12]),
|
||
KeySize: binary.LittleEndian.Uint16(buf[12:14]),
|
||
IndexOpt: buf[14],
|
||
IndexSig: buf[15],
|
||
HeaderLen: binary.LittleEndian.Uint16(buf[16:18]),
|
||
PageLen: binary.LittleEndian.Uint16(buf[18:20]),
|
||
}
|
||
|
||
th.IgnoreCase = buf[503] != 0
|
||
th.Descending = binary.LittleEndian.Uint16(buf[504:506]) != 0
|
||
|
||
// Key/For expressions — stored directly at offset 512 (0x200) within the header block.
|
||
// CDX format: key expression at byte 512, for expression follows after null terminator.
|
||
keyExprStart := 512
|
||
th.KeyExpr = trimNull(buf[keyExprStart:])
|
||
|
||
// FOR expression follows key expression (after null terminator)
|
||
forStart := keyExprStart + len(th.KeyExpr) + 1
|
||
if forStart < len(buf) {
|
||
th.ForExpr = trimNull(buf[forStart:])
|
||
}
|
||
|
||
return th, nil
|
||
}
|
||
|
||
// --- Leaf page: bit-packed key extraction ---
|
||
|
||
// LeafHeader holds decoded leaf page metadata.
|
||
// Harbour: CDXEXTNODE in hbrddcdx.h:224
|
||
type LeafHeader struct {
|
||
Attr uint16
|
||
NKeys uint16
|
||
LeftPtr uint32
|
||
RightPtr uint32
|
||
FreeSpc uint16
|
||
RecMask uint32
|
||
DupMask byte
|
||
TrlMask byte
|
||
RecBits byte
|
||
DupBits byte
|
||
TrlBits byte
|
||
KeyBytes byte // total bytes per key info entry
|
||
}
|
||
|
||
// DecodeLeafHeader extracts the 24-byte leaf header from page data.
|
||
func DecodeLeafHeader(data []byte) LeafHeader {
|
||
return LeafHeader{
|
||
Attr: binary.LittleEndian.Uint16(data[0:2]),
|
||
NKeys: binary.LittleEndian.Uint16(data[2:4]),
|
||
LeftPtr: binary.LittleEndian.Uint32(data[4:8]),
|
||
RightPtr: binary.LittleEndian.Uint32(data[8:12]),
|
||
FreeSpc: binary.LittleEndian.Uint16(data[12:14]),
|
||
RecMask: binary.LittleEndian.Uint32(data[14:18]),
|
||
DupMask: data[18],
|
||
TrlMask: data[19],
|
||
RecBits: data[20],
|
||
DupBits: data[21],
|
||
TrlBits: data[22],
|
||
KeyBytes: data[23],
|
||
}
|
||
}
|
||
|
||
// DecodedKey holds a single decoded key from a leaf page.
|
||
type DecodedKey struct {
|
||
RecNo uint32
|
||
Key []byte
|
||
}
|
||
|
||
// DecodeLeafKeys extracts all keys from a CDX leaf page — convenience
|
||
// wrapper that allocates fresh buffers. Hot call sites (per-tag seek
|
||
// loops) should use DecodeLeafKeysInto to recycle storage.
|
||
func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey {
|
||
keys, _ := DecodeLeafKeysInto(data, hdr, keyLen, nil, nil)
|
||
return keys
|
||
}
|
||
|
||
// DecodeLeafKeysInto is the allocation-aware variant. `slabReuse` and
|
||
// `keysReuse` are previous buffers (may be nil on first call, or carry
|
||
// stale data from a prior decode). Returns fresh `keys` and the backing
|
||
// `slab` — both valid until the next call that reuses them. Ported
|
||
// from rddfive/cdx_engine.c cdx_leaf_decode_all() — byte-level decode,
|
||
// 10× faster than bit-by-bit extractBits loop.
|
||
//
|
||
// Reuse contract: caller must not retain pointers into an earlier
|
||
// slab after passing it here. CDX.Tag's page cache already observes
|
||
// this invariant because it overwrites cachedLeafKeys on miss.
|
||
func DecodeLeafKeysInto(data []byte, hdr LeafHeader, keyLen int,
|
||
slabReuse []byte, keysReuse []DecodedKey) ([]DecodedKey, []byte) {
|
||
if hdr.NKeys == 0 {
|
||
return nil, slabReuse
|
||
}
|
||
|
||
nKeys := int(hdr.NKeys)
|
||
recBits := int(hdr.RecBits)
|
||
dupBits := int(hdr.DupBits)
|
||
trlBits := int(hdr.TrlBits)
|
||
reqByte := int(hdr.KeyBytes)
|
||
recMask := uint32((1 << uint(recBits)) - 1)
|
||
dcMask := uint32((1 << uint(dupBits)) - 1)
|
||
tcMask := uint32((1 << uint(trlBits)) - 1)
|
||
|
||
// Reuse or grow the DecodedKey slice.
|
||
var keys []DecodedKey
|
||
if cap(keysReuse) >= nKeys {
|
||
keys = keysReuse[:nKeys]
|
||
} else {
|
||
keys = make([]DecodedKey, nKeys)
|
||
}
|
||
|
||
// Reuse or grow the byte slab (one alloc replaces 30+ per page).
|
||
needBytes := nKeys*keyLen + keyLen // +keyLen for prevKey
|
||
var slab []byte
|
||
if cap(slabReuse) >= needBytes {
|
||
slab = slabReuse[:needBytes]
|
||
} else {
|
||
slab = make([]byte, needBytes)
|
||
}
|
||
prevKey := slab[nKeys*keyLen:]
|
||
for j := range prevKey {
|
||
prevKey[j] = ' '
|
||
}
|
||
totalKeyBytes := 0
|
||
|
||
for i := 0; i < nKeys; i++ {
|
||
// Read reqByte bytes as little-endian integer (C: val = src[j] << 8 | ...)
|
||
src := data[ExtHeadSize+i*reqByte:]
|
||
var val uint64
|
||
for j := reqByte - 1; j >= 0; j-- {
|
||
val <<= 8
|
||
val |= uint64(src[j])
|
||
}
|
||
|
||
recNo := uint32(val) & recMask
|
||
val >>= uint(recBits)
|
||
dup := int(uint32(val) & dcMask)
|
||
val >>= uint(dupBits)
|
||
trl := int(uint32(val) & tcMask)
|
||
|
||
newBytes := keyLen - dup - trl
|
||
|
||
key := slab[i*keyLen : (i+1)*keyLen]
|
||
if dup > 0 {
|
||
copy(key[:dup], prevKey[:dup])
|
||
}
|
||
if newBytes > 0 {
|
||
kp := PageLen - totalKeyBytes - newBytes
|
||
if kp >= ExtHeadSize && kp+newBytes <= PageLen {
|
||
copy(key[dup:dup+newBytes], data[kp:kp+newBytes])
|
||
}
|
||
totalKeyBytes += newBytes
|
||
}
|
||
if trl > 0 {
|
||
for j := keyLen - trl; j < keyLen; j++ {
|
||
key[j] = ' '
|
||
}
|
||
}
|
||
|
||
keys[i] = DecodedKey{RecNo: recNo, Key: key}
|
||
copy(prevKey, key)
|
||
}
|
||
|
||
return keys, slab
|
||
}
|
||
|
||
// extractBits extracts n bits from a byte array starting at bit offset.
|
||
func extractBits(data []byte, bitOffset, nBits uint) uint32 {
|
||
if nBits == 0 {
|
||
return 0
|
||
}
|
||
var result uint32
|
||
for i := uint(0); i < nBits; i++ {
|
||
bytePos := (bitOffset + i) / 8
|
||
bitPos := (bitOffset + i) % 8
|
||
if int(bytePos) < len(data) {
|
||
if data[bytePos]&(1<<bitPos) != 0 {
|
||
result |= 1 << i
|
||
}
|
||
}
|
||
}
|
||
return result
|
||
}
|
||
|
||
// --- Interior node ---
|
||
|
||
// IntNode holds decoded internal node data.
|
||
type IntNode struct {
|
||
Attr uint16
|
||
NKeys uint16
|
||
LeftPtr uint32
|
||
RightPtr uint32
|
||
}
|
||
|
||
func DecodeIntNode(data []byte) IntNode {
|
||
return IntNode{
|
||
Attr: binary.LittleEndian.Uint16(data[0:2]),
|
||
NKeys: binary.LittleEndian.Uint16(data[2:4]),
|
||
LeftPtr: binary.LittleEndian.Uint32(data[4:8]),
|
||
RightPtr: binary.LittleEndian.Uint32(data[8:12]),
|
||
}
|
||
}
|
||
|
||
// IntKeyEntry is one entry in an internal node.
|
||
// Each entry = child page (4 LE) + recNo (4 LE) + key (keyLen bytes)
|
||
type IntKeyEntry struct {
|
||
ChildPage uint32
|
||
RecNo uint32
|
||
Key []byte
|
||
}
|
||
|
||
// DecodeIntKeys extracts keys from a CDX internal node.
|
||
// CDX internal format: [key: keyLen bytes][recNo: 4 BE][childPage: 4 BE]
|
||
// This is different from NTX which uses [child LE][recNo LE][key].
|
||
func DecodeIntKeys(data []byte, nKeys int, keyLen int) []IntKeyEntry {
|
||
entries := make([]IntKeyEntry, nKeys+1) // nKeys separators + 1 trailing child
|
||
entrySize := keyLen + 8 // key + recNo(4BE) + child(4BE)
|
||
off := IntHeadSize
|
||
|
||
for i := 0; i <= nKeys; i++ {
|
||
if off+entrySize > PageLen {
|
||
break
|
||
}
|
||
e := IntKeyEntry{
|
||
Key: make([]byte, keyLen),
|
||
}
|
||
if i < nKeys {
|
||
copy(e.Key, data[off:off+keyLen])
|
||
e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4])
|
||
e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8])
|
||
} else {
|
||
// Trailing child — no key/recNo, just child pointer
|
||
// In CDX, the rightmost child is the child of the last separator entry
|
||
// Actually, for the last entry we only need the child pointer
|
||
// The previous entry's child was the LEFT child; we need RIGHT child
|
||
// stored at the position after last key entry
|
||
copy(e.Key, data[off:off+keyLen])
|
||
e.RecNo = binary.BigEndian.Uint32(data[off+keyLen : off+keyLen+4])
|
||
e.ChildPage = binary.BigEndian.Uint32(data[off+keyLen+4 : off+keyLen+8])
|
||
}
|
||
entries[i] = e
|
||
off += entrySize
|
||
}
|
||
|
||
return entries
|
||
}
|
||
|
||
// --- CDX Index (compound, multi-tag) ---
|
||
|
||
// Index represents an open CDX index file.
|
||
type Index struct {
|
||
file *os.File
|
||
tags []*Tag
|
||
mmapData []byte // mmap'd file for zero-copy reads
|
||
}
|
||
|
||
// readAt reads len(buf) bytes at offset — from mmap or file fallback.
|
||
// Prefer pageSlice() on hot paths; this entry point stays for callers
|
||
// that need a writable, caller-owned copy.
|
||
func (idx *Index) readAt(buf []byte, offset int64) error {
|
||
if idx.mmapData != nil && offset >= 0 && int(offset)+len(buf) <= len(idx.mmapData) {
|
||
copy(buf, idx.mmapData[offset:offset+int64(len(buf))])
|
||
return nil
|
||
}
|
||
_, err := idx.file.ReadAt(buf, offset)
|
||
return err
|
||
}
|
||
|
||
// pageSlice returns a read-only view of one B-tree page at offset — a
|
||
// direct slice of mmap when possible (zero copy), else a read into the
|
||
// caller's fallback buffer. Returns nil on read error. The returned
|
||
// slice is valid until the index is remapped, closed, or until the
|
||
// next fallbackBuf reuse — callers must not retain it across those
|
||
// events. The seek loop uses this: same Tag.seekBuf gets handed back
|
||
// for every internal-node visit, so the non-mmap path allocates once,
|
||
// and the mmap path allocates nothing.
|
||
func (idx *Index) pageSlice(offset int64, fallbackBuf []byte) []byte {
|
||
if idx.mmapData != nil && offset >= 0 && int(offset)+PageLen <= len(idx.mmapData) {
|
||
return idx.mmapData[offset : offset+PageLen]
|
||
}
|
||
if len(fallbackBuf) < PageLen {
|
||
fallbackBuf = make([]byte, PageLen)
|
||
}
|
||
if _, err := idx.file.ReadAt(fallbackBuf[:PageLen], offset); err != nil {
|
||
return nil
|
||
}
|
||
return fallbackBuf[:PageLen]
|
||
}
|
||
|
||
// Tag represents one index tag within a CDX file.
|
||
type Tag struct {
|
||
Name string // tag name (e.g., "BYNAME")
|
||
index *Index
|
||
header TagHeader
|
||
headerOff int64 // file offset of this tag's header
|
||
keyLen int
|
||
|
||
// Current position
|
||
stack [StackSize]StackEntry
|
||
stackLevel int
|
||
curRecNo uint32
|
||
curKey []byte
|
||
tagBOF bool
|
||
tagEOF bool
|
||
|
||
// Leaf page decode cache — avoids re-decoding same page on SkipNext/SkipPrev
|
||
cachedLeafOff int64
|
||
cachedLeafKeys []DecodedKey
|
||
// Reusable backing storage for the key slab and DecodedKey slice.
|
||
// cachedLeafKeys[i].Key aliases slices of cachedLeafSlab, so the
|
||
// slab stays alive as long as cachedLeafKeys is in use. On cache
|
||
// miss we hand both buffers back to DecodeLeafKeysInto which
|
||
// reuses them if big enough — saving one alloc per leaf decode.
|
||
cachedLeafSlab []byte
|
||
|
||
// seekBuf is handed to Index.pageSlice as the fallback when mmap
|
||
// isn't available (Windows, or file grown past mapped size). The
|
||
// mmap path ignores it and returns a slice directly into mapped
|
||
// memory — zero copy. Either way, allocating a single 512-byte
|
||
// buffer per Tag (not per Seek) eliminates the per-seek alloc.
|
||
seekBuf []byte
|
||
}
|
||
|
||
type StackEntry struct {
|
||
PageOffset int64
|
||
KeyIndex int
|
||
}
|
||
|
||
// OpenIndex opens a CDX file and reads all tags.
|
||
func OpenIndex(path string) (*Index, error) {
|
||
if !strings.HasSuffix(strings.ToLower(path), ".cdx") {
|
||
path += ".cdx"
|
||
}
|
||
|
||
f, err := os.OpenFile(path, os.O_RDWR, 0)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
idx := &Index{file: f}
|
||
|
||
// mmap for zero-copy reads (platform-specific, no-op on Windows)
|
||
if fi, err2 := f.Stat(); err2 == nil && fi.Size() > 0 {
|
||
if data, err2 := mmapFile(f, int(fi.Size())); err2 == nil {
|
||
idx.mmapData = data
|
||
}
|
||
}
|
||
|
||
// Read compound header (structural root at offset 0)
|
||
rootHdr, err := ReadTagHeader(f, 0)
|
||
if err != nil {
|
||
f.Close()
|
||
return nil, err
|
||
}
|
||
|
||
// Parse compound tag directory from the structural root's B-tree
|
||
// The structural index keys are 10-byte tag names, and each leaf entry
|
||
// points to the tag header at a specific file offset.
|
||
tagEntries := readCompoundTagList(idx, rootHdr)
|
||
|
||
// Harbour orders tags by file offset (TagBlock) ascending, which
|
||
// corresponds to creation order. The compound B-tree stores entries
|
||
// alphabetically, so we must re-sort by offset to match Harbour.
|
||
// See: hb_cdxIndexLoadAvailTags in dbfcdx1.c
|
||
sort.Slice(tagEntries, func(i, j int) bool {
|
||
return tagEntries[i].offset < tagEntries[j].offset
|
||
})
|
||
|
||
for _, entry := range tagEntries {
|
||
tagHdr, err := ReadTagHeader(f, entry.offset)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
tag := &Tag{
|
||
Name: entry.name,
|
||
index: idx,
|
||
header: *tagHdr,
|
||
headerOff: entry.offset,
|
||
keyLen: int(tagHdr.KeySize),
|
||
curKey: make([]byte, tagHdr.KeySize),
|
||
}
|
||
idx.tags = append(idx.tags, tag)
|
||
}
|
||
|
||
// If no tags found via compound directory, fall back to root as single tag
|
||
if len(idx.tags) == 0 {
|
||
tag := &Tag{
|
||
Name: "TAG1",
|
||
index: idx,
|
||
header: *rootHdr,
|
||
headerOff: 0,
|
||
keyLen: int(rootHdr.KeySize),
|
||
curKey: make([]byte, rootHdr.KeySize),
|
||
}
|
||
idx.tags = append(idx.tags, tag)
|
||
}
|
||
|
||
return idx, nil
|
||
}
|
||
|
||
// Close closes the CDX file.
|
||
func (idx *Index) Close() error {
|
||
if idx.mmapData != nil {
|
||
munmapFile(idx.mmapData)
|
||
idx.mmapData = nil
|
||
}
|
||
return idx.file.Close()
|
||
}
|
||
|
||
// TagCount returns the number of tags.
|
||
func (idx *Index) TagCount() int {
|
||
return len(idx.tags)
|
||
}
|
||
|
||
// GetTag returns a tag by index.
|
||
func (idx *Index) GetTag(i int) *Tag {
|
||
if i >= 0 && i < len(idx.tags) {
|
||
return idx.tags[i]
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// Tags returns all tags in the CDX.
|
||
func (idx *Index) Tags() []*Tag { return idx.tags }
|
||
|
||
// FindTag returns a tag by name.
|
||
func (idx *Index) FindTag(name string) *Tag {
|
||
upper := strings.ToUpper(name)
|
||
for _, t := range idx.tags {
|
||
if strings.ToUpper(t.Name) == upper {
|
||
return t
|
||
}
|
||
// Also try key expression match
|
||
if strings.ToUpper(t.header.KeyExpr) == upper {
|
||
return t
|
||
}
|
||
}
|
||
return nil
|
||
}
|
||
|
||
// tagDirEntry is a compound tag directory entry.
|
||
type tagDirEntry struct {
|
||
name string
|
||
offset int64
|
||
}
|
||
|
||
// readCompoundTagList reads tag names and offsets from the structural root.
|
||
// CDX compound header: root page is a B-tree of tag entries.
|
||
// Each leaf key = 10-byte tag name, record number = page offset / 512.
|
||
func readCompoundTagList(idx *Index, rootHdr *TagHeader) []tagDirEntry {
|
||
var entries []tagDirEntry
|
||
if rootHdr.RootPtr == 0 {
|
||
return entries
|
||
}
|
||
|
||
// Read the root page of the structural index
|
||
pageData := make([]byte, 512)
|
||
err := idx.readAt(pageData, int64(rootHdr.RootPtr))
|
||
if err != nil {
|
||
return entries
|
||
}
|
||
|
||
// CDX page header: [attr:2][nKeys:2][leftPtr:4][rightPtr:4]
|
||
nKeys := int(binary.LittleEndian.Uint16(pageData[2:4]))
|
||
attr := binary.LittleEndian.Uint16(pageData[0:2])
|
||
|
||
isLeaf := (attr & 0x02) != 0
|
||
|
||
if isLeaf {
|
||
entries = decodeCompoundLeaf(pageData, nKeys)
|
||
}
|
||
|
||
// If compound leaf decoding didn't find entries, scan for tag headers
|
||
if len(entries) == 0 {
|
||
entries = scanCompoundLeaves(idx, rootHdr)
|
||
}
|
||
|
||
return entries
|
||
}
|
||
|
||
// scanCompoundLeaves scans the CDX file for tag headers.
|
||
// CDX tag headers are at 0x400 (1024) byte boundaries.
|
||
// Each tag header is followed by a page with the key expression string.
|
||
func scanCompoundLeaves(idx *Index, rootHdr *TagHeader) []tagDirEntry {
|
||
var entries []tagDirEntry
|
||
|
||
fileInfo, err := idx.file.Stat()
|
||
if err != nil {
|
||
return entries
|
||
}
|
||
fileSize := fileInfo.Size()
|
||
|
||
// Scan at 0x400 intervals; tag headers have:
|
||
// - RootPtr (uint32 at offset 0) pointing to a valid page
|
||
// - KeySize (uint16 at offset 12) between 1..240
|
||
// - Key expression string at +0x200 (offset 0x106 from header start)
|
||
// Skip offset 0 (compound root) and scan the rest
|
||
// Skip compound header at 0x0000; scan from 0x0400 onwards
|
||
// Tag headers are at 0x400 boundaries but NOT the compound root itself
|
||
for off := int64(0x400); off < fileSize; off += 0x200 {
|
||
buf := make([]byte, 0x400)
|
||
err := idx.readAt(buf, off)
|
||
if err != nil {
|
||
continue
|
||
}
|
||
|
||
rootPtr := binary.LittleEndian.Uint32(buf[0:4])
|
||
keySize := binary.LittleEndian.Uint16(buf[12:14])
|
||
|
||
if keySize == 0 || keySize > 240 || rootPtr == 0 {
|
||
continue
|
||
}
|
||
// Validate rootPtr is within file and at a valid page boundary
|
||
if int64(rootPtr) >= fileSize || rootPtr%512 != 0 {
|
||
continue
|
||
}
|
||
|
||
// Read key expression from offset 0x106 within the header
|
||
keyExpr := ""
|
||
for i := 0x106; i < 0x206 && i < len(buf) && buf[i] != 0; i++ {
|
||
keyExpr += string(buf[i])
|
||
}
|
||
if keyExpr == "" {
|
||
// Key expression might be in the next page (+0x200 from header)
|
||
exprBuf := make([]byte, 256)
|
||
idx.readAt(exprBuf, off+0x200)
|
||
for i := 0; i < len(exprBuf) && exprBuf[i] != 0; i++ {
|
||
keyExpr += string(exprBuf[i])
|
||
}
|
||
}
|
||
|
||
if keyExpr == "" {
|
||
continue
|
||
}
|
||
|
||
name := strings.ToUpper(strings.TrimSpace(keyExpr))
|
||
// Use "BY" + field name convention, or just the expression
|
||
entries = append(entries, tagDirEntry{name: name, offset: off})
|
||
}
|
||
|
||
return entries
|
||
}
|
||
|
||
// decodeCompoundLeaf decodes tag entries from a compound leaf page.
|
||
// Compound index uses the same bit-packed format as data leaves,
|
||
// with keyLen=10 (tag name) and recNo = page offset / PageLen.
|
||
func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry {
|
||
if nKeys <= 0 || len(data) < ExtHeadSize {
|
||
return nil
|
||
}
|
||
|
||
// Use the standard leaf key decoder with keyLen=10 (compound tag name size)
|
||
hdr := DecodeLeafHeader(data)
|
||
keys := DecodeLeafKeys(data, hdr, 10)
|
||
|
||
var entries []tagDirEntry
|
||
for _, dk := range keys {
|
||
name := trimNull(dk.Key)
|
||
name = strings.TrimSpace(name)
|
||
if name == "" {
|
||
continue
|
||
}
|
||
// RecNo in compound index = direct byte offset to tag header
|
||
entries = append(entries, tagDirEntry{name: name, offset: int64(dk.RecNo)})
|
||
}
|
||
return entries
|
||
}
|
||
|
||
// getLeafKeys returns decoded leaf keys with caching. On cache miss the
|
||
// previous slab + key slice are recycled into DecodeLeafKeysInto so we
|
||
// avoid a fresh alloc for every leaf traversed during a seek-heavy
|
||
// workload (which is the whole point of caching them per-Tag).
|
||
func (t *Tag) getLeafKeys(pageOffset int64) ([]DecodedKey, error) {
|
||
if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil {
|
||
return t.cachedLeafKeys, nil
|
||
}
|
||
buf := make([]byte, PageLen)
|
||
if err := t.index.readAt(buf, pageOffset); err != nil {
|
||
return nil, err
|
||
}
|
||
hdr := DecodeLeafHeader(buf)
|
||
keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys)
|
||
t.cachedLeafOff = pageOffset
|
||
t.cachedLeafKeys = keys
|
||
t.cachedLeafSlab = slab
|
||
return keys, nil
|
||
}
|
||
|
||
// --- Tag navigation ---
|
||
|
||
// Seek searches for a key in the CDX tag's B-tree.
|
||
// Seek searches the B-tree iteratively (no recursion, single buffer reuse).
|
||
func (t *Tag) Seek(searchKey []byte) (uint32, bool) {
|
||
t.stackLevel = 0
|
||
t.tagBOF = false
|
||
t.tagEOF = false
|
||
|
||
pageOffset := int64(t.header.RootPtr)
|
||
// Reuse seekBuf across seeks so the non-mmap fallback path only
|
||
// allocates once per Tag lifetime. With mmap, pageSlice returns a
|
||
// view directly into mapped memory and seekBuf stays unused.
|
||
if cap(t.seekBuf) < PageLen {
|
||
t.seekBuf = make([]byte, PageLen)
|
||
}
|
||
entrySize := t.keyLen + 8
|
||
searchLen := len(searchKey)
|
||
|
||
for {
|
||
buf := t.index.pageSlice(pageOffset, t.seekBuf)
|
||
if buf == nil {
|
||
t.tagEOF = true
|
||
return 0, false
|
||
}
|
||
|
||
attr := binary.LittleEndian.Uint16(buf[0:2])
|
||
if (attr & NodeLeaf) != 0 {
|
||
// === LEAF NODE ===
|
||
var keys []DecodedKey
|
||
if pageOffset == t.cachedLeafOff && t.cachedLeafKeys != nil {
|
||
keys = t.cachedLeafKeys
|
||
} else {
|
||
hdr := DecodeLeafHeader(buf)
|
||
var slab []byte
|
||
keys, slab = DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys)
|
||
t.cachedLeafOff = pageOffset
|
||
t.cachedLeafKeys = keys
|
||
t.cachedLeafSlab = slab
|
||
}
|
||
|
||
// Binary search — leftmost match
|
||
lo, hi := 0, len(keys)-1
|
||
foundIdx := -1
|
||
for lo <= hi {
|
||
mid := (lo + hi) / 2
|
||
cmp := bytes.Compare(searchKey, keys[mid].Key[:searchLen])
|
||
if cmp == 0 {
|
||
foundIdx = mid
|
||
hi = mid - 1
|
||
} else if cmp < 0 {
|
||
hi = mid - 1
|
||
} else {
|
||
lo = mid + 1
|
||
}
|
||
}
|
||
if foundIdx >= 0 {
|
||
t.curRecNo = keys[foundIdx].RecNo
|
||
copy(t.curKey, keys[foundIdx].Key)
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: foundIdx}
|
||
t.stackLevel++
|
||
}
|
||
return keys[foundIdx].RecNo, true
|
||
}
|
||
if lo < len(keys) {
|
||
t.curRecNo = keys[lo].RecNo
|
||
copy(t.curKey, keys[lo].Key)
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lo}
|
||
t.stackLevel++
|
||
}
|
||
return keys[lo].RecNo, false
|
||
}
|
||
// Past all keys: follow rightPtr
|
||
hdr := DecodeLeafHeader(buf)
|
||
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
|
||
pageOffset = int64(hdr.RightPtr)
|
||
continue // iterate instead of recurse
|
||
}
|
||
t.tagEOF = true
|
||
t.curRecNo = 0
|
||
return 0, false
|
||
}
|
||
|
||
// === INTERNAL NODE (inline binary search, zero alloc) ===
|
||
nKeys := int(binary.LittleEndian.Uint16(buf[2:4]))
|
||
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
||
t.stackLevel++
|
||
}
|
||
|
||
// Binary search on internal keys
|
||
found := false
|
||
lo, hi := 0, nKeys-1
|
||
for lo <= hi {
|
||
mid := (lo + hi) / 2
|
||
off := IntHeadSize + mid*entrySize
|
||
cmp := bytes.Compare(searchKey, buf[off:off+t.keyLen])
|
||
if cmp <= 0 {
|
||
hi = mid - 1
|
||
} else {
|
||
lo = mid + 1
|
||
}
|
||
}
|
||
// lo = insertion point; follow child at lo
|
||
if lo < nKeys {
|
||
off := IntHeadSize + lo*entrySize
|
||
childPage := binary.BigEndian.Uint32(buf[off+t.keyLen+4 : off+t.keyLen+8])
|
||
t.stack[t.stackLevel-1].KeyIndex = lo
|
||
if childPage != 0 {
|
||
pageOffset = int64(childPage)
|
||
found = true
|
||
}
|
||
}
|
||
if !found {
|
||
// Follow trailing child
|
||
trailOff := IntHeadSize + nKeys*entrySize
|
||
t.stack[t.stackLevel-1].KeyIndex = nKeys
|
||
if trailOff+t.keyLen+8 <= PageLen {
|
||
trailChild := binary.BigEndian.Uint32(buf[trailOff+t.keyLen+4 : trailOff+t.keyLen+8])
|
||
if trailChild != 0 {
|
||
pageOffset = int64(trailChild)
|
||
found = true
|
||
}
|
||
}
|
||
}
|
||
if !found {
|
||
t.tagEOF = true
|
||
return 0, false
|
||
}
|
||
// continue loop with new pageOffset
|
||
}
|
||
}
|
||
|
||
// GoTop positions at the first key.
|
||
func (t *Tag) GoTop() bool {
|
||
t.stackLevel = 0
|
||
t.tagBOF = false
|
||
t.tagEOF = false
|
||
return t.goLeftmost(int64(t.header.RootPtr))
|
||
}
|
||
|
||
func (t *Tag) goLeftmost(pageOffset int64) bool {
|
||
buf := make([]byte, PageLen)
|
||
if err := t.index.readAt(buf, pageOffset); err != nil {
|
||
return false
|
||
}
|
||
|
||
attr := binary.LittleEndian.Uint16(buf[0:2])
|
||
isLeaf := (attr & NodeLeaf) != 0
|
||
|
||
if isLeaf {
|
||
hdr := DecodeLeafHeader(buf)
|
||
keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys)
|
||
t.cachedLeafOff = pageOffset
|
||
t.cachedLeafKeys = keys
|
||
t.cachedLeafSlab = slab
|
||
if len(keys) > 0 {
|
||
t.curRecNo = keys[0].RecNo
|
||
copy(t.curKey, keys[0].Key)
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
||
t.stackLevel++
|
||
}
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
|
||
// Internal: follow first child (zero allocation — read directly from buf)
|
||
nKeys := int(binary.LittleEndian.Uint16(buf[2:4]))
|
||
if nKeys > 0 {
|
||
entrySize := t.keyLen + 8
|
||
child0 := binary.BigEndian.Uint32(buf[IntHeadSize+t.keyLen+4 : IntHeadSize+entrySize])
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
|
||
t.stackLevel++
|
||
}
|
||
return t.goLeftmost(int64(child0))
|
||
}
|
||
return false
|
||
}
|
||
|
||
// GoBottom positions at the last key.
|
||
func (t *Tag) GoBottom() bool {
|
||
t.stackLevel = 0
|
||
t.tagBOF = false
|
||
t.tagEOF = false
|
||
return t.goRightmost(int64(t.header.RootPtr))
|
||
}
|
||
|
||
func (t *Tag) goRightmost(pageOffset int64) bool {
|
||
buf := make([]byte, PageLen)
|
||
if err := t.index.readAt(buf, pageOffset); err != nil {
|
||
return false
|
||
}
|
||
|
||
attr := binary.LittleEndian.Uint16(buf[0:2])
|
||
isLeaf := (attr & NodeLeaf) != 0
|
||
|
||
if isLeaf {
|
||
hdr := DecodeLeafHeader(buf)
|
||
keys, slab := DecodeLeafKeysInto(buf, hdr, t.keyLen, t.cachedLeafSlab, t.cachedLeafKeys)
|
||
t.cachedLeafOff = pageOffset
|
||
t.cachedLeafKeys = keys
|
||
t.cachedLeafSlab = slab
|
||
if len(keys) > 0 {
|
||
last := len(keys) - 1
|
||
t.curRecNo = keys[last].RecNo
|
||
copy(t.curKey, keys[last].Key)
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: last}
|
||
t.stackLevel++
|
||
}
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
|
||
// Internal: follow last child
|
||
node := DecodeIntNode(buf)
|
||
intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen)
|
||
lastIdx := int(node.NKeys)
|
||
if t.stackLevel < StackSize {
|
||
t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastIdx}
|
||
t.stackLevel++
|
||
}
|
||
return t.goRightmost(int64(intKeys[lastIdx].ChildPage))
|
||
}
|
||
|
||
// SkipNext moves to the next key in leaf using rightPtr linked list.
|
||
// CDX leaf pages are doubly linked — simpler than NTX stack traversal.
|
||
// SkipNext moves to the next key. Uses cached leaf decode.
|
||
func (t *Tag) SkipNext() bool {
|
||
if t.stackLevel == 0 {
|
||
t.tagEOF = true
|
||
return false
|
||
}
|
||
|
||
level := t.stackLevel - 1
|
||
pageOffset := t.stack[level].PageOffset
|
||
keyIdx := t.stack[level].KeyIndex
|
||
|
||
keys, err := t.getLeafKeys(pageOffset)
|
||
if err != nil {
|
||
t.tagEOF = true
|
||
return false
|
||
}
|
||
|
||
// Next key in same page? (cache hit — no decode)
|
||
if keyIdx+1 < len(keys) {
|
||
t.stack[level].KeyIndex = keyIdx + 1
|
||
t.curRecNo = keys[keyIdx+1].RecNo
|
||
copy(t.curKey, keys[keyIdx+1].Key)
|
||
return true
|
||
}
|
||
|
||
// Follow rightPtr to next leaf (CDX linked list)
|
||
buf := make([]byte, PageLen)
|
||
if err := t.index.readAt(buf, pageOffset); err != nil {
|
||
t.tagEOF = true
|
||
return false
|
||
}
|
||
hdr := DecodeLeafHeader(buf)
|
||
if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF {
|
||
nextOff := int64(hdr.RightPtr)
|
||
keys2, err := t.getLeafKeys(nextOff)
|
||
if err == nil && len(keys2) > 0 {
|
||
t.stack[level] = StackEntry{PageOffset: nextOff, KeyIndex: 0}
|
||
t.curRecNo = keys2[0].RecNo
|
||
copy(t.curKey, keys2[0].Key)
|
||
return true
|
||
}
|
||
}
|
||
|
||
t.tagEOF = true
|
||
return false
|
||
}
|
||
|
||
// SkipPrev moves to the previous key. Uses cached leaf decode.
|
||
func (t *Tag) SkipPrev() bool {
|
||
if t.stackLevel == 0 {
|
||
t.tagBOF = true
|
||
return false
|
||
}
|
||
|
||
level := t.stackLevel - 1
|
||
pageOffset := t.stack[level].PageOffset
|
||
keyIdx := t.stack[level].KeyIndex
|
||
|
||
// Previous key in same page? (cache hit)
|
||
if keyIdx > 0 {
|
||
keys, err := t.getLeafKeys(pageOffset)
|
||
if err == nil {
|
||
t.stack[level].KeyIndex = keyIdx - 1
|
||
t.curRecNo = keys[keyIdx-1].RecNo
|
||
copy(t.curKey, keys[keyIdx-1].Key)
|
||
return true
|
||
}
|
||
}
|
||
|
||
// Follow leftPtr
|
||
buf := make([]byte, PageLen)
|
||
if err := t.index.readAt(buf, pageOffset); err != nil {
|
||
t.tagBOF = true
|
||
return false
|
||
}
|
||
hdr := DecodeLeafHeader(buf)
|
||
if hdr.LeftPtr != 0 && hdr.LeftPtr != 0xFFFFFFFF {
|
||
prevOff := int64(hdr.LeftPtr)
|
||
keys2, err := t.getLeafKeys(prevOff)
|
||
if err == nil && len(keys2) > 0 {
|
||
last := len(keys2) - 1
|
||
t.stack[level] = StackEntry{PageOffset: prevOff, KeyIndex: last}
|
||
t.curRecNo = keys2[last].RecNo
|
||
copy(t.curKey, keys2[last].Key)
|
||
return true
|
||
}
|
||
}
|
||
|
||
t.tagBOF = true
|
||
return false
|
||
}
|
||
|
||
// CurRecNo returns the current record number.
|
||
func (t *Tag) CurRecNo() uint32 { return t.curRecNo }
|
||
|
||
// CurKey returns the current key.
|
||
func (t *Tag) CurKey() []byte { return t.curKey[:t.keyLen] }
|
||
|
||
// IsEOF returns true if past end.
|
||
func (t *Tag) IsEOF() bool { return t.tagEOF }
|
||
|
||
// IsBOF returns true if before start.
|
||
func (t *Tag) IsBOF() bool { return t.tagBOF }
|
||
|
||
// KeyLen returns the key length.
|
||
func (t *Tag) KeyLen() int { return t.keyLen }
|
||
|
||
// KeyExpr returns the key expression string stored in the CDX header.
|
||
func (t *Tag) KeyExpr() string { return t.header.KeyExpr }
|
||
|
||
// ForExpr returns the FOR condition expression.
|
||
func (t *Tag) ForExpr() string { return t.header.ForExpr }
|
||
|
||
// IsDescending returns true if the tag sorts in descending order.
|
||
func (t *Tag) IsDescending() bool { return t.header.Descending }
|
||
|
||
// HeaderOffset returns the file offset of this tag's header block.
|
||
func (t *Tag) HeaderOffset() int64 { return t.headerOff }
|
||
|
||
// RootPtr returns the root page offset for this tag's B-tree.
|
||
func (t *Tag) RootPtr() uint32 { return t.header.RootPtr }
|
||
|
||
// Close is a no-op for tags (the parent Index owns the file).
|
||
func (t *Tag) Close() error { return nil }
|
||
|
||
// --- Helpers ---
|
||
|
||
func trimNull(b []byte) string {
|
||
for i, c := range b {
|
||
if c == 0 {
|
||
return strings.TrimSpace(string(b[:i]))
|
||
}
|
||
}
|
||
return strings.TrimSpace(string(b))
|
||
}
|