// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.

// Package cdx is the CDX compound index engine for Five.
// Byte-compatible with Harbour/FoxPro CDX files.
// CDX uses FPT memo format (not DBT).
//
// Key differences from NTX:
//   - 512-byte pages (vs NTX 1024)
//   - Compound index: multiple tags per file
//   - Bit-packed leaf keys: recBits/dupBits/trlBits compression
//   - Linked leaf pages (leftPtr/rightPtr)
//
// Reference:
//
//	/mnt/d/harbour-core/include/hbrddcdx.h
//	/mnt/d/harbour-core/src/rdd/dbfcdx/dbfcdx1.c
//	docs/rdd-architecture-spec.md
package cdx

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"os"
	"strings"
)

// CDX constants — matching Harbour.
const (
	PageLen       = 512  // CDX_PAGELEN (1 << 9)
	HeaderLen     = 1024 // CDX_HEADERLEN
	MaxKey        = 240  // CDX_MAXKEY
	MaxTagNameLen = 10   // CDX_MAXTAGNAMELEN
	StackSize     = 64   // CDX_STACKSIZE
	IntHeadSize   = 12   // CDX_INT_HEADSIZE
	ExtHeadSize   = 24   // CDX_EXT_HEADSIZE
	// HeaderExpLen is what remains of a header block after its first 512
	// bytes; ReadTagHeader looks for the key/FOR expressions in that area.
	HeaderExpLen = HeaderLen - 512

	// Node types. The navigation code tests NodeLeaf as a bit in the page's
	// attribute word (attr & NodeLeaf).
	NodeBranch = 0    // CDX_NODE_BRANCH
	NodeRoot   = 1    // CDX_NODE_ROOT
	NodeLeaf   = 2    // CDX_NODE_LEAF
	NodeUnused = 0xFF

	// Type flags (bits of TagHeader.IndexOpt).
	TypeUnique    = 0x01
	TypePartial   = 0x02
	TypeCustom    = 0x04
	TypeForFilter = 0x08
	TypeCompact   = 0x20
	TypeCompound  = 0x40
	TypeStructure = 0x80
)

// --- Tag Header (512 bytes in file, at start of each tag's header page) ---

// TagHeader holds a CDX tag's metadata.
// Harbour: CDXTAGHEADER in hbrddcdx.h:188
//
// The numeric fields are decoded little-endian from the first 20 bytes of the
// header block by ReadTagHeader; the remaining fields are derived from later
// bytes of the same block.
type TagHeader struct {
	RootPtr    uint32 // root page offset
	FreePtr    uint32 // free page list
	Counter    uint32 // update counter
	KeySize    uint16 // key length (max 240)
	IndexOpt   byte   // CDX_TYPE_* flags
	IndexSig   byte   // signature
	HeaderLen  uint16 // 0x0400 typically
	PageLen    uint16 // page length
	KeyExpr    string // key expression
	ForExpr    string // FOR filter expression
	Descending bool   // set from the header's ascending/descending flag
	IgnoreCase bool   // set from the header's ignore-case byte
}

// ReadTagHeader reads a CDX tag header from file at given offset.
func ReadTagHeader(f *os.File, offset int64) (*TagHeader, error) { buf := make([]byte, HeaderLen) if _, err := f.ReadAt(buf, offset); err != nil { return nil, fmt.Errorf("read CDX tag header at %d: %w", offset, err) } th := &TagHeader{ RootPtr: binary.LittleEndian.Uint32(buf[0:4]), FreePtr: binary.LittleEndian.Uint32(buf[4:8]), Counter: binary.LittleEndian.Uint32(buf[8:12]), KeySize: binary.LittleEndian.Uint16(buf[12:14]), IndexOpt: buf[14], IndexSig: buf[15], HeaderLen: binary.LittleEndian.Uint16(buf[16:18]), PageLen: binary.LittleEndian.Uint16(buf[18:20]), } th.IgnoreCase = buf[503] != 0 th.Descending = binary.LittleEndian.Uint16(buf[504:506]) != 0 // Key/For expressions — stored directly at offset 512 (0x200) within the header block. // CDX format: key expression at byte 512, for expression follows after null terminator. keyExprStart := 512 th.KeyExpr = trimNull(buf[keyExprStart:]) // FOR expression follows key expression (after null terminator) forStart := keyExprStart + len(th.KeyExpr) + 1 if forStart < len(buf) { th.ForExpr = trimNull(buf[forStart:]) } return th, nil } // --- Leaf page: bit-packed key extraction --- // LeafHeader holds decoded leaf page metadata. // Harbour: CDXEXTNODE in hbrddcdx.h:224 type LeafHeader struct { Attr uint16 NKeys uint16 LeftPtr uint32 RightPtr uint32 FreeSpc uint16 RecMask uint32 DupMask byte TrlMask byte RecBits byte DupBits byte TrlBits byte KeyBytes byte // total bytes per key info entry } // DecodeLeafHeader extracts the 24-byte leaf header from page data. 
//
// All multi-byte fields are little-endian. data must hold at least
// ExtHeadSize (24) bytes; a shorter slice panics on the index expressions.
func DecodeLeafHeader(data []byte) LeafHeader {
	return LeafHeader{
		Attr:     binary.LittleEndian.Uint16(data[0:2]),
		NKeys:    binary.LittleEndian.Uint16(data[2:4]),
		LeftPtr:  binary.LittleEndian.Uint32(data[4:8]),
		RightPtr: binary.LittleEndian.Uint32(data[8:12]),
		FreeSpc:  binary.LittleEndian.Uint16(data[12:14]),
		RecMask:  binary.LittleEndian.Uint32(data[14:18]),
		DupMask:  data[18],
		TrlMask:  data[19],
		RecBits:  data[20],
		DupBits:  data[21],
		TrlBits:  data[22],
		KeyBytes: data[23],
	}
}

// DecodedKey holds a single decoded key from a leaf page.
type DecodedKey struct {
	RecNo uint32 // record-number field of the entry, after masking with RecMask
	Key   []byte // reconstructed key, always keyLen bytes long
}

// DecodeLeafKeys extracts all keys from a CDX leaf page.
// This is the core bit-packing decompression algorithm.
// Harbour: hb_cdxPageLeafDecode in dbfcdx1.c
//
// For every entry it reads three bit fields (record number, duplicate count,
// trailing count) from the info area that follows the header, then rebuilds
// the key from: dupCount bytes copied from the previous key, the entry's
// unique bytes taken from the end of the page working backwards, and
// trlCount trailing spaces. It returns nil when hdr.NKeys is zero, otherwise
// exactly hdr.NKeys entries, each with a freshly allocated Key.
//
// NOTE(review): entries are located at i*(RecBits+DupBits+TrlBits) bits;
// hdr.KeyBytes is not consulted. This presumably relies on the three widths
// adding up to KeyBytes*8 — confirm against the writer.
func DecodeLeafKeys(data []byte, hdr LeafHeader, keyLen int) []DecodedKey {
	if hdr.NKeys == 0 {
		return nil
	}

	keys := make([]DecodedKey, hdr.NKeys)
	totalBits := uint(hdr.RecBits) + uint(hdr.DupBits) + uint(hdr.TrlBits)
	prevKey := make([]byte, keyLen)

	// Key info area starts right after ExtHeadSize
	infoArea := data[ExtHeadSize:]

	// Key data area is at the end of the page, growing backwards
	keyDataEnd := PageLen

	for i := 0; i < int(hdr.NKeys); i++ {
		// Extract bit-packed fields
		bitOffset := uint(i) * totalBits
		recNo := extractBits(infoArea, bitOffset, uint(hdr.RecBits)) & hdr.RecMask
		bitOffset += uint(hdr.RecBits)
		dupCount := int(extractBits(infoArea, bitOffset, uint(hdr.DupBits)) & uint32(hdr.DupMask))
		bitOffset += uint(hdr.DupBits)
		trlCount := int(extractBits(infoArea, bitOffset, uint(hdr.TrlBits)) & uint32(hdr.TrlMask))

		// Reconstruct key
		key := make([]byte, keyLen)

		// Copy duplicate prefix from previous key
		if dupCount > 0 && dupCount <= keyLen {
			copy(key[:dupCount], prevKey[:dupCount])
		}

		// Copy unique portion from key data area (grows from end of page backward)
		uniqueLen := keyLen - dupCount - trlCount
		if uniqueLen > 0 {
			keyDataEnd -= uniqueLen
			// Out-of-range spans are skipped silently, leaving zero bytes in
			// the unique part of the key.
			if keyDataEnd >= ExtHeadSize && keyDataEnd+uniqueLen <= PageLen {
				copy(key[dupCount:dupCount+uniqueLen], data[keyDataEnd:keyDataEnd+uniqueLen])
			}
		}

		// Fill trailing bytes with spaces
		// NOTE(review): if a corrupt page yields trlCount > keyLen, j starts
		// negative and key[j] panics; there is no guard here.
		for j := keyLen - trlCount; j < keyLen; j++ {
			key[j] = ' '
		}

		keys[i] = DecodedKey{RecNo: recNo, Key: key}
		// The next entry's duplicate prefix is taken from this key.
		copy(prevKey, key)
	}
	return keys
}

// extractBits extracts n bits from a byte array starting at bit offset.
func extractBits(data []byte, bitOffset, nBits uint) uint32 {
	if nBits == 0 {
		return 0
	}
	var result uint32
	for i := uint(0); i < nBits; i++ {
		bytePos := (bitOffset + i) / 8
		bitPos := (bitOffset + i) % 8
		if int(bytePos) < len(data) {
			// NOTE(review): the source text is damaged on the next line. Everything
			// between "(1<" and "PageLen" is missing, which takes with it the rest
			// of extractBits, the IntNode / DecodeIntNode / IntKeyEntry declarations
			// used by the navigation code, and the opening of DecodeIntKeys. The
			// surviving tokens are kept verbatim; restore this span from version
			// control rather than reconstructing it by hand.
			if data[bytePos]&(1< PageLen {
				break
			}
			// Surviving tail of DecodeIntKeys: each entry is read as a 4-byte
			// little-endian child page, a 4-byte little-endian record number and
			// keyLen key bytes starting at off+8.
			e := IntKeyEntry{
				ChildPage: binary.LittleEndian.Uint32(data[off : off+4]),
				RecNo:     binary.LittleEndian.Uint32(data[off+4 : off+8]),
				Key:       make([]byte, keyLen),
			}
			// Only the first nKeys entries carry key bytes; any later entry keeps
			// a zeroed Key.
			if i < nKeys {
				copy(e.Key, data[off+8:off+8+keyLen])
			}
			entries[i] = e
			off += entrySize
		}
		return entries
	}

// --- CDX Index (compound, multi-tag) ---

// Index represents an open CDX index file.
type Index struct {
	file *os.File // underlying file, opened read/write by OpenIndex
	tags []*Tag   // tags in the order they were discovered
}

// Tag represents one index tag within a CDX file.
type Tag struct {
	Name      string // tag name (e.g., "BYNAME")
	index     *Index
	header    TagHeader
	headerOff int64 // file offset of this tag's header
	keyLen    int

	// Current position
	stack      [StackSize]StackEntry // pages visited on the way to the current key
	stackLevel int                   // number of valid entries in stack
	curRecNo   uint32
	curKey     []byte
	tagBOF     bool
	tagEOF     bool
}

// StackEntry records one page of the current position path and the key (or
// child) index selected inside it.
type StackEntry struct {
	PageOffset int64
	KeyIndex   int
}

// OpenIndex opens a CDX file and reads all tags.
//
// ".cdx" is appended to path unless it already ends with that suffix
// (compared case-insensitively). The file is opened read/write. The header at
// offset 0 is read first and passed to readCompoundTagList to obtain the tag
// directory; each listed tag header is then loaded. Tags whose header cannot
// be read are skipped silently. If no tag could be loaded, the header at
// offset 0 itself is exposed as a single tag named "TAG1".
//
// The file is closed again when the initial header read fails.
func OpenIndex(path string) (*Index, error) {
	if !strings.HasSuffix(strings.ToLower(path), ".cdx") {
		path += ".cdx"
	}

	f, err := os.OpenFile(path, os.O_RDWR, 0)
	if err != nil {
		return nil, err
	}

	idx := &Index{file: f}

	// Read compound header (structural root at offset 0)
	rootHdr, err := ReadTagHeader(f, 0)
	if err != nil {
		f.Close()
		return nil, err
	}

	// Parse compound tag directory from the structural root's B-tree
	// The structural index keys are 10-byte tag names, and each leaf entry
	// points to the tag header at a specific file offset.
	tagEntries := readCompoundTagList(f, rootHdr)
	for _, entry := range tagEntries {
		tagHdr, err := ReadTagHeader(f, entry.offset)
		if err != nil {
			// Best effort: an unreadable tag is dropped, not reported.
			continue
		}
		tag := &Tag{
			Name:      entry.name,
			index:     idx,
			header:    *tagHdr,
			headerOff: entry.offset,
			keyLen:    int(tagHdr.KeySize),
			curKey:    make([]byte, tagHdr.KeySize),
		}
		idx.tags = append(idx.tags, tag)
	}

	// If no tags found via compound directory, fall back to root as single tag
	if len(idx.tags) == 0 {
		tag := &Tag{
			Name:      "TAG1",
			index:     idx,
			header:    *rootHdr,
			headerOff: 0,
			keyLen:    int(rootHdr.KeySize),
			curKey:    make([]byte, rootHdr.KeySize),
		}
		idx.tags = append(idx.tags, tag)
	}

	return idx, nil
}

// Close closes the CDX file.
func (idx *Index) Close() error {
	return idx.file.Close()
}

// TagCount returns the number of tags.
func (idx *Index) TagCount() int {
	return len(idx.tags)
}

// GetTag returns a tag by index.
// It returns nil when i is outside [0, TagCount()).
func (idx *Index) GetTag(i int) *Tag {
	if i >= 0 && i < len(idx.tags) {
		return idx.tags[i]
	}
	return nil
}

// Tags returns all tags in the CDX.
// The returned slice is the index's own slice, not a copy.
func (idx *Index) Tags() []*Tag {
	return idx.tags
}

// FindTag returns a tag by name.
//
// The comparison is case-insensitive. For each tag, in order, the tag name is
// tried first and then its key expression, so an earlier tag whose key
// expression equals name wins over a later tag with that name. It returns nil
// when nothing matches.
func (idx *Index) FindTag(name string) *Tag {
	upper := strings.ToUpper(name)
	for _, t := range idx.tags {
		if strings.ToUpper(t.Name) == upper {
			return t
		}
		// Also try key expression match
		if strings.ToUpper(t.header.KeyExpr) == upper {
			return t
		}
	}
	return nil
}

// tagDirEntry is a compound tag directory entry.
type tagDirEntry struct {
	name   string // tag name
	offset int64  // file offset of the tag's header
}

// readCompoundTagList reads tag names and offsets from the structural root.
// CDX compound header: root page is a B-tree of tag entries.
// Each leaf key is a tag name; decodeCompoundLeaf uses the entry's record
// number directly as the byte offset of the tag header.
func readCompoundTagList(f *os.File, rootHdr *TagHeader) []tagDirEntry { var entries []tagDirEntry if rootHdr.RootPtr == 0 { return entries } // Read the root page of the structural index pageData := make([]byte, 512) _, err := f.ReadAt(pageData, int64(rootHdr.RootPtr)) if err != nil { return entries } // CDX page header: [attr:2][nKeys:2][leftPtr:4][rightPtr:4] nKeys := int(binary.LittleEndian.Uint16(pageData[2:4])) attr := binary.LittleEndian.Uint16(pageData[0:2]) isLeaf := (attr & 0x02) != 0 if isLeaf { entries = decodeCompoundLeaf(pageData, nKeys) } // If compound leaf decoding didn't find entries, scan for tag headers if len(entries) == 0 { entries = scanCompoundLeaves(f, rootHdr) } return entries } // scanCompoundLeaves scans the CDX file for tag headers. // CDX tag headers are at 0x400 (1024) byte boundaries. // Each tag header is followed by a page with the key expression string. func scanCompoundLeaves(f *os.File, rootHdr *TagHeader) []tagDirEntry { var entries []tagDirEntry fileInfo, err := f.Stat() if err != nil { return entries } fileSize := fileInfo.Size() // Scan at 0x400 intervals; tag headers have: // - RootPtr (uint32 at offset 0) pointing to a valid page // - KeySize (uint16 at offset 12) between 1..240 // - Key expression string at +0x200 (offset 0x106 from header start) // Skip offset 0 (compound root) and scan the rest // Skip compound header at 0x0000; scan from 0x0400 onwards // Tag headers are at 0x400 boundaries but NOT the compound root itself for off := int64(0x400); off < fileSize; off += 0x200 { buf := make([]byte, 0x400) n, err := f.ReadAt(buf, off) if err != nil || n < 0x200 { continue } rootPtr := binary.LittleEndian.Uint32(buf[0:4]) keySize := binary.LittleEndian.Uint16(buf[12:14]) if keySize == 0 || keySize > 240 || rootPtr == 0 { continue } // Validate rootPtr is within file and at a valid page boundary if int64(rootPtr) >= fileSize || rootPtr%512 != 0 { continue } // Read key expression from offset 0x106 within the header keyExpr 
:= "" for i := 0x106; i < 0x206 && i < len(buf) && buf[i] != 0; i++ { keyExpr += string(buf[i]) } if keyExpr == "" { // Key expression might be in the next page (+0x200 from header) exprBuf := make([]byte, 256) f.ReadAt(exprBuf, off+0x200) for i := 0; i < len(exprBuf) && exprBuf[i] != 0; i++ { keyExpr += string(exprBuf[i]) } } if keyExpr == "" { continue } name := strings.ToUpper(strings.TrimSpace(keyExpr)) // Use "BY" + field name convention, or just the expression entries = append(entries, tagDirEntry{name: name, offset: off}) } return entries } // decodeCompoundLeaf decodes tag entries from a compound leaf page. // Compound index uses the same bit-packed format as data leaves, // with keyLen=10 (tag name) and recNo = page offset / PageLen. func decodeCompoundLeaf(data []byte, nKeys int) []tagDirEntry { if nKeys <= 0 || len(data) < ExtHeadSize { return nil } // Use the standard leaf key decoder with keyLen=10 (compound tag name size) hdr := DecodeLeafHeader(data) keys := DecodeLeafKeys(data, hdr, 10) var entries []tagDirEntry for _, dk := range keys { name := trimNull(dk.Key) name = strings.TrimSpace(name) if name == "" { continue } // RecNo in compound index = direct byte offset to tag header entries = append(entries, tagDirEntry{name: name, offset: int64(dk.RecNo)}) } return entries } // --- Tag navigation --- // Seek searches for a key in the CDX tag's B-tree. 
func (t *Tag) Seek(searchKey []byte) (uint32, bool) { t.stackLevel = 0 t.tagBOF = false t.tagEOF = false pageOffset := int64(t.header.RootPtr) return t.seekPage(pageOffset, searchKey) } func (t *Tag) seekPage(pageOffset int64, searchKey []byte) (uint32, bool) { buf := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf, pageOffset); err != nil { t.tagEOF = true return 0, false } attr := binary.LittleEndian.Uint16(buf[0:2]) isLeaf := (attr & NodeLeaf) != 0 if isLeaf { hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) // Binary search in leaf for i, dk := range keys { cmp := bytes.Compare(searchKey, dk.Key[:len(searchKey)]) if cmp == 0 { // Found t.curRecNo = dk.RecNo copy(t.curKey, dk.Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: i} t.stackLevel++ } return dk.RecNo, true } if cmp < 0 { // Search key < current: softseek position t.curRecNo = dk.RecNo copy(t.curKey, dk.Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: i} t.stackLevel++ } return dk.RecNo, false } } // Past all keys: EOF or follow rightPtr if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF { return t.seekPage(int64(hdr.RightPtr), searchKey) } t.tagEOF = true t.curRecNo = 0 return 0, false } // Internal node: binary search then follow child node := DecodeIntNode(buf) intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } for i := 0; i < int(node.NKeys); i++ { cmp := bytes.Compare(searchKey, intKeys[i].Key) if cmp <= 0 { t.stack[t.stackLevel-1].KeyIndex = i return t.seekPage(int64(intKeys[i].ChildPage), searchKey) } } // Follow last child lastIdx := int(node.NKeys) t.stack[t.stackLevel-1].KeyIndex = lastIdx return t.seekPage(int64(intKeys[lastIdx].ChildPage), searchKey) } // GoTop positions at the first key. 
func (t *Tag) GoTop() bool { t.stackLevel = 0 t.tagBOF = false t.tagEOF = false return t.goLeftmost(int64(t.header.RootPtr)) } func (t *Tag) goLeftmost(pageOffset int64) bool { buf := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf, pageOffset); err != nil { return false } attr := binary.LittleEndian.Uint16(buf[0:2]) isLeaf := (attr & NodeLeaf) != 0 if isLeaf { hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) if len(keys) > 0 { t.curRecNo = keys[0].RecNo copy(t.curKey, keys[0].Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } return true } return false } // Internal: follow first child node := DecodeIntNode(buf) intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen) if len(intKeys) > 0 { if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0} t.stackLevel++ } return t.goLeftmost(int64(intKeys[0].ChildPage)) } return false } // GoBottom positions at the last key. 
func (t *Tag) GoBottom() bool { t.stackLevel = 0 t.tagBOF = false t.tagEOF = false return t.goRightmost(int64(t.header.RootPtr)) } func (t *Tag) goRightmost(pageOffset int64) bool { buf := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf, pageOffset); err != nil { return false } attr := binary.LittleEndian.Uint16(buf[0:2]) isLeaf := (attr & NodeLeaf) != 0 if isLeaf { hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) if len(keys) > 0 { last := len(keys) - 1 t.curRecNo = keys[last].RecNo copy(t.curKey, keys[last].Key) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: last} t.stackLevel++ } return true } return false } // Internal: follow last child node := DecodeIntNode(buf) intKeys := DecodeIntKeys(buf, int(node.NKeys), t.keyLen) lastIdx := int(node.NKeys) if t.stackLevel < StackSize { t.stack[t.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastIdx} t.stackLevel++ } return t.goRightmost(int64(intKeys[lastIdx].ChildPage)) } // SkipNext moves to the next key in leaf using rightPtr linked list. // CDX leaf pages are doubly linked — simpler than NTX stack traversal. func (t *Tag) SkipNext() bool { if t.stackLevel == 0 { t.tagEOF = true return false } level := t.stackLevel - 1 pageOffset := t.stack[level].PageOffset keyIdx := t.stack[level].KeyIndex buf := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf, pageOffset); err != nil { t.tagEOF = true return false } hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) // Next key in same page? 
if keyIdx+1 < len(keys) { t.stack[level].KeyIndex = keyIdx + 1 t.curRecNo = keys[keyIdx+1].RecNo copy(t.curKey, keys[keyIdx+1].Key) return true } // Follow rightPtr to next leaf page (CDX linked list) if hdr.RightPtr != 0 && hdr.RightPtr != 0xFFFFFFFF { nextOff := int64(hdr.RightPtr) buf2 := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf2, nextOff); err != nil { t.tagEOF = true return false } hdr2 := DecodeLeafHeader(buf2) keys2 := DecodeLeafKeys(buf2, hdr2, t.keyLen) if len(keys2) > 0 { t.stack[level] = StackEntry{PageOffset: nextOff, KeyIndex: 0} t.curRecNo = keys2[0].RecNo copy(t.curKey, keys2[0].Key) return true } } t.tagEOF = true return false } // SkipPrev moves to the previous key using leftPtr. func (t *Tag) SkipPrev() bool { if t.stackLevel == 0 { t.tagBOF = true return false } level := t.stackLevel - 1 pageOffset := t.stack[level].PageOffset keyIdx := t.stack[level].KeyIndex buf := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf, pageOffset); err != nil { t.tagBOF = true return false } // Previous key in same page? if keyIdx > 0 { hdr := DecodeLeafHeader(buf) keys := DecodeLeafKeys(buf, hdr, t.keyLen) t.stack[level].KeyIndex = keyIdx - 1 t.curRecNo = keys[keyIdx-1].RecNo copy(t.curKey, keys[keyIdx-1].Key) return true } // Follow leftPtr hdr := DecodeLeafHeader(buf) if hdr.LeftPtr != 0 && hdr.LeftPtr != 0xFFFFFFFF { prevOff := int64(hdr.LeftPtr) buf2 := make([]byte, PageLen) if _, err := t.index.file.ReadAt(buf2, prevOff); err != nil { t.tagBOF = true return false } hdr2 := DecodeLeafHeader(buf2) keys2 := DecodeLeafKeys(buf2, hdr2, t.keyLen) if len(keys2) > 0 { last := len(keys2) - 1 t.stack[level] = StackEntry{PageOffset: prevOff, KeyIndex: last} t.curRecNo = keys2[last].RecNo copy(t.curKey, keys2[last].Key) return true } } t.tagBOF = true return false } // CurRecNo returns the current record number. func (t *Tag) CurRecNo() uint32 { return t.curRecNo } // CurKey returns the current key. 
//
// The result is the first keyLen bytes of the tag's internal key buffer, not
// a copy: it changes when the tag moves to another key.
func (t *Tag) CurKey() []byte {
	n := t.keyLen
	return t.curKey[:n]
}

// IsEOF reports whether the tag is positioned past the last key.
func (t *Tag) IsEOF() bool {
	return t.tagEOF
}

// IsBOF reports whether the tag is positioned before the first key.
func (t *Tag) IsBOF() bool {
	return t.tagBOF
}

// KeyLen reports the length in bytes of this tag's keys.
func (t *Tag) KeyLen() int {
	return t.keyLen
}

// KeyExpr reports the key expression text read from the tag's header.
func (t *Tag) KeyExpr() string {
	hdr := &t.header
	return hdr.KeyExpr
}

// ForExpr reports the FOR condition text read from the tag's header.
func (t *Tag) ForExpr() string {
	hdr := &t.header
	return hdr.ForExpr
}

// IsDescending reports whether the header marks the tag as descending.
func (t *Tag) IsDescending() bool {
	hdr := &t.header
	return hdr.Descending
}

// Close does nothing and always returns nil: the file belongs to the parent
// Index, which closes it.
func (t *Tag) Close() error {
	return nil
}

// --- Helpers ---

// trimNull converts b to a string, stopping at the first NUL byte if there
// is one, and strips leading and trailing white space from the result.
func trimNull(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		b = b[:end]
	}
	return strings.TrimSpace(string(b))
}