// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com) // All rights reserved. // CDX compound index creation. // Builds Harbour-compatible CDX files with bit-packed leaf pages. package cdx import ( "encoding/binary" "io" "math/bits" "os" "sort" "strings" "five/hbrdd/ntx" ) // cdxTagMeta holds metadata for a tag during file assembly. type cdxTagMeta struct { name string headerOff int64 rootPage uint32 keyExpr string forExpr string keyLen int unique bool desc bool } // CreateOrAddTag creates a CDX file with a new tag, or appends a tag to // an existing CDX file. The existing file's bytes are preserved verbatim; // only the compound root directory is rebuilt to include the new tag. // // Harbour: ordCreate() → hb_cdxIndexCreateTag() func CreateOrAddTag(path string, tagName, keyExpr, forExpr string, keyLen int, unique, descending bool, keys []ntx.KeyRecord) (*Index, error) { if !strings.HasSuffix(strings.ToLower(path), ".cdx") { path += ".cdx" } tagName = strings.ToUpper(tagName) if len(tagName) > MaxTagNameLen { tagName = tagName[:MaxTagNameLen] } // Read existing file contents (if any) and tag metadata var existingData []byte var existingTags []cdxTagMeta if fi, err := os.Stat(path); err == nil && fi.Size() > 0 { existing, err := OpenIndex(path) if err == nil { for _, t := range existing.Tags() { if strings.ToUpper(t.Name) == tagName { continue // will be replaced } existingTags = append(existingTags, cdxTagMeta{ name: t.Name, headerOff: t.HeaderOffset(), rootPage: t.RootPtr(), keyExpr: t.KeyExpr(), forExpr: t.ForExpr(), keyLen: t.KeyLen(), }) } existing.Close() } // Read entire old file existingData, _ = os.ReadFile(path) } // Create/truncate the file f, err := os.Create(path) if err != nil { return nil, err } var appendOff int64 // where to start writing new data // Harbour CDX layout: // 0x0000: Compound root header (1024 bytes) // 0x0400: Compound directory leaf page (512 bytes) ← RootPtr points here // 0x0600: Tag 1 header (1024 bytes = 2 pages) // Tag 1 B-tree pages... // Tag 2 header... // etc. // The compound leaf is always at offset HeaderLen (1024). // Tag headers start at HeaderLen + PageLen (1536). compoundLeafOff := int64(HeaderLen) // 1024 — fixed position if len(existingData) > 0 { // Write back existing data verbatim (preserves all old tag B-trees) f.Write(existingData) appendOff = int64(len(existingData)) // Align to page boundary if appendOff%int64(PageLen) != 0 { appendOff = (appendOff/int64(PageLen) + 1) * int64(PageLen) } } else { // New file: skip compound root (1024) + compound leaf (512) appendOff = int64(HeaderLen) + int64(PageLen) // 1536 } // Write the new tag's header (1024 bytes) + B-tree pages newTagHeaderOff := appendOff appendOff += int64(HeaderLen) // 1024 bytes for tag header // Build B-tree pages for the new tag var rootPageOff uint32 if len(keys) == 0 { pg := make([]byte, PageLen) binary.LittleEndian.PutUint16(pg[0:2], NodeLeaf|NodeRoot) f.WriteAt(pg, appendOff) rootPageOff = uint32(appendOff) appendOff += PageLen } else { root, next := buildCDXBTree(f, keys, keyLen, appendOff) rootPageOff = uint32(root) appendOff = next } // Write the new tag header writeTagHeader(f, newTagHeaderOff, rootPageOff, keyExpr, forExpr, uint16(keyLen), unique, descending) newTag := cdxTagMeta{ name: tagName, headerOff: newTagHeaderOff, rootPage: rootPageOff, keyExpr: keyExpr, forExpr: forExpr, keyLen: keyLen, unique: unique, desc: descending, } // Collect all tags (existing + new) in offset order (= creation order) allTags := append(existingTags, newTag) // Write compound directory leaf page at fixed offset 1024. // Harbour's compound B-tree stores entries in ALPHABETICAL order // (it's a B-tree keyed by tag name). Sort a copy for the leaf. sortedTags := make([]cdxTagMeta, len(allTags)) copy(sortedTags, allTags) sort.Slice(sortedTags, func(i, j int) bool { return strings.ToUpper(sortedTags[i].name) < strings.ToUpper(sortedTags[j].name) }) writeCompoundLeaf(f, compoundLeafOff, sortedTags) // Write compound root header at offset 0 writeCompoundHeader(f, uint32(compoundLeafOff), len(allTags)) f.Close() return OpenIndex(path) } // --- Tag header writing --- func writeTagHeader(f *os.File, offset int64, rootPtr uint32, keyExpr, forExpr string, keySize uint16, unique, desc bool) { buf := make([]byte, HeaderLen) binary.LittleEndian.PutUint32(buf[0:4], rootPtr) // Counter = 0 initially (Harbour convention) binary.LittleEndian.PutUint16(buf[12:14], keySize) // Harbour sets TypeCompact | TypeCompound on data tags (0x60) opt := byte(TypeCompact | TypeCompound) if unique { opt |= TypeUnique } if forExpr != "" { opt |= TypeForFilter } buf[14] = opt buf[15] = 0x01 binary.LittleEndian.PutUint16(buf[16:18], uint16(HeaderLen)) binary.LittleEndian.PutUint16(buf[18:20], uint16(PageLen)) if desc { binary.LittleEndian.PutUint16(buf[504:506], 1) } copy(buf[512:], []byte(keyExpr)) if forExpr != "" { forOff := 512 + len(keyExpr) + 1 if forOff+len(forExpr) < HeaderLen { copy(buf[forOff:], []byte(forExpr)) } } f.WriteAt(buf, offset) } // --- Compound root --- func writeCompoundHeader(f *os.File, rootPagePtr uint32, nTags int) { hdr := make([]byte, HeaderLen) binary.LittleEndian.PutUint32(hdr[0:4], rootPagePtr) // FreePtr = 0, Counter = 0 (Harbour convention for compound root) binary.LittleEndian.PutUint16(hdr[12:14], MaxTagNameLen) hdr[14] = TypeCompound | TypeStructure | TypeCompact // 0xE0 hdr[15] = 0x01 binary.LittleEndian.PutUint16(hdr[16:18], uint16(HeaderLen)) binary.LittleEndian.PutUint16(hdr[18:20], uint16(PageLen)) // Harbour writes "RCHB" signature at offset 20 in compound root copy(hdr[20:24], []byte("RCHB")) // IgnoreCase=1, Descending flags at offset 503-505 (Harbour convention) hdr[503] = 1 // IgnoreCase binary.LittleEndian.PutUint16(hdr[504:506], 1) // collation flag f.WriteAt(hdr, 0) } func writeCompoundLeaf(f *os.File, offset int64, tags []cdxTagMeta) { leaf := make([]byte, PageLen) nTags := len(tags) compKeyLen := MaxTagNameLen // Harbour uses fixed 16-bit recBits for compound leaf (offsets < 64KB) recBits := 16 dupBits := bitsNeeded(uint32(compKeyLen)) // 4 trlBits := bitsNeeded(uint32(compKeyLen)) // 4 keyBytes := (recBits + dupBits + trlBits + 7) / 8 // 3 binary.LittleEndian.PutUint16(leaf[0:2], NodeLeaf|NodeRoot) binary.LittleEndian.PutUint16(leaf[2:4], uint16(nTags)) // Harbour sets LeftPtr/RightPtr to 0xFFFFFFFF for compound leaf binary.LittleEndian.PutUint32(leaf[4:8], 0xFFFFFFFF) binary.LittleEndian.PutUint32(leaf[8:12], 0xFFFFFFFF) binary.LittleEndian.PutUint32(leaf[14:18], (1< 0 { keyDataPos -= newBytes copy(leaf[keyDataPos:], key[dup:dup+newBytes]) } recNo := uint32(t.headerOff) var val uint64 val = uint64(trl) val <<= uint(dupBits) val |= uint64(dup) val <<= uint(recBits) val |= uint64(recNo) entryOff := ExtHeadSize + i*int(keyBytes) for j := 0; j < int(keyBytes); j++ { leaf[entryOff+j] = byte(val & 0xFF) val >>= 8 } copy(prevKey, key) } freeSpc := keyDataPos - (ExtHeadSize + nTags*int(keyBytes)) if freeSpc < 0 { freeSpc = 0 } binary.LittleEndian.PutUint16(leaf[12:14], uint16(freeSpc)) f.WriteAt(leaf, offset) } // --- B-tree builder --- func buildCDXBTree(f *os.File, keys []ntx.KeyRecord, keyLen int, startOff int64) (rootOff int64, nextOff int64) { maxRecNo := uint32(0) for _, k := range keys { if k.RecNo > maxRecNo { maxRecNo = k.RecNo } } recBits := bitsNeeded(maxRecNo) dupBits := bitsNeeded(uint32(keyLen)) trlBits := bitsNeeded(uint32(keyLen)) keyBytesPerEntry := (recBits + dupBits + trlBits + 7) / 8 maxKeysPerLeaf := (PageLen - ExtHeadSize) / (int(keyBytesPerEntry) + keyLen) if maxKeysPerLeaf < 2 { maxKeysPerLeaf = 2 } intEntrySize := keyLen + 8 maxKeysPerInt := (PageLen - 12) / intEntrySize if maxKeysPerInt < 2 { maxKeysPerInt = 2 } curOff := startOff type leafInfo struct { pageOff int64 lastKey []byte lastRec uint32 } var leaves []leafInfo for i := 0; i < len(keys); { end := i + maxKeysPerLeaf if end > len(keys) { end = len(keys) } chunk := keys[i:end] pageOff := curOff writeCDXLeafPage(f, pageOff, chunk, keyLen, recBits, dupBits, trlBits, keyBytesPerEntry) curOff += PageLen leaves = append(leaves, leafInfo{ pageOff: pageOff, lastKey: chunk[len(chunk)-1].Key, lastRec: chunk[len(chunk)-1].RecNo, }) i = end } // Link leaf pages for i := range leaves { pg := make([]byte, PageLen) f.ReadAt(pg, leaves[i].pageOff) if i > 0 { binary.LittleEndian.PutUint32(pg[4:8], uint32(leaves[i-1].pageOff)) } if i < len(leaves)-1 { binary.LittleEndian.PutUint32(pg[8:12], uint32(leaves[i+1].pageOff)) } f.WriteAt(pg, leaves[i].pageOff) } if len(leaves) == 1 { pg := make([]byte, 2) f.ReadAt(pg, leaves[0].pageOff) attr := binary.LittleEndian.Uint16(pg) attr |= NodeRoot binary.LittleEndian.PutUint16(pg, attr) f.WriteAt(pg, leaves[0].pageOff) return leaves[0].pageOff, curOff } type childInfo struct { pageOff int64 sepKey []byte sepRec uint32 } children := make([]childInfo, len(leaves)) for i, l := range leaves { children[i] = childInfo{pageOff: l.pageOff, sepKey: l.lastKey, sepRec: l.lastRec} } for len(children) > 1 { var nextLevel []childInfo for i := 0; i < len(children); { end := i + maxKeysPerInt + 1 if end > len(children) { end = len(children) } group := children[i:end] nKeys := len(group) - 1 pg := make([]byte, PageLen) attr := uint16(0) if len(children) <= maxKeysPerInt+1 { attr |= NodeRoot } binary.LittleEndian.PutUint16(pg[0:2], attr) binary.LittleEndian.PutUint16(pg[2:4], uint16(nKeys)) binary.LittleEndian.PutUint32(pg[4:8], uint32(group[0].pageOff)) off := 12 for k := 0; k < nKeys; k++ { key := group[k].sepKey if len(key) < keyLen { padded := make([]byte, keyLen) copy(padded, key) for j := len(key); j < keyLen; j++ { padded[j] = ' ' } key = padded } copy(pg[off:off+keyLen], key[:keyLen]) off += keyLen binary.BigEndian.PutUint32(pg[off:off+4], group[k].sepRec) off += 4 binary.BigEndian.PutUint32(pg[off:off+4], uint32(group[k+1].pageOff)) off += 4 } pageOff := curOff f.WriteAt(pg, pageOff) curOff += PageLen ci := childInfo{pageOff: pageOff} if end < len(children) { ci.sepKey = group[nKeys].sepKey ci.sepRec = group[nKeys].sepRec } nextLevel = append(nextLevel, ci) i = end } children = nextLevel } return children[0].pageOff, curOff } // --- Leaf page writer --- func writeCDXLeafPage(f *os.File, offset int64, keys []ntx.KeyRecord, keyLen, recBits, dupBits, trlBits, keyBytesPerEntry int) { pg := make([]byte, PageLen) binary.LittleEndian.PutUint16(pg[0:2], NodeLeaf) binary.LittleEndian.PutUint16(pg[2:4], uint16(len(keys))) binary.LittleEndian.PutUint32(pg[14:18], (1< keyLen { key = key[:keyLen] } dup := commonPrefix(key, prevKey, keyLen) trl := trailingSpaces(key, keyLen) newBytes := keyLen - dup - trl if newBytes > 0 { keyDataPos -= newBytes copy(pg[keyDataPos:], key[dup:dup+newBytes]) } var val uint64 val = uint64(trl) val <<= uint(dupBits) val |= uint64(dup) val <<= uint(recBits) val |= uint64(kr.RecNo) entryOff := ExtHeadSize + i*keyBytesPerEntry for j := 0; j < keyBytesPerEntry; j++ { pg[entryOff+j] = byte(val & 0xFF) val >>= 8 } copy(prevKey, key) } freeSpc := keyDataPos - (ExtHeadSize + len(keys)*keyBytesPerEntry) if freeSpc < 0 { freeSpc = 0 } binary.LittleEndian.PutUint16(pg[12:14], uint16(freeSpc)) f.WriteAt(pg, offset) } // --- Helpers --- func bitsNeeded(maxVal uint32) int { if maxVal == 0 { return 1 } return bits.Len32(maxVal) } func commonPrefix(a, b []byte, maxLen int) int { n := maxLen if len(a) < n { n = len(a) } if len(b) < n { n = len(b) } for i := 0; i < n; i++ { if a[i] != b[i] { return i } } return n } func trailingSpaces(key []byte, keyLen int) int { n := keyLen if len(key) < n { n = len(key) } count := 0 for i := n - 1; i >= 0; i-- { if key[i] == ' ' { count++ } else { break } } return count } func padTagName(name string) []byte { b := make([]byte, MaxTagNameLen) copy(b, []byte(strings.ToUpper(name))) for i := len(name); i < MaxTagNameLen; i++ { b[i] = ' ' } return b } // Ensure io import is used (for potential future reads) var _ = io.EOF