Files
five/hbrdd/cdx/build.go
CharlesKWON 66882c30bd fix(cdx): Harbour-compatible layout — compound root, RCHB sig, leaf format
Align Five's CDX file layout with Harbour's expectations:
- Compound root header at 0, compound leaf at 1024, tags at 1536+
- "RCHB" signature at offset 20 in compound root
- IgnoreCase/collation flags at offset 503-505
- Compound leaf: LeftPtr/RightPtr = 0xFFFFFFFF, recBits=16 fixed
- Tags sorted alphabetically in compound directory B-tree
- Tag IndexOpt: TypeCompact | TypeCompound (0x60)

Status of Harbour cross-read verification:
- CHAR-only CDX tags: layout matches Harbour byte-for-byte
- Numeric tags: Harbour uses IEEE double (8-byte) key encoding,
  Five uses DBF ASCII key bytes — causes DBFCDX/1012 corruption
  when Harbour reads Five-created CDX with numeric tags
- Five reading Harbour CDX: works perfectly (existing)
- Five reading Five CDX: works perfectly

Remaining: numeric key encoding for full Harbour write-compatibility.
CLAUDE.md updated to reflect this single remaining limitation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-14 01:33:52 +09:00

533 lines
14 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// CDX compound index creation.
// Builds Harbour-compatible CDX files with bit-packed leaf pages.
package cdx
import (
"encoding/binary"
"io"
"math/bits"
"os"
"sort"
"strings"
"five/hbrdd/ntx"
)
// cdxTagMeta holds metadata for a tag during file assembly.
type cdxTagMeta struct {
name string
headerOff int64
rootPage uint32
keyExpr string
forExpr string
keyLen int
unique bool
desc bool
}
// CreateOrAddTag creates a CDX file with a new tag, or appends a tag to
// an existing CDX file. The existing file's bytes are preserved verbatim;
// only the compound root directory is rebuilt to include the new tag.
//
// Harbour: ordCreate() → hb_cdxIndexCreateTag()
func CreateOrAddTag(path string, tagName, keyExpr, forExpr string,
keyLen int, unique, descending bool, keys []ntx.KeyRecord) (*Index, error) {
if !strings.HasSuffix(strings.ToLower(path), ".cdx") {
path += ".cdx"
}
tagName = strings.ToUpper(tagName)
if len(tagName) > MaxTagNameLen {
tagName = tagName[:MaxTagNameLen]
}
// Read existing file contents (if any) and tag metadata
var existingData []byte
var existingTags []cdxTagMeta
if fi, err := os.Stat(path); err == nil && fi.Size() > 0 {
existing, err := OpenIndex(path)
if err == nil {
for _, t := range existing.Tags() {
if strings.ToUpper(t.Name) == tagName {
continue // will be replaced
}
existingTags = append(existingTags, cdxTagMeta{
name: t.Name,
headerOff: t.HeaderOffset(),
rootPage: t.RootPtr(),
keyExpr: t.KeyExpr(),
forExpr: t.ForExpr(),
keyLen: t.KeyLen(),
})
}
existing.Close()
}
// Read entire old file
existingData, _ = os.ReadFile(path)
}
// Create/truncate the file
f, err := os.Create(path)
if err != nil {
return nil, err
}
var appendOff int64 // where to start writing new data
// Harbour CDX layout:
// 0x0000: Compound root header (1024 bytes)
// 0x0400: Compound directory leaf page (512 bytes) ← RootPtr points here
// 0x0600: Tag 1 header (1024 bytes = 2 pages)
// Tag 1 B-tree pages...
// Tag 2 header...
// etc.
// The compound leaf is always at offset HeaderLen (1024).
// Tag headers start at HeaderLen + PageLen (1536).
compoundLeafOff := int64(HeaderLen) // 1024 — fixed position
if len(existingData) > 0 {
// Write back existing data verbatim (preserves all old tag B-trees)
f.Write(existingData)
appendOff = int64(len(existingData))
// Align to page boundary
if appendOff%int64(PageLen) != 0 {
appendOff = (appendOff/int64(PageLen) + 1) * int64(PageLen)
}
} else {
// New file: skip compound root (1024) + compound leaf (512)
appendOff = int64(HeaderLen) + int64(PageLen) // 1536
}
// Write the new tag's header (1024 bytes) + B-tree pages
newTagHeaderOff := appendOff
appendOff += int64(HeaderLen) // 1024 bytes for tag header
// Build B-tree pages for the new tag
var rootPageOff uint32
if len(keys) == 0 {
pg := make([]byte, PageLen)
binary.LittleEndian.PutUint16(pg[0:2], NodeLeaf|NodeRoot)
f.WriteAt(pg, appendOff)
rootPageOff = uint32(appendOff)
appendOff += PageLen
} else {
root, next := buildCDXBTree(f, keys, keyLen, appendOff)
rootPageOff = uint32(root)
appendOff = next
}
// Write the new tag header
writeTagHeader(f, newTagHeaderOff, rootPageOff, keyExpr, forExpr,
uint16(keyLen), unique, descending)
newTag := cdxTagMeta{
name: tagName, headerOff: newTagHeaderOff, rootPage: rootPageOff,
keyExpr: keyExpr, forExpr: forExpr, keyLen: keyLen,
unique: unique, desc: descending,
}
// Collect all tags (existing + new) in offset order (= creation order)
allTags := append(existingTags, newTag)
// Write compound directory leaf page at fixed offset 1024.
// Harbour's compound B-tree stores entries in ALPHABETICAL order
// (it's a B-tree keyed by tag name). Sort a copy for the leaf.
sortedTags := make([]cdxTagMeta, len(allTags))
copy(sortedTags, allTags)
sort.Slice(sortedTags, func(i, j int) bool {
return strings.ToUpper(sortedTags[i].name) < strings.ToUpper(sortedTags[j].name)
})
writeCompoundLeaf(f, compoundLeafOff, sortedTags)
// Write compound root header at offset 0
writeCompoundHeader(f, uint32(compoundLeafOff), len(allTags))
f.Close()
return OpenIndex(path)
}
// --- Tag header writing ---
func writeTagHeader(f *os.File, offset int64, rootPtr uint32,
keyExpr, forExpr string, keySize uint16, unique, desc bool) {
buf := make([]byte, HeaderLen)
binary.LittleEndian.PutUint32(buf[0:4], rootPtr)
// Counter = 0 initially (Harbour convention)
binary.LittleEndian.PutUint16(buf[12:14], keySize)
// Harbour sets TypeCompact | TypeCompound on data tags (0x60)
opt := byte(TypeCompact | TypeCompound)
if unique {
opt |= TypeUnique
}
if forExpr != "" {
opt |= TypeForFilter
}
buf[14] = opt
buf[15] = 0x01
binary.LittleEndian.PutUint16(buf[16:18], uint16(HeaderLen))
binary.LittleEndian.PutUint16(buf[18:20], uint16(PageLen))
if desc {
binary.LittleEndian.PutUint16(buf[504:506], 1)
}
copy(buf[512:], []byte(keyExpr))
if forExpr != "" {
forOff := 512 + len(keyExpr) + 1
if forOff+len(forExpr) < HeaderLen {
copy(buf[forOff:], []byte(forExpr))
}
}
f.WriteAt(buf, offset)
}
// --- Compound root ---
func writeCompoundHeader(f *os.File, rootPagePtr uint32, nTags int) {
hdr := make([]byte, HeaderLen)
binary.LittleEndian.PutUint32(hdr[0:4], rootPagePtr)
// FreePtr = 0, Counter = 0 (Harbour convention for compound root)
binary.LittleEndian.PutUint16(hdr[12:14], MaxTagNameLen)
hdr[14] = TypeCompound | TypeStructure | TypeCompact // 0xE0
hdr[15] = 0x01
binary.LittleEndian.PutUint16(hdr[16:18], uint16(HeaderLen))
binary.LittleEndian.PutUint16(hdr[18:20], uint16(PageLen))
// Harbour writes "RCHB" signature at offset 20 in compound root
copy(hdr[20:24], []byte("RCHB"))
// IgnoreCase=1, Descending flags at offset 503-505 (Harbour convention)
hdr[503] = 1 // IgnoreCase
binary.LittleEndian.PutUint16(hdr[504:506], 1) // collation flag
f.WriteAt(hdr, 0)
}
func writeCompoundLeaf(f *os.File, offset int64, tags []cdxTagMeta) {
leaf := make([]byte, PageLen)
nTags := len(tags)
compKeyLen := MaxTagNameLen
// Harbour uses fixed 16-bit recBits for compound leaf (offsets < 64KB)
recBits := 16
dupBits := bitsNeeded(uint32(compKeyLen)) // 4
trlBits := bitsNeeded(uint32(compKeyLen)) // 4
keyBytes := (recBits + dupBits + trlBits + 7) / 8 // 3
binary.LittleEndian.PutUint16(leaf[0:2], NodeLeaf|NodeRoot)
binary.LittleEndian.PutUint16(leaf[2:4], uint16(nTags))
// Harbour sets LeftPtr/RightPtr to 0xFFFFFFFF for compound leaf
binary.LittleEndian.PutUint32(leaf[4:8], 0xFFFFFFFF)
binary.LittleEndian.PutUint32(leaf[8:12], 0xFFFFFFFF)
binary.LittleEndian.PutUint32(leaf[14:18], (1<<uint(recBits))-1)
leaf[18] = byte((1 << uint(dupBits)) - 1)
leaf[19] = byte((1 << uint(trlBits)) - 1)
leaf[20] = byte(recBits)
leaf[21] = byte(dupBits)
leaf[22] = byte(trlBits)
leaf[23] = byte(keyBytes)
prevKey := make([]byte, compKeyLen)
for j := range prevKey {
prevKey[j] = ' '
}
keyDataPos := PageLen
for i, t := range tags {
key := padTagName(t.name)
dup := commonPrefix(key, prevKey, compKeyLen)
trl := trailingSpaces(key, compKeyLen)
newBytes := compKeyLen - dup - trl
if newBytes > 0 {
keyDataPos -= newBytes
copy(leaf[keyDataPos:], key[dup:dup+newBytes])
}
recNo := uint32(t.headerOff)
var val uint64
val = uint64(trl)
val <<= uint(dupBits)
val |= uint64(dup)
val <<= uint(recBits)
val |= uint64(recNo)
entryOff := ExtHeadSize + i*int(keyBytes)
for j := 0; j < int(keyBytes); j++ {
leaf[entryOff+j] = byte(val & 0xFF)
val >>= 8
}
copy(prevKey, key)
}
freeSpc := keyDataPos - (ExtHeadSize + nTags*int(keyBytes))
if freeSpc < 0 {
freeSpc = 0
}
binary.LittleEndian.PutUint16(leaf[12:14], uint16(freeSpc))
f.WriteAt(leaf, offset)
}
// --- B-tree builder ---
func buildCDXBTree(f *os.File, keys []ntx.KeyRecord, keyLen int,
startOff int64) (rootOff int64, nextOff int64) {
maxRecNo := uint32(0)
for _, k := range keys {
if k.RecNo > maxRecNo {
maxRecNo = k.RecNo
}
}
recBits := bitsNeeded(maxRecNo)
dupBits := bitsNeeded(uint32(keyLen))
trlBits := bitsNeeded(uint32(keyLen))
keyBytesPerEntry := (recBits + dupBits + trlBits + 7) / 8
maxKeysPerLeaf := (PageLen - ExtHeadSize) / (int(keyBytesPerEntry) + keyLen)
if maxKeysPerLeaf < 2 {
maxKeysPerLeaf = 2
}
intEntrySize := keyLen + 8
maxKeysPerInt := (PageLen - 12) / intEntrySize
if maxKeysPerInt < 2 {
maxKeysPerInt = 2
}
curOff := startOff
type leafInfo struct {
pageOff int64
lastKey []byte
lastRec uint32
}
var leaves []leafInfo
for i := 0; i < len(keys); {
end := i + maxKeysPerLeaf
if end > len(keys) {
end = len(keys)
}
chunk := keys[i:end]
pageOff := curOff
writeCDXLeafPage(f, pageOff, chunk, keyLen, recBits, dupBits, trlBits, keyBytesPerEntry)
curOff += PageLen
leaves = append(leaves, leafInfo{
pageOff: pageOff,
lastKey: chunk[len(chunk)-1].Key,
lastRec: chunk[len(chunk)-1].RecNo,
})
i = end
}
// Link leaf pages
for i := range leaves {
pg := make([]byte, PageLen)
f.ReadAt(pg, leaves[i].pageOff)
if i > 0 {
binary.LittleEndian.PutUint32(pg[4:8], uint32(leaves[i-1].pageOff))
}
if i < len(leaves)-1 {
binary.LittleEndian.PutUint32(pg[8:12], uint32(leaves[i+1].pageOff))
}
f.WriteAt(pg, leaves[i].pageOff)
}
if len(leaves) == 1 {
pg := make([]byte, 2)
f.ReadAt(pg, leaves[0].pageOff)
attr := binary.LittleEndian.Uint16(pg)
attr |= NodeRoot
binary.LittleEndian.PutUint16(pg, attr)
f.WriteAt(pg, leaves[0].pageOff)
return leaves[0].pageOff, curOff
}
type childInfo struct {
pageOff int64
sepKey []byte
sepRec uint32
}
children := make([]childInfo, len(leaves))
for i, l := range leaves {
children[i] = childInfo{pageOff: l.pageOff, sepKey: l.lastKey, sepRec: l.lastRec}
}
for len(children) > 1 {
var nextLevel []childInfo
for i := 0; i < len(children); {
end := i + maxKeysPerInt + 1
if end > len(children) {
end = len(children)
}
group := children[i:end]
nKeys := len(group) - 1
pg := make([]byte, PageLen)
attr := uint16(0)
if len(children) <= maxKeysPerInt+1 {
attr |= NodeRoot
}
binary.LittleEndian.PutUint16(pg[0:2], attr)
binary.LittleEndian.PutUint16(pg[2:4], uint16(nKeys))
binary.LittleEndian.PutUint32(pg[4:8], uint32(group[0].pageOff))
off := 12
for k := 0; k < nKeys; k++ {
key := group[k].sepKey
if len(key) < keyLen {
padded := make([]byte, keyLen)
copy(padded, key)
for j := len(key); j < keyLen; j++ {
padded[j] = ' '
}
key = padded
}
copy(pg[off:off+keyLen], key[:keyLen])
off += keyLen
binary.BigEndian.PutUint32(pg[off:off+4], group[k].sepRec)
off += 4
binary.BigEndian.PutUint32(pg[off:off+4], uint32(group[k+1].pageOff))
off += 4
}
pageOff := curOff
f.WriteAt(pg, pageOff)
curOff += PageLen
ci := childInfo{pageOff: pageOff}
if end < len(children) {
ci.sepKey = group[nKeys].sepKey
ci.sepRec = group[nKeys].sepRec
}
nextLevel = append(nextLevel, ci)
i = end
}
children = nextLevel
}
return children[0].pageOff, curOff
}
// --- Leaf page writer ---
func writeCDXLeafPage(f *os.File, offset int64, keys []ntx.KeyRecord,
keyLen, recBits, dupBits, trlBits, keyBytesPerEntry int) {
pg := make([]byte, PageLen)
binary.LittleEndian.PutUint16(pg[0:2], NodeLeaf)
binary.LittleEndian.PutUint16(pg[2:4], uint16(len(keys)))
binary.LittleEndian.PutUint32(pg[14:18], (1<<uint(recBits))-1)
pg[18] = byte((1 << uint(dupBits)) - 1)
pg[19] = byte((1 << uint(trlBits)) - 1)
pg[20] = byte(recBits)
pg[21] = byte(dupBits)
pg[22] = byte(trlBits)
pg[23] = byte(keyBytesPerEntry)
prevKey := make([]byte, keyLen)
for j := range prevKey {
prevKey[j] = ' '
}
keyDataPos := PageLen
for i, kr := range keys {
key := kr.Key
if len(key) < keyLen {
padded := make([]byte, keyLen)
copy(padded, key)
for j := len(key); j < keyLen; j++ {
padded[j] = ' '
}
key = padded
} else if len(key) > keyLen {
key = key[:keyLen]
}
dup := commonPrefix(key, prevKey, keyLen)
trl := trailingSpaces(key, keyLen)
newBytes := keyLen - dup - trl
if newBytes > 0 {
keyDataPos -= newBytes
copy(pg[keyDataPos:], key[dup:dup+newBytes])
}
var val uint64
val = uint64(trl)
val <<= uint(dupBits)
val |= uint64(dup)
val <<= uint(recBits)
val |= uint64(kr.RecNo)
entryOff := ExtHeadSize + i*keyBytesPerEntry
for j := 0; j < keyBytesPerEntry; j++ {
pg[entryOff+j] = byte(val & 0xFF)
val >>= 8
}
copy(prevKey, key)
}
freeSpc := keyDataPos - (ExtHeadSize + len(keys)*keyBytesPerEntry)
if freeSpc < 0 {
freeSpc = 0
}
binary.LittleEndian.PutUint16(pg[12:14], uint16(freeSpc))
f.WriteAt(pg, offset)
}
// --- Helpers ---
func bitsNeeded(maxVal uint32) int {
if maxVal == 0 {
return 1
}
return bits.Len32(maxVal)
}
func commonPrefix(a, b []byte, maxLen int) int {
n := maxLen
if len(a) < n {
n = len(a)
}
if len(b) < n {
n = len(b)
}
for i := 0; i < n; i++ {
if a[i] != b[i] {
return i
}
}
return n
}
func trailingSpaces(key []byte, keyLen int) int {
n := keyLen
if len(key) < n {
n = len(key)
}
count := 0
for i := n - 1; i >= 0; i-- {
if key[i] == ' ' {
count++
} else {
break
}
}
return count
}
func padTagName(name string) []byte {
b := make([]byte, MaxTagNameLen)
copy(b, []byte(strings.ToUpper(name)))
for i := len(name); i < MaxTagNameLen; i++ {
b[i] = ' '
}
return b
}
// Ensure io import is used (for potential future reads)
var _ = io.EOF