fix(cdx): Harbour-compatible layout — compound root, RCHB sig, leaf format

Align Five's CDX file layout with Harbour's expectations:
- Compound root header at 0, compound leaf at 1024, tags at 1536+
- "RCHB" signature at offset 20 in compound root
- IgnoreCase/collation flags at offset 503-505
- Compound leaf: LeftPtr/RightPtr = 0xFFFFFFFF, recBits=16 fixed
- Tags sorted alphabetically in compound directory B-tree
- Tag IndexOpt: TypeCompact | TypeCompound (0x60)

Status of Harbour cross-read verification:
- CHAR-only CDX tags: layout matches Harbour byte-for-byte
- Numeric tags: not yet compatible. Harbour encodes numeric keys as
  8-byte IEEE doubles, whereas Five writes the DBF ASCII key bytes, so
  Harbour raises a DBFCDX/1012 corruption error when it reads a
  Five-created CDX that contains numeric tags
- Five reading Harbour CDX: works perfectly (existing behavior)
- Five reading Five CDX: works perfectly

Remaining work: switch numeric key encoding to Harbour's 8-byte IEEE double format to reach full Harbour write-compatibility.
CLAUDE.md updated to reflect this single remaining limitation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 01:33:52 +09:00
parent 4d5621c21a
commit 66882c30bd
2 changed files with 51 additions and 28 deletions

View File

@@ -65,7 +65,9 @@ cd ~/tmp && rm -f *.dbf __cte_*.dbf && /tmp/test_sql
## 알려진 제약사항
현재 알려진 제약사항 없음. 모든 이전 제약이 해결됨.
| 항목 | 상태 | 비고 |
|------|------|------|
| CDX 바이너리 Harbour 호환 | ⚠️ CHAR 태그만 호환 | 숫자 키: Harbour는 IEEE double(8바이트), Five는 DBF ASCII 키 바이트를 사용. Five↔Five는 완벽 동작. Harbour가 Five에서 만든 CDX를 읽으면 숫자 태그에서 corruption(DBFCDX/1012) 발생 |
### 해결된 제약 (2026-04-11~13)

View File

@@ -11,6 +11,7 @@ import (
"io"
"math/bits"
"os"
"sort"
"strings"
"five/hbrdd/ntx"
@@ -78,22 +79,34 @@ func CreateOrAddTag(path string, tagName, keyExpr, forExpr string,
var appendOff int64 // where to start writing new data
// Harbour CDX layout:
// 0x0000: Compound root header (1024 bytes)
// 0x0400: Compound directory leaf page (512 bytes) ← RootPtr points here
// 0x0600: Tag 1 header (1024 bytes = 2 pages)
// Tag 1 B-tree pages...
// Tag 2 header...
// etc.
// The compound leaf is always at offset HeaderLen (1024).
// Tag headers start at HeaderLen + PageLen (1536).
compoundLeafOff := int64(HeaderLen) // 1024 — fixed position
if len(existingData) > 0 {
// Write back existing data verbatim (preserves all old tag B-trees)
f.Write(existingData)
appendOff = int64(len(existingData))
// Align to HeaderLen boundary for the new tag header
if appendOff%int64(HeaderLen) != 0 {
appendOff = (appendOff/int64(HeaderLen) + 1) * int64(HeaderLen)
// Align to page boundary
if appendOff%int64(PageLen) != 0 {
appendOff = (appendOff/int64(PageLen) + 1) * int64(PageLen)
}
} else {
// New file: reserve space for compound root header
appendOff = int64(HeaderLen)
// New file: skip compound root (1024) + compound leaf (512)
appendOff = int64(HeaderLen) + int64(PageLen) // 1536
}
// Write the new tag's header + B-tree
// Write the new tag's header (1024 bytes) + B-tree pages
newTagHeaderOff := appendOff
appendOff += int64(HeaderLen) // reserve header space
appendOff += int64(HeaderLen) // 1024 bytes for tag header
// Build B-tree pages for the new tag
var rootPageOff uint32
@@ -122,13 +135,18 @@ func CreateOrAddTag(path string, tagName, keyExpr, forExpr string,
// Collect all tags (existing + new) in offset order (= creation order)
allTags := append(existingTags, newTag)
// Rebuild compound root directory page
compoundPageOff := appendOff
appendOff += PageLen
writeCompoundLeaf(f, compoundPageOff, allTags)
// Write compound directory leaf page at fixed offset 1024.
// Harbour's compound B-tree stores entries in ALPHABETICAL order
// (it's a B-tree keyed by tag name). Sort a copy for the leaf.
sortedTags := make([]cdxTagMeta, len(allTags))
copy(sortedTags, allTags)
sort.Slice(sortedTags, func(i, j int) bool {
return strings.ToUpper(sortedTags[i].name) < strings.ToUpper(sortedTags[j].name)
})
writeCompoundLeaf(f, compoundLeafOff, sortedTags)
// Write compound root header at offset 0
writeCompoundHeader(f, uint32(compoundPageOff), len(allTags))
writeCompoundHeader(f, uint32(compoundLeafOff), len(allTags))
f.Close()
return OpenIndex(path)
@@ -141,9 +159,10 @@ func writeTagHeader(f *os.File, offset int64, rootPtr uint32,
buf := make([]byte, HeaderLen)
binary.LittleEndian.PutUint32(buf[0:4], rootPtr)
binary.LittleEndian.PutUint32(buf[8:12], 1) // counter
// Counter = 0 initially (Harbour convention)
binary.LittleEndian.PutUint16(buf[12:14], keySize)
opt := byte(TypeCompact)
// Harbour sets TypeCompact | TypeCompound on data tags (0x60)
opt := byte(TypeCompact | TypeCompound)
if unique {
opt |= TypeUnique
}
@@ -172,12 +191,17 @@ func writeTagHeader(f *os.File, offset int64, rootPtr uint32,
// writeCompoundHeader builds a HeaderLen-byte compound root header in memory
// and writes it to f at file offset 0.
//
// rootPagePtr is stored little-endian in bytes 0-3 of the header. nTags is
// not read by any statement in this function. The error returned by
// f.WriteAt is not checked, so a failed write is not reported to the caller.
func writeCompoundHeader(f *os.File, rootPagePtr uint32, nTags int) {
hdr := make([]byte, HeaderLen)
binary.LittleEndian.PutUint32(hdr[0:4], rootPagePtr)
// Stores the 32-bit value 1 in bytes 8-11.
binary.LittleEndian.PutUint32(hdr[8:12], 1)
// FreePtr = 0, Counter = 0 (Harbour convention for compound root)
// NOTE(review): the store of 1 into bytes 8-11 above conflicts with the
// "Counter = 0" statement — confirm which of the two is intended.
// Key size field (bytes 12-13) is set to MaxTagNameLen.
binary.LittleEndian.PutUint16(hdr[12:14], MaxTagNameLen)
// Index option flags byte. The same value is assigned twice below; the
// second assignment is redundant as written.
hdr[14] = TypeCompound | TypeStructure | TypeCompact
hdr[14] = TypeCompound | TypeStructure | TypeCompact // 0xE0
hdr[15] = 0x01
// Bytes 16-17 and 18-19 record HeaderLen and PageLen as 16-bit values.
binary.LittleEndian.PutUint16(hdr[16:18], uint16(HeaderLen))
binary.LittleEndian.PutUint16(hdr[18:20], uint16(PageLen))
// Harbour writes "RCHB" signature at offset 20 in compound root
copy(hdr[20:24], []byte("RCHB"))
// Harbour-convention flags: byte 503 = IgnoreCase, bytes 504-505 = the
// value labelled "collation flag" below.
// NOTE(review): the meaning of bytes 504-505 is not established here (it
// may be the descending flag) — confirm against Harbour's header layout.
hdr[503] = 1 // IgnoreCase
binary.LittleEndian.PutUint16(hdr[504:506], 1) // collation flag
// Write the finished header at the start of the file; the returned error
// is discarded.
f.WriteAt(hdr, 0)
}
@@ -186,20 +210,17 @@ func writeCompoundLeaf(f *os.File, offset int64, tags []cdxTagMeta) {
nTags := len(tags)
compKeyLen := MaxTagNameLen
maxOff := uint32(0)
for _, t := range tags {
if uint32(t.headerOff) > maxOff {
maxOff = uint32(t.headerOff)
}
}
recBits := bitsNeeded(maxOff)
dupBits := bitsNeeded(uint32(compKeyLen))
trlBits := bitsNeeded(uint32(compKeyLen))
keyBytes := (recBits + dupBits + trlBits + 7) / 8
// Harbour uses fixed 16-bit recBits for compound leaf (offsets < 64KB)
recBits := 16
dupBits := bitsNeeded(uint32(compKeyLen)) // 4
trlBits := bitsNeeded(uint32(compKeyLen)) // 4
keyBytes := (recBits + dupBits + trlBits + 7) / 8 // 3
binary.LittleEndian.PutUint16(leaf[0:2], NodeLeaf|NodeRoot)
binary.LittleEndian.PutUint16(leaf[2:4], uint16(nTags))
// Harbour sets LeftPtr/RightPtr to 0xFFFFFFFF for compound leaf
binary.LittleEndian.PutUint32(leaf[4:8], 0xFFFFFFFF)
binary.LittleEndian.PutUint32(leaf[8:12], 0xFFFFFFFF)
binary.LittleEndian.PutUint32(leaf[14:18], (1<<uint(recBits))-1)
leaf[18] = byte((1 << uint(dupBits)) - 1)
leaf[19] = byte((1 << uint(trlBits)) - 1)