Files
five/hbrdd/ntx/ntx.go
CharlesKWON f4ed42556b checkpoint: season-wide bug fix campaign + infra
Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2
SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved
as a single checkpoint before refactoring the parser to delegate xBase
command translation to the preprocessor.

Highlights:

FiveSql2 engine (_FiveSql2/src/)
- prefix-glob index attach -> explicit convention (<table>_pk.ntx,
  <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop
- DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt)
- COUNT(DISTINCT col) parsed + aggregated via hSeen hash
- UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent)
- DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT)
- Derived table FROM (SELECT...) + JOIN right-side derived
- Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect
- LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs)
- DATE literal round-trip validation (Feb 29 non-leap rejected)
- CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists
- AlterTable type dispatcher comma-wrapped (1-char type "A" no longer
  matches CHARACTER)

Compiler / runtime
- gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity)
- gengo split: emit_block.go, emit_stmt.go, folding.go extracted
- parser/stmtreg.go nudges
- hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*),
  windows debug stubs collapsed
- thread/vm/value/class/pcinterp tightening from panic traces

RDD layer (hbrdd/)
- dbf: null bitmap support (null.go + null_test.go), mmap split
  (mmap_posix.go / mmap_windows.go), byte-level numeric parse
- ntx/cdx: windows mmap parity
- workarea + mem RDD: cross-area state-bleed fixes

RTL (hbrtl/)
- errorlog rewrite with platform-specific FD (errorlog_fd_unix /
  errorlog_fd_other)
- sqlscan, sqlhelpers, indexrtl, datetime extensions

Gates green at checkpoint:
- go test ./...        : PASS
- FiveSql2 SQL:1999    : 43/43
- Harbour compat       : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 09:26:25 +09:00

682 lines
18 KiB
Go

// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// NTX index engine for Five.
// B-tree index with 1024-byte pages, byte-compatible with Harbour/Clipper NTX files.
//
// Reference:
// /mnt/d/harbour-core/include/hbrddntx.h — structures
// /mnt/d/harbour-core/src/rdd/dbfntx/dbfntx1.c — algorithms
// docs/rdd-architecture-spec.md Section 6 — SEEK→Index chain
package ntx
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"strings"
)
// NTX constants — matching Harbour exactly.
// The trailing comment on each value names the Harbour macro it mirrors.
const (
BlockSize = 1024 // NTXBLOCKSIZE (1 << 10); size in bytes of every page read or mapped
HeaderSize = 1024 // NTX header occupies first block
MaxKey = 256 // NTX_MAX_KEY
MaxExpr = 256 // NTX_MAX_EXP; byte size of Header.KeyExpr and Header.ForExpr
MaxTagName = 10 // NTX_MAX_TAGNAME; Header.TagName is MaxTagName+2 bytes
StackSize = 32 // NTX_STACKSIZE; capacity of the traversal stack in Index
HdrUnused = 473 // NTX_HDR_UNUSED; trailing padding that brings Header to 1024 bytes
IgnoreRecNum = 0x00000000 // NTX_IGNORE_REC_NUM
MaxRecNum = 0xFFFFFFFF // NTX_MAX_REC_NUM
)
// Header is the NTX file header (1024 bytes on disk).
// Harbour: NTXHEADER in hbrddntx.h:93
//
// ReadHeader and WriteHeader hand this struct directly to encoding/binary
// with little-endian byte order, so the order and size of the fields below
// define the on-disk layout. The offsets noted on each field are cumulative
// and the fields total 1024 bytes.
type Header struct {
Type uint16 // offset 0 (0x0401 = NTX)
Version uint16 // offset 2
Root uint32 // offset 4 (root page byte offset)
NextPage uint32 // offset 8 (next free page byte offset)
ItemSize uint16 // offset 12 (key entry size: 8 + keyLen)
KeySize uint16 // offset 14 (key value length)
KeyDec uint16 // offset 16
MaxItem uint16 // offset 18 (max keys per page)
HalfPage uint16 // offset 20
KeyExpr [MaxExpr]byte // offset 22 (NUL-terminated text; see GetKeyExpr)
Unique byte // offset 278 (non-zero = unique index, as read by OpenIndex)
Pad1 byte // offset 279
Descend byte // offset 280 (zero = ascending, as read by OpenIndex)
Pad2 byte // offset 281
ForExpr [MaxExpr]byte // offset 282 (NUL-terminated text; see GetForExpr)
TagName [MaxTagName+2]byte // offset 538 (NUL-terminated text; see GetTagName)
Custom byte // offset 550
Unused [HdrUnused]byte // offset 551 (padding up to byte 1024)
}
// ReadHeader loads the first HeaderSize bytes of f and decodes them,
// little-endian, into a newly allocated Header.
//
// A failed or short read is returned wrapped with the "read NTX header"
// prefix; a decoding failure is returned unchanged. On any error the
// returned *Header is nil.
func ReadHeader(f *os.File) (*Header, error) {
	raw := make([]byte, HeaderSize)
	_, err := f.ReadAt(raw, 0)
	if err != nil {
		return nil, fmt.Errorf("read NTX header: %w", err)
	}

	var hdr Header
	if err = binary.Read(bytes.NewReader(raw), binary.LittleEndian, &hdr); err != nil {
		return nil, err
	}
	return &hdr, nil
}
// WriteHeader encodes h little-endian and stores it at offset 0 of f.
//
// The encoded bytes are extended with zeros up to HeaderSize before the
// single WriteAt call, so exactly one header block is written. The first
// encoding or write error is returned.
func WriteHeader(f *os.File, h *Header) error {
	out := new(bytes.Buffer)
	if err := binary.Write(out, binary.LittleEndian, h); err != nil {
		return err
	}

	// Zero-fill whatever the encoded struct leaves short of a full block.
	out.Write(make([]byte, HeaderSize-out.Len()))

	if _, err := f.WriteAt(out.Bytes(), 0); err != nil {
		return err
	}
	return nil
}
// GetKeyExpr returns the header's key expression as a string: the bytes of
// KeyExpr up to the first NUL, with surrounding whitespace trimmed.
func (h *Header) GetKeyExpr() string { return trimNull(h.KeyExpr[:]) }
// GetForExpr returns the header's FOR expression as a string: the bytes of
// ForExpr up to the first NUL, with surrounding whitespace trimmed.
func (h *Header) GetForExpr() string { return trimNull(h.ForExpr[:]) }
// GetTagName returns the header's tag name as a string: the bytes of
// TagName up to the first NUL, with surrounding whitespace trimmed.
func (h *Header) GetTagName() string { return trimNull(h.TagName[:]) }
// --- Page ---
// Page represents an NTX B-tree page (1024 bytes).
// Harbour: HB_PAGEINFO in hbrddntx.h:180
//
// On-disk layout:
// [keyCount: 2 bytes LE]
// [keyOffsets: (maxItem+1) * 2 bytes LE] — indices into key data area
// [key data area]
//
// Each key entry:
// [childPage: 4 bytes LE] — child page offset (0 = leaf)
// [recNo: 4 bytes LE] — record number
// [keyValue: keyLen bytes] — key data
//
// The accessor methods (keyOffset, KeyChild, KeyRecNo, KeyValue) decode these
// fields directly from data on every call; nothing but keyCount is cached.
type Page struct {
offset int64 // byte offset of this page within the index file
data []byte // BoltDB-style: slice into mmap (zero-copy) or owned buffer
keyCount uint16 // copy of the first two bytes of data, set by the loaders; WritePage stores it back
changed bool // NOTE(review): not touched by the code visible here — presumably a dirty flag; confirm
}
// acquirePage hands out the next Page slot of the per-Index pool in
// round-robin order.
// Pool is per-Index (not global) to avoid data races across goroutines.
//
// The returned Page still holds whatever the slot's previous user left in
// it, and the same pointer is handed out again after len(pagePool) further
// calls, so callers must not keep a page across that many loads.
//
// The cursor is stored already reduced modulo the pool size. Letting it grow
// without bound would eventually overflow int (quickly on 32-bit builds) and
// turn the modulo result negative, which would panic on the array index.
func (idx *Index) acquirePage() *Page {
	slot := idx.pagePoolIdx % len(idx.pagePool)
	idx.pagePoolIdx = (slot + 1) % len(idx.pagePool)
	return &idx.pagePool[slot]
}
// LoadPage reads one BlockSize-byte page at the given file offset into a
// freshly allocated buffer (used by tests and one-off reads).
//
// The page's key count is decoded from the first two bytes of the buffer.
// A read error is returned wrapped together with the offset.
func LoadPage(f *os.File, offset int64) (*Page, error) {
	buf := make([]byte, BlockSize)
	if _, err := f.ReadAt(buf, offset); err != nil {
		return nil, fmt.Errorf("read NTX page at %d: %w", offset, err)
	}
	return &Page{
		offset:   offset,
		data:     buf,
		keyCount: binary.LittleEndian.Uint16(buf[:2]),
	}, nil
}
// cachedLoadPage returns the page at the given file offset using a pooled
// Page object.
//
// Fast path (BoltDB-style zero-copy): when the whole page lies inside the
// current mapping, Page.data becomes a sub-slice of the mmap — no 1024-byte
// copy is made.
//
// Fallback: the page is read from the file into a private buffer. A pooled
// buffer is only reused while no mapping exists. With a mapping present the
// slot's old data may itself be a slice of the mmap, and reading into it
// would overwrite mapped bytes that belong to a different page, so a fresh
// buffer is allocated instead.
//
// The returned *Page belongs to the pool and is recycled by later loads.
// On a read error the result is nil and the error carries the offset.
func (idx *Index) cachedLoadPage(offset int64) (*Page, error) {
	p := idx.acquirePage()
	p.offset = offset

	mapped := idx.mmapData
	// Bounds are compared in int64 so the sum cannot overflow a 32-bit int.
	if mapped != nil && offset >= 0 && offset+BlockSize <= int64(len(mapped)) {
		// Zero-copy: data is a slice into mmap (no copy!)
		p.data = mapped[offset : offset+BlockSize]
		p.keyCount = binary.LittleEndian.Uint16(p.data[0:2])
		return p, nil
	}

	// Fallback: read from file into a buffer this page owns.
	if mapped != nil || cap(p.data) < BlockSize {
		p.data = make([]byte, BlockSize)
	} else {
		p.data = p.data[:BlockSize]
	}
	if _, err := idx.file.ReadAt(p.data, offset); err != nil {
		return nil, fmt.Errorf("read NTX page at %d: %w", offset, err)
	}
	p.keyCount = binary.LittleEndian.Uint16(p.data[0:2])
	return p, nil
}
// invalidateCache re-maps the file after modifications (insert/split).
// It is a thin alias for remapFile, which drops the current mapping, clears
// the page pool's data slices and maps the file again.
func (idx *Index) invalidateCache() {
idx.remapFile()
}
// WritePage stores p back into f at p's own offset.
//
// The in-memory key count is first serialized into the leading two bytes of
// the page buffer (little-endian), then the whole buffer is written with a
// single WriteAt. The write error, if any, is returned.
func WritePage(f *os.File, p *Page) error {
	buf := p.data
	binary.LittleEndian.PutUint16(buf[:2], p.keyCount)
	if _, err := f.WriteAt(buf, p.offset); err != nil {
		return err
	}
	return nil
}
// keyOffset returns the byte offset within the page for key at index i.
// It decodes entry i of the offset table: a little-endian uint16 stored at
// byte 2+2*i, right after the two-byte key count.
// BCE: single slice expression eliminates subsequent bounds checks.
// An index whose table entry lies outside the page buffer panics.
func (p *Page) keyOffset(i int) uint16 {
return binary.LittleEndian.Uint16(p.data[2+i*2:])
}
// KeyChild returns the child page offset stored in entry i: the first four
// bytes (little-endian) of the entry located through the offset table.
// A value of 0 means the entry has no child (leaf).
func (p *Page) KeyChild(i int) uint32 {
	entry := p.data[p.keyOffset(i):]
	return binary.LittleEndian.Uint32(entry)
}
// KeyRecNo returns the record number stored in entry i: the four bytes
// (little-endian) that follow the entry's child-page field.
func (p *Page) KeyRecNo(i int) uint32 {
	const childFieldSize = 4
	recField := p.data[int(p.keyOffset(i))+childFieldSize:]
	return binary.LittleEndian.Uint32(recField)
}
// KeyValue returns the keyLen key bytes of entry i, i.e. the bytes that
// follow the entry's 8-byte child/record prefix. The result aliases the page
// buffer; copy it if it must outlive the page.
//
// Offsets are computed in int, like KeyChild and KeyRecNo do. With uint16
// arithmetic a corrupt table entry near 65535 (or a large keyLen) would wrap
// around and silently return bytes from the start of the page; in int the
// out-of-range slice is caught by the runtime bounds check instead.
func (p *Page) KeyValue(i int, keyLen int) []byte {
	start := int(p.keyOffset(i)) + 8
	return p.data[start : start+keyLen]
}
// --- Stack entry for tree traversal ---
// StackEntry tracks position during B-tree traversal.
// Harbour: TREE_STACK in hbrddntx.h:173
//
// Index keeps one entry per level of the path from the root to the current
// position; nextKey and prevKey read and update these entries to move on.
type StackEntry struct {
PageOffset int64 // page file offset
KeyIndex int // key position within page (may equal the page's key count, meaning the trailing child slot)
}
// --- Index file ---
// Index represents an open NTX index file.
// Uses mmap for zero-copy page access (Go-native optimization).
//
// The navigation methods mutate the position fields and the page pool
// without any locking visible in this file.
type Index struct {
file *os.File // underlying index file; also used for reads when no mapping covers a page
header Header // copy of the on-disk header taken when the index was opened
keyLen int // key value length, from Header.KeySize
itemSize int // 8 + keyLen
// Memory-mapped file — zero-copy page access, no syscalls per read
mmapData []byte // mmap'd region (nil if mmap failed, falls back to file)
// Current position
stack [StackSize]StackEntry // path from the root page to the current key
stackLevel int // number of valid entries in stack; 0 means "not positioned"
curRecNo uint32 // record number of the current key
curKey []byte // copy of the current key value
tagBOF bool // set by SkipPrev when it cannot move back any further
tagEOF bool // set by Seek/SkipNext when they run past the last key
// Tag properties
ascendKey bool // true when Header.Descend is zero
uniqueKey bool // true when Header.Unique is non-zero
keyType byte // 'C', 'N', 'D', 'L' (OpenIndex always stores 'C')
// Per-index page pool (avoids global state / data race)
pagePool [8]Page // Page objects recycled round-robin by acquirePage
pagePoolIdx int // round-robin cursor into pagePool
}
// OpenIndex opens an existing NTX index file for reading and writing.
//
// ".ntx" is appended to path unless path already ends in it
// (case-insensitively). The header is read once and copied into the
// returned Index together with the values derived from it (key length, item
// size, sort direction, uniqueness). The key type is always set to 'C'.
// Finally the file is memory-mapped on a best-effort basis; if mapping fails
// the index still works through plain file reads.
//
// Errors from opening the file or reading the header are returned as-is,
// and the file is closed again when the header cannot be read.
func OpenIndex(path string) (*Index, error) {
	if lower := strings.ToLower(path); !strings.HasSuffix(lower, ".ntx") {
		path += ".ntx"
	}

	file, err := os.OpenFile(path, os.O_RDWR, 0)
	if err != nil {
		return nil, err
	}

	hdr, err := ReadHeader(file)
	if err != nil {
		file.Close()
		return nil, err
	}

	idx := new(Index)
	idx.file = file
	idx.header = *hdr
	idx.keyLen = int(hdr.KeySize)
	idx.itemSize = int(hdr.ItemSize)
	idx.ascendKey = hdr.Descend == 0
	idx.uniqueKey = hdr.Unique != 0
	idx.curKey = make([]byte, hdr.KeySize)
	idx.keyType = 'C'

	// mmap the file for zero-copy page access
	idx.mmapFile()
	return idx, nil
}
// mmapFile tries to map the whole index file into memory and, on success,
// stores the mapping in idx.mmapData.
//
// It is best-effort: if the file cannot be stat'ed, is smaller than one
// header block, or the platform mapping call fails, idx.mmapData is left
// untouched and page loads fall back to file reads.
func (idx *Index) mmapFile() {
	info, err := idx.file.Stat()
	if err != nil {
		return
	}
	size := info.Size()
	if size < HeaderSize {
		return
	}
	if mapped, mapErr := mmapFile(idx.file, int(size)); mapErr == nil {
		idx.mmapData = mapped
	}
}
// remapFile replaces the current mapping with a fresh one, for use after the
// file size changed (e.g., after insert/split).
//
// Steps, in order: unmap the old region if there is one, drop every pooled
// page's data slice (they may have pointed into the old region), then map
// the file again via mmapFile.
func (idx *Index) remapFile() {
	if old := idx.mmapData; old != nil {
		munmapFile(old)
		idx.mmapData = nil
	}

	// Pooled pages must not keep slices of the region that was just unmapped.
	for i := 0; i < len(idx.pagePool); i++ {
		idx.pagePool[i].data = nil
	}

	idx.mmapFile()
}
// KeyLen returns the key value length in bytes.
func (idx *Index) KeyLen() int { return idx.keyLen }
// KeyExpr returns the key expression text stored in the index header.
func (idx *Index) KeyExpr() string { return idx.header.GetKeyExpr() }
// TestGetMmap exposes the raw memory mapping (nil when the file is not
// mapped). The slice is the live mapping, not a copy.
func (idx *Index) TestGetMmap() []byte { return idx.mmapData }
// Close releases the memory mapping, if any, and closes the index file.
//
// Pooled pages are cleared as well, just as remapFile does after unmapping:
// their data slices may point into the region that was just unmapped, and
// must not survive as dangling references.
//
// The error from closing the file is returned.
func (idx *Index) Close() error {
	if idx.mmapData != nil {
		munmapFile(idx.mmapData)
		idx.mmapData = nil
	}
	for i := range idx.pagePool {
		idx.pagePool[i].data = nil
	}
	return idx.file.Close()
}
// --- SEEK: B-tree search ---
// Harbour: hb_ntxTagKeyFind in dbfntx1.c:2564

// Seek searches for a key in the B-tree.
// Returns (recordNumber, exactMatch).
// If not found: positions at next higher key (for SOFTSEEK).
//
// searchKey may be shorter than the index key length, in which case it is
// treated as a prefix. A searchKey longer than the index key is cut to the
// key length, since the stored keys (and curKey) hold no more bytes than
// that to compare against.
//
// The search always descends from the root to a leaf, even when an equal key
// is met in an internal page, so that the first occurrence of a duplicate
// key is the one found. Every visited page is pushed on the traversal stack,
// which SkipNext/SkipPrev continue from.
//
// Outcomes:
//   - The leaf has a key at the found position: that key becomes current,
//     and the bool reports whether it starts with searchKey.
//   - The position is past the leaf's last key: the next key in index order
//     (an ancestor's separator) becomes current, judged by the same prefix
//     test. The first match is judged identically whether it sits in a leaf
//     or in an internal page.
//   - No next key exists: EOF is flagged and (0, false) is returned.
//   - A page cannot be loaded, or the descent is deeper than StackSize pages
//     (which only a damaged file with looping child pointers can cause):
//     EOF is flagged and (0, false) is returned.
func (idx *Index) Seek(searchKey []byte) (uint32, bool) {
	idx.stackLevel = 0
	idx.tagBOF = false
	idx.tagEOF = false

	// Bytes beyond the index key length have nothing to be compared with;
	// without this cut the prefix test below would slice past curKey.
	if len(searchKey) > idx.keyLen {
		searchKey = searchKey[:idx.keyLen]
	}

	pageOffset := int64(idx.header.Root)
	for {
		// Refuse to descend once the stack is full: the path could no longer
		// be recorded, and a cycle of child pointers would never end.
		if idx.stackLevel >= StackSize {
			idx.tagEOF = true
			return 0, false
		}

		page, err := idx.cachedLoadPage(pageOffset)
		if err != nil {
			idx.tagEOF = true
			return 0, false
		}

		iKey, _ := idx.pageKeyFind(page, searchKey, false, 0)
		idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: iKey}
		idx.stackLevel++

		// Follow child pointer (always descend, even on match)
		if childOffset := page.KeyChild(iKey); childOffset != 0 {
			pageOffset = int64(childOffset)
			continue
		}

		// At leaf.
		if iKey < int(page.keyCount) {
			idx.curRecNo = page.KeyRecNo(iKey)
			copy(idx.curKey, page.KeyValue(iKey, idx.keyLen))
		} else if !idx.nextKey() {
			// Past the last key of the whole index.
			idx.tagEOF = true
			idx.curRecNo = 0
			return 0, false
		}

		// curKey/curRecNo now describe the first key not below searchKey;
		// it is a hit when it begins with the bytes searched for.
		return idx.curRecNo, bytes.Equal(idx.curKey[:len(searchKey)], searchKey)
	}
}
// pageKeyFind performs binary search within a page.
// Harbour: hb_ntxPageKeyFind in dbfntx1.c:2497
// Returns (keyIndex, exactMatch).
//
// Parameters:
//   - page: the page to search.
//   - searchKey: the key looked for; may be shorter than the index key
//     length, in which case it is treated as a prefix.
//   - fNext: when false the result is the first entry that does not sort
//     before searchKey; when true it is the first entry that sorts after it.
//   - recNo: when non-zero, equality is never reported as a match.
//
// keyIndex equals the page's key count when no entry qualifies (the search
// continues in the trailing child). exactMatch is true when an entry equal
// to searchKey was seen while recNo is 0.
//
// Only as many stored key bytes as searchKey supplies take part in the
// comparison. A short search key therefore compares equal to every stored
// key it is a prefix of, instead of "smaller" — a result that would be
// sign-flipped on a descending index and make the search step over every
// matching key.
func (idx *Index) pageKeyFind(page *Page, searchKey []byte, fNext bool, recNo uint32) (int, bool) {
	data := page.data // local ref avoids repeated field access

	// Number of stored key bytes to compare: never more than the search key has.
	cmpLen := idx.keyLen
	if len(searchKey) < cmpLen {
		cmpLen = len(searchKey)
	}

	lo, hi := 0, int(page.keyCount)-1
	last, found := -1, false
	for lo <= hi {
		mid := (lo + hi) / 2
		// Inline key access: offset table → key value (skip the 8-byte
		// child/record prefix of the entry).
		off := int(binary.LittleEndian.Uint16(data[2+mid*2:])) + 8
		cmp := idx.compareKeys(searchKey, data[off:off+cmpLen])
		// Descending index: flip comparison
		if cmp != 0 && !idx.ascendKey {
			cmp = -cmp
		}
		if fNext && cmp >= 0 || !fNext && cmp > 0 {
			lo = mid + 1
			continue
		}
		if cmp == 0 && recNo == 0 {
			found = true
		}
		last = mid
		hi = mid - 1
	}
	if last >= 0 {
		return last, found
	}
	return int(page.keyCount), found
}
// compareKeys compares two key values byte by byte.
// Harbour: hb_ntxValCompare in dbfntx1.c:679
// Returns: -1, 0, +1
// bytes.Compare already returns -1/0/+1 using SIMD-optimized memcmp.
// No normalization needed — callers use cmp < 0, cmp > 0, cmp == 0.
// Lengths matter: a key that is a strict prefix of the other compares as
// smaller, not equal. The receiver is not used.
func (idx *Index) compareKeys(key1, key2 []byte) int {
return bytes.Compare(key1, key2)
}
// --- SKIP: navigate through index ---

// nextKey moves to the next key in index order.
// Harbour: hb_ntxTagNextKey in dbfntx1.c:2387-2436
//
// NTX B-tree traversal:
// key[i] has left-child at KeyChild(i) and right-child at KeyChild(i+1).
// After visiting key[i], the next key is the leftmost key in KeyChild(i+1),
// or if no child, key[i+1] in same page, or walk up to parent.
//
// The page on top of the traversal stack is the starting point. On success
// curRecNo/curKey describe the new key, the stack describes its position and
// true is returned. False means the stack was empty, a page failed to load,
// or no further key exists.
func (idx *Index) nextKey() bool {
	top := idx.stackLevel - 1
	if top < 0 {
		return false
	}
	page, err := idx.cachedLoadPage(idx.stack[top].PageOffset)
	if err != nil {
		return false
	}
	cur := idx.stack[top].KeyIndex
	count := int(page.keyCount)

	// Right child of the current key, looked up only for a real key position.
	var rightChild uint32
	if cur < count {
		rightChild = page.KeyChild(cur + 1)
	}

	switch {
	case rightChild != 0:
		// Step to the next slot, then dive to the smallest key below it.
		idx.stack[top].KeyIndex = cur + 1
		return idx.goLeftmost(int64(rightChild))
	case cur+1 < count:
		// No child — the successor is the neighbouring key in this page.
		idx.stack[top].KeyIndex = cur + 1
		idx.curRecNo = page.KeyRecNo(cur + 1)
		copy(idx.curKey, page.KeyValue(cur+1, idx.keyLen))
		return true
	}

	// This page is used up: climb until an ancestor still has a key at its
	// recorded position.
	for {
		top--
		if top < 0 {
			return false // EOF — exhausted entire tree
		}
		page, err = idx.cachedLoadPage(idx.stack[top].PageOffset)
		if err != nil {
			return false
		}
		if idx.stack[top].KeyIndex < int(page.keyCount) {
			break
		}
	}

	// The ancestor becomes the new stack top; everything below is dropped.
	idx.stackLevel = top + 1
	pos := idx.stack[top].KeyIndex
	idx.curRecNo = page.KeyRecNo(pos)
	copy(idx.curKey, page.KeyValue(pos, idx.keyLen))
	return true
}
// prevKey moves to the previous key in index order.
// Harbour: hb_ntxTagPrevKey in dbfntx1.c:2441-2492
//
// Starting from the page on top of the traversal stack:
//  1. if the current key has a left child, the predecessor is the largest
//     key below that child;
//  2. otherwise, if the current key is not the first in its page, it is the
//     key just before it;
//  3. otherwise climb the stack to the nearest ancestor whose recorded
//     position is above zero and step that position back by one.
//
// On success curRecNo/curKey and the stack describe the new key and true is
// returned. False means the stack was empty, a page failed to load, or no
// earlier key exists.
func (idx *Index) prevKey() bool {
	top := idx.stackLevel - 1
	if top < 0 {
		return false
	}
	page, err := idx.cachedLoadPage(idx.stack[top].PageOffset)
	if err != nil {
		return false
	}
	cur := idx.stack[top].KeyIndex

	// A position equal to the key count is the trailing child slot, not a
	// real key, so its child pointer is not consulted.
	if cur < int(page.keyCount) {
		if leftChild := page.KeyChild(cur); leftChild != 0 {
			return idx.goRightmost(int64(leftChild))
		}
	}

	if cur > 0 {
		// Previous key in same page (leaf)
		prev := cur - 1
		idx.stack[top].KeyIndex = prev
		idx.curRecNo = page.KeyRecNo(prev)
		copy(idx.curKey, page.KeyValue(prev, idx.keyLen))
		return true
	}

	// First key of the page and nothing below it: climb.
	for {
		top--
		if top < 0 {
			return false // BOF
		}
		page, err = idx.cachedLoadPage(idx.stack[top].PageOffset)
		if err != nil {
			return false
		}
		if idx.stack[top].KeyIndex > 0 {
			idx.stack[top].KeyIndex--
			break
		}
	}

	idx.stackLevel = top + 1
	pos := idx.stack[top].KeyIndex
	idx.curRecNo = page.KeyRecNo(pos)
	copy(idx.curKey, page.KeyValue(pos, idx.keyLen))
	return true
}
// goLeftmost traverses to the leftmost (smallest) key from a page.
//
// Starting at pageOffset it follows the first child pointer of every page
// until it reaches a page without one, pushing each visited page on the
// traversal stack with key index 0. On success the first key of that leaf
// becomes the current key and true is returned.
//
// False is returned when a page cannot be loaded, when the leaf holds no
// keys, or when the traversal stack is already full. In the last case the
// path could no longer be recorded for later nextKey/prevKey calls, and
// stopping there also guarantees termination on a damaged file whose child
// pointers form a cycle.
func (idx *Index) goLeftmost(pageOffset int64) bool {
	for {
		if idx.stackLevel >= StackSize {
			return false
		}
		page, err := idx.cachedLoadPage(pageOffset)
		if err != nil {
			return false
		}
		idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
		idx.stackLevel++

		if childOff := page.KeyChild(0); childOff != 0 {
			pageOffset = int64(childOff)
			continue
		}

		// Leaf reached
		if page.keyCount == 0 {
			return false
		}
		idx.curRecNo = page.KeyRecNo(0)
		copy(idx.curKey, page.KeyValue(0, idx.keyLen))
		return true
	}
}
// goRightmost traverses to the rightmost (largest) key from a page.
// Harbour: hb_ntxPageBottomMove — internal nodes get ikey=keyCount (rightmost child),
// leaf nodes get ikey=keyCount-1 (last key).
//
// Every visited page is pushed on the traversal stack. On success the last
// key of the leaf becomes the current key and true is returned.
//
// False is returned when a page cannot be loaded, when the leaf holds no
// keys, or when the traversal stack is already full (the path could not be
// recorded, and a damaged file with looping child pointers would otherwise
// never terminate).
//
// An empty leaf is recorded with key index 0 rather than -1. A negative
// index on the stack would later make prevKey read the offset table at
// position -1, i.e. interpret the page's key-count bytes as an entry offset.
func (idx *Index) goRightmost(pageOffset int64) bool {
	for {
		if idx.stackLevel >= StackSize {
			return false
		}
		page, err := idx.cachedLoadPage(pageOffset)
		if err != nil {
			return false
		}
		count := int(page.keyCount)

		// Try rightmost child (at keyCount position)
		if childOff := page.KeyChild(count); childOff != 0 {
			// Internal node: set ikey to keyCount (rightmost child position)
			idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: count}
			idx.stackLevel++
			pageOffset = int64(childOff)
			continue
		}

		// Leaf without keys: keep the stack entry well-formed, report failure.
		if count == 0 {
			idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: 0}
			idx.stackLevel++
			return false
		}

		// Leaf: set ikey to last key
		lastKey := count - 1
		idx.stack[idx.stackLevel] = StackEntry{PageOffset: pageOffset, KeyIndex: lastKey}
		idx.stackLevel++
		idx.curRecNo = page.KeyRecNo(lastKey)
		copy(idx.curKey, page.KeyValue(lastKey, idx.keyLen))
		return true
	}
}
// GoTop positions at the first key in index order.
//
// The traversal stack and the BOF/EOF flags are reset, then the tree is
// descended from the root page along its leftmost branch. The result is that
// of goLeftmost: true when a first key exists and could be loaded.
func (idx *Index) GoTop() bool {
	idx.stackLevel, idx.tagBOF, idx.tagEOF = 0, false, false
	root := int64(idx.header.Root)
	return idx.goLeftmost(root)
}
// GoBottom positions at the last key in index order.
//
// The traversal stack and the BOF/EOF flags are reset, then the tree is
// descended from the root page along its rightmost branch. The result is
// that of goRightmost: true when a last key exists and could be loaded.
func (idx *Index) GoBottom() bool {
	idx.stackLevel, idx.tagBOF, idx.tagEOF = 0, false, false
	root := int64(idx.header.Root)
	return idx.goRightmost(root)
}
// SkipNext moves to the next key. Returns false at EOF.
//
// The BOF flag is always cleared. When the index is not positioned (empty
// traversal stack) or nextKey finds nothing further, the EOF flag is set and
// false is returned.
func (idx *Index) SkipNext() bool {
	idx.tagBOF = false
	if idx.stackLevel != 0 && idx.nextKey() {
		return true
	}
	idx.tagEOF = true
	return false
}
// SkipPrev moves to the previous key. Returns false at BOF.
//
// The EOF flag is always cleared. When the index is not positioned (empty
// traversal stack) or prevKey finds nothing earlier, the BOF flag is set and
// false is returned.
func (idx *Index) SkipPrev() bool {
	idx.tagEOF = false
	if idx.stackLevel != 0 && idx.prevKey() {
		return true
	}
	idx.tagBOF = true
	return false
}
// CurRecNo returns the current record number, as left by the last
// positioning call (Seek stores 0 here when it runs off the end).
func (idx *Index) CurRecNo() uint32 { return idx.curRecNo }
// CurKey returns the current key value: the first keyLen bytes of the
// internal key buffer. The slice aliases that buffer, so its contents change
// with the next positioning call.
func (idx *Index) CurKey() []byte { return idx.curKey[:idx.keyLen] }
// IsEOF returns true if past end of index.
func (idx *Index) IsEOF() bool { return idx.tagEOF }
// IsBOF returns true if before start of index.
func (idx *Index) IsBOF() bool { return idx.tagBOF }
// --- Helpers ---
// trimNull converts a fixed-size, NUL-terminated byte field to a string.
// Everything from the first zero byte onward is discarded (the whole slice
// is used when it contains none), and leading and trailing whitespace is
// removed from what remains.
func trimNull(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		b = b[:end]
	}
	return strings.TrimSpace(string(b))
}