Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2 SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved as a single checkpoint before refactoring the parser to delegate xBase command translation to the preprocessor. Highlights: FiveSql2 engine (_FiveSql2/src/) - prefix-glob index attach -> explicit convention (<table>_pk.ntx, <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop - DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt) - COUNT(DISTINCT col) parsed + aggregated via hSeen hash - UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent) - DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT) - Derived table FROM (SELECT...) + JOIN right-side derived - Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect - LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs) - DATE literal round-trip validation (Feb 29 non-leap rejected) - CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists - AlterTable type dispatcher comma-wrapped (1-char type "A" no longer matches CHARACTER) Compiler / runtime - gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity) - gengo split: emit_block.go, emit_stmt.go, folding.go extracted - parser/stmtreg.go nudges - hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*), windows debug stubs collapsed - thread/vm/value/class/pcinterp tightening from panic traces RDD layer (hbrdd/) - dbf: null bitmap support (null.go + null_test.go), mmap split (mmap_posix.go / mmap_windows.go), byte-level numeric parse - ntx/cdx: windows mmap parity - workarea + mem RDD: cross-area state-bleed fixes RTL (hbrtl/) - errorlog rewrite with platform-specific FD (errorlog_fd_unix / errorlog_fd_other) - sqlscan, sqlhelpers, indexrtl, datetime extensions Gates green at checkpoint: - go test ./... : PASS - FiveSql2 SQL:1999 : 43/43 - Harbour compat : 56/56 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
532 lines
13 KiB
Go
532 lines
13 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
||
// All rights reserved.
|
||
|
||
// DBF field type conversion: raw bytes ↔ Five Value.
|
||
// Each field type (C, N, L, D, M, I, B, @, etc.) has exact byte format.
|
||
//
|
||
// Reference: /mnt/d/harbour-core/src/rdd/dbf1.c (getValue/putValue)
|
||
// docs/dbf-engine-spec.md Section 3
|
||
package dbf
|
||
|
||
import (
|
||
"encoding/binary"
|
||
"five/hbrt"
|
||
"fmt"
|
||
"math"
|
||
"strconv"
|
||
"strings"
|
||
)
|
||
|
||
// GetFieldValue converts raw record bytes to a Five Value.
|
||
// Harbour: hb_dbfGetValue in dbf1.c
|
||
func GetFieldValue(recBuf []byte, offset uint16, field *FieldDesc) hbrt.Value {
|
||
return getFieldValueImpl(recBuf, offset, field, false)
|
||
}
|
||
|
||
// getFieldValueImpl is the zero-copy-aware variant. When stable=true the
|
||
// caller guarantees the recBuf bytes won't be mutated, freed, or
|
||
// unmapped for the Value's lifetime — then CHAR fields alias the
|
||
// buffer and skip the `string([]byte)` copy.
|
||
//
|
||
// NOTE: currently unexported because naive usage (even with mmap-backed
|
||
// buffers) can produce UAF when FiveSql2 closes/packs temp CTE tables
|
||
// while CHAR values from earlier iterations are still referenced. The
|
||
// machinery is kept for a future refcounted mmap lifetime scheme.
|
||
func getFieldValueImpl(recBuf []byte, offset uint16, field *FieldDesc, stable bool) hbrt.Value {
|
||
raw := recBuf[offset : offset+uint16(field.Len)]
|
||
|
||
switch field.Type {
|
||
case 'C', 'c': // Character
|
||
if stable {
|
||
return hbrt.MakeStringBytes(raw)
|
||
}
|
||
return hbrt.MakeString(string(raw))
|
||
|
||
case 'N', 'n': // Numeric (ASCII)
|
||
return parseNumericField(raw, field.Dec)
|
||
|
||
case 'L', 'l': // Logical
|
||
return parseLogicalField(raw[0])
|
||
|
||
case 'D', 'd': // Date
|
||
return parseDateField(raw, field.Len)
|
||
|
||
case 'M', 'm': // Memo (block reference)
|
||
return parseMemoRef(raw, field.Len)
|
||
|
||
case 'I', 'i': // Integer (binary LE)
|
||
return parseIntegerField(raw, field.Len)
|
||
|
||
case 'B', 'b': // Double (IEEE 754 LE)
|
||
if field.Len == 8 {
|
||
bits := binary.LittleEndian.Uint64(raw)
|
||
return hbrt.MakeDoubleAuto(math.Float64frombits(bits))
|
||
}
|
||
return hbrt.MakeNil()
|
||
|
||
case '@': // Timestamp (4 bytes date + 4 bytes time, LE)
|
||
if field.Len >= 8 {
|
||
julian := int64(binary.LittleEndian.Uint32(raw[0:4]))
|
||
timeMs := int32(binary.LittleEndian.Uint32(raw[4:8]))
|
||
return hbrt.MakeTimestamp(julian, timeMs)
|
||
}
|
||
return hbrt.MakeNil()
|
||
|
||
case '+': // Autoincrement (binary LE integer)
|
||
return parseIntegerField(raw, field.Len)
|
||
|
||
case '=': // Modtime (same as Timestamp)
|
||
if field.Len >= 8 {
|
||
julian := int64(binary.LittleEndian.Uint32(raw[0:4]))
|
||
timeMs := int32(binary.LittleEndian.Uint32(raw[4:8]))
|
||
return hbrt.MakeTimestamp(julian, timeMs)
|
||
}
|
||
return hbrt.MakeNil()
|
||
|
||
case '^': // RowVersion (uint64 LE)
|
||
if field.Len == 8 {
|
||
return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw)))
|
||
}
|
||
return hbrt.MakeNil()
|
||
|
||
case 'Y', 'y': // Currency (int64 LE, implicit 4 decimal places)
|
||
if field.Len == 8 {
|
||
cents := int64(binary.LittleEndian.Uint64(raw))
|
||
return hbrt.MakeDouble(float64(cents)/10000.0, 20, 4)
|
||
}
|
||
return hbrt.MakeNil()
|
||
|
||
case 'T', 't': // Timestamp (Harbour extension)
|
||
if field.Len >= 8 {
|
||
julian := int64(binary.LittleEndian.Uint32(raw[0:4]))
|
||
timeMs := int32(binary.LittleEndian.Uint32(raw[4:8]))
|
||
return hbrt.MakeTimestamp(julian, timeMs)
|
||
}
|
||
if field.Len == 4 {
|
||
// Time only
|
||
timeMs := int32(binary.LittleEndian.Uint32(raw[0:4]))
|
||
return hbrt.MakeTimestamp(0, timeMs)
|
||
}
|
||
return hbrt.MakeNil()
|
||
|
||
default:
|
||
// Unknown type: return as string
|
||
return hbrt.MakeString(string(raw))
|
||
}
|
||
}
|
||
|
||
// PutFieldValue converts a Five Value to raw record bytes.
|
||
// Harbour: hb_dbfPutValue in dbf1.c
|
||
func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Value) {
|
||
raw := recBuf[offset : offset+uint16(field.Len)]
|
||
|
||
switch field.Type {
|
||
case 'C', 'c': // Character
|
||
s := val.AsString()
|
||
copy(raw, s)
|
||
// Pad with spaces
|
||
if len(s) < int(field.Len) {
|
||
for i := len(s); i < int(field.Len); i++ {
|
||
raw[i] = ' '
|
||
}
|
||
}
|
||
|
||
case 'N', 'n': // Numeric (ASCII, right-aligned, space-padded)
|
||
formatNumericField(raw, field.Len, field.Dec, val)
|
||
|
||
case 'L', 'l': // Logical
|
||
if val.IsNil() {
|
||
raw[0] = ' '
|
||
} else if val.AsBool() {
|
||
raw[0] = 'T'
|
||
} else {
|
||
raw[0] = 'F'
|
||
}
|
||
|
||
case 'D', 'd': // Date
|
||
putDateField(raw, field.Len, val)
|
||
|
||
case 'M', 'm': // Memo (block reference)
|
||
// Memo writes handled by MemoHandler
|
||
// Here just store block number
|
||
if val.IsNumInt() {
|
||
putMemoRef(raw, field.Len, uint32(val.AsNumInt()))
|
||
}
|
||
|
||
case 'I', 'i', '+': // Integer / Autoincrement
|
||
putIntegerField(raw, field.Len, val)
|
||
|
||
case 'B', 'b': // Double (IEEE 754 LE)
|
||
if field.Len == 8 {
|
||
binary.LittleEndian.PutUint64(raw, math.Float64bits(val.AsNumDouble()))
|
||
}
|
||
|
||
case '@', '=', 'T', 't': // Timestamp / Modtime
|
||
if field.Len >= 8 {
|
||
binary.LittleEndian.PutUint32(raw[0:4], uint32(val.AsJulian()))
|
||
binary.LittleEndian.PutUint32(raw[4:8], uint32(val.AsTimeMs()))
|
||
}
|
||
|
||
case 'Y', 'y': // Currency
|
||
if field.Len == 8 {
|
||
cents := int64(val.AsNumDouble() * 10000.0)
|
||
binary.LittleEndian.PutUint64(raw, uint64(cents))
|
||
}
|
||
|
||
case '^': // RowVersion
|
||
if field.Len == 8 {
|
||
binary.LittleEndian.PutUint64(raw, uint64(val.AsLong()))
|
||
}
|
||
|
||
default:
|
||
// Unknown: write as string
|
||
s := val.AsString()
|
||
copy(raw, s)
|
||
}
|
||
}
|
||
|
||
// --- Internal parsers ---
|
||
|
||
func parseNumericField(raw []byte, dec byte) hbrt.Value {
|
||
// Byte-level fast path — avoids `string(raw)` + TrimSpace + ParseInt
|
||
// allocations on the hot scan path. Numeric DBF fields are ASCII,
|
||
// right-aligned, space-padded, optional leading sign, optional `.`
|
||
// for decimals. A full 50k-row scan can hit this fn 100 k+ times,
|
||
// so every allocation matters.
|
||
//
|
||
// Algorithm:
|
||
// 1. Walk past leading spaces.
|
||
// 2. Detect sign.
|
||
// 3. Accumulate int64 digit-by-digit.
|
||
// 4. If we hit `.` or the field has dec > 0, bail to float parser
|
||
// (that path is rare on integer-typed DBF fields like IDs /
|
||
// counters, which dominate WHERE predicates).
|
||
// 5. Walk past trailing spaces.
|
||
//
|
||
// All operations are byte comparisons on the raw record buffer —
|
||
// no heap allocation unless the field is genuinely fractional.
|
||
|
||
start := 0
|
||
end := len(raw)
|
||
for start < end && raw[start] == ' ' {
|
||
start++
|
||
}
|
||
for end > start && raw[end-1] == ' ' {
|
||
end--
|
||
}
|
||
if start == end {
|
||
return hbrt.MakeInt(0)
|
||
}
|
||
|
||
if dec == 0 {
|
||
// Fast integer path
|
||
i := start
|
||
neg := false
|
||
if raw[i] == '-' {
|
||
neg = true
|
||
i++
|
||
} else if raw[i] == '+' {
|
||
i++
|
||
}
|
||
var n int64
|
||
ok := i < end
|
||
for ; i < end; i++ {
|
||
c := raw[i]
|
||
if c == '.' {
|
||
ok = false
|
||
break
|
||
}
|
||
if c < '0' || c > '9' {
|
||
ok = false
|
||
break
|
||
}
|
||
n = n*10 + int64(c-'0')
|
||
}
|
||
if ok {
|
||
if neg {
|
||
n = -n
|
||
}
|
||
return hbrt.MakeNumInt(n)
|
||
}
|
||
// Fall through: has a `.` or unexpected char → use float path
|
||
}
|
||
|
||
// Byte-level float parse for N(w,d) with d > 0 — avoids the
|
||
// string(raw) + strconv.ParseFloat allocation on the hot path.
|
||
// Profile (bench_bulk): parseNumericField was 23% of flat CPU,
|
||
// dominated by this allocation.
|
||
i := start
|
||
neg := false
|
||
if raw[i] == '-' {
|
||
neg = true
|
||
i++
|
||
} else if raw[i] == '+' {
|
||
i++
|
||
}
|
||
|
||
var intPart int64
|
||
var sawDigit bool
|
||
for ; i < end; i++ {
|
||
c := raw[i]
|
||
if c == '.' {
|
||
break
|
||
}
|
||
if c < '0' || c > '9' {
|
||
// Unexpected char — fall back to strconv for correctness.
|
||
if f, err := strconv.ParseFloat(string(raw[start:end]), 64); err == nil {
|
||
return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
|
||
}
|
||
return hbrt.MakeInt(0)
|
||
}
|
||
intPart = intPart*10 + int64(c-'0')
|
||
sawDigit = true
|
||
}
|
||
|
||
var fracPart int64
|
||
var fracLen int
|
||
if i < end && raw[i] == '.' {
|
||
i++
|
||
for ; i < end; i++ {
|
||
c := raw[i]
|
||
if c < '0' || c > '9' {
|
||
if f, err := strconv.ParseFloat(string(raw[start:end]), 64); err == nil {
|
||
return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
|
||
}
|
||
return hbrt.MakeInt(0)
|
||
}
|
||
fracPart = fracPart*10 + int64(c-'0')
|
||
fracLen++
|
||
sawDigit = true
|
||
}
|
||
}
|
||
|
||
if !sawDigit {
|
||
return hbrt.MakeDouble(0, uint16(len(raw)), uint16(dec))
|
||
}
|
||
|
||
var f float64
|
||
if fracLen == 0 {
|
||
f = float64(intPart)
|
||
} else {
|
||
f = float64(intPart) + float64(fracPart)/pow10f(fracLen)
|
||
}
|
||
if neg {
|
||
f = -f
|
||
}
|
||
return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
|
||
}
|
||
|
||
// pow10Table holds 10^n for n = 0..19 so the numeric parser can scale
// fractional digits without calling math.Pow for the common small
// exponents.
var pow10Table = [20]float64{
	1e0, 1e1, 1e2, 1e3, 1e4,
	1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14,
	1e15, 1e16, 1e17, 1e18, 1e19,
}

// pow10f returns 10^n. Exponents covered by pow10Table are served from
// the table; any other n (negative or too large) is computed with
// math.Pow.
func pow10f(n int) float64 {
	if n < 0 || n >= len(pow10Table) {
		return math.Pow(10, float64(n))
	}
	return pow10Table[n]
}
|
||
|
||
func parseLogicalField(b byte) hbrt.Value {
|
||
switch b {
|
||
case 'T', 't', 'Y', 'y':
|
||
return hbrt.MakeBool(true)
|
||
case 'F', 'f', 'N', 'n':
|
||
return hbrt.MakeBool(false)
|
||
default:
|
||
return hbrt.MakeNil() // space = uninitialized
|
||
}
|
||
}
|
||
|
||
func parseDateField(raw []byte, fieldLen byte) hbrt.Value {
|
||
if fieldLen == 8 {
|
||
// Standard: YYYYMMDD ASCII
|
||
s := string(raw)
|
||
if strings.TrimSpace(s) == "" {
|
||
return hbrt.MakeDate(0) // empty date
|
||
}
|
||
y := parseInt(s[0:4])
|
||
m := parseInt(s[4:6])
|
||
d := parseInt(s[6:8])
|
||
if y > 0 {
|
||
return hbrt.MakeDate(dateToJulian(y, m, d))
|
||
}
|
||
return hbrt.MakeDate(0)
|
||
}
|
||
if fieldLen == 3 {
|
||
// Short: LE uint24
|
||
julian := int64(raw[0]) | int64(raw[1])<<8 | int64(raw[2])<<16
|
||
return hbrt.MakeDate(julian)
|
||
}
|
||
if fieldLen == 4 {
|
||
// VFP: LE uint32 Julian
|
||
return hbrt.MakeDate(int64(binary.LittleEndian.Uint32(raw)))
|
||
}
|
||
return hbrt.MakeDate(0)
|
||
}
|
||
|
||
func parseMemoRef(raw []byte, fieldLen byte) hbrt.Value {
|
||
if fieldLen == 4 {
|
||
blockNo := binary.LittleEndian.Uint32(raw)
|
||
return hbrt.MakeLong(int64(blockNo))
|
||
}
|
||
if fieldLen == 10 {
|
||
// Inline byte-level parse: same pattern as parseNumericField.
|
||
// Avoids string(raw) + strings.TrimSpace + strconv.ParseInt
|
||
// — roughly 3× faster and allocation-free.
|
||
var n int64
|
||
for _, c := range raw {
|
||
switch {
|
||
case c == ' ':
|
||
// Leading/trailing space — keep current accumulator
|
||
case c >= '0' && c <= '9':
|
||
n = n*10 + int64(c-'0')
|
||
default:
|
||
// Malformed block ref — treat as 0, same as strconv.ParseInt
|
||
// would on the non-digit prefix.
|
||
return hbrt.MakeLong(0)
|
||
}
|
||
}
|
||
return hbrt.MakeLong(n)
|
||
}
|
||
return hbrt.MakeLong(0)
|
||
}
|
||
|
||
func parseIntegerField(raw []byte, fieldLen byte) hbrt.Value {
|
||
switch fieldLen {
|
||
case 1:
|
||
return hbrt.MakeInt(int(int8(raw[0])))
|
||
case 2:
|
||
return hbrt.MakeInt(int(int16(binary.LittleEndian.Uint16(raw))))
|
||
case 3:
|
||
v := int32(raw[0]) | int32(raw[1])<<8 | int32(raw[2])<<16
|
||
if v&0x800000 != 0 {
|
||
v |= ^0xFFFFFF // sign extend
|
||
}
|
||
return hbrt.MakeInt(int(v))
|
||
case 4:
|
||
return hbrt.MakeInt(int(int32(binary.LittleEndian.Uint32(raw))))
|
||
case 8:
|
||
return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw)))
|
||
default:
|
||
return hbrt.MakeInt(0)
|
||
}
|
||
}
|
||
|
||
// --- Internal formatters ---
|
||
|
||
func formatNumericField(raw []byte, fieldLen, dec byte, val hbrt.Value) {
|
||
d := val.AsNumDouble()
|
||
|
||
// NaN/Inf → asterisks (Harbour: field width overflow marker)
|
||
if math.IsNaN(d) || math.IsInf(d, 0) {
|
||
for i := range raw {
|
||
raw[i] = '*'
|
||
}
|
||
return
|
||
}
|
||
|
||
// Use strconv.AppendFloat into a stack-allocated scratch buffer.
|
||
// Skips fmt.Sprintf's format-string parsing and its temporary
|
||
// string allocation — 3–5× faster per write, zero heap allocs on
|
||
// the hot path. 48 bytes fits any DBF numeric field (max 20 len).
|
||
var scratch [48]byte
|
||
s := strconv.AppendFloat(scratch[:0], d, 'f', int(dec), 64)
|
||
|
||
// Overflow → asterisks, same as before.
|
||
if len(s) > int(fieldLen) {
|
||
for i := range raw {
|
||
raw[i] = '*'
|
||
}
|
||
return
|
||
}
|
||
|
||
// Right-align, space-pad left.
|
||
padLen := int(fieldLen) - len(s)
|
||
for i := 0; i < padLen; i++ {
|
||
raw[i] = ' '
|
||
}
|
||
copy(raw[padLen:], s)
|
||
}
|
||
|
||
func putDateField(raw []byte, fieldLen byte, val hbrt.Value) {
|
||
if fieldLen == 8 {
|
||
if !val.IsDateTime() || val.AsJulian() == 0 {
|
||
copy(raw, " ")
|
||
return
|
||
}
|
||
y, m, d := julianToDate(val.AsJulian())
|
||
s := fmt.Sprintf("%04d%02d%02d", y, m, d)
|
||
copy(raw, s)
|
||
} else if fieldLen == 4 {
|
||
binary.LittleEndian.PutUint32(raw, uint32(val.AsJulian()))
|
||
}
|
||
}
|
||
|
||
// putMemoRef stores a memo block number in a memo field.
//
// A 4-byte field receives blockNo as a little-endian uint32. A 10-byte
// field receives it as decimal ASCII, right-aligned and space-padded to
// ten characters. Any other field length leaves raw untouched.
func putMemoRef(raw []byte, fieldLen byte, blockNo uint32) {
	switch fieldLen {
	case 4:
		binary.LittleEndian.PutUint32(raw, blockNo)

	case 10:
		// A uint32 has at most ten decimal digits, so it always fits.
		var scratch, cell [10]byte
		digits := strconv.AppendUint(scratch[:0], uint64(blockNo), 10)
		pad := len(cell) - len(digits)
		for i := 0; i < pad; i++ {
			cell[i] = ' '
		}
		copy(cell[pad:], digits)
		copy(raw, cell[:])
	}
}
|
||
|
||
func putIntegerField(raw []byte, fieldLen byte, val hbrt.Value) {
|
||
n := val.AsNumInt()
|
||
switch fieldLen {
|
||
case 1:
|
||
raw[0] = byte(int8(n))
|
||
case 2:
|
||
binary.LittleEndian.PutUint16(raw, uint16(int16(n)))
|
||
case 4:
|
||
binary.LittleEndian.PutUint32(raw, uint32(int32(n)))
|
||
case 8:
|
||
binary.LittleEndian.PutUint64(raw, uint64(n))
|
||
}
|
||
}
|
||
|
||
// --- Julian date helpers ---
|
||
|
||
// dateToJulian converts a calendar date (year y, month m, day d) to a
// Julian day number. No validation is performed on the arguments.
func dateToJulian(y, m, d int) int64 {
	// January and February are counted as months 13 and 14 of the
	// previous year.
	if m <= 2 {
		y, m = y-1, m+12
	}

	hundreds := y / 100
	correction := 2 - hundreds + hundreds/4

	yearDays := int64(365.25 * float64(y+4716))
	monthDays := int64(30.6001 * float64(m+1))
	return yearDays + monthDays + int64(d+correction) - 1524
}
|
||
|
||
// julianToDate converts a Julian day number to a calendar date using
// integer arithmetic only. A Julian day of 0 or less returns (0, 0, 0).
func julianToDate(julian int64) (y, m, d int) {
	if julian <= 0 {
		return 0, 0, 0
	}

	rem := julian + 68569
	cycles := 4 * rem / 146097
	rem -= (146097*cycles + 3) / 4

	yr := 4000 * (rem + 1) / 1461001
	rem = rem - 1461*yr/4 + 31

	mon := 80 * rem / 2447
	day := rem - 2447*mon/80

	// carry is 1 when the intermediate month runs past December.
	carry := mon / 11

	return int(100*(cycles-49) + yr + carry), int(mon + 2 - 12*carry), int(day)
}
|
||
|
||
// parseInt builds a non-negative integer from the ASCII digits in s.
// Every character that is not '0'..'9' is skipped rather than treated
// as an error, so "" yields 0 and "1a2" yields 12.
func parseInt(s string) int {
	total := 0
	for i := 0; i < len(s); i++ {
		// Bytes of multi-byte UTF-8 sequences are all >= 0x80, so a
		// byte-wise scan sees exactly the same digits as a rune-wise one.
		digit := s[i] - '0'
		if digit > 9 {
			continue
		}
		total = total*10 + int(digit)
	}
	return total
}
|