Files
five/hbrdd/dbf/field.go
CharlesKWON f4ed42556b checkpoint: season-wide bug fix campaign + infra
Cumulative season's silent-bug hunting (~62 fixes) across the FiveSql2
SQL engine, the Five compiler/runtime, and the hbrdd RDD layer. Saved
as a single checkpoint before refactoring the parser to delegate xBase
command translation to the preprocessor.

Highlights:

FiveSql2 engine (_FiveSql2/src/)
- prefix-glob index attach -> explicit convention (<table>_pk.ntx,
  <table>_uq.ntx, <table>.cdx) — fixes silent multi-row INSERT row-drop
- DROP/CREATE TABLE FErase chain extended (.cdx, .fsc, .fsv, .dbt, .fpt)
- COUNT(DISTINCT col) parsed + aggregated via hSeen hash
- UNION column-count mismatch returns SQL_ERR_GRAMMAR (was silent)
- DISTINCT + ORDER BY hidden-col leak fixed (trim before DISTINCT)
- Derived table FROM (SELECT...) + JOIN right-side derived
- Self-FK CASCADE depth 2+ via SqlGetSingleColPK pre-collect
- LAG/LEAD default arg uses SqlEvalRowExpr (handles -N const exprs)
- DATE literal round-trip validation (Feb 29 non-leap rejected)
- CREATE OR REPLACE VIEW; CREATE VIEW errors on already-exists
- AlterTable type dispatcher comma-wrapped (1-char type "A" no longer
  matches CHARACTER)

Compiler / runtime
- gengo: HB_ -> FV_ prefix on emitted Go function names (Five identity)
- gengo split: emit_block.go, emit_stmt.go, folding.go extracted
- parser/stmtreg.go nudges
- hbrt: debug TUI/CLI restructure (debugcmd, debugkey, termios_*),
  windows debug stubs collapsed
- thread/vm/value/class/pcinterp tightening from panic traces

RDD layer (hbrdd/)
- dbf: null bitmap support (null.go + null_test.go), mmap split
  (mmap_posix.go / mmap_windows.go), byte-level numeric parse
- ntx/cdx: windows mmap parity
- workarea + mem RDD: cross-area state-bleed fixes

RTL (hbrtl/)
- errorlog rewrite with platform-specific FD (errorlog_fd_unix /
  errorlog_fd_other)
- sqlscan, sqlhelpers, indexrtl, datetime extensions

Gates green at checkpoint:
- go test ./...        : PASS
- FiveSql2 SQL:1999    : 43/43
- Harbour compat       : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 09:26:25 +09:00

532 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.
// DBF field type conversion: raw bytes ↔ Five Value.
// Each field type (C, N, L, D, M, I, B, @, etc.) has an exact on-disk byte format.
//
// Reference: /mnt/d/harbour-core/src/rdd/dbf1.c (getValue/putValue)
// docs/dbf-engine-spec.md Section 3
package dbf
import (
"encoding/binary"
"five/hbrt"
"fmt"
"math"
"strconv"
"strings"
)
// GetFieldValue decodes the field that starts at offset inside recBuf and
// returns it as a Five Value. It always asks getFieldValueImpl for the
// copying decode (stable=false).
// Harbour: hb_dbfGetValue in dbf1.c
func GetFieldValue(recBuf []byte, offset uint16, field *FieldDesc) hbrt.Value {
	const stable = false
	return getFieldValueImpl(recBuf, offset, field, stable)
}
// getFieldValueImpl decodes one field of a record buffer, optionally
// without copying character data.
//
// With stable=true the caller promises that the bytes of recBuf are not
// mutated, freed, or unmapped while the returned Value is alive; CHAR
// fields then alias the buffer instead of going through the
// `string([]byte)` copy.
//
// NOTE: kept unexported on purpose. Naive use (even over mmap-backed
// buffers) can turn into a use-after-free when FiveSql2 closes or packs
// temporary CTE tables while CHAR values from earlier iterations are
// still referenced. The aliasing path is retained for a future
// refcounted mmap lifetime scheme.
//
// Binary widths that a type does not support decode to Nil; an unknown
// type byte decodes to the raw bytes as a string.
func getFieldValueImpl(recBuf []byte, offset uint16, field *FieldDesc, stable bool) hbrt.Value {
	width := field.Len
	cell := recBuf[offset : offset+uint16(width)]

	switch field.Type {
	case 'C', 'c': // Character
		if !stable {
			return hbrt.MakeString(string(cell))
		}
		return hbrt.MakeStringBytes(cell)

	case 'N', 'n': // Numeric (ASCII)
		return parseNumericField(cell, field.Dec)

	case 'L', 'l': // Logical
		return parseLogicalField(cell[0])

	case 'D', 'd': // Date
		return parseDateField(cell, width)

	case 'M', 'm': // Memo (block reference)
		return parseMemoRef(cell, width)

	case 'I', 'i', '+': // Integer / Autoincrement (binary LE)
		return parseIntegerField(cell, width)

	case 'B', 'b': // Double (IEEE 754 LE)
		if width != 8 {
			return hbrt.MakeNil()
		}
		return hbrt.MakeDoubleAuto(math.Float64frombits(binary.LittleEndian.Uint64(cell)))

	case '^': // RowVersion (uint64 LE)
		if width != 8 {
			return hbrt.MakeNil()
		}
		return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(cell)))

	case 'Y', 'y': // Currency (int64 LE, implicit 4 decimal places)
		if width != 8 {
			return hbrt.MakeNil()
		}
		scaled := int64(binary.LittleEndian.Uint64(cell))
		return hbrt.MakeDouble(float64(scaled)/10000.0, 20, 4)

	case '@', '=', 'T', 't': // Timestamp / Modtime / Harbour 'T'
		switch {
		case width >= 8:
			// 4 bytes date followed by 4 bytes time, both little-endian.
			day := int64(binary.LittleEndian.Uint32(cell[0:4]))
			ms := int32(binary.LittleEndian.Uint32(cell[4:8]))
			return hbrt.MakeTimestamp(day, ms)
		case width == 4 && (field.Type == 'T' || field.Type == 't'):
			// Only the 'T' type has a 4-byte, time-only form.
			ms := int32(binary.LittleEndian.Uint32(cell[0:4]))
			return hbrt.MakeTimestamp(0, ms)
		}
		return hbrt.MakeNil()
	}

	// Unknown type: hand the raw bytes back as a string.
	return hbrt.MakeString(string(cell))
}
// PutFieldValue encodes val into the field that starts at offset inside
// recBuf, using the on-disk format selected by field.Type and field.Len.
// Harbour: hb_dbfPutValue in dbf1.c
//
// Widths that a binary type does not support are left untouched (no
// bytes are written). Two details differ from a naive encoder:
//
//   - Currency values are rounded to the nearest 1/10000 rather than
//     truncated. The scaled product is computed in binary floating point
//     and can land a hair below the intended integer, in which case plain
//     truncation would drop the last stored digit.
//   - A 4-byte 'T' field stores the time part only, matching the way
//     getFieldValueImpl decodes that width.
func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Value) {
	raw := recBuf[offset : offset+uint16(field.Len)]
	switch field.Type {
	case 'C', 'c': // Character: copy, then pad the remainder with spaces
		n := copy(raw, val.AsString())
		for i := n; i < len(raw); i++ {
			raw[i] = ' '
		}

	case 'N', 'n': // Numeric (ASCII, right-aligned, space-padded)
		formatNumericField(raw, field.Len, field.Dec, val)

	case 'L', 'l': // Logical: ' ' for Nil, otherwise 'T' / 'F'
		switch {
		case val.IsNil():
			raw[0] = ' '
		case val.AsBool():
			raw[0] = 'T'
		default:
			raw[0] = 'F'
		}

	case 'D', 'd': // Date
		putDateField(raw, field.Len, val)

	case 'M', 'm': // Memo (block reference)
		// Memo payload writes are handled by MemoHandler; only the block
		// number is stored here, and only for integer values.
		if val.IsNumInt() {
			putMemoRef(raw, field.Len, uint32(val.AsNumInt()))
		}

	case 'I', 'i', '+': // Integer / Autoincrement
		putIntegerField(raw, field.Len, val)

	case 'B', 'b': // Double (IEEE 754 LE)
		if field.Len == 8 {
			binary.LittleEndian.PutUint64(raw, math.Float64bits(val.AsNumDouble()))
		}

	case '@', '=', 'T', 't': // Timestamp / Modtime
		switch {
		case field.Len >= 8:
			binary.LittleEndian.PutUint32(raw[0:4], uint32(val.AsJulian()))
			binary.LittleEndian.PutUint32(raw[4:8], uint32(val.AsTimeMs()))
		case field.Len == 4 && (field.Type == 'T' || field.Type == 't'):
			// Time-only form: the read side decodes these 4 bytes as the
			// time component with a zero date.
			binary.LittleEndian.PutUint32(raw[0:4], uint32(val.AsTimeMs()))
		}

	case 'Y', 'y': // Currency (int64 LE, implicit 4 decimal places)
		if field.Len == 8 {
			scaled := int64(math.Round(val.AsNumDouble() * 10000.0))
			binary.LittleEndian.PutUint64(raw, uint64(scaled))
		}

	case '^': // RowVersion
		if field.Len == 8 {
			binary.LittleEndian.PutUint64(raw, uint64(val.AsLong()))
		}

	default:
		// Unknown type: write the string form, without padding.
		copy(raw, val.AsString())
	}
}
// --- Internal parsers ---
// parseNumericField decodes an ASCII numeric DBF field (right-aligned,
// space-padded, optional leading sign, optional '.') without allocating
// on the common path.
//
// Results:
//   - blank field                        -> MakeInt(0)
//   - sign and/or '.' but no digits      -> MakeDouble(0, width, dec)
//   - dec == 0 and digits only           -> MakeNumInt (exact int64)
//   - anything else that scans cleanly   -> MakeDouble(value, width, dec)
//   - unexpected characters, a second '.', or magnitudes the fast path
//     cannot represent exactly           -> strconv.ParseFloat fallback,
//     which yields MakeInt(0) when the text is not a valid float.
//
// The scan accumulates every digit (integer and fractional) into one
// unsigned mantissa with an explicit overflow check, so wide fields can
// no longer wrap an int64 silently. The float result is produced by a
// single division of two exactly-representable operands, which gives the
// same correctly-rounded value strconv.ParseFloat would, instead of the
// two roundings of `intPart + fracPart/10^k`.
func parseNumericField(raw []byte, dec byte) hbrt.Value {
	start, end := 0, len(raw)
	for start < end && raw[start] == ' ' {
		start++
	}
	for end > start && raw[end-1] == ' ' {
		end--
	}
	if start == end {
		return hbrt.MakeInt(0)
	}

	i := start
	neg := false
	switch raw[i] {
	case '-':
		neg = true
		i++
	case '+':
		i++
	}

	// Overflow guard in the style of strconv: the mantissa may grow to
	// MaxInt64 for positive values and to |MinInt64| for negative ones.
	const cutoff = uint64(math.MaxInt64) / 10
	lastDigit := uint64(math.MaxInt64 % 10)
	if neg {
		lastDigit++ // |MinInt64| is one larger than MaxInt64
	}

	var (
		mant     uint64
		fracLen  int
		sawDigit bool
		sawDot   bool
	)
	for ; i < end; i++ {
		c := raw[i]
		if c == '.' && !sawDot {
			sawDot = true
			continue
		}
		if c < '0' || c > '9' {
			// Unexpected char (including a second '.'): let strconv decide.
			return parseNumericFallback(raw, start, end, dec)
		}
		d := uint64(c - '0')
		if mant > cutoff || (mant == cutoff && d > lastDigit) {
			// Too many significant digits for the fast path.
			return parseNumericFallback(raw, start, end, dec)
		}
		mant = mant*10 + d
		sawDigit = true
		if sawDot {
			fracLen++
		}
	}

	if !sawDigit {
		return hbrt.MakeDouble(0, uint16(len(raw)), uint16(dec))
	}

	if !sawDot && dec == 0 {
		// Pure integer text in an integer-typed field. For the single value
		// |MinInt64| the conversion wraps to MinInt64 and negating it is a
		// no-op, which is exactly the intended result.
		n := int64(mant)
		if neg {
			n = -n
		}
		return hbrt.MakeNumInt(n)
	}

	// A mantissa up to 2^53 and a power of ten up to 10^19 are both exact
	// float64 values, so one division rounds only once. pow10f serves
	// exponents 0..19 from its precomputed table. Outside that window defer
	// to strconv rather than accept extra rounding error.
	const (
		maxExactMant       = uint64(1) << 53
		maxExactFracDigits = 19
	)
	f := float64(mant)
	if fracLen > 0 {
		if mant > maxExactMant || fracLen > maxExactFracDigits {
			return parseNumericFallback(raw, start, end, dec)
		}
		f /= pow10f(fracLen)
	}
	if neg {
		f = -f
	}
	return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
}

// parseNumericFallback is the slow path of parseNumericField: it parses
// raw[start:end] with strconv.ParseFloat and returns MakeInt(0) when the
// text is not a valid float.
func parseNumericFallback(raw []byte, start, end int, dec byte) hbrt.Value {
	f, err := strconv.ParseFloat(string(raw[start:end]), 64)
	if err != nil {
		return hbrt.MakeInt(0)
	}
	return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
}
// pow10Table holds 10^n for n = 0..19 so that hot-path callers do not
// have to go through math.Pow. DBF numeric fields rarely carry more than
// about ten decimal places, so the table covers the usual range.
var pow10Table = [20]float64{
	1e0, 1e1, 1e2, 1e3, 1e4,
	1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14,
	1e15, 1e16, 1e17, 1e18, 1e19,
}

// pow10f returns 10 raised to the power n. Exponents covered by
// pow10Table are served from the table; everything else (negative or
// larger exponents) is computed with math.Pow.
func pow10f(n int) float64 {
	if n < 0 || n >= len(pow10Table) {
		return math.Pow(10, float64(n))
	}
	return pow10Table[n]
}
// parseLogicalField decodes the single byte of a logical field:
// T/t/Y/y is true, F/f/N/n is false, and any other byte (such as the
// space of an uninitialized field) is Nil.
func parseLogicalField(b byte) hbrt.Value {
	if b == 'T' || b == 't' || b == 'Y' || b == 'y' {
		return hbrt.MakeBool(true)
	}
	if b == 'F' || b == 'f' || b == 'N' || b == 'n' {
		return hbrt.MakeBool(false)
	}
	return hbrt.MakeNil() // space = uninitialized
}
// parseDateField decodes a date field into a Date value holding a Julian
// day number. Three widths are understood:
//
//   - 8 bytes: ASCII "YYYYMMDD"
//   - 3 bytes: little-endian 24-bit Julian day
//   - 4 bytes: little-endian 32-bit Julian day (VFP)
//
// Any other width yields the empty date (0).
//
// For the ASCII form the calendar date is validated before conversion: a
// year below 1, a month outside 1..12, or a day outside the month's
// length (Gregorian leap-year rule) yields the empty date. Without that
// check, text such as "20230000" or "20230231" would be pushed through
// dateToJulian and come back as an unrelated, bogus day number.
func parseDateField(raw []byte, fieldLen byte) hbrt.Value {
	switch fieldLen {
	case 8:
		// Standard: YYYYMMDD ASCII
		s := string(raw)
		if strings.TrimSpace(s) == "" {
			return hbrt.MakeDate(0) // empty date
		}
		y := parseInt(s[0:4])
		m := parseInt(s[4:6])
		d := parseInt(s[6:8])
		if y <= 0 || m < 1 || m > 12 || d < 1 {
			return hbrt.MakeDate(0)
		}
		daysInMonth := [...]int{31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}
		maxDay := daysInMonth[m-1]
		if m == 2 && y%4 == 0 && (y%100 != 0 || y%400 == 0) {
			maxDay = 29
		}
		if d > maxDay {
			return hbrt.MakeDate(0)
		}
		return hbrt.MakeDate(dateToJulian(y, m, d))

	case 3:
		// Short: LE uint24
		julian := int64(raw[0]) | int64(raw[1])<<8 | int64(raw[2])<<16
		return hbrt.MakeDate(julian)

	case 4:
		// VFP: LE uint32 Julian
		return hbrt.MakeDate(int64(binary.LittleEndian.Uint32(raw)))
	}
	return hbrt.MakeDate(0)
}
// parseMemoRef decodes a memo field's block reference as a Long.
//
//   - 4 bytes: little-endian uint32 block number
//   - 10 bytes: ASCII decimal block number; spaces are skipped wherever
//     they occur, and any other non-digit makes the whole reference 0
//
// Every other width decodes to 0. The ASCII form is scanned byte by byte
// so no string conversion or strconv call is needed.
func parseMemoRef(raw []byte, fieldLen byte) hbrt.Value {
	switch fieldLen {
	case 4:
		return hbrt.MakeLong(int64(binary.LittleEndian.Uint32(raw)))
	case 10:
		var block int64
		for idx := 0; idx < len(raw); idx++ {
			ch := raw[idx]
			if ch == ' ' {
				// Padding: leave the accumulator as it is.
				continue
			}
			if ch < '0' || ch > '9' {
				// Malformed block ref: treat as 0.
				return hbrt.MakeLong(0)
			}
			block = block*10 + int64(ch-'0')
		}
		return hbrt.MakeLong(block)
	}
	return hbrt.MakeLong(0)
}
// parseIntegerField decodes a little-endian two's-complement integer of
// 1, 2, 3, 4 or 8 bytes. Widths up to 4 bytes produce an Int, 8 bytes
// produce a Long, and any other width decodes to Int 0.
func parseIntegerField(raw []byte, fieldLen byte) hbrt.Value {
	if fieldLen == 8 {
		return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw)))
	}

	var n int
	switch fieldLen {
	case 1:
		n = int(int8(raw[0]))
	case 2:
		n = int(int16(binary.LittleEndian.Uint16(raw)))
	case 3:
		u := int32(raw[0]) | int32(raw[1])<<8 | int32(raw[2])<<16
		// Move bit 23 into the sign position and shift back
		// arithmetically to sign-extend the 24-bit value.
		n = int((u << 8) >> 8)
	case 4:
		n = int(int32(binary.LittleEndian.Uint32(raw)))
	default:
		return hbrt.MakeInt(0)
	}
	return hbrt.MakeInt(n)
}
// --- Internal formatters ---
// formatNumericField writes val into raw as an ASCII numeric field:
// right-aligned, space-padded on the left, with dec decimal places.
//
// The whole field is filled with '*' when the value is NaN or ±Inf, or
// when its text is wider than fieldLen (Harbour's width-overflow marker).
//
// Integer values written to a field without decimals are formatted
// straight from the integer instead of going through float64. The read
// side returns such fields as exact int64 values, and a float64 round
// trip would corrupt anything above 2^53.
// NOTE(review): assumes val.IsNumInt() reports an integer-typed Value and
// val.AsNumInt() returns it unchanged, as PutFieldValue's memo case
// suggests — confirm against hbrt.
//
// Formatting uses strconv.Append* into a fixed scratch array rather than
// fmt.Sprintf, so typical field widths need no heap allocation.
func formatNumericField(raw []byte, fieldLen, dec byte, val hbrt.Value) {
	var scratch [48]byte
	var text []byte

	if dec == 0 && val.IsNumInt() {
		text = strconv.AppendInt(scratch[:0], int64(val.AsNumInt()), 10)
	} else {
		d := val.AsNumDouble()
		if !math.IsNaN(d) && !math.IsInf(d, 0) {
			text = strconv.AppendFloat(scratch[:0], d, 'f', int(dec), 64)
		}
	}

	// text is nil only for NaN/Inf; both that case and width overflow are
	// reported with asterisks.
	if text == nil || len(text) > int(fieldLen) {
		for i := range raw {
			raw[i] = '*'
		}
		return
	}

	// Right-align, space-pad left.
	padLen := int(fieldLen) - len(text)
	for i := 0; i < padLen; i++ {
		raw[i] = ' '
	}
	copy(raw[padLen:], text)
}
// putDateField encodes a date Value into raw.
//
//   - 8 bytes: ASCII "YYYYMMDD". A value that is not a date/time, has a
//     non-positive Julian day, or falls outside years 1..9999 is written
//     as an all-blank field. Every byte is blanked explicitly so that no
//     stale digits from a previous value survive, and out-of-range years
//     can no longer spill into the month/day columns.
//   - 3 bytes: little-endian 24-bit Julian day (the short form that
//     parseDateField reads).
//   - 4 bytes: little-endian 32-bit Julian day.
//
// Other widths are left untouched.
func putDateField(raw []byte, fieldLen byte, val hbrt.Value) {
	switch fieldLen {
	case 8:
		var y, m, d int
		if val.IsDateTime() {
			// julianToDate returns 0,0,0 for non-positive day numbers.
			y, m, d = julianToDate(val.AsJulian())
		}
		if y < 1 || y > 9999 {
			for i := range raw {
				raw[i] = ' '
			}
			return
		}
		raw[0] = byte('0' + y/1000)
		raw[1] = byte('0' + y/100%10)
		raw[2] = byte('0' + y/10%10)
		raw[3] = byte('0' + y%10)
		raw[4] = byte('0' + m/10)
		raw[5] = byte('0' + m%10)
		raw[6] = byte('0' + d/10)
		raw[7] = byte('0' + d%10)

	case 3:
		jd := uint32(val.AsJulian())
		raw[0] = byte(jd)
		raw[1] = byte(jd >> 8)
		raw[2] = byte(jd >> 16)

	case 4:
		binary.LittleEndian.PutUint32(raw, uint32(val.AsJulian()))
	}
}
// putMemoRef stores a memo block number in raw. A 4-byte field receives
// the number as a little-endian uint32; a 10-byte field receives it as
// right-aligned, space-padded decimal text. Other widths are left
// untouched.
func putMemoRef(raw []byte, fieldLen byte, blockNo uint32) {
	switch fieldLen {
	case 4:
		binary.LittleEndian.PutUint32(raw, blockNo)
	case 10:
		copy(raw, fmt.Sprintf("%10d", blockNo))
	}
}
// putIntegerField stores val's integer value in raw as a little-endian
// two's-complement integer of 1, 2, 3, 4 or 8 bytes; the value is
// truncated to the field width. Other widths are left untouched.
//
// The 3-byte width is included so that every width parseIntegerField can
// decode can also be written; without it a write to such a field was
// silently dropped.
func putIntegerField(raw []byte, fieldLen byte, val hbrt.Value) {
	n := val.AsNumInt()
	switch fieldLen {
	case 1:
		raw[0] = byte(int8(n))
	case 2:
		binary.LittleEndian.PutUint16(raw, uint16(int16(n)))
	case 3:
		// Low 24 bits, least significant byte first.
		raw[0] = byte(n)
		raw[1] = byte(n >> 8)
		raw[2] = byte(n >> 16)
	case 4:
		binary.LittleEndian.PutUint32(raw, uint32(int32(n)))
	case 8:
		binary.LittleEndian.PutUint64(raw, uint64(n))
	}
}
// --- Julian date helpers ---
// dateToJulian converts a calendar date to a Julian day number. The
// century correction is applied unconditionally, i.e. the date is always
// interpreted on the Gregorian calendar. January and February are
// counted as months 13 and 14 of the preceding year. The inputs are not
// validated.
func dateToJulian(y, m, d int) int64 {
	year, month := y, m
	if month <= 2 {
		year--
		month += 12
	}
	century := year / 100
	correction := 2 - century + century/4

	yearDays := int64(365.25 * float64(year+4716))
	monthDays := int64(30.6001 * float64(month+1))
	return yearDays + monthDays + int64(d+correction) - 1524
}
// julianToDate converts a Julian day number to a calendar year, month
// and day using integer arithmetic only. Non-positive day numbers yield
// 0, 0, 0.
func julianToDate(julian int64) (y, m, d int) {
	if julian <= 0 {
		return 0, 0, 0
	}

	rem := julian + 68569
	quad := 4 * rem / 146097
	rem -= (146097*quad + 3) / 4

	yearPart := 4000 * (rem + 1) / 1461001
	rem = rem - 1461*yearPart/4 + 31

	monthPart := 80 * rem / 2447
	day := rem - 2447*monthPart/80

	carry := monthPart / 11
	month := monthPart + 2 - 12*carry
	year := 100*(quad-49) + yearPart + carry

	return int(year), int(month), int(day)
}
// parseInt builds a non-negative integer from the ASCII digits in s.
// Every character that is not '0'..'9' is skipped rather than treated
// as an error, so an empty or digit-free string yields 0.
func parseInt(s string) int {
	total := 0
	for idx := 0; idx < len(s); idx++ {
		if ch := s[idx]; ch >= '0' && ch <= '9' {
			total = total*10 + int(ch-'0')
		}
	}
	return total
}