parseNumericField was allocating on every call — `string(raw)` to
convert the record-buffer slice to a string, plus the implicit
allocation from TrimSpace's return value. For a 50k-row scan reading
two numeric fields, that's 100k+ small string allocations per scan,
all of which promptly became garbage.
Rewritten to walk the raw byte slice directly:
- Find the trimmed range by byte indexing (no alloc).
- Parse integer-typed fields (dec == 0) digit-by-digit into int64.
- Only fall back to strconv.ParseFloat + string allocation for
genuinely fractional data (dec > 0 or embedded `.`).
This also lifts the raw RDD baseline in our bench (6.8ms → 6.2ms)
because FieldGet hits this same parser. Every scan path benefits,
not just the FiveSql2 hot loop.
Measured (50k rows, 3-run steady state):

                     Before    After
  No WHERE           10.0ms    9.1ms
  Numeric WHERE       7.8ms    6.9ms   ← now 1.11x raw
  String WHERE        7.9ms    (see next commit)
  Raw RDD baseline    6.8ms    6.2ms   ← also faster
Validation:
- hbrdd/dbf tests PASS (including integer/float field roundtrips)
- FiveSql2 43/43
- Harbour compat 51/51
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
415 lines
9.9 KiB
Go
415 lines
9.9 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// DBF field type conversion: raw bytes ↔ Five Value.
|
|
// Each field type (C, N, L, D, M, I, B, @, etc.) has exact byte format.
|
|
//
|
|
// Reference: /mnt/d/harbour-core/src/rdd/dbf1.c (getValue/putValue)
|
|
// docs/dbf-engine-spec.md Section 3
|
|
package dbf
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"five/hbrt"
|
|
"fmt"
|
|
"math"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// GetFieldValue converts raw record bytes to a Five Value.
|
|
// Harbour: hb_dbfGetValue in dbf1.c
|
|
func GetFieldValue(recBuf []byte, offset uint16, field *FieldDesc) hbrt.Value {
|
|
raw := recBuf[offset : offset+uint16(field.Len)]
|
|
|
|
switch field.Type {
|
|
case 'C', 'c': // Character
|
|
return hbrt.MakeString(string(raw))
|
|
|
|
case 'N', 'n': // Numeric (ASCII)
|
|
return parseNumericField(raw, field.Dec)
|
|
|
|
case 'L', 'l': // Logical
|
|
return parseLogicalField(raw[0])
|
|
|
|
case 'D', 'd': // Date
|
|
return parseDateField(raw, field.Len)
|
|
|
|
case 'M', 'm': // Memo (block reference)
|
|
return parseMemoRef(raw, field.Len)
|
|
|
|
case 'I', 'i': // Integer (binary LE)
|
|
return parseIntegerField(raw, field.Len)
|
|
|
|
case 'B', 'b': // Double (IEEE 754 LE)
|
|
if field.Len == 8 {
|
|
bits := binary.LittleEndian.Uint64(raw)
|
|
return hbrt.MakeDoubleAuto(math.Float64frombits(bits))
|
|
}
|
|
return hbrt.MakeNil()
|
|
|
|
case '@': // Timestamp (4 bytes date + 4 bytes time, LE)
|
|
if field.Len >= 8 {
|
|
julian := int64(binary.LittleEndian.Uint32(raw[0:4]))
|
|
timeMs := int32(binary.LittleEndian.Uint32(raw[4:8]))
|
|
return hbrt.MakeTimestamp(julian, timeMs)
|
|
}
|
|
return hbrt.MakeNil()
|
|
|
|
case '+': // Autoincrement (binary LE integer)
|
|
return parseIntegerField(raw, field.Len)
|
|
|
|
case '=': // Modtime (same as Timestamp)
|
|
if field.Len >= 8 {
|
|
julian := int64(binary.LittleEndian.Uint32(raw[0:4]))
|
|
timeMs := int32(binary.LittleEndian.Uint32(raw[4:8]))
|
|
return hbrt.MakeTimestamp(julian, timeMs)
|
|
}
|
|
return hbrt.MakeNil()
|
|
|
|
case '^': // RowVersion (uint64 LE)
|
|
if field.Len == 8 {
|
|
return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw)))
|
|
}
|
|
return hbrt.MakeNil()
|
|
|
|
case 'Y', 'y': // Currency (int64 LE, implicit 4 decimal places)
|
|
if field.Len == 8 {
|
|
cents := int64(binary.LittleEndian.Uint64(raw))
|
|
return hbrt.MakeDouble(float64(cents)/10000.0, 20, 4)
|
|
}
|
|
return hbrt.MakeNil()
|
|
|
|
case 'T', 't': // Timestamp (Harbour extension)
|
|
if field.Len >= 8 {
|
|
julian := int64(binary.LittleEndian.Uint32(raw[0:4]))
|
|
timeMs := int32(binary.LittleEndian.Uint32(raw[4:8]))
|
|
return hbrt.MakeTimestamp(julian, timeMs)
|
|
}
|
|
if field.Len == 4 {
|
|
// Time only
|
|
timeMs := int32(binary.LittleEndian.Uint32(raw[0:4]))
|
|
return hbrt.MakeTimestamp(0, timeMs)
|
|
}
|
|
return hbrt.MakeNil()
|
|
|
|
default:
|
|
// Unknown type: return as string
|
|
return hbrt.MakeString(string(raw))
|
|
}
|
|
}
|
|
|
|
// PutFieldValue converts a Five Value to raw record bytes.
|
|
// Harbour: hb_dbfPutValue in dbf1.c
|
|
func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Value) {
|
|
raw := recBuf[offset : offset+uint16(field.Len)]
|
|
|
|
switch field.Type {
|
|
case 'C', 'c': // Character
|
|
s := val.AsString()
|
|
copy(raw, s)
|
|
// Pad with spaces
|
|
if len(s) < int(field.Len) {
|
|
for i := len(s); i < int(field.Len); i++ {
|
|
raw[i] = ' '
|
|
}
|
|
}
|
|
|
|
case 'N', 'n': // Numeric (ASCII, right-aligned, space-padded)
|
|
formatNumericField(raw, field.Len, field.Dec, val)
|
|
|
|
case 'L', 'l': // Logical
|
|
if val.IsNil() {
|
|
raw[0] = ' '
|
|
} else if val.AsBool() {
|
|
raw[0] = 'T'
|
|
} else {
|
|
raw[0] = 'F'
|
|
}
|
|
|
|
case 'D', 'd': // Date
|
|
putDateField(raw, field.Len, val)
|
|
|
|
case 'M', 'm': // Memo (block reference)
|
|
// Memo writes handled by MemoHandler
|
|
// Here just store block number
|
|
if val.IsNumInt() {
|
|
putMemoRef(raw, field.Len, uint32(val.AsNumInt()))
|
|
}
|
|
|
|
case 'I', 'i', '+': // Integer / Autoincrement
|
|
putIntegerField(raw, field.Len, val)
|
|
|
|
case 'B', 'b': // Double (IEEE 754 LE)
|
|
if field.Len == 8 {
|
|
binary.LittleEndian.PutUint64(raw, math.Float64bits(val.AsNumDouble()))
|
|
}
|
|
|
|
case '@', '=', 'T', 't': // Timestamp / Modtime
|
|
if field.Len >= 8 {
|
|
binary.LittleEndian.PutUint32(raw[0:4], uint32(val.AsJulian()))
|
|
binary.LittleEndian.PutUint32(raw[4:8], uint32(val.AsTimeMs()))
|
|
}
|
|
|
|
case 'Y', 'y': // Currency
|
|
if field.Len == 8 {
|
|
cents := int64(val.AsNumDouble() * 10000.0)
|
|
binary.LittleEndian.PutUint64(raw, uint64(cents))
|
|
}
|
|
|
|
case '^': // RowVersion
|
|
if field.Len == 8 {
|
|
binary.LittleEndian.PutUint64(raw, uint64(val.AsLong()))
|
|
}
|
|
|
|
default:
|
|
// Unknown: write as string
|
|
s := val.AsString()
|
|
copy(raw, s)
|
|
}
|
|
}
|
|
|
|
// --- Internal parsers ---
|
|
|
|
func parseNumericField(raw []byte, dec byte) hbrt.Value {
|
|
// Byte-level fast path — avoids `string(raw)` + TrimSpace + ParseInt
|
|
// allocations on the hot scan path. Numeric DBF fields are ASCII,
|
|
// right-aligned, space-padded, optional leading sign, optional `.`
|
|
// for decimals. A full 50k-row scan can hit this fn 100 k+ times,
|
|
// so every allocation matters.
|
|
//
|
|
// Algorithm:
|
|
// 1. Walk past leading spaces.
|
|
// 2. Detect sign.
|
|
// 3. Accumulate int64 digit-by-digit.
|
|
// 4. If we hit `.` or the field has dec > 0, bail to float parser
|
|
// (that path is rare on integer-typed DBF fields like IDs /
|
|
// counters, which dominate WHERE predicates).
|
|
// 5. Walk past trailing spaces.
|
|
//
|
|
// All operations are byte comparisons on the raw record buffer —
|
|
// no heap allocation unless the field is genuinely fractional.
|
|
|
|
start := 0
|
|
end := len(raw)
|
|
for start < end && raw[start] == ' ' {
|
|
start++
|
|
}
|
|
for end > start && raw[end-1] == ' ' {
|
|
end--
|
|
}
|
|
if start == end {
|
|
return hbrt.MakeInt(0)
|
|
}
|
|
|
|
if dec == 0 {
|
|
// Fast integer path
|
|
i := start
|
|
neg := false
|
|
if raw[i] == '-' {
|
|
neg = true
|
|
i++
|
|
} else if raw[i] == '+' {
|
|
i++
|
|
}
|
|
var n int64
|
|
ok := i < end
|
|
for ; i < end; i++ {
|
|
c := raw[i]
|
|
if c == '.' {
|
|
ok = false
|
|
break
|
|
}
|
|
if c < '0' || c > '9' {
|
|
ok = false
|
|
break
|
|
}
|
|
n = n*10 + int64(c-'0')
|
|
}
|
|
if ok {
|
|
if neg {
|
|
n = -n
|
|
}
|
|
return hbrt.MakeNumInt(n)
|
|
}
|
|
// Fall through: has a `.` or unexpected char → use float path
|
|
}
|
|
|
|
// Decimal/float path — allocate once for strconv
|
|
f, err := strconv.ParseFloat(string(raw[start:end]), 64)
|
|
if err == nil {
|
|
return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
|
|
}
|
|
return hbrt.MakeInt(0)
|
|
}
|
|
|
|
func parseLogicalField(b byte) hbrt.Value {
|
|
switch b {
|
|
case 'T', 't', 'Y', 'y':
|
|
return hbrt.MakeBool(true)
|
|
case 'F', 'f', 'N', 'n':
|
|
return hbrt.MakeBool(false)
|
|
default:
|
|
return hbrt.MakeNil() // space = uninitialized
|
|
}
|
|
}
|
|
|
|
func parseDateField(raw []byte, fieldLen byte) hbrt.Value {
|
|
if fieldLen == 8 {
|
|
// Standard: YYYYMMDD ASCII
|
|
s := string(raw)
|
|
if strings.TrimSpace(s) == "" {
|
|
return hbrt.MakeDate(0) // empty date
|
|
}
|
|
y := parseInt(s[0:4])
|
|
m := parseInt(s[4:6])
|
|
d := parseInt(s[6:8])
|
|
if y > 0 {
|
|
return hbrt.MakeDate(dateToJulian(y, m, d))
|
|
}
|
|
return hbrt.MakeDate(0)
|
|
}
|
|
if fieldLen == 3 {
|
|
// Short: LE uint24
|
|
julian := int64(raw[0]) | int64(raw[1])<<8 | int64(raw[2])<<16
|
|
return hbrt.MakeDate(julian)
|
|
}
|
|
if fieldLen == 4 {
|
|
// VFP: LE uint32 Julian
|
|
return hbrt.MakeDate(int64(binary.LittleEndian.Uint32(raw)))
|
|
}
|
|
return hbrt.MakeDate(0)
|
|
}
|
|
|
|
func parseMemoRef(raw []byte, fieldLen byte) hbrt.Value {
|
|
if fieldLen == 4 {
|
|
blockNo := binary.LittleEndian.Uint32(raw)
|
|
return hbrt.MakeLong(int64(blockNo))
|
|
}
|
|
if fieldLen == 10 {
|
|
s := strings.TrimSpace(string(raw))
|
|
if s == "" {
|
|
return hbrt.MakeLong(0)
|
|
}
|
|
n, _ := strconv.ParseInt(s, 10, 64)
|
|
return hbrt.MakeLong(n)
|
|
}
|
|
return hbrt.MakeLong(0)
|
|
}
|
|
|
|
func parseIntegerField(raw []byte, fieldLen byte) hbrt.Value {
|
|
switch fieldLen {
|
|
case 1:
|
|
return hbrt.MakeInt(int(int8(raw[0])))
|
|
case 2:
|
|
return hbrt.MakeInt(int(int16(binary.LittleEndian.Uint16(raw))))
|
|
case 3:
|
|
v := int32(raw[0]) | int32(raw[1])<<8 | int32(raw[2])<<16
|
|
if v&0x800000 != 0 {
|
|
v |= ^0xFFFFFF // sign extend
|
|
}
|
|
return hbrt.MakeInt(int(v))
|
|
case 4:
|
|
return hbrt.MakeInt(int(int32(binary.LittleEndian.Uint32(raw))))
|
|
case 8:
|
|
return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw)))
|
|
default:
|
|
return hbrt.MakeInt(0)
|
|
}
|
|
}
|
|
|
|
// --- Internal formatters ---
|
|
|
|
func formatNumericField(raw []byte, fieldLen, dec byte, val hbrt.Value) {
|
|
d := val.AsNumDouble()
|
|
format := "%" + strconv.Itoa(int(fieldLen)) + "." + strconv.Itoa(int(dec)) + "f"
|
|
s := []byte(fmt.Sprintf(format, d))
|
|
|
|
// If too wide, fill with asterisks (Harbour behavior)
|
|
if len(s) > int(fieldLen) {
|
|
for i := range raw {
|
|
raw[i] = '*'
|
|
}
|
|
return
|
|
}
|
|
|
|
// Right-align, space-pad left
|
|
copy(raw, s)
|
|
}
|
|
|
|
func putDateField(raw []byte, fieldLen byte, val hbrt.Value) {
|
|
if fieldLen == 8 {
|
|
if !val.IsDateTime() || val.AsJulian() == 0 {
|
|
copy(raw, " ")
|
|
return
|
|
}
|
|
y, m, d := julianToDate(val.AsJulian())
|
|
s := fmt.Sprintf("%04d%02d%02d", y, m, d)
|
|
copy(raw, s)
|
|
} else if fieldLen == 4 {
|
|
binary.LittleEndian.PutUint32(raw, uint32(val.AsJulian()))
|
|
}
|
|
}
|
|
|
|
// putMemoRef stores a memo block number in a memo field. A 4-byte field
// receives it as a little-endian uint32; a 10-byte field receives it as
// right-aligned, space-padded ASCII decimal. Fields of any other length
// are left untouched.
func putMemoRef(raw []byte, fieldLen byte, blockNo uint32) {
	switch fieldLen {
	case 4:
		binary.LittleEndian.PutUint32(raw, blockNo)
	case 10:
		copy(raw, fmt.Sprintf("%10d", blockNo))
	}
}
|
|
|
|
func putIntegerField(raw []byte, fieldLen byte, val hbrt.Value) {
|
|
n := val.AsNumInt()
|
|
switch fieldLen {
|
|
case 1:
|
|
raw[0] = byte(int8(n))
|
|
case 2:
|
|
binary.LittleEndian.PutUint16(raw, uint16(int16(n)))
|
|
case 4:
|
|
binary.LittleEndian.PutUint32(raw, uint32(int32(n)))
|
|
case 8:
|
|
binary.LittleEndian.PutUint64(raw, uint64(n))
|
|
}
|
|
}
|
|
|
|
// --- Julian date helpers ---
|
|
|
|
// dateToJulian converts a Gregorian calendar date to its Julian day
// number (2000-01-01 maps to 2451545). January and February are counted
// as months 13 and 14 of the preceding year. The inputs are not
// validated.
func dateToJulian(y, m, d int) int64 {
	year, month := y, m
	if month < 3 {
		year--
		month += 12
	}

	century := year / 100
	correction := 2 - century + century/4

	yearDays := int64(365.25 * float64(year+4716))
	monthDays := int64(30.6001 * float64(month+1))
	return yearDays + monthDays + int64(d+correction) - 1524
}
|
|
|
|
// julianToDate converts a Julian day number back to a Gregorian year,
// month and day using integer arithmetic only. A non-positive day number
// yields 0, 0, 0.
func julianToDate(julian int64) (y, m, d int) {
	if julian <= 0 {
		return 0, 0, 0
	}

	rest := julian + 68569
	cycles := 4 * rest / 146097
	rest -= (146097*cycles + 3) / 4

	yearPart := 4000 * (rest + 1) / 1461001
	rest = rest - 1461*yearPart/4 + 31

	monthPart := 80 * rest / 2447
	day := rest - 2447*monthPart/80

	// carry is 1 when monthPart runs past December into the next year.
	carry := monthPart / 11
	month := monthPart + 2 - 12*carry
	year := 100*(cycles-49) + yearPart + carry

	return int(year), int(month), int(day)
}
|
|
|
|
// parseInt builds a non-negative integer from the ASCII digits in s.
// Every character that is not a digit is skipped rather than treated as
// an error, so "1a2" gives 12 and a string without digits gives 0.
func parseInt(s string) int {
	total := 0
	for i := 0; i < len(s); i++ {
		ch := s[i]
		if ch < '0' || ch > '9' {
			continue
		}
		total = total*10 + int(ch-'0')
	}
	return total
}
|