perf(dbf): byte-level numeric field parser — zero alloc for int fields
parseNumericField was allocating on every call — `string(raw)` to
convert the record-buffer slice to a string, plus the implicit
allocation from TrimSpace's return value. For a 50k-row scan reading
two numeric fields, that's 100k+ small string allocations per scan,
all of which promptly became garbage.
Rewritten to walk the raw byte slice directly:
- Find the trimmed range by byte indexing (no alloc).
- Parse integer-typed fields (dec == 0) digit-by-digit into int64.
- Only fall back to strconv.ParseFloat + string allocation for
genuinely fractional data (dec > 0 or embedded `.`).
This also speeds up the raw RDD baseline in our bench (6.8ms → 6.2ms)
because FieldGet hits this same parser. Every scan path benefits,
not just the FiveSql2 hot loop.
Measured (50k rows, 3-run steady state):
                     Before    After
  No WHERE           10.0ms    9.1ms
  Numeric WHERE       7.8ms    6.9ms   ← now 1.11x raw
  String WHERE        7.9ms    (see next commit)
  Raw RDD baseline    6.8ms    6.2ms   ← also faster
Validation:
- hbrdd/dbf tests PASS (including integer/float field roundtrips)
- FiveSql2 43/43
- Harbour compat 51/51
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -172,19 +172,71 @@ func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Valu
|
||||
// --- Internal parsers ---
|
||||
|
||||
func parseNumericField(raw []byte, dec byte) hbrt.Value {
|
||||
s := strings.TrimSpace(string(raw))
|
||||
if s == "" {
|
||||
// Byte-level fast path — avoids `string(raw)` + TrimSpace + ParseInt
|
||||
// allocations on the hot scan path. Numeric DBF fields are ASCII,
|
||||
// right-aligned, space-padded, optional leading sign, optional `.`
|
||||
// for decimals. A full 50k-row scan can hit this fn 100 k+ times,
|
||||
// so every allocation matters.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1. Walk past leading spaces.
|
||||
// 2. Detect sign.
|
||||
// 3. Accumulate int64 digit-by-digit.
|
||||
// 4. If we hit `.` or the field has dec > 0, bail to float parser
|
||||
// (that path is rare on integer-typed DBF fields like IDs /
|
||||
// counters, which dominate WHERE predicates).
|
||||
// 5. Walk past trailing spaces.
|
||||
//
|
||||
// All operations are byte comparisons on the raw record buffer —
|
||||
// no heap allocation unless the field is genuinely fractional.
|
||||
|
||||
start := 0
|
||||
end := len(raw)
|
||||
for start < end && raw[start] == ' ' {
|
||||
start++
|
||||
}
|
||||
for end > start && raw[end-1] == ' ' {
|
||||
end--
|
||||
}
|
||||
if start == end {
|
||||
return hbrt.MakeInt(0)
|
||||
}
|
||||
|
||||
if dec == 0 && !strings.Contains(s, ".") {
|
||||
n, err := strconv.ParseInt(s, 10, 64)
|
||||
if err == nil {
|
||||
if dec == 0 {
|
||||
// Fast integer path
|
||||
i := start
|
||||
neg := false
|
||||
if raw[i] == '-' {
|
||||
neg = true
|
||||
i++
|
||||
} else if raw[i] == '+' {
|
||||
i++
|
||||
}
|
||||
var n int64
|
||||
ok := i < end
|
||||
for ; i < end; i++ {
|
||||
c := raw[i]
|
||||
if c == '.' {
|
||||
ok = false
|
||||
break
|
||||
}
|
||||
if c < '0' || c > '9' {
|
||||
ok = false
|
||||
break
|
||||
}
|
||||
n = n*10 + int64(c-'0')
|
||||
}
|
||||
if ok {
|
||||
if neg {
|
||||
n = -n
|
||||
}
|
||||
return hbrt.MakeNumInt(n)
|
||||
}
|
||||
// Fall through: has a `.` or unexpected char → use float path
|
||||
}
|
||||
|
||||
f, err := strconv.ParseFloat(s, 64)
|
||||
// Decimal/float path — allocate once for strconv
|
||||
f, err := strconv.ParseFloat(string(raw[start:end]), 64)
|
||||
if err == nil {
|
||||
return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user