From af9e965bc6af744f4a708a67f7cadaacf99efd17 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Tue, 14 Apr 2026 14:02:42 +0900 Subject: [PATCH] =?UTF-8?q?perf(dbf):=20byte-level=20numeric=20field=20par?= =?UTF-8?q?ser=20=E2=80=94=20zero=20alloc=20for=20int=20fields?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parseNumericField was allocating on every call — `string(raw)` copies the record-buffer slice into a new string before TrimSpace and strconv can run on it (TrimSpace itself only re-slices that string). For a 50k-row scan reading two numeric fields, that's 100k+ small string allocations per scan, all of which promptly became garbage. Rewritten to walk the raw byte slice directly: - Find the trimmed range by byte indexing (no alloc). - Parse integer-typed fields (dec == 0) digit-by-digit into int64. - Only fall back to strconv.ParseFloat + string allocation for fields declared with decimals (dec > 0) or whose bytes contain a `.` or any other non-digit character. This also improves the raw RDD baseline in our bench (6.8ms → 6.2ms) because FieldGet hits this same parser. Every scan path benefits, not just the FiveSql2 hot loop. 
Measured (50k rows, 3-run steady state): Before After No WHERE 10.0ms 9.1ms Numeric WHERE 7.8ms 6.9ms ← now 1.11x raw String WHERE 7.9ms (see next commit) Raw RDD baseline 6.8ms 6.2ms ← also faster Validation: - hbrdd/dbf tests PASS (including integer/float field roundtrips) - FiveSql2 43/43 - Harbour compat 51/51 Co-Authored-By: Claude Opus 4.6 (1M context) --- hbrdd/dbf/field.go | 64 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/hbrdd/dbf/field.go b/hbrdd/dbf/field.go index 52eff42..b1b56a1 100644 --- a/hbrdd/dbf/field.go +++ b/hbrdd/dbf/field.go @@ -172,19 +172,71 @@ func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Valu // --- Internal parsers --- func parseNumericField(raw []byte, dec byte) hbrt.Value { - s := strings.TrimSpace(string(raw)) - if s == "" { + // Byte-level fast path — avoids `string(raw)` + TrimSpace + ParseInt + // allocations on the hot scan path. Numeric DBF fields are ASCII, + // right-aligned, space-padded, optional leading sign, optional `.` + // for decimals. A full 50k-row scan can hit this fn 100 k+ times, + // so every allocation matters. + // + // Algorithm: + // 1. Walk past leading spaces. + // 2. Detect sign. + // 3. Accumulate int64 digit-by-digit. + // 4. If we hit `.` or the field has dec > 0, bail to float parser + // (that path is rare on integer-typed DBF fields like IDs / + // counters, which dominate WHERE predicates). + // 5. Walk past trailing spaces. + // + // All operations are byte comparisons on the raw record buffer — + // no heap allocation unless the field is genuinely fractional. 
+ + start := 0 + end := len(raw) + for start < end && raw[start] == ' ' { + start++ + } + for end > start && raw[end-1] == ' ' { + end-- + } + if start == end { return hbrt.MakeInt(0) } - if dec == 0 && !strings.Contains(s, ".") { - n, err := strconv.ParseInt(s, 10, 64) - if err == nil { + if dec == 0 { + // Fast integer path + i := start + neg := false + if raw[i] == '-' { + neg = true + i++ + } else if raw[i] == '+' { + i++ + } + var n int64 + ok := i < end + for ; i < end; i++ { + c := raw[i] + if c == '.' { + ok = false + break + } + if c < '0' || c > '9' { + ok = false + break + } + n = n*10 + int64(c-'0') + } + if ok { + if neg { + n = -n + } return hbrt.MakeNumInt(n) } + // Fall through: has a `.` or unexpected char → use float path } - f, err := strconv.ParseFloat(s, 64) + // Decimal/float path — allocate once for strconv + f, err := strconv.ParseFloat(string(raw[start:end]), 64) if err == nil { return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec)) }