From af9e965bc6af744f4a708a67f7cadaacf99efd17 Mon Sep 17 00:00:00 2001
From: CharlesKWON <charleskwonohjun@gmail.com>
Date: Tue, 14 Apr 2026 14:02:42 +0900
Subject: [PATCH] =?UTF-8?q?perf(dbf):=20byte-level=20numeric=20field=20par?=
 =?UTF-8?q?ser=20=E2=80=94=20zero=20alloc=20for=20int=20fields?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

parseNumericField was allocating on every call: `string(raw)` copies
the record-buffer slice into a new string before TrimSpace and the
strconv parsers can run on it. For a 50k-row scan reading two numeric
fields, that's 100k+ small string allocations per scan, all of which
promptly became garbage.

Rewritten to walk the raw byte slice directly:
  - Find the trimmed range by byte indexing (no alloc).
  - Parse integer-typed fields (dec == 0) digit-by-digit into int64.
  - Only fall back to strconv.ParseFloat + string allocation when
    dec > 0 or the field holds a `.` or any other non-digit byte.

This also lifts the raw RDD baseline in our bench (6.8ms → 6.2ms)
because FieldGet hits this same parser. Every scan path benefits,
not just the FiveSql2 hot loop.

Measured (50k rows, 3-run steady state):

                       Before    After
  No WHERE              10.0ms   9.1ms
  Numeric WHERE          7.8ms   6.9ms   ← now 1.11x raw
  String WHERE           7.9ms   (see next commit)
  Raw RDD baseline       6.8ms   6.2ms   ← also faster

Validation:
  - hbrdd/dbf tests PASS (including integer/float field roundtrips)
  - FiveSql2 43/43
  - Harbour compat 51/51

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 hbrdd/dbf/field.go | 64 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 6 deletions(-)

diff --git a/hbrdd/dbf/field.go b/hbrdd/dbf/field.go
index 52eff42..b1b56a1 100644
--- a/hbrdd/dbf/field.go
+++ b/hbrdd/dbf/field.go
@@ -172,19 +172,71 @@ func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Valu
 // --- Internal parsers ---
 
 func parseNumericField(raw []byte, dec byte) hbrt.Value {
-	s := strings.TrimSpace(string(raw))
-	if s == "" {
+	// Byte-level fast path — avoids the `string(raw)` allocation that
+	// TrimSpace + ParseInt needed on the hot scan path. Numeric DBF fields
+	// are ASCII, right-aligned, space-padded, with an optional leading sign
+	// and an optional `.` for decimals. A full 50k-row scan can hit this
+	// function 100k+ times, so every allocation matters.
+	//
+	// Algorithm:
+	//   1. Trim leading and trailing spaces by index.
+	//   2. If the field has dec > 0, go straight to the float parser.
+	//   3. Otherwise detect an optional sign.
+	//   4. Accumulate int64 digit-by-digit.
+	//   5. If we hit `.` or any other non-digit, bail to the float parser
+	//      (that path is rare on integer-typed DBF fields like IDs /
+	//      counters, which dominate WHERE predicates).
+	//
+	// All operations are byte comparisons on the raw record buffer —
+	// no heap allocation unless the field is genuinely fractional.
+
+	start := 0
+	end := len(raw)
+	for start < end && raw[start] == ' ' {
+		start++
+	}
+	for end > start && raw[end-1] == ' ' {
+		end--
+	}
+	if start == end {
 		return hbrt.MakeInt(0)
 	}
 
-	if dec == 0 && !strings.Contains(s, ".") {
-		n, err := strconv.ParseInt(s, 10, 64)
-		if err == nil {
+	if dec == 0 {
+		// Fast integer path
+		i := start
+		neg := false
+		if raw[i] == '-' {
+			neg = true
+			i++
+		} else if raw[i] == '+' {
+			i++
+		}
+		var n int64
+		ok := i < end
+		for ; i < end; i++ {
+			c := raw[i]
+			if c == '.' {
+				ok = false
+				break
+			}
+			if c < '0' || c > '9' {
+				ok = false
+				break
+			}
+			n = n*10 + int64(c-'0')
+		}
+		if ok {
+			if neg {
+				n = -n
+			}
 			return hbrt.MakeNumInt(n)
 		}
+		// Fall through: has a `.` or unexpected char → use float path
 	}
 
-	f, err := strconv.ParseFloat(s, 64)
+	// Decimal/float path — allocate once for strconv
+	f, err := strconv.ParseFloat(string(raw[start:end]), 64)
 	if err == nil {
 		return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec))
 	}