// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
// All rights reserved.

// DBF field type conversion: raw bytes ↔ Five Value.
// Each field type (C, N, L, D, M, I, B, @, etc.) has exact byte format.
//
// Reference: /mnt/d/harbour-core/src/rdd/dbf1.c (getValue/putValue)
//            docs/dbf-engine-spec.md Section 3
package dbf

import (
	"encoding/binary"
	"five/hbrt"
	"fmt"
	"math"
	"strconv"
	"strings"
)

// GetFieldValue converts raw record bytes to a Five Value.
// Harbour: hb_dbfGetValue in dbf1.c
//
// recBuf is the record buffer, offset is the byte position of the field
// inside recBuf, and field supplies the Type/Len/Dec used for decoding.
// It delegates to getFieldValueImpl with stable=false, so character data
// is copied out of recBuf instead of aliasing it.
func GetFieldValue(recBuf []byte, offset uint16, field *FieldDesc) hbrt.Value {
	return getFieldValueImpl(recBuf, offset, field, false)
}

// getFieldValueImpl is the zero-copy-aware variant. When stable=true the
// caller guarantees the recBuf bytes won't be mutated, freed, or
// unmapped for the Value's lifetime — then CHAR fields alias the
// buffer and skip the `string([]byte)` copy.
//
// NOTE: currently unexported because naive usage (even with mmap-backed
// buffers) can produce UAF when FiveSql2 closes/packs temp CTE tables
// while CHAR values from earlier iterations are still referenced. The
// machinery is kept for a future refcounted mmap lifetime scheme.
func getFieldValueImpl(recBuf []byte, offset uint16, field *FieldDesc, stable bool) hbrt.Value { raw := recBuf[offset : offset+uint16(field.Len)] switch field.Type { case 'C', 'c': // Character if stable { return hbrt.MakeStringBytes(raw) } return hbrt.MakeString(string(raw)) case 'N', 'n': // Numeric (ASCII) return parseNumericField(raw, field.Dec) case 'L', 'l': // Logical return parseLogicalField(raw[0]) case 'D', 'd': // Date return parseDateField(raw, field.Len) case 'M', 'm': // Memo (block reference) return parseMemoRef(raw, field.Len) case 'I', 'i': // Integer (binary LE) return parseIntegerField(raw, field.Len) case 'B', 'b': // Double (IEEE 754 LE) if field.Len == 8 { bits := binary.LittleEndian.Uint64(raw) return hbrt.MakeDoubleAuto(math.Float64frombits(bits)) } return hbrt.MakeNil() case '@': // Timestamp (4 bytes date + 4 bytes time, LE) if field.Len >= 8 { julian := int64(binary.LittleEndian.Uint32(raw[0:4])) timeMs := int32(binary.LittleEndian.Uint32(raw[4:8])) return hbrt.MakeTimestamp(julian, timeMs) } return hbrt.MakeNil() case '+': // Autoincrement (binary LE integer) return parseIntegerField(raw, field.Len) case '=': // Modtime (same as Timestamp) if field.Len >= 8 { julian := int64(binary.LittleEndian.Uint32(raw[0:4])) timeMs := int32(binary.LittleEndian.Uint32(raw[4:8])) return hbrt.MakeTimestamp(julian, timeMs) } return hbrt.MakeNil() case '^': // RowVersion (uint64 LE) if field.Len == 8 { return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw))) } return hbrt.MakeNil() case 'Y', 'y': // Currency (int64 LE, implicit 4 decimal places) if field.Len == 8 { cents := int64(binary.LittleEndian.Uint64(raw)) return hbrt.MakeDouble(float64(cents)/10000.0, 20, 4) } return hbrt.MakeNil() case 'T', 't': // Timestamp (Harbour extension) if field.Len >= 8 { julian := int64(binary.LittleEndian.Uint32(raw[0:4])) timeMs := int32(binary.LittleEndian.Uint32(raw[4:8])) return hbrt.MakeTimestamp(julian, timeMs) } if field.Len == 4 { // Time only timeMs := 
int32(binary.LittleEndian.Uint32(raw[0:4])) return hbrt.MakeTimestamp(0, timeMs) } return hbrt.MakeNil() default: // Unknown type: return as string return hbrt.MakeString(string(raw)) } } // PutFieldValue converts a Five Value to raw record bytes. // Harbour: hb_dbfPutValue in dbf1.c func PutFieldValue(recBuf []byte, offset uint16, field *FieldDesc, val hbrt.Value) { raw := recBuf[offset : offset+uint16(field.Len)] switch field.Type { case 'C', 'c': // Character s := val.AsString() copy(raw, s) // Pad with spaces if len(s) < int(field.Len) { for i := len(s); i < int(field.Len); i++ { raw[i] = ' ' } } case 'N', 'n': // Numeric (ASCII, right-aligned, space-padded) formatNumericField(raw, field.Len, field.Dec, val) case 'L', 'l': // Logical if val.IsNil() { raw[0] = ' ' } else if val.AsBool() { raw[0] = 'T' } else { raw[0] = 'F' } case 'D', 'd': // Date putDateField(raw, field.Len, val) case 'M', 'm': // Memo (block reference) // Memo writes handled by MemoHandler // Here just store block number if val.IsNumInt() { putMemoRef(raw, field.Len, uint32(val.AsNumInt())) } case 'I', 'i', '+': // Integer / Autoincrement putIntegerField(raw, field.Len, val) case 'B', 'b': // Double (IEEE 754 LE) if field.Len == 8 { binary.LittleEndian.PutUint64(raw, math.Float64bits(val.AsNumDouble())) } case '@', '=', 'T', 't': // Timestamp / Modtime if field.Len >= 8 { binary.LittleEndian.PutUint32(raw[0:4], uint32(val.AsJulian())) binary.LittleEndian.PutUint32(raw[4:8], uint32(val.AsTimeMs())) } case 'Y', 'y': // Currency if field.Len == 8 { cents := int64(val.AsNumDouble() * 10000.0) binary.LittleEndian.PutUint64(raw, uint64(cents)) } case '^': // RowVersion if field.Len == 8 { binary.LittleEndian.PutUint64(raw, uint64(val.AsLong())) } default: // Unknown: write as string s := val.AsString() copy(raw, s) } } // --- Internal parsers --- func parseNumericField(raw []byte, dec byte) hbrt.Value { // Byte-level fast path — avoids `string(raw)` + TrimSpace + ParseInt // allocations on the hot 
scan path. Numeric DBF fields are ASCII, // right-aligned, space-padded, optional leading sign, optional `.` // for decimals. A full 50k-row scan can hit this fn 100 k+ times, // so every allocation matters. // // Algorithm: // 1. Walk past leading spaces. // 2. Detect sign. // 3. Accumulate int64 digit-by-digit. // 4. If we hit `.` or the field has dec > 0, bail to float parser // (that path is rare on integer-typed DBF fields like IDs / // counters, which dominate WHERE predicates). // 5. Walk past trailing spaces. // // All operations are byte comparisons on the raw record buffer — // no heap allocation unless the field is genuinely fractional. start := 0 end := len(raw) for start < end && raw[start] == ' ' { start++ } for end > start && raw[end-1] == ' ' { end-- } if start == end { return hbrt.MakeInt(0) } if dec == 0 { // Fast integer path i := start neg := false if raw[i] == '-' { neg = true i++ } else if raw[i] == '+' { i++ } var n int64 ok := i < end for ; i < end; i++ { c := raw[i] if c == '.' { ok = false break } if c < '0' || c > '9' { ok = false break } n = n*10 + int64(c-'0') } if ok { if neg { n = -n } return hbrt.MakeNumInt(n) } // Fall through: has a `.` or unexpected char → use float path } // Byte-level float parse for N(w,d) with d > 0 — avoids the // string(raw) + strconv.ParseFloat allocation on the hot path. // Profile (bench_bulk): parseNumericField was 23% of flat CPU, // dominated by this allocation. i := start neg := false if raw[i] == '-' { neg = true i++ } else if raw[i] == '+' { i++ } var intPart int64 var sawDigit bool for ; i < end; i++ { c := raw[i] if c == '.' { break } if c < '0' || c > '9' { // Unexpected char — fall back to strconv for correctness. if f, err := strconv.ParseFloat(string(raw[start:end]), 64); err == nil { return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec)) } return hbrt.MakeInt(0) } intPart = intPart*10 + int64(c-'0') sawDigit = true } var fracPart int64 var fracLen int if i < end && raw[i] == '.' 
{ i++ for ; i < end; i++ { c := raw[i] if c < '0' || c > '9' { if f, err := strconv.ParseFloat(string(raw[start:end]), 64); err == nil { return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec)) } return hbrt.MakeInt(0) } fracPart = fracPart*10 + int64(c-'0') fracLen++ sawDigit = true } } if !sawDigit { return hbrt.MakeDouble(0, uint16(len(raw)), uint16(dec)) } var f float64 if fracLen == 0 { f = float64(intPart) } else { f = float64(intPart) + float64(fracPart)/pow10f(fracLen) } if neg { f = -f } return hbrt.MakeDouble(f, uint16(len(raw)), uint16(dec)) } // pow10Table — precomputed 10^n for small n. DBF numeric fields rarely // exceed 10 decimal places; the table covers the common range without // calling math.Pow on the hot path. var pow10Table = [20]float64{ 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, } func pow10f(n int) float64 { if n >= 0 && n < len(pow10Table) { return pow10Table[n] } return math.Pow(10, float64(n)) } func parseLogicalField(b byte) hbrt.Value { switch b { case 'T', 't', 'Y', 'y': return hbrt.MakeBool(true) case 'F', 'f', 'N', 'n': return hbrt.MakeBool(false) default: return hbrt.MakeNil() // space = uninitialized } } func parseDateField(raw []byte, fieldLen byte) hbrt.Value { if fieldLen == 8 { // Standard: YYYYMMDD ASCII s := string(raw) if strings.TrimSpace(s) == "" { return hbrt.MakeDate(0) // empty date } y := parseInt(s[0:4]) m := parseInt(s[4:6]) d := parseInt(s[6:8]) if y > 0 { return hbrt.MakeDate(dateToJulian(y, m, d)) } return hbrt.MakeDate(0) } if fieldLen == 3 { // Short: LE uint24 julian := int64(raw[0]) | int64(raw[1])<<8 | int64(raw[2])<<16 return hbrt.MakeDate(julian) } if fieldLen == 4 { // VFP: LE uint32 Julian return hbrt.MakeDate(int64(binary.LittleEndian.Uint32(raw))) } return hbrt.MakeDate(0) } func parseMemoRef(raw []byte, fieldLen byte) hbrt.Value { if fieldLen == 4 { blockNo := binary.LittleEndian.Uint32(raw) return 
hbrt.MakeLong(int64(blockNo)) } if fieldLen == 10 { // Inline byte-level parse: same pattern as parseNumericField. // Avoids string(raw) + strings.TrimSpace + strconv.ParseInt // — roughly 3× faster and allocation-free. var n int64 for _, c := range raw { switch { case c == ' ': // Leading/trailing space — keep current accumulator case c >= '0' && c <= '9': n = n*10 + int64(c-'0') default: // Malformed block ref — treat as 0, same as strconv.ParseInt // would on the non-digit prefix. return hbrt.MakeLong(0) } } return hbrt.MakeLong(n) } return hbrt.MakeLong(0) } func parseIntegerField(raw []byte, fieldLen byte) hbrt.Value { switch fieldLen { case 1: return hbrt.MakeInt(int(int8(raw[0]))) case 2: return hbrt.MakeInt(int(int16(binary.LittleEndian.Uint16(raw)))) case 3: v := int32(raw[0]) | int32(raw[1])<<8 | int32(raw[2])<<16 if v&0x800000 != 0 { v |= ^0xFFFFFF // sign extend } return hbrt.MakeInt(int(v)) case 4: return hbrt.MakeInt(int(int32(binary.LittleEndian.Uint32(raw)))) case 8: return hbrt.MakeLong(int64(binary.LittleEndian.Uint64(raw))) default: return hbrt.MakeInt(0) } } // --- Internal formatters --- func formatNumericField(raw []byte, fieldLen, dec byte, val hbrt.Value) { d := val.AsNumDouble() // NaN/Inf → asterisks (Harbour: field width overflow marker) if math.IsNaN(d) || math.IsInf(d, 0) { for i := range raw { raw[i] = '*' } return } // Use strconv.AppendFloat into a stack-allocated scratch buffer. // Skips fmt.Sprintf's format-string parsing and its temporary // string allocation — 3–5× faster per write, zero heap allocs on // the hot path. 48 bytes fits any DBF numeric field (max 20 len). var scratch [48]byte s := strconv.AppendFloat(scratch[:0], d, 'f', int(dec), 64) // Overflow → asterisks, same as before. if len(s) > int(fieldLen) { for i := range raw { raw[i] = '*' } return } // Right-align, space-pad left. 
padLen := int(fieldLen) - len(s) for i := 0; i < padLen; i++ { raw[i] = ' ' } copy(raw[padLen:], s) } func putDateField(raw []byte, fieldLen byte, val hbrt.Value) { if fieldLen == 8 { if !val.IsDateTime() || val.AsJulian() == 0 { copy(raw, " ") return } y, m, d := julianToDate(val.AsJulian()) s := fmt.Sprintf("%04d%02d%02d", y, m, d) copy(raw, s) } else if fieldLen == 4 { binary.LittleEndian.PutUint32(raw, uint32(val.AsJulian())) } } func putMemoRef(raw []byte, fieldLen byte, blockNo uint32) { if fieldLen == 4 { binary.LittleEndian.PutUint32(raw, blockNo) } else if fieldLen == 10 { s := fmt.Sprintf("%10d", blockNo) copy(raw, s) } } func putIntegerField(raw []byte, fieldLen byte, val hbrt.Value) { n := val.AsNumInt() switch fieldLen { case 1: raw[0] = byte(int8(n)) case 2: binary.LittleEndian.PutUint16(raw, uint16(int16(n))) case 4: binary.LittleEndian.PutUint32(raw, uint32(int32(n))) case 8: binary.LittleEndian.PutUint64(raw, uint64(n)) } } // --- Julian date helpers --- func dateToJulian(y, m, d int) int64 { if m <= 2 { y-- m += 12 } a := y / 100 b := 2 - a + a/4 return int64(365.25*float64(y+4716)) + int64(30.6001*float64(m+1)) + int64(d+b) - 1524 } func julianToDate(julian int64) (y, m, d int) { if julian <= 0 { return 0, 0, 0 } l := julian + 68569 n := 4 * l / 146097 l = l - (146097*n+3)/4 i := 4000 * (l + 1) / 1461001 l = l - 1461*i/4 + 31 j := 80 * l / 2447 d = int(l - 2447*j/80) l = j / 11 m = int(j + 2 - 12*l) y = int(100*(n-49) + i + l) return } func parseInt(s string) int { n := 0 for _, c := range s { if c >= '0' && c <= '9' { n = n*10 + int(c-'0') } } return n }