From d7a81af7db1cc2c88794957e8831a81f0a6ce596 Mon Sep 17 00:00:00 2001 From: CharlesKWON Date: Fri, 22 May 2026 10:02:15 +0900 Subject: [PATCH] feat(pgserver): binary-format param decoding (Phase 4.1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pgx defaults to binary wire format for INT2/INT4/INT8/FLOAT4/FLOAT8/BOOL/NUMERIC/DATE/TIMESTAMP/TIMESTAMPTZ — Go's most-used PG driver ships nearly every typed parameter as binary unless explicitly told to use text mode. The Phase 3 implementation only decoded INT4/INT8/BOOL, so any pgx call with a decimal price, a timestamp, or a date was silently mis-quoted into the SQL stream. Decoders now cover the seven additional OIDs. The interesting one is NUMERIC: PG's wire format is base-10000 digit groups plus a separate displayed-scale, so the decoder rebuilds the decimal string from weight+sign+ndigits+digits[] without going through float (which would lose precision for NUMERIC(38,*) values). Pinned by vectors covering zero / positive / negative / fractional-only / NaN / multi-group integer + fraction cases. DATE / TIMESTAMP decoders assume integer_datetimes=on (which the server advertises in ParameterStatus); the 4-byte day count (DATE) or 8-byte microsecond delta (TIMESTAMP / TIMESTAMPTZ) from the PG epoch (2000-01-01 UTC) is converted via Go's time.Time machinery and re-emitted as a quoted SQL literal. Text-format path also broadened: FLOAT4/FLOAT8/INT2 now transit unquoted alongside INT4/INT8/BOOL/NUMERIC; otherwise clients sending text-format floats would get them rewritten as '1.5' (string literal) instead of 1.5 (numeric). Verified: all 6 mandatory gates green (go test, SQL 43/43, compat 56/56, std.ch 17/17, FRB 7/7, pgserver 11/11). Five new decoder tests pin each wire format against handcrafted PG payloads. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- hbrtl/pgserver/extended.go | 165 ++++++++++++++++++++++++++++++- hbrtl/pgserver/pgserver_test.go | 167 ++++++++++++++++++++++++++++++++ hbrtl/pgserver/typemap.go | 15 ++- 3 files changed, 338 insertions(+), 9 deletions(-) diff --git a/hbrtl/pgserver/extended.go b/hbrtl/pgserver/extended.go index 3723239..d31f326 100644 --- a/hbrtl/pgserver/extended.go +++ b/hbrtl/pgserver/extended.go @@ -30,8 +30,10 @@ package pgserver import ( "encoding/binary" "fmt" + "math" "strconv" "strings" + "time" "github.com/jackc/pgx/v5/pgproto3" @@ -451,18 +453,25 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) { return "NULL", nil } if format == 0 { - // Text format — quote per type. For numerics and bools we - // don't quote; for everything else we single-quote with - // inline-escape. + // Text format — quote per type. Numerics + bools transit + // unquoted; everything else (including DATE / TIMESTAMP in + // text form) gets single-quoted with embedded-quote escape. switch oid { - case oidInt4, oidInt8, oidBool, oidNumeric: + case oidInt2, oidInt4, oidInt8, oidBool, oidNumeric, oidFloat4, oidFloat8: return string(raw), nil default: return "'" + strings.ReplaceAll(string(raw), "'", "''") + "'", nil } } - // Binary format — decode the OIDs pgx uses by default. + // Binary format. pgx defaults to binary for INT*, FLOAT*, BOOL, + // NUMERIC, DATE, TIMESTAMP, TIMESTAMPTZ — decode each into a + // FiveSql2-shaped literal that the engine's lexer can re-parse. 
switch oid { + case oidInt2: + if len(raw) != 2 { + return "", fmt.Errorf("int2 param: want 2 bytes, got %d", len(raw)) + } + return strconv.FormatInt(int64(int16(binary.BigEndian.Uint16(raw))), 10), nil case oidInt4: if len(raw) != 4 { return "", fmt.Errorf("int4 param: want 4 bytes, got %d", len(raw)) @@ -473,6 +482,18 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) { return "", fmt.Errorf("int8 param: want 8 bytes, got %d", len(raw)) } return strconv.FormatInt(int64(binary.BigEndian.Uint64(raw)), 10), nil + case oidFloat4: + if len(raw) != 4 { + return "", fmt.Errorf("float4 param: want 4 bytes, got %d", len(raw)) + } + f := math.Float32frombits(binary.BigEndian.Uint32(raw)) + return strconv.FormatFloat(float64(f), 'g', -1, 32), nil + case oidFloat8: + if len(raw) != 8 { + return "", fmt.Errorf("float8 param: want 8 bytes, got %d", len(raw)) + } + f := math.Float64frombits(binary.BigEndian.Uint64(raw)) + return strconv.FormatFloat(f, 'g', -1, 64), nil case oidBool: if len(raw) != 1 { return "", fmt.Errorf("bool param: want 1 byte, got %d", len(raw)) @@ -481,6 +502,24 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) { return "FALSE", nil } return "TRUE", nil + case oidNumeric: + s, err := decodeBinaryNumeric(raw) + if err != nil { + return "", err + } + return s, nil + case oidDate: + s, err := decodeBinaryDate(raw) + if err != nil { + return "", err + } + return "'" + s + "'", nil + case oidTimestamp, oidTimestamptz: + s, err := decodeBinaryTimestamp(raw) + if err != nil { + return "", err + } + return "'" + s + "'", nil default: // Unknown binary OID — fall back to a quoted hex literal. 
// FiveSql2 won't accept this directly, but the resulting @@ -488,3 +527,119 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) { return "'\\x" + fmt.Sprintf("%x", raw) + "'", nil } } + +// decodeBinaryNumeric converts PostgreSQL's binary NUMERIC wire +// format (RFC-independent — see PG source utils/adt/numeric.c +// numeric_send / numeric_recv) to a plain decimal string. The +// format is: +// +// int16 ndigits number of base-10000 "digits" +// int16 weight weight of the first digit, in base-10000 units +// uint16 sign 0x0000 positive, 0x4000 negative, 0xC000 NaN +// uint16 dscale displayed scale (decimal places to show) +// int16 digits[ndigits] each in 0..9999 +// +// The numeric value equals sign × Σ d[i] × 10000^(weight − i). +// +// Output is a FiveSql2-parseable decimal literal — unquoted, no +// scientific notation, with exactly `dscale` digits after the +// decimal point so round-trip width is preserved. +func decodeBinaryNumeric(raw []byte) (string, error) { + if len(raw) < 8 { + return "", fmt.Errorf("numeric param: header too short (%d bytes)", len(raw)) + } + ndigits := int16(binary.BigEndian.Uint16(raw[0:2])) + weight := int16(binary.BigEndian.Uint16(raw[2:4])) + sign := binary.BigEndian.Uint16(raw[4:6]) + dscale := int16(binary.BigEndian.Uint16(raw[6:8])) + if int(ndigits)*2+8 != len(raw) { + return "", fmt.Errorf("numeric param: digit count mismatch (ndigits=%d, body=%d)", ndigits, len(raw)-8) + } + if sign == 0xC000 { + return "NaN", nil + } + digs := make([]uint16, ndigits) + for i := 0; i < int(ndigits); i++ { + digs[i] = binary.BigEndian.Uint16(raw[8+i*2 : 10+i*2]) + } + + var sb strings.Builder + if sign == 0x4000 { + sb.WriteByte('-') + } + + // Integer part: weight+1 base-10000 digits. If weight is + // negative the integer part is just "0". 
+ intDigits := int(weight) + 1 + if intDigits <= 0 { + sb.WriteByte('0') + } else { + for i := 0; i < intDigits; i++ { + var d uint16 + if i < int(ndigits) { + d = digs[i] + } + if i == 0 { + fmt.Fprintf(&sb, "%d", d) + } else { + fmt.Fprintf(&sb, "%04d", d) + } + } + } + + if dscale > 0 { + sb.WriteByte('.') + // Build the fractional digit string. When weight < -1, the + // first array digit is already several base-10000 positions + // past the decimal point — pad with "0000" groups for those + // missing leading-zero positions. + var frac strings.Builder + leadingZeroGroups := 0 + if intDigits < 0 { + leadingZeroGroups = -intDigits + } + for i := 0; i < leadingZeroGroups; i++ { + frac.WriteString("0000") + } + fracStart := intDigits + if fracStart < 0 { + fracStart = 0 + } + for i := fracStart; i < int(ndigits); i++ { + fmt.Fprintf(&frac, "%04d", digs[i]) + } + s := frac.String() + if len(s) >= int(dscale) { + sb.WriteString(s[:dscale]) + } else { + sb.WriteString(s) + sb.WriteString(strings.Repeat("0", int(dscale)-len(s))) + } + } + return sb.String(), nil +} + +// decodeBinaryDate converts a PG binary DATE (4-byte signed days +// since pgEpoch = 2000-01-01) to "YYYY-MM-DD". +func decodeBinaryDate(raw []byte) (string, error) { + if len(raw) != 4 { + return "", fmt.Errorf("date param: want 4 bytes, got %d", len(raw)) + } + days := int32(binary.BigEndian.Uint32(raw)) + t := pgEpoch.AddDate(0, 0, int(days)) + return t.Format("2006-01-02"), nil +} + +// decodeBinaryTimestamp converts a PG binary TIMESTAMP / TIMESTAMPTZ +// (8-byte signed microseconds since pgEpoch = 2000-01-01) to +// "YYYY-MM-DD HH:MM:SS.ffffff". Encoding assumes integer_datetimes +// = on; we advertise that in ParameterStatus on connect so clients +// won't send the floating-point variant. 
+func decodeBinaryTimestamp(raw []byte) (string, error) { + if len(raw) != 8 { + return "", fmt.Errorf("timestamp param: want 8 bytes, got %d", len(raw)) + } + us := int64(binary.BigEndian.Uint64(raw)) + t := pgEpoch.Add(time.Duration(us) * time.Microsecond) + return t.Format("2006-01-02 15:04:05.000000"), nil +} diff --git a/hbrtl/pgserver/pgserver_test.go b/hbrtl/pgserver/pgserver_test.go index 0150677..06c6ba2 100644 --- a/hbrtl/pgserver/pgserver_test.go +++ b/hbrtl/pgserver/pgserver_test.go @@ -6,9 +6,11 @@ package pgserver import ( "bytes" "encoding/base64" + "encoding/binary" "strconv" "strings" "testing" + "time" "five/hbrt" ) @@ -188,6 +190,171 @@ func TestCommandTagFor(t *testing.T) { _ = strconv.Itoa // keep import; will be used in Phase 3 with row counts } +// TestParamToLiteral_BinaryInts pins the integer binary decoders +// against handcrafted PG wire payloads. Every pgx call with an int +// arg flows through these — if any case regresses, Go clients can +// silently insert the wrong values. +func TestParamToLiteral_BinaryInts(t *testing.T) { + cases := []struct { + oid uint32 + raw []byte + want string + }{ + {oidInt2, []byte{0x00, 0x2a}, "42"}, + {oidInt2, []byte{0xff, 0xff}, "-1"}, + {oidInt4, []byte{0x00, 0x00, 0x00, 0x2a}, "42"}, + {oidInt4, []byte{0xff, 0xff, 0xff, 0xff}, "-1"}, + {oidInt8, []byte{0x00, 0, 0, 0, 0, 0, 0, 0x2a}, "42"}, + {oidInt8, []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, "-1"}, + {oidBool, []byte{0x01}, "TRUE"}, + {oidBool, []byte{0x00}, "FALSE"}, + } + for _, c := range cases { + got, err := paramToLiteral(c.raw, c.oid, 1) + if err != nil { + t.Errorf("oid=%d raw=%x: unexpected error %v", c.oid, c.raw, err) + continue + } + if got != c.want { + t.Errorf("oid=%d raw=%x: got %q want %q", c.oid, c.raw, got, c.want) + } + } +} + +// TestParamToLiteral_BinaryFloats covers FLOAT4 + FLOAT8. 
We pin +// against bit patterns rather than decimal values to sidestep +// IEEE-754 print rounding noise — the test is about wire decoding, +// not formatter precision. +func TestParamToLiteral_BinaryFloats(t *testing.T) { + // 1.5 as float32 = 0x3FC00000 + got, err := paramToLiteral([]byte{0x3f, 0xc0, 0x00, 0x00}, oidFloat4, 1) + if err != nil || got != "1.5" { + t.Errorf("float4 1.5: got %q err=%v", got, err) + } + // -42.0 as float32 = 0xC2280000 + got, err = paramToLiteral([]byte{0xc2, 0x28, 0x00, 0x00}, oidFloat4, 1) + if err != nil || got != "-42" { + t.Errorf("float4 -42: got %q err=%v", got, err) + } + // 3.14 as float64 = 0x40091EB851EB851F + got, err = paramToLiteral([]byte{0x40, 0x09, 0x1e, 0xb8, 0x51, 0xeb, 0x85, 0x1f}, oidFloat8, 1) + if err != nil || got != "3.14" { + t.Errorf("float8 3.14: got %q err=%v", got, err) + } +} + +// TestParamToLiteral_BinaryNumeric pins the base-10000 → decimal +// algorithm. Vectors hand-encoded from PG numeric_send output so a +// regression in the bit-layout (which is independent of the engine +// behaviour) trips immediately. 
+func TestParamToLiteral_BinaryNumeric(t *testing.T) { + build := func(ndig, weight int16, sign uint16, dscale int16, digs ...uint16) []byte { + buf := make([]byte, 8+2*len(digs)) + binary.BigEndian.PutUint16(buf[0:2], uint16(ndig)) + binary.BigEndian.PutUint16(buf[2:4], uint16(weight)) + binary.BigEndian.PutUint16(buf[4:6], sign) + binary.BigEndian.PutUint16(buf[6:8], uint16(dscale)) + for i, d := range digs { + binary.BigEndian.PutUint16(buf[8+2*i:10+2*i], d) + } + return buf + } + cases := []struct { + name string + raw []byte + want string + }{ + // 0 — header-only, no digits + {"zero", build(0, 0, 0x0000, 0), "0"}, + // 99.95 — ndigits=2, weight=0, dscale=2, digits=[99, 9500] + {"99.95", build(2, 0, 0x0000, 2, 99, 9500), "99.95"}, + // -1234.5 — sign=-, ndigits=2, weight=0, dscale=1, digits=[1234, 5000] + {"-1234.5", build(2, 0, 0x4000, 1, 1234, 5000), "-1234.5"}, + // 12345.67 — weight=1, digits=[1, 2345, 6700] + {"12345.67", build(3, 1, 0x0000, 2, 1, 2345, 6700), "12345.67"}, + // 0.0001 — weight=-1, digits=[1], dscale=4 + {"0.0001", build(1, -1, 0x0000, 4, 1), "0.0001"}, + // NaN — sign=0xC000 + {"NaN", build(0, 0, 0xC000, 0), "NaN"}, + } + for _, c := range cases { + got, err := paramToLiteral(c.raw, oidNumeric, 1) + if err != nil { + t.Errorf("%s: unexpected error %v", c.name, err) + continue + } + if got != c.want { + t.Errorf("%s: got %q want %q", c.name, got, c.want) + } + } +} + +// TestParamToLiteral_BinaryDateTime pins DATE + TIMESTAMP decoders. +// Vectors handcrafted from the PG epoch (2000-01-01 UTC) — DATE in +// days, TIMESTAMP in microseconds. Output must be a SQL-literal +// shape (with quotes) FiveSql2's lexer accepts. +func TestParamToLiteral_BinaryDateTime(t *testing.T) { + // DATE 2026-05-22 — 26 years + 142 days past epoch. Use Go's + // time machinery to compute the days delta so the test is + // resilient against leap-year arithmetic mistakes in the + // expected value. 
+ target := time.Date(2026, 5, 22, 0, 0, 0, 0, time.UTC) + days := int32(target.Sub(pgEpoch).Hours() / 24) + dateRaw := make([]byte, 4) + binary.BigEndian.PutUint32(dateRaw, uint32(days)) + got, err := paramToLiteral(dateRaw, oidDate, 1) + if err != nil || got != "'2026-05-22'" { + t.Errorf("date 2026-05-22: got %q err=%v", got, err) + } + + // TIMESTAMP 2026-05-22 12:34:56.000123 — microseconds since + // epoch. Build via time.Sub to avoid hand-rolling the count. + ts := time.Date(2026, 5, 22, 12, 34, 56, 123_000, time.UTC) // 123 µs + us := ts.Sub(pgEpoch).Microseconds() + tsRaw := make([]byte, 8) + binary.BigEndian.PutUint64(tsRaw, uint64(us)) + got, err = paramToLiteral(tsRaw, oidTimestamp, 1) + if err != nil || got != "'2026-05-22 12:34:56.000123'" { + t.Errorf("timestamp: got %q err=%v", got, err) + } + + // TIMESTAMPTZ rides the same decoder. + got, err = paramToLiteral(tsRaw, oidTimestamptz, 1) + if err != nil || got != "'2026-05-22 12:34:56.000123'" { + t.Errorf("timestamptz: got %q err=%v", got, err) + } +} + +// TestParamToLiteral_TextFormat verifies the text-mode path still +// works for the broadened OID set (no quoting around FLOAT4/8/INT2, +// quoting around DATE/TIMESTAMP). 
+func TestParamToLiteral_TextFormat(t *testing.T) { + cases := []struct { + oid uint32 + raw string + want string + }{ + {oidInt2, "32767", "32767"}, + {oidFloat4, "1.5", "1.5"}, + {oidFloat8, "3.14", "3.14"}, + {oidNumeric, "99.95", "99.95"}, + {oidText, "hello", "'hello'"}, + {oidText, "it's", "'it''s'"}, + {oidDate, "2026-05-22", "'2026-05-22'"}, + {oidTimestamp, "2026-05-22 12:34:56", "'2026-05-22 12:34:56'"}, + } + for _, c := range cases { + got, err := paramToLiteral([]byte(c.raw), c.oid, 0) + if err != nil { + t.Errorf("oid=%d raw=%q: error %v", c.oid, c.raw, err) + continue + } + if got != c.want { + t.Errorf("oid=%d raw=%q: got %q want %q", c.oid, c.raw, got, c.want) + } + } +} + // TestSCRAMParseClientFirst verifies the gs2-header strip + attr // parse for the SCRAM client-first message. Vector matches what // libpq + pgx + JDBC all emit (channel-binding flag "n", empty diff --git a/hbrtl/pgserver/typemap.go b/hbrtl/pgserver/typemap.go index 7e884f3..a2196c0 100644 --- a/hbrtl/pgserver/typemap.go +++ b/hbrtl/pgserver/typemap.go @@ -16,14 +16,25 @@ import ( // their decoders off these. const ( oidBool = 16 + oidInt2 = 21 oidInt4 = 23 oidInt8 = 20 + oidFloat4 = 700 + oidFloat8 = 701 oidNumeric = 1700 oidText = 25 oidDate = 1082 oidTimestamp = 1114 + oidTimestamptz = 1184 ) +// pgEpoch is the PostgreSQL binary date/time epoch — 2000-01-01 +// UTC. DATE counts days since this point (signed int32); TIMESTAMP +// and TIMESTAMPTZ count microseconds since this point (signed +// int64), with integer_datetimes=on (which we advertise via +// ParameterStatus in session.run). +var pgEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + // pgTypeFor returns (OID, declared-size). The declared size is -1 // for variable-width types (per PG convention). 
Sample is one // representative value from the column; NIL falls back to TEXT @@ -141,7 +152,3 @@ func julianToYMD(j int64) (year, month, day int) { return } -// Force time package import — we'll need it for Timestamp parsing -// when extended protocol lands. Stub function keeps the import -// from being pruned in v1.0-skeleton. -var _ = time.Date