feat(pgserver): binary-format param decoding (Phase 4.1)

pgx defaults to binary wire format for INT2/INT4/INT8/FLOAT4/FLOAT8/
BOOL/NUMERIC/DATE/TIMESTAMP/TIMESTAMPTZ — Go's most-used PG driver
ships nearly every typed parameter as binary unless explicitly told
to use text mode. The Phase 3 implementation only decoded INT4/INT8/
BOOL, so any pgx call with a decimal price, a timestamp, or a date
was silently mis-quoted into the SQL stream.

Decoders now cover the seven additional OIDs. The interesting one is
NUMERIC: PG's wire format is base-10000 digit groups plus a separate
displayed-scale, so the decoder rebuilds the decimal string from
weight+sign+ndigits+digits[] without going through float (which would
lose precision for NUMERIC(38,*) values). Pinned by vectors covering
zero / positive / negative / fractional-only / NaN / multi-group
integer + fraction cases.

DATE / TIMESTAMP decoders assume integer_datetimes=on (which the
server advertises in ParameterStatus). DATE's 4-byte day count and
TIMESTAMP's 8-byte microsecond delta from the PG epoch (2000-01-01
UTC) are converted via Go's time.Time machinery and re-emitted as
quoted SQL literals.

Text-format path also broadened: FLOAT4/FLOAT8/INT2 now transit
unquoted alongside INT4/INT8/BOOL/NUMERIC; the regression would have
been clients sending text-format floats getting them rewritten as
'1.5' (string literal) instead of 1.5 (numeric).

Verified: all 6 mandatory gates green (go test, SQL 43/43, compat
56/56, std.ch 17/17, FRB 7/7, pgserver 11/11). Five new decoder
tests pin each wire format against handcrafted PG payloads.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-22 10:02:15 +09:00
parent e83787750a
commit d7a81af7db
3 changed files with 338 additions and 9 deletions

View File

@@ -30,8 +30,10 @@ package pgserver
import (
"encoding/binary"
"fmt"
"math"
"strconv"
"strings"
"time"
"github.com/jackc/pgx/v5/pgproto3"
@@ -451,18 +453,25 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) {
return "NULL", nil
}
if format == 0 {
// Text format — quote per type. For numerics and bools we
// don't quote; for everything else we single-quote with
// inline-escape.
// Text format — quote per type. Numerics + bools transit
// unquoted; everything else (including DATE / TIMESTAMP in
// text form) gets single-quoted with embedded-quote escape.
switch oid {
case oidInt4, oidInt8, oidBool, oidNumeric:
case oidInt2, oidInt4, oidInt8, oidBool, oidNumeric, oidFloat4, oidFloat8:
return string(raw), nil
default:
return "'" + strings.ReplaceAll(string(raw), "'", "''") + "'", nil
}
}
// Binary format — decode the OIDs pgx uses by default.
// Binary format. pgx defaults to binary for INT*, FLOAT*, BOOL,
// NUMERIC, DATE, TIMESTAMP, TIMESTAMPTZ — decode each into a
// FiveSql2-shaped literal that the engine's lexer can re-parse.
switch oid {
case oidInt2:
if len(raw) != 2 {
return "", fmt.Errorf("int2 param: want 2 bytes, got %d", len(raw))
}
return strconv.FormatInt(int64(int16(binary.BigEndian.Uint16(raw))), 10), nil
case oidInt4:
if len(raw) != 4 {
return "", fmt.Errorf("int4 param: want 4 bytes, got %d", len(raw))
@@ -473,6 +482,18 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) {
return "", fmt.Errorf("int8 param: want 8 bytes, got %d", len(raw))
}
return strconv.FormatInt(int64(binary.BigEndian.Uint64(raw)), 10), nil
case oidFloat4:
if len(raw) != 4 {
return "", fmt.Errorf("float4 param: want 4 bytes, got %d", len(raw))
}
f := math.Float32frombits(binary.BigEndian.Uint32(raw))
return strconv.FormatFloat(float64(f), 'g', -1, 32), nil
case oidFloat8:
if len(raw) != 8 {
return "", fmt.Errorf("float8 param: want 8 bytes, got %d", len(raw))
}
f := math.Float64frombits(binary.BigEndian.Uint64(raw))
return strconv.FormatFloat(f, 'g', -1, 64), nil
case oidBool:
if len(raw) != 1 {
return "", fmt.Errorf("bool param: want 1 byte, got %d", len(raw))
@@ -481,6 +502,24 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) {
return "FALSE", nil
}
return "TRUE", nil
case oidNumeric:
s, err := decodeBinaryNumeric(raw)
if err != nil {
return "", err
}
return s, nil
case oidDate:
s, err := decodeBinaryDate(raw)
if err != nil {
return "", err
}
return "'" + s + "'", nil
case oidTimestamp, oidTimestamptz:
s, err := decodeBinaryTimestamp(raw)
if err != nil {
return "", err
}
return "'" + s + "'", nil
default:
// Unknown binary OID — fall back to a quoted hex literal.
// FiveSql2 won't accept this directly, but the resulting
@@ -488,3 +527,119 @@ func paramToLiteral(raw []byte, oid uint32, format int16) (string, error) {
return "'\\x" + fmt.Sprintf("%x", raw) + "'", nil
}
}
// decodeBinaryNumeric converts PostgreSQL's binary NUMERIC wire
// format (see PG source utils/adt/numeric.c, numeric_send /
// numeric_recv) to a plain decimal string. The layout is:
//
//	int16  ndigits          number of base-10000 "digits"
//	int16  weight           weight of the first digit, in base-10000 units
//	uint16 sign             0x0000 positive, 0x4000 negative, 0xC000 NaN
//	uint16 dscale           displayed scale (decimal places to show)
//	int16  digits[ndigits]  each in 0..9999
//
// The numeric value equals sign × Σ digits[i] × 10000^(weight − i).
//
// Output is a FiveSql2-parseable decimal literal — unquoted, no
// scientific notation, with exactly dscale digits after the decimal
// point so round-trip width is preserved. Fractional digits beyond
// dscale are truncated; missing ones are zero-padded. NaN comes back
// as the bare word "NaN".
//
// An error is returned when the header is truncated, when ndigits
// disagrees with the payload length, when the sign word is not one
// of the three values listed above (this includes the ±Infinity
// markers 0xD000 / 0xF000, which would otherwise be misread as a
// finite positive number), or when a digit group exceeds 9999 (which
// would otherwise widen a four-character group and shift every
// following digit).
func decodeBinaryNumeric(raw []byte) (string, error) {
	const (
		headerLen = 8
		signPos   = 0x0000
		signNeg   = 0x4000
		signNaN   = 0xC000
		groupMax  = 9999
		zeroGroup = "0000" // one base-10000 group of zeros
	)
	if len(raw) < headerLen {
		return "", fmt.Errorf("numeric param: header too short (%d bytes)", len(raw))
	}
	ndigits := int(int16(binary.BigEndian.Uint16(raw[0:2])))
	weight := int(int16(binary.BigEndian.Uint16(raw[2:4])))
	sign := binary.BigEndian.Uint16(raw[4:6])
	dscale := int(int16(binary.BigEndian.Uint16(raw[6:8])))
	// len(raw) >= headerLen, so passing this check also proves
	// ndigits >= 0.
	if ndigits*2+headerLen != len(raw) {
		return "", fmt.Errorf("numeric param: digit count mismatch (ndigits=%d, body=%d)", ndigits, len(raw)-headerLen)
	}
	switch sign {
	case signNaN:
		return "NaN", nil
	case signPos, signNeg:
		// finite value, decoded below
	default:
		return "", fmt.Errorf("numeric param: unsupported sign 0x%04x", sign)
	}
	digs := make([]uint16, ndigits)
	for i := range digs {
		d := binary.BigEndian.Uint16(raw[headerLen+2*i:])
		if d > groupMax {
			return "", fmt.Errorf("numeric param: digit %d out of range (%d)", i, d)
		}
		digs[i] = d
	}
	// padded renders one group as exactly four decimal characters;
	// safe because every group was range-checked above.
	padded := func(d uint16) string {
		s := strconv.Itoa(int(d))
		return zeroGroup[len(s):] + s
	}

	var sb strings.Builder
	if sign == signNeg {
		sb.WriteByte('-')
	}
	// Integer part: weight+1 base-10000 groups. Groups the payload
	// omits are trailing zeros. With a negative weight the integer
	// part is just "0".
	intGroups := weight + 1
	if intGroups <= 0 {
		sb.WriteByte('0')
	}
	for i := 0; i < intGroups; i++ {
		var d uint16
		if i < ndigits {
			d = digs[i]
		}
		if i == 0 {
			// Leading group carries no zero padding.
			sb.WriteString(strconv.Itoa(int(d)))
			continue
		}
		sb.WriteString(padded(d))
	}
	if dscale <= 0 {
		return sb.String(), nil
	}

	// Fractional part: emit exactly dscale characters, stopping as
	// soon as the quota is met so a very negative weight cannot make
	// us build a long run of zeros only to throw it away.
	sb.WriteByte('.')
	remaining := dscale
	emit := func(s string) {
		if len(s) > remaining {
			s = s[:remaining]
		}
		sb.WriteString(s)
		remaining -= len(s)
	}
	// When weight < -1 the first stored group already sits several
	// base-10000 positions past the decimal point; fill the gap.
	for g := intGroups; g < 0 && remaining > 0; g++ {
		emit(zeroGroup)
	}
	first := intGroups
	if first < 0 {
		first = 0
	}
	for i := first; i < ndigits && remaining > 0; i++ {
		emit(padded(digs[i]))
	}
	sb.WriteString(strings.Repeat("0", remaining))
	return sb.String(), nil
}
// decodeBinaryDate renders a PG binary DATE as "YYYY-MM-DD". The
// payload is a big-endian signed 32-bit count of days relative to
// pgEpoch (2000-01-01); any other payload length is an error.
func decodeBinaryDate(raw []byte) (string, error) {
	if n := len(raw); n != 4 {
		return "", fmt.Errorf("date param: want 4 bytes, got %d", n)
	}
	// Reinterpret as int32 first so dates before the epoch come out
	// as negative offsets.
	offset := int(int32(binary.BigEndian.Uint32(raw)))
	return pgEpoch.AddDate(0, 0, offset).Format("2006-01-02"), nil
}
// decodeBinaryTimestamp converts a PG binary TIMESTAMP / TIMESTAMPTZ
// (8-byte big-endian signed microseconds since pgEpoch = 2000-01-01)
// to "YYYY-MM-DD HH:MM:SS.ffffff". Any other payload length is an
// error. Encoding assumes integer_datetimes = on; we advertise that
// in ParameterStatus on connect so clients won't send the
// floating-point variant.
//
// The offset is applied as whole seconds plus a sub-second remainder
// through time.Unix rather than as a single time.Duration: Duration
// is an int64 nanosecond count, so multiplying the microsecond value
// by time.Microsecond overflows — and silently wraps to a wrong
// instant — once a timestamp lies more than roughly 290 years from
// the epoch.
func decodeBinaryTimestamp(raw []byte) (string, error) {
	if len(raw) != 8 {
		return "", fmt.Errorf("timestamp param: want 8 bytes, got %d", len(raw))
	}
	const usPerSecond = 1_000_000
	us := int64(binary.BigEndian.Uint64(raw))
	// Go's / and % truncate toward zero, so for pre-epoch values the
	// remainder is negative; time.Unix accepts an out-of-range nsec
	// and normalizes it.
	sec, rem := us/usPerSecond, us%usPerSecond
	t := time.Unix(pgEpoch.Unix()+sec, rem*int64(time.Microsecond)).UTC()
	return t.Format("2006-01-02 15:04:05.000000"), nil
}

View File

@@ -6,9 +6,11 @@ package pgserver
import (
"bytes"
"encoding/base64"
"encoding/binary"
"strconv"
"strings"
"testing"
"time"
"five/hbrt"
)
@@ -188,6 +190,171 @@ func TestCommandTagFor(t *testing.T) {
_ = strconv.Itoa // keep import; will be used in Phase 3 with row counts
}
// TestParamToLiteral_BinaryInts feeds hand-built binary payloads for
// INT2 / INT4 / INT8 (and BOOL) through paramToLiteral with format
// code 1 and checks the literal that comes back. Each width is
// exercised with 42 and with an all-ones pattern that must decode
// as -1, so a sign-extension slip is caught.
func TestParamToLiteral_BinaryInts(t *testing.T) {
	type vector struct {
		typeOID uint32
		payload []byte
		literal string
	}
	vectors := []vector{
		{oidInt2, []byte{0x00, 0x2a}, "42"},
		{oidInt2, []byte{0xff, 0xff}, "-1"},
		{oidInt4, []byte{0x00, 0x00, 0x00, 0x2a}, "42"},
		{oidInt4, []byte{0xff, 0xff, 0xff, 0xff}, "-1"},
		{oidInt8, []byte{0x00, 0, 0, 0, 0, 0, 0, 0x2a}, "42"},
		{oidInt8, []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, "-1"},
		{oidBool, []byte{0x01}, "TRUE"},
		{oidBool, []byte{0x00}, "FALSE"},
	}
	for i := range vectors {
		v := &vectors[i]
		got, err := paramToLiteral(v.payload, v.typeOID, 1)
		switch {
		case err != nil:
			t.Errorf("oid=%d raw=%x: unexpected error %v", v.typeOID, v.payload, err)
		case got != v.literal:
			t.Errorf("oid=%d raw=%x: got %q want %q", v.typeOID, v.payload, got, v.literal)
		}
	}
}
// TestParamToLiteral_BinaryFloats covers FLOAT4 + FLOAT8. We pin
// against bit patterns rather than decimal values to sidestep
// IEEE-754 print rounding noise — the test is about wire decoding,
// not formatter precision.
func TestParamToLiteral_BinaryFloats(t *testing.T) {
// 1.5 as float32 = 0x3FC00000
got, err := paramToLiteral([]byte{0x3f, 0xc0, 0x00, 0x00}, oidFloat4, 1)
if err != nil || got != "1.5" {
t.Errorf("float4 1.5: got %q err=%v", got, err)
}
// -42.0 as float32 = 0xC2280000
got, err = paramToLiteral([]byte{0xc2, 0x28, 0x00, 0x00}, oidFloat4, 1)
if err != nil || got != "-42" {
t.Errorf("float4 -42: got %q err=%v", got, err)
}
// 3.14 as float64 = 0x40091EB851EB851F
got, err = paramToLiteral([]byte{0x40, 0x09, 0x1e, 0xb8, 0x51, 0xeb, 0x85, 0x1f}, oidFloat8, 1)
if err != nil || got != "3.14" {
t.Errorf("float8 3.14: got %q err=%v", got, err)
}
}
// TestParamToLiteral_BinaryNumeric exercises the base-10000 → decimal
// conversion for binary NUMERIC parameters. Each vector is assembled
// field by field (ndigits, weight, sign, dscale, digit groups) so a
// mistake in how the decoder reads the layout shows up as a failed
// case here.
func TestParamToLiteral_BinaryNumeric(t *testing.T) {
	// encode lays out the four header words followed by the digit
	// groups, all big-endian. The ndigits header is taken from the
	// argument rather than from len(groups).
	encode := func(ndigits, weight int16, sign uint16, dscale int16, groups ...uint16) []byte {
		out := make([]byte, 8+2*len(groups))
		header := [4]uint16{uint16(ndigits), uint16(weight), sign, uint16(dscale)}
		for i, w := range header {
			binary.BigEndian.PutUint16(out[2*i:2*i+2], w)
		}
		for i, g := range groups {
			binary.BigEndian.PutUint16(out[8+2*i:10+2*i], g)
		}
		return out
	}
	vectors := []struct {
		label   string
		payload []byte
		literal string
	}{
		// Zero is a bare header with no digit groups.
		{"zero", encode(0, 0, 0x0000, 0), "0"},
		// 99.95: groups [99, 9500], weight 0, two decimals shown.
		{"99.95", encode(2, 0, 0x0000, 2, 99, 9500), "99.95"},
		// -1234.5: negative sign, groups [1234, 5000], one decimal.
		{"-1234.5", encode(2, 0, 0x4000, 1, 1234, 5000), "-1234.5"},
		// 12345.67: weight 1, groups [1, 2345, 6700].
		{"12345.67", encode(3, 1, 0x0000, 2, 1, 2345, 6700), "12345.67"},
		// 0.0001: weight -1, single group [1], four decimals.
		{"0.0001", encode(1, -1, 0x0000, 4, 1), "0.0001"},
		// NaN is signalled purely by the sign word.
		{"NaN", encode(0, 0, 0xC000, 0), "NaN"},
	}
	for i := range vectors {
		v := &vectors[i]
		got, err := paramToLiteral(v.payload, oidNumeric, 1)
		switch {
		case err != nil:
			t.Errorf("%s: unexpected error %v", v.label, err)
		case got != v.literal:
			t.Errorf("%s: got %q want %q", v.label, got, v.literal)
		}
	}
}
// TestParamToLiteral_BinaryDateTime checks the binary DATE, TIMESTAMP
// and TIMESTAMPTZ decoders. Payloads are offsets from pgEpoch — days
// for DATE, microseconds for the timestamp types — and the expected
// result is a single-quoted SQL literal.
func TestParamToLiteral_BinaryDateTime(t *testing.T) {
	// DATE 2026-05-22. The day offset is derived with the time
	// package instead of being counted by hand, so the expected
	// value cannot suffer from a leap-year miscount.
	day := time.Date(2026, 5, 22, 0, 0, 0, 0, time.UTC)
	var dateRaw [4]byte
	binary.BigEndian.PutUint32(dateRaw[:], uint32(int32(day.Sub(pgEpoch)/(24*time.Hour))))
	if got, err := paramToLiteral(dateRaw[:], oidDate, 1); err != nil || got != "'2026-05-22'" {
		t.Errorf("date 2026-05-22: got %q err=%v", got, err)
	}

	// TIMESTAMP 2026-05-22 12:34:56.000123; 123_000 ns is 123 µs.
	// The microsecond offset likewise comes from time arithmetic.
	instant := time.Date(2026, 5, 22, 12, 34, 56, 123_000, time.UTC)
	var tsRaw [8]byte
	binary.BigEndian.PutUint64(tsRaw[:], uint64(instant.Sub(pgEpoch).Microseconds()))

	// TIMESTAMPTZ is expected to decode the same payload to the
	// same literal as TIMESTAMP.
	probes := []struct {
		label   string
		typeOID uint32
	}{
		{"timestamp", oidTimestamp},
		{"timestamptz", oidTimestamptz},
	}
	for _, p := range probes {
		got, err := paramToLiteral(tsRaw[:], p.typeOID, 1)
		if err != nil || got != "'2026-05-22 12:34:56.000123'" {
			t.Errorf("%s: got %q err=%v", p.label, got, err)
		}
	}
}
// TestParamToLiteral_TextFormat covers format code 0. Numeric OIDs
// (INT2, FLOAT4, FLOAT8, NUMERIC) must pass through bare, while TEXT,
// DATE and TIMESTAMP must come back single-quoted with any embedded
// quote doubled.
func TestParamToLiteral_TextFormat(t *testing.T) {
	type vector struct {
		typeOID uint32
		input   string
		literal string
	}
	vectors := []vector{
		{oidInt2, "32767", "32767"},
		{oidFloat4, "1.5", "1.5"},
		{oidFloat8, "3.14", "3.14"},
		{oidNumeric, "99.95", "99.95"},
		{oidText, "hello", "'hello'"},
		{oidText, "it's", "'it''s'"},
		{oidDate, "2026-05-22", "'2026-05-22'"},
		{oidTimestamp, "2026-05-22 12:34:56", "'2026-05-22 12:34:56'"},
	}
	for i := range vectors {
		v := &vectors[i]
		got, err := paramToLiteral([]byte(v.input), v.typeOID, 0)
		switch {
		case err != nil:
			t.Errorf("oid=%d raw=%q: error %v", v.typeOID, v.input, err)
		case got != v.literal:
			t.Errorf("oid=%d raw=%q: got %q want %q", v.typeOID, v.input, got, v.literal)
		}
	}
}
// TestSCRAMParseClientFirst verifies the gs2-header strip + attr
// parse for the SCRAM client-first message. Vector matches what
// libpq + pgx + JDBC all emit (channel-binding flag "n", empty

View File

@@ -16,14 +16,25 @@ import (
// their decoders off these.
const (
// Type OIDs, used as the switch key in paramToLiteral to pick a
// decoder. The widths noted below are the binary payload lengths
// that paramToLiteral and its helpers enforce.
oidBool = 16 // 1 byte; decoded to TRUE / FALSE
oidInt2 = 21 // 2 bytes, signed big-endian
oidInt4 = 23 // 4 bytes
oidInt8 = 20 // 8 bytes, signed big-endian
oidFloat4 = 700 // 4 bytes, IEEE-754 bit pattern
oidFloat8 = 701 // 8 bytes, IEEE-754 bit pattern
oidNumeric = 1700 // variable length; see decodeBinaryNumeric
oidText = 25 // no binary decoder; single-quoted in text format
oidDate = 1082 // 4 bytes, signed days since pgEpoch
oidTimestamp = 1114 // 8 bytes, signed microseconds since pgEpoch
oidTimestamptz = 1184 // decoded by the same routine as oidTimestamp
)
// pgEpoch is the PostgreSQL binary date/time epoch — 2000-01-01
// 00:00:00 UTC. DATE counts days since this point (signed int32);
// TIMESTAMP and TIMESTAMPTZ count microseconds since this point
// (signed int64), with integer_datetimes=on (which we advertise via
// ParameterStatus in session.run). decodeBinaryDate and
// decodeBinaryTimestamp use it as the zero point for those offsets.
var pgEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
// pgTypeFor returns (OID, declared-size). The declared size is -1
// for variable-width types (per PG convention). Sample is one
// representative value from the column; NIL falls back to TEXT
@@ -141,7 +152,3 @@ func julianToYMD(j int64) (year, month, day int) {
return
}
// Force time package import — we'll need it for Timestamp parsing
// when extended protocol lands. Stub function keeps the import
// from being pruned in v1.0-skeleton.
var _ = time.Date