fix(rtl,pp): pre-release safety round (Wave 2)

Five concrete gaps the audit flagged in the new __dbCopy / __dbSort /
__dbTotal / __dbJoin / PP code:

  * wam.Close() errors were dropped on the floor. Caller saw `.T.`
    even when the just-written DBF wasn't durable, leading to the
    classic "delete the source after the COPY succeeds" data-loss
    pattern. All four functions now capture the close error and
    return `.F.` if it fired.

  * drv.Create succeeded → wam.Open failed → orphaned-on-disk DBF.
    The user-named target file was left around with zero records,
    and the next call's drv.Create silently truncated it instead of
    surfacing the original error. Add `os.Remove(cFile)` on the
    Open-failure cleanup path for COPY/SORT/TOTAL/JOIN.

  * __dbTotal would write the DBF codec's overflow sentinel
    (`*****`) into the destination's sum-fields when a group total
    didn't fit in the source's declared field width, and still
    return `.T.`. Now: precompute each sum-field's max representable
    magnitude (10^(Len-Dec)) at start, mark the run as overflowed if
    any flush sees an out-of-range or NaN value, and propagate
    `.F.` to the caller so they don't trust the file.

  * cleanUnreferencedMarkers walked byte-by-byte and stripped any
    `<ident>` token in the result, INCLUDING ones that appear
    inside `"..."` / `'...'` string literals. A user expression
    like `LIST FOR url == "<a>x</a>"` got the `<a>` and `</a>`
    eaten on output. Now: track string-literal state and skip the
    cleanup pass while inside one. Bracket-strings `[…]` are
    intentionally not treated as strings here — the result template
    uses `[...]` as the optional-repeat marker, and disambiguating
    needs context the cleanup pass doesn't have.

  * (#8 SET SAFETY honoring) deferred. Harbour default is SAFETY
    OFF, so the current always-overwrite behavior matches default
    Harbour. The divergence only matters when user explicitly does
    `SET SAFETY ON`, which Five doesn't support yet — so the
    no-overwrite-protection is consistent end-to-end. Tracked as a
    separate followup.

Gates green:
  go test ./...      : PASS
  FiveSql2 SQL:1999  : 43/43
  Harbour compat     : 56/56

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-01 07:54:41 +09:00
parent 000500e034
commit f30704a854
2 changed files with 101 additions and 11 deletions

View File

@@ -1059,30 +1059,54 @@ func referencedMarkers(s string) []string {
return out return out
} }
// cleanUnreferencedMarkers removes any remaining <name>, <(name)>, <.name.>, #<name> references. // cleanUnreferencedMarkers removes any remaining <name>, <(name)>,
// Only removes well-formed PP marker references, not comparison operators. // <.name.>, #<name> references. Only removes well-formed PP marker
// references, not comparison operators. Skips over PRG string
// literals ("...", '...', [...]) so a captured value containing
// `<a>` text (e.g. "<a>http://x</a>" inside a regex/string) isn't
// gutted — that pass used to corrupt arbitrary string content.
func cleanUnreferencedMarkers(s string) string { func cleanUnreferencedMarkers(s string) string {
// Match patterns like <identifier>, <(identifier)>, <.identifier.>, #<identifier>
var out strings.Builder var out strings.Builder
i := 0 i := 0
inStr := byte(0)
for i < len(s) { for i < len(s) {
c := s[i]
// Inside a string literal: copy until the matching closer.
// Bracket-strings `[...]` are PRG-specific but are also used
// as the result template's optional-repeat brackets, so we
// leave them out of this pass — only `'…'` and `"…"` are
// unambiguous strings here.
if inStr != 0 {
out.WriteByte(c)
if c == inStr {
inStr = 0
}
i++
continue
}
if c == '"' || c == '\'' {
inStr = c
out.WriteByte(c)
i++
continue
}
removed := false removed := false
// #<name> // #<name>
if s[i] == '#' && i+1 < len(s) && s[i+1] == '<' { if c == '#' && i+1 < len(s) && s[i+1] == '<' {
if end := findMarkerEnd(s, i+1); end > 0 { if end := findMarkerEnd(s, i+1); end > 0 {
i = end i = end
removed = true removed = true
} }
} }
// <name>, <(name)>, <.name.>, <"name"> // <name>, <(name)>, <.name.>, <"name">
if !removed && s[i] == '<' { if !removed && c == '<' {
if end := findMarkerEnd(s, i); end > 0 { if end := findMarkerEnd(s, i); end > 0 {
i = end i = end
removed = true removed = true
} }
} }
if !removed { if !removed {
out.WriteByte(s[i]) out.WriteByte(c)
i++ i++
} }
} }

View File

@@ -9,6 +9,7 @@ package hbrtl
import ( import (
"fmt" "fmt"
"os"
"strings" "strings"
"five/hbrt" "five/hbrt"
@@ -884,6 +885,11 @@ func rtlDbCopy(t *hbrt.Thread) {
srcSel := wam.CurrentNum() srcSel := wam.CurrentNum()
dstSel, err := wam.Open("DBFNTX", cFile, "__copytmp", false, false) dstSel, err := wam.Open("DBFNTX", cFile, "__copytmp", false, false)
if err != nil { if err != nil {
// drv.Create wrote the file but Open failed (alias collision,
// area-table full, ...). Without cleanup the user is left
// with a stale-zero-row DBF on disk that the next call will
// silently truncate again.
_ = os.Remove(cFile)
t.RetBool(false) t.RetBool(false)
return return
} }
@@ -926,9 +932,16 @@ func rtlDbCopy(t *hbrt.Thread) {
} }
// Close the destination, leaving the source selected as on entry. // Close the destination, leaving the source selected as on entry.
// A close error means the just-written DBF may be partially
// flushed — surface it so the caller doesn't assume the file is
// durable and proceed to delete the source.
wam.SelectByNum(dstSel) wam.SelectByNum(dstSel)
wam.Close() closeErr := wam.Close()
wam.SelectByNum(srcSel) wam.SelectByNum(srcSel)
if closeErr != nil {
t.RetBool(false)
return
}
t.RetBool(true) t.RetBool(true)
} }
@@ -1103,6 +1116,7 @@ func rtlDbSort(t *hbrt.Thread) {
srcSel := wam.CurrentNum() srcSel := wam.CurrentNum()
dstSel, err := wam.Open("DBFNTX", cFile, "__sorttmp", false, false) dstSel, err := wam.Open("DBFNTX", cFile, "__sorttmp", false, false)
if err != nil { if err != nil {
_ = os.Remove(cFile)
t.RetBool(false) t.RetBool(false)
return return
} }
@@ -1113,8 +1127,13 @@ func rtlDbSort(t *hbrt.Thread) {
dstArea.PutValue(i, v) dstArea.PutValue(i, v)
} }
} }
wam.Close() wam.SelectByNum(dstSel)
closeErr := wam.Close()
wam.SelectByNum(srcSel) wam.SelectByNum(srcSel)
if closeErr != nil {
t.RetBool(false)
return
}
t.RetBool(true) t.RetBool(true)
} }
@@ -1364,6 +1383,7 @@ func rtlDbTotal(t *hbrt.Thread) {
srcSel := wam.CurrentNum() srcSel := wam.CurrentNum()
dstSel, err := wam.Open("DBFNTX", cFile, "__totaltmp", false, false) dstSel, err := wam.Open("DBFNTX", cFile, "__totaltmp", false, false)
if err != nil { if err != nil {
_ = os.Remove(cFile)
t.RetBool(false) t.RetBool(false)
return return
} }
@@ -1374,6 +1394,28 @@ func rtlDbTotal(t *hbrt.Thread) {
var prevKey hbrt.Value var prevKey hbrt.Value
haveGroup := false haveGroup := false
running := make([]float64, len(sums)) running := make([]float64, len(sums))
overflowed := false
// Pre-compute the max-magnitude representable in each sum-field:
// 10^(Len-Dec) - 10^(-Dec). Anything at or beyond this gets
// formatted as `*****` by the DBF codec, so we surface the issue
// instead of writing garbage.
maxAbs := make([]float64, len(sums))
for i, sp := range sums {
fi := dstFields[sp.dst]
intDigits := fi.Len - fi.Dec
if fi.Dec > 0 {
intDigits--
}
if intDigits < 0 {
intDigits = 0
}
m := 1.0
for d := 0; d < intDigits; d++ {
m *= 10
}
maxAbs[i] = m
}
flush := func() { flush := func() {
if !haveGroup { if !haveGroup {
@@ -1385,7 +1427,15 @@ func rtlDbTotal(t *hbrt.Thread) {
// preserving the field's declared length/decimals. // preserving the field's declared length/decimals.
for i, sp := range sums { for i, sp := range sums {
fi := dstFields[sp.dst] fi := dstFields[sp.dst]
dstArea.PutValue(sp.dst, hbrt.MakeDouble(running[i], uint16(fi.Len), uint16(fi.Dec))) v := running[i]
abs := v
if abs < 0 {
abs = -abs
}
if v != v || abs >= maxAbs[i] { // NaN or out-of-range
overflowed = true
}
dstArea.PutValue(sp.dst, hbrt.MakeDouble(v, uint16(fi.Len), uint16(fi.Dec)))
} }
wam.SelectByNum(srcSel) wam.SelectByNum(srcSel)
for i := range running { for i := range running {
@@ -1446,8 +1496,19 @@ func rtlDbTotal(t *hbrt.Thread) {
flush() flush()
wam.SelectByNum(dstSel) wam.SelectByNum(dstSel)
wam.Close() closeErr := wam.Close()
wam.SelectByNum(srcSel) wam.SelectByNum(srcSel)
if closeErr != nil {
t.RetBool(false)
return
}
if overflowed {
// One or more group totals didn't fit in the destination
// field's declared width. The DBF codec wrote `*****` for
// those cells; flag the caller so they don't trust the file.
t.RetBool(false)
return
}
t.RetBool(true) t.RetBool(true)
} }
@@ -1601,6 +1662,7 @@ func rtlDbJoin(t *hbrt.Thread) {
} }
dstSel, err := wam.Open("DBFNTX", cFile, "__jointmp", false, false) dstSel, err := wam.Open("DBFNTX", cFile, "__jointmp", false, false)
if err != nil { if err != nil {
_ = os.Remove(cFile)
t.RetBool(false) t.RetBool(false)
return return
} }
@@ -1645,8 +1707,12 @@ func rtlDbJoin(t *hbrt.Thread) {
} }
wam.SelectByNum(dstSel) wam.SelectByNum(dstSel)
wam.Close() closeErr := wam.Close()
wam.SelectByNum(masterSel) wam.SelectByNum(masterSel)
if closeErr != nil {
t.RetBool(false)
return
}
t.RetBool(true) t.RetBool(true)
} }