fix(pp): apply rules to every ;-separated statement on a line
Until now applyRules looked at the *first* token of each physical
line. PRG legitimately packs multiple statements on a single line
with `;` as an intra-line separator (e.g. `dbCommit(); CLOSE ALL`),
and after Wave 1 removed the parser's xBase fallback for CLOSE/
COMMIT/etc., a `;`-separated `CLOSE ALL` on a line that started
with another statement would slip past std.ch entirely. The parser
then saw `CLOSE` / `ALL` as IDENTifiers, the runtime tried to
dispatch `CLOSE` as a function, and the user got a "no function
symbol for call" panic at execution time.
Fix: at applyRules entry, check for top-level `;` (paren / bracket
/ brace / string-literal balanced), split the line into statement
segments, recursively apply rules to each, rejoin with `;`. Two
new helpers (`hasTopLevelSemi` / `splitTopLevelSemi`) keep the
balancing logic small and self-contained.
Found by compiling _FiveSql2/test/test_sql_extreme.prg, which packs
the typical xBase one-liner DBF setup `dbAppend(); FieldPut(...);
...; dbCommit(); CLOSE ALL` across many rows of test data. The
test was panicking at the first such line; with this fix it now
runs to completion: 15/15 PASS.
All FiveSql2 SQL tests green together for the first time:
test_sql1999 : 43/43
test_sql1999_hard : 10/10
test_sql_extreme : 15/15
test_sql_challenge : 15/15
--
83 / 83
Other gates green:
go test ./... : PASS
Harbour compat : 56/56
std.ch suite : 14/14
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -457,15 +457,105 @@ func (pp *Preprocessor) resolveInclude(currentFile, inclFile string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// hasTopLevelSemi tells whether s contains a `;` that is neither inside
// a quoted literal nor enclosed by an open (), [] or {} group.
//
// Quoted literals start at `"` or `'` and end at the next occurrence of
// the same delimiter; no escape sequences are interpreted. A closing
// bracket with no matching opener is ignored, so the nesting count never
// drops below zero. An unterminated literal hides everything after it.
func hasTopLevelSemi(s string) bool {
	var (
		quote   byte // delimiter of the literal being scanned; 0 when outside one
		nesting int  // number of currently open ( [ { groups
	)
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case quote != 0:
			// Inside a literal only the matching delimiter is significant.
			if ch == quote {
				quote = 0
			}
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '(' || ch == '[' || ch == '{':
			nesting++
		case ch == ')' || ch == ']' || ch == '}':
			// Stray closers are tolerated rather than driving the count negative.
			if nesting > 0 {
				nesting--
			}
		case ch == ';' && nesting == 0:
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// splitTopLevelSemi cuts s at every `;` that lies outside quoted
// literals and outside any open (), [] or {} group, returning the
// pieces in order without the separators.
//
// Literals begin at `"` or `'` and run to the next identical delimiter;
// escapes are not interpreted. Unmatched closing brackets are ignored.
// The result always has at least one element: a string with no
// top-level `;` comes back as a single piece, and a leading, trailing
// or doubled `;` yields an empty piece at that position. Joining the
// pieces with ";" therefore reproduces s exactly.
func splitTopLevelSemi(s string) []string {
	var (
		segments []string
		quote    byte // delimiter of the literal being scanned; 0 when outside one
		nesting  int  // number of currently open ( [ { groups
		segStart int  // byte offset where the current piece begins
	)
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case quote != 0:
			// Inside a literal only the matching delimiter is significant.
			if ch == quote {
				quote = 0
			}
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == '(' || ch == '[' || ch == '{':
			nesting++
		case ch == ')' || ch == ']' || ch == '}':
			// Stray closers are tolerated rather than driving the count negative.
			if nesting > 0 {
				nesting--
			}
		case ch == ';' && nesting == 0:
			segments = append(segments, s[segStart:pos])
			segStart = pos + 1
		}
	}
	// The tail after the last separator (possibly empty) is always emitted.
	return append(segments, s[segStart:])
}
|
||||||
|
|
||||||
// applyRules applies #command and #translate rules to a line.
|
// applyRules applies #command and #translate rules to a line.
|
||||||
// #command rules are tried first (they match complete statements).
|
// #command rules are tried first (they match complete statements).
|
||||||
// #translate rules are tried on any part of a line.
|
// #translate rules are tried on any part of a line.
|
||||||
|
//
|
||||||
|
// `;`-separated statements share a line in PRG (`dbCommit(); CLOSE
|
||||||
|
// ALL`); each sub-statement is matched against the rule list
|
||||||
|
// independently. Without this, only the first statement on the line
|
||||||
|
// would have rules applied, and subsequent ones would reach the
|
||||||
|
// parser unrewritten — `CLOSE ALL` after a semicolon used to fall
|
||||||
|
// through to the parser as IDENT tokens, blowing up at runtime
|
||||||
|
// when "CLOSE" tried to dispatch as a function name.
|
||||||
func (pp *Preprocessor) applyRules(line string) string {
|
func (pp *Preprocessor) applyRules(line string) string {
|
||||||
trimmed := strings.TrimSpace(line)
|
trimmed := strings.TrimSpace(line)
|
||||||
if trimmed == "" || strings.HasPrefix(trimmed, "//") {
|
if trimmed == "" || strings.HasPrefix(trimmed, "//") {
|
||||||
return line
|
return line
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Multi-statement line: split on top-level `;` (paren / string
|
||||||
|
// balanced), apply rules to each segment, rejoin.
|
||||||
|
if hasTopLevelSemi(trimmed) {
|
||||||
|
parts := splitTopLevelSemi(line)
|
||||||
|
if len(parts) > 1 {
|
||||||
|
out := make([]string, len(parts))
|
||||||
|
for i, p := range parts {
|
||||||
|
out[i] = pp.applyRules(p)
|
||||||
|
}
|
||||||
|
return strings.Join(out, ";")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Try #command rules (match from start of line)
|
// Try #command rules (match from start of line)
|
||||||
for _, rule := range pp.commands {
|
for _, rule := range pp.commands {
|
||||||
if result, ok := rule.MatchLine(trimmed); ok {
|
if result, ok := rule.MatchLine(trimmed); ok {
|
||||||
|
|||||||
Reference in New Issue
Block a user