New Five-native HTTP / ZIP / XML primitives so PRG code can do
HTTPS fetch, ZIP container reads, and streaming XML row extraction
without dropping into BEGINDUMP. FV_ prefix marks Five-original
RTL (distinct from Harbour-inherited HB_ surface).
FV_HTTPGET(cUrl [, hOpts]) / FV_HTTPPOST(cUrl, cBody [, hOpts])
hOpts: { headers: {=>}, timeout: nSec, tls_legacy: .T./.F. }
Result: { status, body, error, headers }
tls_legacy re-enables TLS_RSA cipher suites for legacy
endpoints (DART OpenAPI pins them).
FV_ZIPENTRIES(cZipBytes) / FV_ZIPREAD(cZipBytes, cEntryName)
Read ZIP archives held in memory (e.g. from FV_HTTPGET).
FV_XML_ROWS(cXml, cRowTag)
Streaming reader for repeating-record XML. Each row becomes a
flat hash of immediate-child element name -> text. Verified
against DART corpCode.xml: 30 MB / 118k rows in seconds, no
full-tree allocation.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
92 lines
2.0 KiB
Go
92 lines
2.0 KiB
Go
// xmlx.go — Five-native XML repeating-record reader (FV_XML_ROWS).
|
|
//
|
|
// Designed for one extremely common shape — a parent element wrapping
|
|
// many same-tagged children whose own children are leaf text fields:
|
|
//
|
|
// <result>
|
|
// <list>
|
|
// <corp_code>00126380</corp_code>
|
|
// <corp_name>삼성전자(주)</corp_name>
|
|
// ...
|
|
// </list>
|
|
// <list>...</list>
|
|
// </result>
|
|
//
|
|
// FV_XML_ROWS(cXml, "list") returns an array where each element is
|
|
// a hash { "corp_code" => "00126380", "corp_name" => "삼성전자(주)", ... }.
|
|
//
|
|
// Streaming — never materialises the full XML tree, so the 30MB DART
|
|
// corpCode dump (118k rows) doesn't blow PRG memory.
|
|
|
|
package hbrtl
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"strings"
|
|
|
|
"five/hbrt"
|
|
)
|
|
|
|
// FV_XML_ROWS(cXml, cRowTag) -> [ { field => text, ... }, ... ]
|
|
func FvXmlRows(t *hbrt.Thread) {
|
|
t.Frame(2, 0)
|
|
defer t.EndProc()
|
|
|
|
data := t.Local(1).AsString()
|
|
rowTag := t.Local(2).AsString()
|
|
|
|
dec := xml.NewDecoder(strings.NewReader(data))
|
|
rows := []hbrt.Value{}
|
|
|
|
for {
|
|
tok, err := dec.Token()
|
|
if err != nil {
|
|
break
|
|
}
|
|
se, ok := tok.(xml.StartElement)
|
|
if !ok {
|
|
continue
|
|
}
|
|
if se.Name.Local != rowTag {
|
|
continue
|
|
}
|
|
// Collect the row's immediate child elements as a flat hash.
|
|
row := &hbrt.HbHash{}
|
|
curField := ""
|
|
var curText strings.Builder
|
|
depth := 0
|
|
for {
|
|
t2, err := dec.Token()
|
|
if err != nil {
|
|
break
|
|
}
|
|
switch tt := t2.(type) {
|
|
case xml.StartElement:
|
|
if depth == 0 {
|
|
curField = tt.Name.Local
|
|
curText.Reset()
|
|
}
|
|
depth++
|
|
case xml.CharData:
|
|
if depth == 1 {
|
|
curText.Write([]byte(tt))
|
|
}
|
|
case xml.EndElement:
|
|
depth--
|
|
if depth == 0 && tt.Name.Local == curField && curField != "" {
|
|
row.Append(hbrt.MakeString(curField), hbrt.MakeString(curText.String()))
|
|
curField = ""
|
|
curText.Reset()
|
|
}
|
|
if depth < 0 {
|
|
// Closing tag for the row itself.
|
|
rows = append(rows, hbrt.MakeHashFrom(row))
|
|
goto nextRow
|
|
}
|
|
}
|
|
}
|
|
nextRow:
|
|
}
|
|
t.RetVal(hbrt.MakeArrayFrom(rows))
|
|
}
|