Files
five/hbrtl/xmlx.go
CharlesKWON 675eaa4def feat(hbrtl): FV_HTTPGET / FV_HTTPPOST / FV_ZIP* / FV_XML_ROWS
New Five-native HTTP / ZIP / XML primitives so PRG code can do
HTTPS fetch, ZIP container reads, and streaming XML row extraction
without dropping into BEGINDUMP. FV_ prefix marks Five-original
RTL (distinct from Harbour-inherited HB_ surface).

FV_HTTPGET(cUrl [, hOpts]) / FV_HTTPPOST(cUrl, cBody [, hOpts])
  hOpts:   { headers: {=>}, timeout: nSec, tls_legacy: .T./.F. }
  Result:  { status, body, error, headers }
  tls_legacy re-enables TLS_RSA cipher suites for legacy
  endpoints (DART OpenAPI pins them).

FV_ZIPENTRIES(cZipBytes) / FV_ZIPREAD(cZipBytes, cEntryName)
  Read ZIP archives held in memory (e.g. from FV_HTTPGET).

FV_XML_ROWS(cXml, cRowTag)
  Streaming reader for repeating-record XML. Each row becomes a
  flat hash of immediate-child element name -> text. Verified
  against DART corpCode.xml: 30 MB / 118k rows in seconds, no
  full-tree allocation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-29 08:47:34 +09:00

92 lines
2.0 KiB
Go

// xmlx.go — Five-native XML repeating-record reader (FV_XML_ROWS).
//
// Designed for one extremely common shape — a parent element wrapping
// many same-tagged children whose own children are leaf text fields:
//
//	<result>
//	  <list>
//	    <corp_code>00126380</corp_code>
//	    <corp_name>삼성전자(주)</corp_name>
//	    ...
//	  </list>
//	  <list>...</list>
//	</result>
//
// FV_XML_ROWS(cXml, "list") returns an array where each element is
// a hash { "corp_code" => "00126380", "corp_name" => "삼성전자(주)", ... }.
//
// Streaming — never materialises the full XML tree, so the 30MB DART
// corpCode dump (118k rows) doesn't blow PRG memory.
package hbrtl
import (
"encoding/xml"
"strings"
"five/hbrt"
)
// FvXmlRows implements FV_XML_ROWS(cXml, cRowTag) -> [ { field => text, ... }, ... ].
//
// Parameter 1 is the XML document as a string and parameter 2 is the local
// name of the repeating row element. The document is tokenised with an
// encoding/xml decoder; every start element whose local name equals the row
// tag (any namespace is ignored) is handed to fvXmlReadRow, and each row that
// is read through to its closing tag is added to the returned array as a hash.
//
// Scanning stops at the first token error seen by the outer loop, and the
// rows completed up to that point are returned. A row that is cut short by a
// token error is discarded.
//
// NOTE(review): decoder errors are never surfaced to the caller, so malformed
// XML and "no matching rows" look the same from PRG code — confirm this
// best-effort behaviour is intended.
func FvXmlRows(t *hbrt.Thread) {
	t.Frame(2, 0)
	defer t.EndProc()

	src := t.Local(1).AsString()
	wanted := t.Local(2).AsString()

	decoder := xml.NewDecoder(strings.NewReader(src))

	// Deliberately an empty, non-nil slice: an input without rows passes
	// MakeArrayFrom an empty slice rather than nil.
	result := []hbrt.Value{}

	for {
		token, err := decoder.Token()
		if err != nil {
			break
		}

		start, isStart := token.(xml.StartElement)
		if !isStart || start.Name.Local != wanted {
			continue
		}

		// An incomplete row is dropped; the loop then simply asks the decoder
		// for the next token and ends once that call reports an error.
		if fields, closed := fvXmlReadRow(decoder); closed {
			result = append(result, hbrt.MakeHashFrom(fields))
		}
	}

	t.RetVal(hbrt.MakeArrayFrom(result))
}

// fvXmlReadRow consumes tokens from dec, which must be positioned just after
// a row's start element, until the row's own end element has been read.
//
// Each immediate child element contributes one name -> text entry, where the
// text is the character data found directly inside that child; character data
// of deeper descendants is skipped. The boolean result is true when the row's
// closing tag was reached and false when the decoder returned an error first.
func fvXmlReadRow(dec *xml.Decoder) (*hbrt.HbHash, bool) {
	fields := &hbrt.HbHash{}

	var (
		name  string          // local name of the child element being collected
		text  strings.Builder // character data gathered for that child
		level int             // open elements below the row element
	)

	for {
		token, err := dec.Token()
		if err != nil {
			return fields, false
		}

		switch el := token.(type) {
		case xml.StartElement:
			if level == 0 {
				// A new immediate child begins: start a fresh field.
				name = el.Name.Local
				text.Reset()
			}
			level++

		case xml.CharData:
			if level == 1 {
				text.Write([]byte(el))
			}

		case xml.EndElement:
			if level == 0 {
				// Nothing else is open, so this closes the row itself.
				return fields, true
			}
			level--
			if level == 0 && name != "" && el.Name.Local == name {
				fields.Append(hbrt.MakeString(name), hbrt.MakeString(text.String()))
				name = ""
				text.Reset()
			}
		}
	}
}