- Compiler: PP → Lexer → Parser → Analyzer → Gengo pipeline - Parser: 232/236 (98%) Harbour compatibility, registry-based dispatch - RTL: 351 Harbour-compatible functions - RDD: DBF/NTX/CDX engines with Rushmore bitmap optimization - Go Interop: IMPORT + pkg.Func() + obj:Method() with FastPath (15M calls/sec) - HB_FUNC API: Full Harbour C API compatible Go bridge - Concurrency: SPAWN/LAUNCH/GOROUTINE, <-, WATCH, PARALLEL FOR, ASYNC/AWAIT - Extensions: Multi-return, DEFER, Slice, f-string, Nil-safe ?:, CONST - Macro Compiler: Runtime AST parsing and evaluation - Debugger: TUI debugger with source display, breakpoints, stepping - FRB: Native + Pcode dual mode runtime binary - Tests: 13 packages ALL PASS Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
352 lines
7.8 KiB
Go
352 lines
7.8 KiB
Go
// Copyright (c) 2026 Charles KWON OhJun (charleskwonohjun@gmail.com)
|
|
// All rights reserved.
|
|
|
|
// Bitmap filter — Rushmore-style optimization for SET FILTER.
|
|
// Optimized for modern CPUs: POPCNT, TZCNT, LZCNT, cache-line aligned operations.
|
|
|
|
package hbrdd
|
|
|
|
import (
|
|
"math/bits"
|
|
"runtime"
|
|
"sync"
|
|
"sync/atomic"
|
|
)
|
|
|
|
// BitmapFilter holds a bit array where each bit represents one record.
|
|
type BitmapFilter struct {
|
|
bits []uint64
|
|
maxRec uint32
|
|
count int32 // atomic for concurrent builds
|
|
}
|
|
|
|
// NewBitmapFilter creates a bitmap for maxRec records, all bits off.
|
|
func NewBitmapFilter(maxRec uint32) *BitmapFilter {
|
|
nWords := (maxRec + 63) >> 6 // /64
|
|
return &BitmapFilter{
|
|
bits: make([]uint64, nWords),
|
|
maxRec: maxRec,
|
|
}
|
|
}
|
|
|
|
// NewBitmapFilterFull creates a bitmap with all valid bits set.
|
|
func NewBitmapFilterFull(maxRec uint32) *BitmapFilter {
|
|
nWords := (maxRec + 63) >> 6
|
|
bm := &BitmapFilter{
|
|
bits: make([]uint64, nWords),
|
|
maxRec: maxRec,
|
|
count: int32(maxRec),
|
|
}
|
|
for i := range bm.bits {
|
|
bm.bits[i] = ^uint64(0)
|
|
}
|
|
// Mask off excess bits in last word
|
|
if tail := maxRec & 63; tail != 0 && len(bm.bits) > 0 {
|
|
bm.bits[len(bm.bits)-1] = (uint64(1) << tail) - 1
|
|
}
|
|
return bm
|
|
}
|
|
|
|
// --- Single-bit operations (inline-friendly) ---
|
|
|
|
func (bm *BitmapFilter) Set(recNo uint32) {
|
|
r := recNo - 1
|
|
if r >= bm.maxRec {
|
|
return
|
|
}
|
|
mask := uint64(1) << (r & 63)
|
|
p := &bm.bits[r>>6]
|
|
if *p&mask == 0 {
|
|
*p |= mask
|
|
atomic.AddInt32(&bm.count, 1)
|
|
}
|
|
}
|
|
|
|
func (bm *BitmapFilter) Clear(recNo uint32) {
|
|
r := recNo - 1
|
|
if r >= bm.maxRec {
|
|
return
|
|
}
|
|
mask := uint64(1) << (r & 63)
|
|
p := &bm.bits[r>>6]
|
|
if *p&mask != 0 {
|
|
*p &^= mask
|
|
atomic.AddInt32(&bm.count, -1)
|
|
}
|
|
}
|
|
|
|
func (bm *BitmapFilter) Test(recNo uint32) bool {
|
|
r := recNo - 1
|
|
if r >= bm.maxRec {
|
|
return false
|
|
}
|
|
return bm.bits[r>>6]&(uint64(1)<<(r&63)) != 0
|
|
}
|
|
|
|
func (bm *BitmapFilter) Count() int {
|
|
return int(atomic.LoadInt32(&bm.count))
|
|
}
|
|
|
|
// --- Traversal (hot path — fully optimized) ---
|
|
|
|
// NextSet finds the next set bit >= recNo (1-based). Returns 0 if none.
|
|
// Uses TZCNT (TrailingZeros) for O(1) per word.
|
|
func (bm *BitmapFilter) NextSet(recNo uint32) uint32 {
|
|
if recNo < 1 {
|
|
recNo = 1
|
|
}
|
|
r := recNo - 1
|
|
if r >= bm.maxRec {
|
|
return 0
|
|
}
|
|
idx := r >> 6
|
|
// Mask off bits below current position in first word
|
|
word := bm.bits[idx] >> (r & 63)
|
|
if word != 0 {
|
|
return recNo + uint32(bits.TrailingZeros64(word))
|
|
}
|
|
// Scan remaining words
|
|
n := uint32(len(bm.bits))
|
|
for idx++; idx < n; idx++ {
|
|
if bm.bits[idx] != 0 {
|
|
result := (idx << 6) + 1 + uint32(bits.TrailingZeros64(bm.bits[idx]))
|
|
if result > bm.maxRec {
|
|
return 0
|
|
}
|
|
return result
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// PrevSet finds the previous set bit <= recNo (1-based). Returns 0 if none.
|
|
// Uses LZCNT (LeadingZeros) for O(1) per word.
|
|
func (bm *BitmapFilter) PrevSet(recNo uint32) uint32 {
|
|
if recNo > bm.maxRec {
|
|
recNo = bm.maxRec
|
|
}
|
|
if recNo < 1 {
|
|
return 0
|
|
}
|
|
r := recNo - 1
|
|
idx := r >> 6
|
|
// Mask off bits above current position in first word
|
|
shift := 63 - (r & 63)
|
|
word := bm.bits[idx] << shift
|
|
if word != 0 {
|
|
return recNo - uint32(bits.LeadingZeros64(word))
|
|
}
|
|
// Scan backwards
|
|
for idx > 0 {
|
|
idx--
|
|
if bm.bits[idx] != 0 {
|
|
return (idx << 6) + 1 + uint32(63-bits.LeadingZeros64(bm.bits[idx]))
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// --- Bulk operations (4-word unrolled for cache line) ---
|
|
|
|
// And returns intersection. Does NOT allocate if dst is provided.
|
|
func (bm *BitmapFilter) And(other *BitmapFilter) *BitmapFilter {
|
|
n := len(bm.bits)
|
|
if len(other.bits) < n {
|
|
n = len(other.bits)
|
|
}
|
|
result := &BitmapFilter{
|
|
bits: make([]uint64, len(bm.bits)),
|
|
maxRec: bm.maxRec,
|
|
}
|
|
// 4-word unrolled loop
|
|
i := 0
|
|
for ; i+3 < n; i += 4 {
|
|
result.bits[i] = bm.bits[i] & other.bits[i]
|
|
result.bits[i+1] = bm.bits[i+1] & other.bits[i+1]
|
|
result.bits[i+2] = bm.bits[i+2] & other.bits[i+2]
|
|
result.bits[i+3] = bm.bits[i+3] & other.bits[i+3]
|
|
}
|
|
for ; i < n; i++ {
|
|
result.bits[i] = bm.bits[i] & other.bits[i]
|
|
}
|
|
result.recount()
|
|
return result
|
|
}
|
|
|
|
// AndInPlace performs AND in-place (no allocation).
|
|
func (bm *BitmapFilter) AndInPlace(other *BitmapFilter) {
|
|
n := len(bm.bits)
|
|
if len(other.bits) < n {
|
|
n = len(other.bits)
|
|
}
|
|
i := 0
|
|
for ; i+3 < n; i += 4 {
|
|
bm.bits[i] &= other.bits[i]
|
|
bm.bits[i+1] &= other.bits[i+1]
|
|
bm.bits[i+2] &= other.bits[i+2]
|
|
bm.bits[i+3] &= other.bits[i+3]
|
|
}
|
|
for ; i < n; i++ {
|
|
bm.bits[i] &= other.bits[i]
|
|
}
|
|
// Clear words beyond other's length
|
|
for ; i < len(bm.bits); i++ {
|
|
bm.bits[i] = 0
|
|
}
|
|
bm.recount()
|
|
}
|
|
|
|
// Or returns union.
|
|
func (bm *BitmapFilter) Or(other *BitmapFilter) *BitmapFilter {
|
|
maxR := bm.maxRec
|
|
if other.maxRec > maxR {
|
|
maxR = other.maxRec
|
|
}
|
|
n := len(bm.bits)
|
|
if len(other.bits) > n {
|
|
n = len(other.bits)
|
|
}
|
|
result := &BitmapFilter{
|
|
bits: make([]uint64, n),
|
|
maxRec: maxR,
|
|
}
|
|
a, b := bm.bits, other.bits
|
|
i := 0
|
|
min := len(a)
|
|
if len(b) < min {
|
|
min = len(b)
|
|
}
|
|
for ; i+3 < min; i += 4 {
|
|
result.bits[i] = a[i] | b[i]
|
|
result.bits[i+1] = a[i+1] | b[i+1]
|
|
result.bits[i+2] = a[i+2] | b[i+2]
|
|
result.bits[i+3] = a[i+3] | b[i+3]
|
|
}
|
|
for ; i < min; i++ {
|
|
result.bits[i] = a[i] | b[i]
|
|
}
|
|
for ; i < len(a); i++ {
|
|
result.bits[i] = a[i]
|
|
}
|
|
for ; i < len(b); i++ {
|
|
result.bits[i] = b[i]
|
|
}
|
|
result.recount()
|
|
return result
|
|
}
|
|
|
|
// OrInPlace performs OR in-place.
|
|
func (bm *BitmapFilter) OrInPlace(other *BitmapFilter) {
|
|
if len(other.bits) > len(bm.bits) {
|
|
newBits := make([]uint64, len(other.bits))
|
|
copy(newBits, bm.bits)
|
|
bm.bits = newBits
|
|
}
|
|
if other.maxRec > bm.maxRec {
|
|
bm.maxRec = other.maxRec
|
|
}
|
|
i := 0
|
|
n := len(other.bits)
|
|
for ; i+3 < n; i += 4 {
|
|
bm.bits[i] |= other.bits[i]
|
|
bm.bits[i+1] |= other.bits[i+1]
|
|
bm.bits[i+2] |= other.bits[i+2]
|
|
bm.bits[i+3] |= other.bits[i+3]
|
|
}
|
|
for ; i < n; i++ {
|
|
bm.bits[i] |= other.bits[i]
|
|
}
|
|
bm.recount()
|
|
}
|
|
|
|
// AndNot returns A AND NOT B (difference) — single pass, no intermediate.
|
|
func (bm *BitmapFilter) AndNot(other *BitmapFilter) *BitmapFilter {
|
|
result := &BitmapFilter{
|
|
bits: make([]uint64, len(bm.bits)),
|
|
maxRec: bm.maxRec,
|
|
}
|
|
n := len(other.bits)
|
|
if n > len(bm.bits) {
|
|
n = len(bm.bits)
|
|
}
|
|
i := 0
|
|
for ; i+3 < n; i += 4 {
|
|
result.bits[i] = bm.bits[i] &^ other.bits[i]
|
|
result.bits[i+1] = bm.bits[i+1] &^ other.bits[i+1]
|
|
result.bits[i+2] = bm.bits[i+2] &^ other.bits[i+2]
|
|
result.bits[i+3] = bm.bits[i+3] &^ other.bits[i+3]
|
|
}
|
|
for ; i < n; i++ {
|
|
result.bits[i] = bm.bits[i] &^ other.bits[i]
|
|
}
|
|
for ; i < len(bm.bits); i++ {
|
|
result.bits[i] = bm.bits[i]
|
|
}
|
|
result.recount()
|
|
return result
|
|
}
|
|
|
|
// Not inverts all bits (respecting maxRec boundary).
|
|
func (bm *BitmapFilter) Not() *BitmapFilter {
|
|
result := &BitmapFilter{
|
|
bits: make([]uint64, len(bm.bits)),
|
|
maxRec: bm.maxRec,
|
|
}
|
|
for i := range bm.bits {
|
|
result.bits[i] = ^bm.bits[i]
|
|
}
|
|
// Mask off excess in last word
|
|
if tail := bm.maxRec & 63; tail != 0 && len(result.bits) > 0 {
|
|
result.bits[len(result.bits)-1] &= (uint64(1) << tail) - 1
|
|
}
|
|
result.recount()
|
|
return result
|
|
}
|
|
|
|
// --- Parallel bitmap build (Five's goroutine advantage) ---
|
|
|
|
// BuildParallel evaluates a test function on all records using goroutines.
|
|
// testFn(recNo) returns true if the record matches the filter.
|
|
func BuildParallel(maxRec uint32, testFn func(uint32) bool) *BitmapFilter {
|
|
bm := NewBitmapFilter(maxRec)
|
|
nCPU := runtime.NumCPU()
|
|
if nCPU > 16 {
|
|
nCPU = 16
|
|
}
|
|
if maxRec < 1000 {
|
|
nCPU = 1 // not worth parallelizing
|
|
}
|
|
|
|
chunkSize := (maxRec + uint32(nCPU) - 1) / uint32(nCPU)
|
|
var wg sync.WaitGroup
|
|
|
|
for cpu := 0; cpu < nCPU; cpu++ {
|
|
start := uint32(cpu)*chunkSize + 1
|
|
end := start + chunkSize
|
|
if end > maxRec+1 {
|
|
end = maxRec + 1
|
|
}
|
|
wg.Add(1)
|
|
go func(s, e uint32) {
|
|
defer wg.Done()
|
|
for r := s; r < e; r++ {
|
|
if testFn(r) {
|
|
bm.Set(r)
|
|
}
|
|
}
|
|
}(start, end)
|
|
}
|
|
wg.Wait()
|
|
return bm
|
|
}
|
|
|
|
// --- Internal ---
|
|
|
|
func (bm *BitmapFilter) recount() {
|
|
c := 0
|
|
for _, w := range bm.bits {
|
|
c += bits.OnesCount64(w)
|
|
}
|
|
atomic.StoreInt32(&bm.count, int32(c))
|
|
}
|