- 04-idioms: document the lint.sh + smoke_test.sh gates and their wiring (build.sh gate, pre-commit hook, deploy-time smoke). - search.sh: ripgrep/grep keyword ranker over the corpus (keywords ×3 + body), prints ranked docs + matching section headers — makes the RAG searchable with no index to build. README updated. - Note: KWONDoc bluge MCP/CLI was unavailable here (MCP not connected; CLI license-gated), so search.sh delivers the "searchable" goal now; a bluge/embeddings index can ingest the same .md files later. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
46 lines
1.4 KiB
Bash
Executable File
46 lines
1.4 KiB
Bash
Executable File
#!/usr/bin/env bash
# rag/search.sh "<query terms>" — keyword search over the Five RAG corpus.
#
# A dependency-free searcher for when no bluge/embedding search layer is
# available. Every document matching 0*.md next to this script is scored as
#   3 x (matches on its frontmatter `keywords:` line) + (matches in the file)
# summed over all query terms. Documents are printed best-first, each
# followed by up to three section headers that contain a query term. The
# output can be pasted into a context as is.
#
#   ./search.sh session token csprng
#   ./search.sh xss sanitize
#
# Terms are matched case-insensitively as literal strings, so characters
# such as `.`, `+`, `[` or a leading `-` are safe to search for. Empty
# terms are ignored.
#
# Exit status: 1 when called without arguments or when setup fails,
# 0 otherwise (including when nothing matches).

# -e / pipefail are deliberately not enabled: grep exiting 1 on "no match"
# is a normal outcome throughout this script.
set -u

RAGDIR="$(cd -- "$(dirname -- "$0")" && pwd)" || {
  printf 'search.sh: cannot resolve script directory\n' >&2
  exit 1
}
readonly RAGDIR

if [[ $# -eq 0 ]]; then
  echo "usage: ./search.sh <query terms>"
  exit 1
fi

# Drop empty terms: they can never score, and an empty pattern would make
# the header filter below match every line.
terms=()
for term in "$@"; do
  if [[ -n "$term" ]]; then
    terms+=("$term")
  fi
done
if [[ ${#terms[@]} -eq 0 ]]; then
  echo "(매치 없음)"
  exit 0
fi

tmp="$(mktemp)" || {
  printf 'search.sh: mktemp failed\n' >&2
  exit 1
}
# Remove the scratch file on every exit path, including interrupts.
trap 'rm -f -- "$tmp"' EXIT

tab=$'\t'

# Pass 1: score each document and record "<score>\t<path>" for the hits.
for f in "$RAGDIR"/0*.md; do
  # An unmatched glob is left as the literal pattern; skip non-files.
  [[ -f "$f" ]] || continue

  score=0
  kwline="$(awk '/^keywords:/{print; exit}' "$f")"
  for term in "${terms[@]}"; do
    # -o prints one line per occurrence, so counting lines counts matches.
    # -F + -e: the term is a literal string, never a regex or an option.
    kw=$(printf '%s\n' "$kwline" | grep -ioF -e "$term" | grep -c .)
    bd=$(grep -ioF -e "$term" -- "$f" | grep -c .)
    score=$(( score + kw * 3 + bd ))
  done

  if (( score > 0 )); then
    printf '%d\t%s\n' "$score" "$f" >>"$tmp"
  fi
done

if [[ ! -s "$tmp" ]]; then
  echo "(매치 없음)"
  exit 0
fi

# All terms as a list of -e options: a literal OR-match used to pick the
# section headers to display.
header_args=()
for term in "${terms[@]}"; do
  header_args+=(-e "$term")
done

# Pass 2: print documents by descending score.
sort -t "$tab" -k1,1 -rn -- "$tmp" | while IFS="$tab" read -r s f; do
  # Strip only the `title:` prefix so titles containing ": " stay intact.
  title="$(awk '/^title:/{sub(/^title:[ \t]*/, ""); print; exit}' "$f")"
  printf '■ [%s] %s — %s\n' "$s" "${f##*/}" "$title"
  # Top 3 matching markdown headers. Terms are matched against the file
  # first and header lines selected afterwards, so a numeric term cannot
  # match the "N:" line-number prefix added by -n.
  grep -inF "${header_args[@]}" -- "$f" \
    | grep -E '^[0-9]+:#{1,6} ' \
    | head -3 \
    | sed 's/^/ §/'
  echo
done