add claude-obsidian

2026-05-28 10:57:16 +09:00
parent 1b07531a45
commit 72dad72703
205 changed files with 41703 additions and 80 deletions
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+# allocate-address.sh — atomic creation-order address allocation for the vault.
+#
+# Reserves the next address of the form c-NNNNNN and increments the counter
+# under an exclusive flock. On missing counter file, recovers by scanning the
+# vault for the highest existing c-NNNNNN in page frontmatter and resuming from
+# max+1. Never silently resets to 1 in a non-empty vault.
+#
+# Usage:
+#   ./scripts/allocate-address.sh           # prints the reserved address (e.g. c-000042) to stdout
+#   ./scripts/allocate-address.sh --peek    # prints the next value without incrementing
+#   ./scripts/allocate-address.sh --rebuild # recomputes counter from max observed and exits
+#
+# Exit codes:
+#   0 — success
+#   1 — lock acquisition failed (another writer is holding the lock)
+#   2 — vault-meta directory missing and cannot be created
+#   3 — counter value corrupt or non-numeric, or unknown mode argument
+
+set -euo pipefail
+
+# Resolve the vault root as the parent of the directory holding this script,
+# so the allocator behaves the same regardless of the caller's working directory.
+VAULT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+COUNTER_FILE="${VAULT_ROOT}/.vault-meta/address-counter.txt"
+LOCK_FILE="${VAULT_ROOT}/.vault-meta/.address.lock"
+WIKI_DIR="${VAULT_ROOT}/wiki"
+
+# First positional argument selects the mode; no argument means "allocate".
+MODE="${1:-allocate}"
+
+# .vault-meta/ must exist before the lock file and counter file can be opened.
+mkdir -p "$(dirname "$COUNTER_FILE")" || {
+  echo "ERR: cannot create .vault-meta/" >&2
+  exit 2
+}
+
+# Acquire an exclusive lock on fd 9 with a 5-second timeout. fd 9 is never
+# closed explicitly, so the lock is held for the rest of the script (every
+# mode, including --peek) and is dropped when the process exits.
+exec 9>"$LOCK_FILE"
+if ! flock -x -w 5 9; then
+  echo "ERR: could not acquire address allocator lock within 5s" >&2
+  exit 1
+fi
+
+scan_max_c_address() {
+  # Print the largest NNNNNN (as a plain decimal, no zero padding) found on
+  # "address: c-NNNNNN" lines inside the FIRST YAML frontmatter block of each
+  # wiki .md file. A file has frontmatter only if its first line is exactly
+  # "---"; the block ends at the next "---" line. Code-block examples and body
+  # prose are therefore never scanned. Prints 0 if no address is found or if
+  # the wiki directory does not exist.
+  #
+  # The six digits are spelled out instead of written as [0-9]{6}: interval
+  # expressions are not honoured by every awk (gawk before 4.0 without
+  # --re-interval, older mawk builds). On those, the pattern would silently
+  # match nothing and the counter would be rebuilt as 1 in a non-empty vault.
+  if [ ! -d "$WIKI_DIR" ]; then
+    echo 0
+    return
+  fi
+  find "$WIKI_DIR" -type f -name '*.md' -print0 2>/dev/null \
+    | xargs -0 awk '
+        # Line 1 decides whether this file has frontmatter at all.
+        FNR == 1 { state = ($0 == "---") ? "fm" : "pre"; next }
+        # Closing fence: nothing after it is relevant, skip to the next file.
+        state == "fm" && $0 == "---" { state = "body"; nextfile }
+        state == "fm" && /^address:[[:space:]]+c-[0-9][0-9][0-9][0-9][0-9][0-9][[:space:]]*$/ {
+          if (match($0, /c-[0-9]+/)) {
+            print substr($0, RSTART, RLENGTH)
+          }
+        }
+      ' 2>/dev/null \
+    | sed 's/^c-0*//;s/^$/0/' \
+    | sort -n \
+    | tail -1 \
+    | awk 'BEGIN{n=0} {n=$0} END{print (n+0)}'
+}
+
+read_or_recover_counter() {
+  # Print the current counter value (the next address number to hand out) as a
+  # plain base-10 integer.
+  #
+  # If the counter file is missing, it is recreated as max+1, where max is the
+  # highest address found by scan_max_c_address, and an INFO line goes to
+  # stderr. If the file content is not all digits, exits with status 3.
+  if [ ! -f "$COUNTER_FILE" ]; then
+    local max_c recovered
+    max_c="$(scan_max_c_address)"
+    recovered=$((max_c + 1))
+    echo "$recovered" > "$COUNTER_FILE"
+    echo "INFO: counter file missing; recovered from vault scan, set to ${recovered}" >&2
+  fi
+  local raw
+  raw="$(cat "$COUNTER_FILE")"
+  if ! [[ "$raw" =~ ^[0-9]+$ ]]; then
+    echo "ERR: counter file content is not a positive integer: $raw" >&2
+    exit 3
+  fi
+  # Force base 10. A zero-padded value such as "000042" (easy to introduce by
+  # hand, since addresses themselves are zero-padded) would otherwise be read
+  # as octal by $(( )) and printf %d, giving a wrong address, or an arithmetic
+  # error for digits 8 and 9.
+  echo "$((10#$raw))"
+}
+
+# Dispatch on the requested mode. The allocator lock acquired earlier is held
+# throughout, so each branch sees a consistent counter file.
+case "$MODE" in
+  # Print the counter value as a bare number without changing it. If the file
+  # is missing, read_or_recover_counter recreates it first.
+  --peek)
+    read_or_recover_counter
+    ;;
+  # Unconditionally overwrite the counter with (highest address found) + 1.
+  --rebuild)
+    max_c="$(scan_max_c_address)"
+    echo $((max_c + 1)) > "$COUNTER_FILE"
+    echo "Counter rebuilt: next = $((max_c + 1))"
+    ;;
+  # Reserve the current value: persist current+1, then print the zero-padded
+  # address for the value that was just reserved.
+  allocate|"")
+    current="$(read_or_recover_counter)"
+    next=$((current + 1))
+    echo "$next" > "$COUNTER_FILE"
+    printf 'c-%06d\n' "$current"
+    ;;
+  # Any other argument is rejected with a usage message and exit status 3.
+  *)
+    echo "ERR: unknown mode: $MODE" >&2
+    echo "Usage: $0 [allocate|--peek|--rebuild]" >&2
+    exit 3
+    ;;
+esac
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+"""baseline-v16.py — simulate the v1.6 hot→index→drill retrieval chain.
+
+Exists ONLY for benchmarking v1.7's hybrid retrieval against the legacy
+v1.6 behavior. Not used by any v1.7 skill; not feature-gated; not part of
+the regular vault workflow.
+
+The v1.6 query path (per skills/wiki-query/SKILL.md before v1.7):
+  1. Read wiki/hot.md (always; quick context)
+  2. Read wiki/index.md (scan for descriptions matching query terms)
+  3. Read top-N pages cited in the index whose entries best match query
+  4. Caller synthesizes answer
+
+This script approximates that path by:
+  1. Tokenizing the query (same stopword-filtered, Unicode-aware tokenizer as bm25-index.py)
+  2. Scoring each *.md page in wiki/ by the count of distinct query terms it contains
+     (case-insensitive substring on the full file body; no semantic matching)
+  3. Returning top-K pages by score, where the score is further adjusted by:
+     a. Presence in hot.md (boost +5)
+     b. Presence in index.md (boost +3)
+     c. Total raw term-occurrence count (+0.01 per occurrence, as a tiebreak)
+
+The simulation is intentionally simple — it represents what a human or a
+basic agent does when reading hot/index "by hand" without any retrieval
+infrastructure. Anything fancier would not be a fair v1.6 baseline.
+
+Usage:
+  baseline-v16.py "your query" [--top 5]
+  baseline-v16.py "query" --top 5 --json   # output as JSON (default: text)
+
+Exit codes:
+  0 — success
+  2 — usage error
+  3 — wiki directory missing
+"""
+
+import argparse
+import json
+import re
+import sys
+from collections import Counter
+from pathlib import Path
+
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+WIKI_DIR = VAULT_ROOT / "wiki"
+HOT_PATH = WIKI_DIR / "hot.md"
+INDEX_PATH = WIKI_DIR / "index.md"
+
+# Mirror bm25-index.py's tokenizer + stopword list so comparisons are fair.
+STOPWORDS = frozenset("""
+a an and are as at be by for from has have he her him his i if in is it its
+of on or that the their them they this to was were will with you your
+""".split())
+
+# Mirrors bm25-index.py's Unicode-aware tokenizer (v1.7.2; closes M2).
+TOKEN_RE = re.compile(r"\w[\w'\-]*", re.UNICODE)
+
+HOT_BOOST = 5.0
+INDEX_BOOST = 3.0
+
+EXIT_OK = 0
+EXIT_USAGE = 2
+EXIT_NO_WIKI = 3
+
+
+def tokenize(text):
+    """Split `text` into lowercase terms.
+
+    Tokens come from TOKEN_RE; single-character tokens and tokens whose
+    lowercase form is in STOPWORDS are dropped. Order and duplicates are kept.
+    """
+    terms = []
+    for raw in TOKEN_RE.findall(text):
+        lowered = raw.lower()
+        if len(raw) > 1 and lowered not in STOPWORDS:
+            terms.append(lowered)
+    return terms
+
+
+def page_paths():
+    """Return the sorted list of *.md files under WIKI_DIR, skipping hidden paths.
+
+    A page is skipped when any path component *below* WIKI_DIR starts with a
+    dot. Only the wiki-relative part is inspected: checking the absolute path
+    would discard every page whenever the vault itself lives under a
+    dot-directory (for example ~/.vaults/notes).
+
+    Exits the process with EXIT_NO_WIKI if WIKI_DIR is not a directory.
+    """
+    if not WIKI_DIR.is_dir():
+        print(f"ERR: no wiki directory at {WIKI_DIR}", file=sys.stderr)
+        sys.exit(EXIT_NO_WIKI)
+    return sorted(
+        p for p in WIKI_DIR.rglob("*.md")
+        if not any(part.startswith(".")
+                   for part in p.relative_to(WIKI_DIR).parts)
+    )
+
+
+def _is_cited(reference_path, page_path):
+    """Return True if `page_path` is mentioned in the file at `reference_path`.
+
+    A mention is a case-sensitive substring match on either the page's stem or
+    its vault-relative path. A missing or unreadable reference file counts as
+    "not cited".
+    """
+    try:
+        reference_body = reference_path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return False
+    if page_path.stem in reference_body:
+        return True
+    return str(page_path.relative_to(VAULT_ROOT)) in reference_body
+
+
+def score_page(page_path, query_terms_set, query_terms_counter):
+    """Score by distinct-query-term-presence + boost if cited in hot/index.
+
+    Args:
+        page_path: Path of the wiki page to score.
+        query_terms_set: Distinct lowercase query terms.
+        query_terms_counter: Accepted for interface compatibility; not used.
+
+    Returns:
+        (score, distinct_matches, total_occurrences). The base score is
+        `distinct + 0.01 * total`; HOT_BOOST and INDEX_BOOST are added when the
+        page is cited in hot.md / index.md. A page that contains none of the
+        query terms, or cannot be read, scores (0.0, 0, 0): the boosts rank
+        matching pages, they do not turn a non-matching page into a result.
+    """
+    try:
+        body = page_path.read_text(encoding="utf-8", errors="replace").lower()
+    except OSError:
+        return (0.0, 0, 0)
+
+    distinct = sum(1 for term in query_terms_set if term in body)
+    if distinct == 0:
+        # Without this guard every page cited in hot.md or index.md would get a
+        # positive score and be returned for any query, including queries that
+        # should find nothing.
+        return (0.0, 0, 0)
+
+    total = sum(body.count(term) for term in query_terms_set)
+    score = float(distinct) + 0.01 * total  # distinct dominates; total is tiebreak
+
+    # Hot-cache boost: the page is referenced in hot.md
+    if _is_cited(HOT_PATH, page_path):
+        score += HOT_BOOST
+
+    # Index boost: the page is cited in index.md
+    if _is_cited(INDEX_PATH, page_path):
+        score += INDEX_BOOST
+
+    return (score, distinct, total)
+
+
+def baseline_query(query, top_k=5):
+    """Rank wiki pages for `query` and return the best `top_k`.
+
+    Each result is a dict with keys path (vault-relative), score (rounded to
+    4 places), distinct_terms and total_occurrences. Pages whose score is not
+    positive are left out. A query with no usable terms yields [].
+    """
+    terms = tokenize(query)
+    if not terms:
+        return []
+    unique_terms = set(terms)
+    term_counts = Counter(terms)
+
+    hits = []
+    for page in page_paths():
+        score, distinct, total = score_page(page, unique_terms, term_counts)
+        if score <= 0:
+            continue
+        hits.append({
+            "path": str(page.relative_to(VAULT_ROOT)),
+            "score": round(score, 4),
+            "distinct_terms": distinct,
+            "total_occurrences": total,
+        })
+
+    ranked = sorted(hits, key=lambda hit: hit["score"], reverse=True)
+    return ranked[:top_k]
+
+
+def main():
+    """CLI entry point: run the baseline query and print text or JSON results.
+
+    Returns EXIT_OK; argparse handles usage errors on its own.
+    """
+    parser = argparse.ArgumentParser(description="v1.6 baseline retrieval simulator.")
+    parser.add_argument("query", help="Natural-language query")
+    parser.add_argument("--top", type=int, default=5, help="Top-K results")
+    parser.add_argument("--json", action="store_true", help="Output as JSON")
+    args = parser.parse_args()
+
+    results = baseline_query(args.query, top_k=args.top)
+
+    if args.json:
+        payload = {
+            "query": args.query,
+            "strategy": "baseline-v1.6:hot+index+keyword",
+            "top_k": args.top,
+            "candidates": results,
+        }
+        print(json.dumps(payload, indent=2))
+        return EXIT_OK
+
+    if not results:
+        print("(no matches)")
+        return EXIT_OK
+
+    print(f"v1.6 baseline for: {args.query!r}")
+    for rank, hit in enumerate(results, 1):
+        print(f"  {rank}. {hit['path']}  score={hit['score']}  distinct={hit['distinct_terms']}  occ={hit['total_occurrences']}")
+    return EXIT_OK
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,215 @@
+#!/usr/bin/env python3
+"""benchmark-runner.py — score v1.7 hybrid retrieval vs v1.6 baseline.
+
+Reads the 50-query corpus at wiki/meta/retrieval-benchmark-v1.7.md, runs both
+pipelines for each query, scores top-1 / top-5 accuracy, prints a comparison
+table. Used by the v1.7.0 audit.
+
+Pure stdlib + subprocess. No network or LLM calls of its own — the subprocess
+calls to retrieve.py may hit ollama (if installed) for rerank. baseline-v16.py
+is pure filesystem.
+
+Usage:
+  benchmark-runner.py                 # run all 50 queries, print summary
+  benchmark-runner.py --json results.json  # also write per-query results
+  benchmark-runner.py --limit 5       # smoke: first 5 queries only
+"""
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+CORPUS = VAULT_ROOT / "wiki" / "meta" / "retrieval-benchmark-v1.7.md"
+
+
+def _corpus_field(body, field):
+    """Return the value of the `- <field>: value` line in `body`, or ""."""
+    # [ \t]* rather than \s*: \s also matches newlines, so an empty field
+    # ("- correct:") would otherwise swallow the line break and capture the
+    # NEXT line ("- relevant: ...") as its value.
+    m = re.search(rf"^- {re.escape(field)}:[ \t]*(.*)$", body, re.MULTILINE)
+    return m.group(1).strip() if m else ""
+
+
+def _corpus_list_field(body, field):
+    """Return a comma-separated field as a list; empty or "null" gives []."""
+    raw = _corpus_field(body, field)
+    if not raw or raw == "null":
+        return []
+    return [s.strip() for s in raw.split(",") if s.strip()]
+
+
+def parse_corpus(corpus_path):
+    """Parse the ### <id> blocks into a list of query dicts.
+
+    Args:
+        corpus_path: Path-like object with `read_text`, pointing at the
+            benchmark markdown file.
+
+    Returns:
+        One dict per block whose heading is an id of the form D<digits> or
+        H<digits>, in file order, with keys id, query, correct (list),
+        relevant (list), category and rationale. Missing fields become "" or
+        []. Blocks with any other heading (e.g. "Schema") are ignored.
+    """
+    text = corpus_path.read_text(encoding="utf-8")
+    # Split on "### " at line start (including the very first line of the file).
+    blocks = re.split(r"^### ", text, flags=re.MULTILINE)
+    queries = []
+    for blk in blocks[1:]:  # skip prelude
+        # First line is the id (e.g. "D1"); the rest is the field list.
+        heading, newline, body = blk.partition("\n")
+        if not newline:
+            continue
+        qid = heading.strip()
+        # Ignore non-ID headings (e.g. "Schema", "Scoring rules")
+        if not re.match(r"^[DH]\d+$", qid):
+            continue
+        # Stop at next "## " (next section header)
+        body = re.split(r"\n## ", body, 1)[0]
+
+        queries.append({
+            "id": qid,
+            "query": _corpus_field(body, "query"),
+            "correct": _corpus_list_field(body, "correct"),
+            "relevant": _corpus_list_field(body, "relevant"),
+            "category": _corpus_field(body, "category"),
+            "rationale": _corpus_field(body, "rationale"),
+        })
+    return queries
+
+
+def run_v17(query, top_k=5):
+    """Run scripts/retrieve.py for `query` and return (page_paths, error).
+
+    `page_paths` is the ordered list of "page_path" values from the script's
+    JSON "candidates"; `error` is None on success. On a non-zero exit, a
+    timeout (60s), unparseable JSON or an OS error, the result is ([], message).
+    """
+    cmd = [
+        "python3", str(VAULT_ROOT / "scripts" / "retrieve.py"),
+        query, "--top", str(top_k),
+    ]
+    try:
+        proc = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=60, check=False,
+        )
+        if proc.returncode != 0:
+            return [], f"rc={proc.returncode}: {proc.stderr.strip()[:200]}"
+        payload = json.loads(proc.stdout)
+        paths = [c["page_path"] for c in payload.get("candidates", [])]
+        return paths, None
+    except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError) as e:
+        return [], str(e)
+
+
+def run_v16(query, top_k=5):
+    """Returns ordered list of page_paths from v1.6 baseline-v16.py.
+
+    The return value is a pair (page_paths, error). `page_paths` holds the
+    "path" of each entry in the script's JSON "candidates"; `error` is None on
+    success. On a non-zero exit the error carries the return code and the
+    first 200 characters of stderr, in the same format run_v17 uses, so a
+    baseline failure (such as a missing wiki directory) can be diagnosed from
+    the results file. Timeouts (30s), bad JSON and OS errors yield
+    ([], message).
+    """
+    try:
+        result = subprocess.run(
+            ["python3", str(VAULT_ROOT / "scripts" / "baseline-v16.py"),
+             query, "--top", str(top_k), "--json"],
+            capture_output=True, text=True, timeout=30, check=False,
+        )
+        if result.returncode != 0:
+            return [], f"rc={result.returncode}: {result.stderr.strip()[:200]}"
+        data = json.loads(result.stdout)
+        return [c["path"] for c in data.get("candidates", [])], None
+    except (subprocess.TimeoutExpired, json.JSONDecodeError, OSError) as e:
+        return [], str(e)
+
+
+def score_query(results, correct, relevant, category):
+    """Return (top1_success, top5_success) as 0/1 flags for one query.
+
+    Queries in the "negative" category, or with an empty `correct` list, are
+    judged against `relevant` and count as a full success when nothing was
+    returned. All other queries are judged against `correct`. Only the first
+    five results are considered for the top-5 flag.
+    """
+    judged_as_negative = category == "negative" or not correct
+    if judged_as_negative and not results:
+        return (1, 1)  # nothing returned for a query that expects nothing
+
+    accepted = relevant if judged_as_negative else correct
+    head = results[:5]
+    top1 = int(bool(head) and head[0] in accepted)
+    top5 = int(any(candidate in accepted for candidate in head))
+    return (top1, top5)
+
+
+def main():
+    """Run both retrieval pipelines over the corpus and report accuracy.
+
+    Prints one progress line per query, then a per-category and total table of
+    top-1 / top-5 accuracy for v1.7 and v1.6, followed by the two gate figures
+    (percentage-point gain and relative error reduction). With --json PATH the
+    summary, per-category stats and per-query records are also written to PATH.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--limit", type=int, default=None, help="Only run first N queries")
+    parser.add_argument("--json", help="Write per-query results to PATH")
+    parser.add_argument("--top", type=int, default=5)
+    args = parser.parse_args()
+
+    queries = parse_corpus(CORPUS)
+    # Truthiness check: --limit 0 is treated the same as no limit.
+    if args.limit:
+        queries = queries[: args.limit]
+
+    print(f"Parsed {len(queries)} queries from {CORPUS.relative_to(VAULT_ROOT)}\n")
+
+    per_query = []
+    cat_stats = {}  # category -> {v17_top1, v17_top5, v16_top1, v16_top5, count}
+
+    for q in queries:
+        # Run both pipelines on the same query, then score each result list
+        # against the same expected pages.
+        v17_results, v17_err = run_v17(q["query"], top_k=args.top)
+        v16_results, v16_err = run_v16(q["query"], top_k=args.top)
+        v17_top1, v17_top5 = score_query(v17_results, q["correct"], q["relevant"], q["category"])
+        v16_top1, v16_top5 = score_query(v16_results, q["correct"], q["relevant"], q["category"])
+
+        # Per-query record for the optional JSON report; the query text is
+        # truncated to 80 characters.
+        record = {
+            "id": q["id"],
+            "category": q["category"],
+            "query": q["query"][:80] + ("..." if len(q["query"]) > 80 else ""),
+            "correct": q["correct"],
+            "v17_top1": v17_top1,
+            "v17_top5": v17_top5,
+            "v17_results": v17_results[:args.top],
+            "v17_err": v17_err,
+            "v16_top1": v16_top1,
+            "v16_top5": v16_top5,
+            "v16_results": v16_results[:args.top],
+            "v16_err": v16_err,
+        }
+        per_query.append(record)
+
+        # Accumulate success counts per category.
+        cat = q["category"]
+        if cat not in cat_stats:
+            cat_stats[cat] = {"v17_t1": 0, "v17_t5": 0, "v16_t1": 0, "v16_t5": 0, "n": 0}
+        cat_stats[cat]["v17_t1"] += v17_top1
+        cat_stats[cat]["v17_t5"] += v17_top5
+        cat_stats[cat]["v16_t1"] += v16_top1
+        cat_stats[cat]["v16_t5"] += v16_top5
+        cat_stats[cat]["n"] += 1
+
+        # Live progress
+        marker = "✓" if v17_top1 else "·"
+        v16marker = "✓" if v16_top1 else "·"
+        print(f"  {q['id']:4} [{q['category']:14}] v17:{marker} v16:{v16marker}  {q['query'][:60]}")
+
+    # Aggregate
+    total_v17_t1 = sum(c["v17_t1"] for c in cat_stats.values())
+    total_v17_t5 = sum(c["v17_t5"] for c in cat_stats.values())
+    total_v16_t1 = sum(c["v16_t1"] for c in cat_stats.values())
+    total_v16_t5 = sum(c["v16_t5"] for c in cat_stats.values())
+    total_n = sum(c["n"] for c in cat_stats.values())
+
+    def pct(x, n):
+        # Format x/n as a percentage; "n/a" when there is nothing to divide by.
+        return f"{100.0 * x / n:5.1f}%" if n else "  n/a"
+
+    print()
+    print("=" * 80)
+    print(f"{'Category':<16} {'N':>4} {'v17 top-1':>10} {'v17 top-5':>10} {'v16 top-1':>10} {'v16 top-5':>10}  Δ top-1")
+    print("-" * 80)
+    for cat, c in sorted(cat_stats.items()):
+        # Delta is the top-1 difference (v1.7 minus v1.6) in percentage points.
+        delta = (c["v17_t1"] - c["v16_t1"]) / c["n"] * 100 if c["n"] else 0
+        print(f"{cat:<16} {c['n']:>4} {pct(c['v17_t1'], c['n']):>10} {pct(c['v17_t5'], c['n']):>10} {pct(c['v16_t1'], c['n']):>10} {pct(c['v16_t5'], c['n']):>10}  {delta:+6.1f}pp")
+    delta_total = (total_v17_t1 - total_v16_t1) / total_n * 100 if total_n else 0
+    print("-" * 80)
+    print(f"{'TOTAL':<16} {total_n:>4} {pct(total_v17_t1, total_n):>10} {pct(total_v17_t5, total_n):>10} {pct(total_v16_t1, total_n):>10} {pct(total_v16_t5, total_n):>10}  {delta_total:+6.1f}pp")
+    print()
+    print(f"Plan §7 ship-gate target: ≥30 percentage-point improvement in top-1")
+    print(f"Actual: {delta_total:+.1f}pp ({'PASS' if delta_total >= 30 else 'INFO'} — pp gain alone, not failure-reduction %)")
+    # Also compute as a relative reduction in "wrong page cited" errors
+    v17_wrong = total_n - total_v17_t1
+    v16_wrong = total_n - total_v16_t1
+    # Reported as 0 when v1.6 made no top-1 errors (nothing to reduce).
+    err_reduction = (v16_wrong - v17_wrong) / v16_wrong * 100 if v16_wrong else 0
+    print(f"Error-reduction (the gate's actual framing): {err_reduction:+.1f}% ({'PASS' if err_reduction >= 30 else 'FAIL'})")
+    print()
+
+    if args.json:
+        # Every category in cat_stats has n >= 1 (it is created and incremented
+        # in the same iteration), so the per-category divisions are safe.
+        Path(args.json).write_text(json.dumps({
+            "summary": {
+                "v17_top1_pct": 100 * total_v17_t1 / total_n if total_n else 0,
+                "v17_top5_pct": 100 * total_v17_t5 / total_n if total_n else 0,
+                "v16_top1_pct": 100 * total_v16_t1 / total_n if total_n else 0,
+                "v16_top5_pct": 100 * total_v16_t5 / total_n if total_n else 0,
+                "delta_top1_pp": delta_total,
+                "error_reduction_pct": err_reduction,
+            },
+            "by_category": {cat: {**c, "v17_top1_pct": 100*c["v17_t1"]/c["n"], "v16_top1_pct": 100*c["v16_t1"]/c["n"]} for cat, c in cat_stats.items()},
+            "per_query": per_query,
+        }, indent=2, ensure_ascii=False), encoding="utf-8")
+        print(f"Wrote per-query results to {args.json}")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,293 @@
+#!/usr/bin/env python3
+"""bm25-index.py — sparse BM25 inverted index over contextualized wiki chunks.
+
+Pure stdlib (no rank_bm25 dep). Standard Okapi BM25 with k1=1.5, b=0.75.
+Indexes the `contextualized_text` field of every chunk under .vault-meta/chunks/,
+emits a single JSON file at .vault-meta/bm25/index.json with the schema below.
+
+Concurrency:
+- Locks .vault-meta/.bm25.lock (fcntl exclusive) around any index write.
+- Atomic .tmp + rename for the index file.
+
+Index schema (.vault-meta/bm25/index.json):
+{
+  "schema_version": 1,
+  "params": {"k1": 1.5, "b": 0.75},
+  "doc_count": 1234,
+  "avg_dl": 487.5,
+  "updated_at": "2026-05-17T...",
+  "vocab": {
+    "<term>": {"df": 17, "postings": [["c-000001:0", 3], ["c-000042:2", 1], ...]}
+  },
+  "docs": {
+    "<chunk_id>": {"path": ".vault-meta/chunks/c-000001/chunk-000.json", "dl": 487}
+  }
+}
+
+Chunk id format: "<page-address>:<chunk-index>" (e.g. "c-000042:3").
+
+Tokenization: lowercase, collapse whitespace, drop punctuation except in-word
+apostrophes and hyphens. ASCII-only stopwords filtered (small list; favors
+recall over precision).
+
+Query interface (used by retrieve.py at query time):
+  bm25-index.py query "your text here" [--top 20]
+
+Build interface:
+  bm25-index.py build               # full rebuild (always; incremental is v1.7.x scope)
+  bm25-index.py stats               # print index stats
+
+Exit codes:
+  0 — success
+  1 — lock acquisition failed
+  2 — usage error
+  3 — index file missing or corrupt (query mode)
+  4 — chunks directory missing
+"""
+
+import argparse
+import fcntl
+import json
+import math
+import os
+import re
+import sys
+from collections import Counter, defaultdict
+from datetime import datetime, timezone
+from pathlib import Path
+
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+META_DIR = VAULT_ROOT / ".vault-meta"
+CHUNKS_DIR = META_DIR / "chunks"
+BM25_DIR = META_DIR / "bm25"
+INDEX_PATH = BM25_DIR / "index.json"
+LOCK_PATH = META_DIR / ".bm25.lock"
+
+K1 = 1.5
+B = 0.75
+
+# Small high-frequency-stopword list (English). Conservative — keep recall high.
+STOPWORDS = frozenset("""
+a an and are as at be by for from has have he her him his i if in is it its
+of on or that the their them they this to was were will with you your
+""".split())
+
+# Unicode-aware tokenizer (v1.7.2; closes audit M2). \w under re.UNICODE
+# matches letters and digits from any script (CJK, Cyrillic, accented Latin,
+# Devanagari, etc.) plus underscore. Internal apostrophes and hyphens are
+# preserved so "user's" and "well-formed" stay single tokens. Pure-symbol or
+# pure-emoji tokens fail the leading \w anchor and are correctly skipped.
+TOKEN_RE = re.compile(r"\w[\w'\-]*", re.UNICODE)
+
+EXIT_OK = 0
+EXIT_LOCK = 1
+EXIT_USAGE = 2
+EXIT_INDEX_MISSING = 3
+EXIT_NO_CHUNKS = 4
+
+
+def log(msg):
+    """Write `msg` followed by a newline to stderr, keeping stdout for results."""
+    sys.stderr.write(str(msg) + "\n")
+
+
+def tokenize(text):
+    """Return the lowercase terms of `text`, in order, duplicates included.
+
+    Tokens are the TOKEN_RE matches; one-character tokens and stopwords
+    (compared in lowercase) are discarded.
+    """
+    terms = []
+    for raw in TOKEN_RE.findall(text):
+        lowered = raw.lower()
+        if len(raw) > 1 and lowered not in STOPWORDS:
+            terms.append(lowered)
+    return terms
+
+
+def acquire_lock():
+    """Take the exclusive bm25 lock and return its file descriptor.
+
+    Creates META_DIR and the lock file if needed. The lock is requested with
+    LOCK_NB, so if it cannot be taken immediately the function logs an error
+    and exits the process with EXIT_LOCK instead of waiting. The caller must
+    hand the returned descriptor to release_lock().
+    """
+    META_DIR.mkdir(parents=True, exist_ok=True)
+    fd = os.open(str(LOCK_PATH), os.O_CREAT | os.O_WRONLY, 0o644)
+    try:
+        fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+    except OSError:
+        # Close the descriptor before exiting so it is not leaked.
+        os.close(fd)
+        log("ERR: could not acquire bm25 lock")
+        sys.exit(EXIT_LOCK)
+    return fd
+
+
+def release_lock(fd):
+    """Unlock and close the descriptor returned by acquire_lock().
+
+    The descriptor is closed even if the explicit unlock raises.
+    """
+    try:
+        fcntl.flock(fd, fcntl.LOCK_UN)
+    finally:
+        os.close(fd)
+
+
+def discover_chunks():
+    """Yield (chunk_id, path, contextualized_text) for every chunk on disk.
+
+    The yielded `path` is relative to the directory two levels above CHUNKS_DIR
+    (i.e. .vault-meta/chunks/<addr>/ → relative to the vault root). This works
+    both in production (CHUNKS_DIR is `<vault>/.vault-meta/chunks`) and when
+    tests monkey-patch CHUNKS_DIR to a sandbox `<tmp>/.vault-meta/chunks`.
+
+    The text is `contextualized_text`, falling back to `raw_text`, then to "".
+    Files are skipped (with a log line where noted) when they are unreadable,
+    are not valid JSON, do not hold a JSON object, lack `page_address` or
+    `chunk_index` (silently), or carry a non-string text field. One malformed
+    chunk therefore cannot abort the whole index build.
+
+    Exits the process with EXIT_NO_CHUNKS if CHUNKS_DIR is not a directory.
+    """
+    if not CHUNKS_DIR.is_dir():
+        log(f"ERR: no chunks directory at {CHUNKS_DIR}")
+        sys.exit(EXIT_NO_CHUNKS)
+    rel_root = CHUNKS_DIR.parent.parent
+    for chunk_file in sorted(CHUNKS_DIR.glob("*/chunk-*.json")):
+        try:
+            data = json.loads(chunk_file.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError) as e:
+            log(f"  skip (unreadable): {chunk_file} — {e}")
+            continue
+        if not isinstance(data, dict):
+            # Valid JSON but not an object (e.g. a list): .get() would raise.
+            log(f"  skip (not a JSON object): {chunk_file}")
+            continue
+        address = data.get("page_address")
+        idx = data.get("chunk_index")
+        if address is None or idx is None:
+            continue
+        # `or ""` also covers an explicit null raw_text, which the tokenizer
+        # cannot handle.
+        text = data.get("contextualized_text") or data.get("raw_text") or ""
+        if not isinstance(text, str):
+            log(f"  skip (non-text content): {chunk_file}")
+            continue
+        chunk_id = f"{address}:{idx}"
+        rel_path = str(chunk_file.relative_to(rel_root))
+        yield chunk_id, rel_path, text
+
+
+def build_index():
+    """Build the in-memory BM25 index from every discovered chunk.
+
+    Returns the index dict (schema_version, params, doc_count, avg_dl,
+    updated_at, vocab, docs), or None after a warning when no chunk was found.
+    Vocabulary terms are stored in sorted order; each term's postings keep
+    chunk discovery order.
+    """
+    docs = {}
+    postings = defaultdict(list)
+
+    for chunk_id, rel_path, text in discover_chunks():
+        term_counts = Counter(tokenize(text))
+        # Document length is the number of kept tokens, i.e. the sum of counts.
+        docs[chunk_id] = {"path": rel_path, "dl": sum(term_counts.values())}
+        for term, count in term_counts.items():
+            postings[term].append([chunk_id, count])
+
+    if not docs:
+        log("WARN: no chunks indexed")
+        return None
+
+    total_len = sum(entry["dl"] for entry in docs.values())
+
+    # One posting is appended per (term, chunk), so df is the posting count.
+    vocab = {}
+    for term in sorted(postings):
+        term_postings = postings[term]
+        vocab[term] = {"df": len(term_postings), "postings": term_postings}
+
+    return {
+        "schema_version": 1,
+        "params": {"k1": K1, "b": B},
+        "doc_count": len(docs),
+        "avg_dl": total_len / len(docs),
+        "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "vocab": vocab,
+        "docs": docs,
+    }
+
+
+def write_index(index):
+    """Serialize `index` to INDEX_PATH via a per-process temp file and rename.
+
+    The temp file sits next to the target and is named with this process id;
+    it is removed afterwards if it is still present (i.e. the rename did not
+    happen).
+    """
+    BM25_DIR.mkdir(parents=True, exist_ok=True)
+    scratch = INDEX_PATH.with_suffix(f".{os.getpid()}.tmp")
+    payload = json.dumps(index, ensure_ascii=False)
+    try:
+        scratch.write_text(payload, encoding="utf-8")
+        os.replace(scratch, INDEX_PATH)
+    finally:
+        scratch.unlink(missing_ok=True)
+
+
+def load_index():
+    """Read and parse the index file, returning the decoded JSON value.
+
+    Exits the process with EXIT_INDEX_MISSING when the file does not exist,
+    cannot be read, or is not valid JSON.
+    """
+    if not INDEX_PATH.is_file():
+        log(f"ERR: no index at {INDEX_PATH}. Run `bm25-index.py build` first.")
+        sys.exit(EXIT_INDEX_MISSING)
+    try:
+        raw = INDEX_PATH.read_text(encoding="utf-8")
+        index = json.loads(raw)
+    except (json.JSONDecodeError, OSError) as e:
+        log(f"ERR: index corrupt: {e}")
+        sys.exit(EXIT_INDEX_MISSING)
+    return index
+
+
+def query(text, top_k=20):
+    """Score indexed chunks against `text` with BM25 and return the best `top_k`.
+
+    Uses the k1/b parameters stored in the index. Each result is a dict with
+    chunk_id, score (rounded to 6 places) and path, ordered by descending
+    score. Returns [] when the query has no usable terms.
+    """
+    index = load_index()
+    vocab, docs, params = index["vocab"], index["docs"], index["params"]
+    avg_dl, doc_count = index["avg_dl"], index["doc_count"]
+    k1, b = params["k1"], params["b"]
+
+    terms = tokenize(text)
+    if not terms:
+        return []
+
+    # A zero average length is replaced by 1.0 so the length normalisation
+    # below can never divide by zero.
+    norm_len = avg_dl or 1.0
+
+    scores = defaultdict(float)
+    for term in terms:
+        entry = vocab.get(term)
+        if not entry:
+            continue  # term not in the index
+        df = entry["df"]
+        idf = math.log(1 + (doc_count - df + 0.5) / (df + 0.5))
+        for chunk_id, tf in entry["postings"]:
+            dl = docs[chunk_id]["dl"]
+            denom = tf + k1 * (1 - b + b * dl / norm_len)
+            scores[chunk_id] += idf * (tf * (k1 + 1)) / denom
+
+    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:top_k]
+
+    results = []
+    for chunk_id, score in ranked:
+        results.append({
+            "chunk_id": chunk_id,
+            "score": round(score, 6),
+            "path": docs[chunk_id]["path"],
+        })
+    return results
+
+
+def stats():
+    """Print a JSON summary of the stored index to stdout.
+
+    The summary holds doc_count, avg_dl (rounded to 2 places), vocab_size,
+    updated_at and params.
+    """
+    index = load_index()
+    summary = {
+        "doc_count": index["doc_count"],
+        "avg_dl": round(index["avg_dl"], 2),
+        "vocab_size": len(index["vocab"]),
+        "updated_at": index["updated_at"],
+        "params": index["params"],
+    }
+    print(json.dumps(summary, indent=2))
+
+
+def main():
+    """CLI entry point for the build / query / stats subcommands.
+
+    Returns the process exit code. `build` holds the bm25 lock for the whole
+    rebuild and releases it on every path; `query` prints the ranked results
+    as JSON; `stats` prints the index summary.
+    """
+    parser = argparse.ArgumentParser(description="BM25 inverted index over wiki chunks.")
+    sub = parser.add_subparsers(dest="cmd", required=True)
+
+    sub.add_parser("build", help="Build the index (full rebuild every time in v1.7).")
+
+    query_parser = sub.add_parser("query", help="Query the index.")
+    query_parser.add_argument("text", help="Query text")
+    query_parser.add_argument("--top", type=int, default=20, help="Top-K results")
+
+    sub.add_parser("stats", help="Print index stats.")
+
+    args = parser.parse_args()
+    command = args.cmd
+
+    if command == "query":
+        print(json.dumps(query(args.text, top_k=args.top), indent=2))
+        return EXIT_OK
+
+    if command == "stats":
+        stats()
+        return EXIT_OK
+
+    if command == "build":
+        fd = acquire_lock()
+        try:
+            index = build_index()
+            if index is not None:
+                write_index(index)
+                log(f"Wrote {INDEX_PATH}  docs={index['doc_count']}  vocab={len(index['vocab'])}  avg_dl={index['avg_dl']:.1f}")
+            else:
+                log("Nothing to index.")
+        finally:
+            release_lock(fd)
+        return EXIT_OK
+
+    return EXIT_USAGE
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+"""boundary-score.py — DragonScale Mechanism 4: boundary-first autoresearch scorer.
+
+Reads `wiki/**/*.md`, builds a wikilink graph, and emits per-page boundary
+scores to stdout (text) or as JSON for tooling.
+
+boundary_score(p) = (out_degree(p) - in_degree(p)) * recency_weight(p)
+
+- out_degree(p): count of distinct wikilinks in p that resolve to a
+  scoreable page (scoreable = non-meta, non-fold, non-excluded).
+- in_degree(p):  count of distinct scoreable pages that link to p.
+- recency_weight(p): exp(-days_since_updated / RECENCY_HALFLIFE_DAYS).
+  No floor; very old pages approach zero weight, which is the intended
+  semantic of "frontier" (recently-touched and outward-pointing).
+
+High score = the page points at many things, is pointed at by few, and
+has been touched recently. That is a vault frontier page. Low or
+negative score = hub / integrated page.
+
+Feature-gated opt-in: autoresearch only invokes this when DragonScale
+setup is detected. Safe to run standalone even without DragonScale set
+up (reads wiki/ only; never writes).
+
+This script is intentionally stdout-only. There is no `--report PATH`
+equivalent to `tiling-check.py --report` because the helper is small
+enough to pipe directly (`./scripts/boundary-score.py --json | jq ...`)
+and keeping it read-only removes a write-path attack surface.
+
+Usage:
+  boundary-score.py                         # top-10 frontier, text
+  boundary-score.py --top N                 # top N frontier
+  boundary-score.py --json                  # JSON output
+  boundary-score.py --page PATH             # score for a single page
+  boundary-score.py --include-score-zero    # include pages with score=0
+
+Exit codes:
+  0  success
+  2  usage error
+"""
+
+import argparse
+import json
+import math
+import re
+import sys
+from datetime import date, datetime, timezone
+from pathlib import Path
+
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+WIKI_DIR = VAULT_ROOT / "wiki"
+
+EXCLUDE_TYPES = {"meta", "fold"}
+EXCLUDE_FILENAMES = {
+    "_index.md", "index.md", "log.md", "hot.md", "overview.md",
+    "dashboard.md", "Wiki Map.md", "getting-started.md",
+}
+EXCLUDE_PATH_PREFIXES = ("wiki/folds/", "wiki/meta/")
+
+# Decay constant (in days) used by recency_weight(): weight = exp(-days / this).
+# NOTE: despite the name, with that formula this is an e-folding time rather
+# than a true half-life: a page exactly this many days old gets weight
+# e**-1 (~0.37), and the weight reaches 0.5 at about 0.69x this value.
+RECENCY_HALFLIFE_DAYS = 30.0
+# No recency floor: a truly stale page should NOT dominate the frontier
+# ranking, even if its out-degree is high. The exponential decay takes
+# weight toward zero for year-old pages, which is the intended semantic
+# of "frontier" (recently-touched and outward-pointing).
+# Default number of rows reported when --top is not given.
+DEFAULT_TOP = 10
+MAX_BODY_BYTES = 256 * 1024
+# CommonMark-ish fence tracking: opening fence records (char, length);
+# a closing fence must use the SAME char with SAME-OR-LONGER run length.
+# Tilde fences (~~~) are supported alongside backtick fences (```). Indented
+# code blocks (4+ spaces) are NOT filtered; in Obsidian usage, indented
+# bullets commonly contain wikilinks and should count as edges.
+
+FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
+TYPE_RE = re.compile(r"^type:\s*(\S+)", re.MULTILINE)
+UPDATED_RE = re.compile(r"^updated:\s*([0-9]{4}-[0-9]{2}-[0-9]{2})", re.MULTILINE)
+CREATED_RE = re.compile(r"^created:\s*([0-9]{4}-[0-9]{2}-[0-9]{2})", re.MULTILINE)
+TITLE_RE = re.compile(r'^title:\s*"?([^"\n]+?)"?\s*$', re.MULTILINE)
+# Obsidian wikilinks: [[Target]] or [[Target|Alias]] or [[Target#Heading]]
+WIKILINK_RE = re.compile(r"\[\[([^\]|#]+)(?:#[^\]|]+)?(?:\|[^\]]+)?\]\]")
+
+EXIT_OK = 0
+EXIT_USAGE = 2
+
+
+def log(msg: str) -> None:
+    print(msg, file=sys.stderr)
+
+
+def parse_frontmatter(text: str) -> tuple[dict, str]:
+    m = FRONTMATTER_RE.match(text)
+    if not m:
+        return {}, text
+    fm_raw = m.group(1)
+    body = text[m.end():]
+    fm: dict = {}
+    for key, regex in (("type", TYPE_RE), ("updated", UPDATED_RE),
+                       ("created", CREATED_RE), ("title", TITLE_RE)):
+        tm = regex.search(fm_raw)
+        if tm:
+            fm[key] = tm.group(1).strip().strip('"').strip("'")
+    return fm, body
+
+
+def included(path: Path, fm: dict) -> bool:
+    if path.is_symlink():
+        return False
+    try:
+        resolved = path.resolve(strict=True)
+        resolved.relative_to(VAULT_ROOT.resolve())
+    except (OSError, ValueError):
+        return False
+    rel = path.relative_to(VAULT_ROOT).as_posix()
+    if path.name in EXCLUDE_FILENAMES:
+        return False
+    for prefix in EXCLUDE_PATH_PREFIXES:
+        if rel.startswith(prefix):
+            return False
+    if fm.get("type") in EXCLUDE_TYPES:
+        return False
+    return True
+
+
+def days_since(date_str: str | None) -> float:
+    """Return days since the given YYYY-MM-DD string, or a large sentinel if missing."""
+    if not date_str:
+        return 10_000.0
+    try:
+        d = date.fromisoformat(date_str)
+    except ValueError:
+        return 10_000.0
+    delta = (date.today() - d).days
+    return max(0.0, float(delta))
+
+
+def recency_weight(days: float,
+                   halflife: float = RECENCY_HALFLIFE_DAYS) -> float:
+    return math.exp(-days / halflife)
+
+
+_FENCE_RE = re.compile(r"^(\s*)(`{3,}|~{3,})")
+
+
+def extract_wikilinks(body: str) -> set[str]:
+    """Extract unique link targets (without alias or heading suffix) from the body.
+
+    Skips wikilinks inside fenced code blocks so documentation examples
+    (including in this repo's own skill files) do not pollute the graph.
+
+    Fence handling: backtick AND tilde fences, with length tracking per
+    CommonMark: the opening run sets (char, min_len); the closing line
+    must use the SAME char with a run of SAME-OR-LONGER length. Indented
+    code blocks (4+ spaces) are intentionally NOT filtered — indented
+    bullets in Obsidian often contain wikilinks.
+    """
+    cleaned: list[str] = []
+    fence_char: str | None = None
+    fence_len: int = 0
+    for line in body.splitlines():
+        m = _FENCE_RE.match(line)
+        if m:
+            char = m.group(2)[0]
+            length = len(m.group(2))
+            if fence_char is None:
+                fence_char = char
+                fence_len = length
+                continue
+            if char == fence_char and length >= fence_len:
+                fence_char = None
+                fence_len = 0
+                continue
+        if fence_char is not None:
+            continue
+        cleaned.append(line)
+    scan = "\n".join(cleaned)
+    results: set[str] = set()
+    for m in WIKILINK_RE.finditer(scan):
+        raw = m.group(1).strip()
+        # Folder-qualified links like [[notes/Foo]] resolve to Foo.md by stem.
+        # This matches Obsidian default behavior for unique filenames.
+        stem = raw.rsplit("/", 1)[-1]
+        if stem:
+            results.add(stem)
+    return results
+
+
+def collect_pages() -> dict[str, dict]:
+    """Scan wiki/, return {title_key: {path, title, body, fm}} for scoreable pages.
+
+    `title_key` is the filename stem, which is what Obsidian wikilinks resolve
+    to by default. Assumes filenames are unique across the vault (enforced by
+    wiki-lint naming convention).
+    """
+    pages: dict[str, dict] = {}
+    if not WIKI_DIR.is_dir():
+        return pages
+    for md in sorted(WIKI_DIR.rglob("*.md")):
+        try:
+            text = md.read_text(encoding="utf-8")
+        except (OSError, UnicodeDecodeError):
+            continue
+        if len(text.encode("utf-8")) > MAX_BODY_BYTES:
+            continue
+        fm, body = parse_frontmatter(text)
+        if not included(md, fm):
+            continue
+        title_key = md.stem  # Obsidian wikilinks are filename-based
+        pages[title_key] = {
+            "path": md.relative_to(VAULT_ROOT).as_posix(),
+            "title": fm.get("title", title_key),
+            "body": body,
+            "fm": fm,
+        }
+    return pages
+
+
+def build_graph(pages: dict[str, dict]) -> tuple[dict[str, set[str]], dict[str, set[str]]]:
+    """Return (out_edges, in_edges) where each maps title_key -> set(title_key).
+
+    Only edges whose target is a known scoreable page are counted. Self-loops
+    are ignored.
+    """
+    out_edges: dict[str, set[str]] = {k: set() for k in pages}
+    in_edges: dict[str, set[str]] = {k: set() for k in pages}
+    for src, entry in pages.items():
+        links = extract_wikilinks(entry["body"])
+        for target in links:
+            if target == src:
+                continue
+            if target in pages:
+                out_edges[src].add(target)
+                in_edges[target].add(src)
+    return out_edges, in_edges
+
+
+def score_page(title_key: str,
+               pages: dict[str, dict],
+               out_edges: dict[str, set[str]],
+               in_edges: dict[str, set[str]]) -> dict:
+    entry = pages[title_key]
+    fm = entry["fm"]
+    out_deg = len(out_edges.get(title_key, set()))
+    in_deg = len(in_edges.get(title_key, set()))
+    date_str = fm.get("updated") or fm.get("created")
+    days = days_since(date_str)
+    rw = recency_weight(days)
+    score = (out_deg - in_deg) * rw
+    return {
+        "title": entry["title"],
+        "title_key": title_key,
+        "path": entry["path"],
+        "out_degree": out_deg,
+        "in_degree": in_deg,
+        "age_days": days,
+        "recency_weight": round(rw, 4),
+        "score": round(score, 4),
+    }
+
+
+def run(top: int, want_json: bool, include_zero: bool, page_filter: str | None) -> int:
+    pages = collect_pages()
+    out_edges, in_edges = build_graph(pages)
+    scored = [score_page(k, pages, out_edges, in_edges) for k in pages]
+    if page_filter:
+        key = Path(page_filter).stem
+        matched = [s for s in scored if s["title_key"] == key or s["path"] == page_filter]
+        if not matched:
+            log(f"ERR: no scoreable page matches '{page_filter}'")
+            return EXIT_USAGE
+        scored = matched
+    else:
+        if not include_zero:
+            scored = [s for s in scored if s["score"] > 0.0]
+        scored.sort(key=lambda s: (-s["score"], s["title_key"]))
+        scored = scored[:top]
+
+    if want_json:
+        print(json.dumps({
+            "generated": datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z"),
+            "halflife_days": RECENCY_HALFLIFE_DAYS,
+            "page_count_scoreable": len(pages),
+            "results": scored,
+        }, indent=2))
+    else:
+        print("# Boundary Score Report")
+        print(f"scoreable pages: {len(pages)}; halflife: {RECENCY_HALFLIFE_DAYS} days")
+        if not scored:
+            print("\nNo positive-score frontier pages found.")
+        else:
+            print("")
+            print("| # | score | out | in | age_d | title | path |")
+            print("|---|---|---|---|---|---|---|")
+            for i, s in enumerate(scored, 1):
+                print(f"| {i} | {s['score']:.3f} | {s['out_degree']} | {s['in_degree']} | "
+                      f"{int(s['age_days'])} | {s['title']} | {s['path']} |")
+    return EXIT_OK
+
+
+def main(argv: list[str]) -> int:
+    p = argparse.ArgumentParser()
+    p.add_argument("--top", type=int, default=DEFAULT_TOP)
+    p.add_argument("--json", action="store_true")
+    p.add_argument("--include-score-zero", action="store_true",
+                   help="Include pages whose score is zero or negative in the output")
+    p.add_argument("--page", default=None, help="Score a single page by path or stem")
+    args = p.parse_args(argv)
+    if args.top < 1:
+        log("ERR: --top must be >= 1")
+        return EXIT_USAGE
+    return run(args.top, args.json, args.include_score_zero, args.page)
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
@@ -0,0 +1,505 @@
+#!/usr/bin/env python3
+"""contextual-prefix.py — chunk wiki pages and generate per-chunk contextual prefixes.
+
+Implements the ingest-side of Anthropic's Sept 2024 Contextual Retrieval pattern
+(https://www.anthropic.com/news/contextual-retrieval). For each chunk of a wiki
+page, generates a 1-2 sentence prefix situating the chunk in its source. The
+prefixed text is what gets BM25-indexed and embedded, materially improving
+retrieval accuracy (Anthropic measured 35-49% failure reduction).
+
+Three-tier prefix generation (chosen per-run automatically):
+  1. If ANTHROPIC_API_KEY is set      → direct Anthropic API call (Haiku 4.5)
+                                         with prompt caching on the page body
+                                         (only when the body clears the ~16 KB
+                                         Haiku 4.5 cache floor; see
+                                         cache_control_for()).
+                                         ~$12 / 1000 docs per Anthropic figures.
+                                         REQUIRES --allow-egress (sends bodies off-machine).
+  2. Elif `claude` binary on PATH     → `claude -p` subprocess (uses CC subscription;
+                                         no API key needed; slower per call).
+                                         REQUIRES --allow-egress (subprocess egresses).
+  3. Else (default)                   → synthetic prefix from page frontmatter +
+                                         first paragraph (zero-cost floor; loses
+                                         most of the contextual benefit but BM25
+                                         and vector channels still work).
+
+Data-egress posture (v1.7.1+):
+  Tiers 1 and 2 send wiki page bodies off-machine. Both are GATED behind
+  --allow-egress (default off). Without the flag, pick_prefix_tier() always
+  returns "synthetic" regardless of env vars or claude binary presence.
+  Mirror of scripts/tiling-check.py:351 --allow-remote-ollama precedent.
+
+Chunk schema written to .vault-meta/chunks/<page-address>/chunk-NNN.json:
+{
+  "schema_version": 1,
+  "page_path": "wiki/concepts/Foo.md",
+  "page_address": "c-000042",
+  "chunk_index": 3,
+  "raw_text": "...",
+  "contextualized_text": "<prefix> + blank line + <raw_text>",
+  "prefix": "...",               # the generated prefix on its own
+  "prefix_source": "anthropic-api" | "claude-cli" | "synthetic" | "skipped",
+  "char_count": 487,
+  "body_hash": "sha256:...",     # of raw_text
+  "page_body_hash": "sha256:...", # of the WHOLE source page (for invalidation)
+  "created_at": "2026-05-17T..."
+}
+
+Pages without an `address:` frontmatter field are still chunked (using a
+synthetic `syn-XXXXXX` address derived from a hash of the vault-relative path)
+so this tool works on v1.6 vaults without DragonScale Mechanism 2 enabled.
+
+Usage:
+  contextual-prefix.py PATH               # process a single page
+  contextual-prefix.py --all              # process every wiki/*.md
+  contextual-prefix.py PATH --no-llm      # force synthetic-prefix tier 3
+  contextual-prefix.py PATH --rebuild     # ignore existing chunks
+  contextual-prefix.py PATH --peek        # print what would happen; write nothing
+
+Exit codes:
+  0 — success
+  2 — usage error
+  3 — page file missing or unreadable
+  4 — chunk dir creation failed
+"""
+
+import argparse
+import hashlib
+import json
+import os
+import re
+import shutil
+import subprocess
+import sys
+import urllib.error
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+WIKI_DIR = VAULT_ROOT / "wiki"
+META_DIR = VAULT_ROOT / ".vault-meta"
+CHUNKS_DIR = META_DIR / "chunks"
+
+CHUNK_TARGET_TOKENS = 500  # rough; we approximate via chars/4
+CHUNK_TARGET_CHARS = CHUNK_TARGET_TOKENS * 4
+CHUNK_OVERLAP_CHARS = 200
+
+ANTHROPIC_MODEL = "claude-haiku-4-5-20251001"
+ANTHROPIC_API_URL = "https://api.anthropic.com/v1/messages"
+ANTHROPIC_TIMEOUT_SEC = 30
+CLAUDE_CLI_TIMEOUT_SEC = 60
+
+# Anthropic prompt caching ignores any cached prefix below the model's minimum
+# cacheable size — 4,096 tokens for Haiku 4.5 (verified against the prompt-caching
+# docs, 2026-05). At ~4 chars/token that is ~16 KB. We attach cache_control only
+# when the body clears this floor so the marker reflects reality: below the floor
+# the API treats it as a silent no-op. The per-call cache telemetry in
+# anthropic_api_prefix() is what actually measures hit rate. The check counts the
+# body only — a deliberately conservative ~370-char underestimate that ignores the
+# system_msg + <page> wrapper also inside the cached prefix — so near the boundary
+# it errs toward not-marking, never toward a wrongly-attached marker.
+HAIKU_CACHE_MIN_CHARS = 16384  # 4096 tokens * 4 chars/token
+
+EXIT_OK = 0
+EXIT_USAGE = 2
+EXIT_PAGE_MISSING = 3
+EXIT_CHUNK_DIR = 4
+
+FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
+ADDRESS_RE = re.compile(r"^address:\s*(c-\d{6})\s*$", re.MULTILINE)
+TITLE_RE = re.compile(r"^title:\s*['\"]?(.+?)['\"]?\s*$", re.MULTILINE)
+
+
+def log(msg):
+    print(msg, file=sys.stderr)
+
+
+def sha256(text):
+    return "sha256:" + hashlib.sha256(text.encode("utf-8")).hexdigest()
+
+
+def read_page(path):
+    if not path.is_file():
+        raise SystemExit(EXIT_PAGE_MISSING)
+    return path.read_text(encoding="utf-8", errors="replace")
+
+
+def parse_frontmatter(body):
+    m = FRONTMATTER_RE.match(body)
+    if not m:
+        return {}, body
+    fm_text = m.group(1)
+    rest = body[m.end():]
+    addr_m = ADDRESS_RE.search(fm_text)
+    title_m = TITLE_RE.search(fm_text)
+    return {
+        "address": addr_m.group(1) if addr_m else None,
+        "title": title_m.group(1) if title_m else None,
+        "raw": fm_text,
+    }, rest
+
+
+def derive_synthetic_address(page_path):
+    """Stable per-path address-shaped string when no real address is set.
+    Format: c-NNNNNN derived from a hash of the relative path (deterministic).
+    Distinct from allocator addresses; used only for chunk filing.
+    """
+    rel = page_path.relative_to(VAULT_ROOT)
+    h = hashlib.sha1(str(rel).encode("utf-8")).hexdigest()
+    return "syn-" + h[:6]
+
+
+def chunk_body(body, target_chars=CHUNK_TARGET_CHARS, overlap=CHUNK_OVERLAP_CHARS):
+    """Split body into overlapping chunks on paragraph boundaries when possible.
+    Heuristic: walk the body, accumulate paragraphs until len exceeds target,
+    flush, then keep the trailing `overlap` chars as the seed of the next chunk.
+    Empty paragraphs collapse to single boundaries.
+    """
+    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", body) if p.strip()]
+    chunks = []
+    cur = []
+    cur_len = 0
+    for p in paragraphs:
+        cur.append(p)
+        cur_len += len(p) + 2
+        if cur_len >= target_chars:
+            chunk_text = "\n\n".join(cur)
+            chunks.append(chunk_text)
+            # seed next chunk with the tail
+            tail = chunk_text[-overlap:] if overlap > 0 else ""
+            cur = [tail] if tail else []
+            cur_len = len(tail)
+    if cur and "".join(cur).strip():
+        chunks.append("\n\n".join(cur))
+    if not chunks and body.strip():
+        # tiny page — single chunk
+        chunks = [body.strip()]
+    return chunks
+
+
+def synthetic_prefix(fm, body, chunk_text):
+    """Tier-3 prefix: page title + first sentence of the page body.
+    Free, hermetic, deterministic. Provides modest BM25 lift via title-word
+    re-injection into the chunk corpus.
+    """
+    title = (fm.get("title") or "").strip() or "(untitled)"
+    # First sentence of the body (not the chunk — gives the chunk a page-level frame)
+    first_sentence = re.split(r"(?<=[.!?])\s+", body.strip(), maxsplit=1)
+    first = first_sentence[0][:300] if first_sentence else ""
+    return f"This passage is from the wiki page \"{title}\". The page opens: {first}"
+
+
+def cache_control_for(page_body):
+    """Ephemeral cache_control dict when the page body clears the Haiku cache
+    floor, else None. Pure function so the floor decision is unit-testable
+    without the network (the API call itself stays egress-gated).
+    """
+    if len(page_body) >= HAIKU_CACHE_MIN_CHARS:
+        return {"type": "ephemeral"}
+    return None
+
+
+def anthropic_api_prefix(api_key, page_title, page_body, chunk_text):
+    """Tier-1 prefix: direct Anthropic API call, Haiku, prompt-cached page body.
+
+    The page body is the stable prefix shared by every chunk of a page, so it
+    goes in `system` behind a cache breakpoint and the variable chunk goes in
+    `messages`. Cache reads only land because chunks are processed sequentially
+    (chunk 0 warms the prefix) — see the loop note in process_page().
+    """
+    system_msg = (
+        "You are a retrieval-augmentation assistant. Given a wiki page and one "
+        "chunk extracted from it, write a single short sentence (under 35 words) "
+        "that situates the chunk within the page's scope and topic. Output only "
+        "the sentence — no prefix, no quotation marks, no commentary."
+    )
+    page_block = {
+        "type": "text",
+        "text": f"<page title=\"{page_title}\">\n{page_body}\n</page>",
+    }
+    cc = cache_control_for(page_body)
+    if cc:
+        page_block["cache_control"] = cc
+    payload = {
+        "model": ANTHROPIC_MODEL,
+        "max_tokens": 100,
+        "system": [
+            {"type": "text", "text": system_msg},
+            page_block,
+        ],
+        "messages": [
+            {
+                "role": "user",
+                "content": (
+                    "Write the single contextualizing sentence for this chunk:\n\n"
+                    f"<chunk>\n{chunk_text}\n</chunk>"
+                ),
+            }
+        ],
+    }
+    body = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(
+        ANTHROPIC_API_URL,
+        data=body,
+        headers={
+            "Content-Type": "application/json",
+            "x-api-key": api_key,
+            "anthropic-version": "2023-06-01",
+        },
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=ANTHROPIC_TIMEOUT_SEC) as resp:
+            data = json.loads(resp.read().decode("utf-8"))
+            # Cache telemetry: integer token counts only, never page content, so
+            # the data-egress posture holds. Confirms whether the body cache is
+            # actually firing given the Haiku floor (wrote>0 on chunk 0, read>0
+            # on later chunks of the same page).
+            usage = data.get("usage", {})
+            log(f"  cache: wrote={usage.get('cache_creation_input_tokens', 0)} "
+                f"read={usage.get('cache_read_input_tokens', 0)} tok")
+            for block in data.get("content", []):
+                if block.get("type") == "text":
+                    return block["text"].strip().splitlines()[0]
+    except (urllib.error.URLError, json.JSONDecodeError, KeyError) as e:
+        log(f"  anthropic-api call failed: {e}")
+        return None
+    return None
+
+
+def claude_cli_prefix(page_title, page_body, chunk_text):
+    """Tier-2 prefix: `claude -p` subprocess (uses CC subscription, no API key)."""
+    prompt = (
+        f"Wiki page \"{page_title}\":\n\n"
+        f"---\n{page_body[:4000]}\n---\n\n"
+        f"Chunk:\n<chunk>\n{chunk_text}\n</chunk>\n\n"
+        "Write one short sentence (under 35 words) situating this chunk within "
+        "the page's scope. Output only the sentence."
+    )
+    try:
+        result = subprocess.run(
+            ["claude", "-p", prompt],
+            capture_output=True,
+            text=True,
+            timeout=CLAUDE_CLI_TIMEOUT_SEC,
+            check=False,
+        )
+        if result.returncode == 0 and result.stdout.strip():
+            return result.stdout.strip().splitlines()[0]
+        log(f"  claude-cli rc={result.returncode}: {result.stderr.strip()[:200]}")
+    except (subprocess.TimeoutExpired, FileNotFoundError) as e:
+        log(f"  claude-cli call failed: {e}")
+    return None
+
+
+def pick_prefix_tier(force_synthetic, allow_egress=False):
+    """Choose the contextual-prefix generation tier.
+
+    Without allow_egress=True, ALWAYS returns "synthetic" regardless of
+    env vars or claude binary availability. This is the v1.7.1 data-egress
+    guard: tiers 1 (Anthropic API) and 2 (claude CLI subprocess) both send
+    wiki page bodies off-machine, so they require explicit user consent via
+    the --allow-egress flag at the CLI layer.
+
+    Mirrors scripts/tiling-check.py:351 --allow-remote-ollama default-deny.
+    """
+    if force_synthetic or not allow_egress:
+        return "synthetic"
+    if os.environ.get("ANTHROPIC_API_KEY"):
+        return "anthropic-api"
+    if shutil.which("claude"):
+        return "claude-cli"
+    return "synthetic"
+
+
+def generate_prefix(tier, fm, body, chunk_text):
+    """Asymmetric fallback by design:
+      - tier="anthropic-api" → on failure, try claude-cli (subprocess,
+        free) before synthetic. The API is the user's stated preference,
+        and claude-cli is the closer-in-quality fallback.
+      - tier="claude-cli"    → on failure, go straight to synthetic. The
+        user has either no API key or has not opted into one; climbing
+        back to the API would silently spend money they did not authorize.
+      - tier="synthetic"     → always synthetic.
+    """
+    title = fm.get("title") or "(untitled)"
+    if tier == "anthropic-api":
+        result = anthropic_api_prefix(
+            os.environ["ANTHROPIC_API_KEY"], title, body, chunk_text
+        )
+        if result:
+            return result, "anthropic-api"
+        if shutil.which("claude"):
+            result = claude_cli_prefix(title, body, chunk_text)
+            if result:
+                return result, "claude-cli"
+        return synthetic_prefix(fm, body, chunk_text), "synthetic"
+    if tier == "claude-cli":
+        result = claude_cli_prefix(title, body, chunk_text)
+        if result:
+            return result, "claude-cli"
+        return synthetic_prefix(fm, body, chunk_text), "synthetic"
+    return synthetic_prefix(fm, body, chunk_text), "synthetic"
+
+
+def process_page(page_path, force_synthetic=False, rebuild=False, peek=False,
+                 allow_egress=False, progress_label=""):
+    body = read_page(page_path)
+    fm, content = parse_frontmatter(body)
+    address = fm.get("address") or derive_synthetic_address(page_path)
+    page_body_hash = sha256(body)
+
+    chunk_dir = CHUNKS_DIR / address
+    if not peek:
+        try:
+            chunk_dir.mkdir(parents=True, exist_ok=True)
+        except OSError as e:
+            log(f"ERR: cannot create chunk dir {chunk_dir}: {e}")
+            raise SystemExit(EXIT_CHUNK_DIR)
+
+    chunks = chunk_body(content)
+    tier = pick_prefix_tier(force_synthetic, allow_egress=allow_egress)
+
+    progress = (progress_label + " ") if progress_label else ""
+    if not chunks:
+        # v1.7.2 / closes audit M6: previously this logged "chunks=0" with no
+        # explanation and silently produced no index entries. Now: explicit WARN
+        # so the user notices empty-body pages (often frontmatter-only stubs).
+        log(f"{progress}WARN: {page_path.relative_to(VAULT_ROOT)} has no chunkable body content "
+            f"(empty after frontmatter strip). Skipping; no chunks written.")
+        return {"address": address, "written": [], "skipped": 0, "tier": tier}
+
+    log(f"{progress}-> {page_path.relative_to(VAULT_ROOT)}  address={address}  chunks={len(chunks)}  tier={tier}")
+
+    written = []
+    skipped = 0
+    # Keep this loop sequential. The tier-1 Anthropic path caches the page body;
+    # a cache entry is only readable after the first response begins (Anthropic
+    # prompt-caching concurrency rule), so chunk 0 warms the prefix and chunks
+    # 1..N read it. Parallelizing here would silently zero every cache read.
+    for idx, raw in enumerate(chunks):
+        chunk_path = chunk_dir / f"chunk-{idx:03d}.json"
+        body_hash = sha256(raw)
+
+        if chunk_path.exists() and not rebuild:
+            try:
+                existing = json.loads(chunk_path.read_text(encoding="utf-8"))
+                if existing.get("body_hash") == body_hash and \
+                   existing.get("page_body_hash") == page_body_hash:
+                    skipped += 1
+                    continue
+            except (json.JSONDecodeError, OSError):
+                pass  # corrupted; overwrite
+
+        if peek:
+            log(f"   would write {chunk_path.name} ({len(raw)} chars)")
+            continue
+
+        prefix, prefix_source = generate_prefix(tier, fm, content, raw)
+        contextualized = f"{prefix}\n\n{raw}" if prefix else raw
+
+        record = {
+            "schema_version": 1,
+            "page_path": str(page_path.relative_to(VAULT_ROOT)),
+            "page_address": address,
+            "chunk_index": idx,
+            "raw_text": raw,
+            "contextualized_text": contextualized,
+            "prefix": prefix or "",
+            "prefix_source": prefix_source,
+            "char_count": len(raw),
+            "body_hash": body_hash,
+            "page_body_hash": page_body_hash,
+            "created_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        }
+        tmp = chunk_path.with_suffix(f".{os.getpid()}.tmp")
+        try:
+            tmp.write_text(json.dumps(record, ensure_ascii=False, indent=2), encoding="utf-8")
+            os.replace(tmp, chunk_path)
+        finally:
+            if tmp.exists():
+                tmp.unlink(missing_ok=True)
+        written.append(chunk_path.name)
+
+    log(f"   wrote={len(written)}  skipped(unchanged)={skipped}")
+    return {"address": address, "written": written, "skipped": skipped, "tier": tier}
+
+
+def collect_pages(target):
+    if target == "--all" or target is None:
+        return sorted(p for p in WIKI_DIR.rglob("*.md")
+                      if not any(part.startswith(".") for part in p.parts))
+    p = Path(target)
+    if not p.is_absolute():
+        p = VAULT_ROOT / p
+    return [p]
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Chunk + contextualize wiki pages.")
+    parser.add_argument("path", nargs="?",
+                        help="Page path relative to vault root. Omit (or pass --all) "
+                             "to process every wiki page.")
+    parser.add_argument("--all", action="store_true",
+                        help="Process every wiki page (equivalent to omitting path).")
+    parser.add_argument("--no-llm", action="store_true",
+                        help="Force tier-3 synthetic prefix (skip LLM calls).")
+    parser.add_argument("--allow-egress", action="store_true",
+                        help="Allow tier-1 (Anthropic API) or tier-2 (claude CLI "
+                             "subprocess) prefix generation. Without this flag, page "
+                             "bodies stay on-machine and only the tier-3 synthetic "
+                             "prefix is used. Mirror of tiling-check.py's "
+                             "--allow-remote-ollama guard.")
+    parser.add_argument("--rebuild", action="store_true",
+                        help="Re-process chunks even if body_hash matches.")
+    parser.add_argument("--peek", action="store_true",
+                        help="Print plan, write nothing.")
+    args = parser.parse_args()
+
+    if args.all and not args.path:
+        args.path = "--all"
+    elif not args.path:
+        # No path and no --all: default to all (matches the help text)
+        args.path = "--all"
+
+    pages = collect_pages(args.path)
+    # Explicit single-path invocations must point at a readable file inside the
+    # vault. --all only ever yields in-vault files, so this guard is explicit-only.
+    # Without it a typo'd path exited 0 silently, and an out-of-vault path raised
+    # a raw ValueError from relative_to().
+    if args.path != "--all":
+        target = pages[0].resolve()
+        if not target.is_relative_to(VAULT_ROOT):
+            log(f"ERR: {args.path} resolves outside the vault ({VAULT_ROOT}).")
+            return EXIT_USAGE
+        if not target.is_file():
+            log(f"ERR: {args.path} is not a readable file.")
+            return EXIT_PAGE_MISSING
+    # Filter to actual files up front so progress counter is meaningful
+    # (v1.7.2; closes audit L2: tier-2 over 47 pages can take 5+ min — the
+    # user needs a count, not just per-page log lines).
+    files = [p for p in pages if p.is_file()]
+    skipped_non_files = len(pages) - len(files)
+    if skipped_non_files:
+        log(f"({skipped_non_files} non-file paths skipped)")
+    total = len(files)
+    total_written = 0
+    total_skipped = 0
+    for i, page in enumerate(files, 1):
+        result = process_page(
+            page,
+            force_synthetic=args.no_llm,
+            rebuild=args.rebuild,
+            peek=args.peek,
+            allow_egress=args.allow_egress,
+            progress_label=f"[{i}/{total}]",
+        )
+        total_written += len(result["written"])
+        total_skipped += result["skipped"]
+
+    log(f"\nDone. pages={total}  chunks_written={total_written}  chunks_unchanged={total_skipped}")
+    return EXIT_OK
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,229 @@
+#!/usr/bin/env bash
+# detect-transport.sh — discover which vault-mutation transports are available
+# on this machine, write a normalized JSON snapshot to .vault-meta/transport.json,
+# and pick a preferred transport per the v1.7 fallback chain.
+#
+# Fallback chain (highest to lowest precedence):
+#   1. cli         — Obsidian CLI binary (Obsidian 1.12+). No MCP server, no TLS, no plugin.
+#   2. mcp-obsidian — REST-API-backed MCP server (Local REST API plugin required).
+#   3. mcpvault    — Filesystem-backed MCP server (BM25 search; no Obsidian plugin).
+#   4. filesystem  — Direct Read/Write/Edit tools. Always available (ultimate floor).
+#
+# MCP auto-detection is deferred to a v1.7.x patch (calling `claude mcp list` from
+# inside a running claude session has reentrancy concerns). For v1.7, we detect
+# CLI + filesystem and leave MCP fields as `{"present": null, "detection": "deferred"}`.
+# Users with MCP transports configured can either edit transport.json manually or
+# follow the legacy guidance in wiki/references/mcp-setup.md.
+#
+# Usage:
+#   ./scripts/detect-transport.sh             # detect and write .vault-meta/transport.json
+#   ./scripts/detect-transport.sh --peek      # print result to stdout without writing
+#   ./scripts/detect-transport.sh --force     # refresh even if existing snapshot is fresh (<7d)
+#   ./scripts/detect-transport.sh --quiet     # suppress informational stderr output
+#
+# Exit codes:
+#   0 — success (transport.json written or peeked)
+#   2 — vault-meta/ missing and cannot be created
+#   3 — unrecognized flag
+
+set -euo pipefail
+
+VAULT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+META_DIR="${VAULT_ROOT}/.vault-meta"
+OUTPUT_FILE="${META_DIR}/transport.json"
+STALE_AFTER_DAYS=7
+
+MODE="write"
+QUIET=false
+
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --peek)  MODE="peek" ;;
+    --force) MODE="force" ;;
+    --quiet) QUIET=true ;;
+    -h|--help)
+      sed -n '2,28p' "$0" | sed 's/^# \{0,1\}//'
+      exit 0
+      ;;
+    *)
+      echo "ERR: unknown flag: $1" >&2
+      exit 3
+      ;;
+  esac
+  shift
+done
+
+log() { $QUIET || echo "$@" >&2; }
+
+# json_escape: filter that reads all of stdin, trims surrounding whitespace,
+# and writes the result as a JSON string literal (double quotes included, no
+# trailing newline). Meant for untrusted values headed into the transport.json
+# heredoc — e.g. the output of `obsidian-cli --version`, where a newline,
+# backslash or control character would otherwise break the JSON.
+json_escape() {
+  python3 -c 'import json,sys; sys.stdout.write(json.dumps(sys.stdin.read().strip()))'
+}
+
+mkdir -p "$META_DIR" || {
+  echo "ERR: cannot create .vault-meta/ at $META_DIR" >&2
+  exit 2
+}
+
+# ── 0. Honor manual_override from existing transport.json ────────────────────
+# Users can pin a non-detected transport (mcp-obsidian, mcpvault, or any custom
+# value) by editing transport.json to set:
+#     "manual_override": true
+#     "preferred": "<their-choice>"
+#     "fallback_chain": [...]
+# Auto-detection still runs (to refresh CLI/Obsidian-running flags for visibility),
+# but PREFERRED and CHAIN are preserved from the existing file across both the
+# normal write path AND --force runs. Documented at
+# wiki/references/transport-fallback.md §Manual override.
+#
+# The embedded Python prints two lines when the override is active:
+#   line 1 = preferred transport (raw text)
+#   line 2 = comma-separated chain entries, each one a JSON string literal
+# Each chain entry is encoded with json.dumps so that a quote or backslash in
+# a custom transport name cannot corrupt the regenerated transport.json.
+# A fallback_chain given as a single string is treated as a one-entry chain;
+# any other non-list value is treated as an empty chain.
+# Any parse failure prints nothing, which leaves the override disabled.
+MANUAL_OVERRIDE_FLAG=false
+MANUAL_OVERRIDE_PREFERRED=""
+MANUAL_OVERRIDE_CHAIN=""
+if [ -f "$OUTPUT_FILE" ]; then
+  MANUAL_PARSE="$(python3 - "$OUTPUT_FILE" 2>/dev/null <<'PYEOF'
+import json, sys
+try:
+    with open(sys.argv[1], encoding="utf-8") as fh:
+        data = json.load(fh)
+    if data.get("manual_override") is True:
+        pref = data.get("preferred", "")
+        chain = data.get("fallback_chain", [])
+        if isinstance(chain, str):
+            chain = [chain]
+        elif not isinstance(chain, list):
+            chain = []
+        print(pref)
+        print(",".join(json.dumps(str(c)) for c in chain))
+except Exception:
+    pass
+PYEOF
+)" || MANUAL_PARSE=""
+  if [ -n "${MANUAL_PARSE:-}" ]; then
+    MANUAL_OVERRIDE_FLAG=true
+    MANUAL_OVERRIDE_PREFERRED="$(printf '%s\n' "$MANUAL_PARSE" | sed -n '1p')"
+    MANUAL_OVERRIDE_CHAIN="$(printf '%s\n' "$MANUAL_PARSE" | sed -n '2p')"
+    log "manual_override=true; preserving preferred=${MANUAL_OVERRIDE_PREFERRED}"
+  fi
+fi
+
+# ── Freshness check: skip detection if snapshot is recent ────────────────────
+# Only the plain write mode takes this shortcut: if the snapshot was modified
+# within the last STALE_AFTER_DAYS days it is echoed back unchanged and the
+# script stops. --peek and --force always go on to re-detect.
+if [ "$MODE" = "write" ] && [ -f "$OUTPUT_FILE" ]; then
+  FRESH_MATCH="$(find "$OUTPUT_FILE" -mtime -${STALE_AFTER_DAYS} -print 2>/dev/null || true)"
+  if [ -n "$FRESH_MATCH" ]; then
+    log "transport.json is fresh (<${STALE_AFTER_DAYS}d). Use --force to refresh."
+    cat "$OUTPUT_FILE"
+    exit 0
+  fi
+fi
+
+# ── 1. CLI detection ─────────────────────────────────────────────────────────
+# Values produced here and consumed by the snapshot and the final log lines:
+#   CLI_PRESENT     — true/false; emitted unquoted as a JSON boolean
+#   CLI_BINARY      — name of the binary that was found, or empty
+#   CLI_VERSION     — version string already JSON-encoded (quotes included)
+#   CLI_VERSION_RAW — the same version string unescaped, for the log line
+CLI_PRESENT=false
+CLI_BINARY=""
+CLI_VERSION=""
+CLI_VERSION_RAW=""
+# A binary named obsidian-cli on PATH is accepted as-is; its --version call is
+# only used to record the version, not to decide presence.
+if command -v obsidian-cli >/dev/null 2>&1; then
+  CLI_PRESENT=true
+  CLI_BINARY="obsidian-cli"
+  # Keep two views of the version: RAW for the human log line, JSON-escaped
+  # for the transport.json heredoc. CLI_VERSION below is pre-quoted (includes
+  # the surrounding double quotes), so the heredoc emits ${CLI_VERSION}
+  # without wrapping quotes.
+  # NOTE(review): with pipefail on, a binary that prints a version and then
+  # exits non-zero would get "unknown" appended after its output — confirm
+  # whether that case matters.
+  CLI_VERSION_RAW="$(obsidian-cli --version 2>/dev/null | head -1 || echo unknown)"
+  # If json_escape itself fails, fall back to the literal JSON string "unknown".
+  CLI_VERSION="$(printf '%s' "$CLI_VERSION_RAW" | json_escape || echo '"unknown"')"
+elif command -v obsidian >/dev/null 2>&1; then
+  # Obsidian 1.12+ ships `obsidian` as the CLI binary on some platforms.
+  # We treat it as cli-capable if it accepts a --cli or --version flag without launching the GUI.
+  # Unlike the obsidian-cli branch, presence here requires `--version` to
+  # exit successfully.
+  if obsidian --version >/dev/null 2>&1; then
+    CLI_PRESENT=true
+    CLI_BINARY="obsidian"
+    CLI_VERSION_RAW="$(obsidian --version 2>/dev/null | head -1 || echo unknown)"
+    CLI_VERSION="$(printf '%s' "$CLI_VERSION_RAW" | json_escape || echo '"unknown"')"
+  fi
+fi
+# Fallback default when neither binary was found: must still be a valid JSON literal.
+if [ -z "$CLI_VERSION" ]; then
+  CLI_VERSION='""'
+  CLI_VERSION_RAW=""
+fi
+
+# ── 2. Obsidian app running? (informational only; CLI works either way) ──────
+# True only when pgrep exists and finds a process whose command line contains
+# "obsidian" (case-insensitive).
+# NOTE(review): `pgrep -f` matches full command lines, so a vault path that
+# itself contains "obsidian" may make this script's own shell count as a
+# match — confirm before relying on the flag.
+if command -v pgrep >/dev/null 2>&1 && pgrep -if 'obsidian' >/dev/null 2>&1; then
+  OBSIDIAN_RUNNING=true
+else
+  OBSIDIAN_RUNNING=false
+fi
+
+# ── 3. Compute preferred + fallback chain ────────────────────────────────────
+# Start from the filesystem floor, then promote the CLI when it was detected.
+# CHAIN holds the already-quoted, comma-separated body of the JSON array.
+PREFERRED="filesystem"
+CHAIN='"filesystem"'
+if $CLI_PRESENT; then
+  PREFERRED="cli"
+  CHAIN='"cli", "filesystem"'
+fi
+
+# ── 3b. Apply manual_override if it was parsed from the existing snapshot ────
+# A pinned transport replaces the auto-detected values, so the user's choice
+# survives every refresh cycle, --force included.
+case "$MANUAL_OVERRIDE_FLAG" in
+  true)
+    PREFERRED="$MANUAL_OVERRIDE_PREFERRED"
+    CHAIN="$MANUAL_OVERRIDE_CHAIN"
+    ;;
+esac
+
+# ── 4. Build JSON snapshot ───────────────────────────────────────────────────
+TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+HOSTNAME="$(hostname 2>/dev/null || echo unknown)"
+
+snapshot() {
+  cat <<JSON
+{
+  "schema_version": 1,
+  "detected_at": "${TIMESTAMP}",
+  "host": "${HOSTNAME}",
+  "vault_root": "${VAULT_ROOT}",
+  "manual_override": ${MANUAL_OVERRIDE_FLAG},
+  "preferred": "${PREFERRED}",
+  "fallback_chain": [${CHAIN}],
+  "available": {
+    "cli": {
+      "present": ${CLI_PRESENT},
+      "binary": "${CLI_BINARY}",
+      "version_string": ${CLI_VERSION},
+      "obsidian_app_running": ${OBSIDIAN_RUNNING}
+    },
+    "filesystem": {
+      "present": true,
+      "vault_root": "${VAULT_ROOT}",
+      "note": "ultimate fallback; uses Claude's Read/Write/Edit tools directly"
+    },
+    "mcp_obsidian": {
+      "present": null,
+      "detection": "deferred",
+      "note": "v1.7 does not auto-detect MCP servers. Configure manually per wiki/references/mcp-setup.md and edit this file by hand if needed."
+    },
+    "mcpvault": {
+      "present": null,
+      "detection": "deferred",
+      "note": "v1.7 does not auto-detect MCP servers. Configure manually per wiki/references/mcp-setup.md and edit this file by hand if needed."
+    }
+  }
+}
+JSON
+}
+
+if [ "$MODE" = "peek" ]; then
+  snapshot
+  exit 0
+fi
+
+# Atomic write: stage to .tmp then rename. Avoids partial files if killed mid-write.
+TMP="${OUTPUT_FILE}.$$.tmp"
+trap 'rm -f "$TMP"' EXIT
+snapshot > "$TMP"
+mv "$TMP" "$OUTPUT_FILE"
+trap - EXIT
+
+log "Wrote: ${OUTPUT_FILE}"
+log "Preferred transport: ${PREFERRED}"
+$CLI_PRESENT && log "  CLI:        ${CLI_BINARY} (${CLI_VERSION_RAW})"
+log "  Filesystem: always available (Read/Write/Edit tools)"
+log "  MCP:        not auto-detected (see wiki/references/mcp-setup.md to configure)"
@@ -0,0 +1,312 @@
+#!/usr/bin/env python3
+"""rerank.py — query-time reranker for chunk candidates.
+
+Takes a query string + a list of candidate chunks (from BM25, vector, or any
+upstream stage) and reorders them using semantic similarity.
+
+v1.7 strategy (in preference order, automatically chosen at runtime):
+  1. If ollama is reachable AND nomic-embed-text is pulled
+       → embed the query, embed each candidate's contextualized_text,
+         rank by cosine. Caches per-chunk embeddings in
+         .vault-meta/embed-cache.json keyed by body_hash.
+  2. Otherwise
+       → no-op rerank: return candidates in input order with a synthesized
+         note. Caller (retrieve.py) still gets a useful result; downstream
+         drill-into-page logic is unchanged.
+
+Future v1.7.x upgrade paths:
+  - Cross-encoder reranker (sentence-transformers BGE-base) if installed
+  - Cohere Rerank API if COHERE_API_KEY set
+  - Voyage Rerank API if VOYAGE_API_KEY set
+
+Mirrors the localhost-only OLLAMA_URL guard from scripts/tiling-check.py:
+remote ollama endpoints require --allow-remote-ollama because page bodies
+are POSTed as embedding input.
+
+Usage:
+  rerank.py "query string" --candidates candidates.json [--top 5]
+  rerank.py "query string" --candidates - --top 5    # stdin
+  rerank.py --peek "query string"                     # show strategy chosen
+
+Candidates JSON shape:
+  [{"chunk_id": "c-000042:3", "path": ".vault-meta/chunks/.../chunk-003.json", "score": 7.1}, ...]
+
+Output: ranked candidates with `rerank_score` added.
+
+Exit codes:
+  0 — success
+  2 — usage error
+  3 — candidate input malformed
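+  10 — reserved for "ollama unreachable"; currently a no-op rerank is performed
+       and the process exits 0 with a note on stderr
+  11 — reserved for "model not pulled"; currently a no-op rerank is performed
+       and the process exits 0 with a note on stderr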
+"""
+
+import argparse
+import fcntl
+import json
+import math
+import os
+import shutil
+import sys
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Filesystem layout: this script lives one directory below the vault root.
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+META_DIR = VAULT_ROOT / ".vault-meta"
+# Embedding cache, plus the lock file used to serialize writers of that cache.
+EMBED_CACHE_PATH = META_DIR / "embed-cache.json"
+CACHE_LOCK = META_DIR / ".embed-cache.lock"
+
+# Ollama endpoint used when OLLAMA_URL is not set, and the embedding model.
+DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"
+DEFAULT_MODEL = "nomic-embed-text"
+# Timeout (seconds) for the /api/tags liveness probe.
+OLLAMA_TIMEOUT_SEC = 3
+# Timeout (seconds) for a single /api/embeddings request.
+EMBED_TIMEOUT_SEC = 30
+# Upper bound on the number of bytes read from any ollama HTTP response.
+MAX_RESPONSE_BYTES = 4 * 1024 * 1024
+
+# Process exit codes.
+EXIT_OK = 0
+EXIT_USAGE = 2
+EXIT_CANDIDATES = 3
+# NOTE(review): the next two codes are defined but not referenced anywhere in
+# this script; the matching conditions currently end in a no-op rerank.
+EXIT_NO_OLLAMA = 10
+EXIT_NO_MODEL = 11
+
+
+def log(msg):
+    print(msg, file=sys.stderr)
+
+
+def cosine(a, b):
+    if not a or not b or len(a) != len(b):
+        return 0.0
+    dot = sum(x * y for x, y in zip(a, b))
+    na = math.sqrt(sum(x * x for x in a))
+    nb = math.sqrt(sum(y * y for y in b))
+    if na == 0 or nb == 0:
+        return 0.0
+    return dot / (na * nb)
+
+
+def ollama_url(allow_remote):
+    url = os.environ.get("OLLAMA_URL", DEFAULT_OLLAMA_URL).rstrip("/")
+    if not allow_remote:
+        parsed = urllib.parse.urlparse(url)
+        host = parsed.hostname or ""
+        if host not in ("127.0.0.1", "localhost", "::1"):
+            log(f"ERR: OLLAMA_URL={url} points off-localhost (host={host!r}).")
+            log("  Either: (a) run ollama locally — `systemctl --user start ollama` or `ollama serve`")
+            log("  Or:     (b) pass --allow-remote-ollama through retrieve.py, which forwards it here.")
+            log("  Or:     (c) unset OLLAMA_URL to fall back to the local default (127.0.0.1:11434).")
+            sys.exit(EXIT_USAGE)
+    return url
+
+
+def ollama_alive(url):
+    try:
+        req = urllib.request.Request(f"{url}/api/tags", method="GET")
+        with urllib.request.urlopen(req, timeout=OLLAMA_TIMEOUT_SEC) as resp:
+            data = json.loads(resp.read(MAX_RESPONSE_BYTES))
+            models = [m.get("name", "").split(":")[0] for m in data.get("models", [])]
+            return True, models
+    except (urllib.error.URLError, json.JSONDecodeError, OSError):
+        return False, []
+
+
+def embed_one(url, model, text):
+    payload = json.dumps({"model": model, "prompt": text}).encode("utf-8")
+    req = urllib.request.Request(
+        f"{url}/api/embeddings",
+        data=payload,
+        headers={"Content-Type": "application/json"},
+        method="POST",
+    )
+    with urllib.request.urlopen(req, timeout=EMBED_TIMEOUT_SEC) as resp:
+        data = json.loads(resp.read(MAX_RESPONSE_BYTES))
+        return data.get("embedding") or []
+
+
+def load_cache():
+    if not EMBED_CACHE_PATH.is_file():
+        return {}
+    try:
+        return json.loads(EMBED_CACHE_PATH.read_text(encoding="utf-8"))
+    except (json.JSONDecodeError, OSError):
+        return {}
+
+
+def save_cache(cache):
+    """Persist the embed cache atomically.
+
+    v1.7.2 / closes audit M7: previously used blocking fcntl.LOCK_EX with no
+    timeout, which could hang indefinitely on a non-flock-capable filesystem
+    (some NFS mounts, network shares, FUSE backends without lock support).
+    Now uses LOCK_NB with a 3-attempt retry loop, then falls back to writing
+    without the lock (with a WARN) so the rerank pipeline never hangs the
+    user's session. The temp + os.replace pattern provides write atomicity
+    even without the lock; the lock only serializes concurrent writers.
+    """
+    META_DIR.mkdir(parents=True, exist_ok=True)
+    fd = os.open(str(CACHE_LOCK), os.O_CREAT | os.O_WRONLY, 0o644)
+    locked = False
+    try:
+        for attempt in range(3):
+            try:
+                fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+                locked = True
+                break
+            except BlockingIOError:
+                time.sleep(0.1)
+        if not locked:
+            msg = ("WARN: rerank embed-cache lock unavailable after 3 tries; "
+                   "writing unlocked (atomic via temp+rename). Concurrent writers "
+                   "may overwrite each other's last update.")
+            log(msg)
+            # v1.9.1 / closes audit Data M1: also route to .vault-meta/hook.log so
+            # the user sees the event via wiki-lint (stderr alone is invisible to
+            # most callers; this matches the hook's logging shape).
+            try:
+                META_DIR.mkdir(parents=True, exist_ok=True)
+                hook_log = META_DIR / "hook.log"
+                ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+                with hook_log.open("a", encoding="utf-8") as fh:
+                    fh.write(f"{ts} rerank embed-cache lock unavailable; wrote unlocked\n")
+            except OSError:
+                pass  # never block on a logging failure
+        tmp = EMBED_CACHE_PATH.with_suffix(f".{os.getpid()}.tmp")
+        tmp.write_text(json.dumps(cache, ensure_ascii=False), encoding="utf-8")
+        os.replace(tmp, EMBED_CACHE_PATH)
+    finally:
+        if locked:
+            try:
+                fcntl.flock(fd, fcntl.LOCK_UN)
+            except OSError:
+                pass
+        os.close(fd)
+
+
+def load_chunk(chunk_rel_path):
+    p = VAULT_ROOT / chunk_rel_path
+    if not p.is_file():
+        return None
+    try:
+        return json.loads(p.read_text(encoding="utf-8"))
+    except (json.JSONDecodeError, OSError):
+        return None
+
+
+def rerank(query, candidates, top_k=5, allow_remote=False):
+    """Returns candidates list, possibly truncated to top_k, with rerank_score added.
+    Falls back to input-order if ollama is unavailable (still adds rerank_source: 'noop').
+    """
+    url = ollama_url(allow_remote)
+    alive, models = ollama_alive(url)
+    if not alive:
+        log("ollama unreachable — no-op rerank")
+        for c in candidates:
+            c["rerank_score"] = float(c.get("score", 0.0))
+            c["rerank_source"] = "noop-no-ollama"
+        return candidates[:top_k]
+    if DEFAULT_MODEL not in models:
+        log(f"model {DEFAULT_MODEL} not pulled — no-op rerank")
+        for c in candidates:
+            c["rerank_score"] = float(c.get("score", 0.0))
+            c["rerank_source"] = "noop-no-model"
+        return candidates[:top_k]
+
+    cache = load_cache()
+    cache_dirty = False
+    try:
+        q_emb = embed_one(url, DEFAULT_MODEL, query)
+    except Exception as e:
+        log(f"query embed failed: {e}")
+        for c in candidates:
+            c["rerank_score"] = float(c.get("score", 0.0))
+            c["rerank_source"] = "noop-embed-error"
+        return candidates[:top_k]
+
+    for c in candidates:
+        chunk = load_chunk(c.get("path", ""))
+        if not chunk:
+            c["rerank_score"] = 0.0
+            c["rerank_source"] = "missing-chunk"
+            continue
+        body_hash = chunk.get("body_hash", "")
+        cache_key = f"{DEFAULT_MODEL}:{body_hash}"
+        emb = cache.get(cache_key)
+        if not emb:
+            text = chunk.get("contextualized_text") or chunk.get("raw_text", "")
+            try:
+                emb = embed_one(url, DEFAULT_MODEL, text)
+            except Exception as e:
+                log(f"embed failed for {c.get('chunk_id')}: {e}")
+                c["rerank_score"] = float(c.get("score", 0.0))
+                c["rerank_source"] = "embed-error"
+                continue
+            cache[cache_key] = emb
+            cache_dirty = True
+        c["rerank_score"] = cosine(q_emb, emb)
+        c["rerank_source"] = f"cosine:{DEFAULT_MODEL}"
+
+    if cache_dirty:
+        save_cache(cache)
+
+    ranked = sorted(candidates, key=lambda x: x.get("rerank_score", 0.0), reverse=True)
+    return ranked[:top_k]
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Rerank chunk candidates by semantic similarity.")
+    parser.add_argument("query", nargs="?", help="Query text")
+    parser.add_argument("--candidates", help="Path to candidates JSON or `-` for stdin",
+                        default=None)
+    parser.add_argument("--top", type=int, default=5, help="Top-K to return")
+    parser.add_argument("--peek", action="store_true",
+                        help="Print rerank strategy chosen and exit")
+    parser.add_argument("--allow-remote-ollama", action="store_true",
+                        help="Accept non-localhost OLLAMA_URL (potential data exfil)")
+    args = parser.parse_args()
+
+    if args.peek:
+        if not args.query:
+            log("--peek needs a query string")
+            sys.exit(EXIT_USAGE)
+        url = ollama_url(args.allow_remote_ollama)
+        alive, models = ollama_alive(url)
+        strategy = "noop-no-ollama"
+        if alive:
+            strategy = f"cosine:{DEFAULT_MODEL}" if DEFAULT_MODEL in models else "noop-no-model"
+        print(json.dumps({
+            "query": args.query,
+            "strategy": strategy,
+            "ollama_url": url,
+            "ollama_alive": alive,
+            "model_present": DEFAULT_MODEL in models,
+            "checked_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        }, indent=2))
+        return EXIT_OK
+
+    if not args.query or args.candidates is None:
+        log("usage: rerank.py <query> --candidates <path|-> [--top N]")
+        return EXIT_USAGE
+
+    if args.candidates == "-":
+        cand_text = sys.stdin.read()
+    else:
+        cand_text = Path(args.candidates).read_text(encoding="utf-8")
+    try:
+        candidates = json.loads(cand_text)
+        if not isinstance(candidates, list):
+            raise ValueError("candidates must be a JSON list")
+    except (json.JSONDecodeError, ValueError) as e:
+        log(f"ERR: bad candidates JSON: {e}")
+        return EXIT_CANDIDATES
+
+    result = rerank(args.query, candidates, top_k=args.top,
+                    allow_remote=args.allow_remote_ollama)
+    print(json.dumps(result, indent=2))
+    return EXIT_OK
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+"""retrieve.py — hybrid retrieval orchestrator for the Compound Vault.
+
+Pipeline (v1.7):
+  query  →  bm25-index.py query (top-K candidates by BM25 over contextualized chunks)
+         →  rerank.py        (cosine on nomic-embed-text vectors via ollama,
+                              or no-op if ollama unavailable)
+         →  drill            (return chunk pages with absolute paths so the
+                              caller can Read them and synthesize)
+
+Loads sibling scripts as Python modules (no subprocess overhead). Falls back
+gracefully when index or rerank stage is missing:
+- If .vault-meta/bm25/index.json is absent     → exit 10 with friendly message;
+                                                  caller falls back to v1.6 legacy
+                                                  hot→index→drill read order.
+- If .vault-meta/chunks/ is empty              → exit 10 (same).
+- If rerank stage cannot embed (no ollama)     → no-op rerank, returns BM25 order.
+
+Output schema (JSON to stdout):
+{
+  "query": "...",
+  "strategy": "bm25+rerank:cosine:nomic-embed-text" | "bm25+rerank:noop-no-ollama" | "bm25+rerank:<other rerank_source>" | "bm25-only",
+  "top_k": 5,
+  "candidates": [
+    {
+      "chunk_id": "c-000042:3",
+      "page_address": "c-000042",
+      "page_path": "wiki/concepts/Foo.md",
+      "absolute_path": "/abs/path/to/wiki/concepts/Foo.md",
+      "chunk_index": 3,
+      "bm25_score": 7.12,
+      "rerank_score": 0.81,
+      "rerank_source": "cosine:nomic-embed-text",
+      "snippet": "... first 200 chars of the chunk ..."
+    },
+    ...
+  ]
+}
+
+Usage:
+  retrieve.py "your query here"           # standard: BM25 top-20, rerank to top-5
+  retrieve.py "query" --top 10            # change result count
+  retrieve.py "query" --no-rerank         # skip rerank, BM25-only
+  retrieve.py "query" --explain           # include per-stage diagnostics
+
+Exit codes:
+  0 — success
+  2 — usage error
+  10 — feature not provisioned (no chunks or no BM25 index); caller falls back
+"""
+
+import argparse
+import importlib.util
+import json
+import sys
+from pathlib import Path
+
+# Filesystem layout: this script lives one directory below the vault root.
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+# Directory the sibling helper scripts are loaded from.
+SCRIPTS_DIR = VAULT_ROOT / "scripts"
+META_DIR = VAULT_ROOT / ".vault-meta"
+# Chunk directory and BM25 index; both must exist for retrieval to run.
+CHUNKS_DIR = META_DIR / "chunks"
+BM25_INDEX = META_DIR / "bm25" / "index.json"
+
+# Process exit codes.
+EXIT_OK = 0
+EXIT_USAGE = 2
+# Returned when the index, the chunks, or a sibling helper is unavailable.
+EXIT_NOT_PROVISIONED = 10
+
+
+def log(msg):
+    print(msg, file=sys.stderr)
+
+
+def import_sibling(name, filename):
+    """Import a hyphenated sibling .py file as a Python module.
+
+    Wrapped in try/except (v1.7.2; closes audit M5) so a syntax error or
+    missing dependency in a sibling helper produces a friendly diagnostic
+    instead of a bare Python traceback at the user's first retrieve call.
+    """
+    target = SCRIPTS_DIR / filename
+    if not target.is_file():
+        log(f"ERR: sibling helper {filename} not found at {target}")
+        log("  Run `bash bin/setup-retrieve.sh --check` to verify the install.")
+        sys.exit(EXIT_NOT_PROVISIONED)
+    try:
+        spec = importlib.util.spec_from_file_location(name, target)
+        mod = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(mod)
+        return mod
+    except (ImportError, SyntaxError, AttributeError) as e:
+        log(f"ERR: failed to import sibling helper {filename}: {type(e).__name__}: {e}")
+        log("  This likely means the helper script is corrupted or has a syntax error.")
+        log("  Run `python3 scripts/<helper>.py --help` directly to see the underlying error.")
+        log("  If it persists: re-clone the repo or check `git status` for local damage.")
+        sys.exit(EXIT_NOT_PROVISIONED)
+
+
+def chunk_snippet(chunk_data, max_chars=200):
+    text = chunk_data.get("raw_text", "")
+    if len(text) <= max_chars:
+        return text
+    return text[:max_chars].rstrip() + "…"
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Hybrid retrieval over the vault.")
+    parser.add_argument("query", help="Natural-language query")
+    parser.add_argument("--top", type=int, default=5, help="Final result count (post-rerank)")
+    parser.add_argument("--bm25-top", type=int, default=20,
+                        help="Candidate count from BM25 (pre-rerank)")
+    parser.add_argument("--no-rerank", action="store_true",
+                        help="Skip the rerank stage; return BM25-only")
+    parser.add_argument("--explain", action="store_true",
+                        help="Include per-stage diagnostics in output")
+    parser.add_argument("--allow-remote-ollama", action="store_true",
+                        help="Forwarded to rerank.py")
+    args = parser.parse_args()
+
+    if not BM25_INDEX.is_file():
+        log(f"ERR: no BM25 index at {BM25_INDEX}. Run `bash bin/setup-retrieve.sh` "
+            "to provision, or fall back to legacy hot→index→drill.")
+        return EXIT_NOT_PROVISIONED
+    if not CHUNKS_DIR.is_dir() or not any(CHUNKS_DIR.iterdir()):
+        log(f"ERR: no chunks at {CHUNKS_DIR}. Run "
+            "`python3 scripts/contextual-prefix.py --all` first.")
+        return EXIT_NOT_PROVISIONED
+
+    bm25 = import_sibling("bm25_index", "bm25-index.py")
+    reranker = import_sibling("rerank", "rerank.py")
+
+    bm25_hits = bm25.query(args.query, top_k=args.bm25_top)
+    log(f"bm25: {len(bm25_hits)} hits")
+
+    candidates = []
+    for h in bm25_hits:
+        chunk_path = VAULT_ROOT / h["path"]
+        try:
+            chunk = json.loads(chunk_path.read_text(encoding="utf-8"))
+        except (json.JSONDecodeError, OSError):
+            continue
+        candidates.append({
+            "chunk_id": h["chunk_id"],
+            "page_address": chunk.get("page_address"),
+            "page_path": chunk.get("page_path"),
+            "absolute_path": str((VAULT_ROOT / chunk.get("page_path", "")).resolve()),
+            "chunk_index": chunk.get("chunk_index"),
+            "bm25_score": h["score"],
+            "path": h["path"],
+            "snippet": chunk_snippet(chunk),
+        })
+
+    if args.no_rerank:
+        final = candidates[:args.top]
+        strategy = "bm25-only"
+        for c in final:
+            c["rerank_score"] = c["bm25_score"]
+            c["rerank_source"] = "skipped"
+    else:
+        final = reranker.rerank(
+            args.query, candidates, top_k=args.top,
+            allow_remote=args.allow_remote_ollama,
+        )
+        # Derive strategy from first candidate's rerank_source
+        first_src = (final[0].get("rerank_source") if final else "unknown")
+        strategy = f"bm25+rerank:{first_src}"
+
+    # Dedupe by page (we may have multiple chunks of the same page; collapse to best)
+    by_page = {}
+    for c in final:
+        addr = c.get("page_address")
+        if addr not in by_page or c.get("rerank_score", 0) > by_page[addr].get("rerank_score", 0):
+            by_page[addr] = c
+    deduped = list(by_page.values())
+    deduped.sort(key=lambda c: c.get("rerank_score", 0), reverse=True)
+
+    out = {
+        "query": args.query,
+        "strategy": strategy,
+        "top_k": args.top,
+        "candidates": deduped[:args.top],
+    }
+    if args.explain:
+        out["explain"] = {
+            "bm25_candidate_count": len(bm25_hits),
+            "post_rerank_count": len(final),
+            "deduped_count": len(deduped),
+            "bm25_top_param": args.bm25_top,
+        }
+
+    print(json.dumps(out, indent=2, ensure_ascii=False))
+    return EXIT_OK
+
+
+if __name__ == "__main__":
+    sys.exit(main())
@@ -0,0 +1,496 @@
+#!/usr/bin/env python3
+"""tiling-check.py — DragonScale Mechanism 3: semantic tiling lint.
+
+Computes per-page embeddings via a local ollama instance and reports
+candidate duplicate page pairs. Read-only; never modifies wiki pages.
+
+Security model:
+- Defaults to http://127.0.0.1:11434. Remote ollama endpoints require
+  --allow-remote-ollama explicitly (vault bodies are POSTed as embedding
+  input; a hostile env var would otherwise exfiltrate content).
+- Rejects symlinked page files to prevent escape outside the vault root.
+
+Feature-gated: exits 10 if ollama is unreachable or 11 if the embedding
+model is not pulled, so the calling skill can no-op gracefully. Exits 0
+on success. Exit 2 on usage error. Exit 3 on cache corruption. Exit 4 when
+the page count exceeds the hard scale limit.
+
+Concurrency:
+- Locks `.vault-meta/.tiling.lock` (flock exclusive) around cache I/O.
+- Per-PID temp file to avoid shared-tempfile races.
+
+Usage:
+  tiling-check.py                      # run; exit 10/11 if ollama/model missing
+  tiling-check.py --report PATH        # also write report to PATH
+  tiling-check.py --rebuild-cache      # ignore cached embeddings
+  tiling-check.py --peek               # structured diagnostics; no compute
+  tiling-check.py --allow-remote-ollama # accept non-localhost OLLAMA_URL
+"""
+
+import argparse
+import fcntl
+import hashlib
+import json
+import math
+import os
+import re
+import sys
+import urllib.error
+import urllib.parse
+import urllib.request
+from datetime import datetime
+from pathlib import Path
+
+# Endpoint and embedding model used when no override is supplied.
+DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"
+DEFAULT_MODEL = "nomic-embed-text"
+# HTTP timeouts in seconds: the short one is used for the /api/version and
+# /api/tags probes, the long one for /api/embeddings requests.
+OLLAMA_TIMEOUT_SEC = 3
+EMBED_TIMEOUT_SEC = 30
+MAX_RESPONSE_BYTES = 4 * 1024 * 1024  # 4 MB; embeddings can be ~10 KB each
+
+# Filesystem layout, all derived from this script's location (the vault root
+# is the parent of the directory containing the script).
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+WIKI_DIR = VAULT_ROOT / "wiki"
+META_DIR = VAULT_ROOT / ".vault-meta"
+CACHE_PATH = META_DIR / "tiling-cache.json"
+CACHE_LOCK = META_DIR / ".tiling.lock"
+THRESHOLDS_PATH = META_DIR / "tiling-thresholds.json"
+
+# Pages are excluded from the check by frontmatter type, by bare filename,
+# or by vault-relative path prefix (see included()).
+EXCLUDE_TYPES = {"meta", "fold"}
+EXCLUDE_FILENAMES = {
+    "_index.md", "index.md", "log.md", "hot.md", "overview.md",
+    "dashboard.md", "Wiki Map.md", "getting-started.md",
+}
+EXCLUDE_PATH_PREFIXES = ("wiki/folds/", "wiki/meta/")
+# Pages whose full UTF-8 text exceeds this many bytes are skipped ("too_large").
+MAX_BODY_BYTES = 128 * 1024
+# Candidate-file counts above which run_check() warns / refuses to run.
+SCALE_WARN_PAGES = 500
+SCALE_HARD_FAIL_PAGES = 5000
+
+# Process exit codes returned by main().
+EXIT_OK = 0
+EXIT_USAGE = 2
+EXIT_CACHE_CORRUPT = 3
+EXIT_SCALE_EXCEEDED = 4
+EXIT_NO_OLLAMA = 10
+EXIT_NO_MODEL = 11
+
+# Matches a leading "---" fenced block (LF line endings only) and, within
+# it, the first "type:" line.
+FRONTMATTER_RE = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)
+TYPE_RE = re.compile(r"^type:\s*(\S+)", re.MULTILINE)
+
+
+def log(msg: str) -> None:
+    print(msg, file=sys.stderr)
+
+
+def _is_local_url(url: str) -> bool:
+    try:
+        host = urllib.parse.urlparse(url).hostname or ""
+    except ValueError:
+        return False
+    return host in ("127.0.0.1", "localhost", "::1")
+
+
+def _http_get_json(url: str, timeout: float) -> dict:
+    with urllib.request.urlopen(url, timeout=timeout) as resp:
+        raw = resp.read(MAX_RESPONSE_BYTES + 1)
+    if len(raw) > MAX_RESPONSE_BYTES:
+        raise RuntimeError("response exceeded size limit")
+    return json.loads(raw.decode("utf-8"))
+
+
+def _http_post_json(url: str, payload: dict, timeout: float) -> dict:
+    data = json.dumps(payload).encode("utf-8")
+    req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
+    with urllib.request.urlopen(req, timeout=timeout) as resp:
+        raw = resp.read(MAX_RESPONSE_BYTES + 1)
+    if len(raw) > MAX_RESPONSE_BYTES:
+        raise RuntimeError("response exceeded size limit")
+    return json.loads(raw.decode("utf-8"))
+
+
+def detect_ollama(url: str) -> bool:
+    try:
+        _http_get_json(f"{url}/api/version", OLLAMA_TIMEOUT_SEC)
+        return True
+    except (urllib.error.URLError, OSError, ValueError, TimeoutError, RuntimeError):
+        return False
+
+
+def detect_model(url: str, model: str) -> bool:
+    try:
+        data = _http_get_json(f"{url}/api/tags", OLLAMA_TIMEOUT_SEC)
+    except (urllib.error.URLError, OSError, ValueError, TimeoutError, RuntimeError):
+        return False
+    models = data.get("models")
+    if not isinstance(models, list):
+        return False
+    for entry in models:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get("name", "")
+        if isinstance(name, str) and (name == model or name.startswith(f"{model}:")):
+            return True
+    return False
+
+
+def parse_frontmatter(text: str) -> tuple[dict, str]:
+    m = FRONTMATTER_RE.match(text)
+    if not m:
+        return {}, text
+    fm_raw = m.group(1)
+    body = text[m.end():]
+    fm: dict = {}
+    tm = TYPE_RE.search(fm_raw)
+    if tm:
+        fm["type"] = tm.group(1).strip().strip('"').strip("'")
+    return fm, body
+
+
+def body_hash(body: str, model: str) -> str:
+    h = hashlib.sha256()
+    h.update(f"model={model}\n".encode("utf-8"))
+    h.update(body.encode("utf-8"))
+    return h.hexdigest()
+
+
+def cosine(a: list[float], b: list[float]) -> float:
+    if len(a) != len(b):
+        raise ValueError(f"dim mismatch: {len(a)} vs {len(b)}")
+    dot = sum(x * y for x, y in zip(a, b))
+    na = math.sqrt(sum(x * x for x in a))
+    nb = math.sqrt(sum(x * x for x in b))
+    if na == 0.0 or nb == 0.0:
+        return 0.0
+    return dot / (na * nb)
+
+
+def _lock_cache() -> int:
+    """Take the exclusive cache lock and return the open lock-file descriptor.
+
+    Creates META_DIR and the lock file if needed. The flock call has no
+    timeout flag, so it waits until the lock is available. The caller must
+    hand the returned descriptor to _unlock_cache().
+
+    Raises:
+        OSError: the lock file cannot be opened or locked.
+    """
+    META_DIR.mkdir(exist_ok=True)
+    fd = os.open(str(CACHE_LOCK), os.O_CREAT | os.O_RDWR, 0o644)
+    try:
+        fcntl.flock(fd, fcntl.LOCK_EX)
+    except OSError:
+        # Do not leak the descriptor when locking itself fails.
+        os.close(fd)
+        raise
+    return fd
+
+
+def _unlock_cache(fd: int) -> None:
+    """Release the flock held on *fd* and close the descriptor.
+
+    The descriptor is closed even when the explicit unlock raises.
+    """
+    try:
+        fcntl.flock(fd, fcntl.LOCK_UN)
+    finally:
+        os.close(fd)
+
+
+def load_cache(current_model: str) -> dict:
+    if not CACHE_PATH.exists():
+        return {"version": 1, "model": current_model, "embeddings": {}}
+    try:
+        with CACHE_PATH.open() as f:
+            data = json.load(f)
+    except (OSError, json.JSONDecodeError) as exc:
+        log(f"ERR: cache read failed: {exc}")
+        sys.exit(EXIT_CACHE_CORRUPT)
+    if data.get("version") != 1:
+        log(f"ERR: unknown cache version: {data.get('version')}")
+        sys.exit(EXIT_CACHE_CORRUPT)
+    cached_model = data.get("model", "")
+    if cached_model != current_model:
+        log(f"INFO: cached model '{cached_model}' differs from current '{current_model}'; invalidating cache")
+        return {"version": 1, "model": current_model, "embeddings": {}}
+    if not isinstance(data.get("embeddings"), dict):
+        log("ERR: cache.embeddings is not a dict")
+        sys.exit(EXIT_CACHE_CORRUPT)
+    return data
+
+
+def save_cache(cache: dict) -> None:
+    META_DIR.mkdir(exist_ok=True)
+    tmp = CACHE_PATH.with_name(f"{CACHE_PATH.stem}.{os.getpid()}.tmp")
+    with tmp.open("w") as f:
+        json.dump(cache, f, indent=2)
+    tmp.replace(CACHE_PATH)
+
+
+def load_thresholds() -> dict:
+    if not THRESHOLDS_PATH.exists():
+        return {
+            "version": 1, "model": DEFAULT_MODEL,
+            "bands": {"error": 0.90, "review": 0.80},
+            "calibrated": False, "calibration_pairs_labeled": 0,
+        }
+    with THRESHOLDS_PATH.open() as f:
+        return json.load(f)
+
+
+def included(path: Path, fm: dict) -> tuple[bool, str]:
+    rel = path.relative_to(VAULT_ROOT).as_posix()
+    if path.is_symlink():
+        return False, "symlink"
+    resolved = path.resolve()
+    try:
+        resolved.relative_to(VAULT_ROOT.resolve())
+    except ValueError:
+        return False, "escapes vault"
+    if path.name in EXCLUDE_FILENAMES:
+        return False, "excluded filename"
+    for prefix in EXCLUDE_PATH_PREFIXES:
+        if rel.startswith(prefix):
+            return False, f"under {prefix}"
+    if fm.get("type") in EXCLUDE_TYPES:
+        return False, f"type={fm['type']}"
+    return True, "included"
+
+
+def embed(text: str, model: str, url: str) -> list[float]:
+    data = _http_post_json(
+        f"{url}/api/embeddings",
+        {"model": model, "prompt": text},
+        EMBED_TIMEOUT_SEC,
+    )
+    emb = data.get("embedding")
+    if not isinstance(emb, list) or not emb:
+        raise RuntimeError(f"ollama returned no embedding: {str(data)[:200]}")
+    for v in emb:
+        if not isinstance(v, (int, float)):
+            raise RuntimeError("embedding contains non-numeric values")
+    return emb
+
+
+def run_check(
+    rebuild: bool,
+    report_path: Path | None,
+    ollama_url: str,
+    model: str,
+) -> int:
+    if not detect_ollama(ollama_url):
+        log(f"ollama not reachable at {ollama_url}; skipping tiling check")
+        return EXIT_NO_OLLAMA
+    if not detect_model(ollama_url, model):
+        log(f"model '{model}' not pulled; run: ollama pull {model}")
+        return EXIT_NO_MODEL
+
+    thresholds = load_thresholds()
+
+    lock_fd = _lock_cache()
+    try:
+        cache = (load_cache(model) if not rebuild
+                 else {"version": 1, "model": model, "embeddings": {}})
+
+        pages: list[tuple[str, list[float]]] = []
+        scanned = 0
+        computed = 0
+        cached_hits = 0
+        skipped_counts: dict[str, int] = {}
+        live_paths: set[str] = set()
+
+        candidates = sorted(WIKI_DIR.rglob("*.md"))
+        scale_n = len(candidates)
+        if scale_n > SCALE_HARD_FAIL_PAGES:
+            log(f"ERR: {scale_n} pages exceed hard-fail limit {SCALE_HARD_FAIL_PAGES}")
+            return EXIT_SCALE_EXCEEDED
+        if scale_n > SCALE_WARN_PAGES:
+            log(f"WARN: {scale_n} pages; cold-cache embed will issue ~{scale_n} POSTs to ollama")
+
+        for md in candidates:
+            scanned += 1
+            # Symlink and vault-root guards must run BEFORE read_text so a
+            # hostile symlink cannot cause off-vault content to be read and
+            # POSTed to the embedding endpoint.
+            if md.is_symlink():
+                skipped_counts["symlink"] = skipped_counts.get("symlink", 0) + 1
+                continue
+            try:
+                resolved = md.resolve(strict=True)
+                resolved.relative_to(VAULT_ROOT.resolve())
+            except (OSError, ValueError):
+                skipped_counts["escapes vault"] = skipped_counts.get("escapes vault", 0) + 1
+                continue
+            try:
+                text = md.read_text(encoding="utf-8")
+            except (OSError, UnicodeDecodeError):
+                skipped_counts["read_error"] = skipped_counts.get("read_error", 0) + 1
+                continue
+            if len(text.encode("utf-8")) > MAX_BODY_BYTES:
+                skipped_counts["too_large"] = skipped_counts.get("too_large", 0) + 1
+                continue
+            fm, body = parse_frontmatter(text)
+            ok, reason = included(md, fm)
+            if not ok:
+                skipped_counts[reason] = skipped_counts.get(reason, 0) + 1
+                continue
+            rel = md.relative_to(VAULT_ROOT).as_posix()
+            live_paths.add(rel)
+            h = body_hash(body, model)
+            entry = cache["embeddings"].get(rel)
+            if entry and entry.get("hash") == h:
+                pages.append((rel, entry["embedding"]))
+                cached_hits += 1
+                continue
+            try:
+                emb = embed(body, model, ollama_url)
+            except Exception as exc:
+                log(f"ERR embedding {rel}: {exc}")
+                skipped_counts["embed_error"] = skipped_counts.get("embed_error", 0) + 1
+                continue
+            cache["embeddings"][rel] = {
+                "hash": h,
+                "embedding": emb,
+                "computed_at": datetime.utcnow().isoformat(timespec="seconds") + "Z",
+            }
+            pages.append((rel, emb))
+            computed += 1
+
+        # Orphan GC: drop cache entries for paths that no longer exist.
+        orphans = [k for k in cache["embeddings"] if k not in live_paths]
+        for k in orphans:
+            del cache["embeddings"][k]
+
+        save_cache(cache)
+    finally:
+        _unlock_cache(lock_fd)
+
+    review = thresholds["bands"]["review"]
+    error_ = thresholds["bands"]["error"]
+    pairs: list[tuple[float, str, str]] = []
+    for i in range(len(pages)):
+        for j in range(i + 1, len(pages)):
+            a_path, a_emb = pages[i]
+            b_path, b_emb = pages[j]
+            try:
+                sim = cosine(a_emb, b_emb)
+            except ValueError as exc:
+                log(f"WARN cosine skip ({a_path}, {b_path}): {exc}")
+                continue
+            if sim >= review:
+                pairs.append((sim, a_path, b_path))
+    pairs.sort(reverse=True)
+
+    errors = [p for p in pairs if p[0] >= error_]
+    reviews = [p for p in pairs if review <= p[0] < error_]
+
+    out_lines: list[str] = []
+    out_lines.append("# Semantic Tiling Report")
+    out_lines.append("")
+    out_lines.append(f"- generated: {datetime.utcnow().isoformat(timespec='seconds')}Z")
+    out_lines.append(f"- model: {model}")
+    out_lines.append(f"- ollama_url: {ollama_url}")
+    out_lines.append(f"- thresholds: error>={error_}, review={review}-{error_}")
+    out_lines.append(f"- calibrated: {thresholds.get('calibrated', False)}"
+                     + (" (using uncalibrated defaults)" if not thresholds.get("calibrated") else ""))
+    out_lines.append(f"- pages scanned: {scanned}; embedded: {len(pages)}; skipped: {sum(skipped_counts.values())}")
+    if skipped_counts:
+        out_lines.append("- skipped reasons: " + ", ".join(f"{k}={v}" for k, v in sorted(skipped_counts.items())))
+    out_lines.append(f"- cache hits: {cached_hits}; recomputed: {computed}; orphans pruned: {len(orphans)}")
+    out_lines.append("")
+    out_lines.append(f"## Errors (similarity >= {error_})")
+    out_lines.append("")
+    if not errors:
+        out_lines.append("- none")
+    else:
+        for sim, a, b in errors:
+            out_lines.append(f"- `{sim:.4f}` {a} -- {b}")
+    out_lines.append("")
+    out_lines.append(f"## Review ({review} <= similarity < {error_})")
+    out_lines.append("")
+    if not reviews:
+        out_lines.append("- none")
+    else:
+        for sim, a, b in reviews:
+            out_lines.append(f"- `{sim:.4f}` {a} -- {b}")
+    report = "\n".join(out_lines) + "\n"
+
+    print(report)
+    if report_path is not None:
+        # Confine report writes to VAULT_ROOT. A path that resolves outside
+        # the vault is refused (prevents `--report /etc/passwd` style
+        # accidents or hostile args from writing outside the repo).
+        try:
+            resolved_report = (
+                report_path if report_path.is_absolute() else (Path.cwd() / report_path)
+            ).resolve()
+            resolved_report.relative_to(VAULT_ROOT.resolve())
+        except ValueError:
+            log(f"ERR: --report path '{report_path}' escapes vault root {VAULT_ROOT}")
+            return EXIT_USAGE
+        resolved_report.parent.mkdir(parents=True, exist_ok=True)
+        resolved_report.write_text(report, encoding="utf-8")
+        log(f"report written: {resolved_report}")
+
+    return EXIT_OK
+
+
+def cmd_peek(ollama_url: str, model: str) -> int:
+    """Structured diagnostics. Prints a JSON object and a plain summary."""
+    diag: dict = {}
+    script_path = Path(__file__).resolve()
+    diag["script_path"] = str(script_path)
+    diag["script_executable"] = os.access(script_path, os.X_OK)
+    diag["python"] = sys.executable
+    diag["vault_root"] = str(VAULT_ROOT)
+    diag["ollama_url"] = ollama_url
+    diag["ollama_reachable"] = detect_ollama(ollama_url)
+    diag["model_requested"] = model
+    diag["model_present"] = detect_model(ollama_url, model) if diag["ollama_reachable"] else False
+    diag["cache_present"] = CACHE_PATH.exists()
+    diag["cache_readable"] = False
+    diag["cache_entries"] = 0
+    diag["cache_model"] = None
+    if diag["cache_present"]:
+        try:
+            with CACHE_PATH.open() as f:
+                c = json.load(f)
+            diag["cache_readable"] = (c.get("version") == 1
+                                      and isinstance(c.get("embeddings"), dict))
+            diag["cache_entries"] = len(c.get("embeddings", {}))
+            diag["cache_model"] = c.get("model")
+        except (OSError, json.JSONDecodeError) as exc:
+            diag["cache_readable"] = False
+            diag["cache_error"] = str(exc)
+    diag["thresholds_present"] = THRESHOLDS_PATH.exists()
+    diag["thresholds_readable"] = False
+    if diag["thresholds_present"]:
+        try:
+            with THRESHOLDS_PATH.open() as f:
+                t = json.load(f)
+            diag["thresholds_readable"] = True
+            diag["thresholds_calibrated"] = bool(t.get("calibrated", False))
+            diag["thresholds_bands"] = t.get("bands", {})
+        except (OSError, json.JSONDecodeError):
+            diag["thresholds_readable"] = False
+    print(json.dumps(diag, indent=2))
+    if not diag["ollama_reachable"]:
+        return EXIT_NO_OLLAMA
+    if not diag["model_present"]:
+        return EXIT_NO_MODEL
+    if diag["cache_present"] and not diag["cache_readable"]:
+        return EXIT_CACHE_CORRUPT
+    return EXIT_OK
+
+
+def main(argv: list[str]) -> int:
+    p = argparse.ArgumentParser()
+    p.add_argument("--report", type=Path, default=None)
+    p.add_argument("--rebuild-cache", action="store_true")
+    p.add_argument("--peek", action="store_true")
+    p.add_argument("--allow-remote-ollama", action="store_true",
+                   help="allow OLLAMA_URL env override pointing outside localhost")
+    p.add_argument("--model", default=DEFAULT_MODEL)
+    args = p.parse_args(argv)
+
+    env_url = os.environ.get("OLLAMA_URL")
+    ollama_url = env_url or DEFAULT_OLLAMA_URL
+    if env_url and not _is_local_url(ollama_url) and not args.allow_remote_ollama:
+        log(f"ERR: OLLAMA_URL={ollama_url!r} is not localhost. "
+            f"Vault content would be POSTed to a non-local host. "
+            f"Pass --allow-remote-ollama to override.")
+        return EXIT_USAGE
+
+    if args.peek:
+        return cmd_peek(ollama_url, args.model)
+    return run_check(
+        rebuild=args.rebuild_cache,
+        report_path=args.report,
+        ollama_url=ollama_url,
+        model=args.model,
+    )
+
+
+# Script entry point: pass the CLI arguments (without the program name) to
+# main() and use its return value as the process exit status.
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
@@ -0,0 +1,341 @@
+#!/usr/bin/env bash
+# wiki-lock.sh — per-file advisory locking for safe multi-writer vault mutation.
+#
+# Closes the latent multi-writer corruption bug in v1.6 where two parallel
+# sub-agents writing to the same wiki page could silently trample each other.
+# The README and skills/wiki-ingest/SKILL.md §259-264 documented "single-writer
+# only" as a convention; this script makes it an enforceable guard.
+#
+# Design (age-based, not flock-style):
+#   flock(2) advisory locks release when the holding process exits. That
+#   doesn't fit our model where `acquire` and `release` are SEPARATE bash
+#   invocations from the same skill (each Bash tool call is its own short-
+#   lived process — neither's PID survives long enough to mean anything).
+#   So we use atomic lockfile creation with `set -o noclobber` plus
+#   epoch-timestamp AGE-based staleness detection. Race-safe because the
+#   noclobber write itself is atomic on POSIX filesystems.
+#
+#   The PID written into the lockfile is informational only (helpful for
+#   `list` and debugging). The acquire decision considers AGE only:
+#     - If lockfile age <= STALE_AFTER_SEC → refuse (return 75 EX_TEMPFAIL)
+#     - If lockfile age >  STALE_AFTER_SEC → reap and acquire
+#   Default STALE_AFTER_SEC=60. Long enough for any single skill operation
+#   (page writes are milliseconds; a multi-write ingest pass is seconds);
+#   short enough that a crashed holder unblocks quickly.
+#
+# Semantics:
+#   acquire <vault-rel-path>
+#     - Computes lock_file = .vault-meta/locks/<sha1(path)>.lock
+#     - Atomically creates the lockfile with this process's PID + epoch
+#     - Returns 0 if acquired, 75 (EX_TEMPFAIL) if held and age <= threshold
+#     - Auto-reaps locks older than STALE_AFTER_SEC
+#   release <vault-rel-path>
+#     - Removes the lockfile unconditionally (rm -f). Idempotent.
+#     - Cross-process release IS allowed by design — acquire and release
+#       are typically separate bash invocations from the same skill, and
+#       PID-matching would never succeed. Skill authors are trusted not to
+#       release locks they don't own; that's no weaker than `rm` on the
+#       lockfile directly.
+#   list
+#     - Prints currently-held lock records (one per line: pid age path).
+#   clear-stale [--max-age N]
+#     - Removes lockfiles whose PID is dead OR whose age > N seconds.
+#       Default N = 3600 (1h). Returns count removed via stdout.
+#       (The N=3600 default is intentionally generous because clear-stale
+#       is admin-grade cleanup, distinct from the per-acquire age threshold.)
+#   peek <vault-rel-path>
+#     - Prints holder info or "unheld"; exit 0; never mutates.
+#
+# Globals:
+#   STALE_AFTER_SEC — default 60. Override via --stale-after-sec N on any cmd.
+#
+# Age-threshold naming (v1.7.2; closes audit L6):
+#   - STALE_AFTER_SEC (default 60) is the PER-ACQUIRE threshold. A new
+#     acquire that finds an existing lock will reap-and-take if the lock is
+#     older than this; refuse otherwise. Tuned for "single skill operation
+#     completes within 60s."
+#   - `clear-stale --max-age N` (default 3600) is the ADMIN reaper threshold,
+#     meant to be run periodically by an operator or hook to sweep abandoned
+#     locks. Tuned for "anything older than an hour is definitely abandoned."
+#   These are two distinct concerns; both are time-since-acquire but operate
+#   at different scopes. Do not unify the defaults.
+#
+# Usage:
+#   bash scripts/wiki-lock.sh acquire wiki/concepts/Foo.md
+#   bash scripts/wiki-lock.sh release wiki/concepts/Foo.md
+#   bash scripts/wiki-lock.sh list
+#   bash scripts/wiki-lock.sh clear-stale --max-age 1800
+#   bash scripts/wiki-lock.sh peek wiki/concepts/Foo.md
+#
+# Exit codes:
+#   0  — success
+#   1  — meta-lock could not be acquired within 5s
+#   2  — usage error
+#   75 — acquire failed (lock held and not yet stale by age)
+#   3  — vault-meta/locks dir creation failed
+#   4  — invalid vault-relative path (escape attempt)
+
+set -euo pipefail
+
+VAULT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+META_DIR="${VAULT_ROOT}/.vault-meta"
+LOCK_DIR="${META_DIR}/locks"
+META_LOCK="${META_DIR}/.wiki-lock.meta"
+STALE_AFTER_SEC=60
+
+# ── helpers ──────────────────────────────────────────────────────────────────
+die() { echo "ERR: $*" >&2; exit "${2:-2}"; }
+log() { echo "$*" >&2; }
+
+# Allow tests / non-default vault roots to override.
+# When WIKI_LOCK_VAULT is set and non-empty it replaces VAULT_ROOT, and every
+# path derived from it is recomputed so the paths stay consistent.
+if [ -n "${WIKI_LOCK_VAULT:-}" ]; then
+  VAULT_ROOT="$WIKI_LOCK_VAULT"
+  META_DIR="${VAULT_ROOT}/.vault-meta"
+  LOCK_DIR="${META_DIR}/locks"
+  META_LOCK="${META_DIR}/.wiki-lock.meta"
+fi
+
+sha1_of() {
+  if command -v sha1sum >/dev/null 2>&1; then
+    printf '%s' "$1" | sha1sum | awk '{print $1}'
+  else
+    # macOS fallback
+    printf '%s' "$1" | shasum -a 1 | awk '{print $1}'
+  fi
+}
+
+ensure_dirs() {
+  mkdir -p "$LOCK_DIR" 2>/dev/null || die "cannot create $LOCK_DIR" 3
+}
+
+# validate_path PATH — exit 4 unless PATH is an acceptable vault-relative path.
+# Returns 0 when the path passes every check.
+validate_path() {
+  # Reject empty, absolute, escape, or newline-bearing paths to prevent
+  # lock-namespace pollution. v1.7.2 / closes audit M4: newlines would break
+  # the meta-lock line format (key=value lines separated by literal \n).
+  # v1.9.1 / closes audit M3 (symlink escape): when a vault-relative path
+  # resolves through a symlink to outside VAULT_ROOT, treat as path traversal.
+  local p="$1"
+  [ -z "$p" ] && die "path cannot be empty" 4
+  # Note: the '..' arm matches the two characters anywhere in the path, so
+  # names such as "a..b.md" are rejected as well.
+  case "$p" in
+    /*) die "path must be vault-relative, not absolute: $p" 4 ;;
+    *..*) die "path may not contain '..': $p" 4 ;;
+    *$'\n'*) die "path may not contain newlines (lockfile format would break)" 4 ;;
+    *$'\r'*) die "path may not contain carriage returns" 4 ;;
+  esac
+  # Symlink canonicalization (only when the path or one of its parents exists).
+  # Non-existent paths can pass; the lock acquire itself creates leaves under
+  # LOCK_DIR, not the path itself. We resolve via python3 (portable across
+  # GNU coreutils + macOS BSD where realpath flag semantics differ).
+  # When python3 is not installed this check is skipped entirely.
+  if command -v python3 >/dev/null 2>&1; then
+    local resolved root
+    # The helper prints INSIDE or OUTSIDE; inputs travel via environment
+    # variables so the path is never interpolated into the Python source.
+    resolved=$(VAULT_ROOT_BASH="$VAULT_ROOT" P_BASH="$p" python3 -c '
+import os, sys
+root = os.path.realpath(os.environ["VAULT_ROOT_BASH"])
+candidate = os.environ["P_BASH"]
+target = os.path.realpath(os.path.join(root, candidate))
+common = os.path.commonpath([root, target]) if target else ""
+sys.stdout.write("INSIDE" if common == root else "OUTSIDE")
+' 2>/dev/null)
+    [ "$resolved" = "OUTSIDE" ] && die "path resolves outside vault via symlink: $p" 4
+  fi
+  return 0
+}
+
+now_epoch() { date +%s; }
+
+# is_alive PID — succeed when PID names a process this user can signal.
+is_alive() {
+  # kill -0 returns 0 if process exists and we can signal it
+  # (signal 0 performs the check without delivering a signal).
+  kill -0 "$1" 2>/dev/null
+}
+
+# Atomic meta-lock wrapper. Funcs that mutate LOCK_DIR call under this lock so
+# acquire/release/clear-stale don't race against each other.
+# Usage: with_meta_lock COMMAND [ARGS...]
+# Runs COMMAND in a subshell while holding an exclusive flock on META_LOCK;
+# the subshell's exit status is the wrapper's status. Exits 1 when the flock
+# is not obtained within 5 seconds.
+with_meta_lock() {
+  ensure_dirs
+  # Use flock under bash's redirect; meta lock is short-lived per command.
+  # fd 9 is opened on META_LOCK for the lifetime of the subshell only.
+  (
+    flock -x -w 5 9 || die "could not acquire meta-lock within 5s" 1
+    "$@"
+  ) 9>"$META_LOCK"
+}
+
+read_lockfile() {
+  # Echoes: <pid> <epoch> <path>  (or empty if file missing/unreadable)
+  local lf="$1"
+  [ -f "$lf" ] || return 0
+  head -1 "$lf" 2>/dev/null || true
+}
+
+# ── commands ─────────────────────────────────────────────────────────────────
+# _try_create_lockfile LOCKFILE EPOCH PATH — create LOCKFILE with the record
+# "<pid> EPOCH PATH" using noclobber, so the call fails (non-zero, silently)
+# when LOCKFILE already exists.
+_try_create_lockfile() {
+  (set -o noclobber; printf '%s %s %s\n' "$$" "$2" "$3" > "$1") 2>/dev/null
+}
+
+# _cmd_acquire PATH — take the lock for PATH.
+# Returns 0 when the lock was created, 75 when it is held and not stale.
+# An existing lockfile counts as stale when it is empty/unreadable, when its
+# epoch field is not a plain number, or when its age exceeds STALE_AFTER_SEC;
+# a stale lockfile is removed and creation is retried once.
+_cmd_acquire() {
+  local path="$1"
+  validate_path "$path"
+  ensure_dirs
+  local lf="${LOCK_DIR}/$(sha1_of "$path").lock"
+  local now
+  now=$(now_epoch)
+
+  # Fast path: nobody holds the lock.
+  if _try_create_lockfile "$lf" "$now" "$path"; then
+    return 0
+  fi
+
+  # Lockfile already exists — decide staleness from its age, never its PID.
+  local record held_since stale=0
+  record=$(read_lockfile "$lf")
+  if [ -z "$record" ]; then
+    stale=1
+  else
+    held_since=$(printf '%s' "$record" | awk '{print $2}')
+    case "$held_since" in
+      ''|*[!0-9]*)
+        # Corrupt epoch field → treat as stale.
+        stale=1
+        ;;
+      *)
+        local age=$((now - held_since))
+        if [ "$age" -gt "$STALE_AFTER_SEC" ]; then
+          stale=1
+        fi
+        ;;
+    esac
+  fi
+
+  if [ "$stale" -eq 1 ]; then
+    rm -f "$lf"
+    if _try_create_lockfile "$lf" "$now" "$path"; then
+      return 0
+    fi
+  fi
+
+  # Held and not yet stale, or the retry after reaping failed.
+  return 75
+}
+
+_cmd_release() {
+  local path="$1"
+  validate_path "$path"
+  ensure_dirs
+  local lf="${LOCK_DIR}/$(sha1_of "$path").lock"
+  # Unconditional remove — cross-process release is allowed by design
+  # (acquire and release are typically separate bash invocations from the
+  # same skill; PID-matching would never succeed). See header comment.
+  rm -f "$lf"
+  return 0
+}
+
+# _cmd_list — print one "pid=<pid> age=<n>s path=<path>" line per lockfile in
+# LOCK_DIR. Empty lockfiles are skipped. A lockfile whose epoch field is not
+# a plain number is listed with age "?" instead of aborting the listing with
+# an arithmetic error. Always returns 0.
+_cmd_list() {
+  ensure_dirs
+  local lf rec pid epoch path now age
+  now=$(now_epoch)
+  for lf in "$LOCK_DIR"/*.lock; do
+    # An unmatched glob stays literal; the -f test skips it.
+    [ -f "$lf" ] || continue
+    rec=$(read_lockfile "$lf")
+    [ -n "$rec" ] || continue
+    pid=$(printf '%s' "$rec" | awk '{print $1}')
+    epoch=$(printf '%s' "$rec" | awk '{print $2}')
+    # The path is everything after the second space and may contain spaces.
+    path=$(printf '%s' "$rec" | cut -d' ' -f3-)
+    case "$epoch" in
+      ''|*[!0-9]*) age="?" ;;
+      # 10# forces base 10 so a zero-padded epoch is not parsed as octal.
+      *) age=$((now - 10#$epoch)) ;;
+    esac
+    printf 'pid=%s age=%ss path=%s\n' "$pid" "$age" "$path"
+  done
+  return 0
+}
+
+_cmd_clear_stale() {
+  local max_age="$1"
+  ensure_dirs
+  local removed=0
+  local now
+  now=$(now_epoch)
+  for lf in "$LOCK_DIR"/*.lock; do
+    [ -f "$lf" ] || continue
+    local rec
+    rec=$(read_lockfile "$lf")
+    if [ -z "$rec" ]; then
+      rm -f "$lf"; removed=$((removed + 1)); continue
+    fi
+    local pid epoch age
+    pid=$(printf '%s' "$rec" | awk '{print $1}')
+    epoch=$(printf '%s' "$rec" | awk '{print $2}')
+    age=$((now - epoch))
+    if ! is_alive "$pid" || [ "$age" -gt "$max_age" ]; then
+      rm -f "$lf"; removed=$((removed + 1))
+    fi
+  done
+  echo "$removed"
+  return 0
+}
+
+_cmd_peek() {
+  local path="$1"
+  validate_path "$path"
+  ensure_dirs
+  local lf="${LOCK_DIR}/$(sha1_of "$path").lock"
+  if [ ! -f "$lf" ]; then
+    echo "unheld"
+    return 0
+  fi
+  local rec
+  rec=$(read_lockfile "$lf")
+  echo "$rec"
+  return 0
+}
+
+# ── arg parsing (flags accepted in any position) ─────────────────────────────
+# usage — print this file's header comment: every line after the shebang up
+# to the first non-comment line, with the leading "# " removed. Reading to
+# the end of the header (instead of a fixed line range) keeps the help text
+# complete when the header grows.
+usage() {
+  awk 'NR == 1 { next } /^#/ { sub(/^# ?/, ""); print; next } { exit }' "$0"
+}
+
+# require_uint FLAG VALUE — usage error unless VALUE is a non-negative integer
+# (also catches a flag given as the last argument with no value).
+require_uint() {
+  case "${2:-}" in
+    ''|*[!0-9]*) die "$1 requires a non-negative integer argument" ;;
+  esac
+}
+
+if [ $# -lt 1 ]; then
+  usage
+  exit 2
+fi
+
+CMD=""
+ARGS=()
+MAX_AGE_OVERRIDE=""
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --stale-after-sec) require_uint "$1" "${2:-}"; STALE_AFTER_SEC="$2"; shift 2 ;;
+    --max-age)         require_uint "$1" "${2:-}"; MAX_AGE_OVERRIDE="$2"; shift 2 ;;
+    -h|--help)         usage; exit 0 ;;
+    # Everything after "--" is positional, even if it starts with "-".
+    --) shift; while [ $# -gt 0 ]; do ARGS+=("$1"); shift; done ;;
+    -*) die "unknown flag: $1" ;;
+    *)
+      # First bare word is the command; the rest are its arguments.
+      if [ -z "$CMD" ]; then
+        CMD="$1"
+      else
+        ARGS+=("$1")
+      fi
+      shift
+      ;;
+  esac
+done
+
+[ -n "$CMD" ] || die "no command given"
+
+# Every command runs under the meta-lock so commands never interleave.
+case "$CMD" in
+  acquire)
+    [ ${#ARGS[@]} -ge 1 ] || die "acquire needs a path"
+    with_meta_lock _cmd_acquire "${ARGS[0]}"
+    ;;
+  release)
+    [ ${#ARGS[@]} -ge 1 ] || die "release needs a path"
+    with_meta_lock _cmd_release "${ARGS[0]}"
+    ;;
+  list)
+    with_meta_lock _cmd_list
+    ;;
+  clear-stale)
+    # --max-age wins over a positional value; default is 3600 seconds.
+    MAX="${MAX_AGE_OVERRIDE:-${ARGS[0]:-3600}}"
+    with_meta_lock _cmd_clear_stale "$MAX"
+    ;;
+  peek)
+    [ ${#ARGS[@]} -ge 1 ] || die "peek needs a path"
+    with_meta_lock _cmd_peek "${ARGS[0]}"
+    ;;
+  *)
+    die "unknown command: $CMD (try acquire|release|list|clear-stale|peek)"
+    ;;
+esac
@@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""wiki-mode.py — read + route helper for v1.8 methodology modes.
+
+Single source of truth for "which mode is this vault in" and "where should
+new content of type X be filed under mode Y." Consumed by:
+
+  - skills/wiki-ingest/SKILL.md  (where to file new source/entity/concept pages)
+  - skills/save/SKILL.md         (where to file session notes)
+  - skills/autoresearch/SKILL.md (where to file research output)
+  - bin/setup-mode.sh            (writes .vault-meta/mode.json)
+
+If `.vault-meta/mode.json` is absent → mode = "generic" → behavior identical
+to v1.7. No skill needs to special-case the missing-config path.
+
+CLI:
+  wiki-mode.py get                      # print current mode (default: generic)
+  wiki-mode.py config                   # print full config JSON
+  wiki-mode.py route TYPE NAME [--mode MODE]
+                                        # print suggested path for new content
+                                        # TYPE: source|entity|concept|session|research
+                                        # --mode MODE previews routing under MODE
+                                        # without writing mode.json
+  wiki-mode.py set MODE                 # write mode (lyt|para|zettelkasten|generic)
+  wiki-mode.py id                       # mint a Zettelkasten ID (timestamp)
+  wiki-mode.py templates                # list per-mode template files
+
+Exit codes:
+  0 — success
+  2 — usage error
+  3 — invalid mode string
+  4 — invalid content type
+"""
+
+import argparse
+import json
+import re
+import sys
+import tempfile
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Vault root is the parent of the directory that contains this script.
+VAULT_ROOT = Path(__file__).resolve().parent.parent
+# Directory and file holding the persisted mode configuration.
+META_DIR = VAULT_ROOT / ".vault-meta"
+MODE_PATH = META_DIR / "mode.json"
+
+# Accepted values for the `set MODE` / `route --mode` arguments and for the
+# `route TYPE` argument respectively (used as argparse `choices`).
+VALID_MODES = ("generic", "lyt", "para", "zettelkasten")
+VALID_TYPES = ("source", "entity", "concept", "session", "research")
+
+# Baseline configuration returned when mode.json is absent or unreadable, and
+# the base that a loaded mode.json is merged onto. Folder values are
+# vault-relative and are used as path prefixes by route_path().
+# Keys not read anywhere in this file (moc_folder, areas_folder,
+# archives_folder, id_format, no_folders, schema_version) are still emitted by
+# the `config` command and written back by `set`.
+DEFAULT_CONFIG = {
+    "schema_version": 1,
+    "mode": "generic",
+    "configured_at": None,  # filled with a UTC timestamp by the `set` command
+    "config": {
+        "lyt": {
+            "moc_folder": "wiki/mocs/",
+            "notes_folder": "wiki/notes/",  # every content type is routed here in lyt mode
+        },
+        "para": {
+            "projects_folder": "wiki/projects/",
+            "areas_folder": "wiki/areas/",
+            "resources_folder": "wiki/resources/",
+            "archives_folder": "wiki/archives/",
+        },
+        "zettelkasten": {
+            # Same field layout that mint_zettel_id() produces via strftime.
+            "id_format": "YYYYMMDDHHMMSSffffff",
+            "no_folders": True,
+            "root_folder": "wiki/",
+        },
+        "generic": {
+            "sources_folder": "wiki/sources/",
+            "entities_folder": "wiki/entities/",
+            "concepts_folder": "wiki/concepts/",  # also receives research output in generic mode
+            "sessions_folder": "wiki/sessions/",
+        },
+    },
+}
+
+
+def load_config():
+    """Return parsed mode.json, or DEFAULT_CONFIG with mode='generic' if absent."""
+    if not MODE_PATH.is_file():
+        return dict(DEFAULT_CONFIG)
+    try:
+        loaded = json.loads(MODE_PATH.read_text(encoding="utf-8"))
+        # Merge with defaults so partially-configured files still work
+        merged = dict(DEFAULT_CONFIG)
+        merged["mode"] = loaded.get("mode", "generic")
+        merged["configured_at"] = loaded.get("configured_at")
+        loaded_config = loaded.get("config", {})
+        for k, v in loaded_config.items():
+            if k in merged["config"] and isinstance(v, dict):
+                merged["config"][k].update(v)
+        return merged
+    except (json.JSONDecodeError, OSError) as e:
+        print(f"ERR: cannot parse {MODE_PATH}: {e}", file=sys.stderr)
+        print("  Falling back to mode=generic. Re-run `bash bin/setup-mode.sh` to fix.",
+              file=sys.stderr)
+        return dict(DEFAULT_CONFIG)
+
+
+def save_config(cfg):
+    META_DIR.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(cfg, indent=2, ensure_ascii=False) + "\n"
+    fd, tmp_path = tempfile.mkstemp(prefix="mode.", suffix=".tmp", dir=str(META_DIR))
+    try:
+        with open(fd, "w", encoding="utf-8") as fh:
+            fh.write(payload)
+        Path(tmp_path).replace(MODE_PATH)
+    except Exception:
+        try:
+            Path(tmp_path).unlink()
+        except OSError:
+            pass
+        raise
+
+
+def slugify(name):
+    """Filesystem-safe slug; matches the convention used by the existing skills.
+    Any run of non-word, non-hyphen characters becomes a single hyphen so that
+    'v1.8 launch! prep?' → 'v1-8-launch-prep' (not 'v18launchprep').
+    Unicode word characters (CJK, accented Latin, Cyrillic, etc.) are preserved.
+    """
+    s = re.sub(r"[^\w\-]+", "-", name, flags=re.UNICODE)
+    s = re.sub(r"-+", "-", s).strip("-")
+    return s or "untitled"
+
+
+def safe_name(name):
+    """Sanitize a name that intentionally preserves case + spaces (entity/concept).
+    Strips path separators, null bytes, control characters, and leading dots or
+    hyphens so the returned string cannot escape its parent directory or be
+    interpreted as a hidden file or flag. Spaces and case are preserved.
+    """
+    cleaned = re.sub(r"[/\\\x00-\x1f]+", "", name)
+    cleaned = cleaned.lstrip(".-")
+    return cleaned or "untitled"
+
+
+def mint_zettel_id():
+    """YYYYMMDDHHMMSSffffff in UTC (microsecond resolution).
+    Stable across timezones; lexicographically sortable; collision-resistant
+    against rapid back-to-back calls in the same second. Microsecond suffix
+    closes the v1.8.0 verifier LOW (two rapid mint calls produced the same
+    14-digit ID and would have generated colliding filenames).
+    """
+    return datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f")
+
+
+def route_path(mode, content_type, name, cfg):
+    """Return the suggested vault-relative path for new content under `mode`."""
+    if content_type not in VALID_TYPES:
+        raise SystemExit(4)
+    slug = slugify(name)
+
+    raw = safe_name(name)  # case + spaces preserved, but path-traversal stripped
+
+    if mode == "generic":
+        g = cfg["config"]["generic"]
+        mapping = {
+            "source":   g["sources_folder"] + slug + ".md",
+            "entity":   g["entities_folder"] + raw + ".md",  # preserve capitalization for entities
+            "concept":  g["concepts_folder"] + raw + ".md",
+            "session":  g["sessions_folder"] + slug + ".md",
+            "research": g["concepts_folder"] + raw + ".md",
+        }
+        return mapping[content_type]
+
+    if mode == "lyt":
+        notes = cfg["config"]["lyt"]["notes_folder"]
+        # All atomic notes flat in wiki/notes/; routing is the same regardless of type
+        return notes + slug + ".md"
+
+    if mode == "para":
+        p = cfg["config"]["para"]
+        mapping = {
+            # New sources land in resources/<topic>/ (we use a generic 'incoming' bucket;
+            # the user will sort into specific topics via their own workflow)
+            "source":   p["resources_folder"] + "incoming/" + slug + ".md",
+            "entity":   p["resources_folder"] + "people/" + raw + ".md",
+            "concept":  p["resources_folder"] + "concepts/" + raw + ".md",
+            # Session notes land in projects/inbox/; user reroutes to specific projects
+            "session":  p["projects_folder"] + "inbox/" + slug + ".md",
+            "research": p["resources_folder"] + slug + "/" + slug + ".md",
+        }
+        return mapping[content_type]
+
+    if mode == "zettelkasten":
+        z = cfg["config"]["zettelkasten"]
+        zid = mint_zettel_id()
+        return z["root_folder"] + f"{zid}-{slug}.md"
+
+    raise SystemExit(3)
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Methodology-mode router for v1.8 Compound Vault.")
+    sub = parser.add_subparsers(dest="cmd", required=True)
+
+    sub.add_parser("get", help="Print current mode")
+    sub.add_parser("config", help="Print full config JSON")
+
+    sp_route = sub.add_parser("route", help="Print suggested vault path for new content")
+    sp_route.add_argument("type", choices=VALID_TYPES)
+    sp_route.add_argument("name", help="Content name (will be slugified for filenames)")
+    sp_route.add_argument("--mode", choices=VALID_MODES, default=None,
+                          help="Preview routing under MODE without writing mode.json (default: use current vault mode)")
+
+    sp_set = sub.add_parser("set", help="Write a mode to .vault-meta/mode.json")
+    sp_set.add_argument("mode", choices=VALID_MODES)
+
+    sub.add_parser("id", help="Mint a Zettelkasten ID (timestamp)")
+    sub.add_parser("templates", help="List per-mode template files")
+
+    args = parser.parse_args()
+    cfg = load_config()
+
+    if args.cmd == "get":
+        print(cfg["mode"])
+        return 0
+
+    if args.cmd == "config":
+        print(json.dumps(cfg, indent=2, ensure_ascii=False))
+        return 0
+
+    if args.cmd == "route":
+        active_mode = args.mode if args.mode else cfg["mode"]
+        path = route_path(active_mode, args.type, args.name, cfg)
+        print(path)
+        return 0
+
+    if args.cmd == "set":
+        cfg["mode"] = args.mode
+        cfg["configured_at"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+        save_config(cfg)
+        print(f"mode set: {args.mode}")
+        return 0
+
+    if args.cmd == "id":
+        print(mint_zettel_id())
+        return 0
+
+    if args.cmd == "templates":
+        templates_dir = VAULT_ROOT / "skills" / "wiki-mode" / "templates"
+        if not templates_dir.is_dir():
+            print(f"ERR: templates dir missing: {templates_dir}", file=sys.stderr)
+            return 2
+        for f in sorted(templates_dir.rglob("*.md")):
+            print(str(f.relative_to(VAULT_ROOT)))
+        return 0
+
+    return 2
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    sys.exit(main())