#!/usr/bin/env python3
"""Catch high-confidence documentation drift before a Codex turn ends."""

from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path

# Docs whose modification must be accompanied by a PROGRESS.md entry.
POLICY_DOCS = frozenset(
    {
        "docs/ARCHITECTURE.md",
        "docs/CONVERSION_POLICY.md",
        "docs/ADR.md",
    }
)


def _parse_porcelain_z(output: str) -> set[str]:
    """Extract changed paths from ``git status --porcelain -z`` output.

    Each record is ``XY <path>`` terminated by NUL. For renames and copies the
    record is followed by one extra NUL-terminated field holding the original
    path; only the new path is reported.
    """
    paths: set[str] = set()
    entries = iter(output.split("\0"))
    for entry in entries:
        # Shortest valid record is "XY p"; this also skips the empty string
        # left after the final NUL terminator.
        if len(entry) < 4:
            continue
        status, path = entry[:2], entry[3:]
        if "R" in status or "C" in status:
            # Consume the trailing "original path" field of a rename/copy.
            next(entries, None)
        paths.add(path.replace("\\", "/"))
    return paths


def changed_paths(root: Path) -> set[str]:
    """Return the repo-relative paths git reports as changed under *root*.

    The check is best-effort: if git is missing, *root* is not usable as a
    working directory, the command times out, or git exits non-zero, an empty
    set is returned so the hook never blocks on its own failure.
    """
    try:
        result = subprocess.run(
            [
                "git",
                "status",
                "--porcelain",
                # NUL-separated output is never quoted, so paths with spaces,
                # non-ASCII characters or a literal " -> " parse reliably.
                "-z",
                # List files inside new directories individually instead of
                # collapsing them to "dir/", so exact-path checks can match.
                "--untracked-files=all",
            ],
            cwd=root,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            timeout=20,
        )
    except (OSError, subprocess.SubprocessError):
        return set()
    if result.returncode != 0:
        return set()
    return _parse_porcelain_z(result.stdout)


def block(reason: str) -> int:
    """Write a ``block`` decision carrying *reason* to stdout as JSON.

    Returns 0: the decision is communicated through the JSON payload, not the
    process exit status.
    """
    json.dump({"decision": "block", "reason": reason}, sys.stdout)
    return 0


def main() -> int:
    """Read the hook payload from stdin and block on documentation drift.

    Always returns 0. A block decision, if any, is emitted on stdout via
    :func:`block`. Malformed or unexpected payloads are ignored.
    """
    try:
        payload = json.load(sys.stdin)
    except ValueError:
        # Covers json.JSONDecodeError and undecodable input alike.
        return 0
    if not isinstance(payload, dict):
        # Valid JSON that is not an object carries nothing we can act on.
        return 0

    if payload.get("stop_hook_active"):
        return 0

    cwd = payload.get("cwd")
    root = Path(cwd if isinstance(cwd, str) and cwd else ".").resolve()
    paths = changed_paths(root)

    if "requirements.txt" in paths and "docs/TOOLCHAIN.md" not in paths:
        return block(
            "requirements.txt changed without docs/TOOLCHAIN.md. "
            "Update the toolchain notes with dependency compatibility rationale."
        )

    sample_pdf_changed = any(
        path.startswith("samples/") and path.lower().endswith(".pdf")
        for path in paths
    )
    metadata_changed = "samples/metadata.json" in paths
    if sample_pdf_changed and not metadata_changed:
        return block(
            "A sample PDF changed without samples/metadata.json. "
            "Update the sample metadata mapping so quality tests know the corpus traits."
        )

    touched_policy_docs = POLICY_DOCS.intersection(paths)
    if touched_policy_docs and "PROGRESS.md" not in paths:
        return block(
            "Architecture or conversion policy docs changed without PROGRESS.md. "
            "Record the decision and handoff context in PROGRESS.md."
        )

    return 0


if __name__ == "__main__":
    raise SystemExit(main())