add pdftomd

This commit is contained in:
김경종
2026-05-08 16:42:19 +09:00
parent 551ab50735
commit 88d6b92283
99 changed files with 47332 additions and 0 deletions
+168
View File
@@ -0,0 +1,168 @@
"""Project guardrails for shell commands and apply_patch edits."""
from __future__ import annotations
import json
import re
import subprocess
import sys
from pathlib import Path
REMOTE_ENGINE_PATTERNS = [
"--api-url",
"router mode",
"http client mode",
"http client backend",
"http-client",
"remote api",
"remote endpoint",
"openai-compatible",
"openai compatible",
"mathpix",
"mistral ocr",
"nanonets",
]
DIRECT_SERVER_COMMAND_PATTERNS = [
r"(^|\s)mineru-api(\s|$)",
r"(^|\s)mineru-router(\s|$)",
]
ALLOWED_NEGATION_PATTERNS = [
"do not",
"never",
"exclude",
"excluded",
"non-goal",
"not use",
"no cloud",
"blocked",
"prohibit",
"prohibited",
"forbid",
"forbidden",
"reject",
"rejecting",
]
def read_payload() -> dict:
raw = sys.stdin.read()
if not raw.strip():
return {}
try:
return json.loads(raw)
except json.JSONDecodeError:
return {}
def deny(reason: str) -> int:
output = {
"hookSpecificOutput": {
"hookEventName": "PreToolUse",
"permissionDecision": "deny",
"permissionDecisionReason": reason,
}
}
print(json.dumps(output, ensure_ascii=True))
return 0
def find_repo_root(cwd: str | None) -> Path:
start = Path(cwd or Path.cwd()).resolve()
try:
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"],
cwd=start,
capture_output=True,
text=True,
check=True,
)
return Path(result.stdout.strip()).resolve()
except Exception:
return start
def samples_are_untracked(root: Path) -> bool:
try:
result = subprocess.run(
["git", "status", "--porcelain", "--", "samples"],
cwd=root,
capture_output=True,
text=True,
check=True,
)
except Exception:
return False
return any(line.startswith("?? ") for line in result.stdout.splitlines())
def check_shell_command(command: str, root: Path) -> str | None:
normalized = command.replace("\\", "/").lower()
if re.search(r"\bgit\s+add\b.*(?:^|\s|/)samples(?:\s|/|$)", normalized):
return "Do not stage samples/ unless the user explicitly requests it."
stages_everything = re.search(r"\bgit\s+add\b", normalized) and re.search(
r"(\s\.($|\s)|\s-a($|\s)|\s--all($|\s))",
normalized,
)
if samples_are_untracked(root) and stages_everything:
return "Use path-specific git add commands; samples/ is untracked local fixture data."
destructive_samples = [
r"\bgit\s+clean\b.*\b-f\b.*(?:^|\s|/)samples(?:\s|/|$)",
r"\brm\s+.*-r[f]?\b.*(?:^|\s|/)samples(?:\s|/|$)",
r"\bremove-item\b.*-recurse\b.*(?:^|\s|/)samples(?:\s|/|$)",
r"\bgit\s+reset\s+--hard\b",
]
if any(re.search(pattern, normalized) for pattern in destructive_samples):
return "Destructive workspace or samples/ command blocked by project policy."
if any(re.search(pattern, normalized) for pattern in DIRECT_SERVER_COMMAND_PATTERNS):
return "Direct MinerU server/router commands are blocked; use the mineru CLI. CLI-internal temporary local mineru-api is allowed."
for pattern in REMOTE_ENGINE_PATTERNS:
if pattern in normalized:
return "Remote/API conversion paths are blocked; v1 must run MinerU 3.1.0 through the local CLI only."
return None
def check_patch(command: str) -> str | None:
for line in command.splitlines():
if not line.startswith("+") or line.startswith("+++"):
continue
lowered = line[1:].strip().lower()
if any(negation in lowered for negation in ALLOWED_NEGATION_PATTERNS):
continue
if any(pattern in lowered for pattern in REMOTE_ENGINE_PATTERNS):
return "Patch appears to add remote/API conversion behavior or excluded engine references."
if "runtime engine" in lowered and ("selection" in lowered or "switch" in lowered):
return "Runtime engine selection is out of scope for v1."
return None
def main() -> int:
payload = read_payload()
tool_name = payload.get("tool_name", "")
tool_input = payload.get("tool_input") or {}
command = str(tool_input.get("command") or tool_input.get("patch") or "")
root = find_repo_root(payload.get("cwd"))
if tool_name == "Bash":
reason = check_shell_command(command, root)
if reason:
return deny(reason)
if tool_name in {"apply_patch", "Edit", "Write"}:
reason = check_patch(command)
if reason:
return deny(reason)
return 0
if __name__ == "__main__":
raise SystemExit(main())
+63
View File
@@ -0,0 +1,63 @@
"""Inject the project coordination reminder at Codex session start."""
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
def read_payload() -> dict:
raw = sys.stdin.read()
if not raw.strip():
return {}
try:
return json.loads(raw)
except json.JSONDecodeError:
return {}
def find_repo_root(cwd: str | None) -> Path:
start = Path(cwd or Path.cwd()).resolve()
try:
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"],
cwd=start,
capture_output=True,
text=True,
check=True,
)
return Path(result.stdout.strip()).resolve()
except Exception:
return start
def main() -> int:
payload = read_payload()
root = find_repo_root(payload.get("cwd"))
required = ["PLAN.md", "PROGRESS.md"]
missing = [name for name in required if not (root / name).exists()]
context = (
"Before starting work in this repository, read PLAN.md and PROGRESS.md. "
"Use PROGRESS.md as the factual state, update PLAN.md when sequencing changes, "
"and keep samples/ out of commits unless the user explicitly requests otherwise."
)
output = {
"continue": True,
"hookSpecificOutput": {
"hookEventName": "SessionStart",
"additionalContext": context,
},
}
if missing:
output["systemMessage"] = "Missing project coordination file(s): " + ", ".join(missing)
print(json.dumps(output, ensure_ascii=True))
return 0
if __name__ == "__main__":
raise SystemExit(main())
+85
View File
@@ -0,0 +1,85 @@
"""Remind agents to verify and commit completed project-file changes."""
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
PROJECT_PREFIXES = (
".codex/",
"AGENTS.md",
"ARCHITECTURE.md",
"PLAN.md",
"PRD.md",
"PROGRESS.md",
"docs/",
)
def read_payload() -> dict:
raw = sys.stdin.read()
if not raw.strip():
return {}
try:
return json.loads(raw)
except json.JSONDecodeError:
return {}
def find_repo_root(cwd: str | None) -> Path:
start = Path(cwd or Path.cwd()).resolve()
try:
result = subprocess.run(
["git", "rev-parse", "--show-toplevel"],
cwd=start,
capture_output=True,
text=True,
check=True,
)
return Path(result.stdout.strip()).resolve()
except Exception:
return start
def project_changes(root: Path) -> list[str]:
try:
result = subprocess.run(
["git", "status", "--short"],
cwd=root,
capture_output=True,
text=True,
check=True,
)
except Exception:
return []
paths: list[str] = []
for line in result.stdout.splitlines():
path = line[3:].replace("\\", "/")
if path.startswith("samples/"):
continue
if path.startswith(PROJECT_PREFIXES):
paths.append(path)
return paths
def main() -> int:
payload = read_payload()
root = find_repo_root(payload.get("cwd"))
changes = project_changes(root)
if not changes:
return 0
message = (
"Project workflow/docs changed. Before finishing, run focused verification, "
"commit the completed change, and keep samples/ out of the commit."
)
print(json.dumps({"continue": True, "systemMessage": message}, ensure_ascii=True))
return 0
if __name__ == "__main__":
raise SystemExit(main())