add pdftomd
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
"""Project guardrails for shell commands and apply_patch edits."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Lower-case phrases that mark a remote/API conversion path or an excluded
# engine. They are tested as plain substrings of lower-cased text.
REMOTE_ENGINE_PATTERNS = [
    # Remote-mode flags and wording.
    "--api-url", "router mode", "http client mode", "http client backend",
    "http-client", "remote api", "remote endpoint",
    # OpenAI-style endpoints, hyphenated and spaced spellings.
    "openai-compatible", "openai compatible",
    # Engine/service names.
    "mathpix", "mistral ocr", "nanonets",
]

# Regular expressions for MinerU server/router executables appearing as a
# whole whitespace-delimited word in a shell command.
DIRECT_SERVER_COMMAND_PATTERNS = [
    r"(^|\s)mineru-api(\s|$)",
    r"(^|\s)mineru-router(\s|$)",
]

# Lower-case phrases that exempt an added patch line from the remote-engine
# check; matched as plain substrings.
ALLOWED_NEGATION_PATTERNS = [
    "do not", "never", "exclude", "excluded", "non-goal", "not use",
    "no cloud", "blocked", "prohibit", "prohibited", "forbid", "forbidden",
    "reject", "rejecting",
]
|
||||
|
||||
|
||||
def read_payload() -> dict:
    """Read the hook payload from standard input.

    Returns:
        The decoded JSON object. An empty dict is returned when stdin is
        blank, is not valid JSON, or decodes to anything other than a JSON
        object.
    """
    raw = sys.stdin.read()
    if not raw.strip():
        return {}
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # Callers use ``payload.get(...)``; valid JSON such as ``[]`` or ``"x"``
    # would otherwise surface there as an AttributeError.
    return payload if isinstance(payload, dict) else {}
|
||||
|
||||
|
||||
def deny(reason: str) -> int:
    """Print a PreToolUse "deny" decision as one line of JSON on stdout.

    Args:
        reason: Text stored under ``permissionDecisionReason``.

    Returns:
        Always 0.
    """
    decision = dict(
        hookEventName="PreToolUse",
        permissionDecision="deny",
        permissionDecisionReason=reason,
    )
    encoded = json.dumps({"hookSpecificOutput": decision}, ensure_ascii=True)
    sys.stdout.write(encoded + "\n")
    return 0
|
||||
|
||||
|
||||
def find_repo_root(cwd: str | None) -> Path:
    """Locate the git top-level directory for *cwd*.

    Args:
        cwd: Directory to start from; the process working directory is used
            when this is empty or None.

    Returns:
        The resolved path printed by ``git rev-parse --show-toplevel``, or the
        resolved starting directory if that lookup raises anything.
    """
    fallback = Path(cwd if cwd else Path.cwd()).resolve()
    git_args = ["git", "rev-parse", "--show-toplevel"]
    try:
        completed = subprocess.run(
            git_args,
            cwd=fallback,
            capture_output=True,
            text=True,
            check=True,
        )
        toplevel = Path(completed.stdout.strip()).resolve()
    except Exception:
        # Best effort: git missing, not a repository, unusable directory, ...
        return fallback
    return toplevel
|
||||
|
||||
|
||||
def samples_are_untracked(root: Path) -> bool:
    """Tell whether git reports untracked entries under ``samples``.

    Args:
        root: Directory in which ``git status --porcelain -- samples`` is run.

    Returns:
        True if any status line starts with ``"?? "``; False otherwise,
        including when the git call raises.
    """
    status_cmd = ["git", "status", "--porcelain", "--", "samples"]
    try:
        porcelain = subprocess.run(
            status_cmd,
            cwd=root,
            capture_output=True,
            text=True,
            check=True,
        ).stdout
    except Exception:
        return False

    for entry in porcelain.splitlines():
        if entry[:3] == "?? ":
            return True
    return False
|
||||
|
||||
|
||||
def check_shell_command(command: str, root: Path) -> str | None:
    """Return a denial reason for a shell command, or None if it is allowed.

    The command is lower-cased and its backslashes are turned into forward
    slashes before matching, so every check is case-insensitive and treats
    Windows-style paths like POSIX ones.

    Args:
        command: Raw shell command line.
        root: Directory passed to ``samples_are_untracked`` when a blanket
            ``git add`` is detected.

    Returns:
        The reason string of the first rule that matches, otherwise None.
    """
    normalized = command.replace("\\", "/").lower()
    # "samples" as a whole path component.
    samples_arg = r"(?:^|\s|/)samples(?:\s|/|$)"

    if re.search(r"\bgit\s+add\b.*" + samples_arg, normalized):
        return "Do not stage samples/ unless the user explicitly requests it."

    # "-A" has already been lower-cased to "-a" at this point.
    stages_everything = re.search(r"\bgit\s+add\b", normalized) and re.search(
        r"(\s\.($|\s)|\s-a($|\s)|\s--all($|\s))",
        normalized,
    )
    # Test the cheap regex result first so git is only spawned for commands
    # that actually look like a blanket "git add".
    if stages_everything and samples_are_untracked(root):
        return "Use path-specific git add commands; samples/ is untracked local fixture data."

    # Option clusters are anchored on surrounding whitespace. A "\b" placed
    # before "-f" can never match after a space (both neighbours are non-word
    # characters), which would let "git clean -f samples" slip through.
    destructive_samples = [
        r"\bgit\s+clean\b.*(?:^|\s)(?:-[a-z]*f[a-z]*|--force)(?=\s|$).*" + samples_arg,
        # Any short-option cluster containing "r" (-r, -rf, -fr, -rv, ...).
        r"\brm\s+(?:.*\s)?(?:-[a-z]*r[a-z]*|--recursive)(?=\s|$).*" + samples_arg,
        r"\bremove-item\b.*-recurse\b.*" + samples_arg,
        r"\bgit\s+reset\s+--hard\b",
    ]
    if any(re.search(pattern, normalized) for pattern in destructive_samples):
        return "Destructive workspace or samples/ command blocked by project policy."

    if any(re.search(pattern, normalized) for pattern in DIRECT_SERVER_COMMAND_PATTERNS):
        return "Direct MinerU server/router commands are blocked; use the mineru CLI. CLI-internal temporary local mineru-api is allowed."

    if any(pattern in normalized for pattern in REMOTE_ENGINE_PATTERNS):
        return "Remote/API conversion paths are blocked; v1 must run MinerU 3.1.0 through the local CLI only."

    return None
|
||||
|
||||
|
||||
def check_patch(command: str) -> str | None:
    """Scan the added lines of a patch for out-of-scope content.

    Only lines that start with ``+`` (but not ``+++``) are inspected, after
    dropping the marker, stripping whitespace and lower-casing. A line that
    contains any ALLOWED_NEGATION_PATTERNS phrase is skipped entirely.

    Args:
        command: Patch text.

    Returns:
        A denial reason for the first offending added line, or None.
    """
    for raw_line in command.splitlines():
        if raw_line[:3] == "+++" or raw_line[:1] != "+":
            continue
        text = raw_line[1:].strip().lower()

        if any(text.find(phrase) != -1 for phrase in ALLOWED_NEGATION_PATTERNS):
            continue

        for needle in REMOTE_ENGINE_PATTERNS:
            if needle in text:
                return "Patch appears to add remote/API conversion behavior or excluded engine references."

        mentions_choice = "selection" in text or "switch" in text
        if mentions_choice and "runtime engine" in text:
            return "Runtime engine selection is out of scope for v1."
    return None
|
||||
|
||||
|
||||
def main() -> int:
    """Evaluate one hook payload and print a denial when a rule matches.

    ``Bash`` input is checked with ``check_shell_command``; ``apply_patch``,
    ``Edit`` and ``Write`` input is checked with ``check_patch``. Any other
    tool name is allowed without checks.

    Returns:
        Always 0; a denial is signalled only through the JSON printed by
        ``deny``.
    """
    payload = read_payload()
    # Tolerate malformed payloads instead of crashing on ``.get``.
    if not isinstance(payload, dict):
        payload = {}
    tool_name = payload.get("tool_name", "")
    tool_input = payload.get("tool_input")
    if not isinstance(tool_input, dict):
        tool_input = {}
    # NOTE(review): only the "command" and "patch" keys are read; confirm that
    # Edit/Write payloads actually carry their text under one of them.
    command = str(tool_input.get("command") or tool_input.get("patch") or "")

    reason = None
    if tool_name == "Bash":
        # The repository root is only needed for shell checks, so the git
        # lookup is skipped for every other tool.
        root = find_repo_root(payload.get("cwd"))
        reason = check_shell_command(command, root)
    elif tool_name in ("apply_patch", "Edit", "Write"):
        reason = check_patch(command)

    return deny(reason) if reason else 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: main()'s return value becomes the exit status.
    sys.exit(main())
|
||||
@@ -0,0 +1,63 @@
|
||||
"""Inject the project coordination reminder at Codex session start."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def read_payload() -> dict:
    """Decode the JSON hook payload supplied on standard input.

    Returns:
        The payload as a dict, or an empty dict when stdin is blank, cannot
        be parsed as JSON, or holds a JSON value that is not an object.
    """
    raw = sys.stdin.read()
    if not raw.strip():
        return {}
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # main() calls ``payload.get("cwd")``; a JSON list, string or number
    # would raise AttributeError there, so only objects are passed through.
    if not isinstance(payload, dict):
        return {}
    return payload
|
||||
|
||||
|
||||
def find_repo_root(cwd: str | None) -> Path:
    """Return the git top-level directory that contains *cwd*.

    Args:
        cwd: Starting directory; falls back to the process working directory
            when empty or None.

    Returns:
        The resolved output of ``git rev-parse --show-toplevel``. If running
        git or resolving its output raises, the resolved starting directory
        is returned instead.
    """
    origin = Path(cwd or Path.cwd()).resolve()
    try:
        # check_output captures stdout and raises on a non-zero exit status;
        # stderr is piped so git's error text is not printed.
        toplevel = subprocess.check_output(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=origin,
            stderr=subprocess.PIPE,
            text=True,
        )
        return Path(toplevel.strip()).resolve()
    except Exception:
        return origin
|
||||
|
||||
|
||||
def main() -> int:
    """Print the SessionStart reminder as one line of JSON on stdout.

    The reminder text is always emitted as ``additionalContext``. When
    PLAN.md or PROGRESS.md does not exist under the repository root, a
    ``systemMessage`` naming the missing file(s) is added.

    Returns:
        Always 0.
    """
    repo_root = find_repo_root(read_payload().get("cwd"))
    absent = [
        doc
        for doc in ("PLAN.md", "PROGRESS.md")
        if not repo_root.joinpath(doc).exists()
    ]

    reminder = " ".join(
        (
            "Before starting work in this repository, read PLAN.md and PROGRESS.md.",
            "Use PROGRESS.md as the factual state, update PLAN.md when sequencing changes,",
            "and keep samples/ out of commits unless the user explicitly requests otherwise.",
        )
    )

    response = {
        "continue": True,
        "hookSpecificOutput": {
            "hookEventName": "SessionStart",
            "additionalContext": reminder,
        },
    }
    if absent:
        response["systemMessage"] = f"Missing project coordination file(s): {', '.join(absent)}"

    print(json.dumps(response, ensure_ascii=True))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: exit with whatever main() returns.
    sys.exit(main())
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Remind agents to verify and commit completed project-file changes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Path prefixes (relative to the repository root) that count as project
# workflow/documentation files; matched with ``str.startswith``.
PROJECT_PREFIXES = (
    # Directories.
    ".codex/", "docs/",
    # Top-level coordination documents.
    "AGENTS.md", "ARCHITECTURE.md", "PLAN.md", "PRD.md", "PROGRESS.md",
)
|
||||
|
||||
|
||||
def read_payload() -> dict:
    """Parse the hook payload from standard input.

    Returns:
        The parsed JSON object; an empty dict for blank input, invalid JSON,
        or JSON whose top-level value is not an object.
    """
    raw = sys.stdin.read()
    if not raw.strip():
        return {}
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError:
        return {}
    # Guard the ``payload.get("cwd")`` call in main(): non-object JSON
    # (list, string, number, ...) has no ``get`` method.
    return payload if isinstance(payload, dict) else {}
|
||||
|
||||
|
||||
def find_repo_root(cwd: str | None) -> Path:
    """Resolve the repository root for *cwd* using git.

    Args:
        cwd: Directory to query; the process working directory is used when
            this is empty or None.

    Returns:
        The resolved ``git rev-parse --show-toplevel`` output, or the
        resolved starting directory when that lookup raises.
    """
    start = (Path(cwd) if cwd else Path.cwd()).resolve()
    try:
        proc = subprocess.run(
            "git rev-parse --show-toplevel".split(),
            cwd=start,
            capture_output=True,
            text=True,
            check=True,
        )
        reported = proc.stdout.strip()
        return Path(reported).resolve()
    except Exception:
        # Any failure falls back to the starting directory.
        return start
|
||||
|
||||
|
||||
def project_changes(root: Path) -> list[str]:
    """List git status entries that touch project workflow/doc paths.

    Runs ``git status --porcelain`` in *root* and keeps every entry with at
    least one path that starts with a PROJECT_PREFIXES prefix. Paths under
    ``samples/`` never qualify.

    Args:
        root: Directory in which git is run.

    Returns:
        The matching entries (status columns removed, backslashes replaced
        by "/"). Rename/copy entries are kept whole as ``old -> new``. An
        empty list is returned when the git call raises.
    """
    try:
        result = subprocess.run(
            # --porcelain, unlike --short, is independent of user
            # configuration: with status.branch enabled, --short adds a
            # "## <branch>" header that could be misread as a changed path.
            ["git", "status", "--porcelain"],
            cwd=root,
            capture_output=True,
            text=True,
            check=True,
        )
    except Exception:
        return []

    paths: list[str] = []
    for line in result.stdout.splitlines():
        # Porcelain v1 lines are "XY <path>", or "XY <old> -> <new>" for
        # renames and copies.
        entry = line[3:].replace("\\", "/")
        # Paths with special characters are wrapped in double quotes by git;
        # drop the quotes so the prefix test sees the real first character.
        candidates = [part.strip('"') for part in entry.split(" -> ")]
        if any(
            not name.startswith("samples/") and name.startswith(PROJECT_PREFIXES)
            for name in candidates
        ):
            paths.append(entry)
    return paths
|
||||
|
||||
|
||||
def main() -> int:
    """Print a verify-and-commit reminder when project files have changed.

    Nothing is printed when ``project_changes`` returns an empty list.

    Returns:
        Always 0.
    """
    repo_root = find_repo_root(read_payload().get("cwd"))
    if project_changes(repo_root):
        reminder = " ".join(
            (
                "Project workflow/docs changed. Before finishing, run focused verification,",
                "commit the completed change, and keep samples/ out of the commit.",
            )
        )
        print(json.dumps({"continue": True, "systemMessage": reminder}, ensure_ascii=True))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the exit code.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user