Add Markdown recheck command

This commit is contained in:
NINI
2026-05-11 01:00:26 +09:00
parent b69c03c206
commit 03927a26a1
9 changed files with 276 additions and 11 deletions
+29 -2
View File
@@ -16,8 +16,9 @@ from pdf2md.mineru_adapter import MinerUAdapterResult
class FakeAdapter:
def __init__(self, *, succeeded: bool = True) -> None:
def __init__(self, *, succeeded: bool = True, raw_markdown: str | None = None) -> None:
self.succeeded = succeeded
self.raw_markdown = raw_markdown
self.calls: list[Path] = []
self.options: list[object] = []
@@ -33,7 +34,7 @@ class FakeAdapter:
command=("mineru", "-p", str(input_path), "-o", str(output_dir)),
input_pdf=input_path,
work_dir=output_dir,
raw_markdown=f"# {input_path.stem}\n" if self.succeeded else None,
raw_markdown=(self.raw_markdown or f"# {input_path.stem}\n") if self.succeeded else None,
raw_structured={"pages": 1},
asset_paths=(),
warnings=() if self.succeeded else (warning,),
@@ -188,6 +189,32 @@ def test_cli_failure_summary_returns_nonzero(tmp_path: Path, capsys) -> None:
assert not (tmp_path / "out" / "paper.md").exists()
def test_cli_recheck_markdown_regenerates_adjacent_metadata_and_report(tmp_path: Path, capsys) -> None:
    """Exercise ``recheck`` on a Markdown file produced by an earlier ``convert``.

    Steps:
      1. Run ``convert`` with a fake adapter that emits ``\\(bad_math\\)`` and a
         math checker that always returns ``False``; the command must exit 0.
      2. Overwrite the generated ``paper.md`` with ``$x_i$`` content.
      3. Run ``recheck`` on that file with a math checker that always returns
         ``True``.

    Expected: exit code 0, stdout containing ``rechecked:`` and
    ``warnings: 0``, and ``paper.report.md`` reporting a ``success`` status.

    NOTE(review): only the report file is asserted on here; the adjacent
    metadata file mentioned in the test name is not inspected.
    """
    source_pdf = make_pdf(tmp_path, "paper.pdf")
    out_dir = tmp_path / "out"
    fake = FakeAdapter(raw_markdown="Inline \\(bad_math\\)\n")

    # Initial conversion; presumably the always-False checker flags the math,
    # but this test only requires that the command itself succeeds.
    convert_status = main(
        ["convert", str(source_pdf), "--out", str(out_dir)],
        adapter=fake,
        clock=fixed_clock,
        math_checker=lambda _: False,
    )
    assert convert_status == 0
    # Drain the captured streams so only the recheck output is inspected below.
    capsys.readouterr()

    # Replace the converted Markdown by hand before rechecking it.
    edited_markdown = out_dir / "paper.md"
    edited_markdown.write_text("Inline $x_i$\n", encoding="utf-8")

    recheck_status = main(
        ["recheck", str(edited_markdown)],
        clock=fixed_clock,
        math_checker=lambda _: True,
    )
    stdout = capsys.readouterr().out

    assert recheck_status == 0
    assert "rechecked:" in stdout
    assert "warnings: 0" in stdout
    report_text = (out_dir / "paper.report.md").read_text(encoding="utf-8")
    assert "- Final status: `success`" in report_text
def test_cli_preflight_conflict_fails_before_conversion(tmp_path: Path, capsys) -> None:
pdf = make_pdf(tmp_path, "paper.pdf")
out = tmp_path / "out"