Add Markdown recheck command

This commit is contained in:
NINI
2026-05-11 01:00:26 +09:00
parent b69c03c206
commit 03927a26a1
9 changed files with 276 additions and 11 deletions
+28 -1
View File
@@ -9,7 +9,7 @@ import pytest
from pypdf import PdfWriter
import pdf2md.conversion as conversion_module
from pdf2md.conversion import BatchConversionResult, convert_input, convert_pdf
from pdf2md.conversion import BatchConversionResult, convert_input, convert_pdf, recheck_markdown
from pdf2md.ir import WarningCode, WarningRecord, WarningSeverity
from pdf2md.mineru_adapter import MinerUAdapterResult, StrictLocalViolationError
from pdf2md.paths import OutputConflictError
@@ -230,6 +230,33 @@ def test_convert_pdf_records_math_checker_failures_in_metadata_and_report(tmp_pa
assert "`MATH_RENDER_FAILED`" in report
def test_recheck_markdown_regenerates_metadata_and_report_from_current_markdown(tmp_path: Path) -> None:
    """Check that rechecking edited Markdown refreshes the metadata and report.

    A PDF is first converted with a math checker that rejects every formula.
    The Markdown output is then overwritten by hand and ``recheck_markdown``
    is run with a checker that accepts everything. The recheck result must
    point at the same three output files as the original conversion, and the
    metadata and report read back from disk must describe a clean run.
    """
    # Arrange: an initial conversion whose math check fails.
    source_pdf = make_pdf(tmp_path)
    fake_adapter = FakeAdapter(raw_markdown="Inline \\(bad_math\\)\n")
    initial = convert_pdf(
        source_pdf,
        tmp_path / "out",
        adapter=fake_adapter,
        math_checker=lambda _: False,
        clock=fixed_clock,
    )

    # Replace the generated Markdown before rechecking it.
    initial.markdown_path.write_text("Inline $x_i$\n", encoding="utf-8")

    # Act: recheck the edited Markdown with a checker that always passes.
    rechecked = recheck_markdown(
        initial.markdown_path,
        math_checker=lambda _: True,
        clock=fixed_clock,
    )

    # Assert: the returned result is clean and reuses the original paths.
    assert rechecked.final_status == "success"
    assert rechecked.warning_count == 0
    assert rechecked.markdown_path == initial.markdown_path
    assert rechecked.metadata_path == initial.metadata_path
    assert rechecked.report_path == initial.report_path

    # Assert: the metadata on disk still identifies the source PDF and
    # carries the regenerated summary.
    metadata = json.loads(initial.metadata_path.read_text(encoding="utf-8"))
    assert metadata["source_sha256"] == hashlib.sha256(source_pdf.read_bytes()).hexdigest()
    # NOTE(review): presumably the timestamp produced by fixed_clock - confirm.
    assert metadata["created_at"] == "2026-05-08T00:00:00Z"
    summary = metadata["summary"]
    assert summary["pages_processed"] == 1
    assert summary["inline_formula_count"] == 1
    assert summary["math_render_error_count"] == 0
    assert summary["warning_count"] == 0
    assert metadata["warnings"] == []

    # Assert: the report on disk reflects the successful recheck.
    report_text = initial.report_path.read_text(encoding="utf-8")
    assert "- Final status: `success`" in report_text
    assert "- Math render error count: 0" in report_text
    assert "- None" in report_text
def test_convert_pdf_records_unavailable_math_checker_for_math_output(tmp_path: Path, monkeypatch) -> None:
pdf = make_pdf(tmp_path)
adapter = FakeAdapter(raw_markdown="Inline \\(x\\)\n")