"""Tests for final-status, warning-page and report rendering in ``pdf2md.report``."""

from __future__ import annotations

from pathlib import Path

from pdf2md.ir import (
    AssetRecord,
    BlockRecord,
    BlockType,
    DocumentRecord,
    PageRecord,
    WarningCode,
    WarningRecord,
    WarningSeverity,
)
from pdf2md.metadata import build_metadata
from pdf2md.quality import QualityResult
from pdf2md.report import determine_final_status, pages_with_warnings, render_report


def _reading_order_warning() -> WarningRecord:
    """Return a WARNING-severity reading-order record attached to page index 1."""
    return WarningRecord(
        WarningCode.READING_ORDER_UNCERTAIN,
        WarningSeverity.WARNING,
        "Review page.",
        page_index=1,
    )


def _cli_failure_error() -> WarningRecord:
    """Return an ERROR-severity MinerU CLI failure record without a page index."""
    return WarningRecord(
        WarningCode.MINERU_CLI_FAILED,
        WarningSeverity.ERROR,
        "MinerU failed.",
    )


def make_metadata(tmp_path: Path, *, warnings: tuple[WarningRecord, ...] = ()) -> dict[str, object]:
    """Build metadata for a fixed two-page document rooted at ``tmp_path``.

    Page 0 carries one inline and one display formula block, page 1 carries a
    single paragraph block, and one asset is registered on page 1. The given
    ``warnings`` are attached to the document before it is handed to
    ``build_metadata`` together with constant hash, timestamp and engine values.
    """
    first_page = PageRecord(
        page_index=0,
        blocks=(
            BlockRecord(BlockType.INLINE_FORMULA, page_index=0),
            BlockRecord(BlockType.DISPLAY_FORMULA, page_index=0),
        ),
    )
    second_page = PageRecord(
        page_index=1,
        blocks=(BlockRecord(BlockType.PARAGRAPH, page_index=1),),
    )
    document = DocumentRecord(
        source_pdf=tmp_path / "paper.pdf",
        pages=(first_page, second_page),
        assets=(AssetRecord("paper.assets/fig.png", page_index=1),),
        warnings=warnings,
    )
    return build_metadata(
        document=document,
        source_sha256="0" * 64,
        created_at="2026-05-08T00:00:00Z",
        engine="MinerU",
        engine_version="3.1.0",
        engine_options={"strict_local": True},
    )


def test_final_status_success_partial_and_failed(tmp_path: Path) -> None:
    """Check the status for clean, warned, missing-asset-link and errored inputs."""
    clean = make_metadata(tmp_path)
    warned = make_metadata(tmp_path, warnings=(_reading_order_warning(),))
    errored = make_metadata(tmp_path, warnings=(_cli_failure_error(),))

    assert determine_final_status(clean) == "success"
    assert determine_final_status(warned) == "partial"
    # A quality result alone (no metadata warnings) is expected to downgrade the status.
    missing_link_quality = QualityResult(missing_asset_link_count=1)
    assert determine_final_status(clean, missing_link_quality) == "partial"
    assert determine_final_status(errored) == "failed"


def test_pages_with_warnings_are_sorted_and_derived_from_metadata_and_quality(tmp_path: Path) -> None:
    """Check that warning pages from metadata (1) and quality (0) come back as ``(0, 1)``."""
    metadata = make_metadata(tmp_path, warnings=(_reading_order_warning(),))
    math_warning = WarningRecord(
        WarningCode.MATH_RENDER_FAILED,
        WarningSeverity.WARNING,
        "Math failed.",
        page_index=0,
    )
    quality = QualityResult(warnings=(math_warning,))

    assert pages_with_warnings(metadata, quality) == (0, 1)


def test_report_content_includes_required_sections_and_counts(tmp_path: Path) -> None:
    """Check that a fully specified report contains every expected header and count line."""
    metadata = make_metadata(tmp_path, warnings=(_reading_order_warning(),))
    asset_warning = WarningRecord(
        WarningCode.ASSET_LINK_MISSING,
        WarningSeverity.WARNING,
        "Missing asset.",
    )
    quality = QualityResult(
        missing_asset_link_count=2,
        invalid_asset_link_count=1,
        math_render_error_count=3,
        warnings=(asset_warning,),
    )

    report = render_report(
        metadata,
        quality=quality,
        markdown_path=tmp_path / "paper.md",
        metadata_path=tmp_path / "paper.metadata.json",
        report_path=tmp_path / "paper.report.md",
    )

    expected_fragments = (
        "# PDF-to-Markdown Quality Report",
        "- Final status: `partial`",
        f"- Source PDF: {tmp_path / 'paper.pdf'}",
        f"- Output Markdown: {tmp_path / 'paper.md'}",
        "- Engine: MinerU",
        "- Engine version: 3.1.0",
        '- Engine options: `{"strict_local": true}`',
        "- Pages processed: 2",
        "- Warning count: 2",
        "- Asset count: 1",
        "- Missing asset link count: 2",
        "- Invalid asset link count: 1",
        "- Inline formula count: 1",
        "- Display formula count: 1",
        "- Math render error count: 3",
        "- Page 1",
        "`ASSET_LINK_MISSING`",
    )
    for fragment in expected_fragments:
        assert fragment in report


def test_report_omits_absent_optional_paths_and_does_not_write_files(tmp_path: Path) -> None:
    """Check that path lines are absent when no paths are given and no report file appears."""
    metadata = make_metadata(tmp_path)
    report_path = tmp_path / "paper.report.md"

    report = render_report(metadata)

    for label in ("Output Markdown:", "Metadata JSON:", "Report Markdown:"):
        assert label not in report
    assert not report_path.exists()


def test_report_failed_status_comes_from_error_severity_warning(tmp_path: Path) -> None:
    """Check that an ERROR-severity metadata warning yields a ``failed`` status line."""
    metadata = make_metadata(tmp_path, warnings=(_cli_failure_error(),))

    rendered = render_report(metadata)

    assert "- Final status: `failed`" in rendered


def test_report_uses_metadata_math_render_count_plus_quality_count(tmp_path: Path) -> None:
    """Check that one metadata math failure plus a quality count of 2 is reported as 3."""
    metadata_math_failure = WarningRecord(
        WarningCode.MATH_RENDER_FAILED,
        WarningSeverity.ERROR,
        "Metadata math failed.",
    )
    metadata = make_metadata(tmp_path, warnings=(metadata_math_failure,))

    rendered = render_report(metadata, quality=QualityResult(math_render_error_count=2))

    assert "- Math render error count: 3" in rendered


def test_report_includes_chunk_context_when_metadata_has_chunk_options(tmp_path: Path) -> None:
    """Check that chunk details placed in ``engine_options`` appear as a chunk line."""
    chunk_options = {
        "chunk_index": 2,
        "total_chunks": 3,
        "source_page_start": 21,
        "source_page_end": 40,
    }
    metadata = make_metadata(tmp_path)
    metadata["engine_options"] = {"strict_local": True, "chunk": chunk_options}

    rendered = render_report(metadata)

    assert "- Chunk: 2/3, source pages: 21-40" in rendered