from __future__ import annotations

from pathlib import Path

from pdf2md.ir import WarningCode, WarningSeverity
from pdf2md.quality import (
    MathCheckerUnavailable,
    MathCheckResult,
    check_asset_links,
    check_math_renderability,
    extract_math_expressions,
    merge_quality_results,
)


def test_missing_asset_link_is_counted(tmp_path: Path) -> None:
    """A link whose target is absent from an empty asset root counts as missing."""
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()

    outcome = check_asset_links(
        "![missing](assets/missing.png)",
        markdown_dir=tmp_path,
        asset_root=assets_dir,
    )

    assert outcome.missing_asset_link_count == 1
    assert outcome.invalid_asset_link_count == 0
    warning_codes = [item.code for item in outcome.warnings]
    assert warning_codes == [WarningCode.ASSET_LINK_MISSING]


def test_existing_asset_link_passes_without_warning(tmp_path: Path) -> None:
    """A link to a file that exists under the asset root produces no failures."""
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()
    figure = assets_dir / "fig.png"
    figure.write_bytes(b"image")

    outcome = check_asset_links(
        "![fig](assets/fig.png)",
        markdown_dir=tmp_path,
        asset_root=assets_dir,
    )

    assert outcome.failure_count == 0
    assert outcome.warnings == ()


def test_invalid_asset_links_are_counted_without_fetching(tmp_path: Path) -> None:
    """Remote, parent-escaping, and absolute links are all reported as invalid.

    None of the three is counted as missing. (The test name says nothing is
    fetched; that is not asserted directly here.)
    """
    link_lines = [
        "![remote](https://example.test/fig.png)",
        "![escape](../outside.png)",
        r"![absolute](C:\tmp\fig.png)",
    ]
    markdown = "\n".join(link_lines)

    # The asset root is deliberately never created for this scenario.
    outcome = check_asset_links(
        markdown,
        markdown_dir=tmp_path,
        asset_root=tmp_path / "assets",
    )

    assert outcome.invalid_asset_link_count == 3
    assert outcome.missing_asset_link_count == 0
    warning_codes = [item.code for item in outcome.warnings]
    assert warning_codes == [WarningCode.ASSET_LINK_INVALID] * 3


def test_asset_links_inside_code_are_ignored(tmp_path: Path) -> None:
    """Image links inside a fenced block or an inline code span are not checked."""
    markdown = "```md\n![missing](assets/missing.png)\n```\n`![missing](assets/inline.png)`"

    outcome = check_asset_links(
        markdown,
        markdown_dir=tmp_path,
        asset_root=tmp_path / "assets",
    )

    assert outcome.failure_count == 0
    assert outcome.warnings == ()


def test_math_render_failures_are_aggregated_with_fake_checker() -> None:
    """Only the expression rejected by the fake checker is counted and reported."""

    def fake_checker(body: str) -> MathCheckResult:
        # Any expression body containing "bad" is treated as unrenderable.
        renders = "bad" not in body
        return MathCheckResult(ok=renders, message=f"{body} failed")

    outcome = check_math_renderability("$x_i^2$\n\n$$\nbad_math\n$$", fake_checker)

    assert outcome.math_render_error_count == 1
    warning_codes = [item.code for item in outcome.warnings]
    assert warning_codes == [WarningCode.MATH_RENDER_FAILED]
    first_warning = outcome.warnings[0]
    assert "bad_math failed" in first_warning.message


def test_math_extraction_records_display_mode_and_markdown_spans() -> None:
    """Extraction yields index, body, display flag, and the exact source span."""
    markdown = "Inline $x_i^2$ before\n\n$$\n\\frac{1}{2}\n$$\n"

    found = extract_math_expressions(markdown)

    summary = [(expr.index, expr.body, expr.display) for expr in found]
    assert summary == [
        (0, "x_i^2", False),
        (1, "\\frac{1}{2}", True),
    ]

    # Slicing the markdown by each recorded span must give back the
    # delimited source text, delimiters included.
    snippets = []
    for expr in found:
        begin, stop = expr.markdown_span
        snippets.append(markdown[begin:stop])
    assert snippets == [
        "$x_i^2$",
        "$$\n\\frac{1}{2}\n$$",
    ]


def test_math_extraction_ignores_code_and_currency_like_text() -> None:
    """Only ``$z$`` is extracted; code-wrapped and price-like dollars are skipped."""
    markdown = "```tex\n$x$\n```\n`$y$`\nPrice $12.00$ and real $z$."

    found = extract_math_expressions(markdown)

    body_and_mode = [(expr.body, expr.display) for expr in found]
    assert body_and_mode == [("z", False)]


def test_batch_math_checker_receives_expression_records() -> None:
    """A checker exposing ``check_expressions`` is handed the expression records."""

    class BatchChecker:
        """Records what it was given and passes only display-mode expressions."""

        def __init__(self) -> None:
            self.expressions = ()

        def check_expressions(self, expressions):
            self.expressions = expressions
            return tuple(MathCheckResult(ok=record.display) for record in expressions)

    batch = BatchChecker()

    outcome = check_math_renderability("$inline$\n\n$$\ndisplay\n$$", batch)

    received_bodies = [record.body for record in batch.expressions]
    assert received_bodies == ["inline", "display"]
    assert outcome.math_render_error_count == 1
    first_warning = outcome.warnings[0]
    assert "inline" in first_warning.message


def test_math_checker_unavailable_is_nonfatal() -> None:
    """A checker raising ``MathCheckerUnavailable`` gives an INFO warning, no error."""

    def checker(_: str) -> bool:
        raise MathCheckerUnavailable("local renderer missing")

    outcome = check_math_renderability("$x$", checker)

    assert outcome.math_render_error_count == 0
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO


def test_missing_math_checker_is_explicit_and_nonfatal() -> None:
    """Calling without any checker gives an INFO warning and zero render errors."""
    outcome = check_math_renderability("$x$")

    assert outcome.math_render_error_count == 0
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO


def test_merge_quality_results_combines_counts_and_warning_order(tmp_path: Path) -> None:
    """Merging keeps both counts and lists asset warnings before math warnings."""
    asset_result = check_asset_links("![missing](assets/missing.png)", markdown_dir=tmp_path)
    math_result = check_math_renderability("$x$", lambda _: False)

    merged = merge_quality_results(asset_result, math_result)

    assert merged.missing_asset_link_count == 1
    assert merged.math_render_error_count == 1
    merged_codes = [item.code for item in merged.warnings]
    assert merged_codes == [
        WarningCode.ASSET_LINK_MISSING,
        WarningCode.MATH_RENDER_FAILED,
    ]