Files
PDFToMD/tests/test_quality.py
T
2026-05-11 02:08:46 +09:00

160 lines
5.7 KiB
Python

from __future__ import annotations
from pathlib import Path
from pdf2md.ir import WarningCode, WarningSeverity
from pdf2md.quality import (
MathCheckerUnavailable,
MathCheckResult,
check_math_renderability_details,
check_asset_links,
check_math_renderability,
extract_math_expressions,
merge_quality_results,
)
def test_missing_asset_link_is_counted(tmp_path: Path) -> None:
    """A link to a file absent from an existing asset root counts as missing.

    The asset directory is created but left empty, so the single image link
    must yield one missing-link count, zero invalid-link counts, and exactly
    one ``ASSET_LINK_MISSING`` warning.
    """
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()

    outcome = check_asset_links(
        "![missing](assets/missing.png)",
        markdown_dir=tmp_path,
        asset_root=assets_dir,
    )

    assert outcome.missing_asset_link_count == 1
    assert outcome.invalid_asset_link_count == 0
    emitted_codes = [entry.code for entry in outcome.warnings]
    assert emitted_codes == [WarningCode.ASSET_LINK_MISSING]
def test_existing_asset_link_passes_without_warning(tmp_path: Path) -> None:
    """A link whose target file exists under the asset root is clean.

    Expects a zero failure count and an empty warnings tuple.
    """
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()
    figure_path = assets_dir / "fig.png"
    figure_path.write_bytes(b"image")

    outcome = check_asset_links(
        "![fig](assets/fig.png)",
        markdown_dir=tmp_path,
        asset_root=assets_dir,
    )

    assert outcome.failure_count == 0
    assert outcome.warnings == ()
def test_invalid_asset_links_are_counted_without_fetching(tmp_path: Path) -> None:
    """Remote, parent-escaping, and absolute links are each counted as invalid.

    Three image links (an ``https`` URL, a ``..`` escape, and a Windows
    absolute path) must produce three invalid-link counts, no missing-link
    counts, and three ``ASSET_LINK_INVALID`` warnings in order.
    """
    link_lines = (
        "![remote](https://example.test/fig.png)",
        "![escape](../outside.png)",
        r"![absolute](C:\tmp\fig.png)",
    )
    document = "\n".join(link_lines)
    # The asset directory is intentionally not created in this test.
    assets_dir = tmp_path / "assets"

    outcome = check_asset_links(document, markdown_dir=tmp_path, asset_root=assets_dir)

    assert outcome.invalid_asset_link_count == 3
    assert outcome.missing_asset_link_count == 0
    emitted_codes = [entry.code for entry in outcome.warnings]
    assert emitted_codes == [WarningCode.ASSET_LINK_INVALID] * 3
def test_asset_links_inside_code_are_ignored(tmp_path: Path) -> None:
    """Image links inside a fenced block or an inline code span are not checked.

    Both links point at files that are never created, yet the result must
    report zero failures and no warnings.
    """
    document = "```md\n![missing](assets/missing.png)\n```\n`![missing](assets/inline.png)`"
    assets_dir = tmp_path / "assets"

    outcome = check_asset_links(document, markdown_dir=tmp_path, asset_root=assets_dir)

    assert outcome.failure_count == 0
    assert outcome.warnings == ()
def test_math_render_failures_are_aggregated_with_fake_checker() -> None:
    """A per-expression checker's failure surfaces as one counted warning.

    The fake checker rejects any body containing ``"bad"``; of the two
    expressions in the document only the display block should fail.
    """

    def fake_checker(body: str) -> MathCheckResult:
        # Fails exactly when the expression body contains "bad".
        renders = "bad" not in body
        return MathCheckResult(ok=renders, message=f"{body} failed")

    document = "$x_i^2$\n\n$$\nbad_math\n$$"
    outcome = check_math_renderability(document, fake_checker)

    assert outcome.math_render_error_count == 1
    emitted_codes = [entry.code for entry in outcome.warnings]
    assert emitted_codes == [WarningCode.MATH_RENDER_FAILED]
    first_warning = outcome.warnings[0]
    assert "bad_math failed" in first_warning.message
def test_math_renderability_details_include_failed_expression_records() -> None:
    """The detailed check exposes a record for each failed expression.

    With one passing inline expression and one failing display block, the
    single failure record must carry the expression's index, body, display
    flag, and the checker's message.
    """

    def fake_checker(body: str) -> MathCheckResult:
        # Fails exactly when the expression body contains "bad".
        renders = "bad" not in body
        return MathCheckResult(ok=renders, message=f"{body} failed")

    document = "$x_i$\n\n$$\nbad_math\n$$"
    details = check_math_renderability_details(document, fake_checker)

    assert details.quality.math_render_error_count == 1
    assert len(details.failures) == 1

    failure = details.failures[0]
    assert failure.expression.index == 1
    assert failure.expression.body == "bad_math"
    assert failure.expression.display is True
    assert failure.message == "bad_math failed"
def test_math_extraction_records_display_mode_and_markdown_spans() -> None:
    """Extraction reports index, body, display flag, and source span.

    Slicing the original markdown with each ``markdown_span`` must give back
    the full delimited expression, dollar signs included.
    """
    document = "Inline $x_i^2$ before\n\n$$\n\\frac{1}{2}\n$$\n"
    found = extract_math_expressions(document)

    summary = [(item.index, item.body, item.display) for item in found]
    assert summary == [
        (0, "x_i^2", False),
        (1, "\\frac{1}{2}", True),
    ]

    spans = (item.markdown_span for item in found)
    snippets = [document[begin:finish] for begin, finish in spans]
    assert snippets == [
        "$x_i^2$",
        "$$\n\\frac{1}{2}\n$$",
    ]
def test_math_extraction_ignores_code_and_currency_like_text() -> None:
    """Only ``$z$`` is extracted from a document full of look-alikes.

    Dollar-delimited text inside a fenced block, inside an inline code span,
    and the price-like ``$12.00$`` must all be skipped.
    """
    document = "```tex\n$x$\n```\n`$y$`\nPrice $12.00$ and real $z$."
    found = extract_math_expressions(document)

    summary = [(item.body, item.display) for item in found]
    assert summary == [("z", False)]
def test_batch_math_checker_receives_expression_records() -> None:
    """A checker with ``check_expressions`` is handed the expression records.

    The fake batch checker stores what it receives and passes only display
    expressions, so the inline expression must be the one reported failure.
    """

    class BatchChecker:
        """Records the batch it is given; approves display math only."""

        def __init__(self) -> None:
            self.expressions = ()

        def check_expressions(self, expressions):
            self.expressions = expressions
            verdicts = [MathCheckResult(ok=record.display) for record in expressions]
            return tuple(verdicts)

    batch_checker = BatchChecker()
    document = "$inline$\n\n$$\ndisplay\n$$"
    outcome = check_math_renderability(document, batch_checker)

    received_bodies = [record.body for record in batch_checker.expressions]
    assert received_bodies == ["inline", "display"]
    assert outcome.math_render_error_count == 1
    first_warning = outcome.warnings[0]
    assert "inline" in first_warning.message
def test_math_checker_unavailable_is_nonfatal() -> None:
    """A checker raising ``MathCheckerUnavailable`` does not count as an error.

    The error count stays at zero and the first warning is a
    ``MATH_RENDER_FAILED`` entry at ``INFO`` severity.
    """

    def unavailable_checker(_: str) -> bool:
        raise MathCheckerUnavailable("local renderer missing")

    outcome = check_math_renderability("$x$", unavailable_checker)

    assert outcome.math_render_error_count == 0
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO
def test_missing_math_checker_is_explicit_and_nonfatal() -> None:
    """Calling the math check with no checker still emits a warning.

    The error count stays at zero and the first warning is a
    ``MATH_RENDER_FAILED`` entry at ``INFO`` severity.
    """
    outcome = check_math_renderability("$x$")

    assert outcome.math_render_error_count == 0
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO
def test_merge_quality_results_combines_counts_and_warning_order(tmp_path: Path) -> None:
    """Merging keeps both counts and lists warnings in argument order.

    One asset result with a missing link and one math result with a failed
    expression are merged; the asset warning must come before the math one.
    """
    # No asset_root is passed here, unlike the other asset-link tests.
    from_assets = check_asset_links("![missing](assets/missing.png)", markdown_dir=tmp_path)
    # The checker rejects every expression by returning a plain False.
    from_math = check_math_renderability("$x$", lambda _: False)

    merged = merge_quality_results(from_assets, from_math)

    assert merged.missing_asset_link_count == 1
    assert merged.math_render_error_count == 1
    emitted_codes = [entry.code for entry in merged.warnings]
    assert emitted_codes == [
        WarningCode.ASSET_LINK_MISSING,
        WarningCode.MATH_RENDER_FAILED,
    ]