160 lines
5.7 KiB
Python
160 lines
5.7 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from pdf2md.ir import WarningCode, WarningSeverity
|
|
from pdf2md.quality import (
|
|
MathCheckerUnavailable,
|
|
MathCheckResult,
|
|
check_math_renderability_details,
|
|
check_asset_links,
|
|
check_math_renderability,
|
|
extract_math_expressions,
|
|
merge_quality_results,
|
|
)
|
|
|
|
|
|
def test_missing_asset_link_is_counted(tmp_path: Path) -> None:
    """Expect exactly one missing-asset failure and no invalid-link failures."""
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()

    # NOTE(review): the markdown argument is an empty string although one
    # missing link is expected -- presumably the link markup was lost when this
    # file was captured; confirm against the upstream test before relying on it.
    outcome = check_asset_links("", markdown_dir=tmp_path, asset_root=assets_dir)

    assert outcome.missing_asset_link_count == 1
    assert outcome.invalid_asset_link_count == 0
    reported_codes = [warning.code for warning in outcome.warnings]
    assert reported_codes == [WarningCode.ASSET_LINK_MISSING]
|
|
|
|
|
|
def test_existing_asset_link_passes_without_warning(tmp_path: Path) -> None:
    """With ``assets/fig.png`` on disk, the check yields no failures or warnings."""
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()
    figure_path = assets_dir / "fig.png"
    figure_path.write_bytes(b"image")

    # NOTE(review): the markdown argument is an empty string, so nothing appears
    # to reference fig.png -- presumably a link to it was lost when this file
    # was captured; confirm against the upstream test.
    outcome = check_asset_links("", markdown_dir=tmp_path, asset_root=assets_dir)

    assert outcome.failure_count == 0
    assert outcome.warnings == ()
|
|
|
|
|
|
def test_invalid_asset_links_are_counted_without_fetching(tmp_path: Path) -> None:
    """Expect three invalid-link failures, no missing ones, and three warnings."""
    # NOTE(review): all three entries are empty strings (the last one a raw
    # string) -- presumably three distinct invalid links were lost when this
    # file was captured; confirm against the upstream test.
    link_lines = [
        "",
        "",
        r"",
    ]
    markdown = "\n".join(link_lines)
    assets_dir = tmp_path / "assets"

    outcome = check_asset_links(markdown, markdown_dir=tmp_path, asset_root=assets_dir)

    assert outcome.invalid_asset_link_count == 3
    assert outcome.missing_asset_link_count == 0
    reported_codes = [warning.code for warning in outcome.warnings]
    assert reported_codes == [WarningCode.ASSET_LINK_INVALID] * 3
|
|
|
|
|
|
def test_asset_links_inside_code_are_ignored(tmp_path: Path) -> None:
    """Markdown made only of a fenced block and an inline code span yields no failures."""
    # NOTE(review): both the fenced block and the inline code span are empty --
    # presumably each held a link that was lost when this file was captured;
    # confirm against the upstream test.
    markdown = "```md\n\n```\n``"
    assets_dir = tmp_path / "assets"

    outcome = check_asset_links(markdown, markdown_dir=tmp_path, asset_root=assets_dir)

    assert outcome.failure_count == 0
    assert outcome.warnings == ()
|
|
|
|
|
|
def test_math_render_failures_are_aggregated_with_fake_checker() -> None:
    """A callable checker that rejects one of two expressions yields one render error."""

    def fake_checker(body: str) -> MathCheckResult:
        # Reject any expression whose body contains "bad"; accept the rest.
        accepted = "bad" not in body
        return MathCheckResult(ok=accepted, message=f"{body} failed")

    markdown = "$x_i^2$\n\n$$\nbad_math\n$$"

    outcome = check_math_renderability(markdown, fake_checker)

    assert outcome.math_render_error_count == 1
    reported_codes = [warning.code for warning in outcome.warnings]
    assert reported_codes == [WarningCode.MATH_RENDER_FAILED]
    first_warning = outcome.warnings[0]
    assert "bad_math failed" in first_warning.message
|
|
|
|
|
|
def test_math_renderability_details_include_failed_expression_records() -> None:
    """The detailed result carries the failed expression record and checker message."""

    def fake_checker(body: str) -> MathCheckResult:
        # Reject any expression whose body contains "bad"; accept the rest.
        accepted = "bad" not in body
        return MathCheckResult(ok=accepted, message=f"{body} failed")

    markdown = "$x_i$\n\n$$\nbad_math\n$$"

    details = check_math_renderability_details(markdown, fake_checker)

    assert details.quality.math_render_error_count == 1
    assert len(details.failures) == 1

    # Only the second expression (index 1, display mode) is expected to fail.
    failure = details.failures[0]
    assert failure.expression.index == 1
    assert failure.expression.body == "bad_math"
    assert failure.expression.display is True
    assert failure.message == "bad_math failed"
|
|
|
|
|
|
def test_math_extraction_records_display_mode_and_markdown_spans() -> None:
    """Extraction returns index, body, display flag, and a span that slices the source."""
    markdown = "Inline $x_i^2$ before\n\n$$\n\\frac{1}{2}\n$$\n"

    expressions = extract_math_expressions(markdown)

    summaries = [(item.index, item.body, item.display) for item in expressions]
    assert summaries == [
        (0, "x_i^2", False),
        (1, "\\frac{1}{2}", True),
    ]

    # Each markdown_span is a (start, end) pair; slicing the source with it
    # should give back the expression including its delimiters.
    spans = [item.markdown_span for item in expressions]
    sliced = [markdown[start:end] for start, end in spans]
    assert sliced == [
        "$x_i^2$",
        "$$\n\\frac{1}{2}\n$$",
    ]
|
|
|
|
|
|
def test_math_extraction_ignores_code_and_currency_like_text() -> None:
    """Only ``$z$`` is extracted; fenced code, inline code and ``$12.00$`` are skipped."""
    markdown = "```tex\n$x$\n```\n`$y$`\nPrice $12.00$ and real $z$."

    expressions = extract_math_expressions(markdown)

    found = [(item.body, item.display) for item in expressions]
    assert found == [("z", False)]
|
|
|
|
|
|
def test_batch_math_checker_receives_expression_records() -> None:
    """A checker exposing ``check_expressions`` is handed the expression records."""

    class RecordingBatchChecker:
        """Stores the expressions it receives and passes only display-mode ones."""

        def __init__(self) -> None:
            self.expressions = ()

        def check_expressions(self, expressions):
            # Keep what was passed in so the test can inspect it afterwards.
            self.expressions = expressions
            return tuple(MathCheckResult(ok=record.display) for record in expressions)

    checker = RecordingBatchChecker()
    markdown = "$inline$\n\n$$\ndisplay\n$$"

    outcome = check_math_renderability(markdown, checker)

    received_bodies = [record.body for record in checker.expressions]
    assert received_bodies == ["inline", "display"]
    assert outcome.math_render_error_count == 1
    first_warning = outcome.warnings[0]
    assert "inline" in first_warning.message
|
|
|
|
|
|
def test_math_checker_unavailable_is_nonfatal() -> None:
    """A checker raising MathCheckerUnavailable gives an INFO warning, not an error."""

    def unavailable_checker(_: str) -> bool:
        raise MathCheckerUnavailable("local renderer missing")

    outcome = check_math_renderability("$x$", unavailable_checker)

    assert outcome.math_render_error_count == 0
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO
|
|
|
|
|
|
def test_missing_math_checker_is_explicit_and_nonfatal() -> None:
    """Calling without a checker gives an INFO warning and zero render errors."""
    outcome = check_math_renderability("$x$")

    assert outcome.math_render_error_count == 0
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO
|
|
|
|
|
|
def test_merge_quality_results_combines_counts_and_warning_order(tmp_path: Path) -> None:
    """Merging keeps both counts and lists the asset warning before the math one."""

    def always_reject(_):
        return False

    # NOTE(review): the markdown argument is an empty string although one
    # missing link is expected below -- presumably the link markup was lost
    # when this file was captured; confirm against the upstream test.
    asset_outcome = check_asset_links("", markdown_dir=tmp_path)
    math_outcome = check_math_renderability("$x$", always_reject)

    merged = merge_quality_results(asset_outcome, math_outcome)

    assert merged.missing_asset_link_count == 1
    assert merged.math_render_error_count == 1
    reported_codes = [warning.code for warning in merged.warnings]
    assert reported_codes == [
        WarningCode.ASSET_LINK_MISSING,
        WarningCode.MATH_RENDER_FAILED,
    ]
|