add pdftomd
This commit is contained in:
@@ -0,0 +1,144 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pdf2md.ir import WarningCode, WarningSeverity
|
||||
from pdf2md.quality import (
|
||||
MathCheckerUnavailable,
|
||||
MathCheckResult,
|
||||
check_asset_links,
|
||||
check_math_renderability,
|
||||
extract_math_expressions,
|
||||
merge_quality_results,
|
||||
)
|
||||
|
||||
|
||||
def test_missing_asset_link_is_counted(tmp_path: Path) -> None:
    """Expect exactly one missing-link failure and one ASSET_LINK_MISSING warning."""
    # NOTE(review): the markdown argument is an empty string as seen here, yet
    # one missing link is expected -- presumably an image link was lost from
    # this literal; confirm against the original file.
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()

    report = check_asset_links("", markdown_dir=tmp_path, asset_root=assets_dir)

    assert report.missing_asset_link_count == 1
    assert report.invalid_asset_link_count == 0
    warning_codes = [item.code for item in report.warnings]
    assert warning_codes == [WarningCode.ASSET_LINK_MISSING]
|
||||
|
||||
|
||||
def test_existing_asset_link_passes_without_warning(tmp_path: Path) -> None:
    """Expect no failures and no warnings when the asset file is present on disk."""
    # NOTE(review): the markdown argument is an empty string as seen here;
    # presumably it originally linked to assets/fig.png -- confirm.
    assets_dir = tmp_path / "assets"
    assets_dir.mkdir()
    image_file = assets_dir / "fig.png"
    image_file.write_bytes(b"image")

    report = check_asset_links("", markdown_dir=tmp_path, asset_root=assets_dir)

    assert report.failure_count == 0
    assert report.warnings == ()
|
||||
|
||||
|
||||
def test_invalid_asset_links_are_counted_without_fetching(tmp_path: Path) -> None:
    """Expect three invalid-link failures, each with an ASSET_LINK_INVALID warning."""
    # NOTE(review): all three markdown lines are empty strings as seen here;
    # presumably each held a different kind of invalid link -- confirm.
    markdown_lines = (
        "",
        "",
        r"",
    )
    document = "\n".join(markdown_lines)
    assets_dir = tmp_path / "assets"

    report = check_asset_links(document, markdown_dir=tmp_path, asset_root=assets_dir)

    assert report.invalid_asset_link_count == 3
    assert report.missing_asset_link_count == 0
    warning_codes = [item.code for item in report.warnings]
    assert warning_codes == [WarningCode.ASSET_LINK_INVALID] * 3
|
||||
|
||||
|
||||
def test_asset_links_inside_code_are_ignored(tmp_path: Path) -> None:
    """Expect no failures or warnings for a fenced block followed by inline code."""
    # NOTE(review): both code spans are empty as seen here; presumably each
    # wrapped an image link that should be skipped -- confirm.
    document = "```md\n\n```\n``"
    assets_dir = tmp_path / "assets"

    report = check_asset_links(document, markdown_dir=tmp_path, asset_root=assets_dir)

    assert report.failure_count == 0
    assert report.warnings == ()
|
||||
|
||||
|
||||
def test_math_render_failures_are_aggregated_with_fake_checker() -> None:
    """Expect one render error when the fake checker rejects one of two expressions."""

    def fake_checker(body: str) -> MathCheckResult:
        # Reject any expression whose body contains "bad"; always attach a message.
        passed = "bad" not in body
        return MathCheckResult(ok=passed, message=f"{body} failed")

    document = "$x_i^2$\n\n$$\nbad_math\n$$"
    report = check_math_renderability(document, fake_checker)

    assert report.math_render_error_count == 1
    warning_codes = [item.code for item in report.warnings]
    assert warning_codes == [WarningCode.MATH_RENDER_FAILED]
    first_warning = report.warnings[0]
    assert "bad_math failed" in first_warning.message
|
||||
|
||||
|
||||
def test_math_extraction_records_display_mode_and_markdown_spans() -> None:
    """Check index, body, display flag and source span of each extracted expression."""
    markdown = "Inline $x_i^2$ before\n\n$$\n\\frac{1}{2}\n$$\n"

    found = extract_math_expressions(markdown)

    summaries = [(item.index, item.body, item.display) for item in found]
    assert summaries == [
        (0, "x_i^2", False),
        (1, "\\frac{1}{2}", True),
    ]

    # Each markdown_span is a (start, end) pair; slicing the source with it
    # should give back the expression including its delimiters.
    slices = []
    for item in found:
        begin, finish = item.markdown_span
        slices.append(markdown[begin:finish])
    assert slices == [
        "$x_i^2$",
        "$$\n\\frac{1}{2}\n$$",
    ]
|
||||
|
||||
|
||||
def test_math_extraction_ignores_code_and_currency_like_text() -> None:
    """Expect only the inline ``$z$`` to be extracted from the sample text."""
    # The sample has a fenced block, an inline code span, a price-like
    # "$12.00$" and one real inline expression.
    document = "```tex\n$x$\n```\n`$y$`\nPrice $12.00$ and real $z$."

    found = extract_math_expressions(document)

    body_and_mode = [(item.body, item.display) for item in found]
    assert body_and_mode == [("z", False)]
|
||||
|
||||
|
||||
def test_batch_math_checker_receives_expression_records() -> None:
    """Expect a checker exposing ``check_expressions`` to receive all expressions at once."""

    class BatchChecker:
        """Fake batch checker that records its input and passes only display math."""

        def __init__(self) -> None:
            self.expressions = ()

        def check_expressions(self, expressions):
            # Keep what was passed in so the test can inspect it afterwards.
            self.expressions = expressions
            return tuple(MathCheckResult(ok=item.display) for item in expressions)

    batch_checker = BatchChecker()
    document = "$inline$\n\n$$\ndisplay\n$$"
    report = check_math_renderability(document, batch_checker)

    received_bodies = [item.body for item in batch_checker.expressions]
    assert received_bodies == ["inline", "display"]
    assert report.math_render_error_count == 1
    first_warning = report.warnings[0]
    assert "inline" in first_warning.message
|
||||
|
||||
|
||||
def test_math_checker_unavailable_is_nonfatal() -> None:
    """Expect an INFO warning and zero render errors when the checker is unavailable."""

    def unavailable_checker(_: str) -> bool:
        # Always signals that the renderer cannot be used.
        raise MathCheckerUnavailable("local renderer missing")

    report = check_math_renderability("$x$", unavailable_checker)

    assert report.math_render_error_count == 0
    first_warning = report.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO
|
||||
|
||||
|
||||
def test_missing_math_checker_is_explicit_and_nonfatal() -> None:
    """Expect an INFO warning and zero render errors when no checker is passed."""
    report = check_math_renderability("$x$")

    assert report.math_render_error_count == 0
    first_warning = report.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_FAILED
    assert first_warning.severity == WarningSeverity.INFO
|
||||
|
||||
|
||||
def test_merge_quality_results_combines_counts_and_warning_order(tmp_path: Path) -> None:
    """Expect merged counts from both results, with asset warnings before math warnings."""
    # NOTE(review): the markdown passed to check_asset_links is an empty string
    # as seen here, yet one missing link is expected -- presumably an image
    # link was lost from this literal; confirm.
    from_assets = check_asset_links("", markdown_dir=tmp_path)
    from_math = check_math_renderability("$x$", lambda _: False)

    merged = merge_quality_results(from_assets, from_math)

    assert merged.missing_asset_link_count == 1
    assert merged.math_render_error_count == 1
    warning_codes = [item.code for item in merged.warnings]
    assert warning_codes == [
        WarningCode.ASSET_LINK_MISSING,
        WarningCode.MATH_RENDER_FAILED,
    ]
|
||||
Reference in New Issue
Block a user