Files
PDFToMD/tests/test_math_repair.py
T
2026-05-11 02:08:46 +09:00

66 lines
2.8 KiB
Python

from __future__ import annotations
from pdf2md.ir import WarningCode, WarningSeverity
from pdf2md.math_repair import repair_math_render_failures
from pdf2md.quality import MathCheckResult, MathRenderFailure, extract_math_expressions
class BodyChecker:
    """Test double for a math checker, driven purely by expression bodies.

    An expression is reported as passing when its ``body`` contains
    ``passing_fragment``. Every body handed to ``check_expressions`` is
    appended to ``checked_bodies`` so a test can inspect what was checked.
    """

    def __init__(self, passing_fragment: str) -> None:
        """Store the fragment that marks a body as passing and start an empty log."""
        self.passing_fragment = passing_fragment
        # Bodies seen so far, in the order they were passed in.
        self.checked_bodies: list[str] = []

    def check_expressions(self, expressions):
        """Record each expression body and return one ``MathCheckResult`` per expression.

        ``expressions`` may be any iterable of objects exposing a ``body``
        attribute. The result is a tuple, in input order, whose ``ok`` flag is
        true exactly when ``passing_fragment`` occurs in the body.
        """
        # Read the bodies in a single pass: the values are needed twice
        # (recording and result building), and a one-shot iterator would be
        # exhausted by the first traversal, producing an empty result tuple.
        bodies = [expression.body for expression in expressions]
        self.checked_bodies.extend(bodies)
        return tuple(MathCheckResult(ok=self.passing_fragment in body) for body in bodies)
def test_repair_math_render_failures_disambiguates_repeated_superscripts() -> None:
    """A doubled superscript is split by an empty group and reported as repaired.

    The repair must be logged under the ``repeated_script`` rule and must emit
    an INFO-severity ``MATH_RENDER_REPAIRED`` warning.
    """
    source = "$$\nx ^ {i} ^ {t}\n$$\n"
    expected = "$$\nx ^ {i} {} ^ {t}\n$$\n"
    expressions = extract_math_expressions(source)
    failures = (
        MathRenderFailure(
            expression=expressions[0],
            message="Double exponent: use braces to clarify",
        ),
    )
    # The checker only accepts a body that contains the repaired form.
    outcome = repair_math_render_failures(source, failures, BodyChecker("{} ^ {t}"))

    assert outcome.markdown == expected
    first_repair = outcome.repairs[0]
    assert first_repair.rule == "repeated_script"
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_REPAIRED
    assert first_warning.severity == WarningSeverity.INFO
def test_repair_math_render_failures_repairs_truncated_array_environment() -> None:
    """An array environment whose end tag was cut down to ``a`` is closed properly.

    The repair must be logged under the ``truncated_array_end`` rule.
    """
    source = "$$\n\\begin{array}{c} x \\end{a}\n$$\n"
    expected = "$$\n\\begin{array}{c} x \\end{array}\n$$\n"
    expressions = extract_math_expressions(source)
    failures = (
        MathRenderFailure(expression=expressions[0], message="Unknown environment 'a'"),
    )
    # The checker only accepts a body that ends the array environment in full.
    outcome = repair_math_render_failures(source, failures, BodyChecker("\\end{array}"))

    assert outcome.markdown == expected
    first_repair = outcome.repairs[0]
    assert first_repair.rule == "truncated_array_end"
def test_repair_math_render_failures_leaves_markdown_unchanged_when_candidate_fails() -> None:
    """When the checker rejects every candidate, nothing is changed or reported.

    The markdown must come back untouched, with no repairs and no warnings.
    """
    source = "$$\nx ^ {i} ^ {t}\n$$\n"
    expressions = extract_math_expressions(source)
    failures = (
        MathRenderFailure(
            expression=expressions[0],
            message="Double exponent: use braces to clarify",
        ),
    )
    # This fragment is not expected in any candidate body, so each check fails.
    rejecting_checker = BodyChecker("never-passes")
    outcome = repair_math_render_failures(source, failures, rejecting_checker)

    assert outcome.markdown == source
    assert outcome.repairs == ()
    assert outcome.warnings == ()
def test_repair_math_render_failures_only_changes_failed_spans() -> None:
    """Only the inline span reported as failing is rewritten.

    The second inline span in the same line is not reported as a failure and
    must come back exactly as written.
    """
    source = "$a ^ {b} ^ {c}$ and $unchanged ^ {ok}$\n"
    expected = "$a ^ {b} {} ^ {c}$ and $unchanged ^ {ok}$\n"
    spans = extract_math_expressions(source)
    # Report a failure for the first span only.
    failures = (
        MathRenderFailure(
            expression=spans[0],
            message="Double exponent: use braces to clarify",
        ),
    )
    outcome = repair_math_render_failures(source, failures, BodyChecker("{} ^ {c}"))

    assert outcome.markdown == expected