# Viewer metadata: Python, 66 lines, 2.8 KiB.
from __future__ import annotations
|
|
|
|
from pdf2md.ir import WarningCode, WarningSeverity
|
|
from pdf2md.math_repair import repair_math_render_failures
|
|
from pdf2md.quality import MathCheckResult, MathRenderFailure, extract_math_expressions
|
|
|
|
|
|
class BodyChecker:
    """Stub math checker that logs every body it is asked to check.

    An expression is reported as passing exactly when its ``body`` contains
    ``passing_fragment``.
    """

    def __init__(self, passing_fragment: str) -> None:
        """Start with an empty log and remember the fragment that marks success."""
        self.checked_bodies: list[str] = []
        self.passing_fragment = passing_fragment

    def check_expressions(self, expressions):
        """Log each expression's body and return one ``MathCheckResult`` per expression.

        The bodies are appended to ``checked_bodies`` first; the returned tuple
        holds a result whose ``ok`` flag says whether ``passing_fragment``
        occurs in the corresponding body.
        """
        # Feed the log lazily so bodies are appended one at a time.
        bodies = (item.body for item in expressions)
        self.checked_bodies.extend(bodies)

        verdicts = [
            MathCheckResult(ok=self.passing_fragment in item.body)
            for item in expressions
        ]
        return tuple(verdicts)
|
|
|
|
|
|
def test_repair_math_render_failures_disambiguates_repeated_superscripts() -> None:
    """A repeated superscript is repaired by inserting ``{}`` before the second script."""
    source = "$$\nx ^ {i} ^ {t}\n$$\n"
    double_exponent = MathRenderFailure(
        expression=extract_math_expressions(source)[0],
        message="Double exponent: use braces to clarify",
    )
    # The stub only accepts a body that contains the disambiguated script.
    stub = BodyChecker("{} ^ {t}")

    outcome = repair_math_render_failures(source, (double_exponent,), stub)

    assert outcome.markdown == "$$\nx ^ {i} {} ^ {t}\n$$\n"
    first_repair = outcome.repairs[0]
    assert first_repair.rule == "repeated_script"
    first_warning = outcome.warnings[0]
    assert first_warning.code == WarningCode.MATH_RENDER_REPAIRED
    assert first_warning.severity == WarningSeverity.INFO
|
|
|
|
|
|
def test_repair_math_render_failures_repairs_truncated_array_environment() -> None:
    """A truncated ``\\end{a}`` is restored to ``\\end{array}``."""
    source = "$$\n\\begin{array}{c} x \\end{a}\n$$\n"
    unknown_environment = MathRenderFailure(
        expression=extract_math_expressions(source)[0],
        message="Unknown environment 'a'",
    )
    # The stub only accepts a body that contains the full closing tag.
    stub = BodyChecker("\\end{array}")

    outcome = repair_math_render_failures(source, (unknown_environment,), stub)

    assert outcome.markdown == "$$\n\\begin{array}{c} x \\end{array}\n$$\n"
    first_repair = outcome.repairs[0]
    assert first_repair.rule == "truncated_array_end"
|
|
|
|
|
|
def test_repair_math_render_failures_leaves_markdown_unchanged_when_candidate_fails() -> None:
    """When the checker rejects every candidate, nothing is changed or reported."""
    source = "$$\nx ^ {i} ^ {t}\n$$\n"
    double_exponent = MathRenderFailure(
        expression=extract_math_expressions(source)[0],
        message="Double exponent: use braces to clarify",
    )
    # The fragment is chosen so that no repaired body contains it.
    rejecting_stub = BodyChecker("never-passes")

    outcome = repair_math_render_failures(source, (double_exponent,), rejecting_stub)

    assert outcome.markdown == source
    assert outcome.repairs == ()
    assert outcome.warnings == ()
|
|
|
|
|
|
def test_repair_math_render_failures_only_changes_failed_spans() -> None:
    """Only the expression named in the failure is rewritten; the other stays as is."""
    source = "$a ^ {b} ^ {c}$ and $unchanged ^ {ok}$\n"
    found = extract_math_expressions(source)
    # Report a failure for the first inline expression only.
    double_exponent = MathRenderFailure(
        expression=found[0],
        message="Double exponent: use braces to clarify",
    )
    stub = BodyChecker("{} ^ {c}")

    outcome = repair_math_render_failures(source, (double_exponent,), stub)

    assert outcome.markdown == "$a ^ {b} {} ^ {c}$ and $unchanged ^ {ok}$\n"
|