add pdftomd

This commit is contained in:
김경종
2026-05-08 16:42:19 +09:00
parent 551ab50735
commit 88d6b92283
99 changed files with 47332 additions and 0 deletions
+118
View File
@@ -0,0 +1,118 @@
from __future__ import annotations
import json
from pathlib import Path
import pytest
from pdf2md.math_render import MathJaxCommandResult, MathJaxRenderChecker
from pdf2md.quality import MathCheckerUnavailable, MathExpression
def test_mathjax_checker_batches_expressions_as_json(tmp_path: Path) -> None:
    """Both expressions reach the runner in a single JSON request.

    A recording stub stands in for the real command runner. It answers with
    one passing and one failing result, and the test then checks that:

    * the per-expression outcomes mirror the stubbed reply,
    * the runner was invoked exactly once with the resolved node executable
      followed by the helper path, a payload holding ``index``/``body``/
      ``display`` for every expression, and the configured timeout of 7.
    """
    helper = make_helper(tmp_path)
    node_path = "C:/node/node.exe"
    stub_reply = {
        "results": [
            {"index": 0, "ok": True},
            {"index": 1, "ok": False, "message": "Undefined control sequence"},
        ]
    }
    recorded = []

    def runner(
        command: tuple[str, ...], stdin: str, timeout_seconds: int
    ) -> MathJaxCommandResult:
        # Decode stdin immediately so the final assertion compares structure,
        # not the exact JSON text the checker produced.
        recorded.append((command, json.loads(stdin), timeout_seconds))
        return MathJaxCommandResult(command, 0, stdout=json.dumps(stub_reply))

    checker = MathJaxRenderChecker(
        helper_path=helper,
        which=lambda executable: node_path if executable == "node" else None,
        runner=runner,
        timeout_seconds=7,
    )
    inline_expression = MathExpression(0, "x_i^2", False, (0, 7))
    display_expression = MathExpression(1, "\\bad", True, (9, 18))

    outcomes = checker.check_expressions((inline_expression, display_expression))

    assert [outcome.ok for outcome in outcomes] == [True, False]
    assert outcomes[1].message == "Undefined control sequence"

    # The fourth MathExpression argument is not expected in the payload.
    expected_payload = {
        "expressions": [
            {"index": 0, "body": "x_i^2", "display": False},
            {"index": 1, "body": "\\bad", "display": True},
        ]
    }
    expected_call = ((node_path, str(helper)), expected_payload, 7)
    assert recorded == [expected_call]
def test_mathjax_checker_reports_missing_node_as_unavailable(tmp_path: Path) -> None:
    """A checker whose ``which`` lookup returns ``None`` raises ``MathCheckerUnavailable``.

    The error message is required to mention "Node.js" literally.
    """
    checker = MathJaxRenderChecker(helper_path=make_helper(tmp_path), which=lambda _: None)
    # ``match`` is treated as a regular expression, so the dot is escaped;
    # unescaped it would also accept text such as "Node js" or "Nodexjs".
    with pytest.raises(MathCheckerUnavailable, match=r"Node\.js"):
        checker.check_expressions((MathExpression(0, "x", False, (0, 3)),))
def test_mathjax_checker_reports_helper_failure_as_unavailable(tmp_path: Path) -> None:
    """A failing helper run is reported as ``MathCheckerUnavailable``.

    The stub runner returns a result whose second field is 124 (presumably
    the process exit status -- confirm against ``MathJaxCommandResult``) and
    whose stderr says the helper timed out; the raised error must contain
    "timed out".
    """

    def runner(
        command: tuple[str, ...], stdin: str, timeout_seconds: int
    ) -> MathJaxCommandResult:
        return MathJaxCommandResult(command, 124, stderr="MathJax helper timed out")

    helper = make_helper(tmp_path)
    checker = MathJaxRenderChecker(helper_path=helper, which=lambda _: "node", runner=runner)
    single_expression = (MathExpression(0, "x", False, (0, 3)),)

    with pytest.raises(MathCheckerUnavailable, match="timed out"):
        checker.check_expressions(single_expression)
def test_mathjax_checker_reports_invalid_json_as_unavailable(tmp_path: Path) -> None:
    """Unparseable helper output is reported as ``MathCheckerUnavailable``.

    The stub runner returns a result with 0 as its second field but with
    stdout that is not JSON; the raised error must contain "invalid JSON".
    """
    garbage_stdout = "not json"

    def runner(
        command: tuple[str, ...], stdin: str, timeout_seconds: int
    ) -> MathJaxCommandResult:
        return MathJaxCommandResult(command, 0, stdout=garbage_stdout)

    checker_options = {
        "helper_path": make_helper(tmp_path),
        "which": lambda _: "node",
        "runner": runner,
    }
    checker = MathJaxRenderChecker(**checker_options)

    expression = MathExpression(0, "x", False, (0, 3))
    with pytest.raises(MathCheckerUnavailable, match="invalid JSON"):
        checker.check_expressions((expression,))
def test_mathjax_checker_rejects_mismatched_result_indexes(tmp_path: Path) -> None:
    """A reply whose result index matches no submitted expression is rejected.

    One expression with index 0 is submitted while the stub runner answers
    with a single result for index 99; the checker must raise
    ``MathCheckerUnavailable`` with a message containing "indexes".
    """
    helper = make_helper(tmp_path)
    # Well-formed JSON, but the index does not correspond to the request.
    mismatched_reply = json.dumps({"results": [{"index": 99, "ok": True}]})

    def runner(
        command: tuple[str, ...], stdin: str, timeout_seconds: int
    ) -> MathJaxCommandResult:
        return MathJaxCommandResult(command, 0, stdout=mismatched_reply)

    checker = MathJaxRenderChecker(helper_path=helper, which=lambda _: "node", runner=runner)
    submitted = (MathExpression(0, "x", False, (0, 3)),)

    with pytest.raises(MathCheckerUnavailable, match="indexes"):
        checker.check_expressions(submitted)
def make_helper(tmp_path: Path) -> Path:
    """Write a placeholder ``check.mjs`` file into *tmp_path* and return its path.

    The file holds only a one-line comment; the tests in this module pass
    the returned path to the checker as its helper script location.
    """
    script_path = tmp_path.joinpath("check.mjs")
    script_path.write_text("// fake helper", encoding="utf-8")
    return script_path