Files
PDFToMD/tests/test_doctor.py
T
2026-05-14 10:16:59 +09:00

328 lines
11 KiB
Python

from __future__ import annotations
from pathlib import Path
from pdf2md.doctor import DoctorCommandResult, DoctorReport, format_doctor_report, run_doctor
from pdf2md.ir import WarningCode, WarningRecord, WarningSeverity
from pdf2md.math_render import default_mathjax_helper_path
from pdf2md.mineru_adapter import MinerUVersionResult
class FakeMinerUProbe:
    """Stand-in MinerU probe that replays a pre-built version result."""

    def __init__(self, result: MinerUVersionResult) -> None:
        # Kept on a public attribute exactly as handed in; no copying.
        self.result = result

    def version(self) -> MinerUVersionResult:
        """Return the result that was supplied at construction time."""
        return self.result
class FakeCuda:
    """Fake CUDA namespace used as the ``cuda`` attribute of ``FakeTorch``.

    All values are fixed at construction time. Device names and compute
    capabilities are parallel tuples addressed by the same device index.
    """

    def __init__(
        self,
        *,
        available: bool = True,
        devices: tuple[str, ...] = ("NVIDIA RTX 4060",),
        capabilities: tuple[tuple[int, int], ...] = ((8, 9),),
    ) -> None:
        self._available = available
        self._devices = devices
        self._capabilities = capabilities

    def is_available(self) -> bool:
        """Return the ``available`` flag given to the constructor."""
        return self._available

    def device_count(self) -> int:
        """Return how many device names were configured.

        The count is derived from ``devices`` alone; the ``available`` flag
        does not influence it.
        """
        return len(self._devices)

    def get_device_name(self, index: int) -> str:
        """Return the configured name of the device at ``index``."""
        return self._devices[index]

    def get_device_capability(self, index: int) -> tuple[int, int]:
        """Return the configured ``(major, minor)`` pair for the device at ``index``."""
        return self._capabilities[index]
class FakeTorchVersion:
    """Fake version namespace exposing a fixed ``cuda`` version string."""

    cuda = "12.8"
class FakeTorch:
    """Fake ``torch`` module object returned by the fake importers in this file.

    ``__version__`` and ``version`` are class-level constants; only the
    ``cuda`` namespace varies per instance.
    """

    __version__ = "2.8.0+cu128"
    version = FakeTorchVersion()

    def __init__(self, cuda: FakeCuda) -> None:
        # The caller decides which CUDA devices this fake torch reports.
        self.cuda = cuda
def test_doctor_all_checks_pass_with_mocked_tools(tmp_path: Path) -> None:
    """With every dependency faked as healthy, all checks pass in a fixed order."""
    hf_home = tmp_path / "hf"
    report = make_report(
        tmp_path,
        env={"HF_HOME": str(hf_home)},
        existing_paths={hf_home},
    )

    expected_check_names = [
        "python",
        "uv",
        "mineru",
        "gpu",
        "pytorch",
        "models",
        "mathjax",
        "local-only",
    ]
    assert report.status == "pass"
    assert report.exit_code == 0
    assert [check.name for check in report.checks] == expected_check_names
def test_doctor_fails_outside_python_312(tmp_path: Path) -> None:
    """A Python 3.11 interpreter fails the python check and the whole report."""
    unsupported_version = (3, 11, 9)
    report = make_report(tmp_path, python_version=unsupported_version)
    check = find_check(report, "python")

    assert report.status == "fail"
    assert check.status == "fail"
    assert "use Python 3.12.x" in check.message
def test_doctor_fails_when_uv_is_missing(tmp_path: Path) -> None:
    """When the tool lookup has no entry for uv, the uv check fails the report."""
    # Only nvidia-smi is resolvable, so the lookup for "uv" returns None.
    tools_without_uv = {"nvidia-smi": "C:/Windows/System32/nvidia-smi.exe"}
    report = make_report(tmp_path, available_tools=tools_without_uv)
    check = find_check(report, "uv")

    assert report.status == "fail"
    assert check.status == "fail"
    assert "uv executable was not found" in check.message
def test_doctor_fails_when_mineru_is_missing(tmp_path: Path) -> None:
    """An unavailable MinerU CLI fails the mineru check with exit code 1."""
    not_installed = MinerUVersionResult(
        available=False,
        version=None,
        command=("mineru", "--version"),
        exit_code=None,
        stdout="",
        stderr="",
    )
    report = make_report(tmp_path, mineru_result=not_installed)
    check = find_check(report, "mineru")

    assert report.status == "fail"
    assert report.exit_code == 1
    assert check.status == "fail"
    assert "MinerU CLI executable was not found" in check.message
def test_doctor_warns_when_mineru_version_command_fails(tmp_path: Path) -> None:
    """A MinerU CLI that exists but fails ``--version`` produces a warning, not a failure."""
    cli_failure = WarningRecord(
        WarningCode.MINERU_CLI_FAILED,
        WarningSeverity.ERROR,
        "MinerU version command failed.",
    )
    broken_version_probe = MinerUVersionResult(
        available=True,
        version=None,
        command=("mineru", "--version"),
        exit_code=2,
        stdout="",
        stderr="boom",
        warnings=(cli_failure,),
    )
    report = make_report(tmp_path, mineru_result=broken_version_probe)
    check = find_check(report, "mineru")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "version could not be detected" in check.message
def test_doctor_warns_when_mineru_version_is_not_target(tmp_path: Path) -> None:
    """A MinerU version other than the 3.1.0 project target is reported as a warning."""
    version_line = "mineru, version 3.1.8"
    off_target = MinerUVersionResult(
        available=True,
        version=version_line,
        command=("mineru", "--version"),
        exit_code=0,
        stdout=version_line,
        stderr="",
    )
    report = make_report(tmp_path, mineru_result=off_target)
    check = find_check(report, "mineru")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "project target is 3.1.0" in check.message
def test_doctor_warns_when_gpu_and_pytorch_are_missing(tmp_path: Path) -> None:
    """No resolvable nvidia-smi and an unimportable torch both downgrade to warnings."""
    # Only uv is resolvable, and the fake importer raises ImportError for torch.
    uv_only = {"uv": "C:/Users/user/.local/bin/uv.exe"}
    report = make_report(
        tmp_path,
        available_tools=uv_only,
        import_module=missing_torch,
    )

    assert report.status == "warn"
    for check_name in ("gpu", "pytorch"):
        assert find_check(report, check_name).status == "warn"
def test_doctor_warns_for_gtx_1070_ti_pascal_risk(tmp_path: Path) -> None:
    """A GTX 1070 Ti in the nvidia-smi output triggers the Pascal/pre-Turing warning."""
    pascal_gpu_line = "0, NVIDIA GeForce GTX 1070 Ti, 8192, 551.86\n"
    report = make_report(tmp_path, gpu_stdout=pascal_gpu_line)
    check = find_check(report, "gpu")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "Pascal/pre-Turing compatibility risk" in check.message
    assert any("GTX 1070 Ti" in detail for detail in check.details)
def test_doctor_warns_for_pytorch_pre_turing_capability(tmp_path: Path) -> None:
    """Compute capability 6.1 reported through torch triggers the pre-Turing warning.

    The nvidia-smi output names an RTX 4060, so the warning asserted here
    comes from the pytorch check rather than the gpu check.
    """

    def import_pascal_torch(name: str) -> FakeTorch:
        assert name == "torch"
        pascal_cuda = FakeCuda(
            devices=("NVIDIA GeForce GTX 1070 Ti",),
            capabilities=((6, 1),),
        )
        return FakeTorch(pascal_cuda)

    report = make_report(
        tmp_path,
        gpu_stdout="0, NVIDIA RTX 4060, 8192, 551.86\n",
        import_module=import_pascal_torch,
    )
    check = find_check(report, "pytorch")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "Pascal/pre-Turing compatibility risk" in check.message
    assert any("compute capability 6.1" in detail for detail in check.details)
def test_doctor_warns_when_model_cache_is_not_detected(tmp_path: Path) -> None:
    """With an empty environment and no existing cache paths, the models check warns."""
    no_env: dict[str, str] = {}
    no_paths: set[Path] = set()
    report = make_report(tmp_path, env=no_env, existing_paths=no_paths)
    check = find_check(report, "models")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "No MinerU model/cache/config path" in check.message
def test_doctor_warns_when_mathjax_node_is_missing(tmp_path: Path) -> None:
    """When the tool lookup has no entry for node, the mathjax check warns."""
    # uv and nvidia-smi resolve; "node" is deliberately left out.
    tools_without_node = {
        "uv": "C:/Users/user/.local/bin/uv.exe",
        "nvidia-smi": "C:/Windows/System32/nvidia-smi.exe",
    }
    report = make_report(tmp_path, available_tools=tools_without_node)
    check = find_check(report, "mathjax")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "Node.js executable was not found" in check.message
def test_doctor_warns_when_mathjax_health_fails(tmp_path: Path) -> None:
    """A failing ``--health`` command makes the mathjax check warn and surface stderr."""
    # Every command other than the health probe is answered by the normal fake runner.
    healthy_runner = command_runner("0, NVIDIA RTX 4060, 8192, 551.86\n")

    def failing_runner(command: tuple[str, ...]) -> DoctorCommandResult:
        if command[-1] != "--health":
            return healthy_runner(command)
        return DoctorCommandResult(command, 1, stderr="Cannot find package 'mathjax'")

    report = make_report(tmp_path, run_command=failing_runner)
    check = find_check(report, "mathjax")

    assert report.status == "warn"
    assert check.status == "warn"
    assert "unavailable" in check.message
    assert any("mathjax" in detail for detail in check.details)
def test_format_doctor_report_is_stable(tmp_path: Path) -> None:
    """The formatted report starts with the overall status and tags each check line."""
    report = make_report(
        tmp_path,
        gpu_stdout="0, NVIDIA GeForce GTX 1070 Ti, 8192, 551.86\n",
    )
    text = format_doctor_report(report)

    assert text.startswith("Doctor status: WARN\n")
    assert "[WARN] gpu:" in text
    assert "[PASS] local-only:" in text
def test_doctor_reports_auto_gpu_and_recommended_profile(tmp_path: Path) -> None:
    """With two GPUs listed, the gpu details name cuda:1 as auto GPU and the auto profile."""
    two_gpu_lines = "".join(
        (
            "0, NVIDIA RTX 4060, 8192, 577.00\n",
            "1, NVIDIA RTX 4090, 24564, 577.00\n",
        )
    )
    report = make_report(tmp_path, gpu_stdout=two_gpu_lines)
    check = find_check(report, "gpu")

    assert check.status == "pass"
    expected_fragments = (
        "gpu 1: NVIDIA RTX 4090, 24564 MiB, driver 577.00",
        "auto gpu: cuda:1",
        "recommended MinerU profile: auto",
    )
    for fragment in expected_fragments:
        assert any(fragment in detail for detail in check.details)
def make_report(
    tmp_path: Path,
    *,
    python_version: tuple[int, int, int] = (3, 12, 7),
    available_tools: dict[str, str] | None = None,
    mineru_result: MinerUVersionResult | None = None,
    gpu_stdout: str = "0, NVIDIA RTX 4060, 8192, 551.86\n",
    env: dict[str, str] | None = None,
    existing_paths: set[Path] | None = None,
    import_module=None,
    run_command=None,
) -> DoctorReport:
    """Run the doctor against fully faked dependencies and return its report.

    Every optional argument falls back to a default only when it is ``None``.
    An explicitly passed empty value (for example ``available_tools={}``) is
    honoured as-is, so a test can simulate "nothing is installed".

    Args:
        tmp_path: Passed to ``run_doctor`` as ``home`` and used to build the
            default ``HF_HOME`` location (``tmp_path / "hf"``).
        python_version: Version triple reported to the doctor.
        available_tools: Mapping of executable name to resolved path backing
            the fake ``which``. Names absent from the mapping resolve to
            ``None``. Defaults to uv, nvidia-smi and node.
        mineru_result: Result replayed by the fake MinerU probe. Defaults to
            an available CLI reporting version 3.1.0.
        gpu_stdout: Stdout returned for ``nvidia-smi`` by the default command
            runner. Ignored when ``run_command`` is supplied.
        env: Environment mapping given to the doctor. Defaults to an
            ``HF_HOME`` entry pointing at ``tmp_path / "hf"``.
        existing_paths: Paths the fake ``path_exists`` reports as present.
            Defaults to ``{tmp_path / "hf"}``. The default MathJax helper path
            is always added on top.
        import_module: Callable standing in for module import. Defaults to
            ``fake_torch``.
        run_command: Callable standing in for subprocess execution. Defaults
            to ``command_runner(gpu_stdout)``.

    Returns:
        The ``DoctorReport`` produced by ``run_doctor``.
    """
    if available_tools is None:
        tools = {
            "uv": "C:/Users/user/.local/bin/uv.exe",
            "nvidia-smi": "C:/Windows/System32/nvidia-smi.exe",
            "node": "C:/Program Files/nodejs/node.exe",
        }
    else:
        tools = available_tools

    if mineru_result is None:
        result = MinerUVersionResult(
            available=True,
            version="mineru, version 3.1.0",
            command=("mineru", "--version"),
            exit_code=0,
            stdout="mineru, version 3.1.0",
            stderr="",
        )
    else:
        result = mineru_result

    environment = env if env is not None else {"HF_HOME": str(tmp_path / "hf")}

    # Copy so the caller's set is never mutated by the helper-path addition.
    paths = set(existing_paths if existing_paths is not None else {tmp_path / "hf"})
    paths.add(default_mathjax_helper_path())

    return run_doctor(
        python_version=python_version,
        which=lambda executable: tools.get(executable),
        run_command=run_command if run_command is not None else command_runner(gpu_stdout),
        import_module=import_module if import_module is not None else fake_torch,
        env=environment,
        path_exists=lambda path: path in paths,
        home=tmp_path,
        mineru_probe=FakeMinerUProbe(result),
    )
def command_runner(gpu_stdout: str):
    """Build a fake command runner that answers the commands handled below.

    The returned callable recognises, in this order: ``uv --version``, any
    command whose first element is ``nvidia-smi`` (answered with
    ``gpu_stdout``), a two-element ``<...node.exe> --version`` command, and
    any command ending in ``--health``. Everything else, including an empty
    command, gets exit code 127 with ``"not found"`` on stderr.
    """

    def dispatch(command: tuple[str, ...]) -> DoctorCommandResult:
        if not command:
            return DoctorCommandResult(command, 127, stderr="not found")

        executable = command[0]
        final_arg = command[-1]

        if command == ("uv", "--version"):
            return DoctorCommandResult(command, 0, stdout="uv 0.8.13\n")
        if executable == "nvidia-smi":
            return DoctorCommandResult(command, 0, stdout=gpu_stdout)
        is_version_query = len(command) == 2 and final_arg == "--version"
        if is_version_query and executable.endswith("node.exe"):
            return DoctorCommandResult(command, 0, stdout="v24.13.0\n")
        if final_arg == "--health":
            return DoctorCommandResult(command, 0, stdout='{"ok":true}\n')
        return DoctorCommandResult(command, 127, stderr="not found")

    return dispatch
def fake_torch(name: str) -> FakeTorch:
    """Fake importer: return a ``FakeTorch`` backed by a default ``FakeCuda``.

    Asserts that the only module ever requested is ``"torch"``.
    """
    assert name == "torch"
    default_cuda = FakeCuda()
    return FakeTorch(default_cuda)
def missing_torch(name: str):
    """Fake importer that always raises ``ImportError`` for the requested module.

    Asserts that the only module ever requested is ``"torch"``.
    """
    assert name == "torch"
    not_installed = ImportError(name)
    raise not_installed
def find_check(report: DoctorReport, name: str):
    """Return the first check in ``report.checks`` whose ``name`` equals ``name``.

    Args:
        report: Object exposing an iterable ``checks`` attribute whose items
            have a ``name`` attribute.
        name: Check name to look up.

    Returns:
        The first matching check object.

    Raises:
        AssertionError: If no check carries that name. The message lists the
            names that were present, so a failing test shows what the doctor
            actually produced instead of a bare ``StopIteration``.
    """
    seen_names = []
    for check in report.checks:
        if check.name == name:
            return check
        seen_names.append(check.name)
    raise AssertionError(
        f"doctor report has no check named {name!r}; available checks: {seen_names}"
    )