from __future__ import annotations from pathlib import Path from pdf2md.doctor import DoctorCommandResult, DoctorReport, format_doctor_report, run_doctor from pdf2md.ir import WarningCode, WarningRecord, WarningSeverity from pdf2md.math_render import default_mathjax_helper_path from pdf2md.mineru_adapter import MinerUVersionResult class FakeMinerUProbe: def __init__(self, result: MinerUVersionResult) -> None: self.result = result def version(self) -> MinerUVersionResult: return self.result class FakeCuda: def __init__( self, *, available: bool = True, devices: tuple[str, ...] = ("NVIDIA RTX 4060",), capabilities: tuple[tuple[int, int], ...] = ((8, 9),), ) -> None: self._available = available self._devices = devices self._capabilities = capabilities def is_available(self) -> bool: return self._available def device_count(self) -> int: return len(self._devices) def get_device_name(self, index: int) -> str: return self._devices[index] def get_device_capability(self, index: int) -> tuple[int, int]: return self._capabilities[index] class FakeTorchVersion: cuda = "12.8" class FakeTorch: __version__ = "2.8.0+cu128" version = FakeTorchVersion() def __init__(self, cuda: FakeCuda) -> None: self.cuda = cuda def test_doctor_all_checks_pass_with_mocked_tools(tmp_path: Path) -> None: report = make_report( tmp_path, env={"HF_HOME": str(tmp_path / "hf")}, existing_paths={tmp_path / "hf"}, ) assert report.status == "pass" assert report.exit_code == 0 assert [check.name for check in report.checks] == [ "python", "uv", "mineru", "gpu", "pytorch", "models", "mathjax", "local-only", ] def test_doctor_fails_outside_python_312(tmp_path: Path) -> None: report = make_report(tmp_path, python_version=(3, 11, 9)) python_check = find_check(report, "python") assert report.status == "fail" assert python_check.status == "fail" assert "use Python 3.12.x" in python_check.message def test_doctor_fails_when_uv_is_missing(tmp_path: Path) -> None: report = make_report(tmp_path, available_tools={"nvidia-smi": "C:/Windows/System32/nvidia-smi.exe"}) uv_check = find_check(report, "uv") assert report.status == "fail" assert uv_check.status == "fail" assert "uv executable was not found" in uv_check.message def test_doctor_fails_when_mineru_is_missing(tmp_path: Path) -> None: report = make_report( tmp_path, mineru_result=MinerUVersionResult( available=False, version=None, command=("mineru", "--version"), exit_code=None, stdout="", stderr="", ), ) mineru_check = find_check(report, "mineru") assert report.status == "fail" assert report.exit_code == 1 assert mineru_check.status == "fail" assert "MinerU CLI executable was not found" in mineru_check.message def test_doctor_warns_when_mineru_version_command_fails(tmp_path: Path) -> None: warning = WarningRecord(WarningCode.MINERU_CLI_FAILED, WarningSeverity.ERROR, "MinerU version command failed.") report = make_report( tmp_path, mineru_result=MinerUVersionResult( available=True, version=None, command=("mineru", "--version"), exit_code=2, stdout="", stderr="boom", warnings=(warning,), ), ) mineru_check = find_check(report, "mineru") assert report.status == "warn" assert mineru_check.status == "warn" assert "version could not be detected" in mineru_check.message def test_doctor_warns_when_mineru_version_is_not_target(tmp_path: Path) -> None: report = make_report( tmp_path, mineru_result=MinerUVersionResult( available=True, version="mineru, version 3.1.8", command=("mineru", "--version"), exit_code=0, stdout="mineru, version 3.1.8", stderr="", ), ) mineru_check = find_check(report, "mineru") assert report.status == "warn" assert mineru_check.status == "warn" assert "project target is 3.1.0" in mineru_check.message def test_doctor_warns_when_gpu_and_pytorch_are_missing(tmp_path: Path) -> None: report = make_report( tmp_path, available_tools={"uv": "C:/Users/user/.local/bin/uv.exe"}, import_module=missing_torch, ) assert report.status == "warn" assert find_check(report, "gpu").status == "warn" assert find_check(report, "pytorch").status == "warn" def test_doctor_warns_for_gtx_1070_ti_pascal_risk(tmp_path: Path) -> None: report = make_report(tmp_path, gpu_stdout="0, NVIDIA GeForce GTX 1070 Ti, 8192, 551.86\n") gpu_check = find_check(report, "gpu") assert report.status == "warn" assert gpu_check.status == "warn" assert "Pascal/pre-Turing compatibility risk" in gpu_check.message assert any("GTX 1070 Ti" in detail for detail in gpu_check.details) def test_doctor_warns_for_pytorch_pre_turing_capability(tmp_path: Path) -> None: def fake_pascal_torch(name: str) -> FakeTorch: assert name == "torch" return FakeTorch(FakeCuda(devices=("NVIDIA GeForce GTX 1070 Ti",), capabilities=((6, 1),))) report = make_report( tmp_path, gpu_stdout="0, NVIDIA RTX 4060, 8192, 551.86\n", import_module=fake_pascal_torch, ) pytorch_check = find_check(report, "pytorch") assert report.status == "warn" assert pytorch_check.status == "warn" assert "Pascal/pre-Turing compatibility risk" in pytorch_check.message assert any("compute capability 6.1" in detail for detail in pytorch_check.details) def test_doctor_warns_when_model_cache_is_not_detected(tmp_path: Path) -> None: report = make_report(tmp_path, env={}, existing_paths=set()) models_check = find_check(report, "models") assert report.status == "warn" assert models_check.status == "warn" assert "No MinerU model/cache/config path" in models_check.message def test_doctor_warns_when_mathjax_node_is_missing(tmp_path: Path) -> None: report = make_report( tmp_path, available_tools={ "uv": "C:/Users/user/.local/bin/uv.exe", "nvidia-smi": "C:/Windows/System32/nvidia-smi.exe", }, ) mathjax_check = find_check(report, "mathjax") assert report.status == "warn" assert mathjax_check.status == "warn" assert "Node.js executable was not found" in mathjax_check.message def test_doctor_warns_when_mathjax_health_fails(tmp_path: Path) -> None: def failing_runner(command: tuple[str, ...]) -> DoctorCommandResult: if command[-1] == "--health": return DoctorCommandResult(command, 1, stderr="Cannot find package 'mathjax'") return command_runner("0, NVIDIA RTX 4060, 8192, 551.86\n")(command) report = make_report(tmp_path, run_command=failing_runner) mathjax_check = find_check(report, "mathjax") assert report.status == "warn" assert mathjax_check.status == "warn" assert "unavailable" in mathjax_check.message assert any("mathjax" in detail for detail in mathjax_check.details) def test_format_doctor_report_is_stable(tmp_path: Path) -> None: report = make_report(tmp_path, gpu_stdout="0, NVIDIA GeForce GTX 1070 Ti, 8192, 551.86\n") formatted = format_doctor_report(report) assert formatted.startswith("Doctor status: WARN\n") assert "[WARN] gpu:" in formatted assert "[PASS] local-only:" in formatted def test_doctor_reports_auto_gpu_and_recommended_profile(tmp_path: Path) -> None: report = make_report( tmp_path, gpu_stdout=( "0, NVIDIA RTX 4060, 8192, 577.00\n" "1, NVIDIA RTX 4090, 24564, 577.00\n" ), ) gpu_check = find_check(report, "gpu") assert gpu_check.status == "pass" assert any("gpu 1: NVIDIA RTX 4090, 24564 MiB, driver 577.00" in detail for detail in gpu_check.details) assert any("auto gpu: cuda:1" in detail for detail in gpu_check.details) assert any("recommended MinerU profile: auto" in detail for detail in gpu_check.details) def make_report( tmp_path: Path, *, python_version: tuple[int, int, int] = (3, 12, 7), available_tools: dict[str, str] | None = None, mineru_result: MinerUVersionResult | None = None, gpu_stdout: str = "0, NVIDIA RTX 4060, 8192, 551.86\n", env: dict[str, str] | None = None, existing_paths: set[Path] | None = None, import_module=None, run_command=None, ) -> DoctorReport: tools = available_tools or { "uv": "C:/Users/user/.local/bin/uv.exe", "nvidia-smi": "C:/Windows/System32/nvidia-smi.exe", "node": "C:/Program Files/nodejs/node.exe", } result = mineru_result or MinerUVersionResult( available=True, version="mineru, version 3.1.0", command=("mineru", "--version"), exit_code=0, stdout="mineru, version 3.1.0", stderr="", ) environment = env if env is not None else {"HF_HOME": str(tmp_path / "hf")} paths = set(existing_paths if existing_paths is not None else {tmp_path / "hf"}) paths.add(default_mathjax_helper_path()) return run_doctor( python_version=python_version, which=lambda executable: tools.get(executable), run_command=run_command or command_runner(gpu_stdout), import_module=import_module or fake_torch, env=environment, path_exists=lambda path: path in paths, home=tmp_path, mineru_probe=FakeMinerUProbe(result), ) def command_runner(gpu_stdout: str): def run(command: tuple[str, ...]) -> DoctorCommandResult: if command == ("uv", "--version"): return DoctorCommandResult(command, 0, stdout="uv 0.8.13\n") if command and command[0] == "nvidia-smi": return DoctorCommandResult(command, 0, stdout=gpu_stdout) if len(command) == 2 and command[1] == "--version" and command[0].endswith("node.exe"): return DoctorCommandResult(command, 0, stdout="v24.13.0\n") if command and command[-1] == "--health": return DoctorCommandResult(command, 0, stdout='{"ok":true}\n') return DoctorCommandResult(command, 127, stderr="not found") return run def fake_torch(name: str) -> FakeTorch: assert name == "torch" return FakeTorch(FakeCuda()) def missing_torch(name: str): assert name == "torch" raise ImportError(name) def find_check(report: DoctorReport, name: str): return next(check for check in report.checks if check.name == name)