312 lines
10 KiB
Python
312 lines
10 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from pdf2md.doctor import DoctorCommandResult, DoctorReport, format_doctor_report, run_doctor
|
|
from pdf2md.ir import WarningCode, WarningRecord, WarningSeverity
|
|
from pdf2md.math_render import default_mathjax_helper_path
|
|
from pdf2md.mineru_adapter import MinerUVersionResult
|
|
|
|
|
|
class FakeMinerUProbe:
    """Test double for a MinerU probe that replays one canned version result."""

    def __init__(self, result: MinerUVersionResult) -> None:
        """Remember the result that every ``version()`` call should hand back."""
        # Kept as a public attribute so tests can inspect what was injected.
        self.result = result

    def version(self) -> MinerUVersionResult:
        """Return the canned result unchanged; no subprocess is involved."""
        canned = self.result
        return canned
|
|
|
|
|
|
class FakeCuda:
    """In-memory fake of the ``cuda`` namespace exposed by ``FakeTorch``.

    All answers come from the tuples supplied at construction time; device
    names and capabilities are looked up by the same positional index.
    """

    def __init__(
        self,
        *,
        available: bool = True,
        devices: tuple[str, ...] = ("NVIDIA RTX 4060",),
        capabilities: tuple[tuple[int, int], ...] = ((8, 9),),
    ) -> None:
        """Store the availability flag, device names and capability pairs."""
        self._available, self._devices, self._capabilities = (
            available,
            devices,
            capabilities,
        )

    def is_available(self) -> bool:
        """Report the availability flag passed to the constructor."""
        return self._available

    def device_count(self) -> int:
        """Return how many device names were configured."""
        names = self._devices
        return len(names)

    def get_device_name(self, index: int) -> str:
        """Return the configured name of the device at *index*."""
        names = self._devices
        return names[index]

    def get_device_capability(self, index: int) -> tuple[int, int]:
        """Return the configured capability pair of the device at *index*."""
        pairs = self._capabilities
        return pairs[index]
|
|
|
|
|
|
class FakeTorchVersion:
    """Fake of the ``version`` object hanging off ``FakeTorch``."""

    # CUDA toolkit version string reported by the fake torch build.
    cuda: str = "12.8"
|
|
|
|
|
|
class FakeTorch:
    """Minimal fake of the ``torch`` module handed to the doctor checks.

    It carries a fixed version string, a ``version`` object with a ``cuda``
    attribute, and a caller-supplied ``cuda`` fake.
    """

    # Class-level constants shared by every fake torch instance.
    __version__ = "2.8.0+cu128"
    version = FakeTorchVersion()

    def __init__(self, cuda: FakeCuda) -> None:
        """Attach the CUDA fake that this torch instance should expose."""
        self.cuda = cuda
|
|
|
|
|
|
def test_doctor_all_checks_pass_with_mocked_tools(tmp_path: Path) -> None:
    """A fully mocked healthy setup passes, exits 0 and lists checks in order."""
    hf_home = tmp_path / "hf"
    expected_order = [
        "python",
        "uv",
        "mineru",
        "gpu",
        "pytorch",
        "models",
        "mathjax",
        "local-only",
    ]

    report = make_report(
        tmp_path,
        env={"HF_HOME": str(hf_home)},
        existing_paths={hf_home},
    )

    assert report.status == "pass"
    assert report.exit_code == 0
    # The order of the checks is part of the asserted contract.
    observed_order = [check.name for check in report.checks]
    assert observed_order == expected_order
|
|
|
|
|
|
def test_doctor_fails_outside_python_312(tmp_path: Path) -> None:
    """Reporting Python 3.11.9 fails the python check and the whole report."""
    unsupported_version = (3, 11, 9)
    report = make_report(tmp_path, python_version=unsupported_version)

    check = find_check(report, "python")
    assert report.status == "fail"
    assert check.status == "fail"
    # The message must tell the user which interpreter series to use.
    assert "use Python 3.12.x" in check.message
|
|
|
|
|
|
def test_doctor_fails_when_uv_is_missing(tmp_path: Path) -> None:
    """When only nvidia-smi is discoverable, the uv check fails the report."""
    # Only nvidia-smi is listed, so the fake ``which`` returns None for uv.
    tools_without_uv = {"nvidia-smi": "C:/Windows/System32/nvidia-smi.exe"}
    report = make_report(tmp_path, available_tools=tools_without_uv)

    check = find_check(report, "uv")
    assert report.status == "fail"
    assert check.status == "fail"
    assert "uv executable was not found" in check.message
|
|
|
|
|
|
def test_doctor_fails_when_mineru_is_missing(tmp_path: Path) -> None:
    """An unavailable MinerU probe result fails the report with exit code 1."""
    # Probe result with available=False, no version and no exit code.
    unavailable = MinerUVersionResult(
        available=False,
        version=None,
        command=("mineru", "--version"),
        exit_code=None,
        stdout="",
        stderr="",
    )
    report = make_report(tmp_path, mineru_result=unavailable)

    check = find_check(report, "mineru")
    assert report.status == "fail"
    assert report.exit_code == 1
    assert check.status == "fail"
    assert "MinerU CLI executable was not found" in check.message
|
|
|
|
|
|
def test_doctor_warns_when_mineru_version_command_fails(tmp_path: Path) -> None:
    """An available MinerU whose version command exits 2 yields a warning."""
    cli_failure = WarningRecord(
        WarningCode.MINERU_CLI_FAILED,
        WarningSeverity.ERROR,
        "MinerU version command failed.",
    )
    # Available, but the version command exited 2 and produced no version.
    failed_probe = MinerUVersionResult(
        available=True,
        version=None,
        command=("mineru", "--version"),
        exit_code=2,
        stdout="",
        stderr="boom",
        warnings=(cli_failure,),
    )
    report = make_report(tmp_path, mineru_result=failed_probe)

    check = find_check(report, "mineru")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "version could not be detected" in check.message
|
|
|
|
|
|
def test_doctor_warns_when_mineru_version_is_not_target(tmp_path: Path) -> None:
    """A working MinerU reporting 3.1.8 warns that the project target is 3.1.0."""
    version_banner = "mineru, version 3.1.8"
    off_target = MinerUVersionResult(
        available=True,
        version=version_banner,
        command=("mineru", "--version"),
        exit_code=0,
        stdout=version_banner,
        stderr="",
    )
    report = make_report(tmp_path, mineru_result=off_target)

    check = find_check(report, "mineru")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "project target is 3.1.0" in check.message
|
|
|
|
|
|
def test_doctor_warns_when_gpu_and_pytorch_are_missing(tmp_path: Path) -> None:
    """With only uv discoverable and torch unimportable, gpu and pytorch warn."""
    # No nvidia-smi entry, and the import hook raises ImportError for torch.
    uv_only = {"uv": "C:/Users/user/.local/bin/uv.exe"}
    report = make_report(
        tmp_path,
        available_tools=uv_only,
        import_module=missing_torch,
    )

    assert report.status == "warn"
    for check_name in ("gpu", "pytorch"):
        assert find_check(report, check_name).status == "warn"
|
|
|
|
|
|
def test_doctor_warns_for_gtx_1070_ti_pascal_risk(tmp_path: Path) -> None:
    """A GTX 1070 Ti in the fake nvidia-smi output triggers the Pascal warning."""
    smi_line = "NVIDIA GeForce GTX 1070 Ti, 8192 MiB, 551.86\n"
    report = make_report(tmp_path, gpu_stdout=smi_line)

    check = find_check(report, "gpu")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "Pascal/pre-Turing compatibility risk" in check.message
    # At least one detail line must name the detected card.
    assert any("GTX 1070 Ti" in line for line in check.details)
|
|
|
|
|
|
def test_doctor_warns_for_pytorch_pre_turing_capability(tmp_path: Path) -> None:
    """A torch fake reporting compute capability 6.1 makes the pytorch check warn."""

    def fake_pascal_torch(name: str) -> FakeTorch:
        # Import hook returning a torch fake whose single device reports (6, 1).
        assert name == "torch"
        pascal_cuda = FakeCuda(
            devices=("NVIDIA GeForce GTX 1070 Ti",),
            capabilities=((6, 1),),
        )
        return FakeTorch(pascal_cuda)

    # The fake nvidia-smi output names an RTX 4060; only the torch fake
    # reports the older device.
    report = make_report(
        tmp_path,
        gpu_stdout="NVIDIA RTX 4060, 8192 MiB, 551.86\n",
        import_module=fake_pascal_torch,
    )

    check = find_check(report, "pytorch")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "Pascal/pre-Turing compatibility risk" in check.message
    assert any("compute capability 6.1" in line for line in check.details)
|
|
|
|
|
|
def test_doctor_warns_when_model_cache_is_not_detected(tmp_path: Path) -> None:
    """An empty environment and no caller-supplied paths make the models check warn."""
    no_env: dict[str, str] = {}
    no_paths: set[Path] = set()
    report = make_report(tmp_path, env=no_env, existing_paths=no_paths)

    check = find_check(report, "models")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "No MinerU model/cache/config path" in check.message
|
|
|
|
|
|
def test_doctor_warns_when_mathjax_node_is_missing(tmp_path: Path) -> None:
    """Without a discoverable node executable the mathjax check warns."""
    # uv and nvidia-smi are present; the "node" entry is deliberately left out.
    tools_without_node = {
        "uv": "C:/Users/user/.local/bin/uv.exe",
        "nvidia-smi": "C:/Windows/System32/nvidia-smi.exe",
    }
    report = make_report(tmp_path, available_tools=tools_without_node)

    check = find_check(report, "mathjax")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "Node.js executable was not found" in check.message
|
|
|
|
|
|
def test_doctor_warns_when_mathjax_health_fails(tmp_path: Path) -> None:
    """A ``--health`` command that exits 1 makes the mathjax check warn."""
    # Every command other than the health probe gets the normal canned answer.
    healthy_runner = command_runner("NVIDIA RTX 4060, 8192 MiB, 551.86\n")

    def failing_runner(command: tuple[str, ...]) -> DoctorCommandResult:
        if command[-1] != "--health":
            return healthy_runner(command)
        return DoctorCommandResult(command, 1, stderr="Cannot find package 'mathjax'")

    report = make_report(tmp_path, run_command=failing_runner)

    check = find_check(report, "mathjax")
    assert report.status == "warn"
    assert check.status == "warn"
    assert "unavailable" in check.message
    # The stderr of the failed probe should surface in the details.
    assert any("mathjax" in line for line in check.details)
|
|
|
|
|
|
def test_format_doctor_report_is_stable(tmp_path: Path) -> None:
    """The formatted report starts with the overall status and tags each check."""
    # The GTX 1070 Ti output is the same input that makes the gpu check warn.
    smi_line = "NVIDIA GeForce GTX 1070 Ti, 8192 MiB, 551.86\n"
    report = make_report(tmp_path, gpu_stdout=smi_line)

    text = format_doctor_report(report)

    assert text.startswith("Doctor status: WARN\n")
    assert "[WARN] gpu:" in text
    assert "[PASS] local-only:" in text
|
|
|
|
|
|
def make_report(
    tmp_path: Path,
    *,
    python_version: tuple[int, int, int] = (3, 12, 7),
    available_tools: dict[str, str] | None = None,
    mineru_result: MinerUVersionResult | None = None,
    gpu_stdout: str = "NVIDIA RTX 4060, 8192 MiB, 551.86\n",
    env: dict[str, str] | None = None,
    existing_paths: set[Path] | None = None,
    import_module=None,
    run_command=None,
) -> DoctorReport:
    """Run ``run_doctor`` with every external dependency replaced by a fake.

    Each keyword argument overrides one fake; ``None`` selects the default.
    Defaults are chosen with ``is None`` checks, so an explicitly empty value
    such as ``available_tools={}`` ("no tools installed") is honoured instead
    of being silently replaced by the defaults.

    Args:
        tmp_path: Passed to ``run_doctor`` as ``home`` and used to build the
            default ``HF_HOME`` value and default existing path.
        python_version: Version triple reported to the doctor.
        available_tools: Maps executable name to path for the fake ``which``;
            names that are absent resolve to ``None``.
        mineru_result: Result returned by the fake MinerU probe.
        gpu_stdout: Stdout of the fake ``nvidia-smi`` command. Only used when
            ``run_command`` is not supplied.
        env: Environment mapping handed to the doctor.
        existing_paths: Paths the fake ``path_exists`` reports as present.
        import_module: Import hook handed to the doctor; defaults to
            ``fake_torch``.
        run_command: Command runner handed to the doctor; defaults to
            ``command_runner(gpu_stdout)``.

    Returns:
        The ``DoctorReport`` produced by ``run_doctor``.
    """
    if available_tools is None:
        available_tools = {
            "uv": "C:/Users/user/.local/bin/uv.exe",
            "nvidia-smi": "C:/Windows/System32/nvidia-smi.exe",
            "node": "C:/Program Files/nodejs/node.exe",
        }
    if mineru_result is None:
        mineru_result = MinerUVersionResult(
            available=True,
            version="mineru, version 3.1.0",
            command=("mineru", "--version"),
            exit_code=0,
            stdout="mineru, version 3.1.0",
            stderr="",
        )
    if env is None:
        env = {"HF_HOME": str(tmp_path / "hf")}
    if run_command is None:
        run_command = command_runner(gpu_stdout)
    if import_module is None:
        import_module = fake_torch

    # Copy into a new set so the caller's set is never mutated by the add below.
    paths = set(existing_paths if existing_paths is not None else {tmp_path / "hf"})
    # The MathJax helper path is always reported as present, whatever the
    # caller passed in ``existing_paths``.
    paths.add(default_mathjax_helper_path())

    return run_doctor(
        python_version=python_version,
        which=lambda executable: available_tools.get(executable),
        run_command=run_command,
        import_module=import_module,
        env=env,
        path_exists=lambda path: path in paths,
        home=tmp_path,
        mineru_probe=FakeMinerUProbe(mineru_result),
    )
|
|
|
|
|
|
def command_runner(gpu_stdout: str):
    """Build a fake command runner that answers from canned output.

    The returned callable recognises, in this order: ``uv --version``, any
    command whose first element is ``nvidia-smi`` (answered with
    *gpu_stdout*), a two-element ``<...node.exe> --version`` command, and any
    command ending in ``--health``. Everything else, including an empty
    command, gets exit code 127 with ``"not found"`` on stderr.
    """

    def run(command: tuple[str, ...]) -> DoctorCommandResult:
        # Guarded lookups keep an empty command on the "not found" path.
        program = command[0] if command else None
        final_arg = command[-1] if command else None

        if command == ("uv", "--version"):
            return DoctorCommandResult(command, 0, stdout="uv 0.8.13\n")
        if program == "nvidia-smi":
            return DoctorCommandResult(command, 0, stdout=gpu_stdout)
        # With exactly two elements the final argument is also command[1].
        if len(command) == 2 and final_arg == "--version" and program.endswith("node.exe"):
            return DoctorCommandResult(command, 0, stdout="v24.13.0\n")
        if final_arg == "--health":
            return DoctorCommandResult(command, 0, stdout='{"ok":true}\n')
        return DoctorCommandResult(command, 127, stderr="not found")

    return run
|
|
|
|
|
|
def fake_torch(name: str) -> FakeTorch:
    """Import hook that returns a ``FakeTorch`` built on a default ``FakeCuda``."""
    # This hook is only expected to be asked for the torch module.
    assert name == "torch"
    default_cuda = FakeCuda()
    return FakeTorch(default_cuda)
|
|
|
|
|
|
def missing_torch(name: str):
    """Import hook that always raises ``ImportError`` carrying *name*."""
    # This hook is only expected to be asked for the torch module.
    assert name == "torch"
    error = ImportError(name)
    raise error
|
|
|
|
|
|
def find_check(report: DoctorReport, name: str):
    """Return the first check in ``report.checks`` whose ``name`` equals *name*.

    Args:
        report: Report whose ``checks`` are searched in order.
        name: Check name to look for.

    Returns:
        The first matching check object.

    Raises:
        AssertionError: If no check has that name. The message lists the names
            that are present, so a renamed or missing check is easy to spot.
    """
    for check in report.checks:
        if check.name == name:
            return check
    # A bare next() over a generator would raise a message-less StopIteration
    # here; an explicit failure tells the reader what was actually available.
    available = ", ".join(repr(check.name) for check in report.checks)
    raise AssertionError(f"no doctor check named {name!r}; available: [{available}]")
|