from __future__ import annotations from pathlib import Path from pdftomd.models import DocumentIdentity from pdftomd.runtime_contracts import ModelCachePolicy, RuntimeArtifactPaths def test_model_cache_policy_prefers_explicit_path(tmp_path: Path) -> None: policy = ModelCachePolicy.from_environment( project_root=tmp_path, explicit_model_cache=tmp_path / "explicit-models", env={"PDFTOMD_MODEL_CACHE": str(tmp_path / "ignored")}, ) assert policy.root == tmp_path / "explicit-models" assert policy.marker_dir == tmp_path / "explicit-models" / "marker" assert policy.nougat_dir == tmp_path / "explicit-models" / "nougat" assert policy.huggingface_home == tmp_path / "explicit-models" / "huggingface" def test_model_cache_policy_uses_env_then_project_default(tmp_path: Path) -> None: env_policy = ModelCachePolicy.from_environment( project_root=tmp_path, env={"PDFTOMD_MODEL_CACHE": str(tmp_path / "env-models")}, ) default_policy = ModelCachePolicy.from_environment(project_root=tmp_path, env={}) assert env_policy.root == tmp_path / "env-models" assert default_policy.root == tmp_path / ".models" def test_model_cache_policy_exports_offline_environment(tmp_path: Path) -> None: policy = ModelCachePolicy.from_environment(project_root=tmp_path, env={}) environment = policy.to_environment(offline=True) assert environment["HF_HOME"] == str(tmp_path / ".models" / "huggingface") assert environment["HUGGINGFACE_HUB_CACHE"] == str(tmp_path / ".models" / "huggingface" / "hub") assert environment["HF_HUB_OFFLINE"] == "1" def test_runtime_artifact_paths_are_outside_document_bundle(tmp_path: Path) -> None: document = DocumentIdentity.from_path("Example Paper.pdf") artifacts = RuntimeArtifactPaths.from_output_root(tmp_path, document) assert artifacts.root == tmp_path / ".pdftomd-runtime" / "example-paper" assert artifacts.log_file == artifacts.root / "logs" / "conversion.log" assert artifacts.resume_state_file == artifacts.root / "state" / "resume-state.json" assert "example-paper" in str(artifacts.root)