51 lines
2.1 KiB
Python
51 lines
2.1 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
from pdftomd.models import DocumentIdentity
|
|
from pdftomd.runtime_contracts import ModelCachePolicy, RuntimeArtifactPaths
|
|
|
|
|
|
def test_model_cache_policy_prefers_explicit_path(tmp_path: Path) -> None:
    """An explicit cache path wins over the ``PDFTOMD_MODEL_CACHE`` variable."""
    explicit_root = tmp_path / "explicit-models"
    # The environment names a different directory on purpose; the assertions
    # below require the explicit argument to take precedence over it.
    competing_env = {"PDFTOMD_MODEL_CACHE": str(tmp_path / "ignored")}

    policy = ModelCachePolicy.from_environment(
        project_root=tmp_path,
        explicit_model_cache=explicit_root,
        env=competing_env,
    )

    # Every per-tool directory is expected directly under the chosen root.
    assert policy.root == explicit_root
    assert policy.marker_dir == explicit_root / "marker"
    assert policy.nougat_dir == explicit_root / "nougat"
    assert policy.huggingface_home == explicit_root / "huggingface"
|
|
|
|
|
|
def test_model_cache_policy_uses_env_then_project_default(tmp_path: Path) -> None:
    """Without an explicit path, the env var is used, then ``<project>/.models``."""
    env_root = tmp_path / "env-models"
    fallback_root = tmp_path / ".models"

    # Case 1: the environment variable supplies the cache location.
    from_env = ModelCachePolicy.from_environment(
        project_root=tmp_path,
        env={"PDFTOMD_MODEL_CACHE": str(env_root)},
    )
    # Case 2: an empty environment, so the project default is expected.
    from_default = ModelCachePolicy.from_environment(
        project_root=tmp_path,
        env={},
    )

    assert from_env.root == env_root
    assert from_default.root == fallback_root
|
|
|
|
|
|
def test_model_cache_policy_exports_offline_environment(tmp_path: Path) -> None:
    """``to_environment(offline=True)`` exports Hugging Face cache paths and the offline flag."""
    default_policy = ModelCachePolicy.from_environment(project_root=tmp_path, env={})
    # With an empty environment the policy root is the project default, so the
    # Hugging Face directory is expected beneath ``<project>/.models``.
    expected_hf_home = tmp_path / ".models" / "huggingface"

    exported = default_policy.to_environment(offline=True)

    # Exported values are compared as plain strings, not ``Path`` objects.
    assert exported["HF_HOME"] == str(expected_hf_home)
    assert exported["HUGGINGFACE_HUB_CACHE"] == str(expected_hf_home / "hub")
    assert exported["HF_HUB_OFFLINE"] == "1"
|
|
|
|
|
|
def test_runtime_artifact_paths_are_outside_document_bundle(tmp_path: Path) -> None:
    """Runtime artifacts resolve under ``.pdftomd-runtime/<document>`` in the output root."""
    identity = DocumentIdentity.from_path("Example Paper.pdf")
    runtime_paths = RuntimeArtifactPaths.from_output_root(tmp_path, identity)

    expected_root = tmp_path / ".pdftomd-runtime" / "example-paper"

    assert runtime_paths.root == expected_root
    # Log and resume-state files are checked relative to the reported root.
    assert runtime_paths.log_file == runtime_paths.root / "logs" / "conversion.log"
    assert (
        runtime_paths.resume_state_file
        == runtime_paths.root / "state" / "resume-state.json"
    )
    assert "example-paper" in str(runtime_paths.root)
|