Files
PDFToMD/tests/test_paths.py
T
김경종 7e985ae94a add files
2026-04-30 17:05:19 +09:00

68 lines
2.3 KiB
Python

from __future__ import annotations
from pathlib import Path
import pytest
from pdftomd.models import DocumentIdentity
from pdftomd.paths import (
OutputBundlePaths,
document_identity_from_pdf,
make_anchor,
normalize_pdf_path,
)
def test_normalize_pdf_path_accepts_korean_and_spaced_paths(tmp_path: Path) -> None:
    """Check normalize_pdf_path on a path with Korean characters and spaces.

    The result must be an absolute path whose file name is unchanged.
    """
    source_dir = tmp_path / "한글 경로"
    source_dir.mkdir()
    source_pdf = source_dir / "My Report 2026.pdf"
    # A minimal PDF header line is enough content for this test.
    source_pdf.write_bytes(b"%PDF-1.7\n")

    result = normalize_pdf_path(source_pdf)

    assert result.is_absolute()
    assert result.name == "My Report 2026.pdf"
def test_normalize_pdf_path_rejects_non_pdf_files(tmp_path: Path) -> None:
    """Check that normalize_pdf_path raises ValueError for ``document.txt``.

    The error message is expected to match the pattern ``PDF``.
    """
    not_a_pdf = tmp_path / "document.txt"
    not_a_pdf.write_text("not a pdf", encoding="utf-8")

    with pytest.raises(ValueError, match="PDF"):
        normalize_pdf_path(not_a_pdf)
def test_document_identity_from_pdf_uses_stable_slug(tmp_path: Path) -> None:
    """Check the identity built by document_identity_from_pdf for a Korean file name.

    Two calls on the same file must yield the same slug; the slug starts with
    ``document-``, the original file name is kept, and ``source_path`` equals
    the string form of the normalized path.
    """
    source_pdf = tmp_path / "한글 보고서.pdf"
    source_pdf.write_bytes(b"%PDF-1.7\n")

    # Build the identity twice so the slugs can be compared for stability.
    initial = document_identity_from_pdf(source_pdf)
    repeated = document_identity_from_pdf(source_pdf)

    assert initial.filename == "한글 보고서.pdf"
    assert initial.slug == repeated.slug
    assert initial.slug.startswith("document-")
    assert initial.source_path == str(normalize_pdf_path(source_pdf))
def test_output_bundle_paths_keep_document_and_runtime_artifacts_separate(tmp_path: Path) -> None:
    """Check the layout OutputBundlePaths.from_document builds for ``Example Paper.pdf``.

    Document artifacts (chunks, images) are expected under
    ``<root>/example-paper`` and runtime artifacts under
    ``<root>/.pdftomd-runtime/example-paper``. The two directories must be
    distinct, and neither may be nested inside the other.
    """
    document = DocumentIdentity.from_path("Example Paper.pdf")
    bundle = OutputBundlePaths.from_document(tmp_path, document)

    expected_document_dir = tmp_path / "example-paper"
    expected_images_dir = expected_document_dir / "images"

    # Document-side artifacts.
    assert bundle.document_dir == expected_document_dir
    assert bundle.images_dir == expected_images_dir
    assert bundle.chunk_path(1) == expected_document_dir / "example-paper_001.md"
    assert bundle.figure_asset_path("1") == expected_images_dir / "example-paper_fig-1.png"

    # Runtime-side artifacts.
    assert bundle.runtime_dir == tmp_path / ".pdftomd-runtime" / "example-paper"
    assert bundle.log_path.name == "conversion.log"
    assert bundle.resume_state_path.name == "resume-state.json"

    # Separation has to hold in both directions: the directories differ and
    # neither one is an ancestor of the other.
    assert bundle.runtime_dir != bundle.document_dir
    assert bundle.runtime_dir not in bundle.document_dir.parents
    assert bundle.document_dir not in bundle.runtime_dir.parents
def test_make_anchor_is_deterministic_and_validates_kind() -> None:
    """Check make_anchor against fixed expected anchors and an empty kind.

    Each (kind, label) pair must map to the listed anchor string, and an
    empty kind must raise ValueError with a message matching ``kind``.
    """
    expected_anchors = (
        ("Figure", "2 A", "figure-2-a"),
        ("Equation", "식 3", "equation-3"),
    )
    for kind, label, anchor in expected_anchors:
        assert make_anchor(kind, label) == anchor

    with pytest.raises(ValueError, match="kind"):
        make_anchor("", "1")