from __future__ import annotations from pathlib import Path import pytest from pdftomd.models import DocumentIdentity from pdftomd.paths import ( OutputBundlePaths, document_identity_from_pdf, make_anchor, normalize_pdf_path, ) def test_normalize_pdf_path_accepts_korean_and_spaced_paths(tmp_path: Path) -> None: pdf = tmp_path / "한글 경로" / "My Report 2026.pdf" pdf.parent.mkdir() pdf.write_bytes(b"%PDF-1.7\n") normalized = normalize_pdf_path(pdf) assert normalized.is_absolute() assert normalized.name == "My Report 2026.pdf" def test_normalize_pdf_path_rejects_non_pdf_files(tmp_path: Path) -> None: text_file = tmp_path / "document.txt" text_file.write_text("not a pdf", encoding="utf-8") with pytest.raises(ValueError, match="PDF"): normalize_pdf_path(text_file) def test_document_identity_from_pdf_uses_stable_slug(tmp_path: Path) -> None: pdf = tmp_path / "한글 보고서.pdf" pdf.write_bytes(b"%PDF-1.7\n") first = document_identity_from_pdf(pdf) second = document_identity_from_pdf(pdf) assert first.filename == "한글 보고서.pdf" assert first.slug == second.slug assert first.slug.startswith("document-") assert first.source_path == str(normalize_pdf_path(pdf)) def test_output_bundle_paths_keep_document_and_runtime_artifacts_separate(tmp_path: Path) -> None: document = DocumentIdentity.from_path("Example Paper.pdf") bundle = OutputBundlePaths.from_document(tmp_path, document) assert bundle.document_dir == tmp_path / "example-paper" assert bundle.images_dir == tmp_path / "example-paper" / "images" assert bundle.chunk_path(1) == tmp_path / "example-paper" / "example-paper_001.md" assert bundle.figure_asset_path("1") == tmp_path / "example-paper" / "images" / "example-paper_fig-1.png" assert bundle.runtime_dir == tmp_path / ".pdftomd-runtime" / "example-paper" assert bundle.log_path.name == "conversion.log" assert bundle.resume_state_path.name == "resume-state.json" assert bundle.runtime_dir not in bundle.document_dir.parents def test_make_anchor_is_deterministic_and_validates_kind() -> None: assert make_anchor("Figure", "2 A") == "figure-2-a" assert make_anchor("Equation", "식 3") == "equation-3" with pytest.raises(ValueError, match="kind"): make_anchor("", "1")