modify pdftomd
This commit is contained in:
+18
-16
@@ -73,14 +73,14 @@ def test_discovers_directory_recursive_with_relative_parents(tmp_path: Path) ->
|
||||
|
||||
|
||||
def test_discovery_order_is_deterministic_for_non_ascii_names(tmp_path: Path) -> None:
|
||||
touch(tmp_path / "한글.pdf")
|
||||
korean_pdf = touch(tmp_path / "논문.pdf")
|
||||
touch(tmp_path / "Alpha.pdf")
|
||||
touch(tmp_path / "beta.PDF")
|
||||
|
||||
first = discover_pdfs(tmp_path)
|
||||
second = discover_pdfs(tmp_path)
|
||||
|
||||
assert [item.source_path.name for item in first] == ["Alpha.pdf", "beta.PDF", "한글.pdf"]
|
||||
assert [item.source_path.name for item in first] == ["Alpha.pdf", "beta.PDF", korean_pdf.name]
|
||||
assert first == second
|
||||
|
||||
|
||||
@@ -91,22 +91,24 @@ def test_plans_all_default_output_paths_for_single_pdf(tmp_path: Path) -> None:
|
||||
[plan] = plan_pdf_outputs(pdf, output_root)
|
||||
|
||||
assert plan.source_pdf == pdf.resolve()
|
||||
assert plan.markdown_path == output_root.resolve() / "입력.md"
|
||||
assert plan.assets_dir == output_root.resolve() / "입력.assets"
|
||||
assert plan.metadata_path == output_root.resolve() / "입력.metadata.json"
|
||||
assert plan.report_path == output_root.resolve() / "입력.report.md"
|
||||
assert plan.markdown_path == output_root.resolve() / "입력" / "입력_001.md"
|
||||
assert plan.assets_dir == output_root.resolve() / "입력" / "images"
|
||||
assert plan.metadata_path is None
|
||||
assert plan.report_path == output_root.resolve() / "입력" / "입력_report.md"
|
||||
assert plan.raw_dir is None
|
||||
|
||||
|
||||
def test_plans_optional_metadata_and_raw_outputs(tmp_path: Path) -> None:
|
||||
def test_plans_metadata_flag_as_noop_and_raw_outputs(tmp_path: Path) -> None:
|
||||
pdf = touch(tmp_path / "paper.pdf")
|
||||
|
||||
[with_metadata_flag] = plan_pdf_outputs(pdf, tmp_path / "out", metadata=True)
|
||||
[without_metadata] = plan_pdf_outputs(pdf, tmp_path / "out", metadata=False)
|
||||
[with_raw] = plan_pdf_outputs(pdf, tmp_path / "out", keep_raw=True)
|
||||
|
||||
assert with_metadata_flag.metadata_path is None
|
||||
assert without_metadata.metadata_path is None
|
||||
assert without_metadata.report_path == (tmp_path / "out").resolve() / "paper.report.md"
|
||||
assert with_raw.raw_dir == (tmp_path / "out").resolve() / "paper.raw"
|
||||
assert without_metadata.report_path == (tmp_path / "out").resolve() / "paper" / "paper_report.md"
|
||||
assert with_raw.raw_dir == (tmp_path / "out").resolve() / "paper" / "raw"
|
||||
|
||||
|
||||
def test_recursive_planning_preserves_relative_subdirectories(tmp_path: Path) -> None:
|
||||
@@ -117,8 +119,8 @@ def test_recursive_planning_preserves_relative_subdirectories(tmp_path: Path) ->
|
||||
plans = plan_pdf_outputs(root, tmp_path / "out", recursive=True)
|
||||
|
||||
assert [plan.markdown_path.relative_to((tmp_path / "out").resolve()) for plan in plans] == [
|
||||
Path("nested") / "same.md",
|
||||
Path("same.md"),
|
||||
Path("nested") / "same" / "same_001.md",
|
||||
Path("same") / "same_001.md",
|
||||
]
|
||||
|
||||
|
||||
@@ -137,21 +139,21 @@ def test_non_recursive_duplicate_output_paths_fail(tmp_path: Path) -> None:
|
||||
def test_output_conflicts_report_all_existing_paths(tmp_path: Path) -> None:
|
||||
pdf = touch(tmp_path / "paper.pdf")
|
||||
output_root = tmp_path / "out"
|
||||
(output_root / "paper.assets").mkdir(parents=True)
|
||||
(output_root / "paper.md").mkdir()
|
||||
touch(output_root / "paper.metadata.json")
|
||||
(output_root / "paper" / "images").mkdir(parents=True)
|
||||
(output_root / "paper" / "paper_001.md").mkdir()
|
||||
touch(output_root / "paper" / "paper_report.md")
|
||||
|
||||
with pytest.raises(OutputConflictError) as error:
|
||||
plan_pdf_outputs(pdf, output_root)
|
||||
|
||||
conflict_names = {path.name for path in error.value.conflicts}
|
||||
assert conflict_names == {"paper.assets", "paper.md", "paper.metadata.json"}
|
||||
assert conflict_names == {"images", "paper_001.md", "paper_report.md"}
|
||||
|
||||
|
||||
def test_overwrite_allows_existing_paths_without_deleting(tmp_path: Path) -> None:
|
||||
pdf = touch(tmp_path / "paper.pdf")
|
||||
output_root = tmp_path / "out"
|
||||
existing = touch(output_root / "paper.md")
|
||||
existing = touch(output_root / "paper" / "paper_001.md")
|
||||
|
||||
[plan] = plan_pdf_outputs(pdf, output_root, overwrite=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user