63 lines
1.9 KiB
Python
63 lines
1.9 KiB
Python
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from pypdf import PdfReader, PdfWriter
|
|
|
|
from pdf2md.pdf_splitter import PdfChunkError, count_pdf_pages, plan_pdf_chunks, write_pdf_chunk
|
|
|
|
|
|
def make_blank_pdf(path: Path, page_count: int) -> Path:
    """Write a PDF consisting of ``page_count`` blank pages to ``path``.

    Any missing parent directories of ``path`` are created first. Every page
    is added with ``width=72`` and ``height=72``.

    Args:
        path: Destination of the generated PDF file.
        page_count: Number of blank pages to add.

    Returns:
        The same ``path`` that was passed in, so the call can be used inline.
    """
    side = 72  # small square page; the tests here only check page counts and ranges

    path.parent.mkdir(parents=True, exist_ok=True)

    blank_doc = PdfWriter()
    for _page_number in range(page_count):
        blank_doc.add_blank_page(width=side, height=side)

    with open(path, "wb") as sink:
        blank_doc.write(sink)

    return path
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("page_count", "expected_ranges"),
    [
        # Fewer pages than one chunk.
        (1, [(1, 1)]),
        # Exactly one full chunk.
        (20, [(1, 20)]),
        # One full chunk plus a single trailing page.
        (21, [(1, 20), (21, 21)]),
        # Exactly two full chunks.
        (40, [(1, 20), (21, 40)]),
        # Two full chunks plus a single trailing page.
        (41, [(1, 20), (21, 40), (41, 41)]),
    ],
)
def test_plan_pdf_chunks_uses_one_based_ranges_and_names(
    tmp_path: Path,
    page_count: int,
    expected_ranges: list[tuple[int, int]],
) -> None:
    """Check planned page ranges and output filenames for a 20-page chunk size.

    For each parametrized page count, a blank PDF named ``paper.pdf`` is
    generated and planned with ``chunk_pages=20``. The test then verifies the
    reported page count, the ``(source_page_start, source_page_end)`` pair of
    every chunk, and that each ``output_filename`` embeds the 1-based chunk
    index and page range, zero-padded to three digits.
    """
    source_pdf = make_blank_pdf(tmp_path / "paper.pdf", page_count)

    planned = plan_pdf_chunks(source_pdf, chunk_pages=20)

    assert count_pdf_pages(source_pdf) == page_count

    planned_ranges = [(item.source_page_start, item.source_page_end) for item in planned]
    assert planned_ranges == expected_ranges

    planned_names = [item.output_filename for item in planned]
    expected_names = [
        f"paper.part-{index:03d}.pages-{start:03d}-{end:03d}.pdf"
        for index, (start, end) in enumerate(expected_ranges, start=1)
    ]
    assert planned_names == expected_names
|
|
|
|
|
|
def test_write_pdf_chunk_writes_expected_page_count(tmp_path: Path) -> None:
    """Check that writing one planned chunk yields a PDF with 20 pages.

    A 41-page blank PDF is planned with ``chunk_pages=20`` and the second
    planned chunk (index 1) is written into a ``chunks`` subdirectory of
    ``tmp_path``. The written file must exist and contain exactly 20 pages
    when read back with ``PdfReader``.
    """
    source_pdf = make_blank_pdf(tmp_path / "paper.pdf", 41)
    planned = plan_pdf_chunks(source_pdf, chunk_pages=20)
    second_chunk = planned[1]
    destination = tmp_path / "chunks" / second_chunk.output_filename

    written = write_pdf_chunk(second_chunk, destination)

    assert written.exists()
    written_pages = PdfReader(written).pages
    assert len(written_pages) == 20
|
|
|
|
|
|
def test_plan_pdf_chunks_rejects_non_positive_chunk_size(tmp_path: Path) -> None:
    """Check that ``plan_pdf_chunks`` raises ``PdfChunkError`` for ``chunk_pages=0``.

    The raised error's message must match the pattern ``"positive integer"``.
    Only the zero case is exercised here.
    """
    source_pdf = make_blank_pdf(tmp_path / "paper.pdf", 1)

    rejection = pytest.raises(PdfChunkError, match="positive integer")
    with rejection:
        plan_pdf_chunks(source_pdf, chunk_pages=0)
|