from __future__ import annotations from pathlib import Path from pdftomd.models import Asset, AssetKind from pdftomd.quality import ( validate_caption_reference_anchors, validate_chunk_frontmatter, validate_image_links, validate_latex_environments, validate_markdown_quality, validate_math_delimiters, validate_tables, ) def messages(issues: list[object]) -> list[str]: return [getattr(issue, "message") for issue in issues] def test_math_delimiters_accept_inline_and_block_math() -> None: markdown = "\n".join( [ "Inline energy $E = mc^2$ is preserved.", "", "$$", r"\int_0^1 x^2 dx", "$$", ] ) assert validate_math_delimiters(markdown) == [] def test_math_delimiters_report_actionable_unclosed_inline_math() -> None: issues = validate_math_delimiters("The expression $E = mc^2 is missing a close.") assert len(issues) == 1 assert "Unclosed inline math delimiter" in issues[0].message assert issues[0].line == 1 assert "$" in issues[0].message def test_math_delimiters_report_actionable_unclosed_block_math() -> None: issues = validate_math_delimiters("Before\n$$\na^2 + b^2 = c^2\nAfter") assert len(issues) == 1 assert "Unclosed block math delimiter" in issues[0].message assert issues[0].line == 2 def test_latex_environment_pairs_accept_nested_matching_pairs() -> None: markdown = r""" $$ \begin{aligned} a &= b \\ \begin{matrix}1 & 2\end{matrix} \end{aligned} $$ """ assert validate_latex_environments(markdown) == [] def test_latex_environment_pairs_report_mismatch() -> None: issues = validate_latex_environments(r"\begin{aligned} x \end{matrix}") assert len(issues) == 1 assert "LaTeX environment mismatch" in issues[0].message assert "aligned" in issues[0].message assert "matrix" in issues[0].message def test_image_links_validate_filesystem_and_modeled_assets(tmp_path: Path) -> None: image_dir = tmp_path / "images" image_dir.mkdir() (image_dir / "paper_fig-1.png").write_bytes(b"png") asset = Asset( id="asset-001", kind=AssetKind.FIGURE, relative_path="images/paper_fig-1.png", page=1, ) markdown = "![Figure 1](images/paper_fig-1.png)\n![Figure 2](images/missing.png)" issues = validate_image_links(markdown, base_dir=tmp_path, assets=[asset]) assert messages(issues) == [ "Image link target does not exist on disk and is not present in modeled assets: images/missing.png" ] def test_simple_markdown_table_parseability() -> None: markdown = "\n".join( [ "| A | B |", "| --- | --- |", "| 1 | 2 |", "| 3 | 4 |", ] ) assert validate_tables(markdown) == [] def test_markdown_table_reports_row_width_mismatch() -> None: issues = validate_tables("| A | B |\n| --- | --- |\n| 1 | 2 | 3 |") assert len(issues) == 1 assert "Markdown table row has 3 cells; expected 2" in issues[0].message def test_complex_table_can_be_represented_as_allowed_html_with_fallback() -> None: markdown = "\n".join( [ '', "", "", "
LoadValue
42
", "![Table 1 fallback](images/table-1.png)", ] ) assert validate_tables(markdown, allow_html_table_fallback=True) == [] def test_frontmatter_requires_chunk_context_fields() -> None: markdown = "---\ndocument_slug: paper\nchunk_index: 1\n---\n# Paper" issues = validate_chunk_frontmatter(markdown) assert messages(issues) == [ "Chunk frontmatter is missing required field: title", "Chunk frontmatter is missing required field: page_range", ] def test_frontmatter_accepts_required_chunk_context_fields() -> None: markdown = "---\ntitle: Paper\ndocument_slug: paper\nchunk_index: 1\npage_range: 1-3\n---\n# Paper" assert validate_chunk_frontmatter(markdown) == [] def test_caption_reference_anchor_shape_checks_known_reference_targets() -> None: markdown = "\n".join( [ '', "![Figure 1](images/fig-1.png)", "Figure 1. Diagram.", "As shown in [Fig. 1](#fig-1) and [Table 2](#table-2).", ] ) issues = validate_caption_reference_anchors(markdown) assert messages(issues) == ["Reference link points to a missing anchor: #table-2"] def test_combined_quality_gate_does_not_mutate_markdown() -> None: markdown = "---\ntitle: Paper\ndocument_slug: paper\nchunk_index: 1\npage_range: 1\n---\n$E=mc^2$" result = validate_markdown_quality(markdown) assert result.markdown == markdown assert result.ok assert result.issues == ()