remove files
This commit is contained in:
@@ -1,166 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from pdftomd.models import Asset, AssetKind
|
||||
from pdftomd.quality import (
|
||||
validate_caption_reference_anchors,
|
||||
validate_chunk_frontmatter,
|
||||
validate_image_links,
|
||||
validate_latex_environments,
|
||||
validate_markdown_quality,
|
||||
validate_math_delimiters,
|
||||
validate_tables,
|
||||
)
|
||||
|
||||
|
||||
def messages(issues: list[object]) -> list[str]:
    """Return the ``message`` attribute of each issue, preserving input order.

    Raises:
        AttributeError: if any element has no ``message`` attribute.
    """
    collected: list[str] = []
    for entry in issues:
        collected.append(getattr(entry, "message"))
    return collected
|
||||
|
||||
|
||||
def test_math_delimiters_accept_inline_and_block_math() -> None:
    """Closed inline ``$...$`` and block ``$$`` math should produce no issues."""
    lines = [
        "Inline energy $E = mc^2$ is preserved.",
        "",
        "$$",
        r"\int_0^1 x^2 dx",
        "$$",
    ]
    document = "\n".join(lines)

    found = validate_math_delimiters(document)

    assert found == []
|
||||
|
||||
|
||||
def test_math_delimiters_report_actionable_unclosed_inline_math() -> None:
    """A lone ``$`` on line 1 should yield exactly one issue naming the delimiter."""
    text = "The expression $E = mc^2 is missing a close."

    reported = validate_math_delimiters(text)

    assert len(reported) == 1
    only = reported[0]
    assert "Unclosed inline math delimiter" in only.message
    assert only.line == 1
    assert "$" in only.message
|
||||
|
||||
|
||||
def test_math_delimiters_report_actionable_unclosed_block_math() -> None:
    """An opening ``$$`` with no closing fence should be reported at its own line."""
    text = "Before\n$$\na^2 + b^2 = c^2\nAfter"

    reported = validate_math_delimiters(text)

    assert len(reported) == 1
    only = reported[0]
    assert "Unclosed block math delimiter" in only.message
    # The opening ``$$`` is the second line of the input.
    assert only.line == 2
|
||||
|
||||
|
||||
def test_latex_environment_pairs_accept_nested_matching_pairs() -> None:
    """A ``matrix`` environment nested inside ``aligned`` should raise no issues."""
    nested_block = r"""
$$
\begin{aligned}
a &= b \\
\begin{matrix}1 & 2\end{matrix}
\end{aligned}
$$
"""

    found = validate_latex_environments(nested_block)

    assert found == []
|
||||
|
||||
|
||||
def test_latex_environment_pairs_report_mismatch() -> None:
    """``\\begin{aligned}`` closed by ``\\end{matrix}`` should yield one issue naming both."""
    snippet = r"\begin{aligned} x \end{matrix}"

    reported = validate_latex_environments(snippet)

    assert len(reported) == 1
    text = reported[0].message
    assert "LaTeX environment mismatch" in text
    assert "aligned" in text
    assert "matrix" in text
|
||||
|
||||
|
||||
def test_image_links_validate_filesystem_and_modeled_assets(tmp_path: Path) -> None:
    """A link to an existing, modeled image passes; a link to a missing one is reported.

    The fixture writes ``images/paper_fig-1.png`` under ``tmp_path`` and models
    it as an ``Asset``. The markdown references that file and
    ``images/missing.png``; only the latter is expected in the issues.
    """
    image_dir = tmp_path / "images"
    image_dir.mkdir()
    (image_dir / "paper_fig-1.png").write_bytes(b"png")
    asset = Asset(
        id="asset-001",
        kind=AssetKind.FIGURE,
        relative_path="images/paper_fig-1.png",
        page=1,
    )
    # NOTE(review): the link targets are derived from the fixture path and the
    # expected message below; the alt text is a placeholder — confirm wording.
    markdown = "![Figure 1](images/paper_fig-1.png)\n![Missing](images/missing.png)"

    issues = validate_image_links(markdown, base_dir=tmp_path, assets=[asset])

    assert messages(issues) == [
        "Image link target does not exist on disk and is not present in modeled assets: images/missing.png"
    ]
|
||||
|
||||
|
||||
def test_simple_markdown_table_parseability() -> None:
    """A two-column pipe table with uniform row widths should raise no issues."""
    rows = [
        "| A | B |",
        "| --- | --- |",
        "| 1 | 2 |",
        "| 3 | 4 |",
    ]
    table = "\n".join(rows)

    found = validate_tables(table)

    assert found == []
|
||||
|
||||
|
||||
def test_markdown_table_reports_row_width_mismatch() -> None:
    """A three-cell body row under a two-column header should yield one issue."""
    table = "| A | B |\n| --- | --- |\n| 1 | 2 | 3 |"

    reported = validate_tables(table)

    assert len(reported) == 1
    first = reported[0]
    assert "Markdown table row has 3 cells; expected 2" in first.message
|
||||
|
||||
|
||||
def test_complex_table_can_be_represented_as_allowed_html_with_fallback() -> None:
    """An HTML ``<table>`` using ``rowspan`` should pass when the HTML fallback is allowed."""
    html_lines = [
        '<table id="tbl-1">',
        "<tr><th rowspan=\"2\">Load</th><th>Value</th></tr>",
        "<tr><td>42</td></tr>",
        "</table>",
        "",
    ]
    document = "\n".join(html_lines)

    found = validate_tables(document, allow_html_table_fallback=True)

    assert found == []
|
||||
|
||||
|
||||
def test_frontmatter_requires_chunk_context_fields() -> None:
    """Frontmatter lacking ``title`` and ``page_range`` should report both, in that order."""
    chunk = "---\ndocument_slug: paper\nchunk_index: 1\n---\n# Paper"
    expected = [
        "Chunk frontmatter is missing required field: title",
        "Chunk frontmatter is missing required field: page_range",
    ]

    reported = validate_chunk_frontmatter(chunk)

    assert messages(reported) == expected
|
||||
|
||||
|
||||
def test_frontmatter_accepts_required_chunk_context_fields() -> None:
    """Frontmatter carrying title, slug, chunk index and page range should raise no issues."""
    chunk = "---\ntitle: Paper\ndocument_slug: paper\nchunk_index: 1\npage_range: 1-3\n---\n# Paper"

    found = validate_chunk_frontmatter(chunk)

    assert found == []
|
||||
|
||||
|
||||
def test_caption_reference_anchor_shape_checks_known_reference_targets() -> None:
    """Only the reference whose anchor id is absent from the document should be reported."""
    body = [
        '<a id="fig-1"></a>',
        "",
        "Figure 1. Diagram.",
        "As shown in [Fig. 1](#fig-1) and [Table 2](#table-2).",
    ]
    document = "\n".join(body)

    reported = validate_caption_reference_anchors(document)

    # ``#fig-1`` has a matching anchor above; ``#table-2`` does not.
    expected = ["Reference link points to a missing anchor: #table-2"]
    assert messages(reported) == expected
|
||||
|
||||
|
||||
def test_combined_quality_gate_does_not_mutate_markdown() -> None:
    """The combined gate should return the input markdown unchanged, ``ok``, with no issues."""
    source = "---\ntitle: Paper\ndocument_slug: paper\nchunk_index: 1\npage_range: 1\n---\n$E=mc^2$"

    outcome = validate_markdown_quality(source)

    assert outcome.markdown == source
    assert outcome.ok
    assert outcome.issues == ()
|
||||
Reference in New Issue
Block a user