modify docu

2026-06-11 17:18:03 +09:00
parent c4f8f95d4b
commit 742f311be1
66 changed files with 3354 additions and 375 deletions
@@ -8,8 +8,10 @@ Usage:

 import argparse
 import contextlib
+import fnmatch
 import json
 import os
+import re
 import subprocess
 import sys
 import threading
@@ -54,6 +56,10 @@ class StepExecutor:
    """Phase 디렉토리 안의 step들을 순차 실행하는 하네스."""

    MAX_RETRIES = 3
+    VALIDATION_COMMANDS = (
+        [sys.executable, "-m", "unittest", "discover", "-s", "scripts", "-p", "test_*.py"],
+        [sys.executable, "scripts/validate_workspace.py"],
+    )
    FEAT_MSG = "feat({phase}): step {num} — {name}"
    CHORE_MSG = "chore({phase}): step {num} output"
    TZ = timezone(timedelta(hours=9))
@@ -83,6 +89,7 @@ class StepExecutor:
    def run(self):
        self._print_header()
        self._check_blockers()
+        self._assert_clean_worktree("before branch checkout")
        self._checkout_branch()
        guardrails = self._load_guardrails()
        self._ensure_created_at()
@@ -110,8 +117,117 @@ class StepExecutor:
        cmd = ["git"] + list(args)
        return subprocess.run(cmd, cwd=self._root, capture_output=True, text=True)

+    def _validate_before_commit(self, commit_message: str):
+        """Run each command in VALIDATION_COMMANDS before a commit is made.
+
+        Commands run from the repository root (``self._root``) with their
+        output captured. On the first non-zero exit status the command line
+        and the captured output are printed and the process exits with 1.
+
+        Args:
+            commit_message: Message of the commit about to be created; it is
+                only echoed in the progress line.
+        """
+        print(f"  Validation before commit: {commit_message}")
+        for command in self.VALIDATION_COMMANDS:
+            outcome = subprocess.run(command, cwd=self._root, capture_output=True, text=True)
+            if outcome.returncode == 0:
+                continue
+            print(f"  ERROR: validation failed before commit: {' '.join(command)}")
+            # Show only the last 2000 characters of each non-empty stream.
+            for captured in (outcome.stdout, outcome.stderr):
+                if captured:
+                    print(captured[-2000:])
+            sys.exit(1)
+
+    def _branch_name(self) -> str:
+        """Return the ``codex/<slug>`` branch name for this phase.
+
+        The slug is the stripped phase name with every run of characters
+        outside ``A-Za-z0-9._-`` replaced by ``-``, then trimmed of leading
+        and trailing ``/``, ``.`` and ``-``. If nothing remains, the phase
+        directory name is used instead.
+        """
+        sanitized = re.sub(r"[^A-Za-z0-9._-]+", "-", self._phase_name.strip()).strip("/.-")
+        return f"codex/{sanitized or self._phase_dir_name}"
+
+    def _assert_clean_worktree(self, context: str):
+        """Exit with status 1 unless ``git status --porcelain`` reports nothing.
+
+        Args:
+            context: Short phrase appended to the error line to say when the
+                check was made (for example "before branch checkout").
+        """
+        status = self._run_git("status", "--porcelain")
+        if status.returncode != 0:
+            print("  ERROR: git status failed.")
+            print(f"  {status.stderr.strip()}")
+            sys.exit(1)
+
+        pending = status.stdout.strip()
+        if not pending:
+            return
+
+        # Any porcelain output means tracked or untracked changes exist.
+        print(f"  ERROR: dirty worktree detected {context}.")
+        print("  Commit, stash, or remove these changes before running scripts/execute.py:")
+        for entry in pending.splitlines():
+            print(f"    {entry}")
+        sys.exit(1)
+
+    @staticmethod
+    def _normalize_rel_path(path: str) -> str:
+        """Return *path* as a forward-slash, repository-relative string.
+
+        Backslashes become ``/``; leading ``./`` segments and leading ``/``
+        characters are removed. A leading dot that is part of a name
+        (``.gitignore``, ``.codex/hooks.json``) is preserved, and ``../`` is
+        left in place so that a path escaping the repository never collapses
+        into one that looks like it is inside it.
+
+        Args:
+            path: Path as written in an allowlist or reported by git.
+
+        Returns:
+            The normalized path; an empty string for ``""``, ``"."`` or a
+            path made only of ``./`` and ``/`` prefixes.
+        """
+        rel = path.replace("\\", "/")
+        # str.lstrip("./") treats its argument as a character set, so it would
+        # also eat the dot of hidden files and directories. Strip only whole
+        # "./" segments and leading slashes instead.
+        while True:
+            if rel.startswith("./"):
+                rel = rel[2:]
+            elif rel.startswith("/"):
+                rel = rel[1:]
+            else:
+                break
+        # A bare "." names the repository root, which the old code mapped to "".
+        return "" if rel == "." else rel
+
+    def _path_allowed(self, path: str, patterns: list[str]) -> bool:
+        """Report whether *path* matches at least one allowlist entry.
+
+        Both the path and each entry are normalized first. An entry matches
+        when it ends in ``/`` and the path starts with it, when it contains a
+        glob character (``*``, ``?`` or ``[``) and ``fnmatch.fnmatchcase``
+        accepts the path, or when it equals the path exactly. Entries that
+        normalize to an empty string are ignored.
+        """
+        target = self._normalize_rel_path(path)
+        for entry in patterns:
+            candidate = self._normalize_rel_path(str(entry))
+            if not candidate:
+                continue
+            under_directory = candidate.endswith("/") and target.startswith(candidate)
+            has_wildcard = any(ch in candidate for ch in "*?[")
+            glob_hit = has_wildcard and fnmatch.fnmatchcase(target, candidate)
+            if under_directory or glob_hit or target == candidate:
+                return True
+        return False
+
+    def _validate_step_allowlist(self, step: dict):
+        """Exit with status 1 unless *step* declares a usable ``allowed_paths``.
+
+        ``allowed_paths`` must be a non-empty list whose items are all
+        strings containing at least one non-whitespace character.
+        """
+        allowed = step.get("allowed_paths")
+        well_formed = (
+            isinstance(allowed, list)
+            and bool(allowed)
+            and all(isinstance(item, str) and item.strip() for item in allowed)
+        )
+        if well_formed:
+            return
+        print(f"  ERROR: Step {step.get('step')} must define non-empty allowed_paths.")
+        sys.exit(1)
+
+    def _changed_paths(self) -> list[str]:
+        """Return the sorted, de-duplicated paths git reports as changed.
+
+        Three listings are merged: unstaged changes (``git diff
+        --name-only``), staged changes (``git diff --cached --name-only``)
+        and untracked, non-ignored files (``git ls-files --others
+        --exclude-standard``). Each path is normalized; blank lines are
+        dropped. If any git command fails, its stderr is printed and the
+        process exits with 1.
+        """
+        probes = (
+            (("diff", "--name-only"), "  ERROR: git diff --name-only failed."),
+            (("diff", "--cached", "--name-only"), "  ERROR: git diff --cached --name-only failed."),
+            (("ls-files", "--others", "--exclude-standard"), "  ERROR: git ls-files --others failed."),
+        )
+        collected: set[str] = set()
+        for git_args, failure_line in probes:
+            outcome = self._run_git(*git_args)
+            if outcome.returncode != 0:
+                print(failure_line)
+                print(f"  {outcome.stderr.strip()}")
+                sys.exit(1)
+            for line in outcome.stdout.splitlines():
+                if line.strip():
+                    collected.add(self._normalize_rel_path(line))
+        return sorted(collected)
+
+    def _housekeeping_paths(self, step_num: int) -> set[str]:
+        """Return the bookkeeping files the runner itself may touch for a step.
+
+        These are the phase index, the step's output JSON and the top-level
+        phases index, all as repository-relative paths.
+        """
+        phase_prefix = f"phases/{self._phase_dir_name}"
+        per_phase = {f"{phase_prefix}/index.json", f"{phase_prefix}/step{step_num}-output.json"}
+        return per_phase | {"phases/index.json"}
+
+    def _classify_step_changes(self, step_num: int, step: dict, changed_paths: list[str]) -> tuple[list[str], list[str], list[str]]:
+        """Split changed paths into allowed, housekeeping and disallowed lists.
+
+        Housekeeping membership is checked first, so a runner bookkeeping
+        file is never counted as a step change even if the allowlist also
+        matches it. Input order is preserved within each list.
+
+        Returns:
+            ``(allowed, housekeeping, disallowed)`` lists of normalized paths.
+        """
+        patterns = step.get("allowed_paths", [])
+        bookkeeping = self._housekeeping_paths(step_num)
+        allowed: list[str] = []
+        housekeeping: list[str] = []
+        disallowed: list[str] = []
+        for raw in changed_paths:
+            rel = self._normalize_rel_path(raw)
+            if rel in bookkeeping:
+                bucket = housekeeping
+            elif self._path_allowed(rel, patterns):
+                bucket = allowed
+            else:
+                bucket = disallowed
+            bucket.append(rel)
+        return allowed, housekeeping, disallowed
+
    def _checkout_branch(self):
-        branch = f"feat-{self._phase_name}"
+        branch = self._branch_name()

        r = self._run_git("rev-parse", "--abbrev-ref", "HEAD")
        if r.returncode != 0:
@@ -133,28 +249,45 @@ class StepExecutor:

        print(f"  Branch: {branch}")

-    def _commit_step(self, step_num: int, step_name: str):
-        output_rel = f"phases/{self._phase_dir_name}/step{step_num}-output.json"
-        index_rel = f"phases/{self._phase_dir_name}/index.json"
+    def _stage_paths(self, paths: list[str]):
+        """Stage exactly *paths* with ``git add --``; do nothing for an empty list.
+
+        If ``git add`` fails, its stderr is printed and the process exits
+        with 1.
+        """
+        if paths:
+            outcome = self._run_git("add", "--", *paths)
+            if outcome.returncode != 0:
+                print("  ERROR: git add failed.")
+                print(f"  {outcome.stderr.strip()}")
+                sys.exit(1)

-        self._run_git("add", "-A")
-        self._run_git("reset", "HEAD", "--", output_rel)
-        self._run_git("reset", "HEAD", "--", index_rel)
+    def _commit_step(self, step: dict, step_name: str):
+        step_num = step["step"]
+        changed = self._changed_paths()
+        allowed, housekeeping, disallowed = self._classify_step_changes(step_num, step, changed)
+        if disallowed:
+            print(f"  ERROR: Step {step_num} modified files outside allowed_paths:")
+            for path in disallowed:
+                print(f"    {path}")
+            sys.exit(1)

-        if self._run_git("diff", "--cached", "--quiet").returncode != 0:
+        if allowed:
            msg = self.FEAT_MSG.format(phase=self._phase_name, num=step_num, name=step_name)
-            r = self._run_git("commit", "-m", msg)
-            if r.returncode == 0:
+            self._validate_before_commit(msg)
+            self._stage_paths(allowed)
+            if self._run_git("diff", "--cached", "--quiet").returncode != 0:
+                r = self._run_git("commit", "-m", msg)
+                if r.returncode != 0:
+                    print(f"  ERROR: code commit failed: {r.stderr.strip()}")
+                    sys.exit(1)
                print(f"  Commit: {msg}")
-            else:
-                print(f"  WARN: 코드 커밋 실패: {r.stderr.strip()}")

-        self._run_git("add", "-A")
-        if self._run_git("diff", "--cached", "--quiet").returncode != 0:
+        if housekeeping:
            msg = self.CHORE_MSG.format(phase=self._phase_name, num=step_num)
-            r = self._run_git("commit", "-m", msg)
-            if r.returncode != 0:
-                print(f"  WARN: housekeeping 커밋 실패: {r.stderr.strip()}")
+            self._validate_before_commit(msg)
+            self._stage_paths(housekeeping)
+            if self._run_git("diff", "--cached", "--quiet").returncode != 0:
+                r = self._run_git("commit", "-m", msg)
+                if r.returncode != 0:
+                    print(f"  ERROR: housekeeping commit failed: {r.stderr.strip()}")
+                    sys.exit(1)

    # --- top-level index ---

@@ -197,6 +330,7 @@ class StepExecutor:
        return "## 이전 Step 산출물\n\n" + "\n".join(lines) + "\n\n"

    def _build_preamble(self, guardrails: str, step_context: str,
+                        allowed_paths: list[str],
                        prev_error: Optional[str] = None) -> str:
        commit_example = self.FEAT_MSG.format(
            phase=self._phase_name, num="N", name="<step-name>"
@@ -211,6 +345,9 @@ class StepExecutor:
            f"당신은 {self._project} 프로젝트의 개발자입니다. 아래 step을 수행하세요.\n\n"
            f"{guardrails}\n\n---\n\n"
            f"{step_context}{retry_section}"
+            f"## Step file allowlist\n\n"
+            f"This step may modify only these repository-relative paths:\n"
+            f"{chr(10).join(f'- {p}' for p in allowed_paths)}\n\n"
            f"## 작업 규칙\n\n"
            f"1. 이전 step에서 작성된 코드를 확인하고 일관성을 유지하라.\n"
            f"2. 이 step에 명시된 작업만 수행하라. 추가 기능이나 파일을 만들지 마라.\n"
@@ -299,7 +436,7 @@ class StepExecutor:
        for attempt in range(1, self.MAX_RETRIES + 1):
            index = self._read_json(self._index_file)
            step_context = self._build_step_context(index)
-            preamble = self._build_preamble(guardrails, step_context, prev_error)
+            preamble = self._build_preamble(guardrails, step_context, step.get("allowed_paths", []), prev_error)

            tag = f"Step {step_num}/{self._total - 1} ({done} done): {step_name}"
            if attempt > 1:
@@ -318,7 +455,7 @@ class StepExecutor:
                    if s["step"] == step_num:
                        s["completed_at"] = ts
                self._write_json(self._index_file, index)
-                self._commit_step(step_num, step_name)
+                self._commit_step(step, step_name)
                print(f"  ✓ Step {step_num}: {step_name} [{elapsed}s]")
                return True

@@ -353,7 +490,7 @@ class StepExecutor:
                        s["error_message"] = f"[{self.MAX_RETRIES}회 시도 후 실패] {err_msg}"
                        s["failed_at"] = ts
                self._write_json(self._index_file, index)
-                self._commit_step(step_num, step_name)
+                self._commit_step(step, step_name)
                print(f"  ✗ Step {step_num}: {step_name} failed after {self.MAX_RETRIES} attempts [{elapsed}s]")
                print(f"    Error: {err_msg}")
                self._update_top_index("error")
@@ -369,6 +506,7 @@ class StepExecutor:
                print("\n  All steps completed!")
                return

+            self._validate_step_allowlist(pending)
            step_num = pending["step"]
            for s in index["steps"]:
                if s["step"] == step_num and "started_at" not in s:
@@ -384,15 +522,22 @@ class StepExecutor:
        self._write_json(self._index_file, index)
        self._update_top_index("completed")

-        self._run_git("add", "-A")
+        final_paths = [f"phases/{self._phase_dir_name}/index.json"]
+        if self._top_index_file.exists():
+            final_paths.append("phases/index.json")
+        self._validate_before_commit(f"chore({self._phase_name}): mark phase completed")
+        self._stage_paths(final_paths)
        if self._run_git("diff", "--cached", "--quiet").returncode != 0:
            msg = f"chore({self._phase_name}): mark phase completed"
            r = self._run_git("commit", "-m", msg)
-            if r.returncode == 0:
+            if r.returncode != 0:
+                print(f"  ERROR: phase completion commit failed: {r.stderr.strip()}")
+                sys.exit(1)
+            else:
                print(f"  ✓ {msg}")

        if self._auto_push:
-            branch = f"feat-{self._phase_name}"
+            branch = self._branch_name()
            r = self._run_git("push", "-u", "origin", branch)
            if r.returncode != 0:
                print(f"\n  ERROR: git push 실패: {r.stderr.strip()}")
@@ -30,7 +30,7 @@ class BuildTestExecutorAgentConfigTests(unittest.TestCase):
            "Do not edit tests.",
            "Do not edit CMake.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
        ):
            self.assertIn(required_text, instructions)
@@ -32,7 +32,7 @@ class CoordinatorAgentConfigTests(unittest.TestCase):
            "Do not edit CMake.",
            "Do not run build/test validation.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not automatically spawn subagents.",
            "Do not approve release readiness independently.",
        ):
@@ -32,7 +32,7 @@ class CorrectionAgentConfigTests(unittest.TestCase):
            "Do not change reference artifacts",
            "Do not change tolerance policies",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
        ):
            self.assertIn(required_text, instructions)
@@ -0,0 +1,286 @@
+import importlib.util
+import json
+import subprocess
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+
+def load_execute():
+    """Import the ``execute.py`` that sits next to this test file, freshly.
+
+    Each call builds and executes a new module object named ``execute``.
+    """
+    script = Path(__file__).resolve().with_name("execute.py")
+    spec = importlib.util.spec_from_file_location("execute", script)
+    loaded = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(loaded)
+    return loaded
+
+
+def write_phase(root: Path, phase_dir: str = "0-mvp", phase_name: str = "0-mvp", steps=None):
+    """Create ``<root>/phases/<phase_dir>`` with an index.json and step1.md.
+
+    Args:
+        root: Directory acting as the repository root.
+        phase_dir: Name of the phase directory to create.
+        phase_name: Value stored under the ``phase`` key of the index.
+        steps: Step dictionaries for the index; when ``None`` a single
+            pending "Docs" step allowing ``docs/*.md`` is written.
+
+    Returns:
+        Path of the created phase directory.
+    """
+    phase_path = root / "phases" / phase_dir
+    phase_path.mkdir(parents=True)
+    if steps is None:
+        default_step = {
+            "step": 1,
+            "name": "Docs",
+            "status": "pending",
+            "summary": "",
+            "allowed_paths": ["docs/*.md"],
+        }
+        steps = [default_step]
+    index_payload = {"project": "FESA", "phase": phase_name, "steps": steps}
+    index_file = phase_path / "index.json"
+    index_file.write_text(json.dumps(index_payload, indent=2), encoding="utf-8")
+    step_file = phase_path / "step1.md"
+    step_file.write_text("# Step 1\n", encoding="utf-8")
+    return phase_path
+
+
+def make_executor(execute, root: Path, phase_dir: str = "0-mvp"):
+    """Build a ``StepExecutor`` while ``execute.ROOT`` is patched to *root*.
+
+    The patch is active only during construction and is undone before the
+    executor is returned.
+    """
+    root_patch = patch.object(execute, "ROOT", root)
+    with root_patch:
+        executor = execute.StepExecutor(phase_dir)
+    return executor
+
+
+class ExecuteRunnerSafetyTests(unittest.TestCase):
+    """Safety checks for the StepExecutor runner loaded from execute.py.
+
+    Every test loads a fresh copy of the module, writes a throwaway phase
+    directory under a temporary root, and replaces git/subprocess calls with
+    fakes so no real repository is touched.
+    """
+
+    def test_scaffold_loads_execute_module(self):
+        execute = load_execute()
+
+        self.assertTrue(hasattr(execute, "StepExecutor"))
+
+    def test_branch_name_uses_codex_prefix_and_sanitized_phase(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root, phase_name="linear truss/1d")
+            executor = make_executor(execute, root)
+
+            self.assertEqual(executor._branch_name(), "codex/linear-truss-1d")
+
+    def test_finalize_push_uses_codex_branch_name(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root, phase_name="0-mvp")
+            executor = make_executor(execute, root)
+            executor._auto_push = True
+            calls = []
+
+            # Every git call succeeds; exit status 0 from "diff --cached --quiet"
+            # means nothing is staged, so only the push is of interest here.
+            def fake_git(*args):
+                calls.append(args)
+                if args == ("diff", "--cached", "--quiet"):
+                    return subprocess.CompletedProcess(args, 0, "", "")
+                return subprocess.CompletedProcess(args, 0, "", "")
+
+            with patch.object(executor, "_run_git", side_effect=fake_git):
+                with patch.object(executor, "_validate_before_commit", create=True):
+                    with patch("builtins.print"):
+                        executor._finalize()
+
+            self.assertIn(("push", "-u", "origin", "codex/0-mvp"), calls)
+
+    def test_finalize_stages_only_phase_indexes(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            (root / "phases" / "index.json").write_text('{"phases":[]}', encoding="utf-8")
+            executor = make_executor(execute, root)
+            calls = []
+
+            # Exit status 1 from "diff --cached --quiet" signals staged changes,
+            # so the completion commit path is exercised.
+            def fake_git(*args):
+                calls.append(args)
+                if args == ("diff", "--cached", "--quiet"):
+                    return subprocess.CompletedProcess(args, 1, "", "")
+                return subprocess.CompletedProcess(args, 0, "", "")
+
+            with patch.object(executor, "_run_git", side_effect=fake_git):
+                with patch.object(executor, "_validate_before_commit"):
+                    with patch("builtins.print"):
+                        executor._finalize()
+
+            self.assertNotIn(("add", "-A"), calls)
+            self.assertIn(("add", "--", "phases/0-mvp/index.json", "phases/index.json"), calls)
+
+    def test_assert_clean_worktree_exits_when_git_status_has_changes(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+
+            # Porcelain output with one modified and one untracked file.
+            with patch.object(
+                executor,
+                "_run_git",
+                return_value=subprocess.CompletedProcess([], 0, " M AGENTS.md\n?? scratch.txt\n", ""),
+            ):
+                with patch("builtins.print"):
+                    with self.assertRaises(SystemExit) as cm:
+                        executor._assert_clean_worktree("before checkout")
+
+            self.assertEqual(cm.exception.code, 1)
+
+    def test_run_checks_clean_worktree_before_checkout(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+            calls = []
+
+            # Returns a stub that appends *name* to calls, recording call order.
+            def record(name):
+                def inner(*args, **kwargs):
+                    calls.append(name)
+                return inner
+
+            with patch.object(executor, "_assert_clean_worktree", side_effect=record("clean")):
+                with patch.object(executor, "_checkout_branch", side_effect=record("checkout")):
+                    with patch.object(executor, "_print_header"):
+                        with patch.object(executor, "_check_blockers"):
+                            with patch.object(executor, "_load_guardrails", return_value=""):
+                                with patch.object(executor, "_ensure_created_at"):
+                                    with patch.object(executor, "_execute_all_steps"):
+                                        with patch.object(executor, "_finalize"):
+                                            executor.run()
+
+            self.assertLess(calls.index("clean"), calls.index("checkout"))
+
+    def test_step_allowlist_accepts_exact_prefix_and_glob_paths(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+            # One entry per matching rule: exact name, directory prefix, glob.
+            patterns = ["AGENTS.md", "docs/", "scripts/*.py"]
+
+            self.assertTrue(executor._path_allowed("AGENTS.md", patterns))
+            self.assertTrue(executor._path_allowed("docs/PRD.md", patterns))
+            self.assertTrue(executor._path_allowed("scripts/execute.py", patterns))
+            self.assertFalse(executor._path_allowed(".codex/hooks.json", patterns))
+
+    def test_step_without_allowed_paths_is_rejected_before_codex_invocation(self):
+        execute = load_execute()
+        steps = [{"step": 1, "name": "Unsafe", "status": "pending", "summary": ""}]
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root, steps=steps)
+            executor = make_executor(execute, root)
+
+            with patch("builtins.print"):
+                with self.assertRaises(SystemExit) as cm:
+                    executor._validate_step_allowlist(steps[0])
+
+            self.assertEqual(cm.exception.code, 1)
+
+    def test_classify_step_changes_splits_allowed_housekeeping_and_disallowed_paths(self):
+        execute = load_execute()
+        step = {
+            "step": 1,
+            "name": "Docs",
+            "status": "completed",
+            "summary": "",
+            "allowed_paths": ["docs/*.md"],
+        }
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+            changed = [
+                "docs/PRD.md",
+                "phases/0-mvp/index.json",
+                "phases/0-mvp/step1-output.json",
+                "scripts/execute.py",
+            ]
+
+            allowed, housekeeping, disallowed = executor._classify_step_changes(1, step, changed)
+
+            self.assertEqual(allowed, ["docs/PRD.md"])
+            self.assertEqual(housekeeping, ["phases/0-mvp/index.json", "phases/0-mvp/step1-output.json"])
+            self.assertEqual(disallowed, ["scripts/execute.py"])
+
+    def test_commit_step_stages_only_explicit_allowed_and_housekeeping_paths(self):
+        execute = load_execute()
+        step = {
+            "step": 1,
+            "name": "Docs",
+            "status": "completed",
+            "summary": "",
+            "allowed_paths": ["docs/*.md"],
+        }
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+            calls = []
+
+            # Both "staged changes?" spellings report changes (exit status 1).
+            # NOTE(review): only ("diff", "--cached", "--quiet") appears in the
+            # _commit_step shown in this change; the other form looks unused.
+            def fake_git(*args):
+                calls.append(args)
+                if args in {
+                    ("diff", "--quiet", "--cached", "--"),
+                    ("diff", "--cached", "--quiet"),
+                }:
+                    return subprocess.CompletedProcess(args, 1, "", "")
+                return subprocess.CompletedProcess(args, 0, "", "")
+
+            with patch.object(
+                executor,
+                "_changed_paths",
+                return_value=[
+                    "docs/PRD.md",
+                    "phases/0-mvp/index.json",
+                    "phases/0-mvp/step1-output.json",
+                ],
+            ):
+                with patch.object(executor, "_run_git", side_effect=fake_git):
+                    with patch.object(executor, "_validate_before_commit", create=True):
+                        with patch("builtins.print"):
+                            executor._commit_step(step, "Docs")
+
+            self.assertNotIn(("add", "-A"), calls)
+            self.assertIn(("add", "--", "docs/PRD.md"), calls)
+            self.assertIn(("add", "--", "phases/0-mvp/index.json", "phases/0-mvp/step1-output.json"), calls)
+
+    def test_validate_before_commit_runs_python_selftest_then_workspace_validation(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+            commands = []
+
+            # Records each command and reports success so both commands run.
+            def fake_run(cmd, **kwargs):
+                commands.append(cmd)
+                return subprocess.CompletedProcess(cmd, 0, "ok", "")
+
+            with patch.object(execute.subprocess, "run", side_effect=fake_run):
+                with patch("builtins.print"):
+                    executor._validate_before_commit("feat(0-mvp): step 1")
+
+            self.assertEqual(
+                commands,
+                [
+                    [sys.executable, "-m", "unittest", "discover", "-s", "scripts", "-p", "test_*.py"],
+                    [sys.executable, "scripts/validate_workspace.py"],
+                ],
+            )
+
+    def test_validate_before_commit_exits_before_commit_when_validation_fails(self):
+        execute = load_execute()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            write_phase(root)
+            executor = make_executor(execute, root)
+
+            # Every validation command fails with exit status 1.
+            def fake_run(cmd, **kwargs):
+                return subprocess.CompletedProcess(cmd, 1, "bad", "failed")
+
+            with patch.object(execute.subprocess, "run", side_effect=fake_run):
+                with patch("builtins.print"):
+                    with self.assertRaises(SystemExit) as cm:
+                        executor._validate_before_commit("feat(0-mvp): step 1")
+
+            self.assertEqual(cm.exception.code, 1)
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -96,14 +96,18 @@ SKILLS = {
            "Use when",
            "FESA solver",
            "Abaqus .inp",
-            "CSV schemas",
+            "HDF5",
+            "CSV view",
            "I/O",
        ),
        "body_terms": (
            "docs/io-definitions/<feature-id>-io.md",
            "Abaqus Input Scope",
            "Internal Model Contract",
-            "Output and CSV Schemas",
+            "Output HDF5 Schema",
+            "Deterministic CSV View Schemas",
+            "results.h5",
+            "csv/displacements.csv",
            "*NODE",
            "*ELEMENT",
            "*MATERIAL",
@@ -125,12 +129,13 @@ SKILLS = {
            "references/<feature-id>/<model-id>/",
            "model.inp",
            "metadata.json",
-            "displacements.csv",
-            "reactions.csv",
-            "element_forces.csv",
-            "stresses.csv",
+            "reference.h5",
+            "csv/displacements.csv",
+            "csv/reactions.csv",
+            "csv/element_forces.csv",
+            "csv/stresses.csv",
            "Coverage Matrix",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
        ),
    },
    "fesa-cpp-msvc-tdd": {
@@ -155,13 +160,17 @@ SKILLS = {
        "description_terms": (
            "Use when",
            "FESA solver",
-            "reference CSV",
+            "HDF5",
+            "CSV view",
            "tolerance",
            "comparison",
        ),
        "body_terms": (
            "docs/reference-verifications/<feature-id>-reference-verification.md",
            "ARTIFACT CHECK -> COMPARE -> CLASSIFY -> REPORT",
+            "results.h5",
+            "reference.h5",
+            "deterministic CSV view",
            "max absolute error",
            "max relative error",
            "RMS error",
@@ -29,7 +29,7 @@ class FormulationAgentConfigTests(unittest.TestCase):
            "Do not implement code.",
            "Do not design C++ APIs",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
            "docs/SOLVER_AGENT_DESIGN.md",
            "docs/requirements/<feature-id>.md",
@@ -41,7 +41,7 @@ class ImplementationAgentConfigTests(unittest.TestCase):

        for required_text in (
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
            "Do not change requirements",
            "Do not change formulations",
@@ -31,7 +31,7 @@ class ImplementationPlanningAgentConfigTests(unittest.TestCase):
            "Do not edit CMake.",
            "Do not run CMake/CTest.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not compare solver results.",
            "Do not approve release readiness.",
        ):
@@ -29,7 +29,7 @@ class IoDefinitionAgentConfigTests(unittest.TestCase):
            "Do not implement parsers.",
            "Do not design C++ APIs",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
            "Do not claim full Abaqus compatibility",
        ):
@@ -43,7 +43,8 @@ class IoDefinitionAgentConfigTests(unittest.TestCase):
            "Abaqus input files use keyword lines, data lines, and comment lines.",
            "Model data and history data",
            "supported Abaqus keyword subset",
-            "comparison CSV schemas",
+            "HDF5 result schema",
+            "deterministic CSV view schemas",
        ):
            self.assertIn(required_text, instructions)

@@ -56,7 +57,8 @@ class IoDefinitionAgentConfigTests(unittest.TestCase):
            "Model Data Mapping",
            "History Data Mapping",
            "Internal Model Contract",
-            "Output and CSV Schemas",
+            "Output HDF5 Schema",
+            "Deterministic CSV View Schemas",
            "Validation Rules",
            "Downstream Handoff",
        ):
@@ -86,7 +88,8 @@ class IoDefinitionAgentConfigTests(unittest.TestCase):
            "Model Data Mapping",
            "History Data Mapping",
            "Internal Model Contract",
-            "Output and CSV Schemas",
+            "Output HDF5 Schema",
+            "Deterministic CSV View Schemas",
            "Validation Rules",
            "Downstream Handoff",
            "FESA 솔버의 입력 파일은 Abaqus input file이다.",
@@ -30,7 +30,7 @@ class NumericalReviewAgentConfigTests(unittest.TestCase):
            "Do not edit formulations directly.",
            "Do not design C++ APIs",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
            "docs/SOLVER_AGENT_DESIGN.md",
            "docs/formulations/<feature-id>-formulation.md",
@@ -30,7 +30,7 @@ class PhysicsEvaluationAgentConfigTests(unittest.TestCase):
            "Do not edit tests.",
            "Do not edit CMake.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not change tolerances.",
            "Do not approve release readiness.",
        ):
@@ -55,6 +55,8 @@ class PhysicsEvaluationAgentConfigTests(unittest.TestCase):
        instructions = AGENT_PATH.read_text(encoding="utf-8")

        for required_text in (
+            "HDF5",
+            "deterministic CSV views",
            "Input Evidence",
            "Physics Checks",
            "Failure Classification",
@@ -30,7 +30,7 @@ class ReferenceModelAgentConfigTests(unittest.TestCase):
            "Do not implement parsers.",
            "Do not design C++ APIs",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not compare solver results.",
            "Do not approve release readiness.",
        ):
@@ -44,10 +44,11 @@ class ReferenceModelAgentConfigTests(unittest.TestCase):
            "references/<feature-id>/<model-id>/",
            "model.inp",
            "metadata.json",
-            "displacements.csv",
-            "reactions.csv",
-            "element_forces.csv",
-            "stresses.csv",
+            "reference.h5",
+            "csv/displacements.csv",
+            "csv/reactions.csv",
+            "csv/element_forces.csv",
+            "csv/stresses.csv",
        ):
            self.assertIn(required_text, instructions)

@@ -60,7 +61,7 @@ class ReferenceModelAgentConfigTests(unittest.TestCase):
            "Abaqus Input Requirements",
            "Artifact Bundle Contract",
            "Metadata JSON Contract",
-            "Reference CSV Requirements",
+            "Reference HDF5 and CSV View Requirements",
            "Coverage Matrix",
            "Downstream Handoff",
        ):
@@ -75,7 +76,7 @@ class ReferenceModelAgentConfigTests(unittest.TestCase):
            "Abaqus Input Requirements",
            "Artifact Bundle Contract",
            "Metadata JSON Contract",
-            "Reference CSV Requirements",
+            "Reference HDF5 and CSV View Requirements",
            "Coverage Matrix",
            "Downstream Handoff",
            "references/<feature-id>/<model-id>/",
@@ -17,7 +17,8 @@ class ReferenceVerificationAgentConfigTests(unittest.TestCase):
        data = tomllib.loads(AGENT_PATH.read_text(encoding="utf-8"))

        self.assertEqual(data["name"], "reference-verification-agent")
-        self.assertIn("stored Abaqus reference CSV artifacts", data["description"])
+        self.assertIn("HDF5", data["description"])
+        self.assertIn("deterministic CSV views", data["description"])
        self.assertEqual(data["sandbox_mode"], "workspace-write")
        self.assertEqual(data["model_reasoning_effort"], "extra high")
        self.assertIn("developer_instructions", data)
@@ -30,7 +31,7 @@ class ReferenceVerificationAgentConfigTests(unittest.TestCase):
            "Do not edit tests.",
            "Do not edit CMake.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not approve release readiness.",
            "Do not change tolerance policies.",
        ):
@@ -40,10 +41,12 @@ class ReferenceVerificationAgentConfigTests(unittest.TestCase):
        instructions = AGENT_PATH.read_text(encoding="utf-8")

        for required_text in (
-            "displacements.csv",
-            "reactions.csv",
-            "element_forces.csv",
-            "stresses.csv",
+            "results.h5",
+            "reference.h5",
+            "csv/displacements.csv",
+            "csv/reactions.csv",
+            "csv/element_forces.csv",
+            "csv/stresses.csv",
            "metadata.json",
            "references/<feature-id>/<model-id>/",
        ):
@@ -36,7 +36,7 @@ class ReleaseAgentConfigTests(unittest.TestCase):
            "Do not change reference artifacts",
            "Do not change tolerance policies",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "Do not override failed or missing upstream gates.",
        ):
            self.assertIn(required_text, instructions)
@@ -29,7 +29,7 @@ class RequirementAgentConfigTests(unittest.TestCase):
            "Do not implement code.",
            "Do not write finite element formulations.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not create reference CSV outputs.",
+            "Do not create reference HDF5 outputs or deterministic CSV views.",
            "Requirement Verification Matrix",
            "docs/SOLVER_AGENT_DESIGN.md",
            "references/<feature>",
@@ -29,7 +29,7 @@ class ResearchAgentConfigTests(unittest.TestCase):
            "Do not implement code.",
            "Do not finalize FEM formulations.",
            "Do not run Abaqus, Nastran, or any reference solver.",
-            "Do not generate reference CSVs.",
+            "Do not generate reference HDF5 files or deterministic CSV views.",
            "docs/SOLVER_AGENT_DESIGN.md",
            "docs/requirements/<feature-id>.md",
            "Separate verified facts from inference.",