From bcc756a4c2747e44621c2b9e85c5aa46e6a06360 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EA=B9=80=EA=B2=BD=EC=A2=85?= <kyungjong74@gmail.com>
Date: Tue, 2 Jun 2026 16:58:56 +0900
Subject: [PATCH] add skills

---
 .../skills/fesa-feature-definition/SKILL.md   | 112 +++++++++++++
 .codex/skills/fesa-fem-specification/SKILL.md | 156 ++++++++++++++++++
 AGENTS.md                                     |   1 +
 .../test_build_test_executor_agent_config.py  |   2 +-
 scripts/test_coordinator_agent_config.py      |   2 +-
 scripts/test_correction_agent_config.py       |   2 +-
 scripts/test_fesa_feature_definition_skill.py |  92 +++++++++++
 scripts/test_fesa_fem_specification_skill.py  | 124 ++++++++++++++
 scripts/test_formulation_agent_config.py      |   2 +-
 scripts/test_implementation_agent_config.py   |   2 +-
 ...st_implementation_planning_agent_config.py |   2 +-
 scripts/test_io_definition_agent_config.py    |   2 +-
 scripts/test_numerical_review_agent_config.py |   2 +-
 .../test_physics_evaluation_agent_config.py   |   2 +-
 scripts/test_reference_model_agent_config.py  |   2 +-
 ...est_reference_verification_agent_config.py |   2 +-
 scripts/test_release_agent_config.py          |   2 +-
 scripts/test_requirement_agent_config.py      |   2 +-
 scripts/test_research_agent_config.py         |   2 +-
 19 files changed, 499 insertions(+), 14 deletions(-)
 create mode 100644 .codex/skills/fesa-feature-definition/SKILL.md
 create mode 100644 .codex/skills/fesa-fem-specification/SKILL.md
 create mode 100644 scripts/test_fesa_feature_definition_skill.py
 create mode 100644 scripts/test_fesa_fem_specification_skill.py
diff --git a/.codex/skills/fesa-feature-definition/SKILL.md b/.codex/skills/fesa-feature-definition/SKILL.md
new file mode 100644
index 0000000..fec8aff
--- /dev/null
+++ b/.codex/skills/fesa-feature-definition/SKILL.md
@@ -0,0 +1,112 @@
+---
+name: fesa-feature-definition
+description: Use when defining or reviewing FESA solver feature requests, requirements, research questions, acceptance criteria, verification matrices, tolerance needs, or downstream handoffs before FEM formulation, I/O definition, reference modeling, implementation, or release.
+---
+
+# FESA Feature Definition
+
+## Overview
+
+Use this skill to turn a FESA solver feature request into a verifiable Feature Definition Packet. Define what the solver feature must do, how it will be verified, what research is needed, and what downstream agents need as input.
+
+This skill is shared by Requirement Agent, Research Agent, Coordinator Agent, and Release Agent. It is not a formulation, implementation, reference generation, or release approval workflow.
+
+## Inputs
+
+Read the smallest useful set of these inputs before drafting or reviewing the packet:
+
+- `docs/SOLVER_SKILL_DESIGN.md`
+- `docs/SOLVER_AGENT_DESIGN.md`
+- `AGENTS.md`
+- User feature request or Coordinator handoff
+- Existing `docs/requirements/<feature-id>.md`
+- Existing `docs/research/<feature-id>-research.md`
+- Relevant coordination or release reports when reviewing traceability
+
+## Workflow
+
+1. Feature Intake: assign a stable `feature_id`, summarize the requested capability, and separate user-visible behavior from implementation detail.
+2. Solver Context: classify analysis type, element family, DOFs, material model boundary, loads, boundary conditions, units, coordinates, and required result quantities.
+3. Requirement Drafting: write singular `shall` requirements. Keep each requirement measurable, necessary, unambiguous, feasible, and traceable.
+4. Verification Planning: create a Requirement Verification Matrix that maps each `must` requirement to a verification method, acceptance criterion, compared quantity, artifact need, and tolerance decision.
+5. Research Framing: list Research Questions for missing theory, benchmark values, official solver manual details, source quality, or applicability limits.
+6. Quality Check: mark unverifiable, ambiguous, or missing decisions as `needs-user-decision`; do not invent values.
+7. Downstream Handoff: package only the facts needed by Research, Formulation, I/O Definition, Reference Model, Implementation Planning, Release, or Coordinator agents.
+
+## Output Contract
+
+The primary output is a `Feature Definition Packet`. The default save candidate is `docs/requirements/<feature-id>.md`; Research Agent may later create `docs/research/<feature-id>-research.md`.
+
+Include these sections:
+
+- Metadata: `feature_id`, `title`, `status`, `owner_agent`, `date`, source request
+- Feature Summary: purpose and expected solver behavior
+- Included Scope: analysis, element, material, load, boundary, I/O, and result scope
+- Excluded Scope: explicitly deferred capabilities
+- Solver Context: units, coordinate system, DOFs, sign conventions, output quantities
+- Requirement Records: one record per `shall` requirement
+- Requirement Verification Matrix: requirement id, method, acceptance criterion, artifact, tolerance
+- Research Questions: source gaps and benchmark questions for Research Agent
+- Reference Artifact Needs: expected `model.inp`, `metadata.json`, and required CSVs
+- Tolerance Decisions: known tolerance policies and unresolved tolerance decisions
+- Open Issues: `needs-user-decision`, `needs-research`, or `needs-reference-artifacts`
+- Downstream Handoff: target agent, required inputs, expected output, stop condition
+
+Requirement record format:
+
+```yaml
+id: FESA-REQ-<FEATURE>-###
+statement: "The FESA solver shall ..."
+category: functional | physics | numerical | input | output | verification | constraint
+rationale: "<why this is needed>"
+source: user | docs | standard | benchmark | derived
+priority: must | should | could
+verification_method: test | analysis | inspection | demonstration | reference-comparison
+acceptance_criteria: "<measurable pass/fail rule>"
+tolerance: "<absolute/relative/norm tolerance or N/A with reason>"
+trace_to:
+  parent_need: "<need id or statement>"
+  downstream_agents: ["Research Agent", "Formulation Agent"]
+status: draft | needs-user-decision | approved
+```
+
+## Verification Matrix Rules
+
+- Use `test` for deterministic unit, parser, or integration behavior.
+- Use `analysis` for checks proven by calculation, dimensional reasoning, or theoretical derivation.
+- Use `inspection` for static documents, schemas, files, and artifact presence.
+- Use `demonstration` only when pass/fail can be observed without reference comparison.
+- Use `reference-comparison` when results are compared against stored reference artifacts: displacements, reactions, element internal forces, stresses, and optional strain, energy, or residual quantities.
+- If a tolerance is missing, write the tolerance need as an open issue instead of choosing one.
+- If a required CSV or `metadata.json` is missing, do not mark the feature definition ready for downstream implementation.
+
+## Boundaries
+
+- Do not finalize FEM formulations.
+- Do not write weak forms, shape functions, element matrices, or C++ API decisions as approved requirements.
+- Do not implement C++ code.
+- Do not create implementation plans beyond downstream handoff notes.
+- Do not run Abaqus, Nastran, or any reference solver.
+- Do not generate reference CSVs.
+- Do not change stored reference artifacts or tolerance policies.
+- Do not approve release readiness.
+
+## Quality Gate
+
+Before marking the packet `approved` or ready for a downstream agent, verify:
+
+- Included and excluded scope are explicit.
+- Every `must` requirement has a verification method and measurable acceptance criteria.
+- Numerical requirements include units, coordinate system, quantity, and tolerance status.
+- Reference-comparison requirements name the physical quantities and required artifacts.
+- Phrases such as "same as Abaqus", "accurate", or "fast" are converted into measurable criteria or open issues.
+- Implementation details are separated from requirements and moved to downstream handoff.
+- Open decisions are marked `needs-user-decision` or assigned to a downstream agent.
+
+## Common Mistakes
+
+- Treating "Abaqus-compatible" as a requirement without a supported keyword subset and verification evidence.
+- Turning preferred implementation details into `shall` requirements.
+- Advancing to formulation when tolerance, unit, coordinate, or output-location decisions are absent.
+- Treating a reference artifact request as permission to run Abaqus or create CSVs.
+- Declaring release readiness from requirement quality alone.
diff --git a/.codex/skills/fesa-fem-specification/SKILL.md b/.codex/skills/fesa-fem-specification/SKILL.md
new file mode 100644
index 0000000..894a0a5
--- /dev/null
+++ b/.codex/skills/fesa-fem-specification/SKILL.md
@@ -0,0 +1,156 @@
+---
+name: fesa-fem-specification
+description: Use when drafting or reviewing FESA FEM formulations, numerical review criteria, Abaqus .inp keyword subsets, internal model mappings, result CSV schemas, output recovery, numerical risks, or implementation-planning handoffs before C++ implementation or reference validation.
+---
+
+# FESA FEM Specification
+
+## Overview
+
+Use this skill to convert approved FESA requirements and research briefs into an implementation-ready FEM specification package: mathematical formulation, independent numerical review criteria, and Abaqus `.inp`/CSV I/O contract.
+
+This skill is shared by Formulation Agent, Numerical Review Agent, I/O Definition Agent, and Implementation Planning Agent. It prepares specifications only; implementation and reference validation are handled by other skills.
+
+## Inputs
+
+Read the smallest useful set of these inputs before drafting or reviewing a specification:
+
+- `docs/SOLVER_SKILL_DESIGN.md`
+- `docs/SOLVER_AGENT_DESIGN.md`
+- `AGENTS.md`
+- `docs/requirements/<feature-id>.md`
+- `docs/research/<feature-id>-research.md`
+- Existing `docs/formulations/<feature-id>-formulation.md`
+- Existing `docs/numerical-reviews/<feature-id>-review.md`
+- Existing `docs/io-definitions/<feature-id>-io.md`
+
+## Workflow
+
+1. INPUT CHECK: verify that requirements, research sources, analysis type, element type, material scope, output quantities, units, coordinate system, and tolerance status are available. Mark missing decisions as `needs-user-decision` or `needs-research`.
+2. FORMULATION SPEC: draft the math-level contract: strong form, weak/variational form, discretization, shape functions, DOFs, kinematics, constitutive contract, element residual/internal force, stiffness/tangent, mapping/Jacobian, numerical integration, and output recovery.
+3. NUMERICAL REVIEW CHECK: review dimensions, signs, DOF ordering, coordinate transforms, matrix/vector sizes, integration weights, tangent consistency, output locations, rigid body modes, patch test readiness, symmetry, positive definiteness, hourglass, locking, distortion, singular Jacobian, conditioning, and convergence expectations.
+4. I/O CONTRACT: define the feature-specific Abaqus `.inp` supported keyword subset, unsupported/ignored/error policy, model data/history data mapping, internal semantic model mapping, output request mapping, and result CSV schema.
+5. HANDOFF: pass math-level pseudocode, parser acceptance cases, CSV writer tests, numerical risk tests, and open issues to Implementation Planning Agent without prescribing C++ structure.
+
+## Output Contract
+
+Create or review one or more of these documents:
+
+- `docs/formulations/<feature-id>-formulation.md`
+- `docs/numerical-reviews/<feature-id>-review.md`
+- `docs/io-definitions/<feature-id>-io.md`
+
+Keep documents in Korean Markdown. Keep FEM symbols, Abaqus keywords, status values, schema keys, and requirement IDs in English.
+
+## FORMULATION SPEC Checklist
+
+Include these formulation sections when the feature requires a formulation document:
+
+- Metadata: `feature_id`, source requirement, source research, `status`, owner agent, date.
+- Scope and Assumptions: analysis type, element type, small/large deformation, linear/nonlinear, material model boundary, coordinate system, units.
+- Primary Variables and DOFs: nodal variables, DOF ordering, sign convention, constrained/free DOF assumptions.
+- Strong Form and Boundary Conditions: governing equation, Dirichlet boundary, Neumann boundary, natural boundary terms.
+- Weak or Variational Form: test functions, integration by parts, internal virtual work, external virtual work.
+- Discretization: interpolation, shape functions, nodal layout, partition of unity, Kronecker delta.
+- Kinematics: strain-displacement relation, `B` matrix or kinematic operator, deformation gradient or strain measure when needed.
+- Constitutive Contract: elasticity matrix or stress-update assumptions, material state variables, constraints; never C++ APIs.
+- Element Equations: internal force or residual, external force, stiffness or tangent matrix, mass/damping only when required, vector/matrix dimensions.
+- Mapping and Numerical Integration: reference coordinates, isoparametric mapping, Jacobian, determinant validity, derivative transform, Gauss points, weights, full/reduced/selective/analytical integration policy.
+- Output Recovery: displacement, reaction, element force, strain, stress, output location, and recovery policy (nodal, element, integration-point, centroidal, or extrapolated to nodes).
+- Algorithm Pseudocode: math-level element routine and assembly flow only.
+- Numerical Risks: rigid body modes, patch test, symmetry, positive definiteness, hourglass, shear locking, volumetric locking, distortion, singular Jacobian, conditioning, convergence risk.
+
+## NUMERICAL REVIEW CHECK Rules
+
+When reviewing a formulation, lead with findings and required revisions:
+
+- Treat confirmed defects, numerical risks, open questions, and downstream test recommendations as separate categories.
+- Check dimensions of equations, vectors, matrices, integration terms, residuals, and stiffness/tangent matrices.
+- Check signs for loads, reactions, stresses, internal force, residual, and element force output.
+- Check coordinate transforms, local/global conventions, output locations, and component naming.
+- Check Jacobian rules, determinant validity, derivative transform, distortion policy, integration weights, and Gauss point counts.
+- Check whether patch tests, rigid body mode checks, symmetry checks, positive definiteness expectations, locking/hourglass risks, and conditioning risks are documented.
+- Use `pass-for-implementation-planning` only when the specification is complete enough for implementation planning; this is not release approval.
+
+## I/O CONTRACT Checklist
+
+Include these I/O sections when the feature requires an Abaqus input or result CSV contract:
+
+- Abaqus Input Scope: input format is Abaqus `.inp`; define the supported documentation source/version and state that FESA supports only this feature's keyword subset.
+- Syntax Policy: case-insensitivity, comma-separated keyword/data lines, comments beginning with `**`, continuation, includes, labels, line-length limits, ASCII assumptions, empty data fields.
+- Model Data Mapping: nodes, elements, node sets, element sets, material, section, coordinates, units.
+- History Data Mapping: steps, analysis procedure keyword, boundary conditions, loads, output requests.
+- Internal Model Contract: semantic fields for node label, element label, element type, connectivity, set membership, material, section, boundary condition, load, step, output request; never C++ classes or function signatures.
+- Output and CSV Schemas: column names, ID fields, component naming, coordinate system, units, step/frame identity, and quantity location.
+- Validation Rules: required fields, duplicate labels, missing references, unsupported keywords, set expansion, coordinate conventions, output quantity availability.
+
+Default Abaqus keyword checklist:
+
+- `*HEADING`
+- `*INCLUDE`
+- `*NODE`
+- `*NSET`
+- `*ELEMENT`
+- `*ELSET`
+- `*MATERIAL`
+- `*ELASTIC`
+- feature-specific section keyword such as `*SOLID SECTION`, `*BEAM SECTION`, or `*SHELL SECTION`
+- `*BOUNDARY`
+- `*CLOAD`
+- `*DLOAD`
+- `*STEP`
+- feature-specific procedure keyword such as `*STATIC`
+- `*OUTPUT`
+- `*NODE OUTPUT`
+- `*ELEMENT OUTPUT`
+
+Default result CSV checklist:
+
+- `displacements.csv`: step/frame, node id, displacement components, coordinate system, units.
+- `reactions.csv`: step/frame, constrained node id, reaction force components, sign convention, units.
+- `element_forces.csv`: step/frame, element id, location, component, value, sign convention, units.
+- `stresses.csv`: step/frame, element id, integration point or recovery location, component, value, coordinate system, units.
+- Optional `strains.csv` and `energy_or_residual.csv`: include only when upstream documents define their schema and acceptance need.
+
+## Handoff
+
+Prepare downstream handoff without deciding implementation structure:
+
+- Numerical Review Agent: derivations, assumptions, numerical risks, dimensions, open issues.
+- I/O Definition Agent: required inputs, outputs, units, coordinates, output locations, Abaqus keyword needs.
+- Reference Model Agent: benchmarkable quantities, patch test needs, expected invariants, singular or invalid-input cases.
+- Implementation Planning Agent: math-level pseudocode, parser acceptance cases, CSV writer tests, numerical risk tests, acceptance-relevant quantities.
+
+## Boundaries
+
+- Do not implement C++ code.
+- Do not design C++ APIs.
+- Do not decide C++ file ownership or storage layout.
+- Do not implement parsers.
+- Do not run Abaqus, Nastran, or any reference solver.
+- Do not generate reference CSVs.
+- Do not create or modify reference artifacts.
+- Do not change tolerance policies.
+- Do not perform reference verification or physics validation.
+- Do not approve release readiness.
+
+## Quality Gate
+
+Before marking a specification ready for implementation planning:
+
+- Requirements and research sources are traceable, or missing source evidence is marked `needs-research`.
+- Strong Form, Weak or Variational Form, Discretization, Kinematics, Element Equations, Mapping and Numerical Integration, Output Recovery, and Numerical Risks are complete enough for the feature scope.
+- Shape functions include partition of unity and Kronecker delta expectations when applicable.
+- Mapping includes reference coordinates, Jacobian, determinant validity, and derivative transform.
+- Numerical review checks cover dimensions, signs, DOF ordering, coordinate transforms, integration weights, output locations, rigid body modes, patch tests, symmetry, positive definiteness, hourglass, locking, singular Jacobian, and conditioning.
+- Abaqus Input Scope, Model Data Mapping, History Data Mapping, Internal Model Contract, Output and CSV Schemas, and Validation Rules are documented when I/O is in scope.
+- Unsupported Abaqus keyword behavior is classified as `unsupported`, `ignored-with-warning`, or `requires-user-decision`.
+- No C++ API, parser implementation, reference value, or tolerance policy is invented.
+
+## Common Mistakes
+
+- Advancing a formulation without dimensions, signs, coordinate system, or output location.
+- Treating a feature as fully Abaqus-compatible when only a keyword subset is defined.
+- Hiding uncertain derivations inside implementation pseudocode instead of recording open issues.
+- Defining CSV columns without units, coordinate system, step/frame identity, or quantity location.
+- Treating `pass-for-implementation-planning` as solver verification or release readiness.
diff --git a/AGENTS.md b/AGENTS.md
index 2abc792..ada05a7 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -11,6 +11,7 @@
 - CRITICAL: C++ 빌드는 CMake/MSVC/x64/Debug 기준으로 검증한다.
 - CRITICAL: 새 기능 또는 동작 변경은 테스트를 먼저 작성하고 실패를 확인한 뒤 구현한다.
 - CRITICAL: Abaqus reference artifact나 solver 코드 복원은 명시적으로 요청된 phase에서만 수행한다.
+- Codex custom agent의 `model_reasoning_effort` 기본값은 `extra high`로 둔다.
 - Harness runner는 `scripts/execute.py`에 둔다.
 - Codex hook 정책은 `.codex/hooks/`에 둔다.
 - Harness planning/review instructions are stored in `.codex/skills/`.
diff --git a/scripts/test_build_test_executor_agent_config.py b/scripts/test_build_test_executor_agent_config.py
index 855a9b7..ea08811 100644
--- a/scripts/test_build_test_executor_agent_config.py
+++ b/scripts/test_build_test_executor_agent_config.py
@@ -19,7 +19,7 @@ class BuildTestExecutorAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "build-test-executor-agent")
         self.assertIn("C++/MSVC/CMake/CTest validation", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_build_test_executor_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_coordinator_agent_config.py b/scripts/test_coordinator_agent_config.py
index e2d30ad..f0b4c3b 100644
--- a/scripts/test_coordinator_agent_config.py
+++ b/scripts/test_coordinator_agent_config.py
@@ -19,7 +19,7 @@ class CoordinatorAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "coordinator-agent")
         self.assertIn("workflow state", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_coordinator_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_correction_agent_config.py b/scripts/test_correction_agent_config.py
index 1d377a3..7c7802e 100644
--- a/scripts/test_correction_agent_config.py
+++ b/scripts/test_correction_agent_config.py
@@ -19,7 +19,7 @@ class CorrectionAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "correction-agent")
         self.assertIn("C++/MSVC/CMake/CTest fixes", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_correction_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_fesa_feature_definition_skill.py b/scripts/test_fesa_feature_definition_skill.py
new file mode 100644
index 0000000..250b98a
--- /dev/null
+++ b/scripts/test_fesa_feature_definition_skill.py
@@ -0,0 +1,92 @@
+import unittest
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]  # directory one level above this script's own folder
+SKILL_PATH = ROOT / ".codex" / "skills" / "fesa-feature-definition" / "SKILL.md"  # skill document under test
+
+
+def read_skill():  # Return the complete text of SKILL_PATH, decoded as UTF-8.
+    return SKILL_PATH.read_text(encoding="utf-8")
+
+
+def parse_frontmatter(text):  # Parse the leading `---` delimited block of `key: value` lines into a dict of stripped strings.
+    lines = text.splitlines()
+    if not lines or lines[0] != "---":  # the very first line must be exactly the opening delimiter
+        raise AssertionError("SKILL.md must start with YAML frontmatter")
+
+    fields = {}
+    for line in lines[1:]:
+        if line == "---":  # closing delimiter: frontmatter is complete
+            return fields
+        key, sep, value = line.partition(":")  # split on the first colon only, so values may contain colons
+        if not sep:  # no colon on this line (a blank line is rejected here too)
+            raise AssertionError(f"Invalid frontmatter line: {line}")
+        fields[key.strip()] = value.strip()
+
+    raise AssertionError("SKILL.md frontmatter must be closed")  # reached only when no closing `---` was found
+
+
+class FesaFeatureDefinitionSkillTests(unittest.TestCase):  # Content checks for the fesa-feature-definition SKILL.md file.
+    def test_skill_file_exists_with_required_frontmatter(self):  # file exists and frontmatter has the expected name/description
+        self.assertTrue(SKILL_PATH.exists(), "fesa-feature-definition SKILL.md is missing")
+
+        fields = parse_frontmatter(read_skill())
+
+        self.assertEqual(set(fields), {"name", "description"})  # exactly these two keys, no extras
+        self.assertEqual(fields["name"], "fesa-feature-definition")
+        self.assertIn("Use when", fields["description"])
+        self.assertIn("FESA solver feature requests", fields["description"])
+        self.assertIn("requirements", fields["description"])
+        self.assertIn("research questions", fields["description"])
+        self.assertIn("acceptance criteria", fields["description"])
+        self.assertIn("verification matrices", fields["description"])
+
+    def test_skill_body_defines_core_workflow_and_inputs(self):  # required section headings and input document paths are present
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # each literal must appear somewhere in the file
+            "## Inputs",
+            "## Workflow",
+            "## Output Contract",
+            "## Boundaries",
+            "## Quality Gate",
+            "docs/SOLVER_SKILL_DESIGN.md",
+            "docs/SOLVER_AGENT_DESIGN.md",
+            "AGENTS.md",
+            "docs/requirements/<feature-id>.md",
+            "docs/research/<feature-id>-research.md",
+        ):
+            self.assertIn(required_text, body)
+
+    def test_skill_body_defines_requirement_and_verification_contract(self):  # packet sections, requirement id pattern and status value are present
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # each literal must appear somewhere in the file
+            "Feature Definition Packet",
+            "shall",
+            "Requirement Verification Matrix",
+            "Research Questions",
+            "Reference Artifact Needs",
+            "Tolerance Decisions",
+            "Downstream Handoff",
+            "FESA-REQ-<FEATURE>-###",
+            "needs-user-decision",
+        ):
+            self.assertIn(required_text, body)
+
+    def test_skill_body_enforces_scope_boundaries(self):  # the prohibition sentences are present verbatim
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # exact sentences, including the trailing period
+            "Do not finalize FEM formulations.",
+            "Do not implement C++ code.",
+            "Do not run Abaqus, Nastran, or any reference solver.",
+            "Do not generate reference CSVs.",
+            "Do not approve release readiness.",
+        ):
+            self.assertIn(required_text, body)
+
+
+if __name__ == "__main__":  # run the tests when this file is executed directly
+    unittest.main()
diff --git a/scripts/test_fesa_fem_specification_skill.py b/scripts/test_fesa_fem_specification_skill.py
new file mode 100644
index 0000000..724d601
--- /dev/null
+++ b/scripts/test_fesa_fem_specification_skill.py
@@ -0,0 +1,124 @@
+import unittest
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parents[1]  # directory one level above this script's own folder
+SKILL_PATH = ROOT / ".codex" / "skills" / "fesa-fem-specification" / "SKILL.md"  # skill document under test
+
+
+def read_skill():  # Return the complete text of SKILL_PATH, decoded as UTF-8.
+    return SKILL_PATH.read_text(encoding="utf-8")
+
+
+def parse_frontmatter(text):  # Parse the leading `---` delimited block of `key: value` lines into a dict of stripped strings.
+    lines = text.splitlines()
+    if not lines or lines[0] != "---":  # the very first line must be exactly the opening delimiter
+        raise AssertionError("SKILL.md must start with YAML frontmatter")
+
+    fields = {}
+    for line in lines[1:]:
+        if line == "---":  # closing delimiter: frontmatter is complete
+            return fields
+        key, sep, value = line.partition(":")  # split on the first colon only, so values may contain colons
+        if not sep:  # no colon on this line (a blank line is rejected here too)
+            raise AssertionError(f"Invalid frontmatter line: {line}")
+        fields[key.strip()] = value.strip()
+
+    raise AssertionError("SKILL.md frontmatter must be closed")  # reached only when no closing `---` was found
+
+
+class FesaFemSpecificationSkillTests(unittest.TestCase):  # Content checks for the fesa-fem-specification SKILL.md file.
+    def test_skill_file_exists_with_required_frontmatter(self):  # file exists and frontmatter has the expected name/description
+        self.assertTrue(SKILL_PATH.exists(), "fesa-fem-specification SKILL.md is missing")
+
+        fields = parse_frontmatter(read_skill())
+
+        self.assertEqual(set(fields), {"name", "description"})  # exactly these two keys, no extras
+        self.assertEqual(fields["name"], "fesa-fem-specification")
+        self.assertIn("Use when", fields["description"])
+        self.assertIn("FESA FEM formulations", fields["description"])
+        self.assertIn("numerical review", fields["description"])
+        self.assertIn("Abaqus .inp", fields["description"])
+        self.assertIn("CSV schemas", fields["description"])
+        self.assertIn("implementation-planning handoffs", fields["description"])
+
+    def test_skill_body_defines_workflow_and_inputs(self):  # workflow step names, headings and input document paths are present
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # each literal must appear somewhere in the file
+            "## Inputs",
+            "## Workflow",
+            "INPUT CHECK",
+            "FORMULATION SPEC",
+            "NUMERICAL REVIEW CHECK",
+            "I/O CONTRACT",
+            "HANDOFF",
+            "## Quality Gate",
+            "docs/SOLVER_SKILL_DESIGN.md",
+            "docs/SOLVER_AGENT_DESIGN.md",
+            "AGENTS.md",
+            "docs/requirements/<feature-id>.md",
+            "docs/research/<feature-id>-research.md",
+        ):
+            self.assertIn(required_text, body)
+
+    def test_skill_body_defines_formulation_contract(self):  # formulation section names and key terms are present
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # case-sensitive substring checks
+            "Strong Form",
+            "Weak or Variational Form",
+            "Discretization",
+            "Kinematics",
+            "Element Equations",
+            "Mapping and Numerical Integration",
+            "Output Recovery",
+            "Numerical Risks",
+            "partition of unity",
+            "Kronecker delta",
+            "Jacobian",
+            "derivative transform",
+        ):
+            self.assertIn(required_text, body)
+
+    def test_skill_body_defines_io_contract(self):  # I/O section names, Abaqus keywords and CSV file names are present
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # substring checks; e.g. "*NODE" is also satisfied by "*NODE OUTPUT"
+            "Abaqus Input Scope",
+            "Model Data Mapping",
+            "History Data Mapping",
+            "Internal Model Contract",
+            "Output and CSV Schemas",
+            "*NODE",
+            "*ELEMENT",
+            "*MATERIAL",
+            "*ELASTIC",
+            "*BOUNDARY",
+            "*STEP",
+            "*OUTPUT",
+            "*NODE OUTPUT",
+            "*ELEMENT OUTPUT",
+            "displacements.csv",
+            "reactions.csv",
+            "element_forces.csv",
+            "stresses.csv",
+        ):
+            self.assertIn(required_text, body)
+
+    def test_skill_body_enforces_scope_boundaries(self):  # the prohibition sentences are present verbatim
+        body = read_skill()  # whole file text, frontmatter included
+
+        for required_text in (  # exact sentences, including the trailing period
+            "Do not implement C++ code.",
+            "Do not design C++ APIs.",
+            "Do not implement parsers.",
+            "Do not run Abaqus, Nastran, or any reference solver.",
+            "Do not generate reference CSVs.",
+            "Do not approve release readiness.",
+        ):
+            self.assertIn(required_text, body)
+
+
+if __name__ == "__main__":  # run the tests when this file is executed directly
+    unittest.main()
diff --git a/scripts/test_formulation_agent_config.py b/scripts/test_formulation_agent_config.py
index c1221e6..aad6ee3 100644
--- a/scripts/test_formulation_agent_config.py
+++ b/scripts/test_formulation_agent_config.py
@@ -19,7 +19,7 @@ class FormulationAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "formulation-agent")
         self.assertIn("FEM formulation", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_formulation_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_implementation_agent_config.py b/scripts/test_implementation_agent_config.py
index bfb8318..f35f52c 100644
--- a/scripts/test_implementation_agent_config.py
+++ b/scripts/test_implementation_agent_config.py
@@ -18,7 +18,7 @@ class ImplementationAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "implementation-agent")
         self.assertIn("C++17/MSVC", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_implementation_agent_instructions_define_tdd_execution_contract(self):
diff --git a/scripts/test_implementation_planning_agent_config.py b/scripts/test_implementation_planning_agent_config.py
index c9291cb..4463da4 100644
--- a/scripts/test_implementation_planning_agent_config.py
+++ b/scripts/test_implementation_planning_agent_config.py
@@ -19,7 +19,7 @@ class ImplementationPlanningAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "implementation-planning-agent")
         self.assertIn("TDD-first C++/MSVC implementation plans", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_implementation_planning_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_io_definition_agent_config.py b/scripts/test_io_definition_agent_config.py
index 76f2375..b8d54c7 100644
--- a/scripts/test_io_definition_agent_config.py
+++ b/scripts/test_io_definition_agent_config.py
@@ -19,7 +19,7 @@ class IoDefinitionAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "io-definition-agent")
         self.assertIn("Abaqus input-file subsets", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_io_definition_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_numerical_review_agent_config.py b/scripts/test_numerical_review_agent_config.py
index 1c6aa80..109f807 100644
--- a/scripts/test_numerical_review_agent_config.py
+++ b/scripts/test_numerical_review_agent_config.py
@@ -19,7 +19,7 @@ class NumericalReviewAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "numerical-review-agent")
         self.assertIn("numerical correctness", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_numerical_review_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_physics_evaluation_agent_config.py b/scripts/test_physics_evaluation_agent_config.py
index 75c98d4..32ba58b 100644
--- a/scripts/test_physics_evaluation_agent_config.py
+++ b/scripts/test_physics_evaluation_agent_config.py
@@ -19,7 +19,7 @@ class PhysicsEvaluationAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "physics-evaluation-agent")
         self.assertIn("physical plausibility", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_physics_evaluation_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_reference_model_agent_config.py b/scripts/test_reference_model_agent_config.py
index 8395aed..abc643b 100644
--- a/scripts/test_reference_model_agent_config.py
+++ b/scripts/test_reference_model_agent_config.py
@@ -19,7 +19,7 @@ class ReferenceModelAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "reference-model-agent")
         self.assertIn("reference model packages", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_reference_model_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_reference_verification_agent_config.py b/scripts/test_reference_verification_agent_config.py
index 64404ea..26670a7 100644
--- a/scripts/test_reference_verification_agent_config.py
+++ b/scripts/test_reference_verification_agent_config.py
@@ -19,7 +19,7 @@ class ReferenceVerificationAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "reference-verification-agent")
         self.assertIn("stored Abaqus reference CSV artifacts", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_reference_verification_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_release_agent_config.py b/scripts/test_release_agent_config.py
index f059392..5181e48 100644
--- a/scripts/test_release_agent_config.py
+++ b/scripts/test_release_agent_config.py
@@ -19,7 +19,7 @@ class ReleaseAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "release-agent")
         self.assertIn("release readiness", data["description"])
         self.assertEqual(data["sandbox_mode"], "workspace-write")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_release_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_requirement_agent_config.py b/scripts/test_requirement_agent_config.py
index ee8b9fe..78b526b 100644
--- a/scripts/test_requirement_agent_config.py
+++ b/scripts/test_requirement_agent_config.py
@@ -19,7 +19,7 @@ class RequirementAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "requirement-agent")
         self.assertIn("verifiable requirements", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_requirement_agent_instructions_enforce_boundaries(self):
diff --git a/scripts/test_research_agent_config.py b/scripts/test_research_agent_config.py
index 7f592f8..173890e 100644
--- a/scripts/test_research_agent_config.py
+++ b/scripts/test_research_agent_config.py
@@ -19,7 +19,7 @@ class ResearchAgentConfigTests(unittest.TestCase):
         self.assertEqual(data["name"], "research-agent")
         self.assertIn("FEM theory", data["description"])
         self.assertEqual(data["sandbox_mode"], "read-only")
-        self.assertEqual(data["model_reasoning_effort"], "high")
+        self.assertEqual(data["model_reasoning_effort"], "extra high")
         self.assertIn("developer_instructions", data)
 
     def test_research_agent_instructions_enforce_boundaries(self):