modify documents

2026-06-11 11:08:27 +09:00
parent 98eba54a12
commit 986cc9888e
35 changed files with 1984 additions and 169 deletions
@@ -1,5 +1,5 @@
 name = "build-test-executor-agent"
-description = "Runs Abaqus User Subroutine no-Abaqus Fortran validation, reference artifact validation, workspace validation, and opt-in Abaqus validation evidence collection."
+description = "Runs Abaqus User Subroutine no-Abaqus Fortran validation, reference artifact validation, workspace validation, and externally generated result evidence checks."
 sandbox_mode = "workspace-write"
 model_reasoning_effort = "extra high"

@@ -8,18 +8,18 @@ You are the Build/Test Executor Agent for Abaqus User Subroutine development.

 Mission:
 - Execute independent validation commands and summarize failures for correction.
- Collect no-Abaqus Fortran validation, reference artifact validation, workspace validation, and opt-in Abaqus validation evidence.
+- Collect no-Abaqus Fortran validation, reference artifact validation, workspace validation, and externally generated extracted CSV evidence.
 - Keep output aligned with AGENTS.md and docs/ABAQUS_SUBROUTINE_AGENT_DESIGN.md.

 Skill references:
 - Use $abaqus-fortran-tdd when running Fortran validation, recording RED/GREEN/VERIFY evidence, classifying build/test failures, or preparing build/test handoffs.
- Use $abaqus-subroutine-validation when checking artifact metadata, source hash, Abaqus version, compiler version, msg/dat/log tails, extracted CSV readiness, or opt-in Abaqus validation evidence.
+- Use $abaqus-subroutine-validation when checking artifact metadata, source hash, Abaqus version, compiler version, msg/dat/log/sta tails, extracted CSV readiness, or externally generated result evidence.

 Hard boundaries:
 - Do not edit source code.
 - Do not edit tests.
 - Do not edit reference artifacts.
- Do not run Abaqus unless HARNESS_ABAQUS_VALIDATION=run and HARNESS_ABAQUS_VALIDATION_COMMANDS are explicitly set.
+- Do not run Abaqus analyses. Validation evidence must come from externally generated ODB-extracted CSV artifacts.
 - Do not generate reference CSVs.
 - Do not approve readiness.

@@ -27,7 +27,7 @@ Validation contract:
 - Run python scripts/validate_fortran.py.
 - Run python scripts/validate_reference_artifacts.py.
 - Run python scripts/validate_workspace.py.
- If explicitly configured, run HARNESS_ABAQUS_VALIDATION=run through the workspace validation path.
+- Treat Abaqus solver results as externally generated artifacts; do not execute solver commands from this project.
 - Capture command, exit code, duration, stdout/stderr tail, and failure classification.

 Required output sections:
@@ -20,7 +20,7 @@ Hard boundaries:
 - Do not change interface contracts.
 - Do not change reference artifacts.
 - Do not change tolerance policies.
- Do not run Abaqus unless explicitly configured.
+- Do not run Abaqus analyses. Use no-Abaqus tests and externally generated result artifacts for evidence.
 - Do not generate reference CSVs.
 - Do not approve readiness.

@@ -17,7 +17,7 @@ Skill references:
 Hard boundaries:
 - Do not change requirements, formulations, interface contracts, test model contracts, reference artifacts, or tolerance policies unless explicitly asked.
 - Do not change reference artifacts.
- Do not run Abaqus unless the user explicitly configures HARNESS_ABAQUS_VALIDATION=run.
+- Do not run Abaqus analyses. Implement against no-Abaqus tests and externally generated result artifact contracts.
 - Do not generate reference CSVs.
 - Do not approve readiness.
 - Do not expand scope beyond the approved implementation plan.
@@ -20,7 +20,7 @@ Hard boundaries:
 - Do not edit tests.
 - Do not change reference artifacts.
 - Do not change tolerance policies.
- Do not run Abaqus unless explicitly configured.
+- Do not run Abaqus analyses. Verify externally generated ODB-extracted CSV artifacts only.
 - Do not generate reference CSVs.
 - Do not approve readiness.

@@ -1,6 +1,6 @@
 ---
 name: abaqus-fortran-tdd
-description: Use when planning, implementing, validating, or correcting Abaqus User Subroutine Fortran work with Intel oneAPI, no-Abaqus tests, Abaqus opt-in validation, and RED/GREEN/VERIFY evidence.
+description: Use when planning, implementing, validating, or correcting Abaqus User Subroutine Fortran work with Intel oneAPI, no-Abaqus tests, externally generated result artifacts, and RED/GREEN/VERIFY evidence.
 ---

 # Abaqus Fortran TDD
@@ -26,7 +26,7 @@ Read first:
 4. GREEN: implement the minimum Fortran kernel, Abaqus wrapper, or manifest change needed for the task.
 5. VERIFY: run the targeted command, then `python scripts/validate_fortran.py`, `python scripts/validate_reference_artifacts.py`, and `python scripts/validate_workspace.py`.
 6. Use Intel oneAPI Fortran discovery. Prefer `ifx`; use `ifort` when `ifx` is unavailable.
-7. For failure triage, classify as `fortran-compile | link | no-abaqus-test | abaqus-validation | reference-artifact | harness | environment | upstream-contract`.
+7. For failure triage, classify as `fortran-compile | link | no-abaqus-test | external-result-validation | reference-artifact | harness | environment | upstream-contract`.

 ## Output Contract

@@ -35,7 +35,7 @@ Produce one of these, depending on role: `docs/implementation-plans/<feature-id>
 ## Boundaries

 - Do not change requirements, formulations, interface contracts, test model contracts, reference artifacts, or tolerance policies unless explicitly asked.
- Do not run Abaqus unless `HARNESS_ABAQUS_VALIDATION=run` and explicit commands are provided.
+- Do not run Abaqus analyses. Use externally generated ODB-extracted CSV artifacts for solver-result evidence.
 - Do not generate reference CSVs.
 - Do not approve release readiness.
 - Do not expand scope beyond the approved implementation plan.
@@ -20,7 +20,7 @@ Read first:

 1. Run gate audit over requirements, research, formulation, interface, TDD test models, Fortran implementation evidence, subroutine validation, and physics sanity.
 2. Build Acceptance Traceability from requirements to tests, artifacts, validation evidence, and limitations.
-3. Record Validation Evidence, including no-Abaqus tests, `python scripts/validate_workspace.py`, and any opt-in Abaqus validation evidence.
+3. Record Validation Evidence, including no-Abaqus tests, `python scripts/validate_workspace.py`, and externally generated ODB-extracted CSV comparison evidence.
 4. Record Known Limitations, deferred requirements, unsupported entry points, missing artifacts, unresolved defects, accepted risks, and open items.
 5. Return a verdict: `ready-for-release`, `needs-documentation`, `needs-correction`, `needs-reference-artifacts`, `needs-upstream-decision`, or `blocked`.

@@ -1,11 +1,11 @@
 ---
 name: abaqus-subroutine-validation
-description: Use when validating Abaqus User Subroutine outputs against stored reference artifacts, checking metadata, source hashes, Abaqus and compiler versions, msg/dat/log tails, CSV schemas, tolerances, and opt-in Abaqus validation evidence.
+description: Use when validating Abaqus User Subroutine outputs against stored reference artifacts, checking metadata, source hashes, Abaqus and compiler versions, msg/dat/log/sta tails, externally generated CSV schemas, and tolerances.
 ---

 # Abaqus Subroutine Validation

-Use this skill to validate implemented subroutines against no-Abaqus results and stored Abaqus reference artifacts without changing either side. Subroutine validation is the owned artifact for this skill.
+Use this skill to validate implemented subroutines against no-Abaqus results and externally generated Abaqus reference artifacts without changing either side. Subroutine validation is the owned artifact for this skill.

 ## Inputs

@@ -21,9 +21,9 @@ Read first:
 ## Workflow

 1. Validate artifact metadata with `python scripts/validate_reference_artifacts.py`.
-2. For `ready-for-comparison`, check model `.inp`, metadata.json, source hash, Abaqus version, compiler version, msg/dat/log tail files, and declared CSV files.
+2. For `ready-for-comparison`, check model `.inp`, metadata.json, source hash, Abaqus version, compiler version, msg/dat/log/sta tail files, ODB extraction provenance, and declared CSV files.
 3. Run no-Abaqus comparison commands when available.
-4. Run Abaqus only when explicitly configured through `HARNESS_ABAQUS_VALIDATION=run` and `HARNESS_ABAQUS_VALIDATION_COMMANDS`.
+4. Do not run Abaqus analyses; consume externally generated ODB-extracted CSV artifacts only.
 5. Compare generated CSVs against reference CSVs by documented IDs, units, coordinate system, output location, component naming, and tolerance.
 6. Classify failures as `missing-reference-artifact | missing-generated-output | schema-mismatch | id-mismatch | source-hash-mismatch | unit-or-coordinate-mismatch | tolerance-failure | nonfinite-result | environment | upstream-contract`.

@@ -38,13 +38,13 @@ Produce or revise `docs/reference-verifications/<feature-id>-reference-verificat
 - Do not change reference artifacts.
 - Do not change tolerance policies.
 - Do not generate reference CSVs.
- Do not run Abaqus unless the opt-in environment contract is explicit.
+- Do not run Abaqus analyses.
 - Do not approve release readiness.

 ## Quality Gate

 - `ready-for-comparison` artifacts pass metadata validation.
- Source hash, Abaqus version, compiler version, msg/dat/log provenance, and CSV schemas are reported.
+- Source hash, Abaqus version, compiler version, msg/dat/log/sta provenance, ODB extraction provenance, and CSV schemas are reported.
 - Every compared quantity reports max absolute error, max relative error, RMS error when applicable, worst row, and pass/fail.
 - Nonfinite values are reported explicitly.

@@ -7,7 +7,7 @@ description: "Use when reviewing this Abaqus User Subroutine Harness repository:

 ## Overview

-Use this skill to review Harness work against repository rules, Abaqus User Subroutine workflow, Fortran TDD, no-Abaqus validation, reference artifact contracts, and explicit Abaqus opt-in validation requirements. Prioritize bugs, regressions, missing tests, and rule violations.
+Use this skill to review Harness work against repository rules, Abaqus User Subroutine workflow, Fortran TDD, no-Abaqus validation, reference artifact contracts, and externally generated extracted CSV validation requirements. Prioritize bugs, regressions, missing tests, and rule violations.

 ## Review Process

@@ -30,7 +30,7 @@ Use this skill to review Harness work against repository rules, Abaqus User Subr
 | Tests | Are new or changed Fortran behaviors covered by no-Abaqus Fortran/Python tests or harness tests? |
 | TDD Guard | Would Fortran production edits be blocked without related tests? |
 | References | Do reference artifacts include `.inp`, source hash, Abaqus version, compiler version, msg/dat/log/sta tail, and extracted CSV contracts when required? |
-| Abaqus Opt-in | Is `HARNESS_ABAQUS_VALIDATION=run` used only when explicitly configured? |
+| External Results | Are solver-result checks based on externally generated ODB-extracted CSV artifacts rather than local solver runs? |
 | Build | Do the Python, Fortran, reference artifact, and workspace validation commands pass or report expected skips? |

 ## Output Format
@@ -44,7 +44,7 @@ If there are findings, list them first in severity order with file and line refe
 | Tests | PASS/FAIL | {detail} |
 | TDD Guard | PASS/FAIL | {detail} |
 | Reference Artifacts | PASS/FAIL | {detail} |
-| Abaqus Opt-in | PASS/FAIL | {detail} |
+| External Results | PASS/FAIL | {detail} |
 | Validation | PASS/FAIL | {detail} |

 When there are no findings, say that clearly, then mention commands not run or remaining risk.
@@ -7,7 +7,7 @@ description: "Use when planning or running this Abaqus User Subroutine Harness:

 ## Overview

-Use this skill to turn a user-approved Abaqus User Subroutine task into small, self-contained Harness steps. Keep every step grounded in repository docs, Fortran TDD, no-Abaqus validation, reference artifact validation, and explicit Abaqus opt-in rules.
+Use this skill to turn a user-approved Abaqus User Subroutine task into small, self-contained Harness steps. Keep every step grounded in repository docs, Fortran TDD, no-Abaqus validation, and externally generated reference artifact validation.

 ## Workflow

@@ -24,7 +24,7 @@ Use this skill to turn a user-approved Abaqus User Subroutine task into small, s
 - Specify interfaces and signatures only when the step owns the interface contract.
 - Use executable acceptance criteria, not abstract statements.
 - For Fortran behavior changes, require tests first and name the no-Abaqus Fortran/Python driver test or `tests/fortran/manifest.json` entry.
- Preserve the rule that Abaqus is not run by default; use `HARNESS_ABAQUS_VALIDATION=run` only when explicitly requested and configured.
+- Preserve the rule that this project does not run Abaqus analyses; solver-result evidence must be externally generated ODB-extracted CSV artifacts.

 ## Phase Files

@@ -45,7 +45,7 @@ Create `phases/{task-name}/index.json`:

 ```json
 {
-  "project": "FESA Harness",
+  "project": "Abaqus User Subroutine Development",
  "phase": "<task-name>",
  "steps": [
    { "step": 0, "name": "requirements", "status": "pending" },
@@ -95,13 +95,13 @@ python scripts/validate_workspace.py

 ## Validation Notes

- Use `HARNESS_ABAQUS_VALIDATION=run` only when the step explicitly owns Abaqus validation and the command contract is provided.
+- Use externally generated ODB-extracted CSV artifacts for solver-result validation evidence.
 - Update `phases/{task-name}/index.json` with `completed`, `error`, or `blocked` and a concrete summary/reason.

 ## Forbidden

 - Do not add JavaScript/TypeScript/npm fallback.
- Do not run Abaqus by default.
+- Do not run Abaqus analyses from this project.
 - Do not generate reference CSVs unless the user explicitly authorized a reference-artifact phase.
 - Do not break existing tests.
 ```
@@ -4,7 +4,7 @@
 - 이 저장소의 목적은 Abaqus User Subroutine을 요구조건 분석부터 검증까지 일관되게 개발하는 것이다.
 - User Subroutine production code는 Fortran을 기본 언어로 작성한다.
 - Intel oneAPI Fortran compiler를 기본 컴파일러 체계로 사용한다. 자동 탐지는 `ifx`를 우선하고, 없으면 `ifort`를 사용한다.
- Abaqus 실행은 라이선스, 설치 버전, 컴파일러 연동 상태에 의존하므로 기본 검증에서 실행하지 않는다.
+- 이 프로젝트는 Abaqus job 해석을 직접 실행하지 않는다. 해석은 사용자가 다른 Abaqus PC에서 수행하고, 이 저장소는 ODB에서 추출된 CSV와 provenance artifact를 검증 입력으로 사용한다.
 - `.codex/agents/`와 `.codex/skills/`는 Abaqus User Subroutine 개발 단계별 전문 agent와 작업 규칙의 기준이다.

 ## 기술 스택
@@ -20,9 +20,9 @@
 2. 책, 논문, Abaqus manual, benchmark 등 연구자료 조사
 3. 코드 구현을 위한 유한요소 정식화
 4. Subroutine 입출력 파라미터와 Abaqus ABI 계약 정의
-5. TDD 방법을 사용하는 no-Abaqus test model 및 reference artifact 계약 작성
+5. TDD 방법을 사용하는 no-Abaqus test model 및 외부 생성 reference artifact 계약 작성
 6. Fortran 코드 구현
-7. Subroutine 검증, 물리 타당성 검토, readiness audit
+7. ODB에서 추출된 CSV 기반 Subroutine 검증, 물리 타당성 검토, readiness audit

 ## Agent / Skill 운영 규칙
 - 장기 작업은 위 7단계 gate로 나누고, 각 gate는 독립적으로 검토 가능한 문서 산출물을 남긴다.
@@ -30,17 +30,26 @@
 - Requirement Agent는 `docs/requirements/<feature-id>.md`에 요구조건과 Requirement Verification Matrix를 작성한다.
 - Research Agent는 `docs/research/<feature-id>-research.md`에 source-backed fact, inference, applicability limit를 분리해 기록한다.
 - Formulation Agent와 Numerical Review Agent는 formulation, tangent, state variable, numerical risk를 구현 전 검토한다.
- I/O Definition Agent는 Abaqus ABI argument, update responsibility, tensor order, unit, CSV schema를 명시한다.
- Reference Model Agent는 `tests/fortran/manifest.json` 계획과 `references/<feature-id>/<model-id>/` artifact 계약을 정의한다.
+- I/O Definition Agent는 Abaqus ABI argument, update responsibility, tensor order, unit, ODB 추출 CSV schema를 명시한다.
+- Reference Model Agent는 `tests/fortran/manifest.json` 계획과 `references/<feature-id>/<model-id>/` artifact 계약을 정의한다. 최소 artifact는 `model.inp`, extracted CSV, `.msg/.dat/.log/.sta` tail files를 포함해야 한다.
 - Implementation Agent는 승인된 implementation plan만 구현하며 RED -> GREEN -> VERIFY 순서를 지킨다.
 - Validation, Physics Evaluation, Release Agent는 source code, tests, reference artifacts, tolerances를 임의로 변경하지 않는다.

+## 작업 상태 공유 파일
+- 모든 agent는 새 작업을 시작할 때 `AGENTS.md`를 읽은 뒤 루트의 `PLAN.md`, `PROGRESS.md`, `WORKNOTE.md`를 확인해 현재 목표, 진행 상태, 알려진 시행착오를 파악한다.
+- `PLAN.md`는 현재 목표, phase/step 분해, 성공 기준, 범위 제외, 미해결 결정을 관리한다. 계획이 바뀌면 구현 전에 먼저 갱신한다.
+- `PROGRESS.md`는 완료된 일, 진행 중인 일, 막힌 일, 다음 agent가 바로 수행해야 할 next action, 마지막 검증 명령 결과를 관리한다. step 시작/완료/blocked/error 전환 시 갱신한다.
+- `WORKNOTE.md`는 실수, 실패한 명령, 잘못된 가정, 우회 방법, 재발 방지 메모를 기록한다. 단순 진행 로그를 중복하지 말고 다음 agent에게 도움이 되는 시행착오만 남긴다.
+- 여러 agent가 나눠 작업할 때는 `PROGRESS.md`의 현재 owner/active step을 먼저 확인하고, 자신이 맡은 범위와 변경 파일을 기록한 뒤 작업한다.
+- phase 파일(`phases/<phase>/stepN.md`)과 세 공유 파일이 충돌하면 더 구체적인 phase step 지시를 우선하되, 충돌 사실과 처리 결정을 `WORKNOTE.md`에 기록한다.
+- 작업을 마치기 전에는 `PROGRESS.md`에 실제 수행한 검증 명령과 결과를 남기고, 새 시행착오가 있으면 `WORKNOTE.md`를 갱신한다.
+
 ## 아키텍처 규칙
 - CRITICAL: 기본 검증 경로는 `python scripts/validate_workspace.py`이다.
- CRITICAL: Abaqus 실행은 기본 검증에서 수행하지 않는다. `HARNESS_ABAQUS_VALIDATION=run`과 `HARNESS_ABAQUS_VALIDATION_COMMANDS`가 명시된 경우에만 실행한다.
+- CRITICAL: 이 프로젝트의 수치 검증은 Abaqus job 실행이 아니라 외부 생성 extracted result validation이다. ODB는 직접 파싱하지 않고, 사용자가 ODB에서 추출한 CSV를 schema/tolerance로 비교한다.
 - CRITICAL: 새 기능 또는 동작 변경은 테스트를 먼저 작성하고 실패를 확인한 뒤 구현한다.
 - CRITICAL: Fortran user subroutine production file을 바꿀 때는 관련 no-Abaqus Fortran/Python driver test가 있어야 한다.
- CRITICAL: Abaqus reference artifact 생성, 갱신, 승인된 solver output 복원은 명시적으로 요청된 phase에서만 수행한다.
+- CRITICAL: Abaqus reference artifact 등록, 갱신, 승인된 extracted result 복원은 명시적으로 요청된 phase에서만 수행한다.
 - Abaqus ABI wrapper는 얇게 유지하고, 테스트 가능한 계산 로직은 no-Abaqus kernel 또는 driver에서 검증 가능하게 분리한다.
 - Reference artifacts는 승인 후 read-only evidence로 취급한다.
 - Codex custom agent의 `model_reasoning_effort` 기본값은 `extra high`로 둔다.
@@ -58,16 +67,14 @@ python scripts/execute.py <phase-dir>
 python scripts/execute.py <phase-dir> --push
 ```

-## Fortran / Abaqus 검증 기본값
+## Fortran / External Result 검증 기본값
 - `HARNESS_FORTRAN_VALIDATION=auto`: `tests/fortran/manifest.json`이 있으면 Intel Fortran no-Abaqus tests를 실행한다.
 - `HARNESS_FORTRAN_VALIDATION=off`: Fortran validation을 건너뛴다.
 - `HARNESS_FORTRAN_VALIDATION=detect`: manifest와 compiler 감지만 수행하고 compile/run command를 만들지 않는다.
 - `HARNESS_FORTRAN_COMPILER=auto`: `ifx`를 우선 사용하고, 없으면 `ifort`를 사용한다.
 - `HARNESS_ONEAPI_VARS_BAT`: Intel oneAPI 환경 설정 batch file override.
- `HARNESS_ABAQUS_VALIDATION=off`: 기본값이며 Abaqus job을 실행하지 않는다.
- `HARNESS_ABAQUS_VALIDATION=detect`: Abaqus executable 탐지만 수행한다.
- `HARNESS_ABAQUS_VALIDATION=run`: `HARNESS_ABAQUS_VALIDATION_COMMANDS`에 명시된 Abaqus command만 실행한다.
- `HARNESS_ABAQUS_USE_ONEAPI_ENV=auto`: Abaqus run command 앞에 oneAPI 환경 설정을 자동 적용할 수 있다.
+- Abaqus job 실행 관련 환경 변수는 legacy/diagnostic capability로만 취급한다. 새 validation workflow는 저장소 내부 Abaqus 해석 실행을 요구하거나 권장하지 않는다.
+- Reference validation의 기본 입력은 외부 Abaqus 해석 후 ODB에서 추출된 CSV와 provenance metadata이다.

 ## Supporting CMake / MSVC 검증 기본값
 - CMake project가 존재할 때만 CMake/CTest 경로를 실행한다.
@@ -86,6 +93,6 @@ python scripts/execute.py <phase-dir> --push
 - 문서 변경은 관련 구현을 대신하지 않는다. Requirements, research, formulation, interface, test model, implementation, validation evidence를 구분한다.
 - 모든 must requirement는 verification method, acceptance criteria, tolerance 또는 decision owner를 가져야 한다.
 - Public example repository는 layout과 학습 자료로만 사용한다. Acceptance evidence로 쓰려면 source, license, version, generated artifact provenance를 별도로 기록한다.
- Abaqus `.inp`, `.msg`, `.dat`, `.log`, extracted CSV는 reference artifact contract에 맞춰 저장한다.
+- Abaqus `model.inp`, extracted CSV, `.msg/.dat/.log/.sta` tail files는 reference artifact contract에 맞춰 저장한다. ODB 자체는 직접 파싱 대상이 아니며, 필요하면 opaque artifact 또는 hash/provenance로만 기록한다.
 - 커밋 전 hook은 Python self-test와 workspace validation을 실행해야 한다.
 - 커밋 메시지는 conventional commits 형식을 따른다: `feat:`, `fix:`, `docs:`, `refactor:`, `test:`.
@@ -0,0 +1,48 @@
+# PLAN.md
+
+## Purpose
+
+이 파일은 여러 AI Agent가 같은 작업을 이어받을 때 현재 목표와 실행 계획을 빠르게 파악하기 위한 공유 계획서이다.
+
+## Current Objective
+
+3D Euler-Bernoulli beam Abaqus/Standard `UEL` 구현을 하네스 phase 흐름에 따라 진행한다.
+
+## Active Phase
+
+- Phase directory: `phases/uel-3d-euler-beam`
+- Current planned entry point: `python scripts/execute.py uel-3d-euler-beam`
+- First pending step: `step0` requirements
+
+## Planned Steps
+
+1. Requirements: `docs/requirements/uel-3d-euler-beam.md`
+2. Research: `docs/research/uel-3d-euler-beam-research.md`
+3. Formulation: `docs/formulations/uel-3d-euler-beam.md`
+4. Numerical review: `docs/numerical-reviews/uel-3d-euler-beam.md`
+5. Interface contract: `docs/io-definitions/uel-3d-euler-beam.md`
+6. Test/reference model plan: `docs/reference-models/uel-3d-euler-beam.md`
+7. RED no-Abaqus Fortran tests and manifest
+8. GREEN Fortran implementation
+9. Validation, physics sanity, and readiness audit
+
+## Success Criteria
+
+- Every must requirement has verification method, acceptance criteria, and tolerance or decision owner.
+- Fortran production changes follow RED -> GREEN -> VERIFY.
+- No-Abaqus tests pass through `python scripts/validate_fortran.py` when compiler and manifest are available.
+- Workspace validation passes through `python scripts/validate_workspace.py`.
+- Solver-result validation is marked complete only after externally generated ODB-extracted CSV artifacts are provided and validated.
+
+## Out Of Scope Until Explicitly Approved
+
+- Running Abaqus jobs from this repository.
+- Direct ODB parsing in this repository.
+- Fabricating reference CSV, `.msg`, `.dat`, `.log`, or `.sta` evidence.
+- Geometric nonlinearity, Timoshenko shear deformation, mass, damping, distributed loads, plasticity, damage, thermal strain, warping torsion, and section offsets for the first UEL implementation.
+
+## Open Decisions
+
+- Confirm exact first-scope beam assumptions in the requirements step.
+- Decide `PROPS`/`JPROPS` ordering and orientation-vector convention in the interface step.
+- Decide exact no-Abaqus Fortran source and test file layout in the reference model step.
@@ -0,0 +1,42 @@
+# PROGRESS.md
+
+## Current State
+
+- Active objective: 3D Euler-Bernoulli beam Abaqus/Standard `UEL`
+- Active phase: `phases/uel-3d-euler-beam`
+- Active owner: unassigned
+- Current status: phase scaffold created; implementation has not started
+- Next action: run or manually execute `phases/uel-3d-euler-beam/step0.md` to create requirements
+
+## Completed
+
+- Created phase scaffold under `phases/uel-3d-euler-beam`.
+- Added shared coordination files: `PLAN.md`, `PROGRESS.md`, `WORKNOTE.md`.
+- Updated `AGENTS.md` to require agents to read and maintain the shared coordination files.
+
+## In Progress
+
+- None.
+
+## Blocked
+
+- None.
+
+## Last Verification
+
+Latest verification after adding shared coordination files:
+
+```bash
+python -m unittest discover -s scripts -p "test_*.py"
+python scripts/validate_reference_artifacts.py
+python scripts/validate_workspace.py
+```
+
+Result: all passed. `validate_workspace.py` reported no Fortran validation commands configured because `tests/fortran/manifest.json` does not exist yet.
+
+## Next Agent Checklist
+
+- Read `AGENTS.md`, `PLAN.md`, `PROGRESS.md`, and `WORKNOTE.md`.
+- Confirm no other owner is active in this file.
+- Start with `phases/uel-3d-euler-beam/step0.md`.
+- Update this file when step status changes or before handing off.
@@ -0,0 +1,13 @@
+# WORKNOTE.md
+
+## Purpose
+
+이 파일은 다음 AI Agent가 같은 시행착오를 반복하지 않도록 실수, 실패한 명령, 잘못된 가정, 우회 방법을 기록한다. 단순 진행 상황은 `PROGRESS.md`에 기록한다.
+
+## Notes
+
+- `phases/` 디렉터리는 처음에 없었으므로 3D Euler beam UEL phase scaffold를 새로 만들었다.
+- `AGENTS.md`는 이미 작업 중인 변경이 있는 상태였다. 이후 agent는 기존 변경을 되돌리지 말고 필요한 범위만 추가 수정해야 한다.
+- PowerShell에서 기본 출력 인코딩에 따라 Korean text가 깨져 보일 수 있다. `Get-Content -Raw -Encoding UTF8 AGENTS.md`처럼 UTF-8을 명시하면 정상 확인 가능하다.
+- `scripts/execute.py`는 phase step을 Codex subprocess로 실행하고, 각 step이 `phases/<phase>/index.json`의 status를 직접 갱신해야 한다. Step 지시문에는 이 요구가 명확해야 한다.
+- `python scripts/validate_workspace.py`는 Fortran manifest가 없으면 `validate_fortran.py`에서 "No Fortran validation commands configured."를 출력하고 성공할 수 있다. 이것은 구현 전 scaffold 상태에서는 정상이다.
@@ -1,7 +1,7 @@
 # Architecture Decision Records

 ## 철학
-Abaqus User Subroutine 개발은 일반 application code 개발보다 ABI, solver execution, compiler integration, reference artifact provenance의 영향을 크게 받는다. 이 프로젝트의 결정은 Abaqus 실행을 기본 전제로 삼지 않으면서도, Fortran code와 numerical behavior를 검증 가능한 단위로 나누는 방향을 따른다.
+Abaqus User Subroutine 개발은 일반 application code 개발보다 ABI, solver execution, compiler integration, reference artifact provenance의 영향을 크게 받는다. 이 프로젝트의 결정은 저장소 내부에서의 Abaqus 실행을 기본 전제로 삼지 않으면서도, Fortran code와 ODB에서 추출된 CSV로 확인하는 numerical behavior를 검증 가능한 단위로 나누는 방향을 따른다.

 ---

@@ -26,12 +26,12 @@ Abaqus User Subroutine 개발은 일반 application code 개발보다 ABI, solve

 **트레이드오프**: 작은 변경에도 문서화 비용이 생긴다. 단, 단순 correction은 기존 approved contract 안에서 Correction Agent가 최소 수정으로 처리할 수 있다.

-### ADR-004: 기본 검증은 no-Abaqus path로 유지한다
-**결정**: `python scripts/validate_workspace.py`는 기본 검증 entry point이며 Abaqus job을 자동 실행하지 않는다. Abaqus 실행은 `HARNESS_ABAQUS_VALIDATION=run`과 `HARNESS_ABAQUS_VALIDATION_COMMANDS`가 명시된 경우에만 수행한다.
+### ADR-004: 기본 검증은 no-Abaqus path와 외부 생성 CSV artifact validation으로 유지한다
+**결정**: `python scripts/validate_workspace.py`는 기본 검증 entry point이며 Abaqus job을 자동 실행하지 않는다. Abaqus 해석과 ODB CSV 추출은 사용자가 외부 Abaqus PC에서 수행하고, 이 저장소는 `references/<feature-id>/<model-id>/`에 등록된 extracted CSV artifact를 검증한다.

-**이유**: Abaqus 실행은 설치, 라이선스, target version, compiler integration, working directory, scratch behavior에 의존한다. 기본 검증이 Abaqus에 의존하면 개발 재현성이 떨어진다.
+**이유**: Abaqus 실행은 설치, 라이선스, target version, compiler integration, working directory, scratch behavior에 의존한다. 또한 ODB는 일반 Python이 아니라 Abaqus scripting environment가 필요한 결과 database이므로, 이 프로젝트의 기본 검증이 Abaqus 실행 또는 ODB 직접 파싱에 의존하면 개발 재현성이 떨어진다.

-**트레이드오프**: 기본 검증만으로 Abaqus runtime symbol resolution이나 actual solver behavior를 완전히 보장할 수 없다. 해당 evidence는 opt-in validation과 approved reference artifacts로 보완한다.
+**트레이드오프**: 기본 검증만으로 Abaqus runtime symbol resolution이나 actual solver behavior를 완전히 보장할 수 없다. 해당 evidence는 사용자가 외부에서 수행한 Abaqus 해석 결과, ODB 추출 CSV, log tail, metadata provenance를 approved reference artifacts로 등록해 보완한다.

 ### ADR-005: Abaqus ABI wrapper는 얇게 유지하고 계산 kernel은 no-Abaqus로 검증한다
 **결정**: Subroutine source는 가능한 한 thin Abaqus ABI wrapper와 testable kernel 또는 driver logic으로 분리한다.
@@ -40,12 +40,12 @@ Abaqus User Subroutine 개발은 일반 application code 개발보다 ABI, solve

 **트레이드오프**: Wrapper와 kernel 사이의 mapping code가 추가된다. 이 mapping 자체는 interface contract와 wrapper-level compile/smoke test로 검증해야 한다.

-### ADR-006: Reference artifacts는 metadata contract로 검증한다
-**결정**: Abaqus reference artifacts는 `references/<feature-id>/<model-id>/metadata.json`과 함께 보관하고, `scripts/validate_reference_artifacts.py`로 metadata, source hash, required files를 검증한다.
+### ADR-006: Reference artifacts는 extracted CSV와 metadata contract로 검증한다
+**결정**: Abaqus reference artifacts는 `references/<feature-id>/<model-id>/metadata.json`과 함께 보관하고, `scripts/validate_reference_artifacts.py`로 metadata, source hash, required files를 검증한다. 최소 bundle은 `model.inp`, extracted CSV, `.msg/.dat/.log/.sta` tail files를 포함한다. ODB 파일은 직접 parsing하지 않으며, 필요하면 opaque artifact 또는 hash/provenance로만 기록한다.

-**이유**: Abaqus output CSV만 있으면 어떤 source, Abaqus version, compiler, precision, command에서 생성됐는지 추적할 수 없다. Source hash와 log tail을 포함해야 comparison evidence로 사용할 수 있다.
+**이유**: Abaqus output CSV만 있으면 어떤 source, Abaqus version, compiler, precision, extraction script에서 생성됐는지 추적할 수 없다. Source hash와 log tail을 포함해야 comparison evidence로 사용할 수 있다. `.sta` tail은 해석 진행과 종료 상태를 확인하는 보조 evidence로 필요하다.

-**트레이드오프**: Reference artifact 준비 비용이 증가한다. 대신 stale artifact, source mismatch, missing provenance를 자동으로 탐지할 수 있다.
+**트레이드오프**: Reference artifact 준비 비용이 증가하고, 사용자는 Abaqus PC에서 ODB-to-CSV extraction을 별도로 수행해야 한다. 대신 stale artifact, source mismatch, missing provenance, schema mismatch를 자동으로 탐지할 수 있다.

 ### ADR-007: Fortran production 변경은 TDD guard 대상이다
 **결정**: `.f`, `.for`, `.f90`, `.f95`, `.f03`, `.f08` production source 변경은 관련 test file이 없으면 guard가 차단한다.
@@ -1,13 +1,14 @@
 # Architecture: Abaqus User Subroutine Development

 ## 목표
-이 저장소는 Abaqus User Subroutine 개발을 위한 agent-driven workflow와 검증 체계를 제공한다. 핵심 아키텍처는 단계별 specialist agent, gate 문서, no-Abaqus Fortran TDD, opt-in Abaqus validation, reference artifact metadata validation으로 구성된다.
+이 저장소는 Abaqus User Subroutine 개발을 위한 agent-driven workflow와 검증 체계를 제공한다. 핵심 아키텍처는 단계별 specialist agent, gate 문서, no-Abaqus Fortran TDD, 외부 생성 ODB 추출 CSV validation, reference artifact metadata validation으로 구성된다.

 ## 주요 원칙
 - Abaqus User Subroutine 개발이 프로젝트의 중심이다.
 - Fortran source는 Abaqus ABI wrapper와 testable kernel/driver logic을 가능한 한 분리한다.
 - 기본 검증은 Abaqus를 실행하지 않는다.
- Abaqus 실행과 reference artifact 생성은 명시적으로 승인된 환경에서만 수행한다.
+- Abaqus 해석 실행과 ODB CSV 추출은 사용자가 외부 Abaqus PC에서 수행한다.
+- 이 프로젝트는 ODB를 직접 파싱하지 않고, 추출된 CSV와 metadata를 검증한다.
 - Requirements, research, formulation, interface, test model, implementation, validation 산출물을 섞지 않는다.

 ## 디렉토리 구조
@@ -38,7 +39,7 @@ scripts/
 tests/
 └── fortran/manifest.json       # Optional no-Abaqus Fortran test manifest
 references/
-└── <feature-id>/<model-id>/    # Optional approved Abaqus reference artifacts
+└── <feature-id>/<model-id>/    # External Abaqus result artifacts for CSV comparison
 phases/
 └── <phase-id>/                 # Optional staged execution plans
 ```
@@ -98,10 +99,17 @@ references/<feature-id>/<model-id>/
 ├── job.msg.tail.txt
 ├── job.dat.tail.txt
 ├── job.log.tail.txt
+├── job.sta.tail.txt
+├── result.odb.sha256           # Optional ODB hash, recorded when the ODB itself cannot or should not be stored
+├── extraction/
+│   └── extract_odb_to_csv.py    # Optional provenance copy of user-run extraction script
+└── extracted/
    └── *.csv
 ```

-`metadata.json` schema version은 `abaqus-user-subroutine-artifact-v1`이다. `artifact_status=ready-for-comparison`인 artifact는 Abaqus version, precision, command, compiler vendor/name/version, entry points, source file hashes, input file, output tails, declared CSV files를 모두 가져야 한다.
+`metadata.json` schema version은 `abaqus-user-subroutine-artifact-v1`이다. `artifact_status=ready-for-comparison`인 artifact는 Abaqus version, precision, compiler vendor/name/version, entry points, source file hashes, input file, output tails, ODB extraction provenance, declared CSV files를 모두 가져야 한다.
+
+수치 검증은 ODB 직접 parsing이 아니라 `extracted/*.csv`의 schema/tolerance comparison이다. CSV row는 비교 가능한 최소 식별자로 step, frame/time, instance, node 또는 element label, integration point, section point, output position, component, coordinate system, unit, value를 포함해야 한다. Feature별 interface contract는 필요한 column과 tolerance를 더 좁게 정의한다.

 Reference artifacts는 생성 후 검증 입력으로 취급한다. Validation agent는 source code, tests, tolerances, reference artifacts를 임의 수정하지 않는다.

@@ -114,10 +122,20 @@ Default workspace validation:
 -> scripts/validate_reference_artifacts.py
 -> scripts/validate_fortran.py
 -> optional CMake/CTest path if CMake project exists
-> optional Abaqus command path only when HARNESS_ABAQUS_VALIDATION=run
 ```

-`HARNESS_ABAQUS_VALIDATION=detect`는 Abaqus executable 탐지만 보고한다. `HARNESS_ABAQUS_VALIDATION=run`은 `HARNESS_ABAQUS_VALIDATION_COMMANDS`가 없으면 configuration error로 실패한다.
+Standard numerical validation flow:
+```text
+Fortran subroutine implemented
+-> user-authored model.inp
+-> user runs Abaqus with the subroutine on another PC
+-> user extracts ODB quantities to CSV
+-> user places model.inp, extracted CSV, msg/dat/log/sta tails, and metadata under references/
+-> scripts/validate_reference_artifacts.py checks artifact completeness
+-> comparison tooling checks CSV schema, IDs, units, coordinate systems, and tolerances
+```
+
+Existing Abaqus execution environment variables are treated as legacy/diagnostic script capabilities, not the project validation workflow. New documents and agents should describe solver evidence as externally generated artifacts, not as commands run by this repository.

 ## Hook 흐름
 ```text
@@ -3,7 +3,7 @@
 ## 목표
 이 프로젝트는 Abaqus User Subroutine을 요구조건 분석, 연구, 유한요소 정식화, ABI 정의, TDD test model 설계, Fortran 구현, 검증까지 일관된 agent-driven workflow로 개발하게 한다.

-기본 목표는 Abaqus가 없는 환경에서도 가능한 검증을 최대화하고, Abaqus 실행이 필요한 검증은 명시적으로 opt-in한 환경에서만 수행하는 것이다.
+기본 목표는 Abaqus가 없는 환경에서도 가능한 검증을 최대화하고, 수치 결과 검증은 사용자가 다른 Abaqus PC에서 수행한 해석의 ODB 추출 CSV를 schema/tolerance로 비교하는 것이다. 이 프로젝트는 Abaqus job 해석을 직접 실행하지 않는다.

 ## 사용자
 - Abaqus User Subroutine을 개발하는 엔지니어
@@ -14,8 +14,9 @@
 ## 문제 정의
 - Abaqus User Subroutine은 Abaqus ABI, analysis procedure, tensor ordering, state variable, compiler/linker, reference artifact provenance가 모두 맞아야 한다.
 - Abaqus 실행은 설치, 라이선스, compiler integration에 의존하므로 모든 개발 단계의 기본 검증 수단으로 삼기 어렵다.
+- ODB는 Abaqus 환경의 scripting interface가 필요한 solver result database이므로 이 프로젝트의 기본 검증에서 직접 파싱하지 않는다.
 - LLM agent가 요구조건, formulation, interface, test, implementation, validation을 한 번에 섞으면 검증 불가능한 Fortran code가 생성되기 쉽다.
- 따라서 단계별 gate, 문서 산출물, no-Abaqus TDD, reference artifact metadata 검증이 필요하다.
+- 따라서 단계별 gate, 문서 산출물, no-Abaqus TDD, 외부에서 생성된 extracted CSV artifact의 metadata 검증이 필요하다.

 ## 핵심 워크플로우
 1. Requirement Agent가 feature requirement와 Requirement Verification Matrix를 작성한다.
@@ -23,9 +24,9 @@
 3. Formulation Agent가 finite element formulation, stress update, tangent, state variable, numerical integration을 정의한다.
 4. Numerical Review Agent가 formulation consistency, tangent consistency, stability risk, patch/tangent check 필요성을 검토한다.
 5. I/O Definition Agent가 Abaqus ABI arguments, input/output direction, tensor component order, unit, CSV schema를 정의한다.
-6. Reference Model Agent가 no-Abaqus driver tests와 Abaqus reference artifact bundle 계약을 설계한다.
+6. Reference Model Agent가 no-Abaqus driver tests와 외부 생성 Abaqus reference artifact bundle 계약을 설계한다.
 7. Implementation Planning Agent와 Implementation Agent가 RED -> GREEN -> VERIFY 순서로 Fortran code를 구현한다.
-8. Build/Test Executor, Reference Verification, Physics Evaluation, Release Agent가 validation evidence와 readiness를 검토한다.
+8. Build/Test Executor, Reference Verification, Physics Evaluation, Release Agent가 extracted CSV comparison evidence와 readiness를 검토한다.

 ## 핵심 기능
 1. `.codex/agents/`와 `.codex/skills/` 기반 단계별 specialist workflow
@@ -34,7 +35,7 @@
 4. Intel oneAPI Fortran 기반 no-Abaqus kernel/fake-driver validation
 5. `tests/fortran/manifest.json` 기반 Fortran compile/run test discovery
 6. `references/<feature-id>/<model-id>/metadata.json` 기반 reference artifact metadata validation
-7. Abaqus job 실행을 기본 검증에서 제외하고 `HARNESS_ABAQUS_VALIDATION=run`에서만 수행하는 opt-in validation
+7. `model.inp`, extracted CSV, `.msg/.dat/.log/.sta` tail files를 포함한 외부 생성 result artifact validation
 8. Optional CMake/CTest validation path for supporting native code when a CMake project exists

 ## 대표 Subroutine 범위
@@ -49,17 +50,19 @@
 - Interface contract는 Abaqus ABI argument direction, update responsibility, tensor order, unit, coordinate system, CSV schema를 명시한다.
 - Fortran implementation은 RED -> GREEN -> VERIFY evidence를 남긴다.
 - `python scripts/validate_workspace.py`가 기본 검증 entry point로 성공한다.
- Abaqus reference comparison은 approved reference artifacts가 `ready-for-comparison` 상태일 때만 수행한다.
+- Abaqus reference comparison은 ODB에서 추출된 CSV와 approved reference artifacts가 `ready-for-comparison` 상태일 때만 수행한다.
+- Reference artifact bundle은 최소 `model.inp`, extracted CSV, `.msg/.dat/.log/.sta` tail files를 포함한다.

 ## 제외 사항
 - 프로젝트 정체성을 Abaqus User Subroutine development 외의 다른 개발 체계로 정의하지 않는다.
 - 기본 validation에서 Abaqus job을 자동 실행하지 않는다.
- Agent가 reference CSV, `.msg`, `.dat`, `.log` evidence를 임의 생성하거나 승인하지 않는다.
+- Agent가 reference CSV, `.msg`, `.dat`, `.log`, `.sta` evidence를 임의 생성하거나 승인하지 않는다.
+- 프로젝트는 ODB 파일을 직접 파싱하지 않는다. ODB 추출은 사용자가 Abaqus PC에서 수행하고, 이 프로젝트는 추출된 CSV와 metadata를 검증한다.
 - 특정 재료모델, 요소모델, plasticity model, damage model의 물리적 타당성을 이 PRD에서 승인하지 않는다.
 - Public example repository code를 license 검토 없이 복사하거나 acceptance evidence로 사용하지 않는다.
 - Visual Studio `.sln`/`.vcxproj` 전용 workflow를 기본 지원하지 않는다.

 ## 운영 제약
 - 문서 산출물은 Korean narrative를 기본으로 하되, Abaqus keyword, subroutine name, status value, command, schema key는 English를 유지한다.
- Abaqus version, compiler version, precision, command line, source hash, output tail, CSV schema는 reference artifact metadata에 기록한다.
- Abaqus execution이 필요한 검증은 user 또는 승인된 환경이 명시적으로 설정해야 한다.
+- Abaqus version, compiler version, precision, source hash, output tail, CSV schema, ODB extraction provenance는 reference artifact metadata에 기록한다.
+- Abaqus execution은 user가 외부 Abaqus PC에서 수행한다. 이 프로젝트의 validation command는 해석 실행이 아니라 extracted CSV artifact 검증을 수행한다.
@@ -2,7 +2,7 @@

 이 디렉터리는 Build/Test Executor Agent가 작성하거나 제안하는 기능별 build/test 실행 리포트를 보관하는 위치다.

-Build/Test Executor Agent는 Implementation Agent 이후 독립적으로 C++/MSVC/CMake/CTest 검증을 실행하고, 실패를 분류해 다음 agent로 handoff한다. 이 agent는 source code, tests, CMake files, requirements, formulations, I/O contracts, reference artifacts, tolerance policies를 수정하지 않는다. build artifacts와 test outputs는 `build/` 아래 생성될 수 있다.
+Build/Test Executor Agent는 Implementation Agent 이후 독립적으로 Fortran no-Abaqus validation, reference artifact validation, workspace validation을 실행하고, 실패를 분류해 다음 agent로 handoff한다. 이 agent는 source code, tests, requirements, formulations, I/O contracts, reference artifacts, tolerance policies를 수정하지 않는다. build artifacts와 test outputs는 `build/` 아래 생성될 수 있다.

 기본 문서명은 `docs/build-test-reports/<feature-id>-build-test.md` 형식을 사용한다.

@@ -21,7 +21,7 @@ Build/Test Executor Agent는 Implementation Agent 이후 독립적으로 C++/MSV
 - tests를 수정하지 않는다.
 - CMake files를 수정하지 않는다.
 - requirements, formulations, I/O contracts, reference artifacts, tolerance policies를 수정하지 않는다.
- Abaqus, Nastran 또는 reference solver를 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - release readiness, reference tolerance success, physics validation success를 승인하지 않는다.
 - 최종 reference verification report를 작성하지 않는다.
@@ -55,7 +55,7 @@ For Abaqus UserSubroutine work, workspace validation also supports:
 - `HARNESS_ABAQUS_VALIDATION_COMMANDS=<newline commands>`
 - `HARNESS_ABAQUS_USE_ONEAPI_ENV=auto|on|off`

-Default validation does not run Abaqus jobs. Abaqus execution is valid only when `HARNESS_ABAQUS_VALIDATION=run` and explicit commands are provided.
+Default validation does not run Abaqus jobs. Solver-result evidence must come from externally generated ODB-extracted CSV artifacts.

 기본 CMake/MSVC x64 Debug 명령은 다음과 같다.

@@ -2,7 +2,7 @@

 이 디렉터리는 Coordinator Agent가 작성하거나 제안하는 기능별 workflow coordination report를 보관하는 위치다.

-Coordinator Agent는 FESA solver 기능 개발의 전체 lifecycle에서 gate evidence, handoff, rework loop, blocker, user decision을 관리한다. 이 Agent는 specialist agent의 기술 판정을 대체하지 않고, 다음 agent가 어떤 입력으로 무엇을 산출해야 하는지 명확히 기록한다.
+Coordinator Agent는 Abaqus User Subroutine 개발의 전체 lifecycle에서 gate evidence, handoff, rework loop, blocker, user decision을 관리한다. 이 Agent는 specialist agent의 기술 판정을 대체하지 않고, 다음 agent가 어떤 입력으로 무엇을 산출해야 하는지 명확히 기록한다.

 기본 문서명은 `docs/coordination/<feature-id>-coordination.md` 형식을 사용한다.

@@ -25,7 +25,7 @@ Coordinator Agent는 FESA solver 기능 개발의 전체 lifecycle에서 gate ev
 - physics evaluation을 실행하지 않는다.
 - requirements, formulations, I/O contracts, numerical review reports를 수정하지 않는다.
 - reference artifacts 또는 tolerance policies를 수정하지 않는다.
- Abaqus, Nastran 또는 reference solver를 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - subagents를 자동 spawn하지 않는다.
 - release readiness를 독립적으로 승인하지 않는다.
@@ -23,7 +23,7 @@ Correction Agent는 Build/Test Executor Agent, Reference Verification Agent, Phy
 - numerical review reports를 수정하지 않는다.
 - reference artifacts를 수정하지 않는다.
 - tolerance policies를 수정하지 않는다.
- Abaqus, Nastran 또는 reference solver를 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - release readiness, reference tolerance success, physics validation success를 승인하지 않는다.
 - 최종 reference verification report 또는 physics validation report를 작성하지 않는다.
@@ -49,7 +49,7 @@ python -m unittest discover -s scripts -p "test_*.py"
 ## Failure Classification

 - `configure`: CMake configure, preset, generator, cache setup 실패
- `compile`: C++ compilation 실패
+- `compile`: Fortran compilation 실패
 - `link`: linker, symbol resolution, target dependency 실패
 - `test`: CTest, unit, integration, parser/I/O, ordinary regression test 실패
 - `reference-comparison`: 저장된 reference artifact와 deterministic comparison 실패
@@ -145,7 +145,7 @@ Excluded files:

 - 수정 전 failure classification을 기록해야 한다.
 - 모든 변경은 실패 로그 또는 implementation plan acceptance criterion에 trace되어야 한다.
- production C++ 수정에는 관련 테스트 또는 기존 실패 테스트가 있어야 한다.
+- production Fortran 수정에는 관련 테스트 또는 기존 실패 테스트가 있어야 한다.
 - requirements, formulations, I/O contracts, reference artifacts, tolerance policies는 수정하지 않는다.
 - 실패 로그는 전체 원문을 복제하지 않고 핵심 tail과 원인 요약만 기록한다.
 - 동일 classification이 두 번 반복되면 Coordinator Agent 또는 관련 upstream agent로 handoff한다.
@@ -17,9 +17,9 @@ Formulation Agent는 구현 가능한 FEM 정식화 문서를 작성한다.
 - Numerical Review Agent가 검토할 handoff 항목을 남긴다.

 수행하지 않는다:
- C++ 코드를 구현하지 않는다.
- C++ API나 파일 구조를 설계하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Fortran 코드를 구현하지 않는다.
+- Fortran source layout이나 파일 구조를 설계하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV 결과를 생성하지 않는다.
 - release readiness를 승인하지 않는다.
 - Numerical Review Agent 검토 전 정식화를 최종 승인하지 않는다.
@@ -108,7 +108,7 @@ Formulation Agent는 구현 가능한 FEM 정식화 문서를 작성한다.
 ## Algorithm Pseudocode
 ```text
 math-level element routine and assembly flow only
-no C++ signatures, class names, or file paths
+no Fortran signatures, source layout, or file paths
 ```

 ## Numerical Risks
@@ -2,7 +2,7 @@

 이 디렉터리는 Implementation Planning Agent가 작성하거나 제안한 기능별 구현계획 문서를 보관하는 위치다.

-Implementation Planning Agent는 승인된 요구조건, 연구 브리프, 정식화, 수치 리뷰, I/O 정의, reference model 계약을 C++/MSVC 구현 전 TDD 작업계획으로 변환한다. Agent는 코드, 테스트, CMake 파일을 작성하지 않고, Abaqus/Nastran을 실행하지 않으며, reference CSV 생성이나 solver 결과 비교, release readiness 승인도 하지 않는다.
+Implementation Planning Agent는 승인된 요구조건, 연구 브리프, 정식화, 수치 리뷰, I/O 정의, reference model 계약을 Fortran Abaqus User Subroutine 구현 전 TDD 작업계획으로 변환한다. Agent는 코드와 테스트를 작성하지 않고, Abaqus 해석을 실행하지 않으며, reference CSV 생성이나 solver 결과 비교, release readiness 승인도 하지 않는다.

 기본 파일명은 `docs/implementation-plans/<feature-id>-implementation-plan.md` 형식을 사용한다. 각 문서는 Implementation Agent가 먼저 작성해야 할 실패 테스트, 최소 구현 순서, CMake/CTest 등록 계획, acceptance traceability를 제공해야 한다.

@@ -18,15 +18,15 @@ Implementation Planning Agent는 승인된 요구조건, 연구 브리프, 정
 - `python scripts/validate_workspace.py`를 포함한 validation command를 명시한다.

 수행하지 않는다:
- C++ 코드를 구현하지 않는다.
+- Fortran 코드를 구현하지 않는다.
 - 테스트 파일을 작성하지 않는다.
 - CMake 파일을 수정하지 않는다.
 - CMake/CTest를 실행하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - solver 결과를 비교하지 않는다.
 - release readiness를 승인하지 않는다.
- C++ API, class name, storage layout, file ownership을 확정하지 않는다.
+- Fortran source layout, entry point ownership, file ownership을 확정하지 않는다.

 ## 문서 템플릿

@@ -132,7 +132,7 @@ ctest -C Debug -R <feature-or-label>
 ## 품질 기준

 - 모든 `must` requirement는 최소 하나의 task와 test에 연결되어야 한다.
- C++ production 변경마다 선행 테스트 파일 또는 테스트 추가 계획이 있어야 한다.
+- Fortran production 변경마다 선행 테스트 파일 또는 테스트 추가 계획이 있어야 한다.
 - reference artifact가 필요한 기능은 `references/<feature-id>/<model-id>/`와 CSV 비교 테스트 계획을 가져야 한다.
 - CMake/CTest 계획은 MSVC x64 Debug 검증 경로와 호환되어야 한다.
 - 구현 계획은 테스트 작성, 실패 확인, 최소 구현, validation 순서를 명시해야 한다.
@@ -2,7 +2,7 @@

 이 디렉터리는 I/O Definition Agent가 작성하거나 제안한 기능별 입출력 정의 문서를 보관하는 위치다.

-FESA 솔버의 입력 파일은 Abaqus input file이다. 다만 초기 FESA는 Abaqus 전체 문법 호환을 목표로 하지 않고, 기능별로 지원할 Abaqus keyword subset과 내부 모델 매핑을 명확히 정의한다.
+Abaqus User Subroutine 검증 모델의 입력 파일은 Abaqus input file이다. 이 프로젝트는 Abaqus 전체 문법 호환 parser 개발을 목표로 하지 않고, 기능별로 필요한 Abaqus keyword subset과 결과 CSV schema를 명확히 정의한다.

 기본 파일명은 `docs/io-definitions/<feature-id>-io.md` 형식을 사용한다. 각 문서는 Requirement Agent, Formulation Agent, Numerical Review Agent의 산출물을 입력으로 받아 Abaqus `.inp` 입력 계약과 결과 CSV schema를 정의해야 한다.

@@ -19,8 +19,8 @@ I/O Definition Agent는 Abaqus input file subset, 내부 solver model mapping, o

 수행하지 않는다:
 - parser를 구현하지 않는다.
- C++ API나 파일 구조를 설계하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Fortran source layout이나 파일 구조를 설계하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV 결과를 생성하지 않는다.
 - solver 결과와 reference 결과를 비교하지 않는다.
 - release readiness를 승인하지 않는다.
@@ -44,7 +44,7 @@ I/O Definition Agent는 Abaqus input file subset, 내부 solver model mapping, o
 ## Abaqus Input Scope
 - input_format: Abaqus input file (`.inp`)
 - abaqus_documentation_source: <version/source URL>
- compatibility_disclaimer: FESA supports only the keyword subset defined in this document.
+- compatibility_disclaimer: This feature supports only the keyword subset defined in this document.

 | keyword | support_status | level | required_parameters | mapped_internal_concept | notes |
 | --- | --- | --- | --- | --- | --- |
@@ -176,5 +176,5 @@ I/O Definition Agent는 Abaqus input file subset, 내부 solver model mapping, o
 - Abaqus full compatibility를 주장하지 않고 기능별 supported keyword subset을 명시해야 한다.
 - model data와 history data의 매핑을 구분해야 한다.
 - unsupported keyword 처리 정책을 명확히 해야 한다.
- 내부 모델 계약은 semantic fields로 작성하고 C++ class/function/API를 확정하지 않는다.
+- 내부 모델 계약은 semantic fields로 작성하고 Fortran source layout이나 helper API를 확정하지 않는다.
 - CSV schema는 column name, ID field, component naming, coordinate system, units, step/frame identity, quantity location을 포함해야 한다.
@@ -16,10 +16,10 @@ Numerical Review Agent는 정식화의 수학적 일관성, 수치 안정성 위
 - 구현 계획 전에 필요한 정식화 수정, 연구 보강, reference model 요구사항을 작성한다.

 수행하지 않는다:
- C++ 코드를 구현하지 않는다.
+- Fortran 코드를 구현하지 않는다.
 - 정식화 문서를 직접 수정하지 않는다.
- C++ API나 파일 구조를 설계하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Fortran source layout이나 파일 구조를 설계하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV 결과를 생성하지 않는다.
 - release readiness를 승인하지 않는다.
 - 레퍼런스 결과와 구현 솔버 결과의 일치 여부를 판정하지 않는다.
@@ -22,7 +22,7 @@ Physics Evaluation Agent는 Reference Verification Agent가 `pass-for-physics-ev
 - CMake files를 수정하지 않는다.
 - requirements, formulations, I/O contracts, reference model contracts를 수정하지 않는다.
 - reference artifacts 또는 tolerance policies를 수정하지 않는다.
- Abaqus, Nastran 또는 reference solver를 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - reference tolerance를 다시 판정하지 않는다.
 - release readiness를 승인하지 않는다.
@@ -2,7 +2,7 @@

 이 디렉터리는 Reference Model Agent가 작성하거나 제안한 기능별 reference model 설계 문서를 보관하는 위치다.

-Reference Model Agent는 FESA 기능 검증에 필요한 Abaqus `.inp` 기반 테스트 모델 포트폴리오와 `references/<feature-id>/<model-id>/` artifact bundle 계약을 정의한다. Agent는 Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않고, reference CSV 값을 생성하지 않으며, solver 결과 비교나 release readiness 승인도 하지 않는다.
+Reference Model Agent는 Abaqus User Subroutine 검증에 필요한 Abaqus `.inp` 기반 테스트 모델 포트폴리오와 `references/<feature-id>/<model-id>/` artifact bundle 계약을 정의한다. Agent는 Abaqus 해석을 직접 실행하지 않고, reference CSV 값을 생성하지 않으며, solver 결과 비교나 release readiness 승인도 하지 않는다.

 기본 파일명은 `docs/reference-models/<feature-id>-reference-models.md` 형식을 사용한다. 각 문서는 요구조건, 연구 브리프, 정식화, 수치 리뷰, I/O 정의를 입력으로 받아 구현 전에 준비해야 할 테스트 모델과 reference artifact 요구사항을 정의해야 한다.

@@ -17,10 +17,10 @@ Reference Model Agent는 FESA 기능 검증에 필요한 Abaqus `.inp` 기반
 - requirement와 model, compared quantity, tolerance, artifact status를 연결하는 Coverage Matrix를 작성한다.

 수행하지 않는다:
- C++ 코드를 구현하지 않는다.
+- Fortran 코드를 구현하지 않는다.
 - parser를 구현하지 않는다.
- C++ API나 파일 구조를 설계하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Fortran source layout이나 파일 구조를 설계하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - solver 결과를 비교하지 않는다.
 - release readiness를 승인하지 않는다.
@@ -67,7 +67,7 @@ Reference Model Agent는 FESA 기능 검증에 필요한 Abaqus `.inp` 기반
 - purpose: <what this model proves>
 - verified_requirements: [<requirement-id>]
 - analysis_type: <linear static | nonlinear static | modal | other>
- element_type: <Abaqus element type and FESA feature element>
+- element_type: <Abaqus element type and subroutine feature scope>
 - material: <material model and values>
 - boundary_conditions: <BC summary>
 - loads: <load summary>
@@ -92,6 +92,14 @@ references/
    <model-id>/
      model.inp
      metadata.json
+      job.msg.tail.txt
+      job.dat.tail.txt
+      job.log.tail.txt
+      job.sta.tail.txt
+      result.odb.sha256
+      extraction/
+        extract_odb_to_csv.py
+      extracted/
        displacements.csv
        reactions.csv
        element_forces.csv
@@ -102,15 +110,15 @@ references/
 Required files:
 - `model.inp`: Abaqus input file for the reference model.
 - `metadata.json`: provenance and model contract metadata.
- `displacements.csv`: nodal displacement reference results.
- `reactions.csv`: nodal reaction force reference results.
- `element_forces.csv`: element internal force reference results.
- `stresses.csv`: stress reference results.
+- `job.msg.tail.txt`, `job.dat.tail.txt`, `job.log.tail.txt`, `job.sta.tail.txt`: `.msg/.dat/.log/.sta` tail files kept as externally generated Abaqus job evidence.
+- `extracted/*.csv`: ODB-extracted CSV reference results declared in `metadata.json`.
 - `README.md`: short description, generation notes, and limitations.

 Optional files:
- `strains.csv`: strain reference results when required.
- `energy_or_residual.csv`: energy, residual, or convergence reference results when required.
+- `result.odb.sha256`: ODB hash evidence when the ODB itself is not stored.
+- `extraction/extract_odb_to_csv.py`: copy of the user-run extraction script when available.
+- `extracted/strains.csv`: strain reference results when required.
+- `extracted/energy_or_residual.csv`: energy, residual, or convergence reference results when required.
 - `notes.md`: manual review notes.

 ## Metadata JSON Contract
@@ -216,8 +224,7 @@ For Fortran Abaqus UserSubroutine work, each stored artifact bundle may include
  "artifact_status": "draft | needs-reference-artifacts | ready-for-comparison | blocked",
  "abaqus": {
    "version": "<Abaqus version>",
-    "precision": "single | double",
-    "command": "abaqus job=<job> user=<subroutine>"
+    "precision": "single | double"
  },
  "compiler": {
    "vendor": "Intel oneAPI",
@@ -239,13 +246,59 @@ For Fortran Abaqus UserSubroutine work, each stored artifact bundle may include
    "tails": {
      "msg": "job.msg.tail.txt",
      "dat": "job.dat.tail.txt",
-      "log": "job.log.tail.txt"
+      "log": "job.log.tail.txt",
+      "sta": "job.sta.tail.txt"
    },
    "csv": {
-      "stresses": "stresses.csv"
+      "stresses": "extracted/stresses.csv"
+    }
+  },
+  "extraction": {
+    "source_odb": "job.odb",
+    "tool": "Abaqus Python",
+    "extracted_at": "<ISO-8601 datetime>",
+    "csv_directory": "extracted",
+    "script": "extraction/extract_odb_to_csv.py",
+    "odb_sha256_file": "result.odb.sha256"
+  },
+  "comparisons": {
+    "stresses": {
+      "reference_csv": "extracted/stresses.csv",
+      "actual_csv": "extracted/stresses.csv",
+      "required_columns": [
+        "step",
+        "frame",
+        "instance",
+        "element_label",
+        "integration_point",
+        "section_point",
+        "output_position",
+        "component",
+        "coordinate_system",
+        "unit",
+        "value"
+      ],
+      "key_columns": [
+        "step",
+        "frame",
+        "instance",
+        "element_label",
+        "integration_point",
+        "section_point",
+        "output_position",
+        "component"
+      ],
+      "value_column": "value",
+      "unit_column": "unit",
+      "coordinate_system_column": "coordinate_system",
+      "tolerance": {
+        "absolute": 1.0e-8,
+        "relative": 1.0e-6,
+        "relative_floor": 1.0e-12
+      }
    }
  }
 }
 ```

-`artifact_status=ready-for-comparison` means `scripts/validate_reference_artifacts.py` must find all declared files and confirm source SHA-256 values. Agents must not generate or edit the declared reference CSVs unless an explicit reference-artifact phase authorizes that work.
+`artifact_status=ready-for-comparison` means `scripts/validate_reference_artifacts.py` must find all declared files, confirm source SHA-256 values, require `.msg/.dat/.log/.sta` tails, require ODB extraction provenance, and require every declared CSV path to match the `extracted/*.csv` pattern. The `comparisons` block defines the executable CSV schema, row key, and tolerance contract used by `scripts/compare_extracted_csv.py`. Agents must not generate or edit the declared reference CSVs unless an explicit reference-artifact phase authorizes that work.
@@ -14,7 +14,7 @@ Reference Verification Agent는 Build/Test Executor Agent 통과 후 generated s
 - `displacements.csv`, `reactions.csv`, `element_forces.csv`, `stresses.csv`를 기본 비교 대상으로 삼는다.
 - upstream 문서가 요구할 때만 `strains.csv`, `energy_or_residual.csv`를 추가 비교한다.
 - max absolute error, max relative error, RMS error, norm error, worst node/element/component, missing rows, extra rows, pass/fail을 보고한다.
- 실패를 missing-reference-artifact, missing-solver-output, schema-mismatch, id-mismatch, unit-or-coordinate-mismatch, tolerance-failure, nonfinite-result, upstream-contract, environment로 분류한다.
+- 실패를 missing-reference-artifact, missing-generated-output, schema-mismatch, id-mismatch, unit-or-coordinate-mismatch, tolerance-failure, nonfinite-result, upstream-contract, environment로 분류한다.

 수행하지 않는다:
 - source code를 수정하지 않는다.
@@ -22,7 +22,7 @@ Reference Verification Agent는 Build/Test Executor Agent 통과 후 generated s
 - CMake files를 수정하지 않는다.
 - requirements, formulations, I/O contracts, reference model contracts를 수정하지 않는다.
 - reference artifacts 또는 tolerance policies를 수정하지 않는다.
- Abaqus, Nastran 또는 reference solver를 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - solver output CSV를 tolerance에 맞추기 위해 보정하지 않는다.
 - physics validation success 또는 release readiness를 승인하지 않는다.
@@ -63,7 +63,7 @@ ARTIFACT CHECK -> COMPARE -> CLASSIFY -> REPORT
 ## Failure Classification

 - `missing-reference-artifact`: required stored reference file 또는 provenance가 없다.
- `missing-solver-output`: generated solver result CSV 또는 comparison command가 없다.
+- `missing-generated-output`: externally generated actual CSV 또는 comparison command가 없다.
 - `schema-mismatch`: reference CSV와 solver CSV column/schema가 다르다.
 - `id-mismatch`: node id, element id, step/frame, integration point, component matching이 실패했다.
 - `unit-or-coordinate-mismatch`: units 또는 coordinate system이 비교 가능하지 않다.
@@ -120,7 +120,7 @@ ARTIFACT CHECK -> COMPARE -> CLASSIFY -> REPORT
 | stress | <model-id> | stresses.csv | <n> | <n> | <n> | <value> | <value> | <value> | <value or N/A> | <element/ip id> | <component> | pass | fail |

 ## Failure Classification
- classification: missing-reference-artifact | missing-solver-output | schema-mismatch | id-mismatch | unit-or-coordinate-mismatch | tolerance-failure | nonfinite-result | upstream-contract | environment | N/A
+- classification: missing-reference-artifact | missing-generated-output | schema-mismatch | id-mismatch | unit-or-coordinate-mismatch | tolerance-failure | nonfinite-result | upstream-contract | environment | N/A
 - primary_failure: <short summary>
 - evidence: <short relevant excerpt or computed metric>

@@ -166,3 +166,19 @@ ARTIFACT CHECK -> COMPARE -> CLASSIFY -> REPORT
 - NaN 또는 infinite value는 `nonfinite-result`로 분류한다.
 - pass는 reference tolerance 통과만 의미한다.
 - physics validation과 release readiness는 각각 Physics Evaluation Agent와 Release Agent가 판정한다.
+
+## CSV Comparison Command
+
+Run the external-result comparison explicitly with:
+
+```bash
+python scripts/compare_extracted_csv.py --metadata references/<feature-id>/<model-id>/metadata.json --actual-root external-results/<feature-id>/<model-id>
+```
+
+Optional quantity filtering and JSON report output:
+
+```bash
+python scripts/compare_extracted_csv.py --metadata references/umat/single-element/metadata.json --actual-root external-results/umat/single-element --quantity stresses --report-json build/reference-verification/umat-single-element.json
+```
+
+This command does not run Abaqus and does not parse ODB files. It compares approved `references/.../extracted/*.csv` files with externally generated actual CSV files under `--actual-root` using the `comparisons` block in `metadata.json`.
@@ -2,7 +2,7 @@

 이 디렉터리는 Release Agent가 작성하거나 제안하는 기능별 release readiness report를 보관하는 위치다.

-Release Agent는 Physics Evaluation Agent가 `pass-for-release-agent`로 넘긴 기능에 대해 최종 gate evidence를 감사한다. 이 Agent는 source code, tests, CMake, upstream 계약, reference artifacts, tolerance policies를 수정하지 않는다. 또한 Abaqus/Nastran 실행, reference CSV 생성, 외부 publish/deploy/package/tag/commit 작업을 수행하지 않는다.
+Release Agent는 Physics Evaluation Agent가 `pass-for-release-agent`로 넘긴 기능에 대해 최종 gate evidence를 감사한다. 이 Agent는 source code, tests, upstream 계약, reference artifacts, tolerance policies를 수정하지 않는다. 또한 Abaqus 해석 실행, reference CSV 생성, 외부 publish/deploy/package/tag/commit 작업을 수행하지 않는다.

 기본 문서명은 `docs/releases/<feature-id>-release.md` 형식을 사용한다.

@@ -23,7 +23,7 @@ Release Agent는 Physics Evaluation Agent가 `pass-for-release-agent`로 넘긴
 - CMake files 또는 build configuration을 수정하지 않는다.
 - requirements, formulations, I/O contracts, numerical review reports, reference verification reports, physics evaluation reports를 수정하지 않는다.
 - reference artifacts 또는 tolerance policies를 수정하지 않는다.
- Abaqus, Nastran 또는 reference solver를 실행하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV를 생성하지 않는다.
 - 실패하거나 누락된 upstream gate를 우회하지 않는다.
 - 사용자 명시 요청 없이 publish, deploy, package, tag, commit, external release를 수행하지 않는다.
@@ -168,4 +168,4 @@ GATE AUDIT -> TRACEABILITY CHECK -> RELEASE DOCUMENTATION -> RELEASE VERDICT
 - 모든 `must` requirement는 acceptance criterion, test/reference evidence, release scope에 trace되어야 한다.
 - known limitations와 deferred/open issue는 Release Notes Draft에 명확히 기록되어야 한다.
 - missing evidence, contradictory upstream reports, unresolved defects, incomplete reference artifacts는 release pass가 아니라 적절한 `needs-*` 상태로 분류한다.
- 이 문서는 FESA 내부 feature release readiness 판정을 위한 것이며, 외부 publish/deploy/package/tag/commit 자동화는 포함하지 않는다.
+- 이 문서는 Abaqus User Subroutine feature의 내부 readiness 판정을 위한 것이며, 외부 publish/deploy/package/tag/commit 자동화는 포함하지 않는다.
@@ -16,10 +16,10 @@ Requirement Agent는 솔버 기능 요청을 검증 가능한 요구조건으로
 - Requirement Verification Matrix를 작성한다.

 수행하지 않는다:
- C++ 코드를 구현하지 않는다.
+- Fortran 코드를 구현하지 않는다.
 - 유한요소 정식화를 확정하지 않는다.
- C++ API나 파일 구조를 설계하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Fortran source layout이나 파일 구조를 설계하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV 결과를 생성하지 않는다.
 - 기능 완료 여부를 승인하지 않는다.

@@ -87,7 +87,7 @@ Expected location: `references/<feature-id>/`

 | id | statement | category | rationale | source | priority | verification_method | acceptance_criteria | tolerance | downstream_agents | status |
 | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
-| FESA-REQ-<FEATURE>-001 | The FESA solver shall ... | functional | ... | user | must | reference-comparison | ... | ... | Reference Model Agent; Implementation Planning Agent | draft |
+| ABAQUS-USUB-REQ-<FEATURE>-001 | The user subroutine shall ... | functional | ... | user | must | reference-comparison | ... | ... | Reference Model Agent; Implementation Planning Agent | draft |

 ## Open Questions
 - <미확정 값 또는 사용자 결정 필요 사항>
@@ -116,4 +116,4 @@ Expected location: `references/<feature-id>/`
 - 모든 수치 요구조건은 단위, 좌표계, tolerance 또는 `TBD with owner`를 가져야 한다.
 - reference 비교가 필요한 요구조건은 필요한 CSV artifact를 명시해야 한다.
 - "빠르게", "정확하게", "Abaqus처럼" 같은 문장은 검증 가능한 기준으로 바꾸거나 open question으로 남겨야 한다.
- 구현 방법, 정식화 세부식, C++ API는 이 문서에서 확정하지 않는다.
+- 구현 방법, 정식화 세부식, Fortran source layout은 이 문서에서 확정하지 않는다.
@@ -11,15 +11,15 @@ Research Agent는 FEM 이론, benchmark, verification reference, solver manual,
 수행한다:
 - 기능 요구조건과 관련된 이론 자료를 조사한다.
 - 요소별 benchmark, patch test, MMS, MES, convergence study 후보를 찾는다.
- Abaqus/Nastran 결과와 비교 가능한 공개 benchmark 또는 문헌 해를 정리한다.
+- Abaqus 결과와 비교 가능한 공개 benchmark 또는 문헌 해를 정리한다.
 - 자료의 신뢰도, 적용 범위, 한계, 상충 여부를 평가한다.
 - downstream agent가 사용할 수 있도록 출처와 근거를 추적 가능하게 남긴다.

 수행하지 않는다:
- C++ 코드를 구현하지 않는다.
+- Fortran 코드를 구현하지 않는다.
 - 유한요소 정식화를 확정하지 않는다.
- C++ API나 파일 구조를 설계하지 않는다.
- Abaqus, Nastran 또는 레퍼런스 솔버를 직접 실행하지 않는다.
+- Fortran source layout이나 파일 구조를 설계하지 않는다.
+- Abaqus 해석을 직접 실행하지 않는다.
 - reference CSV 결과를 생성하지 않는다.
 - 기능 완료 여부를 승인하지 않는다.

@@ -0,0 +1,706 @@
+# CSV Schema/Tolerance Comparison Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add a no-Abaqus CSV comparison script that validates externally generated ODB-extracted actual CSV files against approved reference CSV artifacts by schema, row identity, units/coordinate metadata, and tolerance.
+
+**Architecture:** Keep `scripts/validate_reference_artifacts.py` responsible for artifact completeness only. Add `scripts/compare_extracted_csv.py` as an explicit CLI tool that reads `references/<feature-id>/<model-id>/metadata.json`, validates the reference bundle, loads actual CSVs from a user-provided external result bundle, compares rows using declared schema/tolerance rules, and emits pass/fail plus optional JSON evidence. Do not integrate this into default `scripts/validate_workspace.py` because actual CSVs are generated outside this project and may not exist on every machine.
+
+**Tech Stack:** Python standard library only (`argparse`, `csv`, `json`, `math`, `dataclasses`, `pathlib`, `statistics` or direct RMS math), existing `unittest` test style, existing reference artifact metadata contract.
+
+---
+
+## File Structure
+
+- Create: `scripts/compare_extracted_csv.py`
+  - CLI and importable functions for loading metadata, resolving CSV paths, validating CSV schema, matching rows, computing tolerance metrics, classifying failures, and emitting text/JSON reports.
+- Create: `scripts/test_compare_extracted_csv.py`
+  - TDD coverage for pass, schema mismatch, missing actual output, ID mismatch, unit/coordinate mismatch, nonfinite values, and tolerance failure.
+- Modify: `docs/reference-verifications/README.md`
+  - Document CLI usage, metadata comparison contract, failure classifications, and expected report fields.
+- Modify: `docs/reference-models/README.md`
+  - Add optional `comparisons` metadata block to the artifact bundle example.
+- Do not modify: `scripts/validate_workspace.py`
+  - CSV comparison needs explicit actual output paths, so it stays outside default workspace validation.
+
+## Metadata Contract
+
+Add an optional `comparisons` block to `metadata.json`. The comparison script requires this block for quantities it compares, but `validate_reference_artifacts.py` does not need to require it for all `ready-for-comparison` bundles.
+
+```json
+{
+  "comparisons": {
+    "stresses": {
+      "reference_csv": "extracted/stresses.csv",
+      "actual_csv": "extracted/stresses.csv",
+      "required_columns": [
+        "step",
+        "frame",
+        "instance",
+        "element_label",
+        "integration_point",
+        "section_point",
+        "output_position",
+        "component",
+        "coordinate_system",
+        "unit",
+        "value"
+      ],
+      "key_columns": [
+        "step",
+        "frame",
+        "instance",
+        "element_label",
+        "integration_point",
+        "section_point",
+        "output_position",
+        "component"
+      ],
+      "value_column": "value",
+      "unit_column": "unit",
+      "coordinate_system_column": "coordinate_system",
+      "tolerance": {
+        "absolute": 1.0e-8,
+        "relative": 1.0e-6,
+        "relative_floor": 1.0e-12
+      }
+    }
+  }
+}
+```
+
+Tolerance rule:
+
+```text
+absolute_error = abs(actual - reference)
+relative_error = absolute_error / max(abs(reference), relative_floor)
+allowed_error = absolute + relative * max(abs(reference), relative_floor)
+row_pass = absolute_error <= allowed_error
+quantity_pass = all rows pass and no schema/id/unit/coordinate/nonfinite errors exist
+```
+
+## CLI Contract
+
+Primary command:
+
+```bash
+python scripts/compare_extracted_csv.py --metadata references/<feature-id>/<model-id>/metadata.json --actual-root external-results/<feature-id>/<model-id>
+```
+
+Optional filters and report output:
+
+```bash
+python scripts/compare_extracted_csv.py --metadata references/umat/single-element/metadata.json --actual-root external-results/umat/single-element --quantity stresses --report-json build/reference-verification/umat-single-element.json
+```
+
+Exit codes:
+
+- `0`: every requested quantity passed.
+- `1`: comparison completed and one or more quantities failed.
+- `2`: invalid CLI arguments, invalid metadata, missing files, or unreadable CSV.
+
+## Failure Classification
+
+The script should produce one primary classification per failed quantity:
+
+- `missing-reference-artifact`: declared reference CSV is absent after metadata validation.
+- `missing-generated-output`: actual CSV under `--actual-root` is absent.
+- `schema-mismatch`: required columns are missing, duplicate headers exist, or duplicate key rows exist.
+- `id-mismatch`: missing or extra key rows exist.
+- `unit-or-coordinate-mismatch`: matched rows disagree on unit or coordinate system.
+- `nonfinite-result`: reference or actual `value` is NaN, infinite, or cannot be parsed as a number.
+- `tolerance-failure`: schema, IDs, unit, and coordinate checks pass, but numeric error exceeds tolerance.
+- `upstream-contract`: requested quantity has no `comparisons.<quantity>` contract.
+- `environment`: file cannot be read due to encoding or OS errors.
+
+## Report Contract
+
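+Text output should be concise and machine-parseable: one line per quantity, starting with `PASS` or `FAIL` and the quantity name, followed by space-separated `key=value` fields: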
+
+```text
+PASS stresses rows=8 max_abs_error=1.2e-10 max_rel_error=3.0e-9 rms_error=8.1e-11 worst_key=Step-1|1|PART-1-1|1|1||INTEGRATION_POINT|S11
+```
+
+Failed quantity example:
+
+```text
+FAIL stresses classification=tolerance-failure rows=8 max_abs_error=2.4e-4 max_rel_error=1.2e-2 rms_error=8.5e-5 worst_key=Step-1|1|PART-1-1|1|1||INTEGRATION_POINT|S11
+```
+
+JSON report should contain:
+
+```json
+{
+  "metadata": "references/umat/single-element/metadata.json",
+  "actual_root": "external-results/umat/single-element",
+  "overall_result": "pass",
+  "quantities": [
+    {
+      "quantity": "stresses",
+      "result": "pass",
+      "classification": "N/A",
+      "compared_rows": 8,
+      "missing_rows": 0,
+      "extra_rows": 0,
+      "max_abs_error": 1.2e-10,
+      "max_rel_error": 3.0e-9,
+      "rms_error": 8.1e-11,
+      "worst_key": "Step-1|1|PART-1-1|1|1||INTEGRATION_POINT|S11",
+      "worst_component": "S11"
+    }
+  ]
+}
+```
+
+---
+
+### Task 1: Write Pass-Case Test Fixture
+
+**Files:**
+- Create: `scripts/test_compare_extracted_csv.py`
+
+- [ ] **Step 1: Write dynamic import and fixture helpers**
+
+```python
+import csv
+import importlib.util
+import json
+import tempfile
+import unittest
+from pathlib import Path
+
+
+def load_compare_extracted_csv():
+    module_path = Path(__file__).resolve().parent / "compare_extracted_csv.py"
+    spec = importlib.util.spec_from_file_location("compare_extracted_csv", module_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def write_json(path: Path, payload: dict):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+
+
+def write_csv(path: Path, rows: list[dict[str, str]]):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", newline="", encoding="utf-8") as handle:
+        writer = csv.DictWriter(handle, fieldnames=list(rows[0]))
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def metadata_payload() -> dict:
+    return {
+        "schema_version": "abaqus-user-subroutine-artifact-v1",
+        "feature_id": "umat",
+        "model_id": "single-element",
+        "artifact_status": "ready-for-comparison",
+        "abaqus": {"version": "2024", "precision": "double"},
+        "compiler": {"vendor": "Intel oneAPI", "name": "ifx", "version": "2024"},
+        "subroutine": {"entry_points": ["UMAT"], "source_files": []},
+        "input_file": "model.inp",
+        "outputs": {
+            "tails": {
+                "msg": "job.msg.tail.txt",
+                "dat": "job.dat.tail.txt",
+                "log": "job.log.tail.txt",
+                "sta": "job.sta.tail.txt"
+            },
+            "csv": {"stresses": "extracted/stresses.csv"}
+        },
+        "extraction": {
+            "source_odb": "job.odb",
+            "tool": "Abaqus Python",
+            "extracted_at": "2026-06-10T00:00:00+09:00",
+            "csv_directory": "extracted"
+        },
+        "comparisons": {
+            "stresses": {
+                "reference_csv": "extracted/stresses.csv",
+                "actual_csv": "extracted/stresses.csv",
+                "required_columns": [
+                    "step", "frame", "instance", "element_label", "integration_point",
+                    "section_point", "output_position", "component",
+                    "coordinate_system", "unit", "value"
+                ],
+                "key_columns": [
+                    "step", "frame", "instance", "element_label", "integration_point",
+                    "section_point", "output_position", "component"
+                ],
+                "value_column": "value",
+                "unit_column": "unit",
+                "coordinate_system_column": "coordinate_system",
+                "tolerance": {"absolute": 1.0e-8, "relative": 1.0e-6, "relative_floor": 1.0e-12}
+            }
+        }
+    }
+
+
+def stress_rows(value: str = "100.0") -> list[dict[str, str]]:
+    return [
+        {
+            "step": "Step-1",
+            "frame": "1",
+            "instance": "PART-1-1",
+            "element_label": "1",
+            "integration_point": "1",
+            "section_point": "",
+            "output_position": "INTEGRATION_POINT",
+            "component": "S11",
+            "coordinate_system": "GLOBAL",
+            "unit": "MPa",
+            "value": value
+        }
+    ]
+```
+
+- [ ] **Step 2: Write passing comparison test**
+
+```python
+class CompareExtractedCsvTests(unittest.TestCase):
+    def test_quantity_passes_when_schema_keys_units_and_values_match_within_tolerance(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            write_json(reference / "metadata.json", metadata_payload())
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+            write_csv(actual / "extracted" / "stresses.csv", stress_rows("100.00000001"))
+
+            report = compare.compare_metadata(reference / "metadata.json", actual, quantities=["stresses"], validate_artifacts=False)
+
+        self.assertEqual(report["overall_result"], "pass")
+        self.assertEqual(report["quantities"][0]["result"], "pass")
+        self.assertEqual(report["quantities"][0]["classification"], "N/A")
+        self.assertEqual(report["quantities"][0]["compared_rows"], 1)
+```
+
+- [ ] **Step 3: Run test to verify RED**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: FAIL because `scripts/compare_extracted_csv.py` does not exist.
+
+### Task 2: Implement Minimal Pass-Case Comparison
+
+**Files:**
+- Create: `scripts/compare_extracted_csv.py`
+
+- [ ] **Step 1: Add importable API skeleton and minimal comparison**
+
+Implement these functions:
+
+```python
+def compare_metadata(metadata_path: Path, actual_root: Path, *, quantities: list[str] | None = None, validate_artifacts: bool = True) -> dict:
+    ...
+
+def load_csv_rows(path: Path) -> tuple[list[str], list[dict[str, str]]]:
+    ...
+
+def compare_quantity(quantity: str, contract: dict, reference_root: Path, actual_root: Path) -> dict:
+    ...
+```
+
+Minimum behavior for GREEN:
+- Load metadata JSON.
+- Resolve `comparisons.<quantity>.reference_csv` under `metadata_path.parent`.
+- Resolve `comparisons.<quantity>.actual_csv` under `actual_root`.
+- Load both CSV files with `csv.DictReader`.
+- Check required columns are present.
+- Match rows by `key_columns`.
+- Parse `value_column` as finite float.
+- Compute `max_abs_error`, `max_rel_error`, `rms_error`, `worst_key`.
+- Return `overall_result=pass` if no errors exceed tolerance.
+
+- [ ] **Step 2: Run pass-case test**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: PASS.
+
+### Task 3: Add Schema and Contract Failure Tests
+
+**Files:**
+- Modify: `scripts/test_compare_extracted_csv.py`
+- Modify: `scripts/compare_extracted_csv.py`
+
+- [ ] **Step 1: Add missing actual output test**
+
+```python
+def test_missing_actual_csv_is_missing_generated_output(self):
+    compare = load_compare_extracted_csv()
+    with tempfile.TemporaryDirectory() as tmp:
+        root = Path(tmp)
+        reference = root / "references" / "umat" / "single-element"
+        actual = root / "external-results" / "umat" / "single-element"
+        write_json(reference / "metadata.json", metadata_payload())
+        write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+
+        report = compare.compare_metadata(reference / "metadata.json", actual, quantities=["stresses"], validate_artifacts=False)
+
+    self.assertEqual(report["overall_result"], "fail")
+    self.assertEqual(report["quantities"][0]["classification"], "missing-generated-output")
+```
+
+- [ ] **Step 2: Add missing required column test**
+
+```python
+def test_missing_required_column_is_schema_mismatch(self):
+    compare = load_compare_extracted_csv()
+    with tempfile.TemporaryDirectory() as tmp:
+        root = Path(tmp)
+        reference = root / "references" / "umat" / "single-element"
+        actual = root / "external-results" / "umat" / "single-element"
+        write_json(reference / "metadata.json", metadata_payload())
+        row = stress_rows("100.0")[0]
+        write_csv(reference / "extracted" / "stresses.csv", [row])
+        actual_row = dict(row)
+        actual_row.pop("coordinate_system")
+        write_csv(actual / "extracted" / "stresses.csv", [actual_row])
+
+        report = compare.compare_metadata(reference / "metadata.json", actual, quantities=["stresses"], validate_artifacts=False)
+
+    self.assertEqual(report["quantities"][0]["classification"], "schema-mismatch")
+```
+
+- [ ] **Step 3: Add missing comparison contract test**
+
+```python
+def test_missing_quantity_contract_is_upstream_contract(self):
+    compare = load_compare_extracted_csv()
+    with tempfile.TemporaryDirectory() as tmp:
+        root = Path(tmp)
+        reference = root / "references" / "umat" / "single-element"
+        actual = root / "external-results" / "umat" / "single-element"
+        payload = metadata_payload()
+        payload["comparisons"].pop("stresses")
+        write_json(reference / "metadata.json", payload)
+
+        report = compare.compare_metadata(reference / "metadata.json", actual, quantities=["stresses"], validate_artifacts=False)
+
+    self.assertEqual(report["quantities"][0]["classification"], "upstream-contract")
+```
+
+- [ ] **Step 4: Run tests to verify RED**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: FAIL on the new failure classifications.
+
+- [ ] **Step 5: Implement missing file, schema, and contract classification**
+
+Add helper functions:
+
+```python
+def failed_quantity(quantity: str, classification: str, message: str) -> dict:
+    ...
+
+def validate_columns(headers: list[str], required_columns: list[str]) -> list[str]:
+    ...
+```
+
+Return stable fields even on failure:
+
+```python
+{
+    "quantity": quantity,
+    "result": "fail",
+    "classification": classification,
+    "message": message,
+    "compared_rows": 0,
+    "missing_rows": 0,
+    "extra_rows": 0,
+    "max_abs_error": None,
+    "max_rel_error": None,
+    "rms_error": None,
+    "worst_key": None,
+    "worst_component": None
+}
+```
+
+- [ ] **Step 6: Run tests to verify GREEN**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: PASS.
+
+### Task 4: Add Row Matching, Unit, Coordinate, Nonfinite, and Tolerance Tests
+
+**Files:**
+- Modify: `scripts/test_compare_extracted_csv.py`
+- Modify: `scripts/compare_extracted_csv.py`
+
+- [ ] **Step 1: Add ID mismatch test**
+
+Change actual `element_label` from `1` to `2`. Expected classification: `id-mismatch`, with `missing_rows=1` and `extra_rows=1`.
+
+- [ ] **Step 2: Add unit mismatch test**
+
+Change actual `unit` from `MPa` to `Pa`. Expected classification: `unit-or-coordinate-mismatch`.
+
+- [ ] **Step 3: Add coordinate mismatch test**
+
+Change actual `coordinate_system` from `GLOBAL` to `LOCAL-1`. Expected classification: `unit-or-coordinate-mismatch`.
+
+- [ ] **Step 4: Add nonfinite test**
+
+Set actual `value` to `nan`. Expected classification: `nonfinite-result`.
+
+- [ ] **Step 5: Add tolerance failure test**
+
+Set actual `value` to `101.0` for reference `100.0`. Expected classification: `tolerance-failure`, `max_abs_error=1.0`, and `result=fail`.
+
+- [ ] **Step 6: Run tests to verify RED**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: FAIL until these classifications are implemented.
+
+- [ ] **Step 7: Implement row matching and classification precedence**
+
+Use this precedence:
+
+```text
+upstream-contract
+missing-reference-artifact
+missing-generated-output
+schema-mismatch
+id-mismatch
+nonfinite-result
+unit-or-coordinate-mismatch
+tolerance-failure
+N/A
+```
+
+Implement row keys as:
+
+```python
+def make_key(row: dict[str, str], key_columns: list[str]) -> tuple[str, ...]:
+    return tuple(row.get(column, "") for column in key_columns)
+```
+
+Detect duplicate keys in either CSV as `schema-mismatch`.
+
+- [ ] **Step 8: Run tests to verify GREEN**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: PASS.
+
+### Task 5: Add CLI and JSON Report Tests
+
+**Files:**
+- Modify: `scripts/test_compare_extracted_csv.py`
+- Modify: `scripts/compare_extracted_csv.py`
+
+- [ ] **Step 1: Add CLI pass test using `main(argv)`**
+
+Test:
+
+```python
+exit_code = compare.main([
+    "--metadata", str(reference / "metadata.json"),
+    "--actual-root", str(actual),
+    "--quantity", "stresses",
+    "--report-json", str(report_json)
+])
+self.assertEqual(exit_code, 0)
+self.assertEqual(json.loads(report_json.read_text(encoding="utf-8"))["overall_result"], "pass")
+```
+
+- [ ] **Step 2: Add CLI fail test**
+
+Use a tolerance failure fixture. Expected `main(...) == 1` and JSON `overall_result == "fail"`.
+
+- [ ] **Step 3: Add CLI invalid argument test**
+
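+Call without `--metadata` or `--actual-root`. Expected `main(...) == 2`. Because `argparse` reports usage errors by raising `SystemExit(2)`, `main` must catch that exception and return its code instead of letting it propagate.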
+
+- [ ] **Step 4: Run tests to verify RED**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: FAIL until CLI exists.
+
+- [ ] **Step 5: Implement CLI**
+
+Implement:
+
+```python
+def build_arg_parser() -> argparse.ArgumentParser:
+    ...
+
+def main(argv: list[str] | None = None) -> int:
+    ...
+```
+
+CLI behavior:
+- `--quantity` may be repeated.
+- If no `--quantity` is supplied, compare all keys under `metadata["comparisons"]`.
+- `--report-json` creates parent directories and writes UTF-8 JSON.
+- Print one summary line per quantity.
+- Return `0`, `1`, or `2` according to the CLI contract.
+
+- [ ] **Step 6: Run tests to verify GREEN**
+
+Run:
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: PASS.
+
+### Task 6: Optional Artifact Validator Integration
+
+**Files:**
+- Modify: `scripts/compare_extracted_csv.py`
+- Modify: `scripts/test_compare_extracted_csv.py`
+
+- [ ] **Step 1: Add test that default comparison calls metadata validation**
+
+Use a metadata file missing required ready-for-comparison fields and call `compare_metadata(..., validate_artifacts=True)`. Expected classification: `missing-reference-artifact` or exit code `2` with validation errors.
+
+- [ ] **Step 2: Implement reuse of `validate_reference_artifacts.validate_metadata`**
+
+Import safely:
+
+```python
+try:
+    from validate_reference_artifacts import validate_metadata
+except ImportError:
+    from scripts.validate_reference_artifacts import validate_metadata
+```
+
+Run validation before comparison when `validate_artifacts=True`.
+
+- [ ] **Step 3: Keep tests able to bypass validation**
+
+Continue supporting `validate_artifacts=False` in unit tests that only exercise comparison logic.
+
+### Task 7: Documentation Updates
+
+**Files:**
+- Modify: `docs/reference-verifications/README.md`
+- Modify: `docs/reference-models/README.md`
+
+- [ ] **Step 1: Update reference verification README**
+
+Add a section:
+
+````markdown
+## CSV Comparison Command
+
+Run explicit external-result comparison with:
+
+```bash
+python scripts/compare_extracted_csv.py --metadata references/<feature-id>/<model-id>/metadata.json --actual-root external-results/<feature-id>/<model-id>
+```
+
+The command does not run Abaqus and does not parse ODB files. It compares approved `references/.../extracted/*.csv` files with externally generated actual CSV files under `--actual-root`.
+````
+
+- [ ] **Step 2: Update reference model README metadata example**
+
+Add the `comparisons` JSON block shown in this plan.
+
+- [ ] **Step 3: Run documentation-sensitive search**
+
+Run:
+
+```bash
+rg -n "compare_extracted_csv|comparisons|extracted/.*\\.csv" docs scripts
+```
+
+Expected: The new script, tests, and docs mention the comparison contract.
+
+### Task 8: Full Verification
+
+**Files:**
+- No new edits unless failures reveal a bug in this task's changes.
+
+- [ ] **Step 1: Run targeted tests**
+
+```bash
+python -m unittest scripts.test_compare_extracted_csv
+```
+
+Expected: PASS.
+
+- [ ] **Step 2: Run full script tests**
+
+```bash
+python -m unittest discover -s scripts -p "test_*.py"
+```
+
+Expected: PASS.
+
+- [ ] **Step 3: Run reference artifact validation**
+
+```bash
+python scripts/validate_reference_artifacts.py
+```
+
+Expected: `Reference artifact metadata validation succeeded.`
+
+- [ ] **Step 4: Run Fortran validation**
+
+```bash
+python scripts/validate_fortran.py
+```
+
+Expected: PASS, or `No Fortran validation commands configured.` when no manifest exists.
+
+- [ ] **Step 5: Run workspace validation**
+
+```bash
+python scripts/validate_workspace.py
+```
+
+Expected: PASS. It should not require actual CSV outputs because `compare_extracted_csv.py` is explicit-use only.
+
+## Acceptance Criteria
+
+- The project never runs Abaqus and never opens ODB files during CSV comparison.
+- Reference bundle completeness remains checked by `scripts/validate_reference_artifacts.py`.
+- CSV numeric validation is performed by explicit command only.
+- Actual generated CSVs are read from a user-supplied `--actual-root`.
+- Comparison requires declared schema, key columns, value column, unit/coordinate columns, and absolute/relative tolerance.
+- Missing files, schema mismatch, ID mismatch, unit/coordinate mismatch, nonfinite results, and tolerance failures have distinct classifications.
+- JSON report includes enough metrics for Reference Verification Agent handoff: compared rows, missing rows, extra rows, max absolute error, max relative error, RMS error, worst key, and pass/fail.
+
+## Open Decisions
+
+- Whether actual result bundles should live under a conventional ignored path such as `external-results/` or `runs/`. The script should accept any `--actual-root`, so this can remain a documentation convention.
+- Whether comparison metadata should later move from `metadata.json` into feature-specific I/O definition documents. For the first implementation, keep executable comparison rules in `metadata.json` so the script has one deterministic contract source.
@@ -0,0 +1,327 @@
+#!/usr/bin/env python3
+"""Compare externally generated Abaqus ODB-extracted CSV results."""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import json
+import math
+import sys
+from pathlib import Path
+
+try:
+    from validate_reference_artifacts import validate_metadata
+except ImportError:
+    from scripts.validate_reference_artifacts import validate_metadata
+
+
+def load_csv_rows(path: Path) -> tuple[list[str], list[dict[str, str]]]:
+    """Read a CSV file and return its header names and data rows.
+
+    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark,
+    if present, is dropped instead of becoming part of the first header name.
+    Rows shorter than the header are padded with empty strings rather than
+    ``None`` so every declared column maps to a ``str``.
+
+    Args:
+        path: CSV file to read.
+
+    Returns:
+        ``(headers, rows)``: the header names in file order (an empty list
+        when the file has no header row) and one dict per data row.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        UnicodeDecodeError: If the file content is not valid UTF-8.
+    """
+    with path.open(newline="", encoding="utf-8-sig") as handle:
+        reader = csv.DictReader(handle, restval="")
+        # Touch fieldnames first so the header row is consumed before the data rows.
+        headers = list(reader.fieldnames or [])
+        return headers, list(reader)
+
+
+def make_key(row: dict[str, str], key_columns: list[str]) -> tuple[str, ...]:
+    """Build the identity tuple of a CSV row from its key columns.
+
+    A column that is absent from ``row`` or whose value is ``None`` (which
+    ``csv.DictReader`` produces for rows shorter than the header) contributes
+    an empty string, so the key always consists of ``str`` parts and can be
+    sorted and joined safely.
+
+    Args:
+        row: One CSV data row keyed by column name.
+        key_columns: Column names that identify a row, in key order.
+
+    Returns:
+        The values of ``key_columns`` in order, with ``""`` for missing ones.
+    """
+    values = (row.get(column) for column in key_columns)
+    return tuple("" if value is None else value for value in values)
+
+
+def _key_text(key: tuple[str, ...]) -> str:
+    """Render a row key as a single string with ``|`` between its parts."""
+    separator = "|"
+    return separator.join(key)
+
+
+def _parse_finite(value: str) -> float:
+    """Parse ``value`` as a float and reject anything that is not finite.
+
+    Args:
+        value: Text of a CSV value cell.
+
+    Returns:
+        The parsed finite number.
+
+    Raises:
+        ValueError: If ``value`` is not numeric text, is not a string or
+            number at all (for example ``None``), or parses to NaN or an
+            infinity.
+    """
+    try:
+        parsed = float(value)
+    except TypeError as exc:
+        # float(None) raises TypeError; report it as ValueError so callers
+        # that only handle ValueError classify the row instead of crashing.
+        raise ValueError(f"non-numeric value: {value!r}") from exc
+    if not math.isfinite(parsed):
+        raise ValueError(f"nonfinite value: {value}")
+    return parsed
+
+
+def failed_quantity(quantity: str, classification: str, message: str) -> dict:
+    """Build the report entry for a quantity that failed before any metrics exist.
+
+    Args:
+        quantity: Name of the compared quantity.
+        classification: Failure classification label.
+        message: Human-readable detail about the failure.
+
+    Returns:
+        A report dict with ``result`` set to ``"fail"``, all row counters at
+        zero and all error metrics set to ``None``.
+    """
+    report: dict = {
+        "quantity": quantity,
+        "result": "fail",
+        "classification": classification,
+        "message": message,
+    }
+    # Counters start at zero; callers overwrite them when they know better.
+    report.update(dict.fromkeys(("compared_rows", "missing_rows", "extra_rows"), 0))
+    # No numeric comparison happened, so every metric is reported as None.
+    report.update(
+        dict.fromkeys(
+            ("max_abs_error", "max_rel_error", "rms_error", "worst_key", "worst_component"),
+            None,
+        )
+    )
+    return report
+
+
+def validate_columns(headers: list[str], required_columns: list[str]) -> list[str]:
+    """Return the required column names that do not appear in ``headers``.
+
+    The result keeps the order (and any repetition) of ``required_columns``.
+    """
+    missing: list[str] = []
+    for name in required_columns:
+        if name in headers:
+            continue
+        missing.append(name)
+    return missing
+
+
+def duplicate_columns(headers: list[str]) -> list[str]:
+    """Return header names that occur more than once.
+
+    Each repeated name is listed once, in the order in which its second
+    occurrence appears in ``headers``.
+    """
+    occurrences: dict[str, int] = {}
+    repeated: list[str] = []
+    for name in headers:
+        count = occurrences.get(name, 0) + 1
+        occurrences[name] = count
+        # Record the name exactly once, at the moment it is first repeated.
+        if count == 2:
+            repeated.append(name)
+    return repeated
+
+
+def _rows_by_key(rows: list[dict[str, str]], key_columns: list[str]) -> tuple[dict[tuple[str, ...], dict[str, str]], set[tuple[str, ...]]]:
+    """Index rows by their key and report keys that occur more than once.
+
+    Returns:
+        ``(index, repeated)``: ``index`` maps each key to the last row that
+        carries it, and ``repeated`` holds every key seen on two or more rows.
+    """
+    index: dict[tuple[str, ...], dict[str, str]] = {}
+    repeated: set[tuple[str, ...]] = set()
+    for record in rows:
+        row_key = make_key(record, key_columns)
+        already_seen = row_key in index
+        # A later row replaces an earlier one that has the same key.
+        index[row_key] = record
+        if already_seen:
+            repeated.add(row_key)
+    return index, repeated
+
+
+def validate_contract(contract: dict) -> list[str]:
+    """Return the comparison-contract keys that are missing or unusable.
+
+    A key is reported when it is absent, or when its value cannot be used by
+    the comparison: path and column-name entries must be non-empty strings,
+    column lists must be lists of strings, and ``tolerance`` must be a dict
+    whose ``absolute``, ``relative`` and ``relative_floor`` entries (each
+    optional) convert to finite, non-negative floats. A NaN tolerance is
+    rejected because every ``error > allowed`` test against NaN is false,
+    which would make all rows pass.
+
+    Args:
+        contract: The ``comparisons.<quantity>`` entry from the metadata.
+
+    Returns:
+        The offending keys, sorted and without repeats; empty when the
+        contract is usable. If ``contract`` is not a dict at all, every
+        required key is reported.
+    """
+    required_keys = [
+        "reference_csv",
+        "actual_csv",
+        "required_columns",
+        "key_columns",
+        "value_column",
+        "tolerance",
+    ]
+    if not isinstance(contract, dict):
+        # For example `"stresses": null` in metadata.json: nothing is usable.
+        return sorted(required_keys)
+
+    problems = {key for key in required_keys if key not in contract}
+
+    for key in ("reference_csv", "actual_csv", "value_column"):
+        if key in contract and not (isinstance(contract[key], str) and contract[key]):
+            problems.add(key)
+
+    for key in ("required_columns", "key_columns"):
+        columns = contract.get(key)
+        if key in contract and not (
+            isinstance(columns, list) and all(isinstance(column, str) for column in columns)
+        ):
+            # A bare string would otherwise be split into single characters.
+            problems.add(key)
+
+    tolerance = contract.get("tolerance", {})
+    if not isinstance(tolerance, dict):
+        problems.add("tolerance")
+    else:
+        for name in ("absolute", "relative", "relative_floor"):
+            if name not in tolerance:
+                continue
+            try:
+                bound = float(tolerance[name])
+            except (TypeError, ValueError):
+                problems.add("tolerance")
+                break
+            if not math.isfinite(bound) or bound < 0.0:
+                problems.add("tolerance")
+                break
+
+    return sorted(problems)
+
+
+def compare_quantity(quantity: str, contract: dict, reference_root: Path, actual_root: Path) -> dict:
+    """Compare one quantity's actual CSV against its reference CSV.
+
+    Checks run in a fixed order and the first one that fails decides the
+    classification: contract keys, reference file presence, actual file
+    presence, duplicate headers, missing columns, duplicate key rows, row
+    identity, value parsing (over all rows), unit and coordinate-system
+    agreement, and finally numeric tolerance.
+
+    Args:
+        quantity: Name of the quantity being compared; copied into the report.
+        contract: The ``comparisons.<quantity>`` entry from the metadata.
+        reference_root: Directory that ``contract["reference_csv"]`` is
+            relative to.
+        actual_root: Directory that ``contract["actual_csv"]`` is relative to.
+
+    Returns:
+        A report dict with the same keys as ``failed_quantity`` produces.
+        ``result`` is ``"pass"`` with classification ``"N/A"`` when every row
+        is within tolerance, otherwise ``"fail"`` with a classification.
+
+    Raises:
+        OSError, UnicodeDecodeError: Errors from reading either CSV file are
+            not handled here and propagate to the caller.
+    """
+    contract_errors = validate_contract(contract)
+    if contract_errors:
+        return failed_quantity(
+            quantity,
+            "upstream-contract",
+            f"missing comparison contract keys: {', '.join(contract_errors)}",
+        )
+
+    reference_csv = reference_root / contract["reference_csv"]
+    actual_csv = actual_root / contract["actual_csv"]
+    if not reference_csv.exists():
+        return failed_quantity(quantity, "missing-reference-artifact", f"missing reference CSV: {reference_csv}")
+    if not actual_csv.exists():
+        return failed_quantity(quantity, "missing-generated-output", f"missing actual CSV: {actual_csv}")
+
+    reference_headers, reference_rows = load_csv_rows(reference_csv)
+    actual_headers, actual_rows = load_csv_rows(actual_csv)
+
+    required_columns = list(contract["required_columns"])
+    key_columns = list(contract["key_columns"])
+    value_column = contract["value_column"]
+    unit_column = contract.get("unit_column")
+    coordinate_system_column = contract.get("coordinate_system_column")
+
+    repeated_columns = duplicate_columns(reference_headers) + duplicate_columns(actual_headers)
+    if repeated_columns:
+        return failed_quantity(
+            quantity,
+            "schema-mismatch",
+            f"duplicate CSV header columns: {', '.join(sorted(set(repeated_columns)))}",
+        )
+
+    # Every column the contract refers to must exist, not only the ones listed
+    # under required_columns. Otherwise a missing unit/coordinate column would
+    # raise KeyError below and a missing key column would silently match "".
+    optional_columns = [column for column in (unit_column, coordinate_system_column) if column]
+    referenced_columns = list(
+        dict.fromkeys(required_columns + key_columns + [value_column] + optional_columns)
+    )
+    reference_missing_columns = validate_columns(reference_headers, referenced_columns)
+    actual_missing_columns = validate_columns(actual_headers, referenced_columns)
+    if reference_missing_columns or actual_missing_columns:
+        missing = sorted(set(reference_missing_columns + actual_missing_columns))
+        return failed_quantity(quantity, "schema-mismatch", f"missing required columns: {', '.join(missing)}")
+
+    reference_by_key, reference_duplicates = _rows_by_key(reference_rows, key_columns)
+    actual_by_key, actual_duplicates = _rows_by_key(actual_rows, key_columns)
+    duplicate_keys = reference_duplicates | actual_duplicates
+    if duplicate_keys:
+        return failed_quantity(quantity, "schema-mismatch", f"duplicate key rows: {_key_text(sorted(duplicate_keys)[0])}")
+
+    reference_keys = set(reference_by_key)
+    actual_keys = set(actual_by_key)
+    missing_keys = sorted(reference_keys - actual_keys)
+    extra_keys = sorted(actual_keys - reference_keys)
+    if missing_keys or extra_keys:
+        result = failed_quantity(quantity, "id-mismatch", "reference and actual row keys do not match")
+        result["missing_rows"] = len(missing_keys)
+        result["extra_rows"] = len(extra_keys)
+        result["worst_key"] = _key_text((missing_keys or extra_keys)[0])
+        return result
+
+    tolerance = contract.get("tolerance", {})
+    absolute = float(tolerance.get("absolute", 0.0))
+    relative = float(tolerance.get("relative", 0.0))
+    relative_floor = float(tolerance.get("relative_floor", 0.0))
+
+    ordered_keys = sorted(reference_keys)
+
+    # Pass 1: parse every value first, so an unparseable or nonfinite value on
+    # any row outranks a unit/coordinate mismatch on an earlier row.
+    parsed_values: dict[tuple[str, ...], tuple[float, float]] = {}
+    for key in ordered_keys:
+        try:
+            parsed_values[key] = (
+                _parse_finite(reference_by_key[key][value_column]),
+                _parse_finite(actual_by_key[key][value_column]),
+            )
+        except (KeyError, TypeError, ValueError) as exc:
+            result = failed_quantity(quantity, "nonfinite-result", str(exc))
+            result["worst_key"] = _key_text(key)
+            return result
+
+    compared_rows = 0
+    max_abs_error = 0.0
+    max_rel_error = 0.0
+    sum_square_error = 0.0
+    worst_key: tuple[str, ...] | None = None
+    tolerance_failed = False
+
+    # Pass 2: unit/coordinate agreement, then error metrics for each row.
+    for key in ordered_keys:
+        reference_row = reference_by_key[key]
+        actual_row = actual_by_key[key]
+        if unit_column and reference_row[unit_column] != actual_row[unit_column]:
+            result = failed_quantity(quantity, "unit-or-coordinate-mismatch", f"unit mismatch at {_key_text(key)}")
+            result["worst_key"] = _key_text(key)
+            return result
+        if coordinate_system_column and reference_row[coordinate_system_column] != actual_row[coordinate_system_column]:
+            result = failed_quantity(
+                quantity,
+                "unit-or-coordinate-mismatch",
+                f"coordinate system mismatch at {_key_text(key)}",
+            )
+            result["worst_key"] = _key_text(key)
+            return result
+        reference_value, actual_value = parsed_values[key]
+        abs_error = abs(actual_value - reference_value)
+        rel_denominator = max(abs(reference_value), relative_floor)
+        # With a zero reference and a zero floor there is nothing to scale by;
+        # the relative error is then reported as 0.0.
+        rel_error = abs_error / rel_denominator if rel_denominator else 0.0
+        allowed_error = absolute + relative * rel_denominator
+        compared_rows += 1
+        sum_square_error += abs_error * abs_error
+        max_rel_error = max(max_rel_error, rel_error)
+        # The worst row is the one with the largest absolute error.
+        if worst_key is None or abs_error > max_abs_error:
+            max_abs_error = abs_error
+            worst_key = key
+        if abs_error > allowed_error:
+            tolerance_failed = True
+
+    rms_error = math.sqrt(sum_square_error / compared_rows) if compared_rows else 0.0
+    worst_key_text = _key_text(worst_key) if worst_key is not None else None
+    # NOTE(review): takes the last key column as the component name; confirm
+    # that contracts always list the component column last in key_columns.
+    worst_component = worst_key[-1] if worst_key else None
+    result = "fail" if tolerance_failed else "pass"
+    classification = "tolerance-failure" if tolerance_failed else "N/A"
+    return {
+        "quantity": quantity,
+        "result": result,
+        "classification": classification,
+        "message": "",
+        "compared_rows": compared_rows,
+        "missing_rows": 0,
+        "extra_rows": 0,
+        "max_abs_error": max_abs_error,
+        "max_rel_error": max_rel_error,
+        "rms_error": rms_error,
+        "worst_key": worst_key_text,
+        "worst_component": worst_component,
+    }
+
+
+def compare_metadata(
+    metadata_path: Path,
+    actual_root: Path,
+    *,
+    quantities: list[str] | None = None,
+    validate_artifacts: bool = True,
+) -> dict:
+    payload = json.loads(metadata_path.read_text(encoding="utf-8"))
+    comparisons = payload.get("comparisons", {})
+    if not isinstance(comparisons, dict) or (not comparisons and quantities is None):
+        quantity_names = quantities if quantities is not None else ["metadata"]
+        results = [
+            failed_quantity(quantity, "upstream-contract", "missing comparison contracts")
+            for quantity in quantity_names
+        ]
+        return {
+            "metadata": str(metadata_path),
+            "actual_root": str(actual_root),
+            "overall_result": "fail",
+            "quantities": results,
+        }
+    selected_quantities = quantities if quantities is not None else sorted(comparisons)
+    if validate_artifacts:
+        validation_errors = validate_metadata(metadata_path, _project_root_from_metadata(metadata_path))
+        if validation_errors:
+            message = "; ".join(validation_errors)
+            results = [
+                failed_quantity(quantity, "missing-reference-artifact", message)
+                for quantity in (selected_quantities or ["metadata"])
+            ]
+            return {
+                "metadata": str(metadata_path),
+                "actual_root": str(actual_root),
+                "overall_result": "fail",
+                "quantities": results,
+            }
+
+    results = []
+    for quantity in selected_quantities:
+        contract = comparisons.get(quantity)
+        if contract is None:
+            results.append(failed_quantity(quantity, "upstream-contract", f"missing comparison contract: {quantity}"))
+            continue
+        results.append(compare_quantity(quantity, contract, metadata_path.parent, actual_root))
+    overall = "pass" if all(result["result"] == "pass" for result in results) else "fail"
+    return {
+        "metadata": str(metadata_path),
+        "actual_root": str(actual_root),
+        "overall_result": overall,
+        "quantities": results,
+    }
+
+
+class _ArgumentParser(argparse.ArgumentParser):
+    """Argument parser that raises ``ValueError`` from ``error`` with the given message."""
+
+    def error(self, message: str) -> None:
+        # Always raises; the -> None annotation only mirrors the overridden signature.
+        raise ValueError(message)
+
+
+def build_arg_parser() -> argparse.ArgumentParser:
+    parser = _ArgumentParser(description="Compare externally generated ODB-extracted CSV outputs.")
+    parser.add_argument("--metadata", required=True, type=Path, help="Reference metadata.json path.")
+    parser.add_argument("--actual-root", required=True, type=Path, help="Root directory containing actual extracted CSVs.")
+    parser.add_argument("--quantity", action="append", default=None, help="Quantity key to compare. May be repeated.")
+    parser.add_argument("--report-json", type=Path, default=None, help="Optional JSON report output path.")
+    return parser
+
+
+def _format_summary(result: dict) -> str:
+    status = result["result"].upper()
+    parts = [
+        f"{status} {result['quantity']}",
+        f"rows={result['compared_rows']}",
+        f"max_abs_error={result['max_abs_error']}",
+        f"max_rel_error={result['max_rel_error']}",
+        f"rms_error={result['rms_error']}",
+        f"worst_key={result['worst_key']}",
+    ]
+    if result["classification"] != "N/A":
+        parts.insert(2, f"classification={result['classification']}")
+    return " ".join(parts)
+
+
+def _project_root_from_metadata(metadata_path: Path) -> Path:
+    resolved = metadata_path.resolve()
+    for parent in resolved.parents:
+        if parent.name == "references":
+            return parent.parent
+    return resolved.parent
+
+
+def main(argv: list[str] | None = None) -> int:
+    parser = build_arg_parser()
+    try:
+        args = parser.parse_args(argv)
+        report = compare_metadata(args.metadata, args.actual_root, quantities=args.quantity)
+    except (OSError, ValueError, json.JSONDecodeError) as exc:
+        print(f"CSV comparison configuration failed: {exc}", file=sys.stderr)
+        return 2
+
+    if args.report_json is not None:
+        args.report_json.parent.mkdir(parents=True, exist_ok=True)
+        args.report_json.write_text(json.dumps(report, indent=2), encoding="utf-8")
+
+    for result in report["quantities"]:
+        print(_format_summary(result))
+
+    return 0 if report["overall_result"] == "pass" else 1
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -77,7 +77,7 @@ SKILLS = {
    ),
    "abaqus-subroutine-validation": (
        "Subroutine validation",
-        "HARNESS_ABAQUS_VALIDATION=run",
+        "externally generated",
        "ready-for-comparison",
        "source hash",
        "msg/dat/log",
@@ -174,7 +174,7 @@ AGENT_REQUIRED_TERMS = {
    "implementation-agent.toml": ("Fortran source", "Intel oneAPI", "RED -> GREEN -> VERIFY"),
    "build-test-executor-agent.toml": (
        "python scripts/validate_fortran.py",
-        "HARNESS_ABAQUS_VALIDATION=run",
+        "externally generated",
    ),
    "correction-agent.toml": ("Fortran compile", "minimal correction"),
    "reference-verification-agent.toml": (
@@ -263,14 +263,21 @@ class AbaqusSubroutineCodexConfigTests(unittest.TestCase):
        checked_paths += [SKILLS_ROOT / name / "SKILL.md" for name in SKILLS]
        checked_paths += list((SKILLS_ROOT / "harness-workflow").glob("SKILL.md"))
        checked_paths += list((SKILLS_ROOT / "harness-review").glob("SKILL.md"))
+        checked_paths += list((ROOT / "docs").glob("**/README.md"))

        forbidden_terms = (
+            "FESA",
            "FESA solver",
            "FESA FEM",
            "FESA C++",
+            "Nastran",
            "C++17/MSVC",
            "C++/MSVC",
-            "CMake/CTest",
+            "HARNESS_ABAQUS_VALIDATION=run",
+            "opt-in Abaqus",
+            "Abaqus opt-in",
+            "Abaqus execution is valid",
+            "If explicitly configured, run",
        )
        for path in checked_paths:
            with self.subTest(path=path):
@@ -287,7 +294,7 @@ class AbaqusSubroutineCodexConfigTests(unittest.TestCase):
                    "Fortran",
                    "python scripts/validate_fortran.py",
                    "python scripts/validate_reference_artifacts.py",
-                    "HARNESS_ABAQUS_VALIDATION=run",
+                    "externally generated",
                ):
                    self.assertIn(term, text)

@@ -0,0 +1,434 @@
+import csv
+import contextlib
+import hashlib
+import importlib.util
+import io
+import json
+import tempfile
+import unittest
+from pathlib import Path
+
+
+def load_compare_extracted_csv():
+    module_path = Path(__file__).resolve().parent / "compare_extracted_csv.py"
+    spec = importlib.util.spec_from_file_location("compare_extracted_csv", module_path)
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def write_json(path: Path, payload: dict):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+
+
+def write_csv(path: Path, rows: list[dict[str, str]]):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    with path.open("w", newline="", encoding="utf-8") as handle:
+        writer = csv.DictWriter(handle, fieldnames=list(rows[0]))
+        writer.writeheader()
+        writer.writerows(rows)
+
+
+def write_text(path: Path, text: str):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(text, encoding="utf-8")
+
+
+def metadata_payload() -> dict:
+    return {
+        "schema_version": "abaqus-user-subroutine-artifact-v1",
+        "feature_id": "umat",
+        "model_id": "single-element",
+        "artifact_status": "ready-for-comparison",
+        "abaqus": {"version": "2024", "precision": "double"},
+        "compiler": {"vendor": "Intel oneAPI", "name": "ifx", "version": "2024"},
+        "subroutine": {"entry_points": ["UMAT"], "source_files": []},
+        "input_file": "model.inp",
+        "outputs": {
+            "tails": {
+                "msg": "job.msg.tail.txt",
+                "dat": "job.dat.tail.txt",
+                "log": "job.log.tail.txt",
+                "sta": "job.sta.tail.txt",
+            },
+            "csv": {"stresses": "extracted/stresses.csv"},
+        },
+        "extraction": {
+            "source_odb": "job.odb",
+            "tool": "Abaqus Python",
+            "extracted_at": "2026-06-10T00:00:00+09:00",
+            "csv_directory": "extracted",
+        },
+        "comparisons": {
+            "stresses": {
+                "reference_csv": "extracted/stresses.csv",
+                "actual_csv": "extracted/stresses.csv",
+                "required_columns": [
+                    "step",
+                    "frame",
+                    "instance",
+                    "element_label",
+                    "integration_point",
+                    "section_point",
+                    "output_position",
+                    "component",
+                    "coordinate_system",
+                    "unit",
+                    "value",
+                ],
+                "key_columns": [
+                    "step",
+                    "frame",
+                    "instance",
+                    "element_label",
+                    "integration_point",
+                    "section_point",
+                    "output_position",
+                    "component",
+                ],
+                "value_column": "value",
+                "unit_column": "unit",
+                "coordinate_system_column": "coordinate_system",
+                "tolerance": {"absolute": 1.0e-8, "relative": 1.0e-6, "relative_floor": 1.0e-12},
+            }
+        },
+    }
+
+
+def stress_rows(value: str = "100.0") -> list[dict[str, str]]:
+    return [
+        {
+            "step": "Step-1",
+            "frame": "1",
+            "instance": "PART-1-1",
+            "element_label": "1",
+            "integration_point": "1",
+            "section_point": "",
+            "output_position": "INTEGRATION_POINT",
+            "component": "S11",
+            "coordinate_system": "GLOBAL",
+            "unit": "MPa",
+            "value": value,
+        }
+    ]
+
+
+def make_metadata_valid_for_artifact_validation(root: Path, model_dir: Path, payload: dict) -> dict:
+    source = root / "src" / "fortran" / "abaqus" / "UMAT.for"
+    source.parent.mkdir(parents=True, exist_ok=True)
+    source.write_text("      subroutine umat()\n      end\n", encoding="utf-8")
+    source_hash = hashlib.sha256(source.read_bytes()).hexdigest()
+    payload["subroutine"]["source_files"] = [
+        {
+            "path": "src/fortran/abaqus/UMAT.for",
+            "language": "Fortran",
+            "sha256": source_hash,
+        }
+    ]
+    for name in [
+        "model.inp",
+        "job.msg.tail.txt",
+        "job.dat.tail.txt",
+        "job.log.tail.txt",
+        "job.sta.tail.txt",
+    ]:
+        path = model_dir / name
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text("ok\n", encoding="utf-8")
+    return payload
+
+
+def compare_stresses(reference_rows: list[dict[str, str]], actual_rows: list[dict[str, str]]) -> dict:
+    compare = load_compare_extracted_csv()
+    with tempfile.TemporaryDirectory() as tmp:
+        root = Path(tmp)
+        reference = root / "references" / "umat" / "single-element"
+        actual = root / "external-results" / "umat" / "single-element"
+        write_json(reference / "metadata.json", metadata_payload())
+        write_csv(reference / "extracted" / "stresses.csv", reference_rows)
+        write_csv(actual / "extracted" / "stresses.csv", actual_rows)
+
+        return compare.compare_metadata(
+            reference / "metadata.json",
+            actual,
+            quantities=["stresses"],
+            validate_artifacts=False,
+        )
+
+
+def call_main_silently(compare, argv: list[str]) -> int:
+    with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
+        return compare.main(argv)
+
+
+class CompareExtractedCsvTests(unittest.TestCase):
+    """Tests for ``compare_metadata`` failure classifications and the CLI exit codes.
+
+    Each test builds its reference and actual trees in a temporary directory.
+    """
+
+    def test_quantity_passes_when_schema_keys_units_and_values_match_within_tolerance(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            write_json(reference / "metadata.json", metadata_payload())
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+            write_csv(actual / "extracted" / "stresses.csv", stress_rows("100.00000001"))
+
+            report = compare.compare_metadata(
+                reference / "metadata.json",
+                actual,
+                quantities=["stresses"],
+                validate_artifacts=False,
+            )
+
+        self.assertEqual(report["overall_result"], "pass")
+        self.assertEqual(report["quantities"][0]["result"], "pass")
+        self.assertEqual(report["quantities"][0]["classification"], "N/A")
+        self.assertEqual(report["quantities"][0]["compared_rows"], 1)
+
+    def test_missing_actual_csv_is_missing_generated_output(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            write_json(reference / "metadata.json", metadata_payload())
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+
+            report = compare.compare_metadata(
+                reference / "metadata.json",
+                actual,
+                quantities=["stresses"],
+                validate_artifacts=False,
+            )
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "missing-generated-output")
+
+    def test_missing_required_column_is_schema_mismatch(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            write_json(reference / "metadata.json", metadata_payload())
+            row = stress_rows("100.0")[0]
+            write_csv(reference / "extracted" / "stresses.csv", [row])
+            actual_row = dict(row)
+            actual_row.pop("coordinate_system")
+            write_csv(actual / "extracted" / "stresses.csv", [actual_row])
+
+            report = compare.compare_metadata(
+                reference / "metadata.json",
+                actual,
+                quantities=["stresses"],
+                validate_artifacts=False,
+            )
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "schema-mismatch")
+
+    def test_missing_quantity_contract_is_upstream_contract(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            payload = metadata_payload()
+            payload["comparisons"].pop("stresses")
+            write_json(reference / "metadata.json", payload)
+
+            report = compare.compare_metadata(
+                reference / "metadata.json",
+                actual,
+                quantities=["stresses"],
+                validate_artifacts=False,
+            )
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "upstream-contract")
+
+    def test_missing_comparisons_block_is_upstream_contract(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            payload = metadata_payload()
+            payload.pop("comparisons")
+            write_json(reference / "metadata.json", payload)
+
+            report = compare.compare_metadata(reference / "metadata.json", actual, validate_artifacts=False)
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "upstream-contract")
+
+    def test_incomplete_quantity_contract_is_upstream_contract(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            payload = metadata_payload()
+            payload["comparisons"]["stresses"].pop("value_column")
+            write_json(reference / "metadata.json", payload)
+
+            report = compare.compare_metadata(
+                reference / "metadata.json",
+                actual,
+                quantities=["stresses"],
+                validate_artifacts=False,
+            )
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "upstream-contract")
+
+    def test_duplicate_header_is_schema_mismatch(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            write_json(reference / "metadata.json", metadata_payload())
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+            # Hand-written CSV text: the header repeats the "value" column.
+            header = ",".join(list(stress_rows("100.0")[0]) + ["value"])
+            row = ",".join(stress_rows("100.0")[0].values()) + ",100.0"
+            write_text(actual / "extracted" / "stresses.csv", f"{header}\n{row}\n")
+
+            report = compare.compare_metadata(
+                reference / "metadata.json",
+                actual,
+                quantities=["stresses"],
+                validate_artifacts=False,
+            )
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "schema-mismatch")
+
+    def test_changed_row_key_is_id_mismatch(self):
+        actual_row = dict(stress_rows("100.0")[0])
+        actual_row["element_label"] = "2"
+
+        report = compare_stresses(stress_rows("100.0"), [actual_row])
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "id-mismatch")
+        self.assertEqual(report["quantities"][0]["missing_rows"], 1)
+        self.assertEqual(report["quantities"][0]["extra_rows"], 1)
+
+    def test_unit_mismatch_is_unit_or_coordinate_mismatch(self):
+        actual_row = dict(stress_rows("100.0")[0])
+        actual_row["unit"] = "Pa"
+
+        report = compare_stresses(stress_rows("100.0"), [actual_row])
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "unit-or-coordinate-mismatch")
+
+    def test_coordinate_system_mismatch_is_unit_or_coordinate_mismatch(self):
+        actual_row = dict(stress_rows("100.0")[0])
+        actual_row["coordinate_system"] = "LOCAL-1"
+
+        report = compare_stresses(stress_rows("100.0"), [actual_row])
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "unit-or-coordinate-mismatch")
+
+    def test_nonfinite_value_is_nonfinite_result(self):
+        report = compare_stresses(stress_rows("100.0"), stress_rows("nan"))
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "nonfinite-result")
+
+    def test_value_outside_tolerance_is_tolerance_failure(self):
+        report = compare_stresses(stress_rows("100.0"), stress_rows("101.0"))
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "tolerance-failure")
+        self.assertEqual(report["quantities"][0]["max_abs_error"], 1.0)
+        self.assertEqual(report["quantities"][0]["result"], "fail")
+
+    def test_cli_writes_json_report_and_returns_zero_when_all_quantities_pass(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            report_json = root / "build" / "reference-verification" / "umat-single-element.json"
+            payload = make_metadata_valid_for_artifact_validation(root, reference, metadata_payload())
+            write_json(reference / "metadata.json", payload)
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+            write_csv(actual / "extracted" / "stresses.csv", stress_rows("100.00000001"))
+
+            exit_code = call_main_silently(
+                compare,
+                [
+                    "--metadata",
+                    str(reference / "metadata.json"),
+                    "--actual-root",
+                    str(actual),
+                    "--quantity",
+                    "stresses",
+                    "--report-json",
+                    str(report_json),
+                ],
+            )
+
+            report = json.loads(report_json.read_text(encoding="utf-8"))
+
+        self.assertEqual(exit_code, 0)
+        self.assertEqual(report["overall_result"], "pass")
+
+    def test_cli_writes_json_report_and_returns_one_when_a_quantity_fails(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            report_json = root / "build" / "reference-verification" / "umat-single-element.json"
+            payload = make_metadata_valid_for_artifact_validation(root, reference, metadata_payload())
+            write_json(reference / "metadata.json", payload)
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+            write_csv(actual / "extracted" / "stresses.csv", stress_rows("101.0"))
+
+            exit_code = call_main_silently(
+                compare,
+                [
+                    "--metadata",
+                    str(reference / "metadata.json"),
+                    "--actual-root",
+                    str(actual),
+                    "--quantity",
+                    "stresses",
+                    "--report-json",
+                    str(report_json),
+                ],
+            )
+
+            report = json.loads(report_json.read_text(encoding="utf-8"))
+
+        self.assertEqual(exit_code, 1)
+        self.assertEqual(report["overall_result"], "fail")
+
+    def test_cli_returns_two_for_invalid_arguments(self):
+        compare = load_compare_extracted_csv()
+
+        self.assertEqual(call_main_silently(compare, []), 2)
+
+    def test_default_comparison_validates_reference_artifact_metadata(self):
+        compare = load_compare_extracted_csv()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            reference = root / "references" / "umat" / "single-element"
+            actual = root / "external-results" / "umat" / "single-element"
+            write_json(reference / "metadata.json", metadata_payload())
+            write_csv(reference / "extracted" / "stresses.csv", stress_rows("100.0"))
+            write_csv(actual / "extracted" / "stresses.csv", stress_rows("100.0"))
+
+            # validate_artifacts is left at its default here, unlike the tests above.
+            report = compare.compare_metadata(reference / "metadata.json", actual)
+
+        self.assertEqual(report["overall_result"], "fail")
+        self.assertEqual(report["quantities"][0]["classification"], "missing-reference-artifact")
+
+
+if __name__ == "__main__":
+    unittest.main()
@@ -19,6 +19,69 @@ def write_json(path: Path, payload: dict):
    path.write_text(json.dumps(payload, indent=2), encoding="utf-8")


+def write_text(path: Path, text: str = "ok\n"):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(text, encoding="utf-8")
+
+
+def ready_metadata(source_hash: str) -> dict:
+    return {
+        "schema_version": "abaqus-user-subroutine-artifact-v1",
+        "feature_id": "umat",
+        "model_id": "single-element",
+        "artifact_status": "ready-for-comparison",
+        "abaqus": {"version": "2024", "precision": "double"},
+        "compiler": {"vendor": "Intel oneAPI", "name": "ifx", "version": "2024"},
+        "subroutine": {
+            "entry_points": ["UMAT"],
+            "source_files": [
+                {
+                    "path": "src/fortran/abaqus/UMAT.for",
+                    "language": "Fortran",
+                    "sha256": source_hash,
+                }
+            ],
+        },
+        "input_file": "model.inp",
+        "outputs": {
+            "tails": {
+                "msg": "job.msg.tail.txt",
+                "dat": "job.dat.tail.txt",
+                "log": "job.log.tail.txt",
+                "sta": "job.sta.tail.txt",
+            },
+            "csv": {"stresses": "extracted/stresses.csv"},
+        },
+        "extraction": {
+            "source_odb": "job.odb",
+            "tool": "Abaqus Python",
+            "extracted_at": "2026-06-10T00:00:00+09:00",
+            "csv_directory": "extracted",
+            "script": "extraction/extract_odb_to_csv.py",
+        },
+    }
+
+
+def create_ready_bundle(root: Path) -> tuple[Path, dict]:
+    source = root / "src" / "fortran" / "abaqus" / "UMAT.for"
+    write_text(source, "      subroutine umat()\n      end\n")
+    source_hash = hashlib.sha256(source.read_bytes()).hexdigest()
+
+    model_dir = root / "references" / "umat" / "single-element"
+    for name in [
+        "model.inp",
+        "job.msg.tail.txt",
+        "job.dat.tail.txt",
+        "job.log.tail.txt",
+        "job.sta.tail.txt",
+        "extracted/stresses.csv",
+        "extraction/extract_odb_to_csv.py",
+    ]:
+        write_text(model_dir / name)
+
+    return model_dir, ready_metadata(source_hash)
+
+
 class ValidateReferenceArtifactsTests(unittest.TestCase):
    def test_missing_references_directory_is_valid(self):
        validator = load_validate_reference_artifacts()
@@ -52,7 +115,7 @@ class ValidateReferenceArtifactsTests(unittest.TestCase):
                    "feature_id": "umat",
                    "model_id": "single-element",
                    "artifact_status": "ready-for-comparison",
-                    "abaqus": {"version": "2024", "precision": "double", "command": "abaqus job=case user=UMAT.for"},
+                    "abaqus": {"version": "2024", "precision": "double"},
                    "compiler": {"vendor": "Intel oneAPI", "name": "ifx", "version": "2024"},
                    "subroutine": {
                        "entry_points": ["UMAT"],
@@ -61,7 +124,7 @@ class ValidateReferenceArtifactsTests(unittest.TestCase):
                    "input_file": "model.inp",
                    "outputs": {
                        "tails": {"msg": "job.msg.tail.txt", "dat": "job.dat.tail.txt", "log": "job.log.tail.txt"},
-                        "csv": {"stresses": "stresses.csv"},
+                        "csv": {"stresses": "extracted/stresses.csv"},
                    },
                },
            )
@@ -69,53 +132,60 @@ class ValidateReferenceArtifactsTests(unittest.TestCase):
            errors = validator.validate_root(root)

        self.assertTrue(any("missing input_file" in error for error in errors))
-        self.assertTrue(any("missing output tail" in error for error in errors))
+        self.assertTrue(any("missing output tail sta" in error for error in errors))
+        self.assertTrue(any("missing ready-for-comparison key extraction" in error for error in errors))
        self.assertTrue(any("missing csv output" in error for error in errors))
        self.assertTrue(any("missing source file" in error for error in errors))

+    def test_ready_for_comparison_accepts_external_bundle_without_abaqus_command(self):
+        """A complete bundle with a well-formed odb sha256 file yields no validation errors."""
+        validator = load_validate_reference_artifacts()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            model_dir, payload = create_ready_bundle(root)
+            payload["extraction"]["odb_sha256_file"] = "result.odb.sha256"
+            write_text(model_dir / "result.odb.sha256", "0" * 64 + "  job.odb\n")
+            write_json(model_dir / "metadata.json", payload)
+
+            self.assertEqual(validator.validate_root(root), [])
+
    def test_ready_for_comparison_checks_source_sha256(self):
        validator = load_validate_reference_artifacts()
        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
-            source = root / "src" / "fortran" / "abaqus" / "UMAT.for"
-            source.parent.mkdir(parents=True)
-            source.write_text("      subroutine umat()\n      end\n", encoding="utf-8")
-            source_hash = hashlib.sha256(source.read_bytes()).hexdigest()
-            model_dir = root / "references" / "umat" / "single-element"
-            for name in ["model.inp", "job.msg.tail.txt", "job.dat.tail.txt", "job.log.tail.txt", "stresses.csv"]:
-                (model_dir / name).parent.mkdir(parents=True, exist_ok=True)
-                (model_dir / name).write_text("ok\n", encoding="utf-8")
-            write_json(
-                model_dir / "metadata.json",
-                {
-                    "schema_version": "abaqus-user-subroutine-artifact-v1",
-                    "feature_id": "umat",
-                    "model_id": "single-element",
-                    "artifact_status": "ready-for-comparison",
-                    "abaqus": {"version": "2024", "precision": "double", "command": "abaqus job=case user=UMAT.for"},
-                    "compiler": {"vendor": "Intel oneAPI", "name": "ifx", "version": "2024"},
-                    "subroutine": {
-                        "entry_points": ["UMAT"],
-                        "source_files": [
-                            {
-                                "path": "src/fortran/abaqus/UMAT.for",
-                                "language": "Fortran",
-                                "sha256": "0" * len(source_hash),
-                            }
-                        ],
-                    },
-                    "input_file": "model.inp",
-                    "outputs": {
-                        "tails": {"msg": "job.msg.tail.txt", "dat": "job.dat.tail.txt", "log": "job.log.tail.txt"},
-                        "csv": {"stresses": "stresses.csv"},
-                    },
-                },
-            )
+            model_dir, payload = create_ready_bundle(root)
+            payload["subroutine"]["source_files"][0]["sha256"] = "0" * 64
+            write_json(model_dir / "metadata.json", payload)

            errors = validator.validate_root(root)

        self.assertTrue(any("sha256 mismatch" in error for error in errors))

+    def test_ready_for_comparison_rejects_csv_outside_extracted_directory(self):
+        """A CSV output that exists but is not under ``extracted/`` is reported as an error."""
+        validator = load_validate_reference_artifacts()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            model_dir, payload = create_ready_bundle(root)
+            payload["outputs"]["csv"] = {"stresses": "stresses.csv"}
+            write_text(model_dir / "stresses.csv")
+            write_json(model_dir / "metadata.json", payload)
+
+            errors = validator.validate_root(root)
+
+        self.assertTrue(any("csv output stresses must match extracted/*.csv" in error for error in errors))
+
+    def test_ready_for_comparison_checks_optional_odb_sha256_file(self):
+        """An odb sha256 file whose content is not a SHA-256 digest is reported as invalid."""
+        validator = load_validate_reference_artifacts()
+        with tempfile.TemporaryDirectory() as tmp:
+            root = Path(tmp)
+            model_dir, payload = create_ready_bundle(root)
+            payload["extraction"]["odb_sha256_file"] = "result.odb.sha256"
+            write_text(model_dir / "result.odb.sha256", "not-a-sha\n")
+            write_json(model_dir / "metadata.json", payload)
+
+            errors = validator.validate_root(root)
+
+        self.assertTrue(any("invalid odb_sha256_file" in error for error in errors))
+

 if __name__ == "__main__":
    unittest.main()
@@ -5,6 +5,7 @@ from __future__ import annotations

 import hashlib
 import json
+import re
 import sys
 from pathlib import Path

@@ -12,6 +13,7 @@ from pathlib import Path
 SCHEMA_VERSION = "abaqus-user-subroutine-artifact-v1"
 VALID_STATUSES = {"draft", "needs-reference-artifacts", "ready-for-comparison", "blocked"}
 READY_STATUS = "ready-for-comparison"
+# Anchored pattern for a SHA-256 hex digest: 64 hexadecimal characters, either case.
+# NOTE: with ``.match()`` the ``$`` anchor also matches just before a single trailing newline.
+SHA256_RE = re.compile(r"^[0-9a-fA-F]{64}$")


 def sha256_file(path: Path) -> str:
@@ -64,6 +66,70 @@ def _require_ready_key(path: Path, payload: dict, *keys: str) -> list[str]:
    return []


+def _is_safe_relative_path(path_text: str) -> bool:
+    """Return True when *path_text* is not absolute and has no ``..`` component."""
+    parsed = Path(path_text)
+    if parsed.is_absolute():
+        return False
+    # Reject any parent-directory component, wherever it appears in the path.
+    return all(part != ".." for part in parsed.parts)
+
+
+def _is_extracted_csv_path(path_text: str) -> bool:
+    """Return True when *path_text* has the form ``extracted/<name>.csv``.
+
+    The path must pass ``_is_safe_relative_path``, consist of exactly two
+    components with ``extracted`` first, and end in ``.csv`` (suffix compared
+    case-insensitively).
+    """
+    if not _is_safe_relative_path(path_text):
+        return False
+
+    parsed = Path(path_text)
+    components = parsed.parts
+    if len(components) != 2 or components[0] != "extracted":
+        return False
+    return parsed.suffix.lower() == ".csv"
+
+
+def _validate_optional_sha256_file(path: Path, model_dir: Path, key: str, value: object) -> list[str]:
+    """Validate an optional metadata entry naming a file that holds a SHA-256 digest.
+
+    Args:
+        path: Metadata file being validated; used only as the prefix of error messages.
+        model_dir: Artifact bundle directory that ``value`` is resolved against.
+        key: Metadata key name, echoed in error messages.
+        value: Raw metadata value; ``None`` means the entry is absent.
+
+    Returns:
+        A list holding at most one error message; empty when the entry is
+        absent or the referenced file starts with a valid digest.
+    """
+    if value is None:
+        return []
+    if not isinstance(value, str) or not value:
+        return [f"{path}: invalid {key}"]
+    if not _is_safe_relative_path(value):
+        return [f"{path}: {key} must be a relative path inside the artifact bundle"]
+
+    sha_path = model_dir / value
+    if not sha_path.exists():
+        return [f"{path}: missing {key}: {value}"]
+
+    try:
+        tokens = sha_path.read_text(encoding="utf-8").split()
+    except (OSError, UnicodeDecodeError):
+        # A directory at this path, an unreadable file, or non-UTF-8 bytes cannot
+        # hold a digest; report it as invalid instead of crashing the validator.
+        tokens = []
+
+    # Only the first whitespace-separated token is checked, so lines of the form
+    # "<digest>  <filename>" are accepted. An empty or whitespace-only file
+    # yields no tokens and is reported as invalid rather than raising IndexError.
+    if not tokens or not SHA256_RE.match(tokens[0]):
+        return [f"{path}: invalid {key}: {value}"]
+    return []
+
+
+def _validate_extraction(path: Path, model_dir: Path, payload: dict) -> list[str]:
+    """Validate the ``extraction`` provenance object of a metadata payload.
+
+    Args:
+        path: Metadata file being validated; used only as the prefix of error messages.
+        model_dir: Artifact bundle directory that relative paths are resolved against.
+        payload: Parsed metadata; its ``extraction`` entry is inspected.
+
+    Returns:
+        Error messages for every problem found; empty when the provenance is valid.
+    """
+    extraction = payload.get("extraction")
+    if not isinstance(extraction, dict):
+        return [f"{path}: extraction provenance must be an object"]
+
+    errors: list[str] = [
+        f"{path}: missing extraction provenance key {key}"
+        for key in ("source_odb", "tool", "extracted_at", "csv_directory")
+        if not extraction.get(key)
+    ]
+
+    csv_directory = extraction.get("csv_directory")
+    # A falsy value was already reported as missing above. Any other value,
+    # including a non-string one, must be exactly "extracted".
+    if csv_directory and csv_directory != "extracted":
+        errors.append(f"{path}: extraction.csv_directory must be extracted")
+
+    script = extraction.get("script")
+    if script is not None:
+        if not isinstance(script, str) or not script:
+            errors.append(f"{path}: invalid extraction script")
+        elif not _is_safe_relative_path(script):
+            errors.append(f"{path}: extraction script must be a relative path inside the artifact bundle")
+        elif not (model_dir / script).exists():
+            errors.append(f"{path}: missing extraction script: {script}")
+
+    odb_sha256 = extraction.get("odb_sha256")
+    # fullmatch rejects a digest followed by a trailing newline, which
+    # match() with a "$" anchor would accept.
+    if odb_sha256 is not None and (not isinstance(odb_sha256, str) or not SHA256_RE.fullmatch(odb_sha256)):
+        errors.append(f"{path}: invalid odb_sha256")
+
+    errors.extend(_validate_optional_sha256_file(path, model_dir, "odb_sha256_file", extraction.get("odb_sha256_file")))
+    return errors
+
+
 def _validate_ready_files(path: Path, root: Path, payload: dict) -> list[str]:
    errors: list[str] = []
    model_dir = path.parent
@@ -71,7 +137,6 @@ def _validate_ready_files(path: Path, root: Path, payload: dict) -> list[str]:
    for keys in (
        ("abaqus", "version"),
        ("abaqus", "precision"),
-        ("abaqus", "command"),
        ("compiler", "vendor"),
        ("compiler", "name"),
        ("compiler", "version"),
@@ -80,6 +145,7 @@ def _validate_ready_files(path: Path, root: Path, payload: dict) -> list[str]:
        ("input_file",),
        ("outputs", "tails"),
        ("outputs", "csv"),
+        ("extraction",),
    ):
        errors.extend(_require_ready_key(path, payload, *keys))

@@ -89,7 +155,7 @@ def _validate_ready_files(path: Path, root: Path, payload: dict) -> list[str]:

    tails = _nested(payload, "outputs", "tails")
    if isinstance(tails, dict):
-        for key in ("msg", "dat", "log"):
+        for key in ("msg", "dat", "log", "sta"):
            tail_path = tails.get(key)
            if not isinstance(tail_path, str) or not tail_path:
                errors.append(f"{path}: missing output tail {key}")
@@ -103,9 +169,14 @@ def _validate_ready_files(path: Path, root: Path, payload: dict) -> list[str]:
        for key, csv_path in csv_outputs.items():
            if not isinstance(csv_path, str) or not csv_path:
                errors.append(f"{path}: missing csv output {key}")
+            elif not _is_extracted_csv_path(csv_path):
+                errors.append(f"{path}: csv output {key} must match extracted/*.csv")
            elif not (model_dir / csv_path).exists():
                errors.append(f"{path}: missing csv output {key}: {csv_path}")

+    if "extraction" in payload:
+        errors.extend(_validate_extraction(path, model_dir, payload))
+
    source_files = _nested(payload, "subroutine", "source_files")
    if isinstance(source_files, list):
        if not source_files: