initial commit FESurrogateModelTutorial

This commit is contained in:
김경종
2026-05-21 17:03:51 +09:00
parent 93665d9ee6
commit 43b86669fa
122 changed files with 7929 additions and 0 deletions
@@ -0,0 +1 @@
@@ -0,0 +1,173 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "7e3ade70",
"metadata": {},
"source": [
"# 00 Beam2D FEM Dataset\n",
"\n",
"Validate the repository solver against the BeamExamples reference case, then draw LHS samples and use the solver to build the dataset for the FEM surrogate models."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "71929240",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"\n",
"from femsurrogate.data.bounds import DEFAULT_PARAMETER_BOUNDS\n",
"from femsurrogate.data.dataset import build_dataset\n",
"from femsurrogate.data.sampling import generate_lhs_samples\n",
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, TARGET_COLUMNS\n",
"from femsurrogate.fea.io import read_beam_example, read_expected_displacements\n",
"from femsurrogate.fea.solver import solve_linear_static\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Output directory for the reference dataset files.\n",
"REFERENCE_DIR = ROOT / \"data\" / \"reference\"\n",
"REFERENCE_DIR.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "df256ebc",
"metadata": {},
"source": [
"## BeamExamples reference check\n",
"\n",
"Compare the solver's `Ux`, `Uy`, and `Rz` at every fixture node against the expected values and check the maximum absolute error."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a8c947c",
"metadata": {},
"outputs": [],
"source": [
"model = read_beam_example(ROOT / \"BeamExamples\" / \"CantileverBeam.txt\")\n",
"expected = read_expected_displacements(ROOT / \"BeamExamples\" / \"CantileverBeam_Displacements.txt\")\n",
"actual = solve_linear_static(model)\n",
"\n",
"# Largest absolute difference allowed between solver output and the reference values.\n",
"MAX_ALLOWED_ABS_ERROR = 5e-7\n",
"\n",
"# Worst-case error per reference node over the ux, uy and rz components.\n",
"node_errors = []\n",
"for node_id, expected_displacement in expected.items():\n",
"    actual_displacement = actual[node_id]\n",
"    actual_values = np.array(\n",
"        [actual_displacement.ux, actual_displacement.uy, actual_displacement.rz]\n",
"    )\n",
"    expected_values = np.array(\n",
"        [expected_displacement.ux, expected_displacement.uy, expected_displacement.rz]\n",
"    )\n",
"    node_errors.append(float(np.max(np.abs(actual_values - expected_values))))\n",
"\n",
"# An empty reference mapping must not pass the check vacuously.\n",
"assert node_errors, \"no reference displacements were loaded\"\n",
"\n",
"# np.max propagates NaN (the builtin max(0.0, nan) returns 0.0), so a NaN\n",
"# displacement fails the tolerance check below instead of being ignored.\n",
"max_abs_error = float(np.max(node_errors))\n",
"assert max_abs_error <= MAX_ALLOWED_ABS_ERROR, max_abs_error\n",
"max_abs_error"
]
},
{
"cell_type": "markdown",
"id": "6620d0ee",
"metadata": {},
"source": [
"## LHS sampling and FEM batch run"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9923fd04",
"metadata": {},
"outputs": [],
"source": [
"N_SAMPLES = 300\n",
"\n",
"# Draw the LHS samples from the default bounds, then build the dataset from them.\n",
"samples = generate_lhs_samples(\n",
"    DEFAULT_PARAMETER_BOUNDS,\n",
"    n=N_SAMPLES,\n",
"    seed=DEFAULT_RANDOM_SEED,\n",
")\n",
"dataset = build_dataset(samples)\n",
"\n",
"# The dataset length must match the sample count, and every target column must be present.\n",
"assert N_SAMPLES == len(dataset)\n",
"assert set(dataset.columns).issuperset(TARGET_COLUMNS)\n",
"dataset.head()"
]
},
{
"cell_type": "markdown",
"id": "c0efee15",
"metadata": {},
"source": [
"## Save the dataset\n",
"\n",
"Write the CSV and its metadata to `data/reference/` so that the later notebooks can load them."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d80890bd",
"metadata": {},
"outputs": [],
"source": [
"dataset_path = REFERENCE_DIR / \"beam2d_lhs_300.csv\"\n",
"metadata_path = REFERENCE_DIR / \"beam2d_lhs_300_metadata.json\"\n",
"\n",
"# Write the CSV first, then the JSON file that records how it was generated.\n",
"dataset.to_csv(dataset_path, index=False)\n",
"\n",
"# Lower/upper limits of each sampled parameter, keyed by parameter name.\n",
"parameter_bounds = {\n",
"    name: {\"lower\": bound.lower, \"upper\": bound.upper}\n",
"    for name, bound in DEFAULT_PARAMETER_BOUNDS.items()\n",
"}\n",
"metadata = {\n",
"    \"dataset_name\": \"beam2d_lhs_300\",\n",
"    \"sample_count\": N_SAMPLES,\n",
"    \"random_seed\": DEFAULT_RANDOM_SEED,\n",
"    \"unit_system\": \"SI\",\n",
"    \"fea_model\": \"2D Euler-Bernoulli beam/frame, linear static\",\n",
"    \"target_columns\": list(TARGET_COLUMNS),\n",
"    \"parameter_bounds\": parameter_bounds,\n",
"    \"notes\": \"Generated by notebooks/00_beam2d_fea_dataset.ipynb using src/femsurrogate.\",\n",
"}\n",
"metadata_json = json.dumps(metadata, indent=2)\n",
"metadata_path.write_text(metadata_json, encoding=\"utf-8\")\n",
"\n",
"# Show where both files were written.\n",
"saved_paths = {\"dataset_path\": str(dataset_path), \"metadata_path\": str(metadata_path)}\n",
"saved_paths"
]
},
{
"cell_type": "markdown",
"id": "d56bcddb",
"metadata": {},
"source": [
"## Quick sanity check"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e560e50c",
"metadata": {},
"outputs": [],
"source": [
"# Transposed summary statistics for the listed columns followed by the target columns.\n",
"summary_columns = [\"L_m\", \"b_m\", \"h_m\", \"E_pa\", \"P_n\"] + list(TARGET_COLUMNS)\n",
"dataset.describe().T.loc[summary_columns]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "260fc3ce",
"metadata": {},
"source": [
"# Response Surface Surrogate\n",
"\n",
"The response surface model combines polynomial features with Ridge regression and serves as the baseline.\n",
"\n",
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56528a9c",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"\n",
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
"from femsurrogate.surrogates.registry import make_model\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Input dataset and the output directories for metrics, predictions and figures.\n",
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
"    directory.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "7cd36ad9",
"metadata": {},
"source": [
"## Dataset and split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50689a07",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_csv(DATASET_PATH)\n",
"target_column = \"tip_uy_m\"\n",
"\n",
"# Split settings; test_size=0.2 presumably means a 20% hold-out (confirm in split_dataset).\n",
"split_options = {\n",
"    \"feature_columns\": list(PARAMETER_COLUMNS),\n",
"    \"target_column\": target_column,\n",
"    \"test_size\": 0.2,\n",
"    \"random_state\": DEFAULT_RANDOM_SEED,\n",
"}\n",
"split = split_dataset(dataset, **split_options)\n",
"\n",
"# Show the train and test sizes.\n",
"n_train, n_test = len(split.X_train), len(split.X_test)\n",
"(n_train, n_test)"
]
},
{
"cell_type": "markdown",
"id": "0473847f",
"metadata": {},
"source": [
"## Train and evaluate the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "539ffb4d",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"rsm\"\n",
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
"\n",
"# Train/test data, passed positionally to evaluate_model in this order.\n",
"evaluation_inputs = (split.X_train, split.X_test, split.y_train, split.y_test)\n",
"\n",
"# NOTE(review): ConvergenceWarning is silenced for the duration of the call,\n",
"# so a fit that did not converge is not reported here.\n",
"with warnings.catch_warnings():\n",
"    warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"    result = evaluate_model(\n",
"        model,\n",
"        *evaluation_inputs,\n",
"        model_name=MODEL_NAME,\n",
"        target_column=target_column,\n",
"    )\n",
"\n",
"metrics = metrics_to_dict(result.metrics)\n",
"metrics"
]
},
{
"cell_type": "markdown",
"id": "78c34610",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e15bf80d",
"metadata": {},
"outputs": [],
"source": [
"# Output file names are derived from MODEL_NAME.\n",
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
"\n",
"metrics_json = json.dumps(metrics, indent=2)\n",
"metrics_path.write_text(metrics_json, encoding=\"utf-8\")\n",
"result.predictions.to_csv(predictions_path, index=False)\n",
"\n",
"# Show where both files were written.\n",
"saved_paths = {\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}\n",
"saved_paths"
]
},
{
"cell_type": "markdown",
"id": "2142e40b",
"metadata": {},
"source": [
"## Diagnostic plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "14b89510",
"metadata": {},
"outputs": [],
"source": [
"# savefig settings shared by both diagnostic figures.\n",
"save_options = {\"dpi\": 150, \"bbox_inches\": \"tight\"}\n",
"\n",
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
"\n",
"parity_path = FIGURES_DIR / f\"{MODEL_NAME}_parity.png\"\n",
"residual_path = FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\"\n",
"parity_fig.savefig(parity_path, **save_options)\n",
"residual_fig.savefig(residual_path, **save_options)\n",
"\n",
"# Display the parity figure as the cell output.\n",
"parity_fig"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "144f56e0",
"metadata": {},
"source": [
"# Gaussian Process / Kriging Surrogate\n",
"\n",
"GPR is a kernel-based surrogate that is well suited to smooth responses.\n",
"\n",
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a5a2974",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"\n",
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
"from femsurrogate.surrogates.registry import make_model\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Input dataset and the output directories for metrics, predictions and figures.\n",
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
"    directory.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "a2c2b3fe",
"metadata": {},
"source": [
"## Dataset and split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef13216f",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_csv(DATASET_PATH)\n",
"target_column = \"tip_uy_m\"\n",
"\n",
"# Split settings; test_size=0.2 presumably means a 20% hold-out (confirm in split_dataset).\n",
"split_options = {\n",
"    \"feature_columns\": list(PARAMETER_COLUMNS),\n",
"    \"target_column\": target_column,\n",
"    \"test_size\": 0.2,\n",
"    \"random_state\": DEFAULT_RANDOM_SEED,\n",
"}\n",
"split = split_dataset(dataset, **split_options)\n",
"\n",
"# Show the train and test sizes.\n",
"n_train, n_test = len(split.X_train), len(split.X_test)\n",
"(n_train, n_test)"
]
},
{
"cell_type": "markdown",
"id": "6c3be3f4",
"metadata": {},
"source": [
"## Train and evaluate the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5e09c8a",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"gpr\"\n",
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
"\n",
"# Train/test data, passed positionally to evaluate_model in this order.\n",
"evaluation_inputs = (split.X_train, split.X_test, split.y_train, split.y_test)\n",
"\n",
"# NOTE(review): ConvergenceWarning is silenced for the duration of the call,\n",
"# so a fit that did not converge is not reported here.\n",
"with warnings.catch_warnings():\n",
"    warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"    result = evaluate_model(\n",
"        model,\n",
"        *evaluation_inputs,\n",
"        model_name=MODEL_NAME,\n",
"        target_column=target_column,\n",
"    )\n",
"\n",
"metrics = metrics_to_dict(result.metrics)\n",
"metrics"
]
},
{
"cell_type": "markdown",
"id": "4b9e74af",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5af40658",
"metadata": {},
"outputs": [],
"source": [
"# Output file names are derived from MODEL_NAME.\n",
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
"\n",
"metrics_json = json.dumps(metrics, indent=2)\n",
"metrics_path.write_text(metrics_json, encoding=\"utf-8\")\n",
"result.predictions.to_csv(predictions_path, index=False)\n",
"\n",
"# Show where both files were written.\n",
"saved_paths = {\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}\n",
"saved_paths"
]
},
{
"cell_type": "markdown",
"id": "37a881b4",
"metadata": {},
"source": [
"## Diagnostic plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d156a54d",
"metadata": {},
"outputs": [],
"source": [
"# savefig settings shared by both diagnostic figures.\n",
"save_options = {\"dpi\": 150, \"bbox_inches\": \"tight\"}\n",
"\n",
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
"\n",
"parity_path = FIGURES_DIR / f\"{MODEL_NAME}_parity.png\"\n",
"residual_path = FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\"\n",
"parity_fig.savefig(parity_path, **save_options)\n",
"residual_fig.savefig(residual_path, **save_options)\n",
"\n",
"# Display the parity figure as the cell output.\n",
"parity_fig"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "1638813f",
"metadata": {},
"source": [
"# Random Forest Surrogate\n",
"\n",
"Random Forest is a tree ensemble that captures nonlinear interactions without requiring feature scaling.\n",
"\n",
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8bdcff3",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"\n",
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
"from femsurrogate.surrogates.registry import make_model\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Input dataset and the output directories for metrics, predictions and figures.\n",
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
"    directory.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "6957e06b",
"metadata": {},
"source": [
"## Dataset and split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "209f40be",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_csv(DATASET_PATH)\n",
"target_column = \"tip_uy_m\"\n",
"\n",
"# Split settings; test_size=0.2 presumably means a 20% hold-out (confirm in split_dataset).\n",
"split_options = {\n",
"    \"feature_columns\": list(PARAMETER_COLUMNS),\n",
"    \"target_column\": target_column,\n",
"    \"test_size\": 0.2,\n",
"    \"random_state\": DEFAULT_RANDOM_SEED,\n",
"}\n",
"split = split_dataset(dataset, **split_options)\n",
"\n",
"# Show the train and test sizes.\n",
"n_train, n_test = len(split.X_train), len(split.X_test)\n",
"(n_train, n_test)"
]
},
{
"cell_type": "markdown",
"id": "fdc2fbed",
"metadata": {},
"source": [
"## Train and evaluate the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "63ed6060",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"random_forest\"\n",
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
"\n",
"# Train/test data, passed positionally to evaluate_model in this order.\n",
"evaluation_inputs = (split.X_train, split.X_test, split.y_train, split.y_test)\n",
"\n",
"# NOTE(review): ConvergenceWarning is silenced for the duration of the call,\n",
"# so a fit that did not converge is not reported here.\n",
"with warnings.catch_warnings():\n",
"    warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"    result = evaluate_model(\n",
"        model,\n",
"        *evaluation_inputs,\n",
"        model_name=MODEL_NAME,\n",
"        target_column=target_column,\n",
"    )\n",
"\n",
"metrics = metrics_to_dict(result.metrics)\n",
"metrics"
]
},
{
"cell_type": "markdown",
"id": "25dafcbf",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8357de5d",
"metadata": {},
"outputs": [],
"source": [
"# Output file names are derived from MODEL_NAME.\n",
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
"\n",
"metrics_json = json.dumps(metrics, indent=2)\n",
"metrics_path.write_text(metrics_json, encoding=\"utf-8\")\n",
"result.predictions.to_csv(predictions_path, index=False)\n",
"\n",
"# Show where both files were written.\n",
"saved_paths = {\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}\n",
"saved_paths"
]
},
{
"cell_type": "markdown",
"id": "069f56dd",
"metadata": {},
"source": [
"## Diagnostic plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00159cc2",
"metadata": {},
"outputs": [],
"source": [
"# savefig settings shared by both diagnostic figures.\n",
"save_options = {\"dpi\": 150, \"bbox_inches\": \"tight\"}\n",
"\n",
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
"\n",
"parity_path = FIGURES_DIR / f\"{MODEL_NAME}_parity.png\"\n",
"residual_path = FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\"\n",
"parity_fig.savefig(parity_path, **save_options)\n",
"residual_fig.savefig(residual_path, **save_options)\n",
"\n",
"# Display the parity figure as the cell output.\n",
"parity_fig"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ad3ce4ad",
"metadata": {},
"source": [
"# Gradient Boosting Surrogate\n",
"\n",
"Gradient Boosting is an ensemble that adds shallow trees sequentially, each one fitting the residual pattern left by the previous trees.\n",
"\n",
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0fec75b1",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"\n",
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
"from femsurrogate.surrogates.registry import make_model\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Input dataset and the output directories for metrics, predictions and figures.\n",
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
"    directory.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "1d93d9fb",
"metadata": {},
"source": [
"## Dataset and split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c905b0aa",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_csv(DATASET_PATH)\n",
"target_column = \"tip_uy_m\"\n",
"\n",
"# Split settings; test_size=0.2 presumably means a 20% hold-out (confirm in split_dataset).\n",
"split_options = {\n",
"    \"feature_columns\": list(PARAMETER_COLUMNS),\n",
"    \"target_column\": target_column,\n",
"    \"test_size\": 0.2,\n",
"    \"random_state\": DEFAULT_RANDOM_SEED,\n",
"}\n",
"split = split_dataset(dataset, **split_options)\n",
"\n",
"# Show the train and test sizes.\n",
"n_train, n_test = len(split.X_train), len(split.X_test)\n",
"(n_train, n_test)"
]
},
{
"cell_type": "markdown",
"id": "c5e7af1a",
"metadata": {},
"source": [
"## Train and evaluate the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5168f3e",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"gradient_boosting\"\n",
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
"\n",
"# Train/test data, passed positionally to evaluate_model in this order.\n",
"evaluation_inputs = (split.X_train, split.X_test, split.y_train, split.y_test)\n",
"\n",
"# NOTE(review): ConvergenceWarning is silenced for the duration of the call,\n",
"# so a fit that did not converge is not reported here.\n",
"with warnings.catch_warnings():\n",
"    warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"    result = evaluate_model(\n",
"        model,\n",
"        *evaluation_inputs,\n",
"        model_name=MODEL_NAME,\n",
"        target_column=target_column,\n",
"    )\n",
"\n",
"metrics = metrics_to_dict(result.metrics)\n",
"metrics"
]
},
{
"cell_type": "markdown",
"id": "f02aead6",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1daaf0f6",
"metadata": {},
"outputs": [],
"source": [
"# Output file names are derived from MODEL_NAME.\n",
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
"\n",
"metrics_json = json.dumps(metrics, indent=2)\n",
"metrics_path.write_text(metrics_json, encoding=\"utf-8\")\n",
"result.predictions.to_csv(predictions_path, index=False)\n",
"\n",
"# Show where both files were written.\n",
"saved_paths = {\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}\n",
"saved_paths"
]
},
{
"cell_type": "markdown",
"id": "14f71882",
"metadata": {},
"source": [
"## Diagnostic plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "807c1025",
"metadata": {},
"outputs": [],
"source": [
"# savefig settings shared by both diagnostic figures.\n",
"save_options = {\"dpi\": 150, \"bbox_inches\": \"tight\"}\n",
"\n",
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
"\n",
"parity_path = FIGURES_DIR / f\"{MODEL_NAME}_parity.png\"\n",
"residual_path = FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\"\n",
"parity_fig.savefig(parity_path, **save_options)\n",
"residual_fig.savefig(residual_path, **save_options)\n",
"\n",
"# Display the parity figure as the cell output.\n",
"parity_fig"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "69c9cc92",
"metadata": {},
"source": [
"# MLP Neural Network Surrogate\n",
"\n",
"The MLP is trained on scaled inputs and targets to learn the nonlinear mapping from parameters to response.\n",
"\n",
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eed089f5",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import warnings\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"\n",
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
"from femsurrogate.surrogates.registry import make_model\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Input dataset and the output directories for metrics, predictions and figures.\n",
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
"    directory.mkdir(parents=True, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "b2547495",
"metadata": {},
"source": [
"## Dataset and split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "039279d4",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_csv(DATASET_PATH)\n",
"target_column = \"tip_uy_m\"\n",
"\n",
"# Split settings; test_size=0.2 presumably means a 20% hold-out (confirm in split_dataset).\n",
"split_options = {\n",
"    \"feature_columns\": list(PARAMETER_COLUMNS),\n",
"    \"target_column\": target_column,\n",
"    \"test_size\": 0.2,\n",
"    \"random_state\": DEFAULT_RANDOM_SEED,\n",
"}\n",
"split = split_dataset(dataset, **split_options)\n",
"\n",
"# Show the train and test sizes.\n",
"n_train, n_test = len(split.X_train), len(split.X_test)\n",
"(n_train, n_test)"
]
},
{
"cell_type": "markdown",
"id": "71b3323f",
"metadata": {},
"source": [
"## Train and evaluate the model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "605253ef",
"metadata": {},
"outputs": [],
"source": [
"MODEL_NAME = \"mlp\"\n",
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED, max_iter=500)\n",
"\n",
"# Train/test data, passed positionally to evaluate_model in this order.\n",
"evaluation_inputs = (split.X_train, split.X_test, split.y_train, split.y_test)\n",
"\n",
"# NOTE(review): ConvergenceWarning is silenced for the duration of the call,\n",
"# so a fit that stops at max_iter without converging is not reported here.\n",
"with warnings.catch_warnings():\n",
"    warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
"    result = evaluate_model(\n",
"        model,\n",
"        *evaluation_inputs,\n",
"        model_name=MODEL_NAME,\n",
"        target_column=target_column,\n",
"    )\n",
"\n",
"metrics = metrics_to_dict(result.metrics)\n",
"metrics"
]
},
{
"cell_type": "markdown",
"id": "f70ad9fd",
"metadata": {},
"source": [
"## Save results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb08797b",
"metadata": {},
"outputs": [],
"source": [
"# Output file names are derived from MODEL_NAME.\n",
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
"\n",
"metrics_json = json.dumps(metrics, indent=2)\n",
"metrics_path.write_text(metrics_json, encoding=\"utf-8\")\n",
"result.predictions.to_csv(predictions_path, index=False)\n",
"\n",
"# Show where both files were written.\n",
"saved_paths = {\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}\n",
"saved_paths"
]
},
{
"cell_type": "markdown",
"id": "39a34166",
"metadata": {},
"source": [
"## Diagnostic plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "694a1081",
"metadata": {},
"outputs": [],
"source": [
"# savefig settings shared by both diagnostic figures.\n",
"save_options = {\"dpi\": 150, \"bbox_inches\": \"tight\"}\n",
"\n",
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
"\n",
"parity_path = FIGURES_DIR / f\"{MODEL_NAME}_parity.png\"\n",
"residual_path = FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\"\n",
"parity_fig.savefig(parity_path, **save_options)\n",
"residual_fig.savefig(residual_path, **save_options)\n",
"\n",
"# Display the parity figure as the cell output.\n",
"parity_fig"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "741ede83",
"metadata": {},
"source": [
"# 06 Compare Surrogate Models\n",
"\n",
"This notebook only reads the metrics JSON files saved by the individual model notebooks and compares them. It does not retrain any model."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "10f43a71",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from pathlib import Path\n",
"\n",
"import pandas as pd\n",
"\n",
"from femsurrogate.plotting.comparison import plot_metric_comparison\n",
"\n",
"# Walk up from the working directory until a pyproject.toml is found, so the\n",
"# notebook also runs from directories nested more than one level below the root.\n",
"START_DIR = Path.cwd().resolve()\n",
"ROOT = next(\n",
"    (path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()),\n",
"    None,\n",
")\n",
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or its parents\"\n",
"\n",
"# Metrics are read from RESULTS_DIR; comparison figures are written to FIGURES_DIR.\n",
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
"FIGURES_DIR.mkdir(parents=True, exist_ok=True)\n",
"MODEL_NAMES = [\"rsm\", \"gpr\", \"random_forest\", \"gradient_boosting\", \"mlp\"]"
]
},
{
"cell_type": "markdown",
"id": "9798e3bb",
"metadata": {},
"source": [
"## Load metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "55d2447d",
"metadata": {},
"outputs": [],
"source": [
"# One metrics file is expected per model name; report every missing file at\n",
"# once instead of stopping at the first one.\n",
"metrics_paths = [RESULTS_DIR / f\"{model_name}_metrics.json\" for model_name in MODEL_NAMES]\n",
"missing_paths = [str(path) for path in metrics_paths if not path.exists()]\n",
"assert not missing_paths, f\"missing metrics files: {missing_paths}\"\n",
"\n",
"records = [json.loads(path.read_text(encoding=\"utf-8\")) for path in metrics_paths]\n",
"\n",
"# Sort the models by ascending RMSE and save the table as a CSV in RESULTS_DIR.\n",
"comparison = pd.DataFrame(records).sort_values(\"rmse\").reset_index(drop=True)\n",
"comparison_path = RESULTS_DIR / \"model_comparison.csv\"\n",
"comparison.to_csv(comparison_path, index=False)\n",
"comparison"
]
},
{
"cell_type": "markdown",
"id": "731bb2f7",
"metadata": {},
"source": [
"## Metric comparison plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "64657b84",
"metadata": {},
"outputs": [],
"source": [
"# Metrics to chart, in the order the figures are produced.\n",
"metrics_to_plot = (\"rmse\", \"mae\", \"r2\", \"fit_time_s\", \"predict_time_s\")\n",
"save_options = {\"dpi\": 150, \"bbox_inches\": \"tight\"}\n",
"\n",
"figures = {}\n",
"for metric in metrics_to_plot:\n",
"    figure = plot_metric_comparison(comparison, metric=metric, title=f\"Surrogate {metric}\")\n",
"    figure_path = FIGURES_DIR / f\"comparison_{metric}.png\"\n",
"    figure.savefig(figure_path, **save_options)\n",
"    figures[metric] = figure\n",
"\n",
"# Show the RMSE comparison as the cell output.\n",
"figures[\"rmse\"]"
]
},
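{
"cell_type": "markdown",
"id": "63cb5b8d",
"metadata": {},
"source": [
"## How to read the results\n",
"\n",
"- For `rmse` and `mae`, lower values mean better accuracy.\n",
"- Even when `r2` is high, check the residual plot for bias in specific regions of the response.\n",
"- `fit_time_s` and `predict_time_s` matter when the surrogate is called repeatedly, for example inside an optimization loop.\n",
"- GPR works well on small datasets, but its training cost can grow quickly as the number of samples increases."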
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}