initial commit FESurrogateModelTutorial
This commit is contained in:
@@ -0,0 +1 @@
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7e3ade70",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 00 Beam2D FEM Dataset\n",
|
||||
"\n",
|
||||
"This notebook verifies the repository solver against the BeamExamples reference results, then draws LHS samples and runs the solver on them to build the dataset used for the FEM surrogate models."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "71929240",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from femsurrogate.data.bounds import DEFAULT_PARAMETER_BOUNDS\n",
|
||||
"from femsurrogate.data.dataset import build_dataset\n",
|
||||
"from femsurrogate.data.sampling import generate_lhs_samples\n",
|
||||
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, TARGET_COLUMNS\n",
|
||||
"from femsurrogate.fea.io import read_beam_example, read_expected_displacements\n",
|
||||
"from femsurrogate.fea.solver import solve_linear_static\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"REFERENCE_DIR = ROOT / \"data\" / \"reference\"\n",
|
||||
"REFERENCE_DIR.mkdir(parents=True, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "df256ebc",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## BeamExamples reference verification\n",
|
||||
"\n",
|
||||
"For every node in the fixture, check that the solver's `Ux`, `Uy`, and `Rz` match the reference values within the tolerance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0a8c947c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = read_beam_example(ROOT / \"BeamExamples\" / \"CantileverBeam.txt\")\n",
|
||||
"expected = read_expected_displacements(ROOT / \"BeamExamples\" / \"CantileverBeam_Displacements.txt\")\n",
|
||||
"actual = solve_linear_static(model)\n",
|
||||
"\n",
|
||||
"max_abs_error = 0.0\n",
|
||||
"for node_id, expected_displacement in expected.items():\n",
|
||||
" actual_displacement = actual[node_id]\n",
|
||||
" actual_values = np.array(\n",
|
||||
" [actual_displacement.ux, actual_displacement.uy, actual_displacement.rz]\n",
|
||||
" )\n",
|
||||
" expected_values = np.array(\n",
|
||||
" [expected_displacement.ux, expected_displacement.uy, expected_displacement.rz]\n",
|
||||
" )\n",
|
||||
" error = float(np.max(np.abs(actual_values - expected_values)))\n",
|
||||
" max_abs_error = max(max_abs_error, error)\n",
|
||||
"\n",
|
||||
"assert max_abs_error <= 5e-7\n",
|
||||
"max_abs_error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6620d0ee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## LHS sampling and FEM batch run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9923fd04",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"N_SAMPLES = 300\n",
|
||||
"\n",
|
||||
"samples = generate_lhs_samples(DEFAULT_PARAMETER_BOUNDS, n=N_SAMPLES, seed=DEFAULT_RANDOM_SEED)\n",
|
||||
"dataset = build_dataset(samples)\n",
|
||||
"\n",
|
||||
"assert len(dataset) == N_SAMPLES\n",
|
||||
"assert set(TARGET_COLUMNS).issubset(dataset.columns)\n",
|
||||
"dataset.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c0efee15",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save the dataset\n",
|
||||
"\n",
|
||||
"Save the CSV and its metadata to `data/reference/` so that the later notebooks can use them."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d80890bd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_path = REFERENCE_DIR / \"beam2d_lhs_300.csv\"\n",
|
||||
"metadata_path = REFERENCE_DIR / \"beam2d_lhs_300_metadata.json\"\n",
|
||||
"\n",
|
||||
"dataset.to_csv(dataset_path, index=False)\n",
|
||||
"metadata = {\n",
|
||||
" \"dataset_name\": \"beam2d_lhs_300\",\n",
|
||||
" \"sample_count\": N_SAMPLES,\n",
|
||||
" \"random_seed\": DEFAULT_RANDOM_SEED,\n",
|
||||
" \"unit_system\": \"SI\",\n",
|
||||
" \"fea_model\": \"2D Euler-Bernoulli beam/frame, linear static\",\n",
|
||||
" \"target_columns\": list(TARGET_COLUMNS),\n",
|
||||
" \"parameter_bounds\": {\n",
|
||||
" name: {\"lower\": bound.lower, \"upper\": bound.upper}\n",
|
||||
" for name, bound in DEFAULT_PARAMETER_BOUNDS.items()\n",
|
||||
" },\n",
|
||||
" \"notes\": \"Generated by notebooks/00_beam2d_fea_dataset.ipynb using src/femsurrogate.\",\n",
|
||||
"}\n",
|
||||
"metadata_path.write_text(json.dumps(metadata, indent=2), encoding=\"utf-8\")\n",
|
||||
"\n",
|
||||
"{\"dataset_path\": str(dataset_path), \"metadata_path\": str(metadata_path)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d56bcddb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset sanity check"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e560e50c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset.describe().T.loc[[\"L_m\", \"b_m\", \"h_m\", \"E_pa\", \"P_n\", *TARGET_COLUMNS]]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "260fc3ce",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Response Surface Surrogate\n",
|
||||
"\n",
|
||||
"The response surface model combines polynomial features with Ridge regression to provide a simple baseline.\n",
|
||||
"\n",
|
||||
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "56528a9c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import warnings\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"\n",
|
||||
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
|
||||
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
|
||||
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
|
||||
"from femsurrogate.surrogates.registry import make_model\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
|
||||
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
|
||||
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
|
||||
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
|
||||
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
|
||||
" directory.mkdir(parents=True, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7cd36ad9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset and split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "50689a07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_csv(DATASET_PATH)\n",
|
||||
"target_column = \"tip_uy_m\"\n",
|
||||
"split = split_dataset(\n",
|
||||
" dataset,\n",
|
||||
" feature_columns=list(PARAMETER_COLUMNS),\n",
|
||||
" target_column=target_column,\n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=DEFAULT_RANDOM_SEED,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"len(split.X_train), len(split.X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0473847f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model training and evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "539ffb4d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_NAME = \"rsm\"\n",
|
||||
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
|
||||
"\n",
|
||||
"with warnings.catch_warnings():\n",
|
||||
" warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||||
" result = evaluate_model(\n",
|
||||
" model,\n",
|
||||
" split.X_train,\n",
|
||||
" split.X_test,\n",
|
||||
" split.y_train,\n",
|
||||
" split.y_test,\n",
|
||||
" model_name=MODEL_NAME,\n",
|
||||
" target_column=target_column,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"metrics = metrics_to_dict(result.metrics)\n",
|
||||
"metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78c34610",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e15bf80d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
|
||||
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
|
||||
"\n",
|
||||
"metrics_path.write_text(json.dumps(metrics, indent=2), encoding=\"utf-8\")\n",
|
||||
"result.predictions.to_csv(predictions_path, index=False)\n",
|
||||
"\n",
|
||||
"{\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2142e40b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostic plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "14b89510",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
|
||||
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
|
||||
"parity_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_parity.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"residual_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"parity_fig"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "144f56e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gaussian Process / Kriging Surrogate\n",
|
||||
"\n",
|
||||
"GPR? smooth response? ?? ???? ??? ???? ????.\n",
|
||||
"\n",
|
||||
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a5a2974",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import warnings\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"\n",
|
||||
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
|
||||
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
|
||||
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
|
||||
"from femsurrogate.surrogates.registry import make_model\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
|
||||
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
|
||||
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
|
||||
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
|
||||
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
|
||||
" directory.mkdir(parents=True, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a2c2b3fe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset and split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ef13216f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_csv(DATASET_PATH)\n",
|
||||
"target_column = \"tip_uy_m\"\n",
|
||||
"split = split_dataset(\n",
|
||||
" dataset,\n",
|
||||
" feature_columns=list(PARAMETER_COLUMNS),\n",
|
||||
" target_column=target_column,\n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=DEFAULT_RANDOM_SEED,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"len(split.X_train), len(split.X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6c3be3f4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model training and evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d5e09c8a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_NAME = \"gpr\"\n",
|
||||
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
|
||||
"\n",
|
||||
"with warnings.catch_warnings():\n",
|
||||
" warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||||
" result = evaluate_model(\n",
|
||||
" model,\n",
|
||||
" split.X_train,\n",
|
||||
" split.X_test,\n",
|
||||
" split.y_train,\n",
|
||||
" split.y_test,\n",
|
||||
" model_name=MODEL_NAME,\n",
|
||||
" target_column=target_column,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"metrics = metrics_to_dict(result.metrics)\n",
|
||||
"metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4b9e74af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5af40658",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
|
||||
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
|
||||
"\n",
|
||||
"metrics_path.write_text(json.dumps(metrics, indent=2), encoding=\"utf-8\")\n",
|
||||
"result.predictions.to_csv(predictions_path, index=False)\n",
|
||||
"\n",
|
||||
"{\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "37a881b4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostic plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d156a54d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
|
||||
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
|
||||
"parity_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_parity.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"residual_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"parity_fig"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1638813f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Random Forest Surrogate\n",
|
||||
"\n",
|
||||
"Random Forest? feature scaling ??? ????? interaction? ??? ???? tree ensemble??.\n",
|
||||
"\n",
|
||||
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b8bdcff3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import warnings\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"\n",
|
||||
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
|
||||
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
|
||||
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
|
||||
"from femsurrogate.surrogates.registry import make_model\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
|
||||
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
|
||||
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
|
||||
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
|
||||
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
|
||||
" directory.mkdir(parents=True, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6957e06b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset and split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "209f40be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_csv(DATASET_PATH)\n",
|
||||
"target_column = \"tip_uy_m\"\n",
|
||||
"split = split_dataset(\n",
|
||||
" dataset,\n",
|
||||
" feature_columns=list(PARAMETER_COLUMNS),\n",
|
||||
" target_column=target_column,\n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=DEFAULT_RANDOM_SEED,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"len(split.X_train), len(split.X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fdc2fbed",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model training and evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "63ed6060",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_NAME = \"random_forest\"\n",
|
||||
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
|
||||
"\n",
|
||||
"with warnings.catch_warnings():\n",
|
||||
" warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||||
" result = evaluate_model(\n",
|
||||
" model,\n",
|
||||
" split.X_train,\n",
|
||||
" split.X_test,\n",
|
||||
" split.y_train,\n",
|
||||
" split.y_test,\n",
|
||||
" model_name=MODEL_NAME,\n",
|
||||
" target_column=target_column,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"metrics = metrics_to_dict(result.metrics)\n",
|
||||
"metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "25dafcbf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8357de5d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
|
||||
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
|
||||
"\n",
|
||||
"metrics_path.write_text(json.dumps(metrics, indent=2), encoding=\"utf-8\")\n",
|
||||
"result.predictions.to_csv(predictions_path, index=False)\n",
|
||||
"\n",
|
||||
"{\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "069f56dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostic plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00159cc2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
|
||||
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
|
||||
"parity_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_parity.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"residual_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"parity_fig"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ad3ce4ad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Gradient Boosting Surrogate\n",
|
||||
"\n",
|
||||
"Gradient Boosting? shallow tree? ????? ?? residual pattern? ??? ensemble??.\n",
|
||||
"\n",
|
||||
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0fec75b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import warnings\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"\n",
|
||||
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
|
||||
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
|
||||
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
|
||||
"from femsurrogate.surrogates.registry import make_model\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
|
||||
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
|
||||
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
|
||||
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
|
||||
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
|
||||
" directory.mkdir(parents=True, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1d93d9fb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset and split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c905b0aa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_csv(DATASET_PATH)\n",
|
||||
"target_column = \"tip_uy_m\"\n",
|
||||
"split = split_dataset(\n",
|
||||
" dataset,\n",
|
||||
" feature_columns=list(PARAMETER_COLUMNS),\n",
|
||||
" target_column=target_column,\n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=DEFAULT_RANDOM_SEED,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"len(split.X_train), len(split.X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c5e7af1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model training and evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f5168f3e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_NAME = \"gradient_boosting\"\n",
|
||||
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED)\n",
|
||||
"\n",
|
||||
"with warnings.catch_warnings():\n",
|
||||
" warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||||
" result = evaluate_model(\n",
|
||||
" model,\n",
|
||||
" split.X_train,\n",
|
||||
" split.X_test,\n",
|
||||
" split.y_train,\n",
|
||||
" split.y_test,\n",
|
||||
" model_name=MODEL_NAME,\n",
|
||||
" target_column=target_column,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"metrics = metrics_to_dict(result.metrics)\n",
|
||||
"metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f02aead6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1daaf0f6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
|
||||
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
|
||||
"\n",
|
||||
"metrics_path.write_text(json.dumps(metrics, indent=2), encoding=\"utf-8\")\n",
|
||||
"result.predictions.to_csv(predictions_path, index=False)\n",
|
||||
"\n",
|
||||
"{\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "14f71882",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostic plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "807c1025",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
|
||||
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
|
||||
"parity_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_parity.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"residual_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"parity_fig"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,168 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "69c9cc92",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MLP Neural Network Surrogate\n",
|
||||
"\n",
|
||||
"MLP? scaled input? target? ??? ???? ??? ??? ????.\n",
|
||||
"\n",
|
||||
"It uses the same dataset, target, and train/test split seed as the other model notebooks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "eed089f5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import warnings\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn.exceptions import ConvergenceWarning\n",
|
||||
"\n",
|
||||
"from femsurrogate.data.schema import DEFAULT_RANDOM_SEED, PARAMETER_COLUMNS\n",
|
||||
"from femsurrogate.plotting.diagnostics import plot_parity, plot_residuals\n",
|
||||
"from femsurrogate.surrogates.common import evaluate_model, metrics_to_dict, split_dataset\n",
|
||||
"from femsurrogate.surrogates.registry import make_model\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"DATASET_PATH = ROOT / \"data\" / \"reference\" / \"beam2d_lhs_300.csv\"\n",
|
||||
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
|
||||
"PREDICTIONS_DIR = ROOT / \"reports\" / \"predictions\"\n",
|
||||
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
|
||||
"for directory in [RESULTS_DIR, PREDICTIONS_DIR, FIGURES_DIR]:\n",
|
||||
" directory.mkdir(parents=True, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b2547495",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Dataset and split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "039279d4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_csv(DATASET_PATH)\n",
|
||||
"target_column = \"tip_uy_m\"\n",
|
||||
"split = split_dataset(\n",
|
||||
" dataset,\n",
|
||||
" feature_columns=list(PARAMETER_COLUMNS),\n",
|
||||
" target_column=target_column,\n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=DEFAULT_RANDOM_SEED,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"len(split.X_train), len(split.X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "71b3323f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Model training and evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "605253ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"MODEL_NAME = \"mlp\"\n",
|
||||
"model = make_model(MODEL_NAME, random_state=DEFAULT_RANDOM_SEED, max_iter=500)\n",
|
||||
"\n",
|
||||
"with warnings.catch_warnings():\n",
|
||||
" warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)\n",
|
||||
" result = evaluate_model(\n",
|
||||
" model,\n",
|
||||
" split.X_train,\n",
|
||||
" split.X_test,\n",
|
||||
" split.y_train,\n",
|
||||
" split.y_test,\n",
|
||||
" model_name=MODEL_NAME,\n",
|
||||
" target_column=target_column,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"metrics = metrics_to_dict(result.metrics)\n",
|
||||
"metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f70ad9fd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Save results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "fb08797b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_path = RESULTS_DIR / f\"{MODEL_NAME}_metrics.json\"\n",
|
||||
"predictions_path = PREDICTIONS_DIR / f\"{MODEL_NAME}_predictions.csv\"\n",
|
||||
"\n",
|
||||
"metrics_path.write_text(json.dumps(metrics, indent=2), encoding=\"utf-8\")\n",
|
||||
"result.predictions.to_csv(predictions_path, index=False)\n",
|
||||
"\n",
|
||||
"{\"metrics_path\": str(metrics_path), \"predictions_path\": str(predictions_path)}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39a34166",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostic plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "694a1081",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"parity_fig = plot_parity(result.predictions, title=f\"{MODEL_NAME} parity\")\n",
|
||||
"residual_fig = plot_residuals(result.predictions, title=f\"{MODEL_NAME} residuals\")\n",
|
||||
"parity_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_parity.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"residual_fig.savefig(FIGURES_DIR / f\"{MODEL_NAME}_residuals.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
"parity_fig"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "741ede83",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 06 Compare Surrogate Models\n",
|
||||
"\n",
|
||||
"This notebook reads the metrics JSON files saved by each model notebook and combines them into a comparison table. It does not retrain any model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "10f43a71",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from femsurrogate.plotting.comparison import plot_metric_comparison\n",
|
||||
"\n",
|
||||
"START_DIR = Path.cwd().resolve()\n",
|
||||
"# Search the working directory and all of its ancestors for the project root marker.\n",
|
||||
"ROOT = next((path for path in (START_DIR, *START_DIR.parents) if (path / \"pyproject.toml\").exists()), None)\n",
|
||||
"assert ROOT is not None, f\"pyproject.toml not found in {START_DIR} or any parent directory\"\n",
|
||||
"RESULTS_DIR = ROOT / \"reports\" / \"results\"\n",
|
||||
"FIGURES_DIR = ROOT / \"reports\" / \"figures\"\n",
|
||||
"FIGURES_DIR.mkdir(parents=True, exist_ok=True)\n",
|
||||
"MODEL_NAMES = [\"rsm\", \"gpr\", \"random_forest\", \"gradient_boosting\", \"mlp\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9798e3bb",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "55d2447d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"records = []\n",
|
||||
"for model_name in MODEL_NAMES:\n",
|
||||
" metrics_path = RESULTS_DIR / f\"{model_name}_metrics.json\"\n",
|
||||
" assert metrics_path.exists(), metrics_path\n",
|
||||
" records.append(json.loads(metrics_path.read_text(encoding=\"utf-8\")))\n",
|
||||
"\n",
|
||||
"comparison = pd.DataFrame(records).sort_values(\"rmse\").reset_index(drop=True)\n",
|
||||
"comparison_path = RESULTS_DIR / \"model_comparison.csv\"\n",
|
||||
"comparison.to_csv(comparison_path, index=False)\n",
|
||||
"comparison"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "731bb2f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Metric comparison plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "64657b84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"figures = {}\n",
|
||||
"for metric in [\"rmse\", \"mae\", \"r2\", \"fit_time_s\", \"predict_time_s\"]:\n",
|
||||
" figure = plot_metric_comparison(comparison, metric=metric, title=f\"Surrogate {metric}\")\n",
|
||||
" figure.savefig(FIGURES_DIR / f\"comparison_{metric}.png\", dpi=150, bbox_inches=\"tight\")\n",
|
||||
" figures[metric] = figure\n",
|
||||
"\n",
|
||||
"figures[\"rmse\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "63cb5b8d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## ?? ???\n",
|
||||
"\n",
|
||||
"- `rmse`? `mae`? ?? ??? ?? ????.\n",
|
||||
"- `r2`? ??? residual plot?? ?? ?? ??? bias? ??? ????? ????.\n",
|
||||
"- `fit_time_s`, `predict_time_s`? ?? ??? ??? loop?? ????.\n",
|
||||
"- GPR? ?? dataset?? ???? sample ?? ??? ?? ??? ??? ?? ? ??."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user