diff --git a/notebooks/ANNModel.ipynb b/notebooks/ANNModel.ipynb new file mode 100644 index 0000000..b13e25b --- /dev/null +++ b/notebooks/ANNModel.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting scikeras[tensorflow]\n", + " Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)\n", + "Requirement already satisfied: scikit-learn>=1.0.0 in /home/vscode/.local/lib/python3.8/site-packages (from scikeras[tensorflow]) (1.3.2)\n", + "Requirement already satisfied: packaging>=0.21 in /home/vscode/.local/lib/python3.8/site-packages (from scikeras[tensorflow]) (24.1)\n", + "Collecting tensorflow<2.13.0,>=2.12.0\n", + " Downloading tensorflow-2.12.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (585.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m585.9/585.9 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: scipy>=1.5.0 in /home/vscode/.local/lib/python3.8/site-packages (from scikit-learn>=1.0.0->scikeras[tensorflow]) (1.10.1)\n", + "Requirement already satisfied: joblib>=1.1.1 in /home/vscode/.local/lib/python3.8/site-packages (from scikit-learn>=1.0.0->scikeras[tensorflow]) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /home/vscode/.local/lib/python3.8/site-packages (from scikit-learn>=1.0.0->scikeras[tensorflow]) (3.5.0)\n", + "Requirement already satisfied: numpy<2.0,>=1.17.3 in /home/vscode/.local/lib/python3.8/site-packages (from scikit-learn>=1.0.0->scikeras[tensorflow]) (1.24.3)\n", + "Requirement already satisfied: libclang>=13.0.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (18.1.1)\n", + "Requirement already satisfied: grpcio<2.0,>=1.24.3 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (1.65.1)\n", + "Requirement already satisfied: six>=1.12.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (1.16.0)\n", + "Requirement already satisfied: absl-py>=1.0.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.1.0)\n", + "Requirement already satisfied: h5py>=2.9.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.11.0)\n", + "Requirement already satisfied: flatbuffers>=2.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (24.3.25)\n", + "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.34.0)\n", + "Collecting tensorflow-estimator<2.13,>=2.12.0\n", + " Downloading tensorflow_estimator-2.12.0-py2.py3-none-any.whl (440 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m440.7/440.7 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting tensorboard<2.13,>=2.12\n", + " Downloading tensorboard-2.12.3-py3-none-any.whl (5.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.6/5.6 MB\u001b[0m \u001b[31m62.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (4.25.4)\n", + "Collecting keras<2.13,>=2.12.0\n", + " Downloading keras-2.12.0-py2.py3-none-any.whl (1.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: astunparse>=1.6.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (1.6.3)\n", + "Requirement already satisfied: gast<=0.4.0,>=0.2.1 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.4.0)\n", + "Requirement already satisfied: google-pasta>=0.1.1 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.2.0)\n", + "Requirement already satisfied: termcolor>=1.1.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.4.0)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (69.0.3)\n", + "Collecting wrapt<1.15,>=1.11.0\n", + " Downloading wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (81 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.0/81.0 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: typing-extensions<4.6.0,>=3.6.6 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (4.5.0)\n", + "Requirement already satisfied: opt-einsum>=2.3.2 in /home/vscode/.local/lib/python3.8/site-packages (from tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.3.0)\n", + "Collecting jax>=0.3.15\n", + " Downloading jax-0.4.13.tar.gz (1.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m36.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: wheel<1.0,>=0.23.0 in /usr/local/lib/python3.8/site-packages (from astunparse>=1.6.0->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.43.0)\n", + "Collecting ml-dtypes>=0.1.0\n", + " Downloading ml_dtypes-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m34.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: importlib-metadata>=4.6 in /home/vscode/.local/lib/python3.8/site-packages (from jax>=0.3.15->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (8.2.0)\n", + "Requirement already satisfied: google-auth-oauthlib<1.1,>=0.5 in /home/vscode/.local/lib/python3.8/site-packages (from tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (1.0.0)\n", + "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.7.2)\n", + "Requirement already satisfied: google-auth<3,>=1.6.3 in /home/vscode/.local/lib/python3.8/site-packages (from tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.32.0)\n", + "Requirement already satisfied: markdown>=2.6.8 in /home/vscode/.local/lib/python3.8/site-packages (from tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.6)\n", + "Requirement already satisfied: requests<3,>=2.21.0 in /home/vscode/.local/lib/python3.8/site-packages (from tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.32.3)\n", + "Requirement already satisfied: werkzeug>=1.0.1 in /home/vscode/.local/lib/python3.8/site-packages (from tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.0.3)\n", + "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /home/vscode/.local/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (5.4.0)\n", + "Requirement already satisfied: rsa<5,>=3.1.4 in /home/vscode/.local/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (4.9)\n", + "Requirement already satisfied: pyasn1-modules>=0.2.1 in /home/vscode/.local/lib/python3.8/site-packages (from google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.4.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.7.0 in /home/vscode/.local/lib/python3.8/site-packages (from google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.0.0)\n", + "Requirement already satisfied: zipp>=0.5 in /home/vscode/.local/lib/python3.8/site-packages (from importlib-metadata>=4.6->jax>=0.3.15->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.19.2)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/vscode/.local/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2024.7.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/vscode/.local/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.2.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/vscode/.local/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/vscode/.local/lib/python3.8/site-packages (from requests<3,>=2.21.0->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.7)\n", + "Requirement already satisfied: MarkupSafe>=2.1.1 in /home/vscode/.local/lib/python3.8/site-packages (from werkzeug>=1.0.1->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (2.1.5)\n", + "Requirement already satisfied: pyasn1<0.7.0,>=0.4.6 in /home/vscode/.local/lib/python3.8/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (0.6.0)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /home/vscode/.local/lib/python3.8/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<1.1,>=0.5->tensorboard<2.13,>=2.12->tensorflow<2.13.0,>=2.12.0->scikeras[tensorflow]) (3.2.2)\n", + "Building wheels for collected packages: jax\n", + " Building wheel for jax (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for jax: filename=jax-0.4.13-py3-none-any.whl size=1518705 sha256=f1d02552a5e357681bcaab262d063c12bf8dcbbfa16daa794a4e97376772e555\n", + " Stored in directory: /home/vscode/.cache/pip/wheels/46/d9/15/d2800d4089dc4c77299ac7513c6aa1036f5491edbd2bf6ba16\n", + "Successfully built jax\n", + "Installing collected packages: wrapt, tensorflow-estimator, ml-dtypes, keras, jax, scikeras, tensorboard, tensorflow\n", + " Attempting uninstall: wrapt\n", + " Found existing installation: wrapt 1.16.0\n", + " Uninstalling wrapt-1.16.0:\n", + " Successfully uninstalled wrapt-1.16.0\n", + " Attempting uninstall: tensorflow-estimator\n", + " Found existing installation: tensorflow-estimator 2.13.0\n", + " Uninstalling tensorflow-estimator-2.13.0:\n", + " Successfully uninstalled tensorflow-estimator-2.13.0\n", + " Attempting uninstall: keras\n", + " Found existing installation: keras 2.13.1\n", + " Uninstalling keras-2.13.1:\n", + " Successfully uninstalled keras-2.13.1\n", + " Attempting uninstall: tensorboard\n", + " Found existing installation: tensorboard 2.13.0\n", + " Uninstalling tensorboard-2.13.0:\n", + " Successfully uninstalled tensorboard-2.13.0\n", + " Attempting uninstall: tensorflow\n", + " Found existing installation: tensorflow 2.13.1\n", + " Uninstalling tensorflow-2.13.1:\n", + " Successfully uninstalled tensorflow-2.13.1\n", + "Successfully installed jax-0.4.13 keras-2.12.0 ml-dtypes-0.2.0 scikeras-0.12.0 tensorboard-2.12.3 tensorflow-2.12.1 tensorflow-estimator-2.12.0 wrapt-1.14.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-07-29 19:06:35.916957: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-07-29 19:06:35.955385: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2024-07-29 19:06:35.957278: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2024-07-29 19:06:37.345996: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 10 candidates, totalling 30 fits\n", + "Best: 0.8452076237345957 using {'model__optimizer': 'adam', 'model__learning_rate': 0.001, 'model__dropout_rate': 0.3, 'epochs': 50, 'batch_size': 64}\n", + "Mean Absolute Error on scaled data: 0.9159975888259128\n", + "Rescaled Mean Absolute Error: 14853.925471006318\n" + ] + } + ], + "source": [ + "!pip install scikeras[tensorflow]\n", + "\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split, RandomizedSearchCV\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import mean_absolute_error\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense, Dropout\n", + "from tensorflow.keras.optimizers import Adam\n", + "from scikeras.wrappers import KerasRegressor\n", + "\n", + "# Loading the dataset\n", + "data = pd.read_csv('../data/AmesData.csv')\n", + "\n", + "# Filling NaN values in numeric columns with the median\n", + "for col in data.select_dtypes(include=[np.number]).columns:\n", + " data[col].fillna(data[col].median(), inplace=True)\n", + "\n", + "# Filling NaN values in categorical columns with the most frequent value\n", + "for col in data.select_dtypes(include=[object]).columns:\n", + " data[col].fillna(data[col].mode()[0], inplace=True)\n", + "\n", + "# Encoding categorical variables using one-hot encoding\n", + "data_encoded = pd.get_dummies(data, drop_first=True)\n", + "\n", + "# Splitting the data into features and target\n", + "X = data_encoded.drop('SalePrice', axis=1)\n", + "y = data['SalePrice']\n", + "\n", + "# Spliting the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Initializing the scaler for the features\n", + "scaler = StandardScaler()\n", + "\n", + "# Scaling the features\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)\n", + "\n", + "# Scaling the target variable\n", + "y_train_mean = y_train.mean()\n", + "y_train_std = y_train.std()\n", + "y_train_scaled = (y_train - y_train_mean) / y_train_std\n", + "y_test_scaled = (y_test - y_train_mean) / y_train_std\n", + "\n", + "# Defining a function to create the model (for KerasRegressor)\n", + "def create_model(optimizer='adam', learning_rate=0.001, dropout_rate=0.0):\n", + " model = Sequential()\n", + " model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu', kernel_regularizer='l2'))\n", + " model.add(Dropout(dropout_rate))\n", + " model.add(Dense(64, activation='relu', kernel_regularizer='l2'))\n", + " model.add(Dropout(dropout_rate))\n", + " model.add(Dense(32, activation='relu', kernel_regularizer='l2'))\n", + " model.add(Dense(1))\n", + " \n", + " optimizer = Adam(learning_rate=learning_rate)\n", + " model.compile(optimizer=optimizer, loss='mean_squared_error')\n", + " return model\n", + "\n", + "# Creating the KerasRegressor\n", + "model = KerasRegressor(model=create_model, epochs=100, batch_size=32, verbose=0)\n", + "\n", + "# Defining the hyperparameters grid\n", + "param_grid = {\n", + " 'model__optimizer': ['adam'],\n", + " 'model__learning_rate': [0.01, 0.001, 0.0001],\n", + " 'model__dropout_rate': [0.0, 0.3, 0.5],\n", + " 'batch_size': [16, 32, 64],\n", + " 'epochs': [50, 100]\n", + "}\n", + "\n", + "# Performing RandomizedSearchCV\n", + "random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, verbose=1)\n", + "random_search_result = random_search.fit(X_train_scaled, y_train_scaled)\n", + "\n", + "print(f\"Best: {random_search_result.best_score_} using {random_search_result.best_params_}\")\n", + "\n", + "# Evaluating the best model on the test set\n", + "best_model = random_search_result.best_estimator_\n", + "mae_scaled = best_model.score(X_test_scaled, y_test_scaled)\n", + "print(f'Mean Absolute Error on scaled data: {mae_scaled}')\n", + "\n", + "# Predicting on the test data\n", + "predictions_scaled = best_model.predict(X_test_scaled)\n", + "\n", + "# Rescaling the predictions to the original scale\n", + "predictions_rescaled = predictions_scaled * y_train_std + y_train_mean\n", + "y_test_rescaled = y_test\n", + "\n", + "# Calculating the rescaled mean absolute error\n", + "mae_rescaled = mean_absolute_error(y_test_rescaled, predictions_rescaled)\n", + "print(f'Rescaled Mean Absolute Error: {mae_rescaled}')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}