diff --git a/units/SLU08_Data_Problems/Exercise Notebook - SLU8 - Data Problems.ipynb b/units/SLU08_Data_Problems/Exercise Notebook - SLU8 - Data Problems.ipynb index 8ce6493..1368df3 100644 --- a/units/SLU08_Data_Problems/Exercise Notebook - SLU8 - Data Problems.ipynb +++ b/units/SLU08_Data_Problems/Exercise Notebook - SLU8 - Data Problems.ipynb @@ -3,10 +3,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "0b0d3775309bed48af20258e9b245955", "grade": false, "grade_id": "cell-8de03764c3fadbb7", "locked": true, @@ -39,10 +36,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "8032d3767a5e3dec7d4bfa7652978eaa", "grade": false, "grade_id": "cell-5c7aeb95386944af", "locked": true, @@ -62,10 +56,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "b38ab254545e188a7d4c5130f69e43a3", "grade": false, "grade_id": "cell-cce5c680443f5ed3", "locked": true, @@ -82,10 +73,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "085c637b366d0ea98fd9f30a17e34dde", "grade": false, "grade_id": "cell-e4a7efe6d5ed3d75", "locked": true, @@ -103,10 +91,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "83ebcdab445db02e4db840a73cb8862f", "grade": false, "grade_id": "cell-f06de24313c07c92", "locked": true, @@ -123,9 +108,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "f1824162b10117dc1b8c42bfdf076feb", "grade": false, "grade_id": "cell-44d0fe77142624bb", "locked": false, @@ -138,8 +121,9 @@ "# EXERCISE\n", "# Check the unique values of Sex, and assign it to a variable 'uniques' \n", "# uniques = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + 
"### BEGIN SOLUTION\n", + "uniques = df.Sex.unique()\n", + "### END SOLUTION\n", "\n", "\n", "# For validation (do not modify):\n", @@ -149,10 +133,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "251b2d9db3b2b629aa24be243f6deeb5", "grade": false, "grade_id": "cell-b13c11cb6f00d751", "locked": true, @@ -170,10 +151,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "a9f226402f1bd6b7cd993c1ab4156b7e", "grade": true, "grade_id": "cell-709b00f7c20780f2", "locked": true, @@ -192,10 +170,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "71a52177421fd66084d7cc5d47587713", "grade": false, "grade_id": "cell-ea41714a27e6e0f3", "locked": true, @@ -211,9 +186,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "37e2787ce6cf852246ed1e919d2aba0e", "grade": false, "grade_id": "cell-a6e72eaaffeeacf5", "locked": false, @@ -226,8 +199,9 @@ "# EXERCISE\n", "# Find the rows with Squirrel (create a boolean mask)\n", "# mask = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + "### BEGIN SOLUTION\n", + "mask = df.Sex == 'Squirrel'\n", + "### END SOLUTION\n", "\n", "\n", "# For validation (do not modify):\n", @@ -237,10 +211,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "9a1664528e6473a16e4de4fb1cd7ffc5", "grade": false, "grade_id": "cell-fbb9677e4a63207c", "locked": true, @@ -258,10 +229,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "7c153571880dd616045d034f33c4c85c", "grade": true, "grade_id": "cell-9edeb7cb605f8b41", "locked": true, @@ -282,9 +250,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": 
false, "nbgrader": { - "checksum": "cbaacfdb2bd473a53bf2ab800cce9937", "grade": false, "grade_id": "cell-8f48643883f8a197", "locked": false, @@ -296,8 +262,9 @@ "source": [ "# Now drop the rows that have Squirrel (update 'df')\n", "# df = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + "### BEGIN SOLUTION\n", + "df = df[~mask]\n", + "### END SOLUTION\n", "\n", "\n", "# For validation (do not modify):\n", @@ -307,10 +274,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "e87970e2251e8fa95ac4f0abcd634465", "grade": false, "grade_id": "cell-b05ecafc537030fa", "locked": true, @@ -328,10 +292,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "2ac6bdd20cbbc207f8288fe9c114eef0", "grade": true, "grade_id": "cell-641ee100d76c157a", "locked": true, @@ -350,10 +311,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "022fa2e184cce685ce305ea8da11f17d", "grade": false, "grade_id": "cell-3ad116a8f5ae2fc8", "locked": true, @@ -370,9 +328,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "6071f795dc3e073d3b274fd46a76d97c", "grade": false, "grade_id": "cell-6d88fd848d6c9a56", "locked": false, @@ -385,8 +341,9 @@ "# Find the duplicated lines according to the 'PassengerId' subset. 
\n", "# Create a mask out of it (hint: use 'duplicated()')\n", "# duplicates = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + "### BEGIN SOLUTION\n", + "duplicates = df.duplicated(subset=['PassengerId'])\n", + "### END SOLUTION\n", "\n", "print('Number of duplicates:', duplicates.sum())" ] @@ -394,10 +351,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "778d66e93ff4c2e4edfa51ef22c67af8", "grade": false, "grade_id": "cell-5890020756c54c64", "locked": true, @@ -415,10 +369,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "fac1561f583c7cbf253c4e9007d7f685", "grade": true, "grade_id": "cell-449f9d2ba07dd384", "locked": true, @@ -437,10 +388,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "23eaca41cd8bce7affaf6089824f82be", "grade": false, "grade_id": "cell-65e45640cea438ab", "locked": true, @@ -456,9 +404,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "00532f27294a65c59c2a1f5ffee86678", "grade": false, "grade_id": "cell-77f4e93e1d7b5692", "locked": false, @@ -470,9 +416,10 @@ "source": [ "# Drop the duplicated lines according to the 'PassengerId' subset. 
\n", "# Create a mask out of it (hint: use 'duplicated()')\n", - "# duplicates = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + "# df = ...\n", + "### BEGIN SOLUTION\n", + "df = df.drop_duplicates(subset=['PassengerId'])\n", + "### END SOLUTION\n", "\n", "# For validation (do not modify):\n", "print('Number of duplicated lines after drop:', df.duplicated().sum())\n", @@ -482,10 +429,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "bc5231d17f155b741f69db0eb757c035", "grade": false, "grade_id": "cell-0c599103715d89ef", "locked": true, @@ -504,10 +448,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "c89487697e84809bda3c5693613200e8", "grade": true, "grade_id": "cell-30f8efef22074f36", "locked": true, @@ -527,10 +468,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "ed16515af8f531d7145a9fb6f96700a5", "grade": false, "grade_id": "cell-29d505bdc172805c", "locked": true, @@ -554,9 +492,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "1dec357956047d2f05f0194ab075a9d8", "grade": false, "grade_id": "cell-552e1b726a8a6011", "locked": false, @@ -575,32 +511,36 @@ " # Count the number of missing values in the full dataset. \n", " # Use pandas '.isnull()'. Number_of_missing should be a single int number\n", " # number_of_missing = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " number_of_missing = data.isnull().sum().sum()\n", + " ### END SOLUTION\n", " \n", " \n", " # 2) Cleaning missing data on numerical features\n", " # Fill the missing values of 'Age' by the median. 
\n", " # You can use 'fillna'\n", " # df.Age = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " data.Age = data.Age.fillna(data.Age.median())\n", + " ### END SOLUTION\n", " \n", " \n", " # 3) Solving Categorical Features\n", " # Replace the missing values in the feature 'Embarked' by 'unknown'\n", " # You can use 'fillna()'\n", " # df.Embarked = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " data.Embarked = data.Embarked.fillna('unknown')\n", + " ### END SOLUTION\n", " \n", " \n", " # 4) Drop the feature 'Cabin' which has a lot of missing values\n", " # You can use the method 'drop(...)'. \n", " # Hint: remember what you learned about the axis number\n", " # df = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " data = data.drop('Cabin', axis=1)\n", + " ### END SOLUTION\n", " \n", " return number_of_missing, data" ] @@ -609,16 +549,14 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "0adcc30670ef2e7f15c7fbd4c556c4a9", "grade": false, "grade_id": "cell-fa84cfb06127a968", "locked": true, "schema_version": 1, "solution": false - } + }, + "scrolled": false }, "outputs": [], "source": [ @@ -634,10 +572,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "496ed0966fe0c9b87f11b9dc602f0683", "grade": false, "grade_id": "cell-f7852cf665a4d1f7", "locked": true, @@ -658,10 +593,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "0911b8af5f84231f5f974b93f0a1d2b8", "grade": true, "grade_id": "cell-ea7ba97ea7d284d1", "locked": true, @@ -683,10 +615,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "a41b0b5ec2b0a57250fdbe9d6d5176ec", 
"grade": false, "grade_id": "cell-468be3b369dc2d8e", "locked": true, @@ -703,9 +632,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "0f2edbe3615f35bd89c473da7c0e8ca0", "grade": false, "grade_id": "cell-39e8900a6cbea07f", "locked": false, @@ -728,14 +655,17 @@ " # Hint: beware of your parenthesis!\n", " # mask = ...\n", " # number_of_outliers = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " mask = (data['Age'] <= 117) & (data['Age'] >= 0)\n", + " number_of_outliers = (~mask).sum()\n", + " ### END SOLUTION\n", " \n", " # 2) Update the dataframe 'data'. Keep only the rows that do not\n", " # have outliers in 'Age'. \n", " # data = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " data = data[mask]\n", + " ### END SOLUTION\n", " \n", " assert mask.dtype == 'bool', \"The mask must be of bool type\"\n", " return data, number_of_outliers" @@ -745,16 +675,14 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "92879d5783b057931db6896bf8b1c0ee", "grade": false, "grade_id": "cell-f5543791ccb88747", "locked": true, "schema_version": 1, "solution": false - } + }, + "scrolled": true }, "outputs": [], "source": [ @@ -770,10 +698,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "7e21b5b9086c2d2f7c4d5f5843f50aac", "grade": false, "grade_id": "cell-20bc552eef9d0a09", "locked": true, @@ -793,10 +718,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "48132765d485fded7ac841a1cb0c7413", "grade": true, "grade_id": "cell-4cb8c79a8fdd5bdd", "locked": true, @@ -816,10 +738,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": 
"71f0d4ed71dc597eb0e11f46e81ab68f", "grade": false, "grade_id": "cell-764534232a55bd48", "locked": true, @@ -843,9 +762,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "fd4fe96e422382354740f21199373267", "grade": false, "grade_id": "cell-ea52b88623d3149c", "locked": false, @@ -858,8 +775,9 @@ "# EXERCISE\n", "# Check the Age dtype, assign it to 'dtype' variable\n", "# dtype = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + "### BEGIN SOLUTION\n", + "dtype = df.Age.dtype\n", + "### END SOLUTION\n", "\n", "print('Current Age dtype:', dtype)" ] @@ -867,10 +785,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "6728faa9ec435dd7fabc6192cb56d505", "grade": false, "grade_id": "cell-8bf307de79b8b13e", "locked": true, @@ -888,10 +803,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "13e1c88dbf6aa0030e3c326e74d43be8", "grade": true, "grade_id": "cell-f19c24b25c6160e5", "locked": true, @@ -911,9 +823,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "009164bdf3259cb3b90facdcbb5c9b27", "grade": false, "grade_id": "cell-d2764b69e81a3588", "locked": false, @@ -927,8 +837,9 @@ "# Convert the feature Age to int. 
Update the dataframe.\n", "# Hint: Use the method `astype()`.\n", "# df.Age = ...\n", - "# YOUR CODE HERE\n", - "raise NotImplementedError()\n", + "### BEGIN SOLUTION\n", + "df.Age = df.Age.astype(int)\n", + "### END SOLUTION\n", "\n", "# For validation (do not modify):\n", "print('New Age dtype:', df.Age.dtype)" @@ -937,10 +848,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "33629511cf6ffb52d757798c5b9cdf3d", "grade": false, "grade_id": "cell-2c131ec6536891f8", "locked": true, @@ -958,10 +866,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "7d019a2e94f7ba5117d516286e2e1edb", "grade": true, "grade_id": "cell-929605d4e13df683", "locked": true, @@ -980,10 +885,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "239ad0180e121e0a6d86969f432c37bd", "grade": false, "grade_id": "cell-40e257159a597291", "locked": true, @@ -999,10 +901,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "9e220b02558c6c5659c5051244d88eb0", "grade": false, "grade_id": "cell-f98f76d299c6d6c1", "locked": true, @@ -1018,10 +917,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "486e3d7849ca7681e7a1baf1ae64601f", "grade": false, "grade_id": "cell-c400e5543660e3f4", "locked": true, @@ -1042,6 +938,7 @@ } ], "metadata": { + "celltoolbar": "Create Assignment", "kernelspec": { "display_name": "Python 3", "language": "python", diff --git a/units/SLU13_Validation_classification/Exercise Notebook - SLU13 - Validation Metrics for Classification.ipynb b/units/SLU13_Validation_classification/Exercise Notebook - SLU13 - Validation Metrics for Classification.ipynb index 2b2900e..37ed43f 100644 --- 
a/units/SLU13_Validation_classification/Exercise Notebook - SLU13 - Validation Metrics for Classification.ipynb +++ b/units/SLU13_Validation_classification/Exercise Notebook - SLU13 - Validation Metrics for Classification.ipynb @@ -3,10 +3,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "e59d80ca8ef039c907954e0b22d36554", "grade": false, "grade_id": "cell-6a239644097794a8", "locked": true, @@ -27,10 +24,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "a1ec9e9391fa1153d6a8d25ca89a99d8", "grade": false, "grade_id": "cell-80b6ded3c7e4ff71", "locked": true, @@ -53,10 +47,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "609bda154b35c1231c96bc37d469c50b", "grade": false, "grade_id": "cell-50a2e3f5c827c482", "locked": true, @@ -76,10 +67,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "98ccddf4e0ac082e5f1f1f1bbf8e035f", "grade": false, "grade_id": "cell-f767181ff8a74302", "locked": true, @@ -97,10 +85,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "e908c8130f037a8ab0fc67d72d242c11", "grade": false, "grade_id": "cell-183ae06e20a51d99", "locked": true, @@ -119,10 +104,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "0aeab207fc14a02a9a272e33cbc38c87", "grade": false, "grade_id": "cell-09fdc6498dfbee6a", "locked": true, @@ -137,10 +119,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "88333ac4722695091b7a4f5627072c16", "grade": false, "grade_id": "cell-309aeaa50a83f366", "locked": true, @@ -157,9 +136,7 @@ "cell_type": "code", "execution_count": 
null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "bb3d042b403a3aec8624d215bf784a5f", "grade": false, "grade_id": "cell-0eda277e12939a26", "locked": false, @@ -176,8 +153,9 @@ " # Calculate the class imbalance, i.e., the ratio of 1s (ones)\n", " # in the dataset\n", " # ratio_1s = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " ratio_1s = labels.mean()\n", + " ### END SOLUTION\n", " \n", " return ratio_1s" ] @@ -186,10 +164,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "2434c55a4ff09c1c619b4cc2a7e236f0", "grade": false, "grade_id": "cell-94bd00c67a1fe4c4", "locked": true, @@ -205,10 +180,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "b3f0262a0d72b6cec54e00b5b2ebaa0b", "grade": false, "grade_id": "cell-6079d75d48915987", "locked": true, @@ -226,10 +198,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "e84e332887dcc17f419bf74a698d55a8", "grade": true, "grade_id": "cell-030786b1768fc408", "locked": true, @@ -248,10 +217,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "57c2b90fa243b47a52021dcd8878c542", "grade": false, "grade_id": "cell-5ac5292b4570c33a", "locked": true, @@ -266,10 +232,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "272a7f6d5053c933c4cf7a38d98772d1", "grade": false, "grade_id": "cell-9e3cc9c61f3335b8", "locked": true, @@ -290,10 +253,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "94994ea674ab3361961eafe6fd7df495", "grade": false, "grade_id": "cell-ee81d9fbdee5c5a2", "locked": true, @@ -313,10 +273,7 @@ { 
"cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "17bd21c363658f433feea27e5635df4a", "grade": false, "grade_id": "cell-959a32d33b84d1fd", "locked": true, @@ -333,10 +290,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "9815766114704d90c29507c9ba72e771", "grade": false, "grade_id": "cell-13180965a7f1de43", "locked": true, @@ -347,16 +301,13 @@ "outputs": [], "source": [ "# RUN cell:\n", - "clf = LogisticRegression(random_state=123, tol=1e-8).fit(X_train, y_train)" + "clf = LogisticRegression(random_state=123, tol=1e-10).fit(X_train, y_train)" ] }, { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "27b044e1e4050d2201a9b3d9ed3eccb5", "grade": false, "grade_id": "cell-d7b9cadddaa7d3c6", "locked": true, @@ -372,9 +323,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "7a9a4eda0e3e70334dad1cc8c3bcc3e1", "grade": false, "grade_id": "cell-280d064d01d162c9", "locked": false, @@ -400,8 +349,9 @@ " # `predict_proba` of your classifier. 
Assign it to the variable `probas`\n", " # NOTE: don't forget to extract only the second column.\n", " # probas = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " probas = clf.predict_proba(X_test)[:, 1]\n", + " ### END SOLUTION\n", " \n", " return probas" ] @@ -410,10 +360,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "52efedbc4d7581b4c5a0a1aac55b7f7e", "grade": false, "grade_id": "cell-1f6777972243da4f", "locked": true, @@ -430,10 +377,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "35ebf0544d6549074b19c028fcfeaf14", "grade": false, "grade_id": "cell-d4828a09c677f7eb", "locked": true, @@ -451,10 +395,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "3f73732fc4820a3658c25a389524257c", "grade": true, "grade_id": "cell-e906e89e01c760bb", "locked": true, @@ -466,7 +407,7 @@ "outputs": [], "source": [ "### BEGIN TESTS\n", - "assert np.isclose(probas[0], 0.07192596, atol=1e-5)\n", + "assert np.isclose(probas[0], 0.07192596, atol=1e-3)\n", "assert len(probas) == 3300, \"The length of the variable 'probas' is expected to be 3300.\"\n", "assert type(probas) == np.ndarray\n", "### END TESTS" @@ -475,10 +416,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "3d410dbdc91c2728c2d2ac72ee3f57f4", "grade": false, "grade_id": "cell-dfb578b62308daa5", "locked": true, @@ -501,9 +439,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "558697bd266f78755a09a7b4c2a9dcf2", "grade": false, "grade_id": "cell-43efa02831475bb3", "locked": false, @@ -528,8 +464,9 @@ " # the value 0 is below or equal to 'threshold' and 1 is above the \n", " # 'threshold'\n", " # 
predictions = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " predictions = (probas > threshold).astype(int)\n", + " ### END SOLUTION\n", " \n", " return predictions" ] @@ -538,10 +475,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "41741d1907e86eaec3e8508196ee9305", "grade": false, "grade_id": "cell-23a5f5d590771c51", "locked": true, @@ -561,10 +495,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "4fc9441d0bf3c4d83b303241bd915dde", "grade": false, "grade_id": "cell-53f58a856a25b889", "locked": true, @@ -583,10 +514,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "49ea03e9509fbc680a23e4a8584c07f4", "grade": true, "grade_id": "cell-b0475bb6576f9a6f", "locked": true, @@ -600,7 +528,6 @@ "### BEGIN TESTS\n", "assert predictions.sum() == 168\n", "assert len(predictions) == 3300, \"The length of the variable 'predictions' is expected to be 3300.\"\n", - "assert predictions.dtype == np.int64\n", "assert predictions[-1] == 0\n", "### END TESTS" ] @@ -608,10 +535,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "c4e4f257168b763eac3da0b3fe158999", "grade": false, "grade_id": "cell-4df6efa366e0554f", "locked": true, @@ -630,9 +554,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "8a5cdeaa1906c1043c64f11a51128eb0", "grade": false, "grade_id": "cell-49baf011180c6d08", "locked": false, @@ -656,8 +578,9 @@ " # Get the TP, FP, TN, FN from `confusion_matrix(...)` of sklearn\n", " # Assign to the following variables:\n", " # tn, fp, fn, tp = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " tn, fp, fn, tp = 
confusion_matrix(y_true, predictions).ravel()\n", + " ### END SOLUTION\n", " \n", " return {'TP': tp, 'FP': fp, 'TN': tn, 'FN': fn}" ] @@ -666,10 +589,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "fdf8c5670a0ed58bf219d1a7f4463e58", "grade": false, "grade_id": "cell-9271e7e8f9e640f8", "locked": true, @@ -687,10 +607,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "7c347e4c59bb41a94cb32c5137ae46e1", "grade": false, "grade_id": "cell-28b321d6cd55918d", "locked": true, @@ -708,10 +625,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "a66fc7a4361aa42c76a786339492c867", "grade": true, "grade_id": "cell-7052c12dff7ba619", "locked": true, @@ -733,10 +647,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "4ca39ab5623918de8452acf52dd61929", "grade": false, "grade_id": "cell-0a9296e5c35f2bc4", "locked": true, @@ -765,9 +676,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "6d7a82b3927ee3795c6a26f22d3e84c7", "grade": false, "grade_id": "cell-19648a0d1cc1f1aa", "locked": false, @@ -796,18 +705,21 @@ " \n", " # Calculate Accuracy and assign it to the variable 'accuracy'\n", " # accuracy = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " accuracy = (tp + tn) / (tn + fp + fn + tp)\n", + " ### END SOLUTION\n", " \n", " # Calculate Precision and assign it to the variable 'precision'\n", " # precision = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " precision = tp / (tp + fp)\n", + " ### END SOLUTION\n", " \n", " # Calculate Recall and assign it to the variable 'recall'\n", " # recall = ...\n", - " # YOUR 
CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " recall = tp / (tp + fn)\n", + " ### END SOLUTION\n", " \n", " return {'accuracy': accuracy, 'precision': precision, 'recall': recall}" ] @@ -816,16 +728,14 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "80ea2cc30bdb004d420a8a379bc23e60", "grade": false, "grade_id": "cell-2b7e1d089248b805", "locked": true, "schema_version": 1, "solution": false - } + }, + "scrolled": true }, "outputs": [], "source": [ @@ -838,10 +748,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "ead058f26a82169f30498b38eecf5e01", "grade": false, "grade_id": "cell-1d537c3a5e8da719", "locked": true, @@ -861,10 +768,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "ad5bf969ac440dbc97c4dee7ba8e04ec", "grade": true, "grade_id": "cell-d5ff24bc34c52f97", "locked": true, @@ -885,10 +789,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "ed5647c9d69c1b0c3709aa856652b2d3", "grade": false, "grade_id": "cell-b00319b2812cd3c7", "locked": true, @@ -910,9 +811,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, "nbgrader": { - "checksum": "c8d17b7e79986bc83325e0cbe95edff7", "grade": false, "grade_id": "cell-676a58eb3ed5f6af", "locked": false, @@ -932,8 +831,9 @@ " # Calculate the Area Under ROC Curve. 
Use the sklearn implementation\n", " # 'roc_auc_score(...)'\n", " # auc = ...\n", - " # YOUR CODE HERE\n", - " raise NotImplementedError()\n", + " ### BEGIN SOLUTION\n", + " auc = roc_auc_score(y_true, probas)\n", + " ### END SOLUTION\n", " \n", " return auc" ] @@ -942,10 +842,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "d6bb733eb57356a8d837cc185f5315ea", "grade": false, "grade_id": "cell-314374b120cc13e6", "locked": true, @@ -962,10 +859,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "f3dfcb139dd10d72de1b6654ed97e427", "grade": false, "grade_id": "cell-e8d053a5d581a92f", "locked": true, @@ -983,10 +877,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "7e6cd946c153f40813267bb7f4695846", "grade": true, "grade_id": "cell-f998cca62cc9882d", "locked": true, @@ -1005,10 +896,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "301f2f54667c13e23f494006ed4e8511", "grade": false, "grade_id": "cell-73e9fab048f0a91a", "locked": true, @@ -1023,10 +911,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "c4610066475df8710a7bd27f21b728cd", "grade": false, "grade_id": "cell-1f332356383a4893", "locked": true, @@ -1046,10 +931,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "3e91597bd7697a38bb9ce8e2c48f493e", "grade": false, "grade_id": "cell-0dc967f02469cfb4", "locked": true, @@ -1067,10 +949,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "76b1ba0b4d69e55d7ffe65ca9eb6aa81", "grade": false, "grade_id": "cell-c5ac554cabad3210", 
"locked": true, @@ -1087,10 +966,7 @@ { "cell_type": "markdown", "metadata": { - "deletable": false, - "editable": false, "nbgrader": { - "checksum": "07db78df16130724ad91cb9b0f0a92e1", "grade": false, "grade_id": "cell-6922e33796ef4ce1", "locked": true, @@ -1104,6 +980,7 @@ } ], "metadata": { + "celltoolbar": "Create Assignment", "kernelspec": { "display_name": "Python 3", "language": "python",