From 54e7964372e05673bf2f300f40badff707c14b71 Mon Sep 17 00:00:00 2001 From: Martina Rivero Date: Thu, 19 Oct 2023 09:17:59 +0200 Subject: [PATCH] Martina --- your-code/pandas_1.ipynb | 579 +++++++++++++++++++++++++++++++++------ 1 file changed, 497 insertions(+), 82 deletions(-) diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index 4f428ac..d292e89 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -44,10 +44,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_1 = pd.Series(lst)\n", + "serie_1" + ] }, { "cell_type": "markdown", @@ -60,10 +84,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74.4" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serie_1.values[2]\n", + "\n" + ] }, { "cell_type": "markdown", @@ -74,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -92,10 +130,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], + "source": [ + "df = pd.DataFrame(b)\n", + "print(df)" + ] }, { "cell_type": "markdown", @@ -106,7 +165,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 31, "metadata": {}, "outputs": [], "source": [ @@ -133,10 +192,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df_1 = df.rename(columns={0:'Score_1', 1:'Score_2', 2:'Score_3', 3:'Score_4', 4:'Score_5'})" + ] }, { "cell_type": "markdown", @@ -147,10 +208,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df_2 = df_1[['Score_1', 'Score_3', 'Score_5']]\n" + ] }, { "cell_type": "markdown", @@ -161,10 +224,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 56.95\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "df_3 = df[[2]].mean()\n", + "print(df_3)" + ] }, { "cell_type": "markdown", @@ -175,10 +250,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3 88.8\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "df_4 = df[[3]].max()\n", + "print(df_4)" + ] }, { "cell_type": "markdown", @@ -189,10 +276,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 40.75\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "df_5 = df[[1]].median()\n", + "print(df_5)" + ] }, { "cell_type": "markdown", @@ -203,7 +302,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -224,10 +323,31 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00\n" + ] + } + ], + "source": [ + "orders_1 = pd.DataFrame(orders)\n", + "print(orders_1)" + ] }, { "cell_type": "markdown", @@ -238,10 +358,27 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Quantity 2978\n", + "dtype: int64\n", + "Revenue 637.0\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "orders_quan = orders_1[['Quantity']].sum()\n", + "print(orders_quan)\n", + "\n", + "orders_rev = orders_1[['Revenue']].sum()\n", + "print(orders_rev)" + ] }, { "cell_type": "markdown", @@ -252,10 +389,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "UnitPrice 11.77\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "orders_diff = orders_1[['UnitPrice']].max() - orders_1[['UnitPrice']].min()\n", + "print(orders_diff)" + ] }, { "cell_type": "markdown", @@ -266,7 +415,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 53, "metadata": {}, "outputs": [], "source": [ @@ -285,10 +434,130 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.head()" + ] }, { "cell_type": "markdown", @@ -299,10 +568,32 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.isnull().sum()" + ] }, { "cell_type": "markdown", @@ -313,17 +604,49 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "2 316 104 3 3.0 3.5 8.00 \n", + "3 322 110 3 3.5 2.5 8.67 \n", + "4 314 103 2 2.0 3.0 8.21 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "... ... ... ... ... ... ... \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "384 312 103 3 3.5 4.0 8.78 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit \n", + "Serial No. \n", + "1 1 0.92 \n", + "2 1 0.72 \n", + "3 1 0.80 \n", + "4 0 0.65 \n", + "5 1 0.90 \n", + "... ... ... \n", + "381 1 0.82 \n", + "382 1 0.84 \n", + "383 1 0.91 \n", + "384 0 0.67 \n", + "385 1 0.95 \n", + "\n", + "[385 rows x 8 columns]\n" + ] + } + ], + "source": [ + "admissions_1 = admissions.set_index('Serial No.')\n", + "print(admissions_1)" + ] }, { "cell_type": "markdown", @@ -334,10 +657,24 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions_1[\"GRE-CGPA\"] = admissions_1[\"GRE Score\"].astype(str) + \" - \" + admissions_1[\"CGPA\"].astype(str)\n", + "admissions_1[\"GRE-CGPA\"].is_unique" + ] }, { "cell_type": "markdown", @@ -348,10 +685,50 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "11 328 112 4 4.0 4.5 9.10 \n", + "20 328 116 5 5.0 5.0 9.50 \n", + "21 334 119 5 5.0 4.5 9.70 \n", + "... ... ... ... ... ... ... \n", + "380 329 111 4 4.5 4.0 9.23 \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit GRE-CGPA \n", + "Serial No. \n", + "1 1 0.92 337 - 9.65 \n", + "5 1 0.90 330 - 9.34 \n", + "11 1 0.78 328 - 9.1 \n", + "20 1 0.94 328 - 9.5 \n", + "21 1 0.95 334 - 9.7 \n", + "... ... ... ... \n", + "380 1 0.89 329 - 9.23 \n", + "381 1 0.82 324 - 9.04 \n", + "382 1 0.84 325 - 9.11 \n", + "383 1 0.91 330 - 9.45 \n", + "385 1 0.95 333 - 9.66 \n", + "\n", + "[101 rows x 9 columns]\n" + ] + } + ], + "source": [ + "condition_1 = (admissions_1[\"CGPA\"] > 9) & (admissions_1[\"Research\"] == 1)\n", + "admissions_2 = admissions_1[condition_1] \n", + "print(admissions_2)" + ] }, { "cell_type": "markdown", @@ -362,17 +739,55 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "29 338 118 4 3.0 4.5 9.40 \n", + "63 327 114 3 3.0 3.0 9.02 \n", + "141 326 114 3 3.0 3.0 9.11 \n", + "218 324 111 4 3.0 3.0 9.01 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "\n", + " Research Chance of Admit GRE-CGPA \n", + "Serial No. \n", + "29 1 0.91 338 - 9.4 \n", + "63 0 0.61 327 - 9.02 \n", + "141 1 0.83 326 - 9.11 \n", + "218 1 0.82 324 - 9.01 \n", + "382 1 0.84 325 - 9.11 \n" + ] + } + ], + "source": [ + "condition_2 = (admissions_1[\"CGPA\"] > 9) & (admissions_1[\"SOP\"] < 3.5)\n", + "admissions_3 = admissions_1[condition_2] \n", + "print(admissions_3)\n" + ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Chance of Admit 0.888218\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "admissions_4 = admissions_2[[\"Chance of Admit\"]].mean()\n", + "print(admissions_4)" + ] }, { "cell_type": "markdown", @@ -449,7 +864,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.5" }, "toc": { "base_numbering": "",