From 00cfc5514d6c98e252c8d46da770bb37ec75f16c Mon Sep 17 00:00:00 2001 From: Amir Golafshan Date: Wed, 18 Oct 2023 17:00:02 +0200 Subject: [PATCH] Amir G - All Done :) --- your-code/pandas_1.ipynb | 2071 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 1992 insertions(+), 79 deletions(-) diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index 4f428ac..d075933 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 420, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 421, "metadata": {}, "outputs": [], "source": [ @@ -44,10 +44,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 422, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ] + }, + "execution_count": 422, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lst = pd.Series(lst)\n", + "lst" + ] }, { "cell_type": "markdown", @@ -60,10 +84,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 423, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "74.4" + ] + }, + "execution_count": 423, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lst[2]" + ] }, { "cell_type": "markdown", @@ -74,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 424, "metadata": {}, "outputs": [], "source": [ @@ -92,10 +129,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 425, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Transpose of above dataframe is-\n", + " 0 1 2 3 4 5 6 7 8 9\n", + "0 53.1 61.3 20.6 57.4 83.6 49.0 23.3 27.6 96.6 73.7\n", + "1 95.0 40.8 73.2 0.1 20.5 69.0 40.7 26.4 96.4 39.0\n", + "2 67.5 30.8 44.2 96.1 85.4 0.1 95.0 53.8 53.4 43.2\n", + "3 35.0 37.8 14.6 4.2 22.8 31.8 83.8 88.8 72.4 81.6\n", + "4 78.4 87.6 91.8 69.5 35.9 89.1 26.9 68.5 50.1 34.7\n" + ] + } + ], + "source": [ + "df = pd.DataFrame(b)\n", + "df = df.transpose()\n", + "print(\"Transpose of above dataframe is-\\n\", df)" + ] }, { "cell_type": "markdown", @@ -106,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 426, "metadata": {}, "outputs": [], "source": [ @@ -124,20 +179,148 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 427, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 427, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" + "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']\n", + "df = pd.DataFrame(b)\n", + "df.columns = colnames\n", + "df" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -147,10 +330,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 428, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_3Score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ] + }, + "execution_count": 428, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "subset_df = df[['Score_1', 'Score_3', 'Score_5']]\n", + "subset_df" + ] }, { "cell_type": "markdown", @@ -161,10 +457,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 429, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "execution_count": 429, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_df_3 = df['Score_3'].mean()\n", + "mean_df_3\n" + ] }, { "cell_type": "markdown", @@ -175,10 +485,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 430, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "88.8\n" + ] + } + ], + "source": [ + "max_df_4 = df['Score_4'].max()\n", + "print(max_df_4)" + ] }, { "cell_type": "markdown", @@ -189,10 +510,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 431, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "40.75" + ] + }, + "execution_count": 431, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "median_df_2 = df['Score_2'].median()\n", + "median_df_2" + ] }, { "cell_type": "markdown", @@ -203,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 432, "metadata": {}, "outputs": [], "source": [ @@ -224,10 +559,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 433, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ] + }, + "execution_count": 433, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(orders)\n", + "df" + ] }, { "cell_type": "markdown", @@ -238,10 +697,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 434, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The Total Quantity is 2978 and the total revenue is 637\n" + ] + } + ], + "source": [ + "total_quantity = df['Quantity'].sum()\n", + "total_revenue = df['Revenue'].sum()\n", + "print(f'The Total Quantity is {total_quantity} and the total revenue is {int(total_revenue)}')" + ] }, { "cell_type": "markdown", @@ -252,10 +723,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 435, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The most expensive price is 11.95\n", + "The least is 0.18\n", + "The difference of those two is 11.77\n" + ] + } + ], + "source": [ + "least_expensive = df['UnitPrice'].min()\n", + "most_expensive = df['UnitPrice'].max()\n", + "print(f'The most expensive price is {most_expensive}\\nThe least is {least_expensive}\\nThe difference of those two is {most_expensive - least_expensive}')" + ] }, { "cell_type": "markdown", @@ -266,14 +751,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 436, "metadata": {}, "outputs": [], "source": [ "# Run this code:\n", "admissions = pd.read_csv('../Admission_Predict.csv')\n", "#to remove trailing spaces at the end of the column names\n", - "admissions.columns = admissions.columns.str.rstrip()" + "admissions.columns = admissions.columns.str.rstrip()\n", + "df = pd.DataFrame(admissions)" ] }, { @@ -285,10 +771,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 437, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 437, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] }, { "cell_type": "markdown", @@ -299,10 +905,61 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 438, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 438, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isnull().sum()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 439, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ] + }, + "execution_count": 439, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.isna(df).sum()" + ] }, { "cell_type": "markdown", @@ -313,31 +970,287 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 440, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
Serial No.
1133711844.54.59.6510.92
2231610433.03.58.0010.72
3332211033.52.58.6710.80
4431410322.03.08.2100.65
5533011554.53.09.3410.90
..............................
38138132411033.53.59.0410.82
38238232510733.03.59.1110.84
38338333011645.04.59.4510.91
38438431210333.54.08.7800.67
38538533311745.04.09.6610.95
\n", + "

385 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR \\\n", + "Serial No. \n", + "1 1 337 118 4 4.5 4.5 \n", + "2 2 316 104 3 3.0 3.5 \n", + "3 3 322 110 3 3.5 2.5 \n", + "4 4 314 103 2 2.0 3.0 \n", + "5 5 330 115 5 4.5 3.0 \n", + "... ... ... ... ... ... ... \n", + "381 381 324 110 3 3.5 3.5 \n", + "382 382 325 107 3 3.0 3.5 \n", + "383 383 330 116 4 5.0 4.5 \n", + "384 384 312 103 3 3.5 4.0 \n", + "385 385 333 117 4 5.0 4.0 \n", + "\n", + " CGPA Research Chance of Admit \n", + "Serial No. \n", + "1 9.65 1 0.92 \n", + "2 8.00 1 0.72 \n", + "3 8.67 1 0.80 \n", + "4 8.21 0 0.65 \n", + "5 9.34 1 0.90 \n", + "... ... ... ... \n", + "381 9.04 1 0.82 \n", + "382 9.11 1 0.84 \n", + "383 9.45 1 0.91 \n", + "384 8.78 0 0.67 \n", + "385 9.66 1 0.95 \n", + "\n", + "[385 rows x 9 columns]" + ] + }, + "execution_count": 440, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.set_index('Serial No.', drop = False)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 441, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "admissions.insert(1, 'COUMNA NUEVA EN EL DOS', admissions.index.values)" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "\"Turns out that GRE Score and CGPA also uniquely identify the data. Show this in the cell below.\"" + "#### \"Turns out that GRE Score and CGPA also uniquely identify the data. Show this in the cell below.\"" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 461, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 461, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[[\"GRE Score\", \"CGPA\"]].value_counts().sum() == len(df.index)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 455, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 455, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n" + ] }, { "cell_type": "markdown", @@ -348,10 +1261,221 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 443, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
4533011554.53.09.3410.90
101132811244.04.59.1010.78
192032811655.05.09.5010.94
202133411955.04.59.7010.95
..............................
37938032911144.54.09.2310.89
38038132411033.53.59.0410.82
38138232510733.03.59.1110.84
38238333011645.04.59.4510.91
38438533311745.04.09.6610.95
\n", + "

101 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "10 11 328 112 4 4.0 4.5 9.10 \n", + "19 20 328 116 5 5.0 5.0 9.50 \n", + "20 21 334 119 5 5.0 4.5 9.70 \n", + ".. ... ... ... ... ... ... ... \n", + "379 380 329 111 4 4.5 4.0 9.23 \n", + "380 381 324 110 3 3.5 3.5 9.04 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "382 383 330 116 4 5.0 4.5 9.45 \n", + "384 385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "4 1 0.90 \n", + "10 1 0.78 \n", + "19 1 0.94 \n", + "20 1 0.95 \n", + ".. ... ... \n", + "379 1 0.89 \n", + "380 1 0.82 \n", + "381 1 0.84 \n", + "382 1 0.91 \n", + "384 1 0.95 \n", + "\n", + "[101 rows x 9 columns]" + ] + }, + "execution_count": 443, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cond_1 = (df['CGPA'] > 9)\n", + "cond_2 = (df['Research'] == 1)\n", + "\n", + "performed_infvestigation = df[(cond_1 & cond_2)]\n", + "performed_infvestigation" + ] }, { "cell_type": "markdown", @@ -362,24 +1486,270 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 444, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The mean value of those students to get admited is 80.2%\n" + ] + } + ], + "source": [ + "condition_1 = (df['CGPA'] > 9.0)\n", + "condition_2 = (df['SOP'] < 3.5)\n", + "\n", + "mean_price_applicants = df[(condition_1 & condition_2)]\n", + "mean = mean_price_applicants['Chance of Admit'].mean()\n", + "perc_mean = mean * 100\n", + "print(f'The mean value of those students to get admited is {perc_mean:.1f}%')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Let's create a new column based on the TOEFL Score column, where we put True if the TOEFL value is greater than 100 and False if it is less. We are going to do it with a function and an apply. \n", + "#### To do this, we first create a function that receives an argument. The function will return True if the parameter entered is greater than 100, otherwise it will return False." + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 445, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def score_greater(x):\n", + " return x > 100\n", + "df['Boolean TOEFL Score'] = df['TOEFL Score'].apply(score_greater)\n" + ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 446, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitBoolean TOEFL Score
0133711844.54.59.6510.92True
1231610433.03.58.0010.72True
2332211033.52.58.6710.80True
3431410322.03.08.2100.65True
4533011554.53.09.3410.90True
.................................
38038132411033.53.59.0410.82True
38138232510733.03.59.1110.84True
38238333011645.04.59.4510.91True
38338431210333.54.08.7800.67True
38438533311745.04.09.6610.95True
\n", + "

385 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + ".. ... ... ... ... ... ... ... \n", + "380 381 324 110 3 3.5 3.5 9.04 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "382 383 330 116 4 5.0 4.5 9.45 \n", + "383 384 312 103 3 3.5 4.0 8.78 \n", + "384 385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit Boolean TOEFL Score \n", + "0 1 0.92 True \n", + "1 1 0.72 True \n", + "2 1 0.80 True \n", + "3 0 0.65 True \n", + "4 1 0.90 True \n", + ".. ... ... ... \n", + "380 1 0.82 True \n", + "381 1 0.84 True \n", + "382 1 0.91 True \n", + "383 0 0.67 True \n", + "384 1 0.95 True \n", + "\n", + "[385 rows x 10 columns]" + ] + }, + "execution_count": 446, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "Let's create a new column based on the TOEFL Score column, where we put True if the TOEFL value is greater than 100 and False if it is less. We are going to do it with a function and an apply. \n", - "To do this, we first create a function that receives an argument. The function will return True if the parameter entered is greater than 100, otherwise it will return False." + "df" ] }, { @@ -393,15 +1763,272 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Now we create a new column called \"Decision\" and apply to the TOEFL Score column" + "#### Now we create a new column called \"Decision\" and apply to the TOEFL Score column" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 463, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitBoolean TOEFL ScoreUnique GRE & CGPADecision
0133711844.54.59.6510.92True346.65118
1231610433.03.58.0010.72True324.00104
2332211033.52.58.6710.80True330.67110
3431410322.03.08.2100.65True322.21103
4533011554.53.09.3410.90True339.34115
.......................................
38038132411033.53.59.0410.82True333.04110
38138232510733.03.59.1110.84True334.11107
38238333011645.04.59.4510.91True339.45116
38338431210333.54.08.7800.67True320.78103
38438533311745.04.09.6610.95True342.66117
\n", + "

385 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + ".. ... ... ... ... ... ... ... \n", + "380 381 324 110 3 3.5 3.5 9.04 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "382 383 330 116 4 5.0 4.5 9.45 \n", + "383 384 312 103 3 3.5 4.0 8.78 \n", + "384 385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit Boolean TOEFL Score Unique GRE & CGPA \\\n", + "0 1 0.92 True 346.65 \n", + "1 1 0.72 True 324.00 \n", + "2 1 0.80 True 330.67 \n", + "3 0 0.65 True 322.21 \n", + "4 1 0.90 True 339.34 \n", + ".. ... ... ... ... \n", + "380 1 0.82 True 333.04 \n", + "381 1 0.84 True 334.11 \n", + "382 1 0.91 True 339.45 \n", + "383 0 0.67 True 320.78 \n", + "384 1 0.95 True 342.66 \n", + "\n", + " Decision \n", + "0 118 \n", + "1 104 \n", + "2 110 \n", + "3 103 \n", + "4 115 \n", + ".. ... \n", + "380 110 \n", + "381 107 \n", + "382 116 \n", + "383 103 \n", + "384 117 \n", + "\n", + "[385 rows x 12 columns]" + ] + }, + "execution_count": 463, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Decision'] = df['TOEFL Score']\n", + "df" + ] }, { "cell_type": "code", @@ -421,10 +2048,296 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Create a column called `decision2` in the `admissions` dataframe. Assign 1 to this column if the value of `SOP` is greater than 3 and 0 otherwise. \n", + "#### Create a column called `decision2` in the `admissions` dataframe. Assign 1 to this column if the value of `SOP` is greater than 3 and 0 otherwise. \n", "HINT (use np.where)" ] }, + { + "cell_type": "code", + "execution_count": 464, + "metadata": {}, + "outputs": [], + "source": [ + "def score_greater2(x):\n", + " return x > 3\n", + "df['Decision 2'] = df['SOP'].apply(score_greater2)" + ] + }, + { + "cell_type": "code", + "execution_count": 465, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitBoolean TOEFL ScoreUnique GRE & CGPADecisionDecision 2
0133711844.54.59.6510.92True346.65118True
1231610433.03.58.0010.72True324.00104False
2332211033.52.58.6710.80True330.67110True
3431410322.03.08.2100.65True322.21103False
4533011554.53.09.3410.90True339.34115True
..........................................
38038132411033.53.59.0410.82True333.04110True
38138232510733.03.59.1110.84True334.11107False
38238333011645.04.59.4510.91True339.45116True
38338431210333.54.08.7800.67True320.78103True
38438533311745.04.09.6610.95True342.66117True
\n", + "

385 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + ".. ... ... ... ... ... ... ... \n", + "380 381 324 110 3 3.5 3.5 9.04 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "382 383 330 116 4 5.0 4.5 9.45 \n", + "383 384 312 103 3 3.5 4.0 8.78 \n", + "384 385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit Boolean TOEFL Score Unique GRE & CGPA \\\n", + "0 1 0.92 True 346.65 \n", + "1 1 0.72 True 324.00 \n", + "2 1 0.80 True 330.67 \n", + "3 0 0.65 True 322.21 \n", + "4 1 0.90 True 339.34 \n", + ".. ... ... ... ... \n", + "380 1 0.82 True 333.04 \n", + "381 1 0.84 True 334.11 \n", + "382 1 0.91 True 339.45 \n", + "383 0 0.67 True 320.78 \n", + "384 1 0.95 True 342.66 \n", + "\n", + " Decision Decision 2 \n", + "0 118 True \n", + "1 104 False \n", + "2 110 True \n", + "3 103 False \n", + "4 115 True \n", + ".. ... ... \n", + "380 110 True \n", + "381 107 False \n", + "382 116 True \n", + "383 103 True \n", + "384 117 True \n", + "\n", + "[385 rows x 13 columns]" + ] + }, + "execution_count": 465, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, { "cell_type": "code", "execution_count": null, @@ -449,7 +2362,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.6" }, "toc": { "base_numbering": "",