diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index 4f428ac..adb92b3 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 184, "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 185, "metadata": {}, "outputs": [], "source": [ @@ -44,10 +44,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 186, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ] + }, + "execution_count": 186, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = pd.Series(lst)\n", + "s" + ] }, { "cell_type": "markdown", @@ -60,10 +84,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 187, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "74.4" + ] + }, + "execution_count": 187, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s[2]\n" + ] }, { "cell_type": "markdown", @@ -74,7 +111,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 188, "metadata": {}, "outputs": [], "source": [ @@ -92,10 +129,145 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 189, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 189, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(b)\n", + "df" + ] }, { "cell_type": "markdown", @@ -106,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 190, "metadata": {}, "outputs": [], "source": [ @@ -124,7 +296,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 191, "metadata": {}, "outputs": [], "source": [ @@ -133,10 +305,146 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 192, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(b)\n", + "df.columns=colnames\n", + "df\n" + ] }, { "cell_type": "markdown", @@ -147,10 +455,123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 193, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_3Score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ] + }, + "execution_count": 193, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_subset = df[[\"Score_1\",\"Score_3\",\"Score_5\"]]\n", + "df_subset\n" + ] }, { "cell_type": "markdown", @@ -161,10 +582,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 194, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "execution_count": 194, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Score_3.mean()\n" + ] }, { "cell_type": "markdown", @@ -175,10 +609,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 195, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "88.8" + ] + }, + "execution_count": 195, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Score_4.max()" + ] }, { "cell_type": "markdown", @@ -189,10 +636,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 196, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "40.75" + ] + }, + "execution_count": 196, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Score_2.median()" + ] }, { "cell_type": "markdown", @@ -203,7 +663,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 197, "metadata": {}, "outputs": [], "source": [ @@ -224,10 +684,134 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 198, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ] + }, + "execution_count": 198, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_orders = pd.DataFrame(orders)\n", + "df_orders" + ] }, { "cell_type": "markdown", @@ -238,10 +822,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 199, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total orders :2978\n", + "Total revenues: 637.0\n" + ] + } + ], + "source": [ + "total_orders = df_orders['Quantity'].sum()\n", + "total_revenues = df_orders['Revenue'].sum()\n", + "print(f\"Total orders :{total_orders}\")\n", + "print(f\"Total revenues: {total_revenues}\")" + ] }, { "cell_type": "markdown", @@ -252,10 +850,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 200, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Most expensive price is 11.95\n", + "Less expensive price is 0.18\n", + "Difference from most expensive to less expensive is 11.77\n" + ] + } + ], + "source": [ + "print(f\"Most expensive price is {df_orders.UnitPrice.max()}\")\n", + "print(f\"Less expensive price is {df_orders.UnitPrice.min()}\")\n", + "print(f\"Difference from most expensive to less expensive is {df_orders.UnitPrice.max()-df_orders.UnitPrice.min()}\")\n", + "\n" + ] }, { "cell_type": "markdown", @@ -266,7 +879,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 201, "metadata": {}, "outputs": [], "source": [ @@ -285,10 +898,130 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 202, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.head()" + ] }, { "cell_type": "markdown", @@ -299,10 +1032,43 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 203, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64\n", + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 \n" + ] + } + ], + "source": [ + "print(admissions.isnull().sum())\n", + "print(admissions.head())" + ] }, { "cell_type": "markdown", @@ -313,10 +1079,131 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 204, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
1231610433.03.58.0010.72
2332211033.52.58.6710.80
3431410322.03.08.2100.65
4533011554.53.09.3410.90
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 " + ] + }, + "execution_count": 204, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions.set_index(\"Serial No.\",drop= False)\n", + "admissions.head()" + ] }, { "cell_type": "code", @@ -329,15 +1216,39 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\"Turns out that GRE Score and CGPA also uniquely identify the data. Show this in the cell below.\"" + "#### Turns out that GRE Score and CGPA also uniquely identify the data. Show this in the cell below.\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 205, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total unique values of GRE Score and CPGA: 385\n", + "Total rows in df: 385\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 205, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(f\"Total unique values of GRE Score and CPGA: {admissions[['GRE Score', 'CGPA']].value_counts().sum()}\")\n", + "print(f\"Total rows in df: {len(admissions.index)}\")\n", + "\n", + "len(admissions.index) == admissions[['GRE Score', 'CGPA']].value_counts().sum()\n" + ] }, { "cell_type": "markdown", @@ -348,10 +1259,131 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 206, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
0133711844.54.59.6510.92
4533011554.53.09.3410.90
101132811244.04.59.1010.78
192032811655.05.09.5010.94
202133411955.04.59.7010.95
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "10 11 328 112 4 4.0 4.5 9.10 \n", + "19 20 328 116 5 5.0 5.0 9.50 \n", + "20 21 334 119 5 5.0 4.5 9.70 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "4 1 0.90 \n", + "10 1 0.78 \n", + "19 1 0.94 \n", + "20 1 0.95 " + ] + }, + "execution_count": 206, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_conditions = admissions[(admissions['CGPA'] > 9) & (admissions['Research'] == 1)]\n", + "df_conditions.head()" + ] }, { "cell_type": "markdown", @@ -362,75 +1394,568 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 207, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
282933811843.04.59.4010.91
14014132611433.03.09.1110.83
21721832411143.03.09.0110.82
38138232510733.03.59.1110.84
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "28 29 338 118 4 3.0 4.5 9.40 \n", + "140 141 326 114 3 3.0 3.0 9.11 \n", + "217 218 324 111 4 3.0 3.0 9.01 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "\n", + " Research Chance of Admit \n", + "28 1 0.91 \n", + "140 1 0.83 \n", + "217 1 0.82 \n", + "381 1 0.84 " + ] + }, + "execution_count": 207, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_applicants = df_conditions[(df_conditions['CGPA'] > 9) & (df_conditions['SOP'] < 3.5)]\n", + "df_applicants.head()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 208, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "0.85" + ] + }, + "execution_count": 208, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_applicants[\"Chance of Admit\"].mean()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Let's create a new column based on the TOEFL Score column, where we put True if the TOEFL value is greater than 100 and False if it is less. We are going to do it with a function and an apply. \n", - "To do this, we first create a function that receives an argument. The function will return True if the parameter entered is greater than 100, otherwise it will return False." + "### Let's create a new column based on the TOEFL Score column, where we put True if the TOEFL value is greater than 100 and False if it is less. We are going to do it with a function and an apply. \n", + "### To do this, we first create a function that receives an argument. The function will return True if the parameter entered is greater than 100, otherwise it will return False." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 209, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gy/rmsk3kj95dz94q2k0p3dx96h0000gn/T/ipykernel_64648/856229188.py:8: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_applicants[\"TOEFL_Op\"] = df_applicants[\"TOEFL Score\"].apply(f_toefl)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitTOEFL_Op
282933811843.04.59.4010.91True
14014132611433.03.09.1110.83True
21721832411143.03.09.0110.82True
38138232510733.03.59.1110.84True
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "28 29 338 118 4 3.0 4.5 9.40 \n", + "140 141 326 114 3 3.0 3.0 9.11 \n", + "217 218 324 111 4 3.0 3.0 9.01 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "\n", + " Research Chance of Admit TOEFL_Op \n", + "28 1 0.91 True \n", + "140 1 0.83 True \n", + "217 1 0.82 True \n", + "381 1 0.84 True " + ] + }, + "execution_count": 209, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def f_toefl(score):\n", + " if score > 100:\n", + " return True\n", + " return False\n", + "\n", + "\n", + "\n", + "df_applicants[\"TOEFL_Op\"] = df_applicants[\"TOEFL Score\"].apply(f_toefl)\n", + "df_applicants.head()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Now we create a new column called \"Decision\" and apply to the TOEFL Score column" + "### Now we create a new column called \"Decision\" and apply to the TOEFL Score column" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 210, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/gy/rmsk3kj95dz94q2k0p3dx96h0000gn/T/ipykernel_64648/1232300171.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " df_applicants[\"Decision\"] = df_applicants[\"TOEFL Score\"]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of AdmitTOEFL_OpDecision
282933811843.04.59.4010.91True118
14014132611433.03.09.1110.83True114
21721832411143.03.09.0110.82True111
38138232510733.03.59.1110.84True107
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "28 29 338 118 4 3.0 4.5 9.40 \n", + "140 141 326 114 3 3.0 3.0 9.11 \n", + "217 218 324 111 4 3.0 3.0 9.01 \n", + "381 382 325 107 3 3.0 3.5 9.11 \n", + "\n", + " Research Chance of Admit TOEFL_Op Decision \n", + "28 1 0.91 True 118 \n", + "140 1 0.83 True 114 \n", + "217 1 0.82 True 111 \n", + "381 1 0.84 True 107 " + ] + }, + "execution_count": 210, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_applicants[\"Decision\"] = df_applicants[\"TOEFL Score\"]\n", + "df_applicants.head()" + ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Create a column called `decision2` in the `admissions` dataframe. Assign 1 to this column if the value of `SOP` is greater than 3 and 0 otherwise. \n", - "HINT (use np.where)" + "### Create a column called `decision2` in the `admissions` dataframe. Assign 1 to this column if the value of `SOP` is greater than 3 and 0 otherwise. \n", + "### HINT (use np.where)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 211, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Serial No.GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admitdecision2
0133711844.54.59.6510.921
1231610433.03.58.0010.720
2332211033.52.58.6710.801
3431410322.03.08.2100.650
4533011554.53.09.3410.901
\n", + "
" + ], + "text/plain": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit decision2 \n", + "0 1 0.92 1 \n", + "1 1 0.72 0 \n", + "2 1 0.80 1 \n", + "3 0 0.65 0 \n", + "4 1 0.90 1 " + ] + }, + "execution_count": 211, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "admissions[\"decision2\"] = np.where(admissions[\"SOP\"] > 3, 1,0 )\n", + "admissions.head()" + ] } ], "metadata": { @@ -449,7 +1974,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.4" }, "toc": { "base_numbering": "",