From 9010845bc90ca9681f702a6a78a7bcab6e908778 Mon Sep 17 00:00:00 2001 From: Zeeshan17CO50 <48279593+Zeeshan17CO50@users.noreply.github.com> Date: Mon, 24 Aug 2020 14:53:20 +0530 Subject: [PATCH 1/6] Zeeshan Assignment 1 Siddique Zeeshan Azam Assignment 1 --- Zeeshan_Assignment1.ipynb | 350 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 350 insertions(+) create mode 100644 Zeeshan_Assignment1.ipynb diff --git a/Zeeshan_Assignment1.ipynb b/Zeeshan_Assignment1.ipynb new file mode 100644 index 0000000..1752abd --- /dev/null +++ b/Zeeshan_Assignment1.ipynb @@ -0,0 +1,350 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python list => \\[1,2,3,4,5\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 3 4 5]\n" + ] + } + ], + "source": [ + "li = [1,2,3,4,5]\n", + "li_np_arr = np.array(li)\n", + "print(li_np_arr)\n", + "# print(type(li_np_arr))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 2 3]\n", + " [4 5 6]\n", + " [7 8 9]]\n" + ] + } + ], + "source": [ + "li2 = [[1,2,3],[4,5,6],[7,8,9]]\n", + "li2_np_arr = np.array(li2)\n", + "print(li2_np_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", + "\n", + "\\[ [1,3,5],\n", + "\n", + " [7,9,11],\n", + " \n", + " [13,15,17] \\]" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 3 5]\n", + " [ 7 9 11]\n", + " [13 15 17]]\n" + ] + } + ], + "source": [ + "arr_built = np.arange(1,18,2).reshape(3,3)\n", + "print(arr_built)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[9 5 0 1 3 5 8 9 9 7]\n" + ] + } + ], + "source": [ + "ranarr1 = np.random.randint(0,10,10)\n", + "# ranarr1 = np.random.rand(10?\n", + "print(ranarr1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr2d = np.arange(1,6).reshape(5,1)\n", + "\n", + "arr2d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the shape of the above created array" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 1)" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr2d.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1 2 3 4 5 6 7 8 9 10] \n", + "\n", + "3 4 9\n" + ] + } + ], + "source": [ + "arr2 = np.arange(1,11)\n", + "print(arr2,\"\\n\")\n", + "print(arr2[2],arr2[3],arr2[8])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print alternate elements of that array" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 3 5 7 9]\n" + ] + } + ], + "source": [ + "print(arr2[::2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change last 3 elements into 100 using broadcasting and print" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 4, 5, 6, 7, 100, 100, 100]\n" + ] + } + ], + "source": [ + "# arr2[7:10]=100\n", + "arr2 = [i//i*100 if i>=arr2[-3] else i for i in arr2]\n", + "print(arr2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", + "\n", + "Then print the middle (3 x 3) matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 7 8 9]\n", + " [12 13 14]\n", + " [17 18 19]]\n" + ] + } + ], + "source": [ + "arr_five = np.arange(1,26).reshape(5,5)\n", + "# print(arr_five)\n", + "print(arr_five[1:4,1:4])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 04887e379e0eaf6104338243f6875e7715594569 Mon Sep 17 00:00:00 2001 From: Zeeshan17CO50 <48279593+Zeeshan17CO50@users.noreply.github.com> Date: Sun, 6 Sep 2020 11:32:52 +0530 Subject: [PATCH 2/6] Add files via upload SIDDIQUE ZEESHAN ASSIGNMENT 2 (Python4DS) --- Assignment/Zeeshan_Siddique_Assignment2.ipynb | 941 ++++++++++++++++++ 1 file changed, 941 insertions(+) create mode 100644 Assignment/Zeeshan_Siddique_Assignment2.ipynb diff --git a/Assignment/Zeeshan_Siddique_Assignment2.ipynb b/Assignment/Zeeshan_Siddique_Assignment2.ipynb new file mode 100644 index 0000000..0898660 --- /dev/null +++ b/Assignment/Zeeshan_Siddique_Assignment2.ipynb @@ -0,0 +1,941 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sal_df = pd.read_csv(\"Salaries.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sal_df.head()\n", + "sal_df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 13)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 148654 entries, 0 to 148653\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 148654 non-null int64 \n", + " 1 EmployeeName 148654 non-null object \n", + " 2 JobTitle 148654 non-null object \n", + " 3 BasePay 148045 non-null float64\n", + " 4 OvertimePay 148650 non-null float64\n", + " 5 OtherPay 148650 non-null float64\n", + " 6 Benefits 112491 non-null float64\n", + " 7 TotalPay 148654 non-null float64\n", + " 8 TotalPayBenefits 148654 non-null float64\n", + " 9 Year 148654 non-null int64 \n", + " 10 Notes 0 non-null float64\n", + " 11 Agency 148654 non-null object \n", + " 12 Status 0 non-null float64\n", + "dtypes: float64(8), int64(2), object(3)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "sal_df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148654.000000148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean74327.50000066325.4488405066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42912.85779542764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min1.000000-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%37164.25000033588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%74327.50000065007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%111490.75000094691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max148654.000000319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " Id BasePay OvertimePay OtherPay \\\n", + "count 148654.000000 148045.000000 148650.000000 148650.000000 \n", + "mean 74327.500000 66325.448840 5066.059886 3648.767297 \n", + "std 42912.857795 42764.635495 11454.380559 8056.601866 \n", + "min 1.000000 -166.010000 -0.010000 -7058.590000 \n", + "25% 37164.250000 33588.200000 0.000000 0.000000 \n", + "50% 74327.500000 65007.450000 0.000000 811.270000 \n", + "75% 111490.750000 94691.050000 4658.175000 4236.065000 \n", + "max 148654.000000 319275.010000 245131.880000 400184.250000 \n", + "\n", + " Benefits TotalPay TotalPayBenefits Year Notes \\\n", + "count 112491.000000 148654.000000 148654.000000 148654.000000 0.0 \n", + "mean 25007.893151 74768.321972 93692.554811 2012.522643 NaN \n", + "std 15402.215858 50517.005274 62793.533483 1.117538 NaN \n", + "min -33.890000 -618.130000 -618.130000 2011.000000 NaN \n", + "25% 11535.395000 36168.995000 44065.650000 2012.000000 NaN \n", + "50% 28628.620000 71426.610000 92404.090000 2013.000000 NaN \n", + "75% 35566.855000 105839.135000 132876.450000 2014.000000 NaN \n", + "max 96570.660000 567595.430000 567595.430000 2014.000000 NaN \n", + "\n", + " Status \n", + "count 0.0 \n", + "mean NaN \n", + "std NaN \n", + "min NaN \n", + "25% NaN \n", + "50% NaN \n", + "75% NaN \n", + "max NaN " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 0\n", + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "Notes 148654\n", + "Agency 0\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearAgency
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011San Francisco
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011San Francisco
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011San Francisco
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011San Francisco
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011San Francisco
....................................
148649148650Roy I TilleryCustodian0.000.000.000.00.000.002014San Francisco
148650148651Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148651148652Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148652148653Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148653148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014San Francisco
\n", + "

148654 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName \\\n", + "0 1 NATHANIEL FORD \n", + "1 2 GARY JIMENEZ \n", + "2 3 ALBERT PARDINI \n", + "3 4 CHRISTOPHER CHONG \n", + "4 5 PATRICK GARDNER \n", + "... ... ... \n", + "148649 148650 Roy I Tillery \n", + "148650 148651 Not provided \n", + "148651 148652 Not provided \n", + "148652 148653 Not provided \n", + "148653 148654 Joe Lopez \n", + "\n", + " JobTitle BasePay \\\n", + "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 167411.18 \n", + "1 CAPTAIN III (POLICE DEPARTMENT) 155966.02 \n", + "2 CAPTAIN III (POLICE DEPARTMENT) 212739.13 \n", + "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 77916.00 \n", + "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 134401.60 \n", + "... ... ... \n", + "148649 Custodian 0.00 \n", + "148650 Not provided NaN \n", + "148651 Not provided NaN \n", + "148652 Not provided NaN \n", + "148653 Counselor, Log Cabin Ranch 0.00 \n", + "\n", + " OvertimePay OtherPay Benefits TotalPay TotalPayBenefits Year \\\n", + "0 0.00 400184.25 NaN 567595.43 567595.43 2011 \n", + "1 245131.88 137811.38 NaN 538909.28 538909.28 2011 \n", + "2 106088.18 16452.60 NaN 335279.91 335279.91 2011 \n", + "3 56120.71 198306.90 NaN 332343.61 332343.61 2011 \n", + "4 9737.00 182234.59 NaN 326373.19 326373.19 2011 \n", + "... ... ... ... ... ... ... \n", + "148649 0.00 0.00 0.0 0.00 0.00 2014 \n", + "148650 NaN NaN NaN 0.00 0.00 2014 \n", + "148651 NaN NaN NaN 0.00 0.00 2014 \n", + "148652 NaN NaN NaN 0.00 0.00 2014 \n", + "148653 0.00 -618.13 0.0 -618.13 -618.13 2014 \n", + "\n", + " Agency \n", + "0 San Francisco \n", + "1 San Francisco \n", + "2 San Francisco \n", + "3 San Francisco \n", + "4 San Francisco \n", + "... ... \n", + "148649 San Francisco \n", + "148650 San Francisco \n", + "148651 San Francisco \n", + "148652 San Francisco \n", + "148653 San Francisco \n", + "\n", + "[148654 rows x 11 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.dropna(how='all',axis=1, inplace=True)\n", + "sal_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 148654\n", + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Agency 1\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.apply(pd.Series.nunique)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.groupby('Year').mean()['TotalPay']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(sal_df['OvertimePay']==0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df[sal_df['OvertimePay']==0]['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# maxi = sal_df[sal_df['OvertimePay']==0]['TotalPay'].max()\n", + "max_index = sal_df[sal_df['OvertimePay']==0]['TotalPay'].idxmax()\n", + "sal_df.loc[(max_index, 'Id')]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12345 REBECCA CHIU\n", + "Name: EmployeeName, dtype: object" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df[sal_df['TotalPayBenefits']==87619.78]['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df[(sal_df['BasePay']>150000) & (sal_df['OvertimePay']>100000)].count()['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "highest_paid = sal_df.loc[sal_df['TotalPayBenefits'].idxmax()]\n", + "highest_paid['JobTitle']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# .str.contains()\n", + "# sal_df['JobTitle'].str.contains('POLICE').value_counts()\n", + "sum(sal_df['JobTitle'].str.contains('POLICE'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From f2a491e25997aed3ce7aa70393c062d8ecb9382d Mon Sep 17 00:00:00 2001 From: Zeeshan17CO50 <48279593+Zeeshan17CO50@users.noreply.github.com> Date: Sun, 6 Sep 2020 11:44:10 +0530 Subject: [PATCH 3/6] Add files via upload ZEESHAN SIDDIQUE ASSIGNMENT 2 (PYTHON4DS) --- Zeeshan_Siddique_Assignment2.ipynb | 941 +++++++++++++++++++++++++++++ 1 file changed, 941 insertions(+) create mode 100644 Zeeshan_Siddique_Assignment2.ipynb diff --git a/Zeeshan_Siddique_Assignment2.ipynb b/Zeeshan_Siddique_Assignment2.ipynb new file mode 100644 index 0000000..0898660 --- /dev/null +++ b/Zeeshan_Siddique_Assignment2.ipynb @@ -0,0 +1,941 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "sal_df = pd.read_csv(\"Salaries.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# sal_df.head()\n", + "sal_df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 13)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 148654 entries, 0 to 148653\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 148654 non-null int64 \n", + " 1 EmployeeName 148654 non-null object \n", + " 2 JobTitle 148654 non-null object \n", + " 3 BasePay 148045 non-null float64\n", + " 4 OvertimePay 148650 non-null float64\n", + " 5 OtherPay 148650 non-null float64\n", + " 6 Benefits 112491 non-null float64\n", + " 7 TotalPay 148654 non-null float64\n", + " 8 TotalPayBenefits 148654 non-null float64\n", + " 9 Year 148654 non-null int64 \n", + " 10 Notes 0 non-null float64\n", + " 11 Agency 148654 non-null object \n", + " 12 Status 0 non-null float64\n", + "dtypes: float64(8), int64(2), object(3)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "sal_df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148654.000000148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean74327.50000066325.4488405066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42912.85779542764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min1.000000-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%37164.25000033588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%74327.50000065007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%111490.75000094691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max148654.000000319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " Id BasePay OvertimePay OtherPay \\\n", + "count 148654.000000 148045.000000 148650.000000 148650.000000 \n", + "mean 74327.500000 66325.448840 5066.059886 3648.767297 \n", + "std 42912.857795 42764.635495 11454.380559 8056.601866 \n", + "min 1.000000 -166.010000 -0.010000 -7058.590000 \n", + "25% 37164.250000 33588.200000 0.000000 0.000000 \n", + "50% 74327.500000 65007.450000 0.000000 811.270000 \n", + "75% 111490.750000 94691.050000 4658.175000 4236.065000 \n", + "max 148654.000000 319275.010000 245131.880000 400184.250000 \n", + "\n", + " Benefits TotalPay TotalPayBenefits Year Notes \\\n", + "count 112491.000000 148654.000000 148654.000000 148654.000000 0.0 \n", + "mean 25007.893151 74768.321972 93692.554811 2012.522643 NaN \n", + "std 15402.215858 50517.005274 62793.533483 1.117538 NaN \n", + "min -33.890000 -618.130000 -618.130000 2011.000000 NaN \n", + "25% 11535.395000 36168.995000 44065.650000 2012.000000 NaN \n", + "50% 28628.620000 71426.610000 92404.090000 2013.000000 NaN \n", + "75% 35566.855000 105839.135000 132876.450000 2014.000000 NaN \n", + "max 96570.660000 567595.430000 567595.430000 2014.000000 NaN \n", + "\n", + " Status \n", + "count 0.0 \n", + "mean NaN \n", + "std NaN \n", + "min NaN \n", + "25% NaN \n", + "50% NaN \n", + "75% NaN \n", + "max NaN " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 0\n", + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "Notes 148654\n", + "Agency 0\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearAgency
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011San Francisco
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011San Francisco
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011San Francisco
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011San Francisco
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011San Francisco
....................................
148649148650Roy I TilleryCustodian0.000.000.000.00.000.002014San Francisco
148650148651Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148651148652Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148652148653Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148653148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014San Francisco
\n", + "

148654 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName \\\n", + "0 1 NATHANIEL FORD \n", + "1 2 GARY JIMENEZ \n", + "2 3 ALBERT PARDINI \n", + "3 4 CHRISTOPHER CHONG \n", + "4 5 PATRICK GARDNER \n", + "... ... ... \n", + "148649 148650 Roy I Tillery \n", + "148650 148651 Not provided \n", + "148651 148652 Not provided \n", + "148652 148653 Not provided \n", + "148653 148654 Joe Lopez \n", + "\n", + " JobTitle BasePay \\\n", + "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 167411.18 \n", + "1 CAPTAIN III (POLICE DEPARTMENT) 155966.02 \n", + "2 CAPTAIN III (POLICE DEPARTMENT) 212739.13 \n", + "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 77916.00 \n", + "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 134401.60 \n", + "... ... ... \n", + "148649 Custodian 0.00 \n", + "148650 Not provided NaN \n", + "148651 Not provided NaN \n", + "148652 Not provided NaN \n", + "148653 Counselor, Log Cabin Ranch 0.00 \n", + "\n", + " OvertimePay OtherPay Benefits TotalPay TotalPayBenefits Year \\\n", + "0 0.00 400184.25 NaN 567595.43 567595.43 2011 \n", + "1 245131.88 137811.38 NaN 538909.28 538909.28 2011 \n", + "2 106088.18 16452.60 NaN 335279.91 335279.91 2011 \n", + "3 56120.71 198306.90 NaN 332343.61 332343.61 2011 \n", + "4 9737.00 182234.59 NaN 326373.19 326373.19 2011 \n", + "... ... ... ... ... ... ... \n", + "148649 0.00 0.00 0.0 0.00 0.00 2014 \n", + "148650 NaN NaN NaN 0.00 0.00 2014 \n", + "148651 NaN NaN NaN 0.00 0.00 2014 \n", + "148652 NaN NaN NaN 0.00 0.00 2014 \n", + "148653 0.00 -618.13 0.0 -618.13 -618.13 2014 \n", + "\n", + " Agency \n", + "0 San Francisco \n", + "1 San Francisco \n", + "2 San Francisco \n", + "3 San Francisco \n", + "4 San Francisco \n", + "... ... \n", + "148649 San Francisco \n", + "148650 San Francisco \n", + "148651 San Francisco \n", + "148652 San Francisco \n", + "148653 San Francisco \n", + "\n", + "[148654 rows x 11 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.dropna(how='all',axis=1, inplace=True)\n", + "sal_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 148654\n", + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Agency 1\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.apply(pd.Series.nunique)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df.groupby('Year').mean()['TotalPay']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sum(sal_df['OvertimePay']==0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df[sal_df['OvertimePay']==0]['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# maxi = sal_df[sal_df['OvertimePay']==0]['TotalPay'].max()\n", + "max_index = sal_df[sal_df['OvertimePay']==0]['TotalPay'].idxmax()\n", + "sal_df.loc[(max_index, 'Id')]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12345 REBECCA CHIU\n", + "Name: EmployeeName, dtype: object" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df[sal_df['TotalPayBenefits']==87619.78]['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sal_df[(sal_df['BasePay']>150000) & (sal_df['OvertimePay']>100000)].count()['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "highest_paid = sal_df.loc[sal_df['TotalPayBenefits'].idxmax()]\n", + "highest_paid['JobTitle']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# .str.contains()\n", + "# sal_df['JobTitle'].str.contains('POLICE').value_counts()\n", + "sum(sal_df['JobTitle'].str.contains('POLICE'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9aefad28fff32b0b0d934e0f79bd36dbfdb1e22f Mon Sep 17 00:00:00 2001 From: Zeeshan17CO50 <48279593+Zeeshan17CO50@users.noreply.github.com> Date: Sun, 6 Sep 2020 11:58:04 +0530 Subject: [PATCH 4/6] Add files via upload Assignment 2 (SIDDIQUE ZEESHAN AZAM) From adc6028b50b262e05ad61172f125bedf5aa007fe Mon Sep 17 00:00:00 2001 From: Zeeshan17CO50 <48279593+Zeeshan17CO50@users.noreply.github.com> Date: Sun, 6 Sep 2020 12:01:47 +0530 Subject: [PATCH 5/6] Update Zeeshan_Siddique_Assignment2.ipynb --- Zeeshan_Siddique_Assignment2.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/Zeeshan_Siddique_Assignment2.ipynb b/Zeeshan_Siddique_Assignment2.ipynb index 0898660..dd79a6d 100644 --- a/Zeeshan_Siddique_Assignment2.ipynb +++ b/Zeeshan_Siddique_Assignment2.ipynb @@ -1,3 +1,4 @@ + { "cells": [ { From a78e2b7bd0b4a579219cc02a887405f892d8bb22 Mon Sep 17 00:00:00 2001 From: Zeeshan17CO50 <48279593+Zeeshan17CO50@users.noreply.github.com> Date: Sun, 6 Sep 2020 12:04:48 +0530 Subject: [PATCH 6/6] Update Zeeshan_Siddique_Assignment2.ipynb Zeeshan Siddique Assignment 2 --- Assignment/Zeeshan_Siddique_Assignment2.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/Assignment/Zeeshan_Siddique_Assignment2.ipynb b/Assignment/Zeeshan_Siddique_Assignment2.ipynb index 0898660..dd79a6d 100644 --- a/Assignment/Zeeshan_Siddique_Assignment2.ipynb +++ b/Assignment/Zeeshan_Siddique_Assignment2.ipynb @@ -1,3 +1,4 @@ + { "cells": [ {