From cfa2ac68a72457f67fb4d82ac8773e16f5edd8d1 Mon Sep 17 00:00:00 2001
From: Shrenik-Bhalgat <59737962+Shrenik-Bhalgat@users.noreply.github.com>
Date: Mon, 24 Aug 2020 14:49:59 +0530
Subject: [PATCH 1/5] Add files via upload
---
ShrenikBhalgat-assignment1.ipynb | 426 +++++++++++++++++++++++++++++++
1 file changed, 426 insertions(+)
create mode 100644 ShrenikBhalgat-assignment1.ipynb
diff --git a/ShrenikBhalgat-assignment1.ipynb b/ShrenikBhalgat-assignment1.ipynb
new file mode 100644
index 0000000..ae2a14f
--- /dev/null
+++ b/ShrenikBhalgat-assignment1.ipynb
@@ -0,0 +1,426 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "list1=[1,2,3,4,5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 2, 3, 4, 5])"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array1=np.array(list1)\n",
+ "array1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "list2=[[1,2,3],[4,5,6],[7,8,9]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1, 2, 3],\n",
+ " [4, 5, 6],\n",
+ " [7, 8, 9]])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array2=np.array(list2)\n",
+ "array2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17])"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array3=np.arange(1,18,2)\n",
+ "array3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 1, 3, 5],\n",
+ " [ 7, 9, 11],\n",
+ " [13, 15, 17]])"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array3.reshape(3,3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 6, 7, 2, 9, 7, 7, 2, 1, 4])"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array4=np.random.randint(1,10,10)\n",
+ "array4"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 2, 3, 4, 5])"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array5=np.arange(1,6,1)\n",
+ "array5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1],\n",
+ " [2],\n",
+ " [3],\n",
+ " [4],\n",
+ " [5]])"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array5.reshape(5,1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5,)"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array5.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array6=np.arange(10)\n",
+ "array6"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array6[3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array6[4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array6[9]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0\n",
+ "2\n",
+ "4\n",
+ "6\n",
+ "8\n"
+ ]
+ }
+ ],
+ "source": [
+ "for i in array6:\n",
+ " if i%2==0:\n",
+ " print(i)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "for i in range(7,10):\n",
+ " array6[i]=100"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 0, 1, 2, 3, 4, 5, 6, 100, 100, 100])"
+ ]
+ },
+ "execution_count": 48,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array6"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "array7=np.arange(25)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "array7=array7.reshape(5,5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5, 5)"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array7.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 6, 7, 8],\n",
+ " [11, 12, 13],\n",
+ " [16, 17, 18]])"
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "array7[1:4,1:4]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
From 75a0a755e66fe25aa40a7b773b3c26be7f494cde Mon Sep 17 00:00:00 2001
From: Shrenik-Bhalgat <59737962+Shrenik-Bhalgat@users.noreply.github.com>
Date: Sun, 6 Sep 2020 13:05:53 +0530
Subject: [PATCH 2/5] Shrenik Bhalgat Assignment 2
---
Assignment2 Shrenik Bhalgat.ipynb | 1270 +++++++++++++++++++++++++++++
1 file changed, 1270 insertions(+)
create mode 100644 Assignment2 Shrenik Bhalgat.ipynb
diff --git a/Assignment2 Shrenik Bhalgat.ipynb b/Assignment2 Shrenik Bhalgat.ipynb
new file mode 100644
index 0000000..20d821b
--- /dev/null
+++ b/Assignment2 Shrenik Bhalgat.ipynb
@@ -0,0 +1,1270 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 148649 | \n",
+ " 148650 | \n",
+ " Roy I Tillery | \n",
+ " Custodian | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148650 | \n",
+ " 148651 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148651 | \n",
+ " 148652 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148652 | \n",
+ " 148653 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148653 | \n",
+ " 148654 | \n",
+ " Joe Lopez | \n",
+ " Counselor, Log Cabin Ranch | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " -618.13 | \n",
+ " 0.0 | \n",
+ " -618.13 | \n",
+ " -618.13 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
148654 rows × 13 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id EmployeeName \\\n",
+ "0 1 NATHANIEL FORD \n",
+ "1 2 GARY JIMENEZ \n",
+ "2 3 ALBERT PARDINI \n",
+ "3 4 CHRISTOPHER CHONG \n",
+ "4 5 PATRICK GARDNER \n",
+ "... ... ... \n",
+ "148649 148650 Roy I Tillery \n",
+ "148650 148651 Not provided \n",
+ "148651 148652 Not provided \n",
+ "148652 148653 Not provided \n",
+ "148653 148654 Joe Lopez \n",
+ "\n",
+ " JobTitle BasePay \\\n",
+ "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 167411.18 \n",
+ "1 CAPTAIN III (POLICE DEPARTMENT) 155966.02 \n",
+ "2 CAPTAIN III (POLICE DEPARTMENT) 212739.13 \n",
+ "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 77916.00 \n",
+ "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 134401.60 \n",
+ "... ... ... \n",
+ "148649 Custodian 0.00 \n",
+ "148650 Not provided NaN \n",
+ "148651 Not provided NaN \n",
+ "148652 Not provided NaN \n",
+ "148653 Counselor, Log Cabin Ranch 0.00 \n",
+ "\n",
+ " OvertimePay OtherPay Benefits TotalPay TotalPayBenefits Year \\\n",
+ "0 0.00 400184.25 NaN 567595.43 567595.43 2011 \n",
+ "1 245131.88 137811.38 NaN 538909.28 538909.28 2011 \n",
+ "2 106088.18 16452.60 NaN 335279.91 335279.91 2011 \n",
+ "3 56120.71 198306.90 NaN 332343.61 332343.61 2011 \n",
+ "4 9737.00 182234.59 NaN 326373.19 326373.19 2011 \n",
+ "... ... ... ... ... ... ... \n",
+ "148649 0.00 0.00 0.0 0.00 0.00 2014 \n",
+ "148650 NaN NaN NaN 0.00 0.00 2014 \n",
+ "148651 NaN NaN NaN 0.00 0.00 2014 \n",
+ "148652 NaN NaN NaN 0.00 0.00 2014 \n",
+ "148653 0.00 -618.13 0.0 -618.13 -618.13 2014 \n",
+ "\n",
+ " Notes Agency Status \n",
+ "0 NaN San Francisco NaN \n",
+ "1 NaN San Francisco NaN \n",
+ "2 NaN San Francisco NaN \n",
+ "3 NaN San Francisco NaN \n",
+ "4 NaN San Francisco NaN \n",
+ "... ... ... ... \n",
+ "148649 NaN San Francisco NaN \n",
+ "148650 NaN San Francisco NaN \n",
+ "148651 NaN San Francisco NaN \n",
+ "148652 NaN San Francisco NaN \n",
+ "148653 NaN San Francisco NaN \n",
+ "\n",
+ "[148654 rows x 13 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=pd.read_csv('Salaries.csv')\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n",
+ " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n",
+ " 'Status'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Display column names\n",
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(148654, 13)"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Display the number of rows and columns\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 13 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148045 non-null float64\n",
+ " 4 OvertimePay 148650 non-null float64\n",
+ " 5 OtherPay 148650 non-null float64\n",
+ " 6 Benefits 112491 non-null float64\n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Notes 0 non-null float64\n",
+ " 11 Agency 148654 non-null object \n",
+ " 12 Status 0 non-null float64\n",
+ "dtypes: float64(8), int64(2), object(3)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Display the dataframe's info\n",
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 148654.000000 | \n",
+ " 148045.000000 | \n",
+ " 148650.000000 | \n",
+ " 148650.000000 | \n",
+ " 112491.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 74327.500000 | \n",
+ " 66325.448840 | \n",
+ " 5066.059886 | \n",
+ " 3648.767297 | \n",
+ " 25007.893151 | \n",
+ " 74768.321972 | \n",
+ " 93692.554811 | \n",
+ " 2012.522643 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 42912.857795 | \n",
+ " 42764.635495 | \n",
+ " 11454.380559 | \n",
+ " 8056.601866 | \n",
+ " 15402.215858 | \n",
+ " 50517.005274 | \n",
+ " 62793.533483 | \n",
+ " 1.117538 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 1.000000 | \n",
+ " -166.010000 | \n",
+ " -0.010000 | \n",
+ " -7058.590000 | \n",
+ " -33.890000 | \n",
+ " -618.130000 | \n",
+ " -618.130000 | \n",
+ " 2011.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 37164.250000 | \n",
+ " 33588.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 11535.395000 | \n",
+ " 36168.995000 | \n",
+ " 44065.650000 | \n",
+ " 2012.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 74327.500000 | \n",
+ " 65007.450000 | \n",
+ " 0.000000 | \n",
+ " 811.270000 | \n",
+ " 28628.620000 | \n",
+ " 71426.610000 | \n",
+ " 92404.090000 | \n",
+ " 2013.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 111490.750000 | \n",
+ " 94691.050000 | \n",
+ " 4658.175000 | \n",
+ " 4236.065000 | \n",
+ " 35566.855000 | \n",
+ " 105839.135000 | \n",
+ " 132876.450000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 148654.000000 | \n",
+ " 319275.010000 | \n",
+ " 245131.880000 | \n",
+ " 400184.250000 | \n",
+ " 96570.660000 | \n",
+ " 567595.430000 | \n",
+ " 567595.430000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id BasePay OvertimePay OtherPay \\\n",
+ "count 148654.000000 148045.000000 148650.000000 148650.000000 \n",
+ "mean 74327.500000 66325.448840 5066.059886 3648.767297 \n",
+ "std 42912.857795 42764.635495 11454.380559 8056.601866 \n",
+ "min 1.000000 -166.010000 -0.010000 -7058.590000 \n",
+ "25% 37164.250000 33588.200000 0.000000 0.000000 \n",
+ "50% 74327.500000 65007.450000 0.000000 811.270000 \n",
+ "75% 111490.750000 94691.050000 4658.175000 4236.065000 \n",
+ "max 148654.000000 319275.010000 245131.880000 400184.250000 \n",
+ "\n",
+ " Benefits TotalPay TotalPayBenefits Year Notes \\\n",
+ "count 112491.000000 148654.000000 148654.000000 148654.000000 0.0 \n",
+ "mean 25007.893151 74768.321972 93692.554811 2012.522643 NaN \n",
+ "std 15402.215858 50517.005274 62793.533483 1.117538 NaN \n",
+ "min -33.890000 -618.130000 -618.130000 2011.000000 NaN \n",
+ "25% 11535.395000 36168.995000 44065.650000 2012.000000 NaN \n",
+ "50% 28628.620000 71426.610000 92404.090000 2013.000000 NaN \n",
+ "75% 35566.855000 105839.135000 132876.450000 2014.000000 NaN \n",
+ "max 96570.660000 567595.430000 567595.430000 2014.000000 NaN \n",
+ "\n",
+ " Status \n",
+ "count 0.0 \n",
+ "mean NaN \n",
+ "std NaN \n",
+ "min NaN \n",
+ "25% NaN \n",
+ "50% NaN \n",
+ "75% NaN \n",
+ "max NaN "
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#Display stats of the dataframe\n",
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id 0\n",
+ "EmployeeName 0\n",
+ "JobTitle 0\n",
+ "BasePay 609\n",
+ "OvertimePay 4\n",
+ "OtherPay 4\n",
+ "Benefits 36163\n",
+ "TotalPay 0\n",
+ "TotalPayBenefits 0\n",
+ "Year 0\n",
+ "Notes 148654\n",
+ "Agency 0\n",
+ "Status 148654\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#display null values per column\n",
+ "df.isnull().sum(axis=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Agency | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 148649 | \n",
+ " 148650 | \n",
+ " Roy I Tillery | \n",
+ " Custodian | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148650 | \n",
+ " 148651 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148651 | \n",
+ " 148652 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148652 | \n",
+ " 148653 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148653 | \n",
+ " 148654 | \n",
+ " Joe Lopez | \n",
+ " Counselor, Log Cabin Ranch | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " -618.13 | \n",
+ " 0.0 | \n",
+ " -618.13 | \n",
+ " -618.13 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
148654 rows × 11 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id EmployeeName \\\n",
+ "0 1 NATHANIEL FORD \n",
+ "1 2 GARY JIMENEZ \n",
+ "2 3 ALBERT PARDINI \n",
+ "3 4 CHRISTOPHER CHONG \n",
+ "4 5 PATRICK GARDNER \n",
+ "... ... ... \n",
+ "148649 148650 Roy I Tillery \n",
+ "148650 148651 Not provided \n",
+ "148651 148652 Not provided \n",
+ "148652 148653 Not provided \n",
+ "148653 148654 Joe Lopez \n",
+ "\n",
+ " JobTitle BasePay \\\n",
+ "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 167411.18 \n",
+ "1 CAPTAIN III (POLICE DEPARTMENT) 155966.02 \n",
+ "2 CAPTAIN III (POLICE DEPARTMENT) 212739.13 \n",
+ "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 77916.00 \n",
+ "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 134401.60 \n",
+ "... ... ... \n",
+ "148649 Custodian 0.00 \n",
+ "148650 Not provided NaN \n",
+ "148651 Not provided NaN \n",
+ "148652 Not provided NaN \n",
+ "148653 Counselor, Log Cabin Ranch 0.00 \n",
+ "\n",
+ " OvertimePay OtherPay Benefits TotalPay TotalPayBenefits Year \\\n",
+ "0 0.00 400184.25 NaN 567595.43 567595.43 2011 \n",
+ "1 245131.88 137811.38 NaN 538909.28 538909.28 2011 \n",
+ "2 106088.18 16452.60 NaN 335279.91 335279.91 2011 \n",
+ "3 56120.71 198306.90 NaN 332343.61 332343.61 2011 \n",
+ "4 9737.00 182234.59 NaN 326373.19 326373.19 2011 \n",
+ "... ... ... ... ... ... ... \n",
+ "148649 0.00 0.00 0.0 0.00 0.00 2014 \n",
+ "148650 NaN NaN NaN 0.00 0.00 2014 \n",
+ "148651 NaN NaN NaN 0.00 0.00 2014 \n",
+ "148652 NaN NaN NaN 0.00 0.00 2014 \n",
+ "148653 0.00 -618.13 0.0 -618.13 -618.13 2014 \n",
+ "\n",
+ " Agency \n",
+ "0 San Francisco \n",
+ "1 San Francisco \n",
+ "2 San Francisco \n",
+ "3 San Francisco \n",
+ "4 San Francisco \n",
+ "... ... \n",
+ "148649 San Francisco \n",
+ "148650 San Francisco \n",
+ "148651 San Francisco \n",
+ "148652 San Francisco \n",
+ "148653 San Francisco \n",
+ "\n",
+ "[148654 rows x 11 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#remove columns with all values as nan\n",
+ "df.dropna(axis=1,how='all',inplace=True)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id 148654\n",
+ "EmployeeName 110811\n",
+ "JobTitle 2159\n",
+ "BasePay 109489\n",
+ "OvertimePay 65998\n",
+ "OtherPay 83225\n",
+ "Benefits 98465\n",
+ "TotalPay 138486\n",
+ "TotalPayBenefits 142098\n",
+ "Year 4\n",
+ "Agency 1\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#display number of unique values in each column\n",
+ "df.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id 148654\n",
+ "EmployeeName 110811\n",
+ "JobTitle 2159\n",
+ "BasePay 109489\n",
+ "OvertimePay 65998\n",
+ "OtherPay 83225\n",
+ "Benefits 98465\n",
+ "TotalPay 138486\n",
+ "TotalPayBenefits 142098\n",
+ "Year 4\n",
+ "Agency 1\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#display number of unique values in each column\n",
+ "df.apply(lambda x: x.nunique(), axis=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id 148654\n",
+ "EmployeeName 110811\n",
+ "JobTitle 2159\n",
+ "BasePay 109489\n",
+ "OvertimePay 65998\n",
+ "OtherPay 83225\n",
+ "Benefits 98465\n",
+ "TotalPay 138486\n",
+ "TotalPayBenefits 142098\n",
+ "Year 4\n",
+ "Agency 1\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#display number of unique values in each column\n",
+ "df.apply(pd.Series.nunique)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " TotalPay | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2011 | \n",
+ " 71744.103871 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2012 | \n",
+ " 74113.262265 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2013 | \n",
+ " 77611.443142 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2014 | \n",
+ " 75463.918140 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Year TotalPay\n",
+ "0 2011 71744.103871\n",
+ "1 2012 74113.262265\n",
+ "2 2013 77611.443142\n",
+ "3 2014 75463.918140"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#mean of total pay based on year\n",
+ "df.groupby('Year', as_index=False)['TotalPay'].mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "77321"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#how many people have 0 overtime pay\n",
+ "(df['OvertimePay']==0).sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 0 71333.000000\n",
+ " 1 77321.000000\n",
+ "mean 0 90527.759214\n",
+ " 1 60229.348901\n",
+ "std 0 46961.054390\n",
+ " 1 49307.912350\n",
+ "min 0 0.000000\n",
+ " 1 -618.130000\n",
+ "25% 0 59104.230000\n",
+ " 1 13290.450000\n",
+ "50% 0 82984.830000\n",
+ " 1 58158.590000\n",
+ "75% 0 121560.910000\n",
+ " 1 91115.090000\n",
+ "max 0 538909.280000\n",
+ " 1 567595.430000\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#stats of total pay, grouped by whether overtime pay is 0\n",
+ "df.groupby(df['OvertimePay']==0, as_index=False)['TotalPay'].describe()\n",
+ "# 0 represents people with non 0 overtime pay\n",
+ "# 1 represents people with 0 overtime pay"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 77321.000000\n",
+ "mean 60229.348901\n",
+ "std 49307.912350\n",
+ "min -618.130000\n",
+ "25% 13290.450000\n",
+ "50% 58158.590000\n",
+ "75% 91115.090000\n",
+ "max 567595.430000\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#stats of total pay of people having 0 overtime pay\n",
+ "df[df.OvertimePay==0].describe()['TotalPay']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0 1\n",
+ "Name: Id, dtype: int64"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#find Id of that person with max TotalPay you got in previous question\n",
+ "df[df.TotalPay==df[df.OvertimePay==0].max()['TotalPay']]['Id']\n",
+ "#we can also use .Id instead of ['Id']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12345 REBECCA CHIU\n",
+ "Name: EmployeeName, dtype: object"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "#name of employee with total pay benefits = 87619.78\n",
+ "df[df.TotalPayBenefits==87619.78]['EmployeeName'] # or .EmployeeName "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "132"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
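+ "#how many people have BasePay > 150000 and OvertimePay > 100000 (note: DataFrame.size counts rows x columns, not people; len(...) or .shape[0] gives the row count)\n",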
+ "df[(df['BasePay']> 150000) & (df['OvertimePay']> 100000)].size"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "JobTitle ZOO CURATOR\n",
+ "TotalPayBenefits 436224\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
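+ "# which job title generally has highest average TotalPayBenefits (note: .max() reduces each column independently, so the JobTitle shown is the alphabetically last title rather than the one with the highest mean; sort by the mean or use idxmax to pair them)\n",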
+ "df.groupby('JobTitle', as_index=False)['TotalPayBenefits'].mean().max()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2512"
+ ]
+ },
+ "execution_count": 43,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
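+ "#How many employees are POLICE (note: str.contains is case-sensitive by default, so any title-case police job titles are not counted)\n",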
+ "df[df['JobTitle'].str.contains('POLICE')] ['JobTitle'].size"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.2"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
From e446bf76dbdf877e11cb4da6080db3ed809c8acf Mon Sep 17 00:00:00 2001
From: Shrenik-Bhalgat <59737962+Shrenik-Bhalgat@users.noreply.github.com>
Date: Sun, 6 Sep 2020 13:22:07 +0530
Subject: [PATCH 3/5] Add files via upload
From c99e7368e00abad4bdd8d2b78f4fa0febccb0887 Mon Sep 17 00:00:00 2001
From: Shrenik-Bhalgat <59737962+Shrenik-Bhalgat@users.noreply.github.com>
Date: Sun, 6 Sep 2020 13:27:58 +0530
Subject: [PATCH 4/5] Add files via upload
From 4a5e0a463f401a59d3c64d1e8e3dffa5eb321e7e Mon Sep 17 00:00:00 2001
From: Shrenik-Bhalgat <59737962+Shrenik-Bhalgat@users.noreply.github.com>
Date: Sun, 6 Sep 2020 13:40:56 +0530
Subject: [PATCH 5/5] Add files via upload