diff --git a/Assignment1 Nikita_Jain.ipynb b/Assignment1 Nikita_Jain.ipynb
new file mode 100644
index 0000000..e76693d
--- /dev/null
+++ b/Assignment1 Nikita_Jain.ipynb
@@ -0,0 +1,378 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Assignment"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a python list => \\[1,2,3,4,5\\]\n",
+ "\n",
+ "Convert it into numpy array and print it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 2, 3, 4, 5])"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "values = [1, 2, 3, 4, 5]  # plain Python list\n",
+ "np.array(values)  # converted to a 1-D ndarray, displayed as the cell result"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n",
+ "\n",
+ "Convert it into numpy array and print it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1, 2, 3],\n",
+ " [4, 5, 6],\n",
+ " [7, 8, 9]])"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]  # nested Python lists, one inner list per row\n",
+ "np.array(rows)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n",
+ "\n",
+ "\\[ [1,3,5],\n",
+ "\n",
+ " [7,9,11],\n",
+ " \n",
+ " [13,15,17] \\]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 1, 3, 5],\n",
+ " [ 7, 9, 11],\n",
+ " [13, 15, 17]])"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.arange(1, 18, 2).reshape((3, 3))  # odd numbers 1..17 laid out row by row"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a numpy array with 10 random numbers from 0 to 10 (there should be a few numbers greater than 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 0, 7, 2, 8, 2, 5, 2, 3, 2])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.random.randint(0, 11, size=10)  # the upper bound is exclusive, so 11 is needed for 10 to be possible"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1],\n",
+ " [2],\n",
+ " [3],\n",
+ " [4],\n",
+ " [5]])"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "l = np.array([1, 2, 3, 4, 5])\n",
+ "l.reshape(-1, 1)  # displays a new 5 x 1 array; l itself keeps shape (5,)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Print the shape of the above created array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5,)"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "l.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "3\n",
+ "4\n",
+ "9\n"
+ ]
+ }
+ ],
+ "source": [
+ "list1 = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n",
+ "print(list1[2])  # 3rd element (0-based index 2)\n",
+ "print(list1[3])  # 4th element\n",
+ "print(list1[8])  # 9th element, indexed from the front so it does not depend on the array length"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Print alternate elements of that array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([1, 3, 5, 7, 9])"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list1[::2]  # every second element, starting from the first"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Change last 3 elements into 100 using broadcasting and print"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list1[-3:] = 100  # the scalar is broadcast over the last three positions, whatever the array length\n",
+ "list1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a 5 x 5 matrix (fill it with any element you like), print it.\n",
+ "\n",
+ "Then print the middle (3 x 3) matrix."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[31, 45, 1, 47, 40],\n",
+ " [31, 18, 44, 16, 34],\n",
+ " [31, 20, 43, 7, 7],\n",
+ " [22, 10, 1, 4, 10],\n",
+ " [29, 21, 48, 12, 39]])"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mat = np.random.randint(0, 50, size=(5, 5))  # 5 x 5 matrix of random integers in [0, 50)\n",
+ "mat"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[18, 44, 16],\n",
+ " [20, 43, 7],\n",
+ " [10, 1, 4]])"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "mat[1:-1, 1:-1]  # drop the first/last row and column, leaving the centre 3 x 3"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Assignment2 Nikita Jain.ipynb b/Assignment2 Nikita Jain.ipynb
new file mode 100644
index 0000000..04e3797
--- /dev/null
+++ b/Assignment2 Nikita Jain.ipynb
@@ -0,0 +1,1030 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "#%matplotlib notebook\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "import the dataset into a dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id EmployeeName JobTitle \\\n",
+ "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n",
+ "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n",
+ "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n",
+ "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n",
+ "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n",
+ "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n",
+ "\n",
+ " Year Notes Agency Status \n",
+ "0 2011 NaN San Francisco NaN \n",
+ "1 2011 NaN San Francisco NaN \n",
+ "2 2011 NaN San Francisco NaN \n",
+ "3 2011 NaN San Francisco NaN \n",
+ "4 2011 NaN San Francisco NaN "
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df=pd.read_csv(\"Salaries.csv\")\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display the column names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n",
+ " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n",
+ " 'Status'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display the number of rows and cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(148654, 13)"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display the dataframe info (types of data in columns and not null values etc.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 13 columns):\n",
+ "Id 148654 non-null int64\n",
+ "EmployeeName 148654 non-null object\n",
+ "JobTitle 148654 non-null object\n",
+ "BasePay 148045 non-null float64\n",
+ "OvertimePay 148650 non-null float64\n",
+ "OtherPay 148650 non-null float64\n",
+ "Benefits 112491 non-null float64\n",
+ "TotalPay 148654 non-null float64\n",
+ "TotalPayBenefits 148654 non-null float64\n",
+ "Year 148654 non-null int64\n",
+ "Notes 0 non-null float64\n",
+ "Agency 148654 non-null object\n",
+ "Status 0 non-null float64\n",
+ "dtypes: float64(8), int64(2), object(3)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display stats of the dataframe like count, mean, std, max, 25% etc....."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 148654.000000 | \n",
+ " 148045.000000 | \n",
+ " 148650.000000 | \n",
+ " 148650.000000 | \n",
+ " 112491.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 74327.500000 | \n",
+ " 66325.448841 | \n",
+ " 5066.059886 | \n",
+ " 3648.767297 | \n",
+ " 25007.893151 | \n",
+ " 74768.321972 | \n",
+ " 93692.554811 | \n",
+ " 2012.522643 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 42912.857795 | \n",
+ " 42764.635495 | \n",
+ " 11454.380559 | \n",
+ " 8056.601866 | \n",
+ " 15402.215858 | \n",
+ " 50517.005274 | \n",
+ " 62793.533483 | \n",
+ " 1.117538 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " 1.000000 | \n",
+ " -166.010000 | \n",
+ " -0.010000 | \n",
+ " -7058.590000 | \n",
+ " -33.890000 | \n",
+ " -618.130000 | \n",
+ " -618.130000 | \n",
+ " 2011.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 37164.250000 | \n",
+ " 33588.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 11535.395000 | \n",
+ " 36168.995000 | \n",
+ " 44065.650000 | \n",
+ " 2012.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 74327.500000 | \n",
+ " 65007.450000 | \n",
+ " 0.000000 | \n",
+ " 811.270000 | \n",
+ " 28628.620000 | \n",
+ " 71426.610000 | \n",
+ " 92404.090000 | \n",
+ " 2013.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 111490.750000 | \n",
+ " 94691.050000 | \n",
+ " 4658.175000 | \n",
+ " 4236.065000 | \n",
+ " 35566.855000 | \n",
+ " 105839.135000 | \n",
+ " 132876.450000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 148654.000000 | \n",
+ " 319275.010000 | \n",
+ " 245131.880000 | \n",
+ " 400184.250000 | \n",
+ " 96570.660000 | \n",
+ " 567595.430000 | \n",
+ " 567595.430000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id BasePay OvertimePay OtherPay \\\n",
+ "count 148654.000000 148045.000000 148650.000000 148650.000000 \n",
+ "mean 74327.500000 66325.448841 5066.059886 3648.767297 \n",
+ "std 42912.857795 42764.635495 11454.380559 8056.601866 \n",
+ "min 1.000000 -166.010000 -0.010000 -7058.590000 \n",
+ "25% 37164.250000 33588.200000 0.000000 0.000000 \n",
+ "50% 74327.500000 65007.450000 0.000000 811.270000 \n",
+ "75% 111490.750000 94691.050000 4658.175000 4236.065000 \n",
+ "max 148654.000000 319275.010000 245131.880000 400184.250000 \n",
+ "\n",
+ " Benefits TotalPay TotalPayBenefits Year Notes \\\n",
+ "count 112491.000000 148654.000000 148654.000000 148654.000000 0.0 \n",
+ "mean 25007.893151 74768.321972 93692.554811 2012.522643 NaN \n",
+ "std 15402.215858 50517.005274 62793.533483 1.117538 NaN \n",
+ "min -33.890000 -618.130000 -618.130000 2011.000000 NaN \n",
+ "25% 11535.395000 36168.995000 44065.650000 2012.000000 NaN \n",
+ "50% 28628.620000 71426.610000 92404.090000 2013.000000 NaN \n",
+ "75% 35566.855000 105839.135000 132876.450000 2014.000000 NaN \n",
+ "max 96570.660000 567595.430000 567595.430000 2014.000000 NaN \n",
+ "\n",
+ " Status \n",
+ "count 0.0 \n",
+ "mean NaN \n",
+ "std NaN \n",
+ "min NaN \n",
+ "25% NaN \n",
+ "50% NaN \n",
+ "75% NaN \n",
+ "max NaN "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display null values per column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id 0\n",
+ "EmployeeName 0\n",
+ "JobTitle 0\n",
+ "BasePay 609\n",
+ "OvertimePay 4\n",
+ "OtherPay 4\n",
+ "Benefits 36163\n",
+ "TotalPay 0\n",
+ "TotalPayBenefits 0\n",
+ "Year 0\n",
+ "Notes 148654\n",
+ "Agency 0\n",
+ "Status 148654\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "remove columns with all values as NaN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Agency | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id EmployeeName JobTitle \\\n",
+ "0 1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "1 2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "2 3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "4 5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n",
+ "0 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n",
+ "1 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n",
+ "2 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n",
+ "3 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n",
+ "4 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n",
+ "\n",
+ " Year Agency \n",
+ "0 2011 San Francisco \n",
+ "1 2011 San Francisco \n",
+ "2 2011 San Francisco \n",
+ "3 2011 San Francisco \n",
+ "4 2011 San Francisco "
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1 = df.dropna(axis='columns', how='all')  # removes only columns in which every value is NaN\n",
+ "df1.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display number of unique values in each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id 148654\n",
+ "EmployeeName 110811\n",
+ "JobTitle 2159\n",
+ "BasePay 109489\n",
+ "OvertimePay 65998\n",
+ "OtherPay 83225\n",
+ "Benefits 98465\n",
+ "TotalPay 138486\n",
+ "TotalPayBenefits 142098\n",
+ "Year 4\n",
+ "Agency 1\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "mean of total pay of all people based on year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "74768.321971703"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df['TotalPay'].mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "how many people have 0 overtime pay"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "77321"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2 = df1[df1['OvertimePay'] == 0]  # employees with zero overtime pay; reused by the following cells\n",
+ "len(df2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 77321.000000\n",
+ "mean 60229.348901\n",
+ "std 49307.912350\n",
+ "min -618.130000\n",
+ "25% 13290.450000\n",
+ "50% 58158.590000\n",
+ "75% 91115.090000\n",
+ "max 567595.430000\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2['TotalPay'].describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "find Id of that person with max TotalPay you got in previous question"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 1\n",
+ "Name: Id, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "top_pay = df2['TotalPay'].max()\n",
+ "print(df2.loc[df2['TotalPay'] == top_pay, 'Id'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "name of employee with total pay benefits = 87619.78"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12345 REBECCA CHIU\n",
+ "Name: EmployeeName, dtype: object"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df2[df2['TotalPayBenefits']==87619.78]['EmployeeName']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "how many people have BasePay > 150000 and OvertimePay > 100000"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "((df1['BasePay'] > 150000) & (df1['OvertimePay'] > 100000)).sum()  # each True in the mask counts as 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "which job title generally has highest average TotalPayBenefits"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "JobTitle\n",
+ "Chief Investment Officer 436224.360000\n",
+ "Chief of Police 411732.266667\n",
+ "Chief, Fire Department 408865.326667\n",
+ "GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 399211.275000\n",
+ "Gen Mgr, Public Trnsp Dept 380696.440000\n",
+ "Dep Dir for Investments, Ret 355731.260000\n",
+ "Mayor 354212.906667\n",
+ "Adm, SFGH Medical Center 347079.706667\n",
+ "Controller 343061.140000\n",
+ "Asst Chf of Dept (Fire Dept) 342597.550556\n",
+ "Dept Head V 340643.507222\n",
+ "Administrator, DPH 331564.035000\n",
+ "Dep Chf of Dept (Fire Dept) 326752.242857\n",
+ "Deputy Chief 3 324670.490952\n",
+ "Port Director 322142.336667\n",
+ "Assistant Deputy Chief 2 317775.503636\n",
+ "Manager, Dept Public Health 314138.620000\n",
+ "Emergency Medical Svcs Chief 313422.996667\n",
+ "Executive Contract Employee 309776.574286\n",
+ "District Attorney 309675.956667\n",
+ "DEPUTY DIRECTOR OF INVESTMENTS 307899.460000\n",
+ "Battalion Chief, Fire Suppress 306954.552769\n",
+ "CHIEF OF DEPARTMENT, (FIRE DEPARTMENT) 302377.730000\n",
+ "City Attorney 295917.083333\n",
+ "Public Defender 287050.430000\n",
+ "Sheriff (SFERS) 285238.856667\n",
+ "Asst Med Examiner 284933.354167\n",
+ "Commander 3 284245.975556\n",
+ "Cfdntal Chf Atty 2,(Cvl&Crmnl) 280048.710000\n",
+ "Forensic Toxicologist 279469.755000\n",
+ " ... \n",
+ "Pool Lifeguard 5856.973396\n",
+ "Camp Assistant 4376.790738\n",
+ "RECREATION DIRECTOR 4185.114000\n",
+ "Swimming Instructor 4143.219016\n",
+ "JUNIOR CLERK 4054.687547\n",
+ "CAMP ASSISTANT 3997.515763\n",
+ "Special Assistant 8 3941.350000\n",
+ "Testing Technician 3672.217986\n",
+ "ORTHOPEDIC TECHNICIAN I 3583.400000\n",
+ "WAREHOUSE WORKER 3369.710000\n",
+ "Conversion 3361.440000\n",
+ "Public Service Aide-Technical 2774.270000\n",
+ "ASSISTANT RECREATION SUPERVISOR 2474.410000\n",
+ "Commissioner No Benefits 2449.257015\n",
+ "TESTING TECHNICIAN 2308.294262\n",
+ "Barber 2281.268571\n",
+ "COMMISSIONER 2271.694348\n",
+ "Cashier 3 2074.600000\n",
+ "AIRPORT ASSISTANT DEPUTY DIRECTOR, BUSINESS ADMINI 1927.500000\n",
+ "Special Examiner 1680.148364\n",
+ "BdComm Mbr, Grp5,M$100/Mo 1274.827470\n",
+ "SPECIAL EXAMINER 1201.260000\n",
+ "BOARD/COMMISSION MEMBER, GROUP V 1195.904464\n",
+ "BdComm Mbr, Grp3,M=$50/Mtg 973.106034\n",
+ "SPECIAL ASSISTANT XIV 673.800000\n",
+ "BOARD/COMMISSION MEMBER, GROUP III 638.787879\n",
+ "BdComm Mbr, Grp2,M=$25/Mtg 475.047742\n",
+ "BOARD/COMMISSION MEMBER, GROUP II 296.511628\n",
+ "PUBLIC SAFETY COMMUNICATIONS TECHNICIAN 149.510000\n",
+ "Not provided 0.000000\n",
+ "Name: TotalPayBenefits, Length: 2159, dtype: float64"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df1.groupby('JobTitle')['TotalPayBenefits'].mean().sort_values(ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "How many employees are POLICE"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "8201"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Upper-case the titles so the match is case-insensitive; Series.sum() counts the True values without a Python-level loop\n",
+ "df1['JobTitle'].str.upper().str.contains('POLICE').sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}