From 8eda066bb63a2816d34fd770f5fb1589212a7947 Mon Sep 17 00:00:00 2001
From: Rachitha <47741826+Rachithaa@users.noreply.github.com>
Date: Mon, 24 Aug 2020 03:43:31 +0530
Subject: [PATCH 1/6] Assignment:1-Rachitha
First assignment completed
---
Assignment1 Numpy.ipynb | 357 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 357 insertions(+)
create mode 100644 Assignment1 Numpy.ipynb
diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb
new file mode 100644
index 0000000..a087d87
--- /dev/null
+++ b/Assignment1 Numpy.ipynb
@@ -0,0 +1,357 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Assignment"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a python list => \\[1,2,3,4,5\\]\n",
+ "\n",
+ "Convert it into numpy array and print it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 2 3 4 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ "l1=[1,2,3,4,5]\n",
+ "n1=np.array(l1)\n",
+ "print(n1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n",
+ "\n",
+ "Convert it into numpy array and print it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[1 2 3]\n",
+ " [4 5 6]\n",
+ " [7 8 9]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "matrix1=[[1,2,3],[4,5,6],[7,8,9]]\n",
+ "n2=np.array(matrix1)\n",
+ "print(n2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n",
+ "\n",
+ "\\[ [1,3,5],\n",
+ "\n",
+ " [7,9,11],\n",
+ " \n",
+ " [13,15,17] \\]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 1, 3, 5],\n",
+ " [ 7, 9, 11],\n",
+ " [13, 15, 17]])"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "n3=np.arange(1,19,2)\n",
+ "n3.reshape(3,3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([9, 1, 0, 1, 0, 7, 3, 3, 3, 9])"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.random.randint(0,10,10)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1],\n",
+ " [2],\n",
+ " [3],\n",
+ " [4],\n",
+ " [5]])"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r=np.arange(1,6)\n",
+ "a1=r.reshape(5,1)\n",
+ "a1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Print the shape of the above created array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5, 1)"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a1.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 1 2 3 4 5 6 7 8 9 10]\n",
+ "3\n",
+ "4\n",
+ "9\n"
+ ]
+ }
+ ],
+ "source": [
+ "a1=np.arange(1,11)\n",
+ "print(a1)\n",
+ "print(a1[2])\n",
+ "print(a1[3])\n",
+ "print(a1[8])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Print alternate elements of that array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 3 5 7 9]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(a1[0:10:2])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Change last 3 elements into 100 using broadcasting and print"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a1[7:10]=100\n",
+ "a1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a 5 x 5 matrix (fill it with any element you like), print it.\n",
+ "\n",
+ "Then print the middle (3 x 3) matrix."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[ 1 2 3 4 5]\n",
+ " [ 6 7 8 9 10]\n",
+ " [11 12 13 14 15]\n",
+ " [16 17 18 19 20]\n",
+ " [21 22 23 24 25]]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 7, 8, 9],\n",
+ " [12, 13, 14],\n",
+ " [17, 18, 19]])"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r1=np.arange(1,26)\n",
+ "r2=r1.reshape(5,5)\n",
+ "print(r2)\n",
+ "r2[1:4,1:4]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From c0ffac793e70d42510d266387a10f00c0aaaf681 Mon Sep 17 00:00:00 2001
From: Rachitha <47741826+Rachithaa@users.noreply.github.com>
Date: Mon, 24 Aug 2020 13:19:12 +0530
Subject: [PATCH 2/6] Delete Assignment1 Numpy.ipynb
---
Assignment1 Numpy.ipynb | 357 ----------------------------------------
1 file changed, 357 deletions(-)
delete mode 100644 Assignment1 Numpy.ipynb
diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb
deleted file mode 100644
index a087d87..0000000
--- a/Assignment1 Numpy.ipynb
+++ /dev/null
@@ -1,357 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Assignment"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Make a python list => \\[1,2,3,4,5\\]\n",
- "\n",
- "Convert it into numpy array and print it"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 36,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[1 2 3 4 5]\n"
- ]
- }
- ],
- "source": [
- "l1=[1,2,3,4,5]\n",
- "n1=np.array(l1)\n",
- "print(n1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n",
- "\n",
- "Convert it into numpy array and print it"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 37,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[[1 2 3]\n",
- " [4 5 6]\n",
- " [7 8 9]]\n"
- ]
- }
- ],
- "source": [
- "matrix1=[[1,2,3],[4,5,6],[7,8,9]]\n",
- "n2=np.array(matrix1)\n",
- "print(n2)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n",
- "\n",
- "\\[ [1,3,5],\n",
- "\n",
- " [7,9,11],\n",
- " \n",
- " [13,15,17] \\]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 38,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[ 1, 3, 5],\n",
- " [ 7, 9, 11],\n",
- " [13, 15, 17]])"
- ]
- },
- "execution_count": 38,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "n3=np.arange(1,19,2)\n",
- "n3.reshape(3,3)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 39,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([9, 1, 0, 1, 0, 7, 3, 3, 3, 9])"
- ]
- },
- "execution_count": 39,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "np.random.randint(0,10,10)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 40,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([[1],\n",
- " [2],\n",
- " [3],\n",
- " [4],\n",
- " [5]])"
- ]
- },
- "execution_count": 40,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "r=np.arange(1,6)\n",
- "a1=r.reshape(5,1)\n",
- "a1"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Print the shape of the above created array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 41,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(5, 1)"
- ]
- },
- "execution_count": 41,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "a1.shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[ 1 2 3 4 5 6 7 8 9 10]\n",
- "3\n",
- "4\n",
- "9\n"
- ]
- }
- ],
- "source": [
- "a1=np.arange(1,11)\n",
- "print(a1)\n",
- "print(a1[2])\n",
- "print(a1[3])\n",
- "print(a1[8])\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Print alternate elements of that array"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 43,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[1 3 5 7 9]\n"
- ]
- }
- ],
- "source": [
- "print(a1[0:10:2])\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Change last 3 elements into 100 using broadcasting and print"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 44,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])"
- ]
- },
- "execution_count": 44,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "a1[7:10]=100\n",
- "a1"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Create a 5 x 5 matrix (fill it with any element you like), print it.\n",
- "\n",
- "Then print the middle (3 x 3) matrix."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 45,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[[ 1 2 3 4 5]\n",
- " [ 6 7 8 9 10]\n",
- " [11 12 13 14 15]\n",
- " [16 17 18 19 20]\n",
- " [21 22 23 24 25]]\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "array([[ 7, 8, 9],\n",
- " [12, 13, 14],\n",
- " [17, 18, 19]])"
- ]
- },
- "execution_count": 45,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "r1=np.arange(1,26)\n",
- "r2=r1.reshape(5,5)\n",
- "print(r2)\n",
- "r2[1:4,1:4]"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
From cd8060858a81e41792aa17e9d66883005f8e38a1 Mon Sep 17 00:00:00 2001
From: Rachitha <47741826+Rachithaa@users.noreply.github.com>
Date: Mon, 24 Aug 2020 14:10:16 +0530
Subject: [PATCH 3/6] Assignment 1-Rachitha
---
Assignment1 Numpy.ipynb | 357 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 357 insertions(+)
create mode 100644 Assignment1 Numpy.ipynb
diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb
new file mode 100644
index 0000000..a087d87
--- /dev/null
+++ b/Assignment1 Numpy.ipynb
@@ -0,0 +1,357 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Assignment"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a python list => \\[1,2,3,4,5\\]\n",
+ "\n",
+ "Convert it into numpy array and print it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 2 3 4 5]\n"
+ ]
+ }
+ ],
+ "source": [
+ "l1=[1,2,3,4,5]\n",
+ "n1=np.array(l1)\n",
+ "print(n1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n",
+ "\n",
+ "Convert it into numpy array and print it"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[1 2 3]\n",
+ " [4 5 6]\n",
+ " [7 8 9]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "matrix1=[[1,2,3],[4,5,6],[7,8,9]]\n",
+ "n2=np.array(matrix1)\n",
+ "print(n2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n",
+ "\n",
+ "\\[ [1,3,5],\n",
+ "\n",
+ " [7,9,11],\n",
+ " \n",
+ " [13,15,17] \\]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 1, 3, 5],\n",
+ " [ 7, 9, 11],\n",
+ " [13, 15, 17]])"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "n3=np.arange(1,19,2)\n",
+ "n3.reshape(3,3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a numpy array with 10 random numbers from 0 to 10 (there should be a few numbers greater than 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([9, 1, 0, 1, 0, 7, 3, 3, 3, 9])"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "np.random.randint(0,10,10)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1],\n",
+ " [2],\n",
+ " [3],\n",
+ " [4],\n",
+ " [5]])"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r=np.arange(1,6)\n",
+ "a1=r.reshape(5,1)\n",
+ "a1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Print the shape of the above created array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(5, 1)"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a1.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[ 1 2 3 4 5 6 7 8 9 10]\n",
+ "3\n",
+ "4\n",
+ "9\n"
+ ]
+ }
+ ],
+ "source": [
+ "a1=np.arange(1,11)\n",
+ "print(a1)\n",
+ "print(a1[2])\n",
+ "print(a1[3])\n",
+ "print(a1[8])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Print alternate elements of that array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1 3 5 7 9]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(a1[0:10:2])\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Change the last 3 elements to 100 using broadcasting and print the array"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a1[7:10]=100\n",
+ "a1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Create a 5 x 5 matrix (fill it with any element you like), print it.\n",
+ "\n",
+ "Then print the middle (3 x 3) matrix."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[ 1 2 3 4 5]\n",
+ " [ 6 7 8 9 10]\n",
+ " [11 12 13 14 15]\n",
+ " [16 17 18 19 20]\n",
+ " [21 22 23 24 25]]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 7, 8, 9],\n",
+ " [12, 13, 14],\n",
+ " [17, 18, 19]])"
+ ]
+ },
+ "execution_count": 45,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "r1=np.arange(1,26)\n",
+ "r2=r1.reshape(5,5)\n",
+ "print(r2)\n",
+ "r2[1:4,1:4]"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From aaf8e37f2e2d63176a2bda2a382155ba4d6771d1 Mon Sep 17 00:00:00 2001
From: Rachitha <47741826+Rachithaa@users.noreply.github.com>
Date: Sun, 6 Sep 2020 21:51:21 +0530
Subject: [PATCH 4/6] Add files via upload
RACHITHA-SECOND ASSIGNMENT
---
Assignment2 pandas-RACHITHA.ipynb | 1384 +++++++++++++++++++++++++++++
1 file changed, 1384 insertions(+)
create mode 100644 Assignment2 pandas-RACHITHA.ipynb
diff --git a/Assignment2 pandas-RACHITHA.ipynb b/Assignment2 pandas-RACHITHA.ipynb
new file mode 100644
index 0000000..3b934bf
--- /dev/null
+++ b/Assignment2 pandas-RACHITHA.ipynb
@@ -0,0 +1,1384 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "#%matplotlib notebook\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Import the dataset into a DataFrame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " | Id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 148650 | \n",
+ " Roy I Tillery | \n",
+ " Custodian | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148651 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148652 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148653 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148654 | \n",
+ " Joe Lopez | \n",
+ " Counselor, Log Cabin Ranch | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " -618.13 | \n",
+ " 0.0 | \n",
+ " -618.13 | \n",
+ " -618.13 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
148654 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EmployeeName JobTitle \\\n",
+ "Id \n",
+ "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "... ... ... \n",
+ "148650 Roy I Tillery Custodian \n",
+ "148651 Not provided Not provided \n",
+ "148652 Not provided Not provided \n",
+ "148653 Not provided Not provided \n",
+ "148654 Joe Lopez Counselor, Log Cabin Ranch \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
+ "Id \n",
+ "1 167411.18 0.00 400184.25 NaN 567595.43 \n",
+ "2 155966.02 245131.88 137811.38 NaN 538909.28 \n",
+ "3 212739.13 106088.18 16452.60 NaN 335279.91 \n",
+ "4 77916.00 56120.71 198306.90 NaN 332343.61 \n",
+ "5 134401.60 9737.00 182234.59 NaN 326373.19 \n",
+ "... ... ... ... ... ... \n",
+ "148650 0.00 0.00 0.00 0.0 0.00 \n",
+ "148651 NaN NaN NaN NaN 0.00 \n",
+ "148652 NaN NaN NaN NaN 0.00 \n",
+ "148653 NaN NaN NaN NaN 0.00 \n",
+ "148654 0.00 0.00 -618.13 0.0 -618.13 \n",
+ "\n",
+ " TotalPayBenefits Year Notes Agency Status \n",
+ "Id \n",
+ "1 567595.43 2011 NaN San Francisco NaN \n",
+ "2 538909.28 2011 NaN San Francisco NaN \n",
+ "3 335279.91 2011 NaN San Francisco NaN \n",
+ "4 332343.61 2011 NaN San Francisco NaN \n",
+ "5 326373.19 2011 NaN San Francisco NaN \n",
+ "... ... ... ... ... ... \n",
+ "148650 0.00 2014 NaN San Francisco NaN \n",
+ "148651 0.00 2014 NaN San Francisco NaN \n",
+ "148652 0.00 2014 NaN San Francisco NaN \n",
+ "148653 0.00 2014 NaN San Francisco NaN \n",
+ "148654 -618.13 2014 NaN San Francisco NaN \n",
+ "\n",
+ "[148654 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1=pd.read_csv('Salaries.csv',index_col='Id')\n",
+ "d1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " | Id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EmployeeName JobTitle \\\n",
+ "Id \n",
+ "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n",
+ "Id \n",
+ "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n",
+ "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n",
+ "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n",
+ "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n",
+ "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n",
+ "\n",
+ " Year Notes Agency Status \n",
+ "Id \n",
+ "1 2011 NaN San Francisco NaN \n",
+ "2 2011 NaN San Francisco NaN \n",
+ "3 2011 NaN San Francisco NaN \n",
+ "4 2011 NaN San Francisco NaN \n",
+ "5 2011 NaN San Francisco NaN "
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display the column names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n",
+ " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n",
+ " 'Status'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display the number of rows and columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rows: 148654\n",
+ "columns: 12\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"rows: \"+str(len(d1.axes[0])))\n",
+ "print(\"columns: \"+str(len(d1.axes[1])))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(148654, 12)"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display the DataFrame info (data type of each column, non-null counts, etc.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'pandas.core.frame.DataFrame'>\n",
+ "Int64Index: 148654 entries, 1 to 148654\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 EmployeeName 148654 non-null object \n",
+ " 1 JobTitle 148654 non-null object \n",
+ " 2 BasePay 148045 non-null float64\n",
+ " 3 OvertimePay 148650 non-null float64\n",
+ " 4 OtherPay 148650 non-null float64\n",
+ " 5 Benefits 112491 non-null float64\n",
+ " 6 TotalPay 148654 non-null float64\n",
+ " 7 TotalPayBenefits 148654 non-null float64\n",
+ " 8 Year 148654 non-null int64 \n",
+ " 9 Notes 0 non-null float64\n",
+ " 10 Agency 148654 non-null object \n",
+ " 11 Status 0 non-null float64\n",
+ "dtypes: float64(8), int64(1), object(3)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "d1.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Display summary statistics of the DataFrame such as count, mean, std, max, 25%, etc."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 148045.000000 | \n",
+ " 148650.000000 | \n",
+ " 148650.000000 | \n",
+ " 112491.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 66325.448841 | \n",
+ " 5066.059886 | \n",
+ " 3648.767297 | \n",
+ " 25007.893151 | \n",
+ " 74768.321972 | \n",
+ " 93692.554811 | \n",
+ " 2012.522643 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 42764.635495 | \n",
+ " 11454.380559 | \n",
+ " 8056.601866 | \n",
+ " 15402.215858 | \n",
+ " 50517.005274 | \n",
+ " 62793.533483 | \n",
+ " 1.117538 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " -166.010000 | \n",
+ " -0.010000 | \n",
+ " -7058.590000 | \n",
+ " -33.890000 | \n",
+ " -618.130000 | \n",
+ " -618.130000 | \n",
+ " 2011.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 33588.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 11535.395000 | \n",
+ " 36168.995000 | \n",
+ " 44065.650000 | \n",
+ " 2012.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 65007.450000 | \n",
+ " 0.000000 | \n",
+ " 811.270000 | \n",
+ " 28628.620000 | \n",
+ " 71426.610000 | \n",
+ " 92404.090000 | \n",
+ " 2013.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 94691.050000 | \n",
+ " 4658.175000 | \n",
+ " 4236.065000 | \n",
+ " 35566.855000 | \n",
+ " 105839.135000 | \n",
+ " 132876.450000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 319275.010000 | \n",
+ " 245131.880000 | \n",
+ " 400184.250000 | \n",
+ " 96570.660000 | \n",
+ " 567595.430000 | \n",
+ " 567595.430000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " BasePay OvertimePay OtherPay Benefits \\\n",
+ "count 148045.000000 148650.000000 148650.000000 112491.000000 \n",
+ "mean 66325.448841 5066.059886 3648.767297 25007.893151 \n",
+ "std 42764.635495 11454.380559 8056.601866 15402.215858 \n",
+ "min -166.010000 -0.010000 -7058.590000 -33.890000 \n",
+ "25% 33588.200000 0.000000 0.000000 11535.395000 \n",
+ "50% 65007.450000 0.000000 811.270000 28628.620000 \n",
+ "75% 94691.050000 4658.175000 4236.065000 35566.855000 \n",
+ "max 319275.010000 245131.880000 400184.250000 96570.660000 \n",
+ "\n",
+ " TotalPay TotalPayBenefits Year Notes Status \n",
+ "count 148654.000000 148654.000000 148654.000000 0.0 0.0 \n",
+ "mean 74768.321972 93692.554811 2012.522643 NaN NaN \n",
+ "std 50517.005274 62793.533483 1.117538 NaN NaN \n",
+ "min -618.130000 -618.130000 2011.000000 NaN NaN \n",
+ "25% 36168.995000 44065.650000 2012.000000 NaN NaN \n",
+ "50% 71426.610000 92404.090000 2013.000000 NaN NaN \n",
+ "75% 105839.135000 132876.450000 2014.000000 NaN NaN \n",
+ "max 567595.430000 567595.430000 2014.000000 NaN NaN "
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display null values per column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EmployeeName 0\n",
+ "JobTitle 0\n",
+ "BasePay 609\n",
+ "OvertimePay 4\n",
+ "OtherPay 4\n",
+ "Benefits 36163\n",
+ "TotalPay 0\n",
+ "TotalPayBenefits 0\n",
+ "Year 0\n",
+ "Notes 148654\n",
+ "Agency 0\n",
+ "Status 148654\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "remove columns with all values as NaN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Agency | \n",
+ "
\n",
+ " \n",
+ " | Id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 148650 | \n",
+ " Roy I Tillery | \n",
+ " Custodian | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148651 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148652 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148653 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148654 | \n",
+ " Joe Lopez | \n",
+ " Counselor, Log Cabin Ranch | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " -618.13 | \n",
+ " 0.0 | \n",
+ " -618.13 | \n",
+ " -618.13 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
148654 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EmployeeName JobTitle \\\n",
+ "Id \n",
+ "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "... ... ... \n",
+ "148650 Roy I Tillery Custodian \n",
+ "148651 Not provided Not provided \n",
+ "148652 Not provided Not provided \n",
+ "148653 Not provided Not provided \n",
+ "148654 Joe Lopez Counselor, Log Cabin Ranch \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
+ "Id \n",
+ "1 167411.18 0.00 400184.25 NaN 567595.43 \n",
+ "2 155966.02 245131.88 137811.38 NaN 538909.28 \n",
+ "3 212739.13 106088.18 16452.60 NaN 335279.91 \n",
+ "4 77916.00 56120.71 198306.90 NaN 332343.61 \n",
+ "5 134401.60 9737.00 182234.59 NaN 326373.19 \n",
+ "... ... ... ... ... ... \n",
+ "148650 0.00 0.00 0.00 0.0 0.00 \n",
+ "148651 NaN NaN NaN NaN 0.00 \n",
+ "148652 NaN NaN NaN NaN 0.00 \n",
+ "148653 NaN NaN NaN NaN 0.00 \n",
+ "148654 0.00 0.00 -618.13 0.0 -618.13 \n",
+ "\n",
+ " TotalPayBenefits Year Agency \n",
+ "Id \n",
+ "1 567595.43 2011 San Francisco \n",
+ "2 538909.28 2011 San Francisco \n",
+ "3 335279.91 2011 San Francisco \n",
+ "4 332343.61 2011 San Francisco \n",
+ "5 326373.19 2011 San Francisco \n",
+ "... ... ... ... \n",
+ "148650 0.00 2014 San Francisco \n",
+ "148651 0.00 2014 San Francisco \n",
+ "148652 0.00 2014 San Francisco \n",
+ "148653 0.00 2014 San Francisco \n",
+ "148654 -618.13 2014 San Francisco \n",
+ "\n",
+ "[148654 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.dropna(how='all',axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display number of unique values in each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EmployeeName 110811\n",
+ "JobTitle 2159\n",
+ "BasePay 109489\n",
+ "OvertimePay 65998\n",
+ "OtherPay 83225\n",
+ "Benefits 98465\n",
+ "TotalPay 138486\n",
+ "TotalPayBenefits 142098\n",
+ "Year 4\n",
+ "Notes 0\n",
+ "Agency 1\n",
+ "Status 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "mean of total pay of all people based on year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Year\n",
+ "2011 71744.103871\n",
+ "2012 74113.262265\n",
+ "2013 77611.443142\n",
+ "2014 75463.918140\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.groupby('Year')['TotalPay'].mean()  # select the column first: only TotalPay is averaged (text columns would fail on pandas>=2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "how many people have 0 overtime pay"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "77321"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "int((d1['OvertimePay']==0).sum())  # True entries of the mask = rows with zero overtime"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 77321.000000\n",
+ "mean 60229.348901\n",
+ "std 49307.912350\n",
+ "min -618.130000\n",
+ "25% 13290.450000\n",
+ "50% 58158.590000\n",
+ "75% 91115.090000\n",
+ "max 567595.430000\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.loc[d1['OvertimePay']==0,'TotalPay'].describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "find Id of that person with max TotalPay you got in previous question"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Int64Index([1], dtype='int64', name='Id')"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "f1=d1[d1['OvertimePay']==0]['TotalPay'].max()\n",
+ "d1[(d1['OvertimePay']==0) & (d1['TotalPay']==f1)].index  # keep the zero-overtime filter so only that group can match"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Int64Index([1], dtype='int64', name='Id')"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1[d1['TotalPay']==567595.430000].index\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "name of employee with total pay benefits = 87619.78"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id\n",
+ "12346 REBECCA CHIU\n",
+ "Name: EmployeeName, dtype: object"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.loc[d1['TotalPayBenefits']==87619.78,'EmployeeName']"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "how many people have BasePay > 150000 and OvertimePay > 100000"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.loc[(d1['BasePay']> 150000) & (d1['OvertimePay'] > 100000),'EmployeeName'].count()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "which job title generally has highest average TotalPayBenefits"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "f1=d1.groupby('JobTitle')['TotalPayBenefits'].mean()  # average TotalPayBenefits per job title\n",
+ "f1.idxmax()  # job title whose average is highest (not just the single best-paid row)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "How many employees are POLICE"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2512"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# .str.contains() with case=False: JobTitle values appear in both upper case and mixed case\n",
+ "len(d1[d1['JobTitle'].str.contains('POLICE',case=False)])\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
From aac7cbe524dae4717e1abd1dc66b9e4b540f495e Mon Sep 17 00:00:00 2001
From: Rachitha <47741826+Rachithaa@users.noreply.github.com>
Date: Sun, 6 Sep 2020 21:52:41 +0530
Subject: [PATCH 5/6] Delete Assignment2 pandas-RACHITHA.ipynb
---
Assignment2 pandas-RACHITHA.ipynb | 1384 -----------------------------
1 file changed, 1384 deletions(-)
delete mode 100644 Assignment2 pandas-RACHITHA.ipynb
diff --git a/Assignment2 pandas-RACHITHA.ipynb b/Assignment2 pandas-RACHITHA.ipynb
deleted file mode 100644
index 3b934bf..0000000
--- a/Assignment2 pandas-RACHITHA.ipynb
+++ /dev/null
@@ -1,1384 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "import pandas as pd\n",
- "#%matplotlib notebook\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "import the dataset into a dataframe"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 59,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " EmployeeName | \n",
- " JobTitle | \n",
- " BasePay | \n",
- " OvertimePay | \n",
- " OtherPay | \n",
- " Benefits | \n",
- " TotalPay | \n",
- " TotalPayBenefits | \n",
- " Year | \n",
- " Notes | \n",
- " Agency | \n",
- " Status | \n",
- "
\n",
- " \n",
- " | Id | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 1 | \n",
- " NATHANIEL FORD | \n",
- " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
- " 167411.18 | \n",
- " 0.00 | \n",
- " 400184.25 | \n",
- " NaN | \n",
- " 567595.43 | \n",
- " 567595.43 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " GARY JIMENEZ | \n",
- " CAPTAIN III (POLICE DEPARTMENT) | \n",
- " 155966.02 | \n",
- " 245131.88 | \n",
- " 137811.38 | \n",
- " NaN | \n",
- " 538909.28 | \n",
- " 538909.28 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " ALBERT PARDINI | \n",
- " CAPTAIN III (POLICE DEPARTMENT) | \n",
- " 212739.13 | \n",
- " 106088.18 | \n",
- " 16452.60 | \n",
- " NaN | \n",
- " 335279.91 | \n",
- " 335279.91 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " CHRISTOPHER CHONG | \n",
- " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
- " 77916.00 | \n",
- " 56120.71 | \n",
- " 198306.90 | \n",
- " NaN | \n",
- " 332343.61 | \n",
- " 332343.61 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " PATRICK GARDNER | \n",
- " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
- " 134401.60 | \n",
- " 9737.00 | \n",
- " 182234.59 | \n",
- " NaN | \n",
- " 326373.19 | \n",
- " 326373.19 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 148650 | \n",
- " Roy I Tillery | \n",
- " Custodian | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 0.0 | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 148651 | \n",
- " Not provided | \n",
- " Not provided | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 148652 | \n",
- " Not provided | \n",
- " Not provided | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 148653 | \n",
- " Not provided | \n",
- " Not provided | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 148654 | \n",
- " Joe Lopez | \n",
- " Counselor, Log Cabin Ranch | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " -618.13 | \n",
- " 0.0 | \n",
- " -618.13 | \n",
- " -618.13 | \n",
- " 2014 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
148654 rows × 12 columns
\n",
- "
"
- ],
- "text/plain": [
- " EmployeeName JobTitle \\\n",
- "Id \n",
- "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
- "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
- "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
- "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
- "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
- "... ... ... \n",
- "148650 Roy I Tillery Custodian \n",
- "148651 Not provided Not provided \n",
- "148652 Not provided Not provided \n",
- "148653 Not provided Not provided \n",
- "148654 Joe Lopez Counselor, Log Cabin Ranch \n",
- "\n",
- " BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
- "Id \n",
- "1 167411.18 0.00 400184.25 NaN 567595.43 \n",
- "2 155966.02 245131.88 137811.38 NaN 538909.28 \n",
- "3 212739.13 106088.18 16452.60 NaN 335279.91 \n",
- "4 77916.00 56120.71 198306.90 NaN 332343.61 \n",
- "5 134401.60 9737.00 182234.59 NaN 326373.19 \n",
- "... ... ... ... ... ... \n",
- "148650 0.00 0.00 0.00 0.0 0.00 \n",
- "148651 NaN NaN NaN NaN 0.00 \n",
- "148652 NaN NaN NaN NaN 0.00 \n",
- "148653 NaN NaN NaN NaN 0.00 \n",
- "148654 0.00 0.00 -618.13 0.0 -618.13 \n",
- "\n",
- " TotalPayBenefits Year Notes Agency Status \n",
- "Id \n",
- "1 567595.43 2011 NaN San Francisco NaN \n",
- "2 538909.28 2011 NaN San Francisco NaN \n",
- "3 335279.91 2011 NaN San Francisco NaN \n",
- "4 332343.61 2011 NaN San Francisco NaN \n",
- "5 326373.19 2011 NaN San Francisco NaN \n",
- "... ... ... ... ... ... \n",
- "148650 0.00 2014 NaN San Francisco NaN \n",
- "148651 0.00 2014 NaN San Francisco NaN \n",
- "148652 0.00 2014 NaN San Francisco NaN \n",
- "148653 0.00 2014 NaN San Francisco NaN \n",
- "148654 -618.13 2014 NaN San Francisco NaN \n",
- "\n",
- "[148654 rows x 12 columns]"
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1=pd.read_csv('Salaries.csv',index_col='Id')\n",
- "d1"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 60,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " EmployeeName | \n",
- " JobTitle | \n",
- " BasePay | \n",
- " OvertimePay | \n",
- " OtherPay | \n",
- " Benefits | \n",
- " TotalPay | \n",
- " TotalPayBenefits | \n",
- " Year | \n",
- " Notes | \n",
- " Agency | \n",
- " Status | \n",
- "
\n",
- " \n",
- " | Id | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 1 | \n",
- " NATHANIEL FORD | \n",
- " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
- " 167411.18 | \n",
- " 0.00 | \n",
- " 400184.25 | \n",
- " NaN | \n",
- " 567595.43 | \n",
- " 567595.43 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " GARY JIMENEZ | \n",
- " CAPTAIN III (POLICE DEPARTMENT) | \n",
- " 155966.02 | \n",
- " 245131.88 | \n",
- " 137811.38 | \n",
- " NaN | \n",
- " 538909.28 | \n",
- " 538909.28 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " ALBERT PARDINI | \n",
- " CAPTAIN III (POLICE DEPARTMENT) | \n",
- " 212739.13 | \n",
- " 106088.18 | \n",
- " 16452.60 | \n",
- " NaN | \n",
- " 335279.91 | \n",
- " 335279.91 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " CHRISTOPHER CHONG | \n",
- " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
- " 77916.00 | \n",
- " 56120.71 | \n",
- " 198306.90 | \n",
- " NaN | \n",
- " 332343.61 | \n",
- " 332343.61 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " PATRICK GARDNER | \n",
- " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
- " 134401.60 | \n",
- " 9737.00 | \n",
- " 182234.59 | \n",
- " NaN | \n",
- " 326373.19 | \n",
- " 326373.19 | \n",
- " 2011 | \n",
- " NaN | \n",
- " San Francisco | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " EmployeeName JobTitle \\\n",
- "Id \n",
- "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
- "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
- "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
- "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
- "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
- "\n",
- " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n",
- "Id \n",
- "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n",
- "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n",
- "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n",
- "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n",
- "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n",
- "\n",
- " Year Notes Agency Status \n",
- "Id \n",
- "1 2011 NaN San Francisco NaN \n",
- "2 2011 NaN San Francisco NaN \n",
- "3 2011 NaN San Francisco NaN \n",
- "4 2011 NaN San Francisco NaN \n",
- "5 2011 NaN San Francisco NaN "
- ]
- },
- "execution_count": 60,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "display the column names"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 61,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n",
- " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n",
- " 'Status'],\n",
- " dtype='object')"
- ]
- },
- "execution_count": 61,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.columns"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "display the number of rows and cols"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "rows: 148654\n",
- "columns: 12\n"
- ]
- }
- ],
- "source": [
- "print(\"rows: \"+str(len(d1.axes[0])))\n",
- "print(\"columns: \"+str(len(d1.axes[1])))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(148654, 12)"
- ]
- },
- "execution_count": 63,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.shape"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "display the dataframe info (types of data in columns and not null values etc.)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 64,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Int64Index: 148654 entries, 1 to 148654\n",
- "Data columns (total 12 columns):\n",
- " # Column Non-Null Count Dtype \n",
- "--- ------ -------------- ----- \n",
- " 0 EmployeeName 148654 non-null object \n",
- " 1 JobTitle 148654 non-null object \n",
- " 2 BasePay 148045 non-null float64\n",
- " 3 OvertimePay 148650 non-null float64\n",
- " 4 OtherPay 148650 non-null float64\n",
- " 5 Benefits 112491 non-null float64\n",
- " 6 TotalPay 148654 non-null float64\n",
- " 7 TotalPayBenefits 148654 non-null float64\n",
- " 8 Year 148654 non-null int64 \n",
- " 9 Notes 0 non-null float64\n",
- " 10 Agency 148654 non-null object \n",
- " 11 Status 0 non-null float64\n",
- "dtypes: float64(8), int64(1), object(3)\n",
- "memory usage: 14.7+ MB\n"
- ]
- }
- ],
- "source": [
- "d1.info()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "display stats of the dataframe like count, mean, std, max, 25% etc....."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 65,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " BasePay | \n",
- " OvertimePay | \n",
- " OtherPay | \n",
- " Benefits | \n",
- " TotalPay | \n",
- " TotalPayBenefits | \n",
- " Year | \n",
- " Notes | \n",
- " Status | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | count | \n",
- " 148045.000000 | \n",
- " 148650.000000 | \n",
- " 148650.000000 | \n",
- " 112491.000000 | \n",
- " 148654.000000 | \n",
- " 148654.000000 | \n",
- " 148654.000000 | \n",
- " 0.0 | \n",
- " 0.0 | \n",
- "
\n",
- " \n",
- " | mean | \n",
- " 66325.448841 | \n",
- " 5066.059886 | \n",
- " 3648.767297 | \n",
- " 25007.893151 | \n",
- " 74768.321972 | \n",
- " 93692.554811 | \n",
- " 2012.522643 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | std | \n",
- " 42764.635495 | \n",
- " 11454.380559 | \n",
- " 8056.601866 | \n",
- " 15402.215858 | \n",
- " 50517.005274 | \n",
- " 62793.533483 | \n",
- " 1.117538 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | min | \n",
- " -166.010000 | \n",
- " -0.010000 | \n",
- " -7058.590000 | \n",
- " -33.890000 | \n",
- " -618.130000 | \n",
- " -618.130000 | \n",
- " 2011.000000 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 25% | \n",
- " 33588.200000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 11535.395000 | \n",
- " 36168.995000 | \n",
- " 44065.650000 | \n",
- " 2012.000000 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 50% | \n",
- " 65007.450000 | \n",
- " 0.000000 | \n",
- " 811.270000 | \n",
- " 28628.620000 | \n",
- " 71426.610000 | \n",
- " 92404.090000 | \n",
- " 2013.000000 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | 75% | \n",
- " 94691.050000 | \n",
- " 4658.175000 | \n",
- " 4236.065000 | \n",
- " 35566.855000 | \n",
- " 105839.135000 | \n",
- " 132876.450000 | \n",
- " 2014.000000 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " | max | \n",
- " 319275.010000 | \n",
- " 245131.880000 | \n",
- " 400184.250000 | \n",
- " 96570.660000 | \n",
- " 567595.430000 | \n",
- " 567595.430000 | \n",
- " 2014.000000 | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " BasePay OvertimePay OtherPay Benefits \\\n",
- "count 148045.000000 148650.000000 148650.000000 112491.000000 \n",
- "mean 66325.448841 5066.059886 3648.767297 25007.893151 \n",
- "std 42764.635495 11454.380559 8056.601866 15402.215858 \n",
- "min -166.010000 -0.010000 -7058.590000 -33.890000 \n",
- "25% 33588.200000 0.000000 0.000000 11535.395000 \n",
- "50% 65007.450000 0.000000 811.270000 28628.620000 \n",
- "75% 94691.050000 4658.175000 4236.065000 35566.855000 \n",
- "max 319275.010000 245131.880000 400184.250000 96570.660000 \n",
- "\n",
- " TotalPay TotalPayBenefits Year Notes Status \n",
- "count 148654.000000 148654.000000 148654.000000 0.0 0.0 \n",
- "mean 74768.321972 93692.554811 2012.522643 NaN NaN \n",
- "std 50517.005274 62793.533483 1.117538 NaN NaN \n",
- "min -618.130000 -618.130000 2011.000000 NaN NaN \n",
- "25% 36168.995000 44065.650000 2012.000000 NaN NaN \n",
- "50% 71426.610000 92404.090000 2013.000000 NaN NaN \n",
- "75% 105839.135000 132876.450000 2014.000000 NaN NaN \n",
- "max 567595.430000 567595.430000 2014.000000 NaN NaN "
- ]
- },
- "execution_count": 65,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.describe()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "display null values per column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 66,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "EmployeeName 0\n",
- "JobTitle 0\n",
- "BasePay 609\n",
- "OvertimePay 4\n",
- "OtherPay 4\n",
- "Benefits 36163\n",
- "TotalPay 0\n",
- "TotalPayBenefits 0\n",
- "Year 0\n",
- "Notes 148654\n",
- "Agency 0\n",
- "Status 148654\n",
- "dtype: int64"
- ]
- },
- "execution_count": 66,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.isna().sum()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "remove columns will all values as NaN"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 67,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " EmployeeName | \n",
- " JobTitle | \n",
- " BasePay | \n",
- " OvertimePay | \n",
- " OtherPay | \n",
- " Benefits | \n",
- " TotalPay | \n",
- " TotalPayBenefits | \n",
- " Year | \n",
- " Agency | \n",
- "
\n",
- " \n",
- " | Id | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 1 | \n",
- " NATHANIEL FORD | \n",
- " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
- " 167411.18 | \n",
- " 0.00 | \n",
- " 400184.25 | \n",
- " NaN | \n",
- " 567595.43 | \n",
- " 567595.43 | \n",
- " 2011 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " GARY JIMENEZ | \n",
- " CAPTAIN III (POLICE DEPARTMENT) | \n",
- " 155966.02 | \n",
- " 245131.88 | \n",
- " 137811.38 | \n",
- " NaN | \n",
- " 538909.28 | \n",
- " 538909.28 | \n",
- " 2011 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " ALBERT PARDINI | \n",
- " CAPTAIN III (POLICE DEPARTMENT) | \n",
- " 212739.13 | \n",
- " 106088.18 | \n",
- " 16452.60 | \n",
- " NaN | \n",
- " 335279.91 | \n",
- " 335279.91 | \n",
- " 2011 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " CHRISTOPHER CHONG | \n",
- " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
- " 77916.00 | \n",
- " 56120.71 | \n",
- " 198306.90 | \n",
- " NaN | \n",
- " 332343.61 | \n",
- " 332343.61 | \n",
- " 2011 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " PATRICK GARDNER | \n",
- " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
- " 134401.60 | \n",
- " 9737.00 | \n",
- " 182234.59 | \n",
- " NaN | \n",
- " 326373.19 | \n",
- " 326373.19 | \n",
- " 2011 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " | 148650 | \n",
- " Roy I Tillery | \n",
- " Custodian | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 0.0 | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 148651 | \n",
- " Not provided | \n",
- " Not provided | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 148652 | \n",
- " Not provided | \n",
- " Not provided | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 148653 | \n",
- " Not provided | \n",
- " Not provided | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " 2014 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- " | 148654 | \n",
- " Joe Lopez | \n",
- " Counselor, Log Cabin Ranch | \n",
- " 0.00 | \n",
- " 0.00 | \n",
- " -618.13 | \n",
- " 0.0 | \n",
- " -618.13 | \n",
- " -618.13 | \n",
- " 2014 | \n",
- " San Francisco | \n",
- "
\n",
- " \n",
- "
\n",
- "
148654 rows × 10 columns
\n",
- "
"
- ],
- "text/plain": [
- " EmployeeName JobTitle \\\n",
- "Id \n",
- "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
- "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
- "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
- "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
- "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
- "... ... ... \n",
- "148650 Roy I Tillery Custodian \n",
- "148651 Not provided Not provided \n",
- "148652 Not provided Not provided \n",
- "148653 Not provided Not provided \n",
- "148654 Joe Lopez Counselor, Log Cabin Ranch \n",
- "\n",
- " BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
- "Id \n",
- "1 167411.18 0.00 400184.25 NaN 567595.43 \n",
- "2 155966.02 245131.88 137811.38 NaN 538909.28 \n",
- "3 212739.13 106088.18 16452.60 NaN 335279.91 \n",
- "4 77916.00 56120.71 198306.90 NaN 332343.61 \n",
- "5 134401.60 9737.00 182234.59 NaN 326373.19 \n",
- "... ... ... ... ... ... \n",
- "148650 0.00 0.00 0.00 0.0 0.00 \n",
- "148651 NaN NaN NaN NaN 0.00 \n",
- "148652 NaN NaN NaN NaN 0.00 \n",
- "148653 NaN NaN NaN NaN 0.00 \n",
- "148654 0.00 0.00 -618.13 0.0 -618.13 \n",
- "\n",
- " TotalPayBenefits Year Agency \n",
- "Id \n",
- "1 567595.43 2011 San Francisco \n",
- "2 538909.28 2011 San Francisco \n",
- "3 335279.91 2011 San Francisco \n",
- "4 332343.61 2011 San Francisco \n",
- "5 326373.19 2011 San Francisco \n",
- "... ... ... ... \n",
- "148650 0.00 2014 San Francisco \n",
- "148651 0.00 2014 San Francisco \n",
- "148652 0.00 2014 San Francisco \n",
- "148653 0.00 2014 San Francisco \n",
- "148654 -618.13 2014 San Francisco \n",
- "\n",
- "[148654 rows x 10 columns]"
- ]
- },
- "execution_count": 67,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.dropna(how='all',axis=1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "display number of unique values in each column"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 68,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "EmployeeName 110811\n",
- "JobTitle 2159\n",
- "BasePay 109489\n",
- "OvertimePay 65998\n",
- "OtherPay 83225\n",
- "Benefits 98465\n",
- "TotalPay 138486\n",
- "TotalPayBenefits 142098\n",
- "Year 4\n",
- "Notes 0\n",
- "Agency 1\n",
- "Status 0\n",
- "dtype: int64"
- ]
- },
- "execution_count": 68,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.nunique()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "mean of total pay of all people based on year"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 69,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Year\n",
- "2011 71744.103871\n",
- "2012 74113.262265\n",
- "2013 77611.443142\n",
- "2014 75463.918140\n",
- "Name: TotalPay, dtype: float64"
- ]
- },
- "execution_count": 69,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1.groupby('Year').mean()['TotalPay']"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "how many people have 0 overtime pay"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 70,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "77321"
- ]
- },
- "execution_count": 70,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "len(d1[d1['OvertimePay']==0]['OvertimePay'])"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 71,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "count 77321.000000\n",
- "mean 60229.348901\n",
- "std 49307.912350\n",
- "min -618.130000\n",
- "25% 13290.450000\n",
- "50% 58158.590000\n",
- "75% 91115.090000\n",
- "max 567595.430000\n",
- "Name: TotalPay, dtype: float64"
- ]
- },
- "execution_count": 71,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1[d1['OvertimePay']==0]['TotalPay'].describe()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "find Id of that person with max TotalPay you got in previous question"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 72,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Int64Index([1], dtype='int64', name='Id')"
- ]
- },
- "execution_count": 72,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "f1=d1[d1['OvertimePay']==0]['TotalPay'].max()\n",
- "d1[d1['TotalPay']==f1].index"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 73,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Int64Index([1], dtype='int64', name='Id')"
- ]
- },
- "execution_count": 73,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1[d1['TotalPay']==567595.430000].index\n"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "name of employee with total pay benefits = 87619.78"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 74,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "Id\n",
- "12346 REBECCA CHIU\n",
- "Name: EmployeeName, dtype: object"
- ]
- },
- "execution_count": 74,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1[d1['TotalPayBenefits']==87619.78]['EmployeeName']"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "how many people have BasePay > 150000 and OvertimePay > 100000"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 75,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "12"
- ]
- },
- "execution_count": 75,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "d1[(d1['BasePay']> 150000) & (d1['OvertimePay'] > 100000)].count()['EmployeeName']"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "which job title generally has highest average TotalPayBenefits"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 76,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'"
- ]
- },
- "execution_count": 76,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "f1=d1.loc[d1['TotalPayBenefits'].idxmax()]\n",
- "f1['JobTitle']"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "How many employees are POLICE"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 77,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "2512"
- ]
- },
- "execution_count": 77,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# .str.contains()\n",
- "len(d1[d1['JobTitle'].str.contains('POLICE')])\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.6"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
From 6adcb577f4c672b94fcc34974a9ea0c8778d143c Mon Sep 17 00:00:00 2001
From: Rachitha <47741826+Rachithaa@users.noreply.github.com>
Date: Sun, 6 Sep 2020 21:53:26 +0530
Subject: [PATCH 6/6] RACHITHA-SECOND ASSIGNMENT
---
Assignment2 pandas-RACHITHA.ipynb | 1384 +++++++++++++++++++++++++++++
1 file changed, 1384 insertions(+)
create mode 100644 Assignment2 pandas-RACHITHA.ipynb
diff --git a/Assignment2 pandas-RACHITHA.ipynb b/Assignment2 pandas-RACHITHA.ipynb
new file mode 100644
index 0000000..3b934bf
--- /dev/null
+++ b/Assignment2 pandas-RACHITHA.ipynb
@@ -0,0 +1,1384 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "#%matplotlib notebook\n",
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "import the dataset into a dataframe"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " | Id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 148650 | \n",
+ " Roy I Tillery | \n",
+ " Custodian | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148651 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148652 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148653 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 148654 | \n",
+ " Joe Lopez | \n",
+ " Counselor, Log Cabin Ranch | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " -618.13 | \n",
+ " 0.0 | \n",
+ " -618.13 | \n",
+ " -618.13 | \n",
+ " 2014 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
148654 rows × 12 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EmployeeName JobTitle \\\n",
+ "Id \n",
+ "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "... ... ... \n",
+ "148650 Roy I Tillery Custodian \n",
+ "148651 Not provided Not provided \n",
+ "148652 Not provided Not provided \n",
+ "148653 Not provided Not provided \n",
+ "148654 Joe Lopez Counselor, Log Cabin Ranch \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
+ "Id \n",
+ "1 167411.18 0.00 400184.25 NaN 567595.43 \n",
+ "2 155966.02 245131.88 137811.38 NaN 538909.28 \n",
+ "3 212739.13 106088.18 16452.60 NaN 335279.91 \n",
+ "4 77916.00 56120.71 198306.90 NaN 332343.61 \n",
+ "5 134401.60 9737.00 182234.59 NaN 326373.19 \n",
+ "... ... ... ... ... ... \n",
+ "148650 0.00 0.00 0.00 0.0 0.00 \n",
+ "148651 NaN NaN NaN NaN 0.00 \n",
+ "148652 NaN NaN NaN NaN 0.00 \n",
+ "148653 NaN NaN NaN NaN 0.00 \n",
+ "148654 0.00 0.00 -618.13 0.0 -618.13 \n",
+ "\n",
+ " TotalPayBenefits Year Notes Agency Status \n",
+ "Id \n",
+ "1 567595.43 2011 NaN San Francisco NaN \n",
+ "2 538909.28 2011 NaN San Francisco NaN \n",
+ "3 335279.91 2011 NaN San Francisco NaN \n",
+ "4 332343.61 2011 NaN San Francisco NaN \n",
+ "5 326373.19 2011 NaN San Francisco NaN \n",
+ "... ... ... ... ... ... \n",
+ "148650 0.00 2014 NaN San Francisco NaN \n",
+ "148651 0.00 2014 NaN San Francisco NaN \n",
+ "148652 0.00 2014 NaN San Francisco NaN \n",
+ "148653 0.00 2014 NaN San Francisco NaN \n",
+ "148654 -618.13 2014 NaN San Francisco NaN \n",
+ "\n",
+ "[148654 rows x 12 columns]"
+ ]
+ },
+ "execution_count": 59,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1=pd.read_csv('Salaries.csv',index_col='Id')\n",
+ "d1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " | Id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EmployeeName JobTitle \\\n",
+ "Id \n",
+ "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n",
+ "Id \n",
+ "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n",
+ "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n",
+ "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n",
+ "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n",
+ "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n",
+ "\n",
+ " Year Notes Agency Status \n",
+ "Id \n",
+ "1 2011 NaN San Francisco NaN \n",
+ "2 2011 NaN San Francisco NaN \n",
+ "3 2011 NaN San Francisco NaN \n",
+ "4 2011 NaN San Francisco NaN \n",
+ "5 2011 NaN San Francisco NaN "
+ ]
+ },
+ "execution_count": 60,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display the column names"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n",
+ " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n",
+ " 'Status'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 61,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display the number of rows and cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rows: 148654\n",
+ "columns: 12\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"rows: \"+str(len(d1.axes[0])))\n",
+ "print(\"columns: \"+str(len(d1.axes[1])))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(148654, 12)"
+ ]
+ },
+ "execution_count": 63,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display the dataframe info (types of data in columns, non-null value counts, etc.)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 148654 entries, 1 to 148654\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 EmployeeName 148654 non-null object \n",
+ " 1 JobTitle 148654 non-null object \n",
+ " 2 BasePay 148045 non-null float64\n",
+ " 3 OvertimePay 148650 non-null float64\n",
+ " 4 OtherPay 148650 non-null float64\n",
+ " 5 Benefits 112491 non-null float64\n",
+ " 6 TotalPay 148654 non-null float64\n",
+ " 7 TotalPayBenefits 148654 non-null float64\n",
+ " 8 Year 148654 non-null int64 \n",
+ " 9 Notes 0 non-null float64\n",
+ " 10 Agency 148654 non-null object \n",
+ " 11 Status 0 non-null float64\n",
+ "dtypes: float64(8), int64(1), object(3)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "d1.info()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display stats of the dataframe like count, mean, std, max, 25% etc....."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | count | \n",
+ " 148045.000000 | \n",
+ " 148650.000000 | \n",
+ " 148650.000000 | \n",
+ " 112491.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 148654.000000 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | mean | \n",
+ " 66325.448841 | \n",
+ " 5066.059886 | \n",
+ " 3648.767297 | \n",
+ " 25007.893151 | \n",
+ " 74768.321972 | \n",
+ " 93692.554811 | \n",
+ " 2012.522643 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | std | \n",
+ " 42764.635495 | \n",
+ " 11454.380559 | \n",
+ " 8056.601866 | \n",
+ " 15402.215858 | \n",
+ " 50517.005274 | \n",
+ " 62793.533483 | \n",
+ " 1.117538 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | min | \n",
+ " -166.010000 | \n",
+ " -0.010000 | \n",
+ " -7058.590000 | \n",
+ " -33.890000 | \n",
+ " -618.130000 | \n",
+ " -618.130000 | \n",
+ " 2011.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 25% | \n",
+ " 33588.200000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 11535.395000 | \n",
+ " 36168.995000 | \n",
+ " 44065.650000 | \n",
+ " 2012.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 50% | \n",
+ " 65007.450000 | \n",
+ " 0.000000 | \n",
+ " 811.270000 | \n",
+ " 28628.620000 | \n",
+ " 71426.610000 | \n",
+ " 92404.090000 | \n",
+ " 2013.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 75% | \n",
+ " 94691.050000 | \n",
+ " 4658.175000 | \n",
+ " 4236.065000 | \n",
+ " 35566.855000 | \n",
+ " 105839.135000 | \n",
+ " 132876.450000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | max | \n",
+ " 319275.010000 | \n",
+ " 245131.880000 | \n",
+ " 400184.250000 | \n",
+ " 96570.660000 | \n",
+ " 567595.430000 | \n",
+ " 567595.430000 | \n",
+ " 2014.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " BasePay OvertimePay OtherPay Benefits \\\n",
+ "count 148045.000000 148650.000000 148650.000000 112491.000000 \n",
+ "mean 66325.448841 5066.059886 3648.767297 25007.893151 \n",
+ "std 42764.635495 11454.380559 8056.601866 15402.215858 \n",
+ "min -166.010000 -0.010000 -7058.590000 -33.890000 \n",
+ "25% 33588.200000 0.000000 0.000000 11535.395000 \n",
+ "50% 65007.450000 0.000000 811.270000 28628.620000 \n",
+ "75% 94691.050000 4658.175000 4236.065000 35566.855000 \n",
+ "max 319275.010000 245131.880000 400184.250000 96570.660000 \n",
+ "\n",
+ " TotalPay TotalPayBenefits Year Notes Status \n",
+ "count 148654.000000 148654.000000 148654.000000 0.0 0.0 \n",
+ "mean 74768.321972 93692.554811 2012.522643 NaN NaN \n",
+ "std 50517.005274 62793.533483 1.117538 NaN NaN \n",
+ "min -618.130000 -618.130000 2011.000000 NaN NaN \n",
+ "25% 36168.995000 44065.650000 2012.000000 NaN NaN \n",
+ "50% 71426.610000 92404.090000 2013.000000 NaN NaN \n",
+ "75% 105839.135000 132876.450000 2014.000000 NaN NaN \n",
+ "max 567595.430000 567595.430000 2014.000000 NaN NaN "
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display null values per column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EmployeeName 0\n",
+ "JobTitle 0\n",
+ "BasePay 609\n",
+ "OvertimePay 4\n",
+ "OtherPay 4\n",
+ "Benefits 36163\n",
+ "TotalPay 0\n",
+ "TotalPayBenefits 0\n",
+ "Year 0\n",
+ "Notes 148654\n",
+ "Agency 0\n",
+ "Status 148654\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "remove columns with all values as NaN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Agency | \n",
+ "
\n",
+ " \n",
+ " | Id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411.18 | \n",
+ " 0.00 | \n",
+ " 400184.25 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966.02 | \n",
+ " 245131.88 | \n",
+ " 137811.38 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739.13 | \n",
+ " 106088.18 | \n",
+ " 16452.60 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916.00 | \n",
+ " 56120.71 | \n",
+ " 198306.90 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134401.60 | \n",
+ " 9737.00 | \n",
+ " 182234.59 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 148650 | \n",
+ " Roy I Tillery | \n",
+ " Custodian | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148651 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148652 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148653 | \n",
+ " Not provided | \n",
+ " Not provided | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ " | 148654 | \n",
+ " Joe Lopez | \n",
+ " Counselor, Log Cabin Ranch | \n",
+ " 0.00 | \n",
+ " 0.00 | \n",
+ " -618.13 | \n",
+ " 0.0 | \n",
+ " -618.13 | \n",
+ " -618.13 | \n",
+ " 2014 | \n",
+ " San Francisco | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
148654 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " EmployeeName JobTitle \\\n",
+ "Id \n",
+ "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n",
+ "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n",
+ "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n",
+ "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n",
+ "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n",
+ "... ... ... \n",
+ "148650 Roy I Tillery Custodian \n",
+ "148651 Not provided Not provided \n",
+ "148652 Not provided Not provided \n",
+ "148653 Not provided Not provided \n",
+ "148654 Joe Lopez Counselor, Log Cabin Ranch \n",
+ "\n",
+ " BasePay OvertimePay OtherPay Benefits TotalPay \\\n",
+ "Id \n",
+ "1 167411.18 0.00 400184.25 NaN 567595.43 \n",
+ "2 155966.02 245131.88 137811.38 NaN 538909.28 \n",
+ "3 212739.13 106088.18 16452.60 NaN 335279.91 \n",
+ "4 77916.00 56120.71 198306.90 NaN 332343.61 \n",
+ "5 134401.60 9737.00 182234.59 NaN 326373.19 \n",
+ "... ... ... ... ... ... \n",
+ "148650 0.00 0.00 0.00 0.0 0.00 \n",
+ "148651 NaN NaN NaN NaN 0.00 \n",
+ "148652 NaN NaN NaN NaN 0.00 \n",
+ "148653 NaN NaN NaN NaN 0.00 \n",
+ "148654 0.00 0.00 -618.13 0.0 -618.13 \n",
+ "\n",
+ " TotalPayBenefits Year Agency \n",
+ "Id \n",
+ "1 567595.43 2011 San Francisco \n",
+ "2 538909.28 2011 San Francisco \n",
+ "3 335279.91 2011 San Francisco \n",
+ "4 332343.61 2011 San Francisco \n",
+ "5 326373.19 2011 San Francisco \n",
+ "... ... ... ... \n",
+ "148650 0.00 2014 San Francisco \n",
+ "148651 0.00 2014 San Francisco \n",
+ "148652 0.00 2014 San Francisco \n",
+ "148653 0.00 2014 San Francisco \n",
+ "148654 -618.13 2014 San Francisco \n",
+ "\n",
+ "[148654 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.dropna(how='all',axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "display number of unique values in each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "EmployeeName 110811\n",
+ "JobTitle 2159\n",
+ "BasePay 109489\n",
+ "OvertimePay 65998\n",
+ "OtherPay 83225\n",
+ "Benefits 98465\n",
+ "TotalPay 138486\n",
+ "TotalPayBenefits 142098\n",
+ "Year 4\n",
+ "Notes 0\n",
+ "Agency 1\n",
+ "Status 0\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "mean of total pay of all people based on year"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Year\n",
+ "2011 71744.103871\n",
+ "2012 74113.262265\n",
+ "2013 77611.443142\n",
+ "2014 75463.918140\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.groupby('Year')['TotalPay'].mean()  # pick the column first: averages only TotalPay and never touches the text columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "how many people have 0 overtime pay"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "77321"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "int((d1['OvertimePay'] == 0).sum())  # number of rows whose OvertimePay is exactly 0"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Max, min, mean, median and other stats of TotalPay for people having 0 OvertimePay."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 77321.000000\n",
+ "mean 60229.348901\n",
+ "std 49307.912350\n",
+ "min -618.130000\n",
+ "25% 13290.450000\n",
+ "50% 58158.590000\n",
+ "75% 91115.090000\n",
+ "max 567595.430000\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.loc[d1['OvertimePay'] == 0, 'TotalPay'].describe()  # one .loc lookup: row mask + column"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Find the Id of the person with the max TotalPay you got in the previous question."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Int64Index([1], dtype='int64', name='Id')"
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "zero_ot_pay=d1.loc[d1['OvertimePay']==0,'TotalPay']  # TotalPay of zero-overtime people only\n",
+ "zero_ot_pay[zero_ot_pay==zero_ot_pay.max()].index  # look up the max inside that subset, not the whole frame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Int64Index([1], dtype='int64', name='Id')"
+ ]
+ },
+ "execution_count": 73,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.loc[d1['TotalPay']==567595.43].index  # 567595.43 is the max TotalPay shown by describe() above\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Name of the employee with TotalPayBenefits = 87619.78."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Id\n",
+ "12346 REBECCA CHIU\n",
+ "Name: EmployeeName, dtype: object"
+ ]
+ },
+ "execution_count": 74,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d1.loc[d1['TotalPayBenefits']==87619.78,'EmployeeName']  # row mask and column in one .loc lookup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "How many people have BasePay > 150000 and OvertimePay > 100000?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "12"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "int(((d1['BasePay']>150000) & (d1['OvertimePay']>100000)).sum())  # count matching rows directly, regardless of missing EmployeeName"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Which job title has the highest average TotalPayBenefits?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'"
+ ]
+ },
+ "execution_count": 76,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "avg_tpb_by_title=d1.groupby('JobTitle')['TotalPayBenefits'].mean()  # average per job title, not the single top-paid row\n",
+ "avg_tpb_by_title.idxmax()  # job title whose mean TotalPayBenefits is largest"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "How many employees are POLICE?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2512"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Literal, case-sensitive substring match; na=False treats a missing JobTitle as no match.\n",
+ "len(d1[d1['JobTitle'].str.contains('POLICE',na=False,regex=False)])\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}