From 8eda066bb63a2816d34fd770f5fb1589212a7947 Mon Sep 17 00:00:00 2001 From: Rachitha <47741826+Rachithaa@users.noreply.github.com> Date: Mon, 24 Aug 2020 03:43:31 +0530 Subject: [PATCH 1/6] Assignment:1-Rachitha First assignment completed --- Assignment1 Numpy.ipynb | 357 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 Assignment1 Numpy.ipynb diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb new file mode 100644 index 0000000..a087d87 --- /dev/null +++ b/Assignment1 Numpy.ipynb @@ -0,0 +1,357 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python list => \\[1,2,3,4,5\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 3 4 5]\n" + ] + } + ], + "source": [ + "l1=[1,2,3,4,5]\n", + "n1=np.array(l1)\n", + "print(n1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 2 3]\n", + " [4 5 6]\n", + " [7 8 9]]\n" + ] + } + ], + "source": [ + "matrix1=[[1,2,3],[4,5,6],[7,8,9]]\n", + "n2=np.array(matrix1)\n", + "print(n2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", + "\n", + "\\[ [1,3,5],\n", + "\n", + " [7,9,11],\n", + " \n", + " [13,15,17] \\]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1, 3, 5],\n", + " [ 7, 9, 11],\n", + " [13, 15, 17]])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n3=np.arange(1,19,2)\n", + "n3.reshape(3,3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9, 1, 0, 1, 0, 7, 3, 3, 3, 9])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint(0,10,10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r=np.arange(1,6)\n", + "a1=r.reshape(5,1)\n", + "a1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the shape of the above created array" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 1)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a1.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1 2 3 4 5 6 7 8 9 10]\n", + "3\n", + "4\n", + "9\n" + ] + } + ], + "source": [ + "a1=np.arange(1,11)\n", + "print(a1)\n", + "print(a1[2])\n", + "print(a1[3])\n", + "print(a1[8])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print alternate elements of that array" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 3 5 7 9]\n" + ] + } + ], + "source": [ + "print(a1[0:10:2])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change last 3 elements into 100 using broadcasting and print" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a1[7:10]=100\n", + "a1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", + "\n", + "Then print the middle (3 x 3) matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [11 12 13 14 15]\n", + " [16 17 18 19 20]\n", + " [21 22 23 24 25]]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 7, 8, 9],\n", + " [12, 13, 14],\n", + " [17, 18, 19]])" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r1=np.arange(1,26)\n", + "r2=r1.reshape(5,5)\n", + "print(r2)\n", + "r2[1:4,1:4]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c0ffac793e70d42510d266387a10f00c0aaaf681 Mon Sep 17 00:00:00 2001 From: Rachitha <47741826+Rachithaa@users.noreply.github.com> Date: Mon, 24 Aug 2020 13:19:12 +0530 Subject: [PATCH 2/6] Delete Assignment1 Numpy.ipynb --- Assignment1 Numpy.ipynb | 357 ---------------------------------------- 1 file changed, 357 deletions(-) delete mode 100644 Assignment1 Numpy.ipynb diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb deleted file mode 100644 index a087d87..0000000 --- a/Assignment1 Numpy.ipynb +++ /dev/null @@ -1,357 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Assignment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a python list => \\[1,2,3,4,5\\]\n", - "\n", - "Convert it into numpy array and print it" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1 2 3 4 5]\n" - ] - } - ], - "source": [ - "l1=[1,2,3,4,5]\n", - "n1=np.array(l1)\n", - "print(n1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", - "\n", - "Convert it into numpy array and print it" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[1 2 3]\n", - " [4 5 6]\n", - " [7 8 9]]\n" - ] - } - ], - "source": [ - "matrix1=[[1,2,3],[4,5,6],[7,8,9]]\n", - "n2=np.array(matrix1)\n", - "print(n2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", - "\n", - "\\[ [1,3,5],\n", - "\n", - " [7,9,11],\n", - " \n", - " [13,15,17] \\]" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 1, 3, 5],\n", - " [ 7, 9, 11],\n", - " [13, 15, 17]])" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "n3=np.arange(1,19,2)\n", - "n3.reshape(3,3)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([9, 1, 0, 1, 0, 7, 3, 3, 3, 9])" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.random.randint(0,10,10)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1],\n", - " [2],\n", - " [3],\n", - " [4],\n", - " [5]])" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "r=np.arange(1,6)\n", - "a1=r.reshape(5,1)\n", - "a1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the shape of the above created array" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(5, 1)" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a1.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 1 2 3 4 5 6 7 8 9 10]\n", - "3\n", - "4\n", - "9\n" - ] - } - ], - "source": [ - "a1=np.arange(1,11)\n", - "print(a1)\n", - "print(a1[2])\n", - "print(a1[3])\n", - "print(a1[8])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print alternate elements of that array" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1 3 5 7 9]\n" - ] - } - ], - "source": [ - "print(a1[0:10:2])\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Change last 3 elements into 100 using broadcasting and print" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "a1[7:10]=100\n", - "a1" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", - "\n", - "Then print the middle (3 x 3) matrix." - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 1 2 3 4 5]\n", - " [ 6 7 8 9 10]\n", - " [11 12 13 14 15]\n", - " [16 17 18 19 20]\n", - " [21 22 23 24 25]]\n" - ] - }, - { - "data": { - "text/plain": [ - "array([[ 7, 8, 9],\n", - " [12, 13, 14],\n", - " [17, 18, 19]])" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "r1=np.arange(1,26)\n", - "r2=r1.reshape(5,5)\n", - "print(r2)\n", - "r2[1:4,1:4]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From cd8060858a81e41792aa17e9d66883005f8e38a1 Mon Sep 17 00:00:00 2001 From: Rachitha <47741826+Rachithaa@users.noreply.github.com> Date: Mon, 24 Aug 2020 14:10:16 +0530 Subject: [PATCH 3/6] Assignment 1-Rachitha --- Assignment1 Numpy.ipynb | 357 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 Assignment1 Numpy.ipynb diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb new file mode 100644 index 0000000..a087d87 --- /dev/null +++ b/Assignment1 Numpy.ipynb @@ -0,0 +1,357 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python list => \\[1,2,3,4,5\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 3 4 5]\n" + ] + } + ], + "source": [ + "l1=[1,2,3,4,5]\n", + "n1=np.array(l1)\n", + "print(n1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 2 3]\n", + " [4 5 6]\n", + " [7 8 9]]\n" + ] + } + ], + "source": [ + "matrix1=[[1,2,3],[4,5,6],[7,8,9]]\n", + "n2=np.array(matrix1)\n", + "print(n2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", + "\n", + "\\[ [1,3,5],\n", + "\n", + " [7,9,11],\n", + " \n", + " [13,15,17] \\]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1, 3, 5],\n", + " [ 7, 9, 11],\n", + " [13, 15, 17]])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "n3=np.arange(1,19,2)\n", + "n3.reshape(3,3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([9, 1, 0, 1, 0, 7, 3, 3, 3, 9])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.randint(0,10,10)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r=np.arange(1,6)\n", + "a1=r.reshape(5,1)\n", + "a1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the shape of the above created array" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 1)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a1.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1 2 3 4 5 6 7 8 9 10]\n", + "3\n", + "4\n", + "9\n" + ] + } + ], + "source": [ + "a1=np.arange(1,11)\n", + "print(a1)\n", + "print(a1[2])\n", + "print(a1[3])\n", + "print(a1[8])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print alternate elements of that array" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 3 5 7 9]\n" + ] + } + ], + "source": [ + "print(a1[0:10:2])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change last 3 elements into 100 using broadcasting and print" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a1[7:10]=100\n", + "a1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", + "\n", + "Then print the middle (3 x 3) matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 2 3 4 5]\n", + " [ 6 7 8 9 10]\n", + " [11 12 13 14 15]\n", + " [16 17 18 19 20]\n", + " [21 22 23 24 25]]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 7, 8, 9],\n", + " [12, 13, 14],\n", + " [17, 18, 19]])" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r1=np.arange(1,26)\n", + "r2=r1.reshape(5,5)\n", + "print(r2)\n", + "r2[1:4,1:4]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From aaf8e37f2e2d63176a2bda2a382155ba4d6771d1 Mon Sep 17 00:00:00 2001 From: Rachitha <47741826+Rachithaa@users.noreply.github.com> Date: Sun, 6 Sep 2020 21:51:21 +0530 Subject: [PATCH 4/6] Add files via upload RACHITHA-SECOND ASSIGNMENT --- Assignment2 pandas-RACHITHA.ipynb | 1384 +++++++++++++++++++++++++++++ 1 file changed, 1384 insertions(+) create mode 100644 Assignment2 pandas-RACHITHA.ipynb diff --git a/Assignment2 pandas-RACHITHA.ipynb b/Assignment2 pandas-RACHITHA.ipynb new file mode 100644 index 0000000..3b934bf --- /dev/null +++ b/Assignment2 pandas-RACHITHA.ipynb @@ -0,0 +1,1384 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
.......................................
148650Roy I TilleryCustodian0.000.000.000.00.000.002014NaNSan FranciscoNaN
148651Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148652Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148653Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", + "

148654 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "... ... ... \n", + "148650 Roy I Tillery Custodian \n", + "148651 Not provided Not provided \n", + "148652 Not provided Not provided \n", + "148653 Not provided Not provided \n", + "148654 Joe Lopez Counselor, Log Cabin Ranch \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 \n", + "... ... ... ... ... ... \n", + "148650 0.00 0.00 0.00 0.0 0.00 \n", + "148651 NaN NaN NaN NaN 0.00 \n", + "148652 NaN NaN NaN NaN 0.00 \n", + "148653 NaN NaN NaN NaN 0.00 \n", + "148654 0.00 0.00 -618.13 0.0 -618.13 \n", + "\n", + " TotalPayBenefits Year Notes Agency Status \n", + "Id \n", + "1 567595.43 2011 NaN San Francisco NaN \n", + "2 538909.28 2011 NaN San Francisco NaN \n", + "3 335279.91 2011 NaN San Francisco NaN \n", + "4 332343.61 2011 NaN San Francisco NaN \n", + "5 326373.19 2011 NaN San Francisco NaN \n", + "... ... ... ... ... ... \n", + "148650 0.00 2014 NaN San Francisco NaN \n", + "148651 0.00 2014 NaN San Francisco NaN \n", + "148652 0.00 2014 NaN San Francisco NaN \n", + "148653 0.00 2014 NaN San Francisco NaN \n", + "148654 -618.13 2014 NaN San Francisco NaN \n", + "\n", + "[148654 rows x 12 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1=pd.read_csv('Salaries.csv',index_col='Id')\n", + "d1" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", + "\n", + " Year Notes Agency Status \n", + "Id \n", + "1 2011 NaN San Francisco NaN \n", + "2 2011 NaN San Francisco NaN \n", + "3 2011 NaN San Francisco NaN \n", + "4 2011 NaN San Francisco NaN \n", + "5 2011 NaN San Francisco NaN " + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rows: 148654\n", + "columns: 12\n" + ] + } + ], + "source": [ + "print(\"rows: \"+str(len(d1.axes[0])))\n", + "print(\"columns: \"+str(len(d1.axes[1])))" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 12)" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 148654 entries, 1 to 148654\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 EmployeeName 148654 non-null object \n", + " 1 JobTitle 148654 non-null object \n", + " 2 BasePay 148045 non-null float64\n", + " 3 OvertimePay 148650 non-null float64\n", + " 4 OtherPay 148650 non-null float64\n", + " 5 Benefits 112491 non-null float64\n", + " 6 TotalPay 148654 non-null float64\n", + " 7 TotalPayBenefits 148654 non-null float64\n", + " 8 Year 148654 non-null int64 \n", + " 9 Notes 0 non-null float64\n", + " 10 Agency 148654 non-null object \n", + " 11 Status 0 non-null float64\n", + "dtypes: float64(8), int64(1), object(3)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "d1.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean66325.4488415066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%33588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%65007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%94691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " BasePay OvertimePay OtherPay Benefits \\\n", + "count 148045.000000 148650.000000 148650.000000 112491.000000 \n", + "mean 66325.448841 5066.059886 3648.767297 25007.893151 \n", + "std 42764.635495 11454.380559 8056.601866 15402.215858 \n", + "min -166.010000 -0.010000 -7058.590000 -33.890000 \n", + "25% 33588.200000 0.000000 0.000000 11535.395000 \n", + "50% 65007.450000 0.000000 811.270000 28628.620000 \n", + "75% 94691.050000 4658.175000 4236.065000 35566.855000 \n", + "max 319275.010000 245131.880000 400184.250000 96570.660000 \n", + "\n", + " TotalPay TotalPayBenefits Year Notes Status \n", + "count 148654.000000 148654.000000 148654.000000 0.0 0.0 \n", + "mean 74768.321972 93692.554811 2012.522643 NaN NaN \n", + "std 50517.005274 62793.533483 1.117538 NaN NaN \n", + "min -618.130000 -618.130000 2011.000000 NaN NaN \n", + "25% 36168.995000 44065.650000 2012.000000 NaN NaN \n", + "50% 71426.610000 92404.090000 2013.000000 NaN NaN \n", + "75% 105839.135000 132876.450000 2014.000000 NaN NaN \n", + "max 567595.430000 567595.430000 2014.000000 NaN NaN " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "Notes 148654\n", + "Agency 0\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearAgency
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011San Francisco
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011San Francisco
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011San Francisco
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011San Francisco
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011San Francisco
.................................
148650Roy I TilleryCustodian0.000.000.000.00.000.002014San Francisco
148651Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148652Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148653Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014San Francisco
\n", + "

148654 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "... ... ... \n", + "148650 Roy I Tillery Custodian \n", + "148651 Not provided Not provided \n", + "148652 Not provided Not provided \n", + "148653 Not provided Not provided \n", + "148654 Joe Lopez Counselor, Log Cabin Ranch \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 \n", + "... ... ... ... ... ... \n", + "148650 0.00 0.00 0.00 0.0 0.00 \n", + "148651 NaN NaN NaN NaN 0.00 \n", + "148652 NaN NaN NaN NaN 0.00 \n", + "148653 NaN NaN NaN NaN 0.00 \n", + "148654 0.00 0.00 -618.13 0.0 -618.13 \n", + "\n", + " TotalPayBenefits Year Agency \n", + "Id \n", + "1 567595.43 2011 San Francisco \n", + "2 538909.28 2011 San Francisco \n", + "3 335279.91 2011 San Francisco \n", + "4 332343.61 2011 San Francisco \n", + "5 326373.19 2011 San Francisco \n", + "... ... ... ... \n", + "148650 0.00 2014 San Francisco \n", + "148651 0.00 2014 San Francisco \n", + "148652 0.00 2014 San Francisco \n", + "148653 0.00 2014 San Francisco \n", + "148654 -618.13 2014 San Francisco \n", + "\n", + "[148654 rows x 10 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.dropna(how='all',axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Notes 0\n", + "Agency 1\n", + "Status 0\n", + "dtype: int64" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.groupby('Year').mean()['TotalPay']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(d1[d1['OvertimePay']==0]['OvertimePay'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[d1['OvertimePay']==0]['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1], dtype='int64', name='Id')" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1=d1[d1['OvertimePay']==0]['TotalPay'].max()\n", + "d1[d1['TotalPay']==f1].index" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1], dtype='int64', name='Id')" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[d1['TotalPay']==567595.430000].index\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id\n", + "12346 REBECCA CHIU\n", + "Name: EmployeeName, dtype: object" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[d1['TotalPayBenefits']==87619.78]['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[(d1['BasePay']> 150000) & (d1['OvertimePay'] > 100000)].count()['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1=d1.loc[d1['TotalPayBenefits'].idxmax()]\n", + "f1['JobTitle']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# .str.contains()\n", + "len(d1[d1['JobTitle'].str.contains('POLICE')])\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From aac7cbe524dae4717e1abd1dc66b9e4b540f495e Mon Sep 17 00:00:00 2001 From: Rachitha <47741826+Rachithaa@users.noreply.github.com> Date: Sun, 6 Sep 2020 21:52:41 +0530 Subject: [PATCH 5/6] Delete Assignment2 pandas-RACHITHA.ipynb --- Assignment2 pandas-RACHITHA.ipynb | 1384 ----------------------------- 1 file changed, 1384 deletions(-) delete mode 100644 Assignment2 pandas-RACHITHA.ipynb diff --git a/Assignment2 pandas-RACHITHA.ipynb b/Assignment2 pandas-RACHITHA.ipynb deleted file mode 100644 index 3b934bf..0000000 --- a/Assignment2 pandas-RACHITHA.ipynb +++ /dev/null @@ -1,1384 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "#%matplotlib notebook\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "import the dataset into a dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
.......................................
148650Roy I TilleryCustodian0.000.000.000.00.000.002014NaNSan FranciscoNaN
148651Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148652Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148653Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", - "

148654 rows × 12 columns

\n", - "
" - ], - "text/plain": [ - " EmployeeName JobTitle \\\n", - "Id \n", - "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", - "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", - "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", - "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", - "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", - "... ... ... \n", - "148650 Roy I Tillery Custodian \n", - "148651 Not provided Not provided \n", - "148652 Not provided Not provided \n", - "148653 Not provided Not provided \n", - "148654 Joe Lopez Counselor, Log Cabin Ranch \n", - "\n", - " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", - "Id \n", - "1 167411.18 0.00 400184.25 NaN 567595.43 \n", - "2 155966.02 245131.88 137811.38 NaN 538909.28 \n", - "3 212739.13 106088.18 16452.60 NaN 335279.91 \n", - "4 77916.00 56120.71 198306.90 NaN 332343.61 \n", - "5 134401.60 9737.00 182234.59 NaN 326373.19 \n", - "... ... ... ... ... ... \n", - "148650 0.00 0.00 0.00 0.0 0.00 \n", - "148651 NaN NaN NaN NaN 0.00 \n", - "148652 NaN NaN NaN NaN 0.00 \n", - "148653 NaN NaN NaN NaN 0.00 \n", - "148654 0.00 0.00 -618.13 0.0 -618.13 \n", - "\n", - " TotalPayBenefits Year Notes Agency Status \n", - "Id \n", - "1 567595.43 2011 NaN San Francisco NaN \n", - "2 538909.28 2011 NaN San Francisco NaN \n", - "3 335279.91 2011 NaN San Francisco NaN \n", - "4 332343.61 2011 NaN San Francisco NaN \n", - "5 326373.19 2011 NaN San Francisco NaN \n", - "... ... ... ... ... ... \n", - "148650 0.00 2014 NaN San Francisco NaN \n", - "148651 0.00 2014 NaN San Francisco NaN \n", - "148652 0.00 2014 NaN San Francisco NaN \n", - "148653 0.00 2014 NaN San Francisco NaN \n", - "148654 -618.13 2014 NaN San Francisco NaN \n", - "\n", - "[148654 rows x 12 columns]" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1=pd.read_csv('Salaries.csv',index_col='Id')\n", - "d1" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", - "
" - ], - "text/plain": [ - " EmployeeName JobTitle \\\n", - "Id \n", - "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", - "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", - "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", - "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", - "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", - "\n", - " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", - "Id \n", - "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", - "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", - "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", - "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", - "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", - "\n", - " Year Notes Agency Status \n", - "Id \n", - "1 2011 NaN San Francisco NaN \n", - "2 2011 NaN San Francisco NaN \n", - "3 2011 NaN San Francisco NaN \n", - "4 2011 NaN San Francisco NaN \n", - "5 2011 NaN San Francisco NaN " - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "display the column names" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", - " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", - " 'Status'],\n", - " dtype='object')" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.columns" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "display the number of rows and cols" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "rows: 148654\n", - "columns: 12\n" - ] - } - ], - "source": [ - "print(\"rows: \"+str(len(d1.axes[0])))\n", - "print(\"columns: \"+str(len(d1.axes[1])))" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(148654, 12)" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "display the dataframe info (types of data in columns and not null values etc.)" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 148654 entries, 1 to 148654\n", - "Data columns (total 12 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 EmployeeName 148654 non-null object \n", - " 1 JobTitle 148654 non-null object \n", - " 2 BasePay 148045 non-null float64\n", - " 3 OvertimePay 148650 non-null float64\n", - " 4 OtherPay 148650 non-null float64\n", - " 5 Benefits 112491 non-null float64\n", - " 6 TotalPay 148654 non-null float64\n", - " 7 TotalPayBenefits 148654 non-null float64\n", - " 8 Year 148654 non-null int64 \n", - " 9 Notes 0 non-null float64\n", - " 10 Agency 148654 non-null object \n", - " 11 Status 0 non-null float64\n", - "dtypes: float64(8), int64(1), object(3)\n", - "memory usage: 14.7+ MB\n" - ] - } - ], - "source": [ - "d1.info()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "display stats of the dataframe like count, mean, std, max, 25% etc....." - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean66325.4488415066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%33588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%65007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%94691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", - "
" - ], - "text/plain": [ - " BasePay OvertimePay OtherPay Benefits \\\n", - "count 148045.000000 148650.000000 148650.000000 112491.000000 \n", - "mean 66325.448841 5066.059886 3648.767297 25007.893151 \n", - "std 42764.635495 11454.380559 8056.601866 15402.215858 \n", - "min -166.010000 -0.010000 -7058.590000 -33.890000 \n", - "25% 33588.200000 0.000000 0.000000 11535.395000 \n", - "50% 65007.450000 0.000000 811.270000 28628.620000 \n", - "75% 94691.050000 4658.175000 4236.065000 35566.855000 \n", - "max 319275.010000 245131.880000 400184.250000 96570.660000 \n", - "\n", - " TotalPay TotalPayBenefits Year Notes Status \n", - "count 148654.000000 148654.000000 148654.000000 0.0 0.0 \n", - "mean 74768.321972 93692.554811 2012.522643 NaN NaN \n", - "std 50517.005274 62793.533483 1.117538 NaN NaN \n", - "min -618.130000 -618.130000 2011.000000 NaN NaN \n", - "25% 36168.995000 44065.650000 2012.000000 NaN NaN \n", - "50% 71426.610000 92404.090000 2013.000000 NaN NaN \n", - "75% 105839.135000 132876.450000 2014.000000 NaN NaN \n", - "max 567595.430000 567595.430000 2014.000000 NaN NaN " - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "display null values per column" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "EmployeeName 0\n", - "JobTitle 0\n", - "BasePay 609\n", - "OvertimePay 4\n", - "OtherPay 4\n", - "Benefits 36163\n", - "TotalPay 0\n", - "TotalPayBenefits 0\n", - "Year 0\n", - "Notes 148654\n", - "Agency 0\n", - "Status 148654\n", - "dtype: int64" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.isna().sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "remove columns will all values as NaN" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearAgency
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011San Francisco
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011San Francisco
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011San Francisco
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011San Francisco
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011San Francisco
.................................
148650Roy I TilleryCustodian0.000.000.000.00.000.002014San Francisco
148651Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148652Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148653Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014San Francisco
\n", - "

148654 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " EmployeeName JobTitle \\\n", - "Id \n", - "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", - "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", - "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", - "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", - "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", - "... ... ... \n", - "148650 Roy I Tillery Custodian \n", - "148651 Not provided Not provided \n", - "148652 Not provided Not provided \n", - "148653 Not provided Not provided \n", - "148654 Joe Lopez Counselor, Log Cabin Ranch \n", - "\n", - " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", - "Id \n", - "1 167411.18 0.00 400184.25 NaN 567595.43 \n", - "2 155966.02 245131.88 137811.38 NaN 538909.28 \n", - "3 212739.13 106088.18 16452.60 NaN 335279.91 \n", - "4 77916.00 56120.71 198306.90 NaN 332343.61 \n", - "5 134401.60 9737.00 182234.59 NaN 326373.19 \n", - "... ... ... ... ... ... \n", - "148650 0.00 0.00 0.00 0.0 0.00 \n", - "148651 NaN NaN NaN NaN 0.00 \n", - "148652 NaN NaN NaN NaN 0.00 \n", - "148653 NaN NaN NaN NaN 0.00 \n", - "148654 0.00 0.00 -618.13 0.0 -618.13 \n", - "\n", - " TotalPayBenefits Year Agency \n", - "Id \n", - "1 567595.43 2011 San Francisco \n", - "2 538909.28 2011 San Francisco \n", - "3 335279.91 2011 San Francisco \n", - "4 332343.61 2011 San Francisco \n", - "5 326373.19 2011 San Francisco \n", - "... ... ... ... \n", - "148650 0.00 2014 San Francisco \n", - "148651 0.00 2014 San Francisco \n", - "148652 0.00 2014 San Francisco \n", - "148653 0.00 2014 San Francisco \n", - "148654 -618.13 2014 San Francisco \n", - "\n", - "[148654 rows x 10 columns]" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.dropna(how='all',axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "display number of unique values in each column" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "EmployeeName 110811\n", - "JobTitle 2159\n", - "BasePay 109489\n", - "OvertimePay 65998\n", - "OtherPay 83225\n", - "Benefits 98465\n", - "TotalPay 138486\n", - "TotalPayBenefits 142098\n", - "Year 4\n", - "Notes 0\n", - "Agency 1\n", - "Status 0\n", - "dtype: int64" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.nunique()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "mean of total pay of all people based on year" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Year\n", - "2011 71744.103871\n", - "2012 74113.262265\n", - "2013 77611.443142\n", - "2014 75463.918140\n", - "Name: TotalPay, dtype: float64" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1.groupby('Year').mean()['TotalPay']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "how many people have 0 overtime pay" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "77321" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(d1[d1['OvertimePay']==0]['OvertimePay'])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 77321.000000\n", - "mean 60229.348901\n", - "std 49307.912350\n", - "min -618.130000\n", - "25% 13290.450000\n", - "50% 58158.590000\n", - "75% 91115.090000\n", - "max 567595.430000\n", - "Name: TotalPay, dtype: float64" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1[d1['OvertimePay']==0]['TotalPay'].describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "find Id of that person with max TotalPay you got in previous question" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Int64Index([1], dtype='int64', name='Id')" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1=d1[d1['OvertimePay']==0]['TotalPay'].max()\n", - "d1[d1['TotalPay']==f1].index" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Int64Index([1], dtype='int64', name='Id')" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1[d1['TotalPay']==567595.430000].index\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "name of employee with total pay benefits = 87619.78" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Id\n", - "12346 REBECCA CHIU\n", - "Name: EmployeeName, dtype: object" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1[d1['TotalPayBenefits']==87619.78]['EmployeeName']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "how many people have BasePay > 150000 and OvertimePay > 100000" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "12" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d1[(d1['BasePay']> 150000) & (d1['OvertimePay'] > 100000)].count()['EmployeeName']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "which job title generally has highest average TotalPayBenefits" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "f1=d1.loc[d1['TotalPayBenefits'].idxmax()]\n", - "f1['JobTitle']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "How many employees are POLICE" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2512" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# .str.contains()\n", - "len(d1[d1['JobTitle'].str.contains('POLICE')])\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 6adcb577f4c672b94fcc34974a9ea0c8778d143c Mon Sep 17 00:00:00 2001 From: Rachitha <47741826+Rachithaa@users.noreply.github.com> Date: Sun, 6 Sep 2020 21:53:26 +0530 Subject: [PATCH 6/6] RACHITHA-SECOND ASSIGNMENT --- Assignment2 pandas-RACHITHA.ipynb | 1384 +++++++++++++++++++++++++++++ 1 file changed, 1384 insertions(+) create mode 100644 Assignment2 pandas-RACHITHA.ipynb diff --git a/Assignment2 pandas-RACHITHA.ipynb b/Assignment2 pandas-RACHITHA.ipynb new file mode 100644 index 0000000..3b934bf --- /dev/null +++ b/Assignment2 pandas-RACHITHA.ipynb @@ -0,0 +1,1384 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
.......................................
148650Roy I TilleryCustodian0.000.000.000.00.000.002014NaNSan FranciscoNaN
148651Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148652Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148653Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", + "

148654 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "... ... ... \n", + "148650 Roy I Tillery Custodian \n", + "148651 Not provided Not provided \n", + "148652 Not provided Not provided \n", + "148653 Not provided Not provided \n", + "148654 Joe Lopez Counselor, Log Cabin Ranch \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 \n", + "... ... ... ... ... ... \n", + "148650 0.00 0.00 0.00 0.0 0.00 \n", + "148651 NaN NaN NaN NaN 0.00 \n", + "148652 NaN NaN NaN NaN 0.00 \n", + "148653 NaN NaN NaN NaN 0.00 \n", + "148654 0.00 0.00 -618.13 0.0 -618.13 \n", + "\n", + " TotalPayBenefits Year Notes Agency Status \n", + "Id \n", + "1 567595.43 2011 NaN San Francisco NaN \n", + "2 538909.28 2011 NaN San Francisco NaN \n", + "3 335279.91 2011 NaN San Francisco NaN \n", + "4 332343.61 2011 NaN San Francisco NaN \n", + "5 326373.19 2011 NaN San Francisco NaN \n", + "... ... ... ... ... ... \n", + "148650 0.00 2014 NaN San Francisco NaN \n", + "148651 0.00 2014 NaN San Francisco NaN \n", + "148652 0.00 2014 NaN San Francisco NaN \n", + "148653 0.00 2014 NaN San Francisco NaN \n", + "148654 -618.13 2014 NaN San Francisco NaN \n", + "\n", + "[148654 rows x 12 columns]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1=pd.read_csv('Salaries.csv',index_col='Id')\n", + "d1" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay TotalPayBenefits \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 326373.19 \n", + "\n", + " Year Notes Agency Status \n", + "Id \n", + "1 2011 NaN San Francisco NaN \n", + "2 2011 NaN San Francisco NaN \n", + "3 2011 NaN San Francisco NaN \n", + "4 2011 NaN San Francisco NaN \n", + "5 2011 NaN San Francisco NaN " + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "rows: 148654\n", + "columns: 12\n" + ] + } + ], + "source": [ + "print(\"rows: \"+str(len(d1.axes[0])))\n", + "print(\"columns: \"+str(len(d1.axes[1])))" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 12)" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 148654 entries, 1 to 148654\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 EmployeeName 148654 non-null object \n", + " 1 JobTitle 148654 non-null object \n", + " 2 BasePay 148045 non-null float64\n", + " 3 OvertimePay 148650 non-null float64\n", + " 4 OtherPay 148650 non-null float64\n", + " 5 Benefits 112491 non-null float64\n", + " 6 TotalPay 148654 non-null float64\n", + " 7 TotalPayBenefits 148654 non-null float64\n", + " 8 Year 148654 non-null int64 \n", + " 9 Notes 0 non-null float64\n", + " 10 Agency 148654 non-null object \n", + " 11 Status 0 non-null float64\n", + "dtypes: float64(8), int64(1), object(3)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "d1.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean66325.4488415066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%33588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%65007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%94691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " BasePay OvertimePay OtherPay Benefits \\\n", + "count 148045.000000 148650.000000 148650.000000 112491.000000 \n", + "mean 66325.448841 5066.059886 3648.767297 25007.893151 \n", + "std 42764.635495 11454.380559 8056.601866 15402.215858 \n", + "min -166.010000 -0.010000 -7058.590000 -33.890000 \n", + "25% 33588.200000 0.000000 0.000000 11535.395000 \n", + "50% 65007.450000 0.000000 811.270000 28628.620000 \n", + "75% 94691.050000 4658.175000 4236.065000 35566.855000 \n", + "max 319275.010000 245131.880000 400184.250000 96570.660000 \n", + "\n", + " TotalPay TotalPayBenefits Year Notes Status \n", + "count 148654.000000 148654.000000 148654.000000 0.0 0.0 \n", + "mean 74768.321972 93692.554811 2012.522643 NaN NaN \n", + "std 50517.005274 62793.533483 1.117538 NaN NaN \n", + "min -618.130000 -618.130000 2011.000000 NaN NaN \n", + "25% 36168.995000 44065.650000 2012.000000 NaN NaN \n", + "50% 71426.610000 92404.090000 2013.000000 NaN NaN \n", + "75% 105839.135000 132876.450000 2014.000000 NaN NaN \n", + "max 567595.430000 567595.430000 2014.000000 NaN NaN " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "Notes 148654\n", + "Agency 0\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.isna().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearAgency
Id
1NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011San Francisco
2GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011San Francisco
3ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011San Francisco
4CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011San Francisco
5PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011San Francisco
.................................
148650Roy I TilleryCustodian0.000.000.000.00.000.002014San Francisco
148651Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148652Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148653Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014San Francisco
\n", + "

148654 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " EmployeeName JobTitle \\\n", + "Id \n", + "1 NATHANIEL FORD GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY \n", + "2 GARY JIMENEZ CAPTAIN III (POLICE DEPARTMENT) \n", + "3 ALBERT PARDINI CAPTAIN III (POLICE DEPARTMENT) \n", + "4 CHRISTOPHER CHONG WIRE ROPE CABLE MAINTENANCE MECHANIC \n", + "5 PATRICK GARDNER DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) \n", + "... ... ... \n", + "148650 Roy I Tillery Custodian \n", + "148651 Not provided Not provided \n", + "148652 Not provided Not provided \n", + "148653 Not provided Not provided \n", + "148654 Joe Lopez Counselor, Log Cabin Ranch \n", + "\n", + " BasePay OvertimePay OtherPay Benefits TotalPay \\\n", + "Id \n", + "1 167411.18 0.00 400184.25 NaN 567595.43 \n", + "2 155966.02 245131.88 137811.38 NaN 538909.28 \n", + "3 212739.13 106088.18 16452.60 NaN 335279.91 \n", + "4 77916.00 56120.71 198306.90 NaN 332343.61 \n", + "5 134401.60 9737.00 182234.59 NaN 326373.19 \n", + "... ... ... ... ... ... \n", + "148650 0.00 0.00 0.00 0.0 0.00 \n", + "148651 NaN NaN NaN NaN 0.00 \n", + "148652 NaN NaN NaN NaN 0.00 \n", + "148653 NaN NaN NaN NaN 0.00 \n", + "148654 0.00 0.00 -618.13 0.0 -618.13 \n", + "\n", + " TotalPayBenefits Year Agency \n", + "Id \n", + "1 567595.43 2011 San Francisco \n", + "2 538909.28 2011 San Francisco \n", + "3 335279.91 2011 San Francisco \n", + "4 332343.61 2011 San Francisco \n", + "5 326373.19 2011 San Francisco \n", + "... ... ... ... \n", + "148650 0.00 2014 San Francisco \n", + "148651 0.00 2014 San Francisco \n", + "148652 0.00 2014 San Francisco \n", + "148653 0.00 2014 San Francisco \n", + "148654 -618.13 2014 San Francisco \n", + "\n", + "[148654 rows x 10 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.dropna(how='all',axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Notes 0\n", + "Agency 1\n", + "Status 0\n", + "dtype: int64" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1.groupby('Year').mean()['TotalPay']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(d1[d1['OvertimePay']==0]['OvertimePay'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[d1['OvertimePay']==0]['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1], dtype='int64', name='Id')" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1=d1[d1['OvertimePay']==0]['TotalPay'].max()\n", + "d1[d1['TotalPay']==f1].index" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Int64Index([1], dtype='int64', name='Id')" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[d1['TotalPay']==567595.430000].index\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id\n", + "12346 REBECCA CHIU\n", + "Name: EmployeeName, dtype: object" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[d1['TotalPayBenefits']==87619.78]['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1[(d1['BasePay']> 150000) & (d1['OvertimePay'] > 100000)].count()['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY'" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1=d1.loc[d1['TotalPayBenefits'].idxmax()]\n", + "f1['JobTitle']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# .str.contains()\n", + "len(d1[d1['JobTitle'].str.contains('POLICE')])\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}