From 5a25ca8131cc6fa9ee6e8f4c6e3c0b247355c7f5 Mon Sep 17 00:00:00 2001 From: Naga Kishan Munjulury Venkata <32195210+Nagakishan@users.noreply.github.com> Date: Tue, 25 Aug 2020 00:26:54 +0530 Subject: [PATCH 1/5] Add files via upload Naga Kishan Munjulury Venkata Assignment 1 - Numpy Arrays --- Naga Assignment1 Numpy.ipynb | 363 +++++++++++++++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 Naga Assignment1 Numpy.ipynb diff --git a/Naga Assignment1 Numpy.ipynb b/Naga Assignment1 Numpy.ipynb new file mode 100644 index 0000000..e243790 --- /dev/null +++ b/Naga Assignment1 Numpy.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python list => \\[1,2,3,4,5\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3, 4, 5])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list1 = [1,2,3,4,5]\n", + "np.array(list1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix1 = [[1,2,3],[4,5,6],[7,8,9]]\n", + "np.array(matrix1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", + "\n", + "\\[ [1,3,5],\n", + "\n", + " [7,9,11],\n", + " \n", + " [13,15,17] \\]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1, 3, 5],\n", + " [ 7, 9, 11],\n", + " [13, 15, 17]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr2 = np.arange(1,19,2) # does not consider the last element or value\n", + "arr2.reshape(3,3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.21018049, 2.44756364, -1.15507536, -0.52912029, 0.36448852,\n", + " 0.36441865, -1.4346832 , -1.05009222, 0.77984683, 1.07404423])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr3 = np.random.randn(10)\n", + "arr3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list4 = [1,2,3,4,5]\n", + "arr4 = np.array(list4)\n", + "arr4.reshape(5,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the shape of the above created array" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5,)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr4.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", + "3 4 9\n" + ] + } + ], + "source": [ + "list6 = [1,2,3,4,5,6,7,8,9,10]\n", + "print(list6)\n", + "arr5 = np.array(list6)\n", + "print(arr5[2],arr5[3],arr5[8]) # takes the values under the index 3,4, and 9 " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print alternate elements of that array" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 3, 5, 7, 9])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr6 = arr5[::2]\n", + "arr6" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change last 3 elements into 100 using broadcasting and print" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 3, 100, 100, 100])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# broadcasting\n", + "arr6[-3:]=100\n", + "arr6" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", + "\n", + "Then print the middle (3 x 3) matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0 1 2 3 4]\n", + " [ 5 6 7 8 9]\n", + " [10 11 12 13 14]\n", + " [15 16 17 18 19]\n", + " [20 21 22 23 24]]\n", + "Mid 3X3 matrix:\n", + "[[ 6 7 8]\n", + " [11 12 13]\n", + " [16 17 18]]\n" + ] + } + ], + "source": [ + "matrix2 = np.arange(25).reshape(5, 5)\n", + "print(matrix2)\n", + "print('Mid 3X3 matrix:',matrix2[1:4,1:4],sep='\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 96e0d72f97c068823b4ebf1c5086c4188a2c7508 Mon Sep 17 00:00:00 2001 From: Naga Kishan Munjulury Venkata <32195210+Nagakishan@users.noreply.github.com> Date: Tue, 25 Aug 2020 00:28:41 +0530 Subject: [PATCH 2/5] Delete Naga Assignment1 Numpy.ipynb --- Naga Assignment1 Numpy.ipynb | 363 ----------------------------------- 1 file changed, 363 deletions(-) delete mode 100644 Naga Assignment1 Numpy.ipynb diff --git a/Naga Assignment1 Numpy.ipynb b/Naga Assignment1 Numpy.ipynb deleted file mode 100644 index e243790..0000000 --- a/Naga Assignment1 Numpy.ipynb +++ /dev/null @@ -1,363 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Assignment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a python list => \\[1,2,3,4,5\\]\n", - "\n", - "Convert it into numpy array and print it" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 2, 3, 4, 5])" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list1 = [1,2,3,4,5]\n", - "np.array(list1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", - "\n", - "Convert it into numpy array and print it" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1, 2, 3],\n", - " [4, 5, 6],\n", - " [7, 8, 9]])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "matrix1 = [[1,2,3],[4,5,6],[7,8,9]]\n", - "np.array(matrix1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", - "\n", - "\\[ [1,3,5],\n", - "\n", - " [7,9,11],\n", - " \n", - " [13,15,17] \\]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 1, 3, 5],\n", - " [ 7, 9, 11],\n", - " [13, 15, 17]])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "arr2 = np.arange(1,19,2) # does not consider the last element or value\n", - "arr2.reshape(3,3)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([-0.21018049, 2.44756364, -1.15507536, -0.52912029, 0.36448852,\n", - " 0.36441865, -1.4346832 , -1.05009222, 0.77984683, 1.07404423])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "arr3 = np.random.randn(10)\n", - "arr3" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[1],\n", - " [2],\n", - " [3],\n", - " [4],\n", - " [5]])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list4 = [1,2,3,4,5]\n", - "arr4 = np.array(list4)\n", - "arr4.reshape(5,1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print the shape of the above created array" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(5,)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "arr4.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", - "3 4 9\n" - ] - } - ], - "source": [ - "list6 = [1,2,3,4,5,6,7,8,9,10]\n", - "print(list6)\n", - "arr5 = np.array(list6)\n", - "print(arr5[2],arr5[3],arr5[8]) # takes the values under the index 3,4, and 9 " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Print alternate elements of that array" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([1, 3, 5, 7, 9])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "arr6 = arr5[::2]\n", - "arr6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Change last 3 elements into 100 using broadcasting and print" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 1, 3, 100, 100, 100])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# broadcasting\n", - "arr6[-3:]=100\n", - "arr6" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", - "\n", - "Then print the middle (3 x 3) matrix." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 0 1 2 3 4]\n", - " [ 5 6 7 8 9]\n", - " [10 11 12 13 14]\n", - " [15 16 17 18 19]\n", - " [20 21 22 23 24]]\n", - "Mid 3X3 matrix:\n", - "[[ 6 7 8]\n", - " [11 12 13]\n", - " [16 17 18]]\n" - ] - } - ], - "source": [ - "matrix2 = np.arange(25).reshape(5, 5)\n", - "print(matrix2)\n", - "print('Mid 3X3 matrix:',matrix2[1:4,1:4],sep='\\n')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 8a087db239cacf3fd360bc6588b13afb23826366 Mon Sep 17 00:00:00 2001 From: Naga Kishan Munjulury Venkata <32195210+Nagakishan@users.noreply.github.com> Date: Tue, 25 Aug 2020 00:34:49 +0530 Subject: [PATCH 3/5] Add files via upload Naga Kishan Munjulury Venkata Numpy Arrays - Assignment 1 --- Assignment/Naga Assignment1 Numpy.ipynb | 363 ++++++++++++++++++++++++ 1 file changed, 363 insertions(+) create mode 100644 Assignment/Naga Assignment1 Numpy.ipynb diff --git a/Assignment/Naga Assignment1 Numpy.ipynb b/Assignment/Naga Assignment1 Numpy.ipynb new file mode 100644 index 0000000..e243790 --- /dev/null +++ b/Assignment/Naga Assignment1 Numpy.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Assignment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python list => \\[1,2,3,4,5\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 2, 3, 4, 5])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list1 = [1,2,3,4,5]\n", + "np.array(list1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a python matrix (3 x 3) => \\[[1,2,3],[4,5,6],[7,8,9]\\]\n", + "\n", + "Convert it into numpy array and print it" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "matrix1 = [[1,2,3],[4,5,6],[7,8,9]]\n", + "np.array(matrix1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Make a matrix (3 x 3) using built-in methods (like arange(), reshape() etc.):\n", + "\n", + "\\[ [1,3,5],\n", + "\n", + " [7,9,11],\n", + " \n", + " [13,15,17] \\]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 1, 3, 5],\n", + " [ 7, 9, 11],\n", + " [13, 15, 17]])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr2 = np.arange(1,19,2) # does not consider the last element or value\n", + "arr2.reshape(3,3)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 random numbers from 0 to 10 (there should be few numbers greater than 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.21018049, 2.44756364, -1.15507536, -0.52912029, 0.36448852,\n", + " 0.36441865, -1.4346832 , -1.05009222, 0.77984683, 1.07404423])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr3 = np.random.randn(10)\n", + "arr3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create numpy array => \\[1,2,3,4,5\\] and convert it to 2D array with 5 rows" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list4 = [1,2,3,4,5]\n", + "arr4 = np.array(list4)\n", + "arr4.reshape(5,1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print the shape of the above created array" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5,)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr4.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a numpy array with 10 elements in it. Access and print its 3rd, 4th and 9th element." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n", + "3 4 9\n" + ] + } + ], + "source": [ + "list6 = [1,2,3,4,5,6,7,8,9,10]\n", + "print(list6)\n", + "arr5 = np.array(list6)\n", + "print(arr5[2],arr5[3],arr5[8]) # takes the values under the index 3,4, and 9 " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print alternate elements of that array" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([1, 3, 5, 7, 9])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr6 = arr5[::2]\n", + "arr6" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change last 3 elements into 100 using broadcasting and print" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1, 3, 100, 100, 100])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# broadcasting\n", + "arr6[-3:]=100\n", + "arr6" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a 5 x 5 matrix (fill it with any element you like), print it.\n", + "\n", + "Then print the middle (3 x 3) matrix." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 0 1 2 3 4]\n", + " [ 5 6 7 8 9]\n", + " [10 11 12 13 14]\n", + " [15 16 17 18 19]\n", + " [20 21 22 23 24]]\n", + "Mid 3X3 matrix:\n", + "[[ 6 7 8]\n", + " [11 12 13]\n", + " [16 17 18]]\n" + ] + } + ], + "source": [ + "matrix2 = np.arange(25).reshape(5, 5)\n", + "print(matrix2)\n", + "print('Mid 3X3 matrix:',matrix2[1:4,1:4],sep='\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d336da92499805404260f28c4cee0e93d4b0584d Mon Sep 17 00:00:00 2001 From: Naga Kishan Munjulury Venkata <32195210+Nagakishan@users.noreply.github.com> Date: Tue, 8 Sep 2020 10:53:12 +0530 Subject: [PATCH 4/5] Add files via upload --- Assignment/Assignment2 pd.ipynb | 1206 +++++++++++++++++++++++++++++++ 1 file changed, 1206 insertions(+) create mode 100644 Assignment/Assignment2 pd.ipynb diff --git a/Assignment/Assignment2 pd.ipynb b/Assignment/Assignment2 pd.ipynb new file mode 100644 index 0000000..2bb9e5b --- /dev/null +++ b/Assignment/Assignment2 pd.ipynb @@ -0,0 +1,1206 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
..........................................
148649148650Roy I TilleryCustodian0.000.000.000.00.000.002014NaNSan FranciscoNaN
148650148651Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148651148652Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148652148653Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148653148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", + "

148654 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName \\\n", + "0 1 NATHANIEL FORD \n", + "1 2 GARY JIMENEZ \n", + "2 3 ALBERT PARDINI \n", + "3 4 CHRISTOPHER CHONG \n", + "4 5 PATRICK GARDNER \n", + "... ... ... \n", + "148649 148650 Roy I Tillery \n", + "148650 148651 Not provided \n", + "148651 148652 Not provided \n", + "148652 148653 Not provided \n", + "148653 148654 Joe Lopez \n", + "\n", + " JobTitle BasePay \\\n", + "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 167411.18 \n", + "1 CAPTAIN III (POLICE DEPARTMENT) 155966.02 \n", + "2 CAPTAIN III (POLICE DEPARTMENT) 212739.13 \n", + "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 77916.00 \n", + "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 134401.60 \n", + "... ... ... \n", + "148649 Custodian 0.00 \n", + "148650 Not provided NaN \n", + "148651 Not provided NaN \n", + "148652 Not provided NaN \n", + "148653 Counselor, Log Cabin Ranch 0.00 \n", + "\n", + " OvertimePay OtherPay Benefits TotalPay TotalPayBenefits Year \\\n", + "0 0.00 400184.25 NaN 567595.43 567595.43 2011 \n", + "1 245131.88 137811.38 NaN 538909.28 538909.28 2011 \n", + "2 106088.18 16452.60 NaN 335279.91 335279.91 2011 \n", + "3 56120.71 198306.90 NaN 332343.61 332343.61 2011 \n", + "4 9737.00 182234.59 NaN 326373.19 326373.19 2011 \n", + "... ... ... ... ... ... ... \n", + "148649 0.00 0.00 0.0 0.00 0.00 2014 \n", + "148650 NaN NaN NaN 0.00 0.00 2014 \n", + "148651 NaN NaN NaN 0.00 0.00 2014 \n", + "148652 NaN NaN NaN 0.00 0.00 2014 \n", + "148653 0.00 -618.13 0.0 -618.13 -618.13 2014 \n", + "\n", + " Notes Agency Status \n", + "0 NaN San Francisco NaN \n", + "1 NaN San Francisco NaN \n", + "2 NaN San Francisco NaN \n", + "3 NaN San Francisco NaN \n", + "4 NaN San Francisco NaN \n", + "... ... ... ... \n", + "148649 NaN San Francisco NaN \n", + "148650 NaN San Francisco NaN \n", + "148651 NaN San Francisco NaN \n", + "148652 NaN San Francisco NaN \n", + "148653 NaN San Francisco NaN \n", + "\n", + "[148654 rows x 13 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"Salary.csv\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [Id, EmployeeName, JobTitle, BasePay, OvertimePay, OtherPay, Benefits, TotalPay, TotalPayBenefits, Year, Notes, Agency, Status]\n", + "Index: []\n" + ] + }, + { + "data": { + "text/plain": [ + "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = df.columns\n", + "print(df[:0]) # checking the column names in the row 1\n", + "a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 13)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 148654 entries, 0 to 148653\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 148654 non-null int64 \n", + " 1 EmployeeName 148654 non-null object \n", + " 2 JobTitle 148654 non-null object \n", + " 3 BasePay 148045 non-null float64\n", + " 4 OvertimePay 148650 non-null float64\n", + " 5 OtherPay 148650 non-null float64\n", + " 6 Benefits 112491 non-null float64\n", + " 7 TotalPay 148654 non-null float64\n", + " 8 TotalPayBenefits 148654 non-null float64\n", + " 9 Year 148654 non-null int64 \n", + " 10 Notes 0 non-null float64\n", + " 11 Agency 148654 non-null object \n", + " 12 Status 0 non-null float64\n", + "dtypes: float64(8), int64(2), object(3)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "df.info() # (verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148654.000000148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean74327.50000066325.4488415066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42912.85779542764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min1.000000-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%37164.25000033588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%74327.50000065007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%111490.75000094691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max148654.000000319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " Id BasePay OvertimePay OtherPay \\\n", + "count 148654.000000 148045.000000 148650.000000 148650.000000 \n", + "mean 74327.500000 66325.448841 5066.059886 3648.767297 \n", + "std 42912.857795 42764.635495 11454.380559 8056.601866 \n", + "min 1.000000 -166.010000 -0.010000 -7058.590000 \n", + "25% 37164.250000 33588.200000 0.000000 0.000000 \n", + "50% 74327.500000 65007.450000 0.000000 811.270000 \n", + "75% 111490.750000 94691.050000 4658.175000 4236.065000 \n", + "max 148654.000000 319275.010000 245131.880000 400184.250000 \n", + "\n", + " Benefits TotalPay TotalPayBenefits Year Notes \\\n", + "count 112491.000000 148654.000000 148654.000000 148654.000000 0.0 \n", + "mean 25007.893151 74768.321972 93692.554811 2012.522643 NaN \n", + "std 15402.215858 50517.005274 62793.533483 1.117538 NaN \n", + "min -33.890000 -618.130000 -618.130000 2011.000000 NaN \n", + "25% 11535.395000 36168.995000 44065.650000 2012.000000 NaN \n", + "50% 28628.620000 71426.610000 92404.090000 2013.000000 NaN \n", + "75% 35566.855000 105839.135000 132876.450000 2014.000000 NaN \n", + "max 96570.660000 567595.430000 567595.430000 2014.000000 NaN \n", + "\n", + " Status \n", + "count 0.0 \n", + "mean NaN \n", + "std NaN \n", + "min NaN \n", + "25% NaN \n", + "50% NaN \n", + "75% NaN \n", + "max NaN " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe() # statistics - Summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "Notes 148654\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_values=df.columns[df.isnull().any()]\n", + "df[null_values].isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleTotalPayTotalPayBenefitsYearAgency
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY567595.43567595.432011San Francisco
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)538909.28538909.282011San Francisco
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)335279.91335279.912011San Francisco
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC332343.61332343.612011San Francisco
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)326373.19326373.192011San Francisco
........................
148649148650Roy I TilleryCustodian0.000.002014San Francisco
148650148651Not providedNot provided0.000.002014San Francisco
148651148652Not providedNot provided0.000.002014San Francisco
148652148653Not providedNot provided0.000.002014San Francisco
148653148654Joe LopezCounselor, Log Cabin Ranch-618.13-618.132014San Francisco
\n", + "

148654 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName \\\n", + "0 1 NATHANIEL FORD \n", + "1 2 GARY JIMENEZ \n", + "2 3 ALBERT PARDINI \n", + "3 4 CHRISTOPHER CHONG \n", + "4 5 PATRICK GARDNER \n", + "... ... ... \n", + "148649 148650 Roy I Tillery \n", + "148650 148651 Not provided \n", + "148651 148652 Not provided \n", + "148652 148653 Not provided \n", + "148653 148654 Joe Lopez \n", + "\n", + " JobTitle TotalPay \\\n", + "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 567595.43 \n", + "1 CAPTAIN III (POLICE DEPARTMENT) 538909.28 \n", + "2 CAPTAIN III (POLICE DEPARTMENT) 335279.91 \n", + "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 332343.61 \n", + "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 326373.19 \n", + "... ... ... \n", + "148649 Custodian 0.00 \n", + "148650 Not provided 0.00 \n", + "148651 Not provided 0.00 \n", + "148652 Not provided 0.00 \n", + "148653 Counselor, Log Cabin Ranch -618.13 \n", + "\n", + " TotalPayBenefits Year Agency \n", + "0 567595.43 2011 San Francisco \n", + "1 538909.28 2011 San Francisco \n", + "2 335279.91 2011 San Francisco \n", + "3 332343.61 2011 San Francisco \n", + "4 326373.19 2011 San Francisco \n", + "... ... ... ... \n", + "148649 0.00 2014 San Francisco \n", + "148650 0.00 2014 San Francisco \n", + "148651 0.00 2014 San Francisco \n", + "148652 0.00 2014 San Francisco \n", + "148653 -618.13 2014 San Francisco \n", + "\n", + "[148654 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = df.dropna(axis=1) # inplace = True\n", + "df1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 148654\n", + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Notes 0\n", + "Agency 1\n", + "Status 0\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = pd.read_csv(\"Salary.csv\")\n", + "df2.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dftp = df2.groupby('Year').mean()['TotalPay'] #(total pay vs year))\n", + "dftp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = sum(df2[df2['OvertimePay']==0]['Id'].value_counts()==1)\n", + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Stats = df2.loc[df2['OvertimePay'] == 0]\n", + "Stats['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Id_person = df2.loc[df2['TotalPay'].idxmax()]\n", + "Id_person['Id']\n", + "#df2.groupby(['TotalPay']).max()['Id']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12345 REBECCA CHIU\n", + "Name: EmployeeName, dtype: object" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Name_Emp = df1.loc[df1['TotalPayBenefits'] == 87619.78]\n", + "Name_Emp['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "156" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Total_people = df2[(df2['BasePay'] > 150000) & (df2['OvertimePay'] > 100000)]\n", + "Total_people.size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "JobTitle ZOO CURATOR\n", + "TotalPayBenefits 436224\n", + "dtype: object" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Job_Title = df2.groupby('JobTitle', as_index = False)['TotalPayBenefits'].mean().max()\n", + "Job_Title" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2[df2['JobTitle'].str.contains('POLICE')]['JobTitle'].size" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "# How many employees are POLICE\n", + "def police_string(title):\n", + " if 'police' in title.lower().split():\n", + " return True\n", + " else:\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total employees who are Police: 7489\n" + ] + } + ], + "source": [ + "P = sum(df2['JobTitle'].apply(lambda x:police_string(x)))\n", + "print(\"Total employees who are Police: \",P)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From c8da0b4ea96495f61dc6f17ffc76c50ee7088296 Mon Sep 17 00:00:00 2001 From: Naga Kishan Munjulury Venkata <32195210+Nagakishan@users.noreply.github.com> Date: Tue, 8 Sep 2020 11:01:06 +0530 Subject: [PATCH 5/5] Add files via upload --- Assignment2 pd.ipynb | 1206 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1206 insertions(+) create mode 100644 Assignment2 pd.ipynb diff --git a/Assignment2 pd.ipynb b/Assignment2 pd.ipynb new file mode 100644 index 0000000..2bb9e5b --- /dev/null +++ b/Assignment2 pd.ipynb @@ -0,0 +1,1206 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#%matplotlib notebook\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "import the dataset into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
..........................................
148649148650Roy I TilleryCustodian0.000.000.000.00.000.002014NaNSan FranciscoNaN
148650148651Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148651148652Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148652148653Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148653148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", + "

148654 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName \\\n", + "0 1 NATHANIEL FORD \n", + "1 2 GARY JIMENEZ \n", + "2 3 ALBERT PARDINI \n", + "3 4 CHRISTOPHER CHONG \n", + "4 5 PATRICK GARDNER \n", + "... ... ... \n", + "148649 148650 Roy I Tillery \n", + "148650 148651 Not provided \n", + "148651 148652 Not provided \n", + "148652 148653 Not provided \n", + "148653 148654 Joe Lopez \n", + "\n", + " JobTitle BasePay \\\n", + "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 167411.18 \n", + "1 CAPTAIN III (POLICE DEPARTMENT) 155966.02 \n", + "2 CAPTAIN III (POLICE DEPARTMENT) 212739.13 \n", + "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 77916.00 \n", + "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 134401.60 \n", + "... ... ... \n", + "148649 Custodian 0.00 \n", + "148650 Not provided NaN \n", + "148651 Not provided NaN \n", + "148652 Not provided NaN \n", + "148653 Counselor, Log Cabin Ranch 0.00 \n", + "\n", + " OvertimePay OtherPay Benefits TotalPay TotalPayBenefits Year \\\n", + "0 0.00 400184.25 NaN 567595.43 567595.43 2011 \n", + "1 245131.88 137811.38 NaN 538909.28 538909.28 2011 \n", + "2 106088.18 16452.60 NaN 335279.91 335279.91 2011 \n", + "3 56120.71 198306.90 NaN 332343.61 332343.61 2011 \n", + "4 9737.00 182234.59 NaN 326373.19 326373.19 2011 \n", + "... ... ... ... ... ... ... \n", + "148649 0.00 0.00 0.0 0.00 0.00 2014 \n", + "148650 NaN NaN NaN 0.00 0.00 2014 \n", + "148651 NaN NaN NaN 0.00 0.00 2014 \n", + "148652 NaN NaN NaN 0.00 0.00 2014 \n", + "148653 0.00 -618.13 0.0 -618.13 -618.13 2014 \n", + "\n", + " Notes Agency Status \n", + "0 NaN San Francisco NaN \n", + "1 NaN San Francisco NaN \n", + "2 NaN San Francisco NaN \n", + "3 NaN San Francisco NaN \n", + "4 NaN San Francisco NaN \n", + "... ... ... ... \n", + "148649 NaN San Francisco NaN \n", + "148650 NaN San Francisco NaN \n", + "148651 NaN San Francisco NaN \n", + "148652 NaN San Francisco NaN \n", + "148653 NaN San Francisco NaN \n", + "\n", + "[148654 rows x 13 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"Salary.csv\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the column names" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Empty DataFrame\n", + "Columns: [Id, EmployeeName, JobTitle, BasePay, OvertimePay, OtherPay, Benefits, TotalPay, TotalPayBenefits, Year, Notes, Agency, Status]\n", + "Index: []\n" + ] + }, + { + "data": { + "text/plain": [ + "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a = df.columns\n", + "print(df[:0]) # checking the column names in the row 1\n", + "a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the number of rows and cols" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(148654, 13)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display the dataframe info (types of data in columns and not null values etc.)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 148654 entries, 0 to 148653\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 148654 non-null int64 \n", + " 1 EmployeeName 148654 non-null object \n", + " 2 JobTitle 148654 non-null object \n", + " 3 BasePay 148045 non-null float64\n", + " 4 OvertimePay 148650 non-null float64\n", + " 5 OtherPay 148650 non-null float64\n", + " 6 Benefits 112491 non-null float64\n", + " 7 TotalPay 148654 non-null float64\n", + " 8 TotalPayBenefits 148654 non-null float64\n", + " 9 Year 148654 non-null int64 \n", + " 10 Notes 0 non-null float64\n", + " 11 Agency 148654 non-null object \n", + " 12 Status 0 non-null float64\n", + "dtypes: float64(8), int64(2), object(3)\n", + "memory usage: 14.7+ MB\n" + ] + } + ], + "source": [ + "df.info() # (verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display stats of the dataframe like count, mean, std, max, 25% etc....." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148654.000000148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean74327.50000066325.4488415066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42912.85779542764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min1.000000-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%37164.25000033588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%74327.50000065007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%111490.75000094691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max148654.000000319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " Id BasePay OvertimePay OtherPay \\\n", + "count 148654.000000 148045.000000 148650.000000 148650.000000 \n", + "mean 74327.500000 66325.448841 5066.059886 3648.767297 \n", + "std 42912.857795 42764.635495 11454.380559 8056.601866 \n", + "min 1.000000 -166.010000 -0.010000 -7058.590000 \n", + "25% 37164.250000 33588.200000 0.000000 0.000000 \n", + "50% 74327.500000 65007.450000 0.000000 811.270000 \n", + "75% 111490.750000 94691.050000 4658.175000 4236.065000 \n", + "max 148654.000000 319275.010000 245131.880000 400184.250000 \n", + "\n", + " Benefits TotalPay TotalPayBenefits Year Notes \\\n", + "count 112491.000000 148654.000000 148654.000000 148654.000000 0.0 \n", + "mean 25007.893151 74768.321972 93692.554811 2012.522643 NaN \n", + "std 15402.215858 50517.005274 62793.533483 1.117538 NaN \n", + "min -33.890000 -618.130000 -618.130000 2011.000000 NaN \n", + "25% 11535.395000 36168.995000 44065.650000 2012.000000 NaN \n", + "50% 28628.620000 71426.610000 92404.090000 2013.000000 NaN \n", + "75% 35566.855000 105839.135000 132876.450000 2014.000000 NaN \n", + "max 96570.660000 567595.430000 567595.430000 2014.000000 NaN \n", + "\n", + " Status \n", + "count 0.0 \n", + "mean NaN \n", + "std NaN \n", + "min NaN \n", + "25% NaN \n", + "50% NaN \n", + "75% NaN \n", + "max NaN " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe() # statistics - Summary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display null values per column" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "Notes 148654\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_values=df.columns[df.isnull().any()]\n", + "df[null_values].isnull().sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "remove columns will all values as NaN" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleTotalPayTotalPayBenefitsYearAgency
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY567595.43567595.432011San Francisco
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)538909.28538909.282011San Francisco
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)335279.91335279.912011San Francisco
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC332343.61332343.612011San Francisco
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)326373.19326373.192011San Francisco
........................
148649148650Roy I TilleryCustodian0.000.002014San Francisco
148650148651Not providedNot provided0.000.002014San Francisco
148651148652Not providedNot provided0.000.002014San Francisco
148652148653Not providedNot provided0.000.002014San Francisco
148653148654Joe LopezCounselor, Log Cabin Ranch-618.13-618.132014San Francisco
\n", + "

148654 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName \\\n", + "0 1 NATHANIEL FORD \n", + "1 2 GARY JIMENEZ \n", + "2 3 ALBERT PARDINI \n", + "3 4 CHRISTOPHER CHONG \n", + "4 5 PATRICK GARDNER \n", + "... ... ... \n", + "148649 148650 Roy I Tillery \n", + "148650 148651 Not provided \n", + "148651 148652 Not provided \n", + "148652 148653 Not provided \n", + "148653 148654 Joe Lopez \n", + "\n", + " JobTitle TotalPay \\\n", + "0 GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY 567595.43 \n", + "1 CAPTAIN III (POLICE DEPARTMENT) 538909.28 \n", + "2 CAPTAIN III (POLICE DEPARTMENT) 335279.91 \n", + "3 WIRE ROPE CABLE MAINTENANCE MECHANIC 332343.61 \n", + "4 DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) 326373.19 \n", + "... ... ... \n", + "148649 Custodian 0.00 \n", + "148650 Not provided 0.00 \n", + "148651 Not provided 0.00 \n", + "148652 Not provided 0.00 \n", + "148653 Counselor, Log Cabin Ranch -618.13 \n", + "\n", + " TotalPayBenefits Year Agency \n", + "0 567595.43 2011 San Francisco \n", + "1 538909.28 2011 San Francisco \n", + "2 335279.91 2011 San Francisco \n", + "3 332343.61 2011 San Francisco \n", + "4 326373.19 2011 San Francisco \n", + "... ... ... ... \n", + "148649 0.00 2014 San Francisco \n", + "148650 0.00 2014 San Francisco \n", + "148651 0.00 2014 San Francisco \n", + "148652 0.00 2014 San Francisco \n", + "148653 -618.13 2014 San Francisco \n", + "\n", + "[148654 rows x 7 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = df.dropna(axis=1) # inplace = True\n", + "df1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "display number of unique values in each column" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Id 148654\n", + "EmployeeName 110811\n", + "JobTitle 2159\n", + "BasePay 109489\n", + "OvertimePay 65998\n", + "OtherPay 83225\n", + "Benefits 98465\n", + "TotalPay 138486\n", + "TotalPayBenefits 142098\n", + "Year 4\n", + "Notes 0\n", + "Agency 1\n", + "Status 0\n", + "dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = pd.read_csv(\"Salary.csv\")\n", + "df2.nunique()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "mean of total pay of all people based on year" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dftp = df2.groupby('Year').mean()['TotalPay'] #(total pay vs year))\n", + "dftp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have 0 overtime pay" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "77321" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df3 = sum(df2[df2['OvertimePay']==0]['Id'].value_counts()==1)\n", + "df3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "max, min, mean, median and other stats of TotalPay of people having 0 OvertimePay" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 77321.000000\n", + "mean 60229.348901\n", + "std 49307.912350\n", + "min -618.130000\n", + "25% 13290.450000\n", + "50% 58158.590000\n", + "75% 91115.090000\n", + "max 567595.430000\n", + "Name: TotalPay, dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Stats = df2.loc[df2['OvertimePay'] == 0]\n", + "Stats['TotalPay'].describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "find Id of that person with max TotalPay you got in previous question" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Id_person = df2.loc[df2['TotalPay'].idxmax()]\n", + "Id_person['Id']\n", + "#df2.groupby(['TotalPay']).max()['Id']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "name of employee with total pay benefits = 87619.78" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12345 REBECCA CHIU\n", + "Name: EmployeeName, dtype: object" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Name_Emp = df1.loc[df1['TotalPayBenefits'] == 87619.78]\n", + "Name_Emp['EmployeeName']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "how many people have BasePay > 150000 and OvertimePay > 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "156" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Total_people = df2[(df2['BasePay'] > 150000) & (df2['OvertimePay'] > 100000)]\n", + "Total_people.size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "which job title generally has highest average TotalPayBenefits" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "JobTitle ZOO CURATOR\n", + "TotalPayBenefits 436224\n", + "dtype: object" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Job_Title = df2.groupby('JobTitle', as_index = False)['TotalPayBenefits'].mean().max()\n", + "Job_Title" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many employees are POLICE" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2512" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2[df2['JobTitle'].str.contains('POLICE')]['JobTitle'].size" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "# How many employees are POLICE\n", + "def police_string(title):\n", + " if 'police' in title.lower().split():\n", + " return True\n", + " else:\n", + " return False" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total employees who are Police: 7489\n" + ] + } + ], + "source": [ + "P = sum(df2['JobTitle'].apply(lambda x:police_string(x)))\n", + "print(\"Total employees who are Police: \",P)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}