diff --git a/Assignment1 Numpy.ipynb b/Assignment1 Numpy.ipynb new file mode 100644 index 0000000..c275ac4 --- /dev/null +++ b/Assignment1 Numpy.ipynb @@ -0,0 +1 @@ +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Assignment1 Numpy.ipynb","provenance":[],"authorship_tag":"ABX9TyPjJhiw9UZMcdcNfMiL21DQ"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"ig7-yF0L9ZYl","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1598163531606,"user_tz":-330,"elapsed":2640,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"154c9eb2-3478-4fb9-b28d-a409e4c52c76"},"source":["# Q1 - create a list and convert into numpy and print it?\n","\n","import numpy as np\n","list = [1,2,3,4,5]\n","np.array(list)"],"execution_count":1,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([1, 2, 3, 4, 5])"]},"metadata":{"tags":[]},"execution_count":1}]},{"cell_type":"code","metadata":{"id":"dDi5cIDU9-wF","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":68},"executionInfo":{"status":"ok","timestamp":1598163715099,"user_tz":-330,"elapsed":1114,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"e0baec02-57c4-4c54-f685-e6c3d4800f36"},"source":["# Q2- make a 3x3 matrix , convert in into numpy array and then print it?\n","matrix = [[1,2,3],[4,5,6],[7,8,9]]\n","np.array(matrix)"],"execution_count":2,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[1, 2, 3],\n"," [4, 5, 6],\n"," [7, 8, 9]])"]},"metadata":{"tags":[]},"execution_count":2}]},{"cell_type":"code","metadata":{"id":"HcMkXlw6-v8T","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":68},"executionInfo":{"status":"ok","timestamp":1598165846560,"user_tz":-330,"elapsed":1311,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"7319facf-8b30-4969-f4c1-1e21db19ac32"},"source":["# Q3 - make a 3x3 matrix using arange,reshape?\n","arr = []\n","for i in range(1,18):\n"," if(i%2!=0):\n"," arr.append(i) # all i values is converted into list (name of list is arr)\n","arr2=np.array(arr) # arr2 list is converted into array\n","matrix1 = arr2.reshape(3,3)\n","matrix1\n","\n","\n","\n"],"execution_count":35,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 1, 3, 5],\n"," [ 7, 9, 11],\n"," [13, 15, 17]])"]},"metadata":{"tags":[]},"execution_count":35}]},{"cell_type":"code","metadata":{"id":"MzhRrpCb_Pv_","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1598167309850,"user_tz":-330,"elapsed":1653,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"f252b919-f82f-46b1-eea4-2e9afdfc95fa"},"source":["# Q4- Create numpy array with 10 random no's from 0 to 10(one no should be greater than 1)?\n","rand_nums = np.random.randint(0,10,10)\n","rand_nums\n","\n","\n"],"execution_count":67,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([6, 9, 9, 8, 5, 9, 7, 7, 8, 0])"]},"metadata":{"tags":[]},"execution_count":67}]},{"cell_type":"code","metadata":{"id":"pVsKEC33Mdb4","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":102},"executionInfo":{"status":"ok","timestamp":1598167563507,"user_tz":-330,"elapsed":1126,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"b3558c06-c040-4534-fecf-7ff125067c22"},"source":["# Q5 - create a numpy array, convert it into 2d array with 5 rows and print it?\n","arr5 = np.array([1,2,3,4,5])\n","arr5.reshape(5,1)"],"execution_count":73,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[1],\n"," [2],\n"," [3],\n"," [4],\n"," [5]])"]},"metadata":{"tags":[]},"execution_count":73}]},{"cell_type":"code","metadata":{"id":"nd9j_Kn-Nbft","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1598167717920,"user_tz":-330,"elapsed":1252,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"5b0305d7-96c0-4554-cf08-a7c73444247a"},"source":["# Q6- PRINT SHAPE OF THE ABOVE CURATED ARRAY?\n","arr5.shape"],"execution_count":75,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(5,)"]},"metadata":{"tags":[]},"execution_count":75}]},{"cell_type":"code","metadata":{"id":"Eo4O4CYKOBKQ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1598168136620,"user_tz":-330,"elapsed":1006,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"83b52393-c81d-4956-f6ee-3ce4dd3aa6a5"},"source":["# Q7- Create a numpy array with 10 elements in it. Access the 3rd,4th,9th element?\n","arr6 = np.array([1,2,3,4,5,6,7,8,9,10])\n","print(arr6[2],arr6[3],arr6[8])\n"],"execution_count":83,"outputs":[{"output_type":"stream","text":["3 4 9\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"JlrBqemkPnb5","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1598169267514,"user_tz":-330,"elapsed":1097,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"45f6a7eb-c7da-4d54-f5f1-c608051a54d2"},"source":["# Q8- Print alternate elements of above array?\n","print(arr6[0:2],arr6[4:8],arr6[9])\n","\n"],"execution_count":96,"outputs":[{"output_type":"stream","text":["[1 2] [5 6 7 8] 10\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"CLYrPZigT7g9","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"executionInfo":{"status":"ok","timestamp":1598169477045,"user_tz":-330,"elapsed":1238,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"8b9639de-42c1-43cd-cb04-d86ed5e61fa6"},"source":["# Q9- Change the last 3 elements into 100?\n","arr6[7:len(arr6)] = 100\n","arr6"],"execution_count":98,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])"]},"metadata":{"tags":[]},"execution_count":98}]},{"cell_type":"code","metadata":{"id":"Fl_fcTf0Uuoz","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":102},"executionInfo":{"status":"ok","timestamp":1598170147965,"user_tz":-330,"elapsed":1170,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"f34586ba-d754-4393-dd5e-6b2d5c8aa2d7"},"source":["# Q10- Create a 5x5 matrix ,print it. Then print the middle (3x3) matrix?\n","array = np.arange(25)\n","matrix5=array.reshape(5,5)\n","matrix5"],"execution_count":110,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 0, 1, 2, 3, 4],\n"," [ 5, 6, 7, 8, 9],\n"," [10, 11, 12, 13, 14],\n"," [15, 16, 17, 18, 19],\n"," [20, 21, 22, 23, 24]])"]},"metadata":{"tags":[]},"execution_count":110}]},{"cell_type":"code","metadata":{"id":"_og_4VffXSdG","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":68},"executionInfo":{"status":"ok","timestamp":1598170290702,"user_tz":-330,"elapsed":1534,"user":{"displayName":"VISHAL JENA","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14Gi3s4vwgm4-WxCI3Z00c46UXHGpRud4hAkH5mp_MA=s64","userId":"10167023063644792477"}},"outputId":"19bfb681-4bfe-431e-f989-2f90821f8301"},"source":["# now printing the middle (3x3) matrix\n","matrix5[1:4,1:4]"],"execution_count":111,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 6, 7, 8],\n"," [11, 12, 13],\n"," [16, 17, 18]])"]},"metadata":{"tags":[]},"execution_count":111}]}]} \ No newline at end of file diff --git a/Assignment1_Numpy.ipynb b/Assignment1_Numpy.ipynb new file mode 100644 index 0000000..3fb0323 --- /dev/null +++ b/Assignment1_Numpy.ipynb @@ -0,0 +1,395 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Assignment1 Numpy.ipynb", + "provenance": [], + "authorship_tag": "ABX9TyPjJhiw9UZMcdcNfMiL21DQ", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ig7-yF0L9ZYl", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "154c9eb2-3478-4fb9-b28d-a409e4c52c76" + }, + "source": [ + "# Q1 - create a list and convert into numpy and print it?\n", + "\n", + "import numpy as np\n", + "list = [1,2,3,4,5]\n", + "np.array(list)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([1, 2, 3, 4, 5])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 1 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dDi5cIDU9-wF", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "e0baec02-57c4-4c54-f685-e6c3d4800f36" + }, + "source": [ + "# Q2- make a 3x3 matrix , convert in into numpy array and then print it?\n", + "matrix = [[1,2,3],[4,5,6],[7,8,9]]\n", + "np.array(matrix)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[1, 2, 3],\n", + " [4, 5, 6],\n", + " [7, 8, 9]])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 2 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HcMkXlw6-v8T", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "7319facf-8b30-4969-f4c1-1e21db19ac32" + }, + "source": [ + "# Q3 - make a 3x3 matrix using arange,reshape?\n", + "arr = []\n", + "for i in range(1,18):\n", + " if(i%2!=0):\n", + " arr.append(i) # all i values is converted into list (name of list is arr)\n", + "arr2=np.array(arr) # arr2 list is converted into array\n", + "matrix1 = arr2.reshape(3,3)\n", + "matrix1\n", + "\n", + "\n", + "\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 1, 3, 5],\n", + " [ 7, 9, 11],\n", + " [13, 15, 17]])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 35 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MzhRrpCb_Pv_", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "f252b919-f82f-46b1-eea4-2e9afdfc95fa" + }, + "source": [ + "# Q4- Create numpy array with 10 random no's from 0 to 10(one no should be greater than 1)?\n", + "rand_nums = np.random.randint(0,10,10)\n", + "rand_nums\n", + "\n", + "\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([6, 9, 9, 8, 5, 9, 7, 7, 8, 0])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 67 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "pVsKEC33Mdb4", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "outputId": "b3558c06-c040-4534-fecf-7ff125067c22" + }, + "source": [ + "# Q5 - create a numpy array, convert it into 2d array with 5 rows and print it?\n", + "arr5 = np.array([1,2,3,4,5])\n", + "arr5.reshape(5,1)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[1],\n", + " [2],\n", + " [3],\n", + " [4],\n", + " [5]])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 73 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nd9j_Kn-Nbft", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "5b0305d7-96c0-4554-cf08-a7c73444247a" + }, + "source": [ + "# Q6- PRINT SHAPE OF THE ABOVE CURATED ARRAY?\n", + "arr5.shape" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(5,)" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 75 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Eo4O4CYKOBKQ", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "83b52393-c81d-4956-f6ee-3ce4dd3aa6a5" + }, + "source": [ + "# Q7- Create a numpy array with 10 elements in it. Access the 3rd,4th,9th element?\n", + "arr6 = np.array([1,2,3,4,5,6,7,8,9,10])\n", + "print(arr6[2],arr6[3],arr6[8])\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "3 4 9\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "JlrBqemkPnb5", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "45f6a7eb-c7da-4d54-f5f1-c608051a54d2" + }, + "source": [ + "# Q8- Print alternate elements of above array?\n", + "print(arr6[0:2],arr6[4:8],arr6[9])\n", + "\n" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "[1 2] [5 6 7 8] 10\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "CLYrPZigT7g9", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "8b9639de-42c1-43cd-cb04-d86ed5e61fa6" + }, + "source": [ + "# Q9- Change the last 3 elements into 100?\n", + "arr6[7:len(arr6)] = 100\n", + "arr6" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([ 1, 2, 3, 4, 5, 6, 7, 100, 100, 100])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 98 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Fl_fcTf0Uuoz", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 102 + }, + "outputId": "f34586ba-d754-4393-dd5e-6b2d5c8aa2d7" + }, + "source": [ + "# Q10- Create a 5x5 matrix ,print it. Then print the middle (3x3) matrix?\n", + "array = np.arange(25)\n", + "matrix5=array.reshape(5,5)\n", + "matrix5" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 0, 1, 2, 3, 4],\n", + " [ 5, 6, 7, 8, 9],\n", + " [10, 11, 12, 13, 14],\n", + " [15, 16, 17, 18, 19],\n", + " [20, 21, 22, 23, 24]])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 110 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_og_4VffXSdG", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 68 + }, + "outputId": "19bfb681-4bfe-431e-f989-2f90821f8301" + }, + "source": [ + "# now printing the middle (3x3) matrix\n", + "matrix5[1:4,1:4]" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 6, 7, 8],\n", + " [11, 12, 13],\n", + " [16, 17, 18]])" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 111 + } + ] + } + ] +} \ No newline at end of file diff --git a/Assignment_2_pandas.ipynb b/Assignment_2_pandas.ipynb new file mode 100644 index 0000000..810809d --- /dev/null +++ b/Assignment_2_pandas.ipynb @@ -0,0 +1,1033 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Assignment 2 pandas.ipynb", + "provenance": [], + "collapsed_sections": [], + "mount_file_id": "1rmYmHfrhMlt1clZuVjj7ztEwnZQUpbnd", + "authorship_tag": "ABX9TyPYDf8/+SkBpBMizALQluxR", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "NeK7uH4glp6q", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 623 + }, + "outputId": "f80ffc75-6d99-42cd-e274-1e360291ab8e" + }, + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "#q1:- import the dataset into dataframe?\n", + "df = pd.read_csv(\"/content/drive/My Drive/csv files/Salaries.csv\")\n", + "df" + ], + "execution_count": 100, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesAgencyStatus
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011NaNSan FranciscoNaN
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011NaNSan FranciscoNaN
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011NaNSan FranciscoNaN
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011NaNSan FranciscoNaN
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011NaNSan FranciscoNaN
..........................................
148649148650Roy I TilleryCustodian0.000.000.000.00.000.002014NaNSan FranciscoNaN
148650148651Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148651148652Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148652148653Not providedNot providedNaNNaNNaNNaN0.000.002014NaNSan FranciscoNaN
148653148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014NaNSan FranciscoNaN
\n", + "

148654 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName ... Agency Status\n", + "0 1 NATHANIEL FORD ... San Francisco NaN\n", + "1 2 GARY JIMENEZ ... San Francisco NaN\n", + "2 3 ALBERT PARDINI ... San Francisco NaN\n", + "3 4 CHRISTOPHER CHONG ... San Francisco NaN\n", + "4 5 PATRICK GARDNER ... San Francisco NaN\n", + "... ... ... ... ... ...\n", + "148649 148650 Roy I Tillery ... San Francisco NaN\n", + "148650 148651 Not provided ... San Francisco NaN\n", + "148651 148652 Not provided ... San Francisco NaN\n", + "148652 148653 Not provided ... San Francisco NaN\n", + "148653 148654 Joe Lopez ... San Francisco NaN\n", + "\n", + "[148654 rows x 13 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 100 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "nNaaM-I2n25T", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 85 + }, + "outputId": "bedec11b-045a-43ea-ec27-5207fa736fe4" + }, + "source": [ + "#q2:-Display the column names?\n", + "df.columns" + ], + "execution_count": 101, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Index(['Id', 'EmployeeName', 'JobTitle', 'BasePay', 'OvertimePay', 'OtherPay',\n", + " 'Benefits', 'TotalPay', 'TotalPayBenefits', 'Year', 'Notes', 'Agency',\n", + " 'Status'],\n", + " dtype='object')" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 101 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "j4Y7RxwHoTU7", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "31b48d81-602f-4c39-fedc-abf3f6447ae9" + }, + "source": [ + "#q3:-display the number of rows and cols?\n", + "#no_of_columns:-\n", + "no_of_columns = len(df.columns)\n", + "no_of_columns" + ], + "execution_count": 102, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "13" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 102 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4L7nc0AYo3uM", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "310c1939-2cba-470e-a83f-a578678d30ed" + }, + "source": [ + "# no_of_rows:-\n", + "no_of_rows = len(df.index)\n", + "no_of_rows" + ], + "execution_count": 103, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "148654" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 103 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "Xaq0WV_Dp7Pa", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 357 + }, + "outputId": "fe25c4c1-5135-45b1-e0e7-2da0df7e9fad" + }, + "source": [ + "#q3:-display the dataframe info (types of data in columns and not null values etc.)?\n", + "df.info()" + ], + "execution_count": 104, + "outputs": [ + { + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 148654 entries, 0 to 148653\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Id 148654 non-null int64 \n", + " 1 EmployeeName 148654 non-null object \n", + " 2 JobTitle 148654 non-null object \n", + " 3 BasePay 148045 non-null float64\n", + " 4 OvertimePay 148650 non-null float64\n", + " 5 OtherPay 148650 non-null float64\n", + " 6 Benefits 112491 non-null float64\n", + " 7 TotalPay 148654 non-null float64\n", + " 8 TotalPayBenefits 148654 non-null float64\n", + " 9 Year 148654 non-null int64 \n", + " 10 Notes 0 non-null float64\n", + " 11 Agency 148654 non-null object \n", + " 12 Status 0 non-null float64\n", + "dtypes: float64(8), int64(2), object(3)\n", + "memory usage: 14.7+ MB\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "dtdkITuVqNkD", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 297 + }, + "outputId": "eccbf7e4-3145-4a42-e9f7-306ab7cfa4dd" + }, + "source": [ + "#q4:-display stats of the dataframe like count, mean, std, max, 25% etc.....?\n", + "df.describe()" + ], + "execution_count": 105, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearNotesStatus
count148654.000000148045.000000148650.000000148650.000000112491.000000148654.000000148654.000000148654.0000000.00.0
mean74327.50000066325.4488415066.0598863648.76729725007.89315174768.32197293692.5548112012.522643NaNNaN
std42912.85779542764.63549511454.3805598056.60186615402.21585850517.00527462793.5334831.117538NaNNaN
min1.000000-166.010000-0.010000-7058.590000-33.890000-618.130000-618.1300002011.000000NaNNaN
25%37164.25000033588.2000000.0000000.00000011535.39500036168.99500044065.6500002012.000000NaNNaN
50%74327.50000065007.4500000.000000811.27000028628.62000071426.61000092404.0900002013.000000NaNNaN
75%111490.75000094691.0500004658.1750004236.06500035566.855000105839.135000132876.4500002014.000000NaNNaN
max148654.000000319275.010000245131.880000400184.25000096570.660000567595.430000567595.4300002014.000000NaNNaN
\n", + "
" + ], + "text/plain": [ + " Id BasePay ... Notes Status\n", + "count 148654.000000 148045.000000 ... 0.0 0.0\n", + "mean 74327.500000 66325.448841 ... NaN NaN\n", + "std 42912.857795 42764.635495 ... NaN NaN\n", + "min 1.000000 -166.010000 ... NaN NaN\n", + "25% 37164.250000 33588.200000 ... NaN NaN\n", + "50% 74327.500000 65007.450000 ... NaN NaN\n", + "75% 111490.750000 94691.050000 ... NaN NaN\n", + "max 148654.000000 319275.010000 ... NaN NaN\n", + "\n", + "[8 rows x 10 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 105 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "h8dBWWEoqbW_", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 255 + }, + "outputId": "bb5fcd26-15a7-4a37-8a6b-9a564db8452d" + }, + "source": [ + "#q5:-display null values per column?\n", + "df.isnull().sum() # sum() will return the no of NaN values per column" + ], + "execution_count": 106, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Id 0\n", + "EmployeeName 0\n", + "JobTitle 0\n", + "BasePay 609\n", + "OvertimePay 4\n", + "OtherPay 4\n", + "Benefits 36163\n", + "TotalPay 0\n", + "TotalPayBenefits 0\n", + "Year 0\n", + "Notes 148654\n", + "Agency 0\n", + "Status 148654\n", + "dtype: int64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 106 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4DA6j19xq-3A", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 589 + }, + "outputId": "b42f9f61-5793-4053-a383-01cef74b9739" + }, + "source": [ + "#q6:-remove columns with all values as NaN?\n", + "# see in above output showing no of nan values ,as there are total 148654 rows and column 'notes','statues' have all values null\n", + "# i.e removing notes and status\n", + "df.drop(['Notes','Status'],axis=1)" + ], + "execution_count": 107, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdEmployeeNameJobTitleBasePayOvertimePayOtherPayBenefitsTotalPayTotalPayBenefitsYearAgency
01NATHANIEL FORDGENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY167411.180.00400184.25NaN567595.43567595.432011San Francisco
12GARY JIMENEZCAPTAIN III (POLICE DEPARTMENT)155966.02245131.88137811.38NaN538909.28538909.282011San Francisco
23ALBERT PARDINICAPTAIN III (POLICE DEPARTMENT)212739.13106088.1816452.60NaN335279.91335279.912011San Francisco
34CHRISTOPHER CHONGWIRE ROPE CABLE MAINTENANCE MECHANIC77916.0056120.71198306.90NaN332343.61332343.612011San Francisco
45PATRICK GARDNERDEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT)134401.609737.00182234.59NaN326373.19326373.192011San Francisco
....................................
148649148650Roy I TilleryCustodian0.000.000.000.00.000.002014San Francisco
148650148651Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148651148652Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148652148653Not providedNot providedNaNNaNNaNNaN0.000.002014San Francisco
148653148654Joe LopezCounselor, Log Cabin Ranch0.000.00-618.130.0-618.13-618.132014San Francisco
\n", + "

148654 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Id EmployeeName ... Year Agency\n", + "0 1 NATHANIEL FORD ... 2011 San Francisco\n", + "1 2 GARY JIMENEZ ... 2011 San Francisco\n", + "2 3 ALBERT PARDINI ... 2011 San Francisco\n", + "3 4 CHRISTOPHER CHONG ... 2011 San Francisco\n", + "4 5 PATRICK GARDNER ... 2011 San Francisco\n", + "... ... ... ... ... ...\n", + "148649 148650 Roy I Tillery ... 2014 San Francisco\n", + "148650 148651 Not provided ... 2014 San Francisco\n", + "148651 148652 Not provided ... 2014 San Francisco\n", + "148652 148653 Not provided ... 2014 San Francisco\n", + "148653 148654 Joe Lopez ... 2014 San Francisco\n", + "\n", + "[148654 rows x 11 columns]" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 107 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "XIQdYEIks-BW", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "outputId": "a9bf1060-e383-4c49-e385-395ca9fdde62" + }, + "source": [ + "#q7:-display number of unique values in each column?\n", + "for columns in df: # access all columns in df usinfg for loop\n", + " print(columns,\":-\",df[columns].nunique()) # getting no of unique values in each column" + ], + "execution_count": 108, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Id :- 148654\n", + "EmployeeName :- 110811\n", + "JobTitle :- 2159\n", + "BasePay :- 109489\n", + "OvertimePay :- 65998\n", + "OtherPay :- 83225\n", + "Benefits :- 98465\n", + "TotalPay :- 138486\n", + "TotalPayBenefits :- 142098\n", + "Year :- 4\n", + "Notes :- 0\n", + "Agency :- 1\n", + "Status :- 0\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "S6HZo_t9tpoz", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 119 + }, + "outputId": "3bb2e70a-ba99-4ab6-d60c-1423584a63fb" + }, + "source": [ + "#q8:-mean of total pay of all people based on year?\n", + "df.groupby('Year').mean()['TotalPay']" + ], + "execution_count": 109, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Year\n", + "2011 71744.103871\n", + "2012 74113.262265\n", + "2013 77611.443142\n", + "2014 75463.918140\n", + "Name: TotalPay, dtype: float64" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 109 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "3Y6wNi8Nw-QP", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "bb446d38-1f02-477f-848e-114fab202260" + }, + "source": [ + "#q9:-how many people have 0 overtime pay?\n", + "df['OvertimePay'].value_counts()[0] #[0] will give the values or no of people having 0 overtimepay" + ], + "execution_count": 137, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "77321" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 137 + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "mdZCeypKSSj0", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "e9bccb69-f236-4268-bab3-c16ae2942005" + }, + "source": [ + "#q13:-how many people have BasePay > 150000 and OvertimePay > 100000?\n", + "n = df[(df['BasePay']>150000) & (df['OvertimePay']>100000)]['EmployeeName']\n", + "print(\"no of people have BasePay > 150000 and OvertimePay > 100000 is \",len(n))" + ], + "execution_count": 148, + "outputs": [ + { + "output_type": "stream", + "text": [ + "no of people have BasePay > 150000 and OvertimePay > 100000 is 12\n" + ], + "name": "stdout" + } + ] + } + ] +} \ No newline at end of file