Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
370 changes: 370 additions & 0 deletions AssignmentMongoDB-Ammar.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,370 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import ijson\n",
"filename = \"../rows.json\"\n",
"with open(filename, 'r') as f:\n",
" objects = ijson.items(f, 'meta.view.columns.item')\n",
" columns = list(objects)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[u':sid',\n",
" u':id',\n",
" u':position',\n",
" u':created_at',\n",
" u':created_meta',\n",
" u':updated_at',\n",
" u':updated_meta',\n",
" u':meta',\n",
" u'date_of_stop',\n",
" u'time_of_stop',\n",
" u'agency',\n",
" u'subagency',\n",
" u'description',\n",
" u'location',\n",
" u'latitude',\n",
" u'longitude',\n",
" u'accident',\n",
" u'belts',\n",
" u'personal_injury',\n",
" u'property_damage',\n",
" u'fatal',\n",
" u'commercial_license',\n",
" u'hazmat',\n",
" u'commercial_vehicle',\n",
" u'alcohol',\n",
" u'work_zone',\n",
" u'state',\n",
" u'vehicle_type',\n",
" u'year',\n",
" u'make',\n",
" u'model',\n",
" u'color',\n",
" u'violation_type',\n",
" u'charge',\n",
" u'article',\n",
" u'contributed_to_accident',\n",
" u'race',\n",
" u'gender',\n",
" u'driver_city',\n",
" u'driver_state',\n",
" u'dl_state',\n",
" u'arrest_type',\n",
" u'geolocation']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"column_names = [col[\"fieldName\"] for col in columns]\n",
"column_names"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"good_columns = [\n",
" \"date_of_stop\", \n",
" \"time_of_stop\", \n",
" \"agency\", \n",
" \"subagency\",\n",
" \"description\",\n",
" \"location\", \n",
" \"latitude\", \n",
" \"longitude\", \n",
" \"vehicle_type\", \n",
" \"year\", \n",
" \"make\", \n",
" \"model\", \n",
" \"color\", \n",
" \"violation_type\",\n",
" \"race\", \n",
" \"gender\", \n",
" \"driver_state\", \n",
" \"driver_city\", \n",
" \"dl_state\",\n",
" \"arrest_type\"\n",
"]\n",
"data = []\n",
"with open(filename, 'r') as f:\n",
" objects = ijson.items(f, 'data.item')\n",
" for row in objects:\n",
" selected_row = []\n",
" for item in good_columns:\n",
" selected_row.append(row[column_names.index(item)])\n",
" data.append(selected_row)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1093289, 20)\n"
]
}
],
"source": [
"df = pd.DataFrame(data, columns=good_columns)\n",
"print df.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df.date_of_stop=pd.to_datetime(df.date_of_stop)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from pymongo import MongoClient\n",
"import json\n",
"client = MongoClient('localhost', 27017)\n",
"db = client['traffic_violations']\n",
"colls = db.collections"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pymongo.results.InsertManyResult at 0x7f2a99b90cd0>"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coll.insert_many(df.to_dict('records'))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rColor=coll.aggregate([{\"$group\" : {\"_id\":\"$color\", \"count\":{\"$sum\":1}}}])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{u'count': 23, u'_id': u'CHROME'}\n",
"{u'count': 357, u'_id': u'COPPER'}\n",
"{u'count': 4247, u'_id': u'YELLOW'}\n",
"{u'count': 2588, u'_id': u'BRONZE'}\n",
"{u'count': 5245, u'_id': u'BROWN'}\n",
"{u'count': 6491, u'_id': u'GREEN, LGT'}\n",
"{u'count': 19330, u'_id': u'MAROON'}\n",
"{u'count': 2211, u'_id': u'PURPLE'}\n",
"{u'count': 35901, u'_id': u'GOLD'}\n",
"{u'count': 167, u'_id': u'PINK'}\n",
"{u'count': 746, u'_id': u'CREAM'}\n",
"{u'count': 23864, u'_id': u'TAN'}\n",
"{u'count': 12812, u'_id': u'GREEN, DK'}\n",
"{u'count': 14942, u'_id': u'BLUE, LIGHT'}\n",
"{u'count': 14254, u'_id': nan}\n",
"{u'count': 43944, u'_id': u'GREEN'}\n",
"{u'count': 3829, u'_id': u'ORANGE'}\n",
"{u'count': 164915, u'_id': u'WHITE'}\n",
"{u'count': 20, u'_id': u'CAMOUFLAGE'}\n",
"{u'count': 924, u'_id': u'MULTICOLOR'}\n",
"{u'count': 80581, u'_id': u'BLUE'}\n",
"{u'count': 199971, u'_id': u'SILVER'}\n",
"{u'count': 23227, u'_id': u'BLUE, DARK'}\n",
"{u'count': 86613, u'_id': u'RED'}\n",
"{u'count': 116635, u'_id': u'GRAY'}\n",
"{u'count': 216292, u'_id': u'BLACK'}\n",
"{u'count': 13160, u'_id': u'BEIGE'}\n"
]
}
],
"source": [
"for item in rColor:\n",
" print item"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rArrest=coll.aggregate([{\"$group\" : {\"_id\":\"$arrest_type\", \"count\":{\"$sum\":1}}}])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{u'count': 44, u'_id': u'K - Aircraft Assist'}\n",
"{u'count': 299, u'_id': u'J - Unmarked Moving Radar (Moving)'}\n",
"{u'count': 260, u'_id': u'P - Mounted Patrol'}\n",
"{u'count': 561, u'_id': u'H - Unmarked Moving Radar (Stationary)'}\n",
"{u'count': 1545, u'_id': u'I - Marked Moving Radar (Moving)'}\n",
"{u'count': 399, u'_id': u'C - Marked VASCAR'}\n",
"{u'count': 232, u'_id': u'D - Unmarked VASCAR'}\n",
"{u'count': 6882, u'_id': u'E - Marked Stationary Radar'}\n",
"{u'count': 12875, u'_id': u'S - License Plate Recognition'}\n",
"{u'count': 1632, u'_id': u'M - Marked (Off-Duty)'}\n",
"{u'count': 5083, u'_id': u'R - Unmarked Laser'}\n",
"{u'count': 161, u'_id': u'N - Unmarked (Off-Duty)'}\n",
"{u'count': 10336, u'_id': u'L - Motorcycle'}\n",
"{u'count': 110653, u'_id': u'Q - Marked Laser'}\n",
"{u'count': 4077, u'_id': u'G - Marked Moving Radar (Stationary)'}\n",
"{u'count': 11397, u'_id': u'O - Foot Patrol'}\n",
"{u'count': 35195, u'_id': u'B - Unmarked Patrol'}\n",
"{u'count': 772, u'_id': u'F - Unmarked Stationary Radar'}\n",
"{u'count': 890886, u'_id': u'A - Marked Patrol'}\n"
]
}
],
"source": [
"for item in rArrest:\n",
" print item"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"rDay=coll.aggregate(\n",
" [\n",
" {\n",
" \"$project\":\n",
" {\n",
" \"dayRes\": { \"$dayOfWeek\": \"$date_of_stop\" },\n",
" } \n",
" \n",
" },{\n",
" \"$group\": {\n",
" \"_id\":\"$dayRes\", \"count\":{\"$sum\":1}\n",
" }\n",
" }\n",
" ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'rDay' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-12-92a7fd41c9b4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mdays\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrDay\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mdays\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minsert\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mitem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'rDay' is not defined"
]
}
],
"source": [
"for item in rDay:\n",
" print item\n",
" \n",
"# 1 means Sunday and 7 means Saturday"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"client.close()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11+"
}
},
"nbformat": 4,
"nbformat_minor": 2
}