diff --git a/src/notebook/notebook.ipynb b/src/notebook/notebook.ipynb index 2361ed7..fcbd97b 100644 --- a/src/notebook/notebook.ipynb +++ b/src/notebook/notebook.ipynb @@ -2,38 +2,22 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 52, "id": "cc02d731", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", + "import numpy as np\n", "from typing import List, Optional" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 53, "id": "5987e035", "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: '../../data/Attendance_Template_v3 - Mentors.csv'", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mFileNotFoundError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m mentor_df = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m../../data/Attendance_Template_v3 - Mentors.csv\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 2\u001b[39m staff_df = pd.read_csv(\u001b[33m'\u001b[39m\u001b[33m../../data/Attendance_Template_v3 - Staff.csv\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 3\u001b[39m meeting_1 = pd.read_csv(\u001b[33m'\u001b[39m\u001b[33m../../data/2025-07-29 17_45 wrt-nsga-feo Attendance Report - Attendees.csv\u001b[39m\u001b[33m'\u001b[39m)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/CodeYou_AttendanceTracker/venv/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1026\u001b[39m, in \u001b[36mread_csv\u001b[39m\u001b[34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, 
skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[39m\n\u001b[32m 1013\u001b[39m kwds_defaults = _refine_defaults_read(\n\u001b[32m 1014\u001b[39m dialect,\n\u001b[32m 1015\u001b[39m delimiter,\n\u001b[32m (...)\u001b[39m\u001b[32m 1022\u001b[39m dtype_backend=dtype_backend,\n\u001b[32m 1023\u001b[39m )\n\u001b[32m 1024\u001b[39m kwds.update(kwds_defaults)\n\u001b[32m-> \u001b[39m\u001b[32m1026\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/CodeYou_AttendanceTracker/venv/lib/python3.12/site-packages/pandas/io/parsers/readers.py:620\u001b[39m, in \u001b[36m_read\u001b[39m\u001b[34m(filepath_or_buffer, kwds)\u001b[39m\n\u001b[32m 617\u001b[39m _validate_names(kwds.get(\u001b[33m\"\u001b[39m\u001b[33mnames\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 619\u001b[39m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m620\u001b[39m parser = \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 622\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[32m 623\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m parser\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/Desktop/CodeYou_AttendanceTracker/venv/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1620\u001b[39m, in \u001b[36mTextFileReader.__init__\u001b[39m\u001b[34m(self, f, engine, **kwds)\u001b[39m\n\u001b[32m 1617\u001b[39m \u001b[38;5;28mself\u001b[39m.options[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m] = kwds[\u001b[33m\"\u001b[39m\u001b[33mhas_index_names\u001b[39m\u001b[33m\"\u001b[39m]\n\u001b[32m 1619\u001b[39m \u001b[38;5;28mself\u001b[39m.handles: IOHandles | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1620\u001b[39m \u001b[38;5;28mself\u001b[39m._engine = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/CodeYou_AttendanceTracker/venv/lib/python3.12/site-packages/pandas/io/parsers/readers.py:1880\u001b[39m, in \u001b[36mTextFileReader._make_engine\u001b[39m\u001b[34m(self, f, engine)\u001b[39m\n\u001b[32m 1878\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[32m 1879\u001b[39m mode += \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m-> \u001b[39m\u001b[32m1880\u001b[39m \u001b[38;5;28mself\u001b[39m.handles = \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1881\u001b[39m \u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1882\u001b[39m \u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1883\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1884\u001b[39m \u001b[43m \u001b[49m\u001b[43mcompression\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcompression\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1885\u001b[39m \u001b[43m \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmemory_map\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1886\u001b[39m \u001b[43m \u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m=\u001b[49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1887\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mencoding_errors\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstrict\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1888\u001b[39m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstorage_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 1889\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1890\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m.handles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 1891\u001b[39m f = \u001b[38;5;28mself\u001b[39m.handles.handle\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/CodeYou_AttendanceTracker/venv/lib/python3.12/site-packages/pandas/io/common.py:873\u001b[39m, in \u001b[36mget_handle\u001b[39m\u001b[34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[39m\n\u001b[32m 868\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 869\u001b[39m \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[32m 870\u001b[39m \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[32m 871\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m ioargs.encoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mb\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs.mode:\n\u001b[32m 872\u001b[39m \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[32m--> 
\u001b[39m\u001b[32m873\u001b[39m handle = \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[32m 874\u001b[39m \u001b[43m \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 875\u001b[39m \u001b[43m \u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 876\u001b[39m \u001b[43m \u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m=\u001b[49m\u001b[43mioargs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 877\u001b[39m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m=\u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 878\u001b[39m \u001b[43m \u001b[49m\u001b[43mnewline\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 879\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 880\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 881\u001b[39m \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[32m 882\u001b[39m handle = \u001b[38;5;28mopen\u001b[39m(handle, ioargs.mode)\n", - "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: '../../data/Attendance_Template_v3 - Mentors.csv'" - ] - } - ], + "outputs": [], "source": [ "mentor_df = pd.read_csv('../../data/Attendance_Template_v3 - Mentors.csv')\n", "staff_df = pd.read_csv('../../data/Attendance_Template_v3 - Staff.csv')\n", @@ -43,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 54, "id": "886a505c", "metadata": {}, "outputs": [ @@ -72,7 +56,7 @@ "type": "unknown" } ], - "ref": "386d8423-da78-44f7-8320-baabcd221172", + "ref": "1e961b2f-9b34-49f4-9617-148ad4c8ec89", "rows": [ [ "0", @@ -581,7 +565,7 @@ "35 Sunny Zanchi NaN" ] }, - "execution_count": 3, + "execution_count": 54, "metadata": {}, "output_type": "execute_result" } @@ -592,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 
55, "id": "14c2c742", "metadata": {}, "outputs": [ @@ -613,7 +597,7 @@ { "name": "Last name", "rawType": "object", - "type": "string" + "type": "unknown" }, { "name": "Alternate Name", @@ -631,7 +615,7 @@ "type": "float" } ], - "ref": "90d01c97-494f-4e03-8fda-1d0fbf437129", + "ref": "22a255a6-c9a8-4968-bb4f-877554cd8882", "rows": [ [ "0", @@ -768,11 +752,27 @@ null, null, null + ], + [ + "17", + "John Smith", + null, + null, + null, + null + ], + [ + "18", + "Klive", + null, + null, + null, + null ] ], "shape": { "columns": 5, - "rows": 17 + "rows": 19 } }, "text/html": [ @@ -938,32 +938,50 @@ " NaN\n", " NaN\n", " \n", + " \n", + " 17\n", + " John Smith\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", + " \n", + " 18\n", + " Klive\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " \n", " \n", "\n", "" ], "text/plain": [ - " First Name Last name Alternate Name Email Region\n", - "0 Ailene Johnston NaN NaN NaN\n", - "1 Alli Rippy NaN NaN NaN\n", - "2 Amanda Gearhart NaN NaN NaN\n", - "3 August Mapp NaN NaN NaN\n", - "4 Blake Herbert NaN NaN NaN\n", - "5 Brian Luerman NaN NaN NaN\n", - "6 classroom admin NaN NaN NaN\n", - "7 Dan Collins NaN NaN NaN\n", - "8 Danny Morton NaN NaN NaN\n", - "9 David York NaN NaN NaN\n", - "10 Emily Sullivan NaN NaN NaN\n", - "11 Jared Mueller NaN NaN NaN\n", - "12 Jenny Terry NaN NaN NaN\n", - "13 Kalee Howard NaN NaN NaN\n", - "14 Rekkai Steed NaN NaN NaN\n", - "15 Shannon Sheehy NaN NaN NaN\n", - "16 Tonia Nolden NaN NaN NaN" + " First Name Last name Alternate Name Email Region\n", + "0 Ailene Johnston NaN NaN NaN\n", + "1 Alli Rippy NaN NaN NaN\n", + "2 Amanda Gearhart NaN NaN NaN\n", + "3 August Mapp NaN NaN NaN\n", + "4 Blake Herbert NaN NaN NaN\n", + "5 Brian Luerman NaN NaN NaN\n", + "6 classroom admin NaN NaN NaN\n", + "7 Dan Collins NaN NaN NaN\n", + "8 Danny Morton NaN NaN NaN\n", + "9 David York NaN NaN NaN\n", + "10 Emily Sullivan NaN NaN NaN\n", + "11 Jared Mueller NaN NaN NaN\n", + "12 Jenny Terry NaN NaN 
NaN\n", + "13 Kalee Howard NaN NaN NaN\n", + "14 Rekkai Steed NaN NaN NaN\n", + "15 Shannon Sheehy NaN NaN NaN\n", + "16 Tonia Nolden NaN NaN NaN\n", + "17 John Smith NaN NaN NaN NaN\n", + "18 Klive NaN NaN NaN NaN" ] }, - "execution_count": 4, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -974,33 +992,137 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 56, + "id": "3df8b097", + "metadata": {}, + "outputs": [], + "source": [ + "def keep_name(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Titles columns in a DataFrame and drops all other columns that is not the 'First Name' and 'Last Name'.\n", + "\n", + " Args:\n", + " df (pd.DataFrame): A data frame containing at least 'First Name' and 'Last Name' columns.\n", + "\n", + " Returns:\n", + " pd.DataFrame: A Data Frame with only the 'First Name' and 'Last Name' Columns\n", + " \"\"\"\n", + " df.columns = df.columns.str.title()\n", + " df['First Name'] = df['First Name'].str.title()\n", + " df['Last Name'] = df['Last Name'].str.title()\n", + " col_to_keep = ['First Name', 'Last Name']\n", + " return df[col_to_keep].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "a3b7e280", + "metadata": {}, + "outputs": [], + "source": [ + "def process_meeting_times(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Standardize meeting time columns and calculate duration.\n", + "\n", + " Steps performed:\n", + " 1. Converts 'Time Joined' and 'Time Exited' columns to datetime objects.\n", + " 2. 
Calculates the meeting duration in minutes and stores it in 'Duration In Minutes'.\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " DataFrame containing meeting data with 'Time Joined' and 'Time Exited' columns.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " Updated DataFrame with time columns as datetime and a duration column.\n", + " \"\"\"\n", + " df.columns = df.columns.str.title()\n", + " df['Time Joined'] = pd.to_datetime(df['Time Joined'], format='%I:%M %p', errors='coerce')\n", + " df['Time Exited'] = pd.to_datetime(df['Time Exited'], format='%I:%M %p', errors='coerce')\n", + " df['Duration In Minutes'] = (\n", + " df['Time Exited'] - df['Time Joined']\n", + " ).dt.total_seconds() / 60\n", + " return df\n", + "\n", + "\n", + "def add_full_name(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Add a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", + "\n", + " Parameters\n", + " ----------\n", + " df : pd.DataFrame\n", + " DataFrame containing 'First Name' and 'Last Name' columns.\n", + "\n", + " Returns\n", + " -------\n", + " pd.DataFrame\n", + " DataFrame with a new 'Full Name' column.\n", + " \"\"\"\n", + " df['Full Name'] = df['First Name'].str.title() + ' ' + df['Last Name'].str.title()\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "bd9d1679", + "metadata": {}, + "outputs": [], + "source": [ + "def split_names(df: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"Function accounting for instance where first name column contains more than just the first name\n", + "\n", + " Args:\n", + " df (pd.DataFrame): DataFrame that needs to be checked for more than one name in first name\n", + "\n", + " Returns:\n", + " pd.DataFrame: DataFrame containing only the first name in the first name column and fills the last name column with the last name\n", + " \"\"\"\n", + " if ' ' in df['First Name']:\n", + " names = df['First Name'].split()\n", + " if 
len(names) >= 2:\n", + " df['First Name'] = names[0]\n", + " df['Last Name'] = names[-1]\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 59, "id": "c3b40f9d", "metadata": {}, "outputs": [], "source": [ - "def clean_staff_df(df: pd.DataFrame) -> pd.DataFrame:\n", + "def clean_name_df(df: pd.DataFrame) -> pd.DataFrame:\n", " \"\"\"\n", - " Clean and standardize a staff DataFrame.\n", + " Clean and standardize names for a DataFrame.\n", "\n", " Steps performed:\n", " 1. Capitalizes column names for consistency.\n", - " 2. Drops unused columns: 'Alternate Name', 'Email', 'Region'.\n", - " 3. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", + " 2. Drops all columns that are not 'First Name' or 'Last Name'\n", + " 3. Fills null values in name columns.\n", + " 4. Accounts for instance where 'First Name' contains 'Last Name'\n", + " 5. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", "\n", " Parameters\n", " ----------\n", " df : pd.DataFrame\n", - " Input DataFrame containing staff data.\n", + " Input DataFrame containing name data.\n", "\n", " Returns\n", " -------\n", " pd.DataFrame\n", " A cleaned DataFrame with standardized columns and a 'Full Name' field.\n", " \"\"\"\n", - " df.columns = df.columns.str.title()\n", - " df = df.drop(columns=['Alternate Name', 'Email', 'Region'], errors='ignore')\n", - " df[\"Full Name\"] = df['First Name'] + ' ' + df['Last Name']\n", + " df = keep_name(df)\n", + " if df.isna().any().any() == True:\n", + " df = df.fillna('Unknown')\n", + " df = df.apply(split_names, 1) \n", + " df = add_full_name(df)\n", "\n", " return df" ] @@ -1015,7 +1137,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 60, "id": "628949fb", "metadata": {}, "outputs": [ @@ -1023,36 +1145,38 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[31mSignature:\u001b[39m clean_staff_df(df: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame\n", 
- "\u001b[31mDocstring:\u001b[39m\n", - "Clean and standardize a staff DataFrame.\n", + "\u001b[1;31mSignature:\u001b[0m \u001b[0mclean_name_df\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mDocstring:\u001b[0m\n", + "Clean and standardize names for a DataFrame.\n", "\n", "Steps performed:\n", "1. Capitalizes column names for consistency.\n", - "2. Drops unused columns: 'Alternate Name', 'Email', 'Region'.\n", - "3. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", + "2. Drops all comlumns that are not 'Fist Name' or 'Last Name'\n", + "3. Fill null values in name columns.\n", + "4. Accounts for instance where 'First Name' contains 'Last Name'\n", + "5. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", "\n", "Parameters\n", "----------\n", "df : pd.DataFrame\n", - " Input DataFrame containing staff data.\n", + " Input DataFrame containing name data.\n", "\n", "Returns\n", "-------\n", "pd.DataFrame\n", " A cleaned DataFrame with standardized columns and a 'Full Name' field.\n", - "\u001b[31mFile:\u001b[39m /var/folders/2d/yt4_w6zn5pbfjg_jx5sdmm180000gn/T/ipykernel_38707/3720898642.py\n", - "\u001b[31mType:\u001b[39m function" + "\u001b[1;31mFile:\u001b[0m c:\\users\\leolion023\\appdata\\local\\temp\\ipykernel_59672\\4077920601.py\n", + "\u001b[1;31mType:\u001b[0m function" ] } ], "source": [ - "clean_staff_df?" + "clean_name_df?" 
] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 61, "id": "9e6b2ba9", "metadata": {}, "outputs": [ @@ -1081,7 +1205,7 @@ "type": "string" } ], - "ref": "511a1322-e76a-470a-bb55-1fd0d04b55ba", + "ref": "fe6c61ab-e4f1-40e2-863d-71bd92af124e", "rows": [ [ "0", @@ -1121,9 +1245,9 @@ ], [ "6", - "classroom", - "admin", - "classroom admin" + "Classroom", + "Admin", + "Classroom Admin" ], [ "7", @@ -1184,11 +1308,23 @@ "Tonia", "Nolden", "Tonia Nolden" + ], + [ + "17", + "John", + "Smith", + "John Smith" + ], + [ + "18", + "Klive", + "Unknown", + "Klive Unknown" ] ], "shape": { "columns": 3, - "rows": 17 + "rows": 19 } }, "text/html": [ @@ -1254,9 +1390,9 @@ " \n", " \n", " 6\n", - " classroom\n", - " admin\n", - " classroom admin\n", + " Classroom\n", + " Admin\n", + " Classroom Admin\n", " \n", " \n", " 7\n", @@ -1318,6 +1454,18 @@ " Nolden\n", " Tonia Nolden\n", " \n", + " \n", + " 17\n", + " John\n", + " Smith\n", + " John Smith\n", + " \n", + " \n", + " 18\n", + " Klive\n", + " Unknown\n", + " Klive Unknown\n", + " \n", " \n", "\n", "" @@ -1330,7 +1478,7 @@ "3 August Mapp August Mapp\n", "4 Blake Herbert Blake Herbert\n", "5 Brian Luerman Brian Luerman\n", - "6 classroom admin classroom admin\n", + "6 Classroom Admin Classroom Admin\n", "7 Dan Collins Dan Collins\n", "8 Danny Morton Danny Morton\n", "9 David York David York\n", @@ -1340,93 +1488,34 @@ "13 Kalee Howard Kalee Howard\n", "14 Rekkai Steed Rekkai Steed\n", "15 Shannon Sheehy Shannon Sheehy\n", - "16 Tonia Nolden Tonia Nolden" + "16 Tonia Nolden Tonia Nolden\n", + "17 John Smith John Smith\n", + "18 Klive Unknown Klive Unknown" ] }, - "execution_count": 6, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "staff_list = clean_staff_df(staff_df)\n", + "staff_list = clean_name_df(staff_df)\n", "staff_list" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "56dba6e7", - "metadata": {}, - "outputs": [], - "source": [ - "def 
clean_mentor_df(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"\n", - " Clean and standardize a mentor DataFrame.\n", - "\n", - " Steps performed:\n", - " 1. Fills missing values in the 'Last Name' column with 'Unknown'.\n", - " 2. Drops the 'Alternate Name' column if it exists.\n", - " 3. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " Input DataFrame containing mentor data.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " A cleaned DataFrame with standardized fields and a 'Full Name' column.\n", - " \"\"\"\n", - " \n", - " df['Last Name'] = df['Last Name'].fillna('Unknown')\n", - " df = df.drop(columns='Alternate Name', errors='ignore')\n", - " df['Full Name'] = df['First Name'] + ' ' + df['Last Name']\n", - " \n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 12, + "execution_count": 62, "id": "6c86df50", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[31mSignature:\u001b[39m clean_mentor_df(df: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame\n", - "\u001b[31mDocstring:\u001b[39m\n", - "Clean and standardize a mentor DataFrame.\n", - "\n", - "Steps performed:\n", - "1. Fills missing values in the 'Last Name' column with 'Unknown'.\n", - "2. Drops the 'Alternate Name' column if it exists.\n", - "3. Creates a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", - "\n", - "Parameters\n", - "----------\n", - "df : pd.DataFrame\n", - " Input DataFrame containing mentor data.\n", - "\n", - "Returns\n", - "-------\n", - "pd.DataFrame\n", - " A cleaned DataFrame with standardized fields and a 'Full Name' column.\n", - "\u001b[31mFile:\u001b[39m /var/folders/2d/yt4_w6zn5pbfjg_jx5sdmm180000gn/T/ipykernel_38707/1949478838.py\n", - "\u001b[31mType:\u001b[39m function" - ] - } - ], + "outputs": [], "source": [ - "clean_mentor_df?" 
+ "##clean_mentor_df?" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 63, "id": "115b41b9", "metadata": {}, "outputs": [ @@ -1455,7 +1544,7 @@ "type": "string" } ], - "ref": "e85843c5-f48a-4042-b413-bf0fa35e4f26", + "ref": "6af6b282-fc16-414e-ad7f-ed6c9b10b662", "rows": [ [ "0", @@ -1964,19 +2053,19 @@ "35 Sunny Zanchi Sunny Zanchi" ] }, - "execution_count": 8, + "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mentor_list = clean_mentor_df(mentor_df)\n", + "mentor_list = clean_name_df(mentor_df)\n", "mentor_list" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 64, "id": "74a3b369", "metadata": {}, "outputs": [ @@ -2020,7 +2109,7 @@ "type": "string" } ], - "ref": "a89cf578-6332-40a2-bbc7-e2ce543ae452", + "ref": "94693d9e-2c1b-415a-882f-460903e77e18", "rows": [ [ "0", @@ -2386,7 +2475,7 @@ "15 6:01 PM 8:26 PM " ] }, - "execution_count": 9, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -2397,76 +2486,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "125e4482", - "metadata": {}, - "outputs": [], - "source": [ - "def clean_meeting(df: pd.DataFrame) -> pd.DataFrame:\n", - " df.columns = df.columns.str.title() #make column names consistent\n", - " df['Time Joined'] = pd.to_datetime(df['Time Joined'], format='%I:%M %p') #convert time to make it more accessible\n", - " df['Time Exited'] = pd.to_datetime(df['Time Exited'], format='%I:%M %p') #convert time to make it more accessible\n", - " df['Duration in Minutes'] = (df['Time Exited'] - df['Time Joined']).dt.total_seconds() / 60 #calculate numnber of minutes\n", - " df[\"Full Name\"] = df['First Name'] + ' ' + df['Last Name'] #create full name column\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "64fd8af3", - "metadata": {}, - "outputs": [], - "source": [ - "def process_meeting_times(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"\n", - " 
Standardize meeting time columns and calculate duration.\n", - "\n", - " Steps performed:\n", - " 1. Converts 'Time Joined' and 'Time Exited' columns to datetime objects.\n", - " 2. Calculates the meeting duration in minutes and stores it in 'Duration in Minutes'.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " DataFrame containing meeting data with 'Time Joined' and 'Time Exited' columns.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " Updated DataFrame with time columns as datetime and a duration column.\n", - " \"\"\"\n", - " df.columns = df.columns.str.title()\n", - " df['Time Joined'] = pd.to_datetime(df['Time Joined'], format='%I:%M %p', errors='coerce')\n", - " df['Time Exited'] = pd.to_datetime(df['Time Exited'], format='%I:%M %p', errors='coerce')\n", - " df['Duration In Minutes'] = (\n", - " df['Time Exited'] - df['Time Joined']\n", - " ).dt.total_seconds() / 60\n", - " return df\n", - "\n", - "\n", - "def add_full_name(df: pd.DataFrame) -> pd.DataFrame:\n", - " \"\"\"\n", - " Add a 'Full Name' column by combining 'First Name' and 'Last Name'.\n", - "\n", - " Parameters\n", - " ----------\n", - " df : pd.DataFrame\n", - " DataFrame containing 'First Name' and 'Last Name' columns.\n", - "\n", - " Returns\n", - " -------\n", - " pd.DataFrame\n", - " DataFrame with a new 'Full Name' column.\n", - " \"\"\"\n", - " df['Full Name'] = df['First Name'] + ' ' + df['Last Name']\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "03fa1b54", + "execution_count": 65, + "id": "00cc7690", "metadata": {}, "outputs": [ { @@ -2509,7 +2530,7 @@ "type": "datetime" }, { - "name": "Duration in Minutes", + "name": "Duration In Minutes", "rawType": "float64", "type": "float" }, @@ -2519,7 +2540,7 @@ "type": "string" } ], - "ref": "478cb0d6-23c0-4a3b-8903-4a493f6302fa", + "ref": "96c6aeb8-70dd-405d-805a-348b21c4ab95", "rows": [ [ "0", @@ -2728,7 +2749,7 @@ " Duration\n", " Time Joined\n", " 
Time Exited\n", - " Duration in Minutes\n", + " Duration In Minutes\n", " Full Name\n", " \n", " \n", @@ -2932,7 +2953,7 @@ "14 Zachary Young zach*************@***.com 2 hr 6 min \n", "15 Zhenzhen Xie xzz.**********@***.com 2 hr 24 min \n", "\n", - " Time Joined Time Exited Duration in Minutes \\\n", + " Time Joined Time Exited Duration In Minutes \\\n", "0 1900-01-01 18:01:00 1900-01-01 20:26:00 145.0 \n", "1 1900-01-01 17:59:00 1900-01-01 20:27:00 148.0 \n", "2 1900-01-01 17:59:00 1900-01-01 20:26:00 147.0 \n", @@ -2969,20 +2990,35 @@ "15 Zhenzhen Xie " ] }, - "execution_count": 11, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "meeting_1_cleaned = clean_meeting(meeting_1)\n", - "meeting_1_cleaned" + "test = process_meeting_times(meeting_1)\n", + "test = add_full_name(test)\n", + "test" ] }, { "cell_type": "code", - "execution_count": null, - "id": "223affd0", + "execution_count": 66, + "id": "838ca4e1", + "metadata": {}, + "outputs": [], + "source": [ + "def clean_meeting(df: pd.DataFrame) -> pd.DataFrame:\n", + " df = process_meeting_times(df)\n", + " df = add_full_name(df)\n", + " col_to_keep = ['Full Name', 'Duration In Minutes']\n", + " return df[col_to_keep].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "03fa1b54", "metadata": {}, "outputs": [ { @@ -3000,12 +3036,12 @@ "type": "string" }, { - "name": "Duration in Minutes", + "name": "Duration In Minutes", "rawType": "float64", "type": "float" } ], - "ref": "3fc867e1-bb6c-4849-9679-9581dfb7fada", + "ref": "6a0f0e25-20f4-4b04-b984-4634b9ca9e85", "rows": [ [ "0", @@ -3014,14 +3050,309 @@ ], [ "1", + "Angela Moore", + "148.0" + ], + [ + "2", "Bill Montgomery", "147.0" ], [ - "2", + "3", + "Charles Norman", + "162.0" + ], + [ + "4", "Christina Porter", "124.0" - ] + ], + [ + "5", + "Cindy Wedding", + "142.0" + ], + [ + "6", + "Classroom Admin", + "1.0" + ], + [ + "7", + "Joel Anderson", + "147.0" + ], + [ + "8", + "Jonathan Chadwell", + 
"144.0" + ], + [ + "9", + "Mendell M", + "120.0" + ], + [ + "10", + "Oli Ivanova", + "134.0" + ], + [ + "11", + "S. Byrnes", + "118.0" + ], + [ + "12", + "Tara Leigh Hylton", + "125.0" + ], + [ + "13", + "Whitney Parks", + "158.0" + ], + [ + "14", + "Zachary Young", + "126.0" + ], + [ + "15", + "Zhenzhen Xie", + "145.0" + ] + ], + "shape": { + "columns": 2, + "rows": 16 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Full NameDuration In Minutes
0Alex D145.0
1Angela Moore148.0
2Bill Montgomery147.0
3Charles Norman162.0
4Christina Porter124.0
5Cindy Wedding142.0
6Classroom Admin1.0
7Joel Anderson147.0
8Jonathan Chadwell144.0
9Mendell M120.0
10Oli Ivanova134.0
11S. Byrnes118.0
12Tara Leigh Hylton125.0
13Whitney Parks158.0
14Zachary Young126.0
15Zhenzhen Xie145.0
\n", + "
" + ], + "text/plain": [ + " Full Name Duration In Minutes\n", + "0 Alex D 145.0\n", + "1 Angela Moore 148.0\n", + "2 Bill Montgomery 147.0\n", + "3 Charles Norman 162.0\n", + "4 Christina Porter 124.0\n", + "5 Cindy Wedding 142.0\n", + "6 Classroom Admin 1.0\n", + "7 Joel Anderson 147.0\n", + "8 Jonathan Chadwell 144.0\n", + "9 Mendell M 120.0\n", + "10 Oli Ivanova 134.0\n", + "11 S. Byrnes 118.0\n", + "12 Tara Leigh Hylton 125.0\n", + "13 Whitney Parks 158.0\n", + "14 Zachary Young 126.0\n", + "15 Zhenzhen Xie 145.0" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meeting_1_cleaned = clean_meeting(meeting_1)\n", + "meeting_1_cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "0edd39e3", + "metadata": {}, + "outputs": [], + "source": [ + "def match_meeting_times(clean_list: pd.DataFrame,clean_meeting_time: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Merges two DataFrame containing the name and duration of members that were present for a meeting.\n", + "\n", + " Args:\n", + " clean_list (pd.DataFrame): A DataFrame containing the 'Full Name' column.\n", + " clean_meeting_time (pd.DataFrame): A DataFrame containing 'Duration in Minutes' and 'Full Name' columns.\n", + "\n", + " Returns:\n", + " pd.DataFrame: A DataFrame containing the 'Full Name' and 'Duration in Minutes' columns.\n", + " \"\"\"\n", + " df = pd.merge(clean_list['Full Name'],clean_meeting_time[['Duration In Minutes','Full Name']], how = 'inner',on='Full Name')\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "78bfd7b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Full Name', 'Duration In Minutes'], dtype='object')" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meeting_1_cleaned.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "976408a3", 
+ "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Full Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Duration In Minutes", + "rawType": "float64", + "type": "float" + } + ], + "ref": "5f7e0092-dcb4-46ba-8ac9-88d91052ec54", + "rows": [ + [ + "0", + "Alex D", + "145.0" + ], + [ + "1", + "Bill Montgomery", + "147.0" + ], + [ + "2", + "Christina Porter", + "124.0" + ] ], "shape": { "columns": 2, @@ -3048,7 +3379,7 @@ " \n", " \n", " Full Name\n", - " Duration in Minutes\n", + " Duration In Minutes\n", " \n", " \n", " \n", @@ -3072,27 +3403,27 @@ "" ], "text/plain": [ - " Full Name Duration in Minutes\n", + " Full Name Duration In Minutes\n", "0 Alex D 145.0\n", "1 Bill Montgomery 147.0\n", "2 Christina Porter 124.0" ] }, - "execution_count": 12, + "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#merge meeting 1 with mentor list to consolidate into list of mentors that where present and how long they where present\n", - "mentor_time_df = pd.merge(mentor_list['Full Name'], meeting_1_cleaned[['Duration in Minutes', 'Full Name']], how= 'inner', on='Full Name')\n", - "mentor_time_df" + "mentor_time = match_meeting_times(mentor_list,meeting_1_cleaned)\n", + "mentor_time" ] }, { "cell_type": "code", - "execution_count": null, - "id": "976408a3", + "execution_count": 71, + "id": "4aed423e", "metadata": {}, "outputs": [ { @@ -3110,16 +3441,22 @@ "type": "string" }, { - "name": "Duration in Minutes", + "name": "Duration In Minutes", "rawType": "float64", "type": "float" } ], - "ref": "ba38fd82-f12a-46dd-8eed-e6d90293376d", - "rows": [], + "ref": "2c577d97-b807-4135-8b32-9bc0947eca8b", + "rows": [ + [ + "0", + "Classroom Admin", + "1.0" + ] + ], "shape": { "columns": 2, - "rows": 0 + "rows": 1 } }, "text/html": [ @@ -3142,35 +3479,677 @@ " \n", " \n", " Full 
Name\n", - " Duration in Minutes\n", + " Duration In Minutes\n", " \n", " \n", " \n", + " \n", + " 0\n", + " Classroom Admin\n", + " 1.0\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [Full Name, Duration in Minutes]\n", - "Index: []" + " Full Name Duration In Minutes\n", + "0 Classroom Admin 1.0" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#merge staff 1 with mentor list to consolidate into list of staff that where present and how long they where present\n", + "staff_time = match_meeting_times(staff_list,meeting_1_cleaned)\n", + "staff_time" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "2bfdd92e", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Full Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Duration In Minutes", + "rawType": "float64", + "type": "float" + } + ], + "ref": "5bf4bf82-4e69-499e-ae22-96cf024622e1", + "rows": [ + [ + "0", + "Sheyla Diaz", + "103.0" + ], + [ + "1", + "James Glosser", + "100.0" + ], + [ + "2", + "Reed Haddix", + "106.0" + ], + [ + "3", + "John Hankins", + "98.0" + ], + [ + "4", + "Stephanie Jones", + "108.0" + ], + [ + "5", + "Aaron Laliberty", + "99.0" + ], + [ + "6", + "Dakota Mcmullin", + "72.0" + ], + [ + "7", + "Toni-Ivy Ownn.", + "100.0" + ], + [ + "8", + "Michael Puckett", + "97.0" + ], + [ + "9", + "Leighton Pulliam", + "101.0" + ], + [ + "10", + "Ken Quiggins", + "103.0" + ], + [ + "11", + "Amber Ratliff", + "100.0" + ], + [ + "12", + "Jill Schrank", + "102.0" + ], + [ + "13", + "Huan Song", + "103.0" + ] + ], + "shape": { + "columns": 2, + "rows": 14 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Full NameDuration In Minutes
0Sheyla Diaz103.0
1James Glosser100.0
2Reed Haddix106.0
3John Hankins98.0
4Stephanie Jones108.0
5Aaron Laliberty99.0
6Dakota Mcmullin72.0
7Toni-Ivy Ownn.100.0
8Michael Puckett97.0
9Leighton Pulliam101.0
10Ken Quiggins103.0
11Amber Ratliff100.0
12Jill Schrank102.0
13Huan Song103.0
\n", + "
" + ], + "text/plain": [ + " Full Name Duration In Minutes\n", + "0 Sheyla Diaz 103.0\n", + "1 James Glosser 100.0\n", + "2 Reed Haddix 106.0\n", + "3 John Hankins 98.0\n", + "4 Stephanie Jones 108.0\n", + "5 Aaron Laliberty 99.0\n", + "6 Dakota Mcmullin 72.0\n", + "7 Toni-Ivy Ownn. 100.0\n", + "8 Michael Puckett 97.0\n", + "9 Leighton Pulliam 101.0\n", + "10 Ken Quiggins 103.0\n", + "11 Amber Ratliff 100.0\n", + "12 Jill Schrank 102.0\n", + "13 Huan Song 103.0" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "meeting_2_cleaned = clean_meeting(meeting_2)\n", + "meeting_2_cleaned" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "b489007c", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Full Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Duration In Minutes", + "rawType": "float64", + "type": "float" + } + ], + "ref": "976368be-ed62-493b-aa3f-c5d6755b1ac8", + "rows": [ + [ + "0", + "Ken Quiggins", + "103.0" + ], + [ + "1", + "Michael Puckett", + "97.0" + ] + ], + "shape": { + "columns": 2, + "rows": 2 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Full NameDuration In Minutes
0Ken Quiggins103.0
1Michael Puckett97.0
\n", + "
" + ], + "text/plain": [ + " Full Name Duration In Minutes\n", + "0 Ken Quiggins 103.0\n", + "1 Michael Puckett 97.0" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#merge meeting 2 with mentor list to consolidate into list of mentors that where present and how long they where present\n", + "mentor_time_2 = match_meeting_times(mentor_list,meeting_2_cleaned)\n", + "mentor_time_2" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "20023162", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Full Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Duration In Minutes", + "rawType": "float64", + "type": "float" + } + ], + "ref": "a1085e69-3fdf-446d-bfa9-296ef7f6b0a0", + "rows": [], + "shape": { + "columns": 2, + "rows": 0 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Full NameDuration In Minutes
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Full Name, Duration In Minutes]\n", + "Index: []" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#merge meeting 2 with staff list to consolidate into list of staff that where present and how long they where present\n", + "staff_time_2 = match_meeting_times(staff_list,meeting_2_cleaned)\n", + "staff_time_2" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "b88830f9", + "metadata": {}, + "outputs": [], + "source": [ + "def remove_staff_mentor_meeting(meeting: pd.DataFrame, staff_list: pd.DataFrame, mentor_list: pd.DataFrame) -> pd.DataFrame:\n", + " \"\"\"\n", + " Removes staff and mentor names from the meeting list\n", + " Args:\n", + " meeting (pd.DataFrame): meeting being reduced\n", + " staff_list (pd.DataFrame): list of staff names\n", + " mentor_list (pd.DataFrame): list of mentor names\n", + "\n", + " Returns:\n", + " pd.DataFrame: list of members and time duration excluding staff and mentor names\n", + " \"\"\"\n", + " df = meeting[~meeting['Full Name'].isin(mentor_list['Full Name'])]\n", + " df = df[~df['Full Name'].isin(staff_list['Full Name'])]\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "205ddadf", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Full Name", + "rawType": "object", + "type": "string" + }, + { + "name": "Duration In Minutes", + "rawType": "float64", + "type": "float" + } + ], + "ref": "9078f185-1d69-4c12-b0b1-922767f1117b", + "rows": [ + [ + "1", + "Angela Moore", + "148.0" + ], + [ + "3", + "Charles Norman", + "162.0" + ], + [ + "5", + "Cindy Wedding", + "142.0" + ], + [ + "7", + "Joel Anderson", + "147.0" + ], + [ + "8", + "Jonathan Chadwell", + "144.0" + ], + [ + "9", + "Mendell M", + "120.0" 
+ ], + [ + "10", + "Oli Ivanova", + "134.0" + ], + [ + "11", + "S. Byrnes", + "118.0" + ], + [ + "12", + "Tara Leigh Hylton", + "125.0" + ], + [ + "13", + "Whitney Parks", + "158.0" + ], + [ + "14", + "Zachary Young", + "126.0" + ], + [ + "15", + "Zhenzhen Xie", + "145.0" + ] + ], + "shape": { + "columns": 2, + "rows": 12 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Full NameDuration In Minutes
1Angela Moore148.0
3Charles Norman162.0
5Cindy Wedding142.0
7Joel Anderson147.0
8Jonathan Chadwell144.0
9Mendell M120.0
10Oli Ivanova134.0
11S. Byrnes118.0
12Tara Leigh Hylton125.0
13Whitney Parks158.0
14Zachary Young126.0
15Zhenzhen Xie145.0
\n", + "
" + ], + "text/plain": [ + " Full Name Duration In Minutes\n", + "1 Angela Moore 148.0\n", + "3 Charles Norman 162.0\n", + "5 Cindy Wedding 142.0\n", + "7 Joel Anderson 147.0\n", + "8 Jonathan Chadwell 144.0\n", + "9 Mendell M 120.0\n", + "10 Oli Ivanova 134.0\n", + "11 S. Byrnes 118.0\n", + "12 Tara Leigh Hylton 125.0\n", + "13 Whitney Parks 158.0\n", + "14 Zachary Young 126.0\n", + "15 Zhenzhen Xie 145.0" ] }, - "execution_count": 13, + "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#merge meeting 1 with staff list to consolidate into list of staff that where present and how long they where present\n", - "staff_time = pd.merge(staff_list['Full Name'], meeting_1_cleaned[['Duration in Minutes', 'Full Name']], how= 'inner', on='Full Name')\n", - "staff_time" + "meeting_1_students = remove_staff_mentor_meeting(meeting_1_cleaned,staff_list,mentor_list)\n", + "meeting_1_students" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "2bfdd92e", + "execution_count": 78, + "id": "d21aae8a", "metadata": {}, "outputs": [ { @@ -3183,206 +4162,82 @@ "type": "integer" }, { - "name": "First Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Last Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Email", - "rawType": "object", - "type": "string" - }, - { - "name": "Duration", + "name": "Full Name", "rawType": "object", "type": "string" }, { - "name": "Time Joined", - "rawType": "datetime64[ns]", - "type": "datetime" - }, - { - "name": "Time Exited", - "rawType": "datetime64[ns]", - "type": "datetime" - }, - { - "name": "Duration in Minutes", + "name": "Duration In Minutes", "rawType": "float64", "type": "float" - }, - { - "name": "Full Name", - "rawType": "object", - "type": "string" } ], - "ref": "1ec04c56-7868-44a7-8ad2-9258af5f4e2d", + "ref": "f52c6f68-8230-4454-bd5c-aa008981772c", "rows": [ [ "0", - "Sheyla", - "Diaz", - "dshe*****@***.com", - "1 hr 43 min", - "1900-01-01 17:58:00", - 
"1900-01-01 19:41:00", - "103.0", - "Sheyla Diaz" + "Sheyla Diaz", + "103.0" ], [ "1", - "James", - "Glosser", - "demo**********@***.com", - "1 hr 40 min", - "1900-01-01 18:01:00", - "1900-01-01 19:41:00", - "100.0", - "James Glosser" + "James Glosser", + "100.0" ], [ "2", - "Reed", - "Haddix", - "shir***********@***.com", - "1 hr 46 min", - "1900-01-01 17:55:00", - "1900-01-01 19:41:00", - "106.0", - "Reed Haddix" + "Reed Haddix", + "106.0" ], [ "3", - "John", - "Hankins", - "john**********@***.com", - "1 hr 38 min", - "1900-01-01 18:03:00", - "1900-01-01 19:41:00", - "98.0", - "John Hankins" + "John Hankins", + "98.0" ], [ "4", - "Stephanie", - "Jones", - "step*******************@***.com", - "1 hr 48 min", - "1900-01-01 17:53:00", - "1900-01-01 19:41:00", - "108.0", - "Stephanie Jones" + "Stephanie Jones", + "108.0" ], [ "5", - "Aaron", - "LaLiberty", - "daei**@***.com", - "1 hr 39 min", - "1900-01-01 18:02:00", - "1900-01-01 19:41:00", - "99.0", - "Aaron LaLiberty" + "Aaron Laliberty", + "99.0" ], [ "6", - "Dakota", - "McMullin", - "kota****@***.com", - "1 hr 11 min", - "1900-01-01 17:59:00", - "1900-01-01 19:11:00", - "72.0", - "Dakota McMullin" + "Dakota Mcmullin", + "72.0" ], [ "7", - "Toni-Ivy", - "Ownn.", - "toni*********@***.com", - "1 hr 40 min", - "1900-01-01 18:01:00", - "1900-01-01 19:41:00", - "100.0", - "Toni-Ivy Ownn." 
- ], - [ - "8", - "Michael", - "Puckett", - "mich***********@***.com", - "1 hr 37 min", - "1900-01-01 18:04:00", - "1900-01-01 19:41:00", - "97.0", - "Michael Puckett" + "Toni-Ivy Ownn.", + "100.0" ], [ "9", - "Leighton", - "Pulliam", - "l8on**@***.com", - "1 hr 40 min", - "1900-01-01 18:00:00", - "1900-01-01 19:41:00", - "101.0", - "Leighton Pulliam" - ], - [ - "10", - "ken", - "quiggins", - "kash************@***.com", - "1 hr 43 min", - "1900-01-01 17:58:00", - "1900-01-01 19:41:00", - "103.0", - "ken quiggins" + "Leighton Pulliam", + "101.0" ], [ "11", - "Amber", - "Ratliff", - "arat********@***.com", - "1 hr 40 min", - "1900-01-01 18:01:00", - "1900-01-01 19:41:00", - "100.0", - "Amber Ratliff" + "Amber Ratliff", + "100.0" ], [ "12", - "Jill", - "Schrank", - "just*******@***.com", - "1 hr 41 min", - "1900-01-01 17:59:00", - "1900-01-01 19:41:00", - "102.0", - "Jill Schrank" + "Jill Schrank", + "102.0" ], [ "13", - "Huan", - "Song", - "huan********@***.com", - "1 hr 43 min", - "1900-01-01 17:58:00", - "1900-01-01 19:41:00", - "103.0", - "Huan Song" + "Huan Song", + "103.0" ] ], "shape": { - "columns": 8, - "rows": 14 + "columns": 2, + "rows": 12 } }, "text/html": [ @@ -3404,405 +4259,113 @@ " \n", " \n", " \n", - " First Name\n", - " Last Name\n", - " Email\n", - " Duration\n", - " Time Joined\n", - " Time Exited\n", - " Duration in Minutes\n", " Full Name\n", + " Duration In Minutes\n", " \n", " \n", " \n", " \n", " 0\n", - " Sheyla\n", - " Diaz\n", - " dshe*****@***.com\n", - " 1 hr 43 min\n", - " 1900-01-01 17:58:00\n", - " 1900-01-01 19:41:00\n", - " 103.0\n", " Sheyla Diaz\n", + " 103.0\n", " \n", " \n", " 1\n", - " James\n", - " Glosser\n", - " demo**********@***.com\n", - " 1 hr 40 min\n", - " 1900-01-01 18:01:00\n", - " 1900-01-01 19:41:00\n", - " 100.0\n", " James Glosser\n", + " 100.0\n", " \n", " \n", " 2\n", - " Reed\n", - " Haddix\n", - " shir***********@***.com\n", - " 1 hr 46 min\n", - " 1900-01-01 17:55:00\n", - " 1900-01-01 19:41:00\n", - " 
106.0\n", " Reed Haddix\n", + " 106.0\n", " \n", " \n", " 3\n", - " John\n", - " Hankins\n", - " john**********@***.com\n", - " 1 hr 38 min\n", - " 1900-01-01 18:03:00\n", - " 1900-01-01 19:41:00\n", - " 98.0\n", " John Hankins\n", + " 98.0\n", " \n", " \n", " 4\n", - " Stephanie\n", - " Jones\n", - " step*******************@***.com\n", - " 1 hr 48 min\n", - " 1900-01-01 17:53:00\n", - " 1900-01-01 19:41:00\n", - " 108.0\n", " Stephanie Jones\n", + " 108.0\n", " \n", " \n", " 5\n", - " Aaron\n", - " LaLiberty\n", - " daei**@***.com\n", - " 1 hr 39 min\n", - " 1900-01-01 18:02:00\n", - " 1900-01-01 19:41:00\n", + " Aaron Laliberty\n", " 99.0\n", - " Aaron LaLiberty\n", " \n", " \n", " 6\n", - " Dakota\n", - " McMullin\n", - " kota****@***.com\n", - " 1 hr 11 min\n", - " 1900-01-01 17:59:00\n", - " 1900-01-01 19:11:00\n", + " Dakota Mcmullin\n", " 72.0\n", - " Dakota McMullin\n", " \n", " \n", " 7\n", - " Toni-Ivy\n", - " Ownn.\n", - " toni*********@***.com\n", - " 1 hr 40 min\n", - " 1900-01-01 18:01:00\n", - " 1900-01-01 19:41:00\n", - " 100.0\n", " Toni-Ivy Ownn.\n", - " \n", - " \n", - " 8\n", - " Michael\n", - " Puckett\n", - " mich***********@***.com\n", - " 1 hr 37 min\n", - " 1900-01-01 18:04:00\n", - " 1900-01-01 19:41:00\n", - " 97.0\n", - " Michael Puckett\n", + " 100.0\n", " \n", " \n", " 9\n", - " Leighton\n", - " Pulliam\n", - " l8on**@***.com\n", - " 1 hr 40 min\n", - " 1900-01-01 18:00:00\n", - " 1900-01-01 19:41:00\n", - " 101.0\n", " Leighton Pulliam\n", - " \n", - " \n", - " 10\n", - " ken\n", - " quiggins\n", - " kash************@***.com\n", - " 1 hr 43 min\n", - " 1900-01-01 17:58:00\n", - " 1900-01-01 19:41:00\n", - " 103.0\n", - " ken quiggins\n", + " 101.0\n", " \n", " \n", " 11\n", - " Amber\n", - " Ratliff\n", - " arat********@***.com\n", - " 1 hr 40 min\n", - " 1900-01-01 18:01:00\n", - " 1900-01-01 19:41:00\n", - " 100.0\n", " Amber Ratliff\n", + " 100.0\n", " \n", " \n", " 12\n", - " Jill\n", - " Schrank\n", - " just*******@***.com\n", - 
" 1 hr 41 min\n", - " 1900-01-01 17:59:00\n", - " 1900-01-01 19:41:00\n", - " 102.0\n", " Jill Schrank\n", + " 102.0\n", " \n", " \n", " 13\n", - " Huan\n", - " Song\n", - " huan********@***.com\n", - " 1 hr 43 min\n", - " 1900-01-01 17:58:00\n", - " 1900-01-01 19:41:00\n", - " 103.0\n", " Huan Song\n", + " 103.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " First Name Last Name Email Duration \\\n", - "0 Sheyla Diaz dshe*****@***.com 1 hr 43 min \n", - "1 James Glosser demo**********@***.com 1 hr 40 min \n", - "2 Reed Haddix shir***********@***.com 1 hr 46 min \n", - "3 John Hankins john**********@***.com 1 hr 38 min \n", - "4 Stephanie Jones step*******************@***.com 1 hr 48 min \n", - "5 Aaron LaLiberty daei**@***.com 1 hr 39 min \n", - "6 Dakota McMullin kota****@***.com 1 hr 11 min \n", - "7 Toni-Ivy Ownn. toni*********@***.com 1 hr 40 min \n", - "8 Michael Puckett mich***********@***.com 1 hr 37 min \n", - "9 Leighton Pulliam l8on**@***.com 1 hr 40 min \n", - "10 ken quiggins kash************@***.com 1 hr 43 min \n", - "11 Amber Ratliff arat********@***.com 1 hr 40 min \n", - "12 Jill Schrank just*******@***.com 1 hr 41 min \n", - "13 Huan Song huan********@***.com 1 hr 43 min \n", - "\n", - " Time Joined Time Exited Duration in Minutes \\\n", - "0 1900-01-01 17:58:00 1900-01-01 19:41:00 103.0 \n", - "1 1900-01-01 18:01:00 1900-01-01 19:41:00 100.0 \n", - "2 1900-01-01 17:55:00 1900-01-01 19:41:00 106.0 \n", - "3 1900-01-01 18:03:00 1900-01-01 19:41:00 98.0 \n", - "4 1900-01-01 17:53:00 1900-01-01 19:41:00 108.0 \n", - "5 1900-01-01 18:02:00 1900-01-01 19:41:00 99.0 \n", - "6 1900-01-01 17:59:00 1900-01-01 19:11:00 72.0 \n", - "7 1900-01-01 18:01:00 1900-01-01 19:41:00 100.0 \n", - "8 1900-01-01 18:04:00 1900-01-01 19:41:00 97.0 \n", - "9 1900-01-01 18:00:00 1900-01-01 19:41:00 101.0 \n", - "10 1900-01-01 17:58:00 1900-01-01 19:41:00 103.0 \n", - "11 1900-01-01 18:01:00 1900-01-01 19:41:00 100.0 \n", - "12 1900-01-01 17:59:00 1900-01-01 19:41:00 
102.0 \n", - "13 1900-01-01 17:58:00 1900-01-01 19:41:00 103.0 \n", - "\n", - " Full Name \n", - "0 Sheyla Diaz \n", - "1 James Glosser \n", - "2 Reed Haddix \n", - "3 John Hankins \n", - "4 Stephanie Jones \n", - "5 Aaron LaLiberty \n", - "6 Dakota McMullin \n", - "7 Toni-Ivy Ownn. \n", - "8 Michael Puckett \n", - "9 Leighton Pulliam \n", - "10 ken quiggins \n", - "11 Amber Ratliff \n", - "12 Jill Schrank \n", - "13 Huan Song " - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "meeting_2_cleaned = clean_meeting(meeting_2)\n", - "meeting_2_cleaned" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "b489007c", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Full Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Duration in Minutes", - "rawType": "float64", - "type": "float" - } - ], - "ref": "8a873a00-7eaf-4171-95b5-5eae1f6200ce", - "rows": [ - [ - "0", - "Michael Puckett", - "97.0" - ] - ], - "shape": { - "columns": 2, - "rows": 1 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Full NameDuration in Minutes
0Michael Puckett97.0
\n", - "
" - ], - "text/plain": [ - " Full Name Duration in Minutes\n", - "0 Michael Puckett 97.0" + " Full Name Duration In Minutes\n", + "0 Sheyla Diaz 103.0\n", + "1 James Glosser 100.0\n", + "2 Reed Haddix 106.0\n", + "3 John Hankins 98.0\n", + "4 Stephanie Jones 108.0\n", + "5 Aaron Laliberty 99.0\n", + "6 Dakota Mcmullin 72.0\n", + "7 Toni-Ivy Ownn. 100.0\n", + "9 Leighton Pulliam 101.0\n", + "11 Amber Ratliff 100.0\n", + "12 Jill Schrank 102.0\n", + "13 Huan Song 103.0" ] }, - "execution_count": 15, + "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "#merge meeting 2 with mentor list to consolidate into list of mentors that where present and how long they where present\n", - "mentor_time_df2 = pd.merge(mentor_list['Full Name'], meeting_2_cleaned[['Duration in Minutes', 'Full Name']], how= 'inner', on='Full Name')\n", - "mentor_time_df2" + "meeting_2_students = remove_staff_mentor_meeting(meeting_2_cleaned,staff_list,mentor_list)\n", + "meeting_2_students" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "20023162", + "execution_count": null, + "id": "007cf203", "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "Full Name", - "rawType": "object", - "type": "string" - }, - { - "name": "Duration in Minutes", - "rawType": "float64", - "type": "float" - } - ], - "ref": "7a386f2f-97a8-4c48-8c00-a21c52db1767", - "rows": [], - "shape": { - "columns": 2, - "rows": 0 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Full NameDuration in Minutes
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: [Full Name, Duration in Minutes]\n", - "Index: []" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#merge meeting 2 with staff list to consolidate into list of staff that where present and how long they where present\n", - "staff_time_2 = pd.merge(staff_list['Full Name'], meeting_1_cleaned[['Duration in Minutes', 'Full Name']], how= 'inner', on='Full Name')\n", - "staff_time_2" - ] + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "venv (3.12.2)", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -3816,7 +4379,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.13.0" } }, "nbformat": 4,