From 707cc391d1b9dbccbfc1750e43813ed46261610b Mon Sep 17 00:00:00 2001
From: Euclides <Euclides@ALIEN>
Date: Sun, 10 Aug 2025 18:54:59 -0400
Subject: [PATCH] Added DocStrings to all the methods and functions

---
 cleaning_enrollments_data.py       | 100 ++++++++++++++++++++++++-----
 completion_rate_data.py            |  19 +++++-
 most_common_pathways_taken_data.py |  21 +++++-
 visualization_examples.ipynb       |  26 +++-----
 4 files changed, 129 insertions(+), 37 deletions(-)

diff --git a/cleaning_enrollments_data.py b/cleaning_enrollments_data.py
index 870303d..62fa63b 100644
--- a/cleaning_enrollments_data.py
+++ b/cleaning_enrollments_data.py
@@ -5,12 +5,31 @@ class EnrollmentsCleaning:
     def __init__(self, raw_data):
         self.raw_data = raw_data
     
-    def Drop_columns(self, df):
+    def __Drop_columns(self, df):
+        """
+            Deletes the columns not needed for the analysis.
+            To drop additional columns, add them to the constant 'COLUMNS_TO_DROP'.
+
+            Args:
+                df: pandas.DataFrame
+
+            Return:
+                pandas.DataFrame
+        """
         COLUMNS_TO_DROP = ['Full Name']
         result = df.drop(columns=COLUMNS_TO_DROP)
         return result
     
-    def Fix_nan_values(self, df):
+    def __Fix_nan_values(self, df):
+        """
+            Replaces NaN values with a substitute value ('NA').
+
+            Args:
+                df: pandas.DataFrame
+
+            Return:
+                pandas.DataFrame
+        """
         # Fix NaN values
         NAN_VALUE_SUBSTITUTE = 'NA'
         columns_to_fix = {
@@ -23,22 +42,48 @@ def Fix_nan_values(self, df):
         
         return df
     
-    def Rename_values(self, df):
-        # Fix change name Data Analitics 2 to Data Analysis 2 for consistency
+    def __Rename_values(self, df):
+        """
+            Renames values for consistency: 'Data Analytics 2' in the 'Service' column becomes 'Data Analysis 2'.
+
+            Args:
+                df: pandas.DataFrame
+
+            Return:
+                pandas.DataFrame
+        """
         df.loc[df['Service'] == 'Data Analytics 2', 'Service'] = 'Data Analysis 2'
         return df
     
-    def Delete_values(self, df):
-        # Delete values not needed
+    def __Delete_values(self, df):
+        """
+            Deletes the rows whose values are not needed for the analysis.
+            To remove additional values, add them to the constant 'VALUES_NOT_NEEDED'.
+
+            Args:
+                df: pandas.DataFrame
+
+            Return:
+                pandas.DataFrame
+        """
         # 'Referral to External Service', 'Supportive Services Referral', are deleted because dont have a "Projected Start Date" 
-        values_not_needed = {
+        VALUES_NOT_NEEDED = {
             'Service': ['Software Development 1', 'Software Development 2', 'Web Development 1', 'Web Development 2', 'Data Analysis 1','Data Analysis 2', 'Referral to External Service', 'Supportive Services Referral']
         }
-        for column, value in values_not_needed.items():
+        for column, value in VALUES_NOT_NEEDED.items():
             df = df[~df[column].isin(value)]
         return df
         
-    def Set_data_types(self, df):
+    def __Set_data_types(self, df):
+        """
+            Sets data type for each column.
+
+            Args:
+                df: pandas.DataFrame
+
+            Return:
+                pandas.DataFrame
+        """
         # DataTypes
         column_datatype: dict = {'Auto Id': str, 'KY Region': str, 'Assessment ID': str, 'EnrollmentId': str,
         'Enrollment Service Name': str, 'Service': str, 'Projected Start Date': str,
@@ -50,7 +95,21 @@ def Set_data_types(self, df):
             df[column] = df[column].astype(type)
         return df
     
-    def Find_cohort(self, id: str, projected_start_date: str, cohort_to_find: str, df_to_clean: pd.DataFrame):
+    def __Find_cohort(self, id: str, projected_start_date: str, cohort_to_find: str, df_to_clean: pd.DataFrame):
+        """
+            Finds values for each NaN of 'ATP Cohort' column.
+            This function was created with the idea of using pandas.DataFrame.apply().
+
+
+            Args:
+                id: str
+                projected_start_date: str
+                cohort_to_find: str
+                df_to_clean: pandas.DataFrame
+
+            Return:
+                numpy.datetime64
+        """
         ## Q: What to do with Service: ['Referral to External Service', 'Supportive Services Referral']
         ## TODO: Clean the NaTType before this function runs
         if pd.isna(cohort_to_find):
@@ -75,11 +134,20 @@ def Find_cohort(self, id: str, projected_start_date: str, cohort_to_find: str, d
             return np.datetime64(cohort_to_find)
 
     def Get_clean_data(self):
+        """
+            Cleans the raw data.
+
+            Args:
+                None (reads self.raw_data)
+
+            Return:
+                pandas.DataFrame
+        """
         df = self.raw_data
-        df = self.Drop_columns(df)
-        df = self.Fix_nan_values(df)
-        df = self.Rename_values(df)
-        df = self.Delete_values(df)
-        df = self.Set_data_types(df)
-        df['ATP Cohort'] = df.apply(lambda row: self.Find_cohort(row['Auto Id'], row['Projected Start Date'], row['ATP Cohort'], df), axis=1)
+        df = self.__Drop_columns(df)
+        df = self.__Fix_nan_values(df)
+        df = self.__Rename_values(df)
+        df = self.__Delete_values(df)
+        df = self.__Set_data_types(df)
+        df['ATP Cohort'] = df.apply(lambda row: self.__Find_cohort(row['Auto Id'], row['Projected Start Date'], row['ATP Cohort'], df), axis=1)
         return df
\ No newline at end of file
diff --git a/completion_rate_data.py b/completion_rate_data.py
index 48597d9..aeca043 100644
--- a/completion_rate_data.py
+++ b/completion_rate_data.py
@@ -28,8 +28,15 @@ def __init__(self, data):
 
         # Not the best Pandas way to do it:
     def Get_completion_percentages(self, cohort: str = 'All cohorts') -> pd.DataFrame:
-        
+        """
+            Creates a pandas.DataFrame that contains the completion percentage of each pathway.
+
+            Args:
+                cohort: str
 
+            Return:
+                pandas.DataFrame
+        """
         if cohort == 'All cohorts':
             data = self.data
         else:
@@ -45,8 +52,16 @@ def Get_completion_percentages(self, cohort: str = 'All cohorts') -> pd.DataFram
 
         result_df['Pathway'] = result_df['Module'].apply(lambda x: x[:x.rfind(' ')]) # intended to be able to sort by pathway
         return result_df
-    # TODO: Add test
 
     def Get_pathways_name(self, df: pd.DataFrame) -> list:
+        """
+            Returns the list of unique pathways in a pandas.DataFrame generated by self.Get_completion_percentages().
+
+            Args:
+                df: pandas.DataFrame
+
+            Return:
+                list
+        """
         return list(df['Pathway'].unique())
 
diff --git a/most_common_pathways_taken_data.py b/most_common_pathways_taken_data.py
index 845c59f..f43bb70 100644
--- a/most_common_pathways_taken_data.py
+++ b/most_common_pathways_taken_data.py
@@ -14,7 +14,7 @@ def __init__(self, data):
 
     def Get_starting_pathways(self): 
         """
-            Returns a pandas.DataFrame were all the services are the biginning paths
+            Returns a pandas.DataFrame where all the services are the beginning pathways.
 
             Args: 
                 df: pandas.DataFrame
@@ -26,12 +26,31 @@ def Get_starting_pathways(self):
         return self.data[mask_starter_pathways]
 
     def Get_cohorts_list(self):
+        """
+            Returns the sorted list of cohorts from the starting pathways, with 'All cohorts' as the first entry.
+
+            Args:
+                None (reads self.starter_only_df)
+
+            Return:
+                list
+        """
         df = self.starter_only_df
         cohorts = list(pd.to_datetime(df['ATP Cohort'][df['ATP Cohort'] != 'NA']).sort_values(ascending=True).astype(str).unique())
         cohorts.insert(0, 'All cohorts')
         return cohorts
 
     def Get_data_by_cohort(self, cohort: str = 'All cohorts') -> pd.DataFrame:
+        """
+            Returns a pandas.DataFrame for a specific cohort or all cohorts.
+            The data is read from self.starter_only_df.
+
+            Args:
+                cohort: str
+
+            Return:
+                pandas.DataFrame
+        """
         df = self.starter_only_df
         if cohort == 'All cohorts':
             result = df.value_counts('Service').reset_index()
diff --git a/visualization_examples.ipynb b/visualization_examples.ipynb
index 8fa5ef8..c40b6af 100644
--- a/visualization_examples.ipynb
+++ b/visualization_examples.ipynb
@@ -20,17 +20,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "id": "47cd23cd",
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import plotly.express as px\n",
-    "import plotly.graph_objects as go\n",
     "from dash import Dash, dcc, html, Input, Output\n",
     "from most_common_pathways_taken_data import Most_common_pathways_taken_data\n",
-    "from compleation_rate_data import Compleation_rate_data\n",
+    "from completion_rate_data import Completion_rate_data\n",
     "from cleaning_enrollments_data import EnrollmentsCleaning"
    ]
   },
@@ -46,7 +45,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "id": "ba57e157",
    "metadata": {},
    "outputs": [],
@@ -64,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 13,
    "id": "fa1b6e02",
    "metadata": {},
    "outputs": [
@@ -83,7 +82,7 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x15cfa21ba70>"
+       "<IPython.lib.display.IFrame at 0x15cfc4d0e30>"
       ]
      },
      "metadata": {},
@@ -95,7 +94,6 @@
     "    app = Dash(__name__)\n",
     "    # Const\n",
     "    data_class = Most_common_pathways_taken_data(cleaner.Get_clean_data())\n",
-    "    starter_only_enrollments = data_class.Get_starting_pathways() # This function should be able to comunicate with the data without argument\n",
     "\n",
     "    dropdown_options = data_class.Get_cohorts_list()\n",
     "    pathway_color = {\n",
@@ -149,7 +147,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "c0b7d44e",
    "metadata": {},
    "outputs": [
@@ -168,7 +166,7 @@
        "        "
       ],
       "text/plain": [
-       "<IPython.lib.display.IFrame at 0x15cf9e13c50>"
+       "<IPython.lib.display.IFrame at 0x15cf73437d0>"
       ]
      },
      "metadata": {},
@@ -179,18 +177,10 @@
     "def Dash_completion_rates_by_path() -> Dash: # TODO: fix data structure so visualization doesn't use df\n",
     "    app2 = Dash(__name__)\n",
     "    # Const\n",
-    "    data_class = Compleation_rate_data(cleaner.Get_clean_data())\n",
+    "    data_class = Completion_rate_data(cleaner.Get_clean_data())\n",
     "    completion_df = data_class.Get_completion_percentages()\n",
     "    options = data_class.Get_pathways_name(completion_df)\n",
     "\n",
-    "    pathway_color = {\n",
-    "        'Software Development': 'green', \n",
-    "        'Web Development': 'blue', \n",
-    "        'Data Analysis': 'red',\n",
-    "        'Quality Assurance': 'yellow', \n",
-    "        'User Experience': 'purple'\n",
-    "    }\n",
-    "\n",
     "    # Display\n",
     "    app2.layout = html.Div([\n",
     "        html.H2('Pathways Completion', style={'text-align': \"center\"}),\n",