From 0d089285a29291ae433bffa15f02bbd345a3e7e5 Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Tue, 18 Jun 2024 21:25:41 -0500
Subject: [PATCH 01/15] Fixed error that resulted in benchmark score being
 calculated incorrectly.

---
 FSRLearning/feature_selector.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/FSRLearning/feature_selector.py b/FSRLearning/feature_selector.py
index 363a9ce..eb3220a 100644
--- a/FSRLearning/feature_selector.py
+++ b/FSRLearning/feature_selector.py
@@ -13,7 +13,6 @@
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score, cross_validate
 
-
 class FeatureSelectorRL:
     """
         This is the class used to create a feature selector with the RL method
@@ -228,18 +227,16 @@ def compare_with_benchmark(self, X, y, results) -> list:
         for i in range(1, self.feature_number):
             # From RL
             clf = RandomForestClassifier(n_jobs=-1)
-            df = pd.concat([X.iloc[:, results[-1][i:]], y], axis=1)
-            df = df.drop_duplicates(ignore_index=True)
 
-            min_samples = np.min(np.array(df.iloc[:, -1].value_counts()))
+            min_samples = np.min(np.array(y.value_counts()))
             if min_samples < 5 and min_samples >= 2:
                 accuracy: float = np.mean(
-                    cross_val_score(clf, df.iloc[:, :-1], df.iloc[:, -1], cv=min_samples, scoring='balanced_accuracy'))
+                    cross_val_score(clf, X.iloc[:, results[-1][i:]], y, cv=min_samples, scoring='balanced_accuracy'))
             elif min_samples < 2:
                 accuracy: float = 0
             else:
                 accuracy: float = np.mean(
-                    cross_val_score(clf, df.iloc[:, :-1], df.iloc[:, -1], cv=5, scoring='balanced_accuracy'))
+                    cross_val_score(clf, X.iloc[:, results[-1][i:]], y, cv=5, scoring='balanced_accuracy'))
 
             # Benchmark
             estimator = RandomForestClassifier(n_jobs=-1)

From 8caab7ab97aef7a35fc748e468ec941bb8943cb5 Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 01:40:35 -0500
Subject: [PATCH 02/15] Guard reward computation against small regression samples, NaN scores and unsupported estimators.

---
 FSRLearning/state.py | 75 +++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/FSRLearning/state.py b/FSRLearning/state.py
index 7da9591..be37a99 100644
--- a/FSRLearning/state.py
+++ b/FSRLearning/state.py
@@ -7,14 +7,15 @@
 from sklearn.base import is_classifier, is_regressor
 
 class State:
-    '''
+    """
         State object
 
         number: position in the dictionary of the graph
         description: represents the set of feature in the set
         v_value: V value of the state
         nb_visited: number of times that the set has been visited
-    '''
+    """
+    
     def __init__(self, 
                  number: list,
                  description: list,
@@ -27,13 +28,14 @@ def __init__(self,
         self.reward = reward
         self.nb_visited = nb_visited
 
-    def get_reward(self, clf, X, y) -> float:
-        '''
+    def get_reward(self, estimator, X, y) -> float:
+        """
             Returns the reward of a set of variable
 
-            clf: type of the classifier with which we want to evaluate the data
-        '''
-        # Train classifier with state_t variable and state t+1 variables and compute the diff of the accuracy
+            estimator: type of estimator with which we want to evaluate the data
+        """
+        
+        # Train estimator with state_t variable and state t+1 variables and compute the diff of the accuracy
         if self.reward == 0:
             if self.description == []:
                 self.reward = 0
@@ -43,31 +45,43 @@ def get_reward(self, clf, X, y) -> float:
                 df = pd.concat([X.iloc[:, self.description], y], axis = 1)
                 df = df.drop_duplicates(ignore_index = True) 
 
-                if is_classifier(clf):
-                    min_samples = np.min(np.array(df.iloc[:, -1].value_counts()))
-                    if min_samples < 5 and min_samples >= 2:
-                        accuracy: float = np.mean(cross_val_score(clf, df.iloc[:, :-1], df.iloc[:, -1], cv = min_samples, scoring = 'balanced_accuracy'))
-                    elif min_samples < 2:
-                        accuracy: float = 0
+                if is_classifier(estimator):
+                    min_samples = df.iloc[:, -1].value_counts().min()
+                    if min_samples >= 5:
+                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = 5, scoring = 'balanced_accuracy'))
+                    elif min_samples < 5 and min_samples >= 2:
+                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = min_samples, scoring = 'balanced_accuracy'))
+                    else:
+                        accuracy = 0
+                elif is_regressor(estimator):
+                    num_samples = len(df)
+                    if num_samples >= 10:
+                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = 5, scoring = 'r2'))
+                    elif num_samples < 10 and num_samples >= 4:
+                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = num_samples // 2, scoring = 'r2'))
                     else:
-                        accuracy: float = np.mean(cross_val_score(clf, df.iloc[:, :-1], df.iloc[:, -1], cv = 5, scoring = 'balanced_accuracy'))
-                elif is_regressor(clf):
-                    accuracy: float = np.mean(cross_val_score(clf, df.iloc[:, :-1], df.iloc[:, -1], cv = 5, scoring = 'r2'))
+                        accuracy = 0
+                else:
+                    raise TypeError("The provided estimator is neither a classifier nor a regressor. Please make sure to pass a classifier or regressor to the method.")
 
+                if np.isnan(accuracy):
+                    accuracy = 0
+                    
                 self.reward = accuracy
                 return self.reward
         else:
             return self.reward
 
     def select_action(self, feature_structure: dict, eps: float, aorf_histo: list, is_empty_state: bool):
-        ''' 
+        """
             Returns an action object
 
             feature_structure: current dictionnary of the structure of the graph
             eps: probability of choosing a random action [between 0 and 1]
 
             This method enables to train only once a model and get the accuracy
-        '''
+        """
+        
         #We get the neighboors
         get_neigh: list = self.get_neighboors(feature_structure, [i for i in range(0, len(aorf_histo[0]))])
 
@@ -93,12 +107,13 @@ def select_action(self, feature_structure: dict, eps: float, aorf_histo: list, i
             return Action(self, next_state), next_state, False
         
     def get_argmax(self, get_neigh: list, aorf_histo):
-        '''
+        """
             Returns the argmax of the list of neighbors 
 
             get_neigh: list of the neighbors of the self state
             aorf_histo: value of the aor
-        '''
+        """
+        
         #We select a state where the possible next feature has the maximum AORf
         possible_feature: list = [list(set(neigh.description) - set(self.description))[0] for neigh in get_neigh]
     
@@ -117,12 +132,13 @@ def get_argmax(self, get_neigh: list, aorf_histo):
         return next_state
 
     def get_neighboors(self, feature_structure: dict, feature_list: list) -> list:
-        '''
+        """
             Returns the list of the neighboors of the current state
 
             feature_structure: current dictionnary of the structure of the graph
             feature_list: list of the int identifiers of the features in the data set (len = number of features in the datas set)
-        '''
+        """
+        
         neigh_depth_graph: int = self.number[0] + 1
 
         if neigh_depth_graph in feature_structure: 
@@ -148,7 +164,7 @@ def get_neighboors(self, feature_structure: dict, feature_list: list) -> list:
             return possible_neigh
 
     def update_v_value(self, alpha: float, gamma: float, next_state) -> float:
-        '''
+        """
             Update the v_value of a state
 
             Alpha [0; 1] : rate of updates
@@ -156,30 +172,33 @@ def update_v_value(self, alpha: float, gamma: float, next_state) -> float:
             next_state: the next state that has been chosen by the eps_greedy algorithm
 
             Returns a float number
-        '''
+        """
+        
         self.v_value += alpha * ((next_state.reward - self.reward) + gamma * next_state.v_value - self.v_value)   
         
     def is_final(self, nb_of_features: int) -> bool:
-        '''
+        """
             Check if a state is a final state (with all the features in the state)
 
             nb_of_features: number of features in the data set 
 
             Returns True if all the possible features are in the state
-        '''
+        """
+        
         if len(self.description) == nb_of_features:
             return True
         else:
             return False
 
     def is_equal(self, compared_state) -> bool:
-        '''
+        """
             Compare if two State objects are equal
 
             compared_state: state to be compared with the self state
 
             Returns True if yes else returns False
-        '''
+        """
+        
         if set(self.description) == set(compared_state.description):
             return True
         else:

From 8d0cdc27343fcde4730b8bc78b7033bae87f7e5d Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 01:41:27 -0500
Subject: [PATCH 03/15] Update fsrlearning.py

---
 FSRLearning/fsrlearning.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/FSRLearning/fsrlearning.py b/FSRLearning/fsrlearning.py
index ba99898..4fd270e 100644
--- a/FSRLearning/fsrlearning.py
+++ b/FSRLearning/fsrlearning.py
@@ -1,16 +1,15 @@
 from .state import State
 import numpy as np
 
-
 class FeatureSelectionProcess:
-    '''
+    """
         Init aor list such that aor = [[np.zeros(nb_of_features)], [np.zeros(nb_of_features)]]
 
         nb_of_features: Number of feature in the data set
         eps: probability of choosing a random action (uniform or softmax)
         alpha: 
         gamma: 
-    '''
+    """
 
     def __init__(self,
                  nb_of_features: int,
@@ -31,11 +30,12 @@ def __init__(self,
         self.feature_structure = feature_structure
 
     def pick_random_state(self) -> State:
-        '''
+        """
             Select a random state in all the possible state space
             
             Return a state randomly picked
-        '''
+        """
+        
         #Check if the dict is empty
         if bool(self.feature_structure) == True:
             random_depth: int = np.random.choice(list(self.feature_structure.keys()))
@@ -46,11 +46,12 @@ def pick_random_state(self) -> State:
             return self.start_from_empty_set()
 
     def start_from_empty_set(self) -> State:
-        '''
+        """
             Start from the empty set (with no feature selected)
             
             Returns the empty initial state
-        '''
+        """
+        
         depth = 0
         if not bool(self.feature_structure):
             return State([0, 0], [], 0, 0.75), True
@@ -58,11 +59,12 @@ def start_from_empty_set(self) -> State:
             return self.feature_structure[depth][0], True
 
     def add_to_historic(self, visited_state: State):
-        '''
+        """
             Add to the feature structure historic function
 
             visited_state: current state visited by the simulation
-        '''
+        """
+        
         state_depth: int = visited_state.number[0]
 
         #We increment the number of visit of the current state
@@ -93,14 +95,14 @@ def add_to_historic(self, visited_state: State):
             self.feature_structure[state_depth] = [visited_state]
 
     def get_final_aor_sorted(self) -> list:
-        '''
+        """
             Returns the aor table sorted by ascending
 
             Index of the feature
             Number of time the feature has been played
             Value of the feature
             Best feature (from the lowest to the biggest)
-        '''
+        """
 
         index: list = [i for i in range(self.nb_of_features)]
         nb_played: list = self.aor[0]

From 0c156c457290fdfa6ee13d4b69040476b8310e63 Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 01:41:53 -0500
Subject: [PATCH 04/15] Update action.py

---
 FSRLearning/action.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/FSRLearning/action.py b/FSRLearning/action.py
index d656c6b..524e88e 100644
--- a/FSRLearning/action.py
+++ b/FSRLearning/action.py
@@ -1,7 +1,8 @@
 class Action:
-    '''
+    """
         Action Object
-    '''
+    """
+    
     def __init__(self,
                  state_t,
                  state_next) -> None:
@@ -9,13 +10,14 @@ def __init__(self,
         self.state_next = state_next            
 
     def get_aorf(self, aor_historic: list) -> list:
-        '''
+        """
         Update the ARO of a feature
 
         aor_historic: get the not updated aor table
 
         Return the AOR table
-        '''
+        """
+        
         # Get the feature played and information about it
         chosen_feature: int = list(set(self.state_next.description)-set(self.state_t.description))[0]
 
@@ -23,9 +25,8 @@ def get_aorf(self, aor_historic: list) -> list:
         aorf_value: float = aor_historic[1][chosen_feature]
 
         aor_new = aor_historic.copy()
-
         # Update the value
         aor_new[0][chosen_feature] = nb_played
         aor_new[1][chosen_feature] = ((nb_played-1) * aorf_value + self.state_t.v_value) / nb_played
 
-        return aor_new
\ No newline at end of file
+        return aor_new

From 610cbae7c965664d48719a1c45f0588fdf5b34ad Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 01:57:02 -0500
Subject: [PATCH 05/15] Updated documentation and fixed errors from removing
 duplicates.

---
 FSRLearning/feature_selector.py | 203 +++++++++++++++++++++-----------
 1 file changed, 131 insertions(+), 72 deletions(-)

diff --git a/FSRLearning/feature_selector.py b/FSRLearning/feature_selector.py
index eb3220a..d12f54f 100644
--- a/FSRLearning/feature_selector.py
+++ b/FSRLearning/feature_selector.py
@@ -12,62 +12,73 @@
 from sklearn.feature_selection import RFE
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score, cross_validate
+from sklearn.base import is_classifier, is_regressor
 
 class FeatureSelectorRL:
-    """
-        This is the class used to create a feature selector with the RL method
-
-        fit enable to get the results structured as follows:
-            [
-                Feature index : list,
-                Number of times a feature has been played: list
-                AOR value per feature: list,
-                Sorted list of feature from the less to the most important for the model: list
-            ]
-
-        Parameters explanation:
-            [
-                Alpha [0; 1] : rate of updates
-                Gamma [0; 1] : discount factor to moderate the effect of observing the next state (0=shortsighted; 1=farsighted)
-                Starting_state : string empty of random --> if empty then the starting state is empty elif random we start from a random state
-            ]
-    """
-
-    def __init__(self,
-                 feature_number: int,
-                 nb_explored: list = None,
-                 nb_not_explored: list = None,
-                 feature_structure: dict = None,
-                 aor: list = None,
-                 eps: float = .1,
-                 alpha: float = .5,
-                 gamma: float = .70,
-                 nb_iter: int = 100,
-                 explored: int = 0,
-                 not_explored: int = 0,
-                 starting_state: str = 'empty'):
+    """Class for feature selector using the RL method."""
+        
+    def __init__(self, feature_number: int, eps: float = .1, alpha: float = .5, gamma: float = .70, nb_iter: int = 100, 
+                 starting_state: str = 'empty', nb_explored: list = None, nb_not_explored: list = None, 
+                 feature_structure: dict = None, aor: list = None, explored: int = 0, not_explored: int = 0):
+        """
+        Constructor for FeatureSelectorRL.
+
+        Parameters
+        ----------
+        feature_number : integer
+            Number of features.
+        eps : float [0, 1], default = 0.1
+            Probability of choosing a random next state. 0 gives a purely greedy algorithm and 1 gives a purely random algorithm.
+        alpha : float [0, 1], default = 0.5
+            Controls the rate of updates. 0 means a state's value is never updated and 1 means each update is applied in full.
+        gamma : float [0, 1], default = 0.7
+            Discount factor to moderate the effect of observing the next state. 0 exhibits shortsighted behavior and 1 exhibits farsighted behavior.
+        nb_iter : integer, default = 100
+            Number of sequences to go through the graph.
+        starting_state : {"empty", "random"}, default = "empty"
+            Starting state of the algorithm. 
+
+            If "empty", the algorithm starts from an empty state.
+            If "random", the algorithm starts from a random state in the graph.
+        """
+                     
         self.feature_number = feature_number
-        self.nb_explored = nb_explored
-        self.nb_not_explored = nb_not_explored
-        self.feature_structure = feature_structure
-        self.aor = aor
         self.eps = eps
         self.alpha = alpha
         self.gamma = gamma
         self.nb_iter = nb_iter
+        self.starting_state = starting_state
+        self.nb_explored = nb_explored
+        self.nb_not_explored = nb_not_explored
+        self.feature_structure = feature_structure
+        self.aor = aor
         self.explored = explored
         self.not_explored = not_explored
-        self.starting_state = starting_state
-
-    def fit_predict(self, X, y, clf=RandomForestClassifier(n_jobs=-1)) -> tuple[list, int]:
-        """
-            Get the sorted weighted variables
 
-            Input :
-            [
-                X, y : input data
-                clf : classifier used for reward evaluation
-            ]
+    def fit_predict(self, X, y, estimator):
+        """        
+        Fit the FeatureSelectorRL algorithm according to the given data.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Data vector, where `n_samples` is the number of samples and `n_features` is the number of features.
+        y : array-like of shape (n_samples,)
+            Target values.
+        estimator : Classifier or Regressor Estimator 
+            A supervised learning estimator with a ``fit`` method. Used for reward evaluation.
+
+        Returns
+        ----------
+        results : tuple
+            Output of the selection process (2-object tuple):
+                List:
+                    Index of the features that have been sorted.
+                    Number of times that each feature has been chosen.
+                    Mean reward brought by each feature.
+                    Ranking of the features from the less important to the most important.
+                Integer:
+                    Number of states visited.
         """
 
         # Init the process
@@ -127,7 +138,7 @@ def fit_predict(self, X, y, clf=RandomForestClassifier(n_jobs=-1)) -> tuple[list
                     self.not_explored += 1
 
                 # We evaluate the reward of the next_state
-                next_state.get_reward(clf, X, y)
+                next_state.get_reward(estimator, X, y)
 
                 # We update the v_value of the current_state
                 current_state.update_v_value(.99, .99, next_state)
@@ -168,8 +179,9 @@ def fit_predict(self, X, y, clf=RandomForestClassifier(n_jobs=-1)) -> tuple[list
 
     def get_plot_ratio_exploration(self):
         """
-            Plots the graph of the evolution of the already and newly visited states
+        Plot a graph comparing the number of already explored states and newly explored states over the iterations.
         """
+        
         plt.plot([i for i in range(len(self.nb_not_explored))], self.nb_not_explored, label='Already explored State')
         plt.plot([i for i in range(len(self.nb_explored))], self.nb_explored, label='Explored State')
         plt.xlabel('Number of iterations')
@@ -178,10 +190,16 @@ def get_plot_ratio_exploration(self):
 
         plt.show()
 
-    def get_feature_strengh(self, results):
+    def get_feature_strength(self, results):
         """
-            Plots the graph of the relative strengh of each variable
+        Plot a graph of the relative impact of each feature on the model.
+
+        Parameters
+        ----------
+        results : 2-object tuple
+            Results returned from fit_predict.
         """
+        
         #Relative strengh of the variable
         plt.bar(x=results[0][0], height=results[0][2], color=['blue' if rew >= 0 else 'red' for rew in results[0][2]])
         plt.xlabel('Feature\'s name')
@@ -191,8 +209,9 @@ def get_feature_strengh(self, results):
 
     def get_depth_of_visited_states(self):
         """
-            Plot the evolution of the size of the visited states in function of the iterations
+        Plot a graph of the number of times that a state of a certain size has been visited.
         """
+        
         sum_depth = []
         for key in self.feature_structure:
             #Browse every state with one size in the graph
@@ -207,12 +226,26 @@ def get_depth_of_visited_states(self):
         plt.ylabel('Number of visits')
         plt.plot()
 
-    def compare_with_benchmark(self, X, y, results) -> list:
+    def compare_with_benchmark(self, X, y, results, estimator):
         """
-            Returns all the metrics at each iteration on the set of feature
-
-            Plots the graph of these evolutions
+        Compare the performance of FeatureSelectorRL with RFE from Scikit-Learn.
+        Return balanced accuracy score for classifiers and r2 score for regressors at each iteration 
+        and plot the graph of these evolutions.
+        
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Data vector, where `n_samples` is the number of samples and `n_features` is the number of features.
+            Same as the X passed to fit_predict.
+        y : array-like of shape (n_samples,)
+            Target values. Same as the y passed to fit_predict.
+        results : 2-object tuple
+            Results returned from fit_predict.
+        estimator : Classifier or Regressor Estimator
+            A supervised learning estimator with a ``fit`` method that provides information about feature importance 
+            (e.g. `coef_`, `feature_importances_`).
         """
+        
         is_better_list: list = []
         avg_benchmark_acccuracy: list = []
         avg_rl_acccuracy: list = []
@@ -226,23 +259,40 @@ def compare_with_benchmark(self, X, y, results) -> list:
         print('---------- Score ----------')
         for i in range(1, self.feature_number):
             # From RL
-            clf = RandomForestClassifier(n_jobs=-1)
-
-            min_samples = np.min(np.array(y.value_counts()))
-            if min_samples < 5 and min_samples >= 2:
-                accuracy: float = np.mean(
-                    cross_val_score(clf, X.iloc[:, results[-1][i:]], y, cv=min_samples, scoring='balanced_accuracy'))
-            elif min_samples < 2:
-                accuracy: float = 0
+            if is_classifier(estimator):
+                min_samples = y.value_counts().min()
+                if min_samples >= 5:
+                    accuracy = np.mean(cross_val_score(estimator, X.iloc[:, results[-1][i:]], y, cv = 5, scoring = 'balanced_accuracy'))
+                elif min_samples < 5 and min_samples >= 2:
+                    accuracy = np.mean(cross_val_score(estimator, X.iloc[:, results[-1][i:]], y, cv = min_samples, scoring = 'balanced_accuracy'))
+                else:
+                    accuracy = 0
+            elif is_regressor(estimator):
+                num_samples = len(y)
+                if num_samples >= 10:
+                    accuracy = np.mean(cross_val_score(estimator, X.iloc[:, results[-1][i:]], y, cv = 5, scoring = 'r2'))
+                elif num_samples < 10 and num_samples >= 4:
+                    accuracy = np.mean(cross_val_score(estimator, X.iloc[:, results[-1][i:]], y, cv = num_samples // 2, scoring = 'r2'))
+                else:
+                    accuracy = 0
             else:
-                accuracy: float = np.mean(
-                    cross_val_score(clf, X.iloc[:, results[-1][i:]], y, cv=5, scoring='balanced_accuracy'))
-
+                raise TypeError("The provided estimator is neither a classifier nor a regressor. Please make sure to pass a classifier or regressor to the method.")
+            
+            if np.isnan(accuracy):
+                accuracy = 0
+            
             # Benchmark
-            estimator = RandomForestClassifier(n_jobs=-1)
             selector = RFE(estimator, n_features_to_select=len(results[-1]) - i, step=1)
-            cv_results = cross_validate(selector, X, y, cv=5, scoring='balanced_accuracy', return_estimator=True)
+            if is_classifier(estimator):
+                cv_results = cross_validate(selector, X, y, cv=5, scoring='balanced_accuracy', return_estimator=True)
+            elif is_regressor(estimator):
+                cv_results = cross_validate(selector, X, y, cv=5, scoring='r2', return_estimator=True)
+            else:
+                raise TypeError("The provided estimator is neither a classifier nor a regressor. Please make sure to pass a classifier or regressor to the method.")
+            
             sele_acc = np.mean(cv_results['test_score'])
+            if np.isnan(sele_acc):
+                sele_acc = 0
 
             if accuracy >= sele_acc:
                 is_better_list.append(1)
@@ -291,11 +341,20 @@ def compare_with_benchmark(self, X, y, results) -> list:
 
         return is_better_list
 
-    def get_best_state(self) -> tuple[list[State | float], list[State | float]]:
+    def get_best_state(self):
         """
-            Returns the optimal state
-
-            Returns : Tuple(Best_rewarded_state, Best_feature_set)
+        Return the optimal state.
+        
+        Returns
+        ----------
+        state : tuple
+            2-object tuple:
+                List:
+                    Best state reward.
+                    Best reward. 
+                List:
+                    Best state v value.
+                    Best v value.
         """
 
         best_v_value: float = 0

From 80a778f57c402246cdf87232f1c1ff3ab011d32a Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 02:07:22 -0500
Subject: [PATCH 06/15] Update README.md

---
 README.md | 40 ++++++++++++++++++----------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/README.md b/README.md
index e74a5b5..380a466 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
-# FSRLeaning - Python Library
+# FSRLearning - Python Library
 
 [![Downloads](https://static.pepy.tech/badge/FSRLearning)](https://pepy.tech/project/FSRLearning)
 [![Downloads](https://static.pepy.tech/badge/FSRLearning/month)](https://pepy.tech/project/FSRLearning)
 
-FSRLeaning is a Python library for feature selection using reinforcement learning. It's designed to be easy to use and efficient, particularly for selecting the most relevant features from a very large set.
+FSRLearning is a Python library for feature selection using reinforcement learning. It's designed to be easy to use and efficient, particularly for selecting the most relevant features from a very large set.
 
 ## Installation
 
@@ -31,52 +31,49 @@ import pandas as pd
 australian_data = pd.read_csv('australian_data.csv', header=None)
 
 # Get the dataset with the features
-X = australian_data.drop(14, axis=1)
+X = australian_data.drop(australian_data.columns[14], axis=1)
 
 # Get the dataset with the label values
-y = australian_data[14]
+y = australian_data[australian_data.columns[14]]
 ```
 
 After this step we can simply run a feature selection and ranking process that maximises a metric. 
 
 ```python
 from FSRLearning import FeatureSelectorRL
+from sklearn.ensemble import RandomForestClassifier
 
 # Create the object of feature selection with RL
-fsrl_obj = FeatureSelectorRL(14, nb_iter=200)
+fsrl_obj = FeatureSelectorRL(14, nb_iter = 200)
 
 # Returns the results of the selection and the ranking
-results = fsrl_obj.fit_predict(X, y)
+results = fsrl_obj.fit_predict(X, y, RandomForestClassifier(n_jobs = -1))
 results
 ```
 
-The feature_Selector_RL has several parameters that can be tuned. Here is all of them and the values that they can take.
-
+FeatureSelectorRL has several parameters that can be tuned:
 - feature_number (integer) : number of features in the DataFrame X
-
-- feature_structure (dictionary, optional) : dictionary for the graph implementation
 - eps (float [0; 1], optional) : probability of choosing a random next state, 0 is an only greedy algorithm and 1 only random
 - alpha (float [0; 1], optional): control the rate of updates, 0 is a very not updating state and 1 a very updated
 - gamma (float [0, 1], optional): factor of moderation of the observation of the next state, 0 is a shortsighted condition and 1 it exhibits farsighted behavior
 - nb_iter (int, optional): number of sequences to go through the graph
 - starting_state ("empty" or "random", optional) : if "empty" the algorithm starts from the empty state and if "random" the algorithm starts from a random state in the graph 
 
-The output of the selection process is a 5-tuple object.
-
-- Index of the features that have been sorted
-
-- Number of times that each feature has been chosen
-- Mean reward brought by each feature
-- Ranking of the features from the less important to the most important
-- Number of states visited
-
+The output of fit_predict is a 2-object tuple:
+- List:
+  - Index of the features that have been sorted.
+  - Number of times that each feature has been chosen.
+  - Mean reward brought by each feature.
+  - Ranking of the features from the less important to the most important.
+- Integer:
+  - Number of states visited.
 
 ## Existing methods
 
 - Compare the performance of the FSRLearning library with RFE from Sickit-Learn :
 
 ```python
-fsrl_obj.compare_with_benchmark(X, y, results)
+fsrl_obj.compare_with_benchmark(X, y, results, RandomForestClassifier(n_jobs = -1))
 ```
 Returns some comparisons and plot a graph with the metric for each set of features selected. It is useful for parameters tuning. 
 
@@ -90,7 +87,7 @@ Returns a plot. It is useful to get an overview of how the graph is browse and t
 - Get an overview of the relative impact of each feature on the model :
 
 ```python
-fsrl_obj.get_feature_strengh(results)
+fsrl_obj.get_feature_strength(results)
 ```
 
 Returns a bar plot.
@@ -114,4 +111,3 @@ Returns a plot. It is useful to see how deep the Markovian Decision Process goes
 This library has been implemented with the help of these two articles :
 - Sali Rasoul, Sodiq Adewole and Alphonse Akakpo, FEATURE SELECTION USING REINFORCEMENT LEARNING (2021)
 - Seyed Mehdin Hazrati Fard, Ali Hamzeh and Sattar Hashemi, USING REINFORCEMENT LEARNING TO FIND AN OPTIMAL SET OF FEATURES (2013)
-

From e1d6ffacc0e273654048c88031eecc2a0c72de61 Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 02:10:28 -0500
Subject: [PATCH 07/15] Update README.md

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 380a466..312084a 100644
--- a/README.md
+++ b/README.md
@@ -52,12 +52,12 @@ results
 ```
 
 FeatureSelectorRL has several parameters that can be tuned:
-- feature_number (integer) : number of features in the DataFrame X
-- eps (float [0; 1], optional) : probability of choosing a random next state, 0 is an only greedy algorithm and 1 only random
-- alpha (float [0; 1], optional): control the rate of updates, 0 is a very not updating state and 1 a very updated
-- gamma (float [0, 1], optional): factor of moderation of the observation of the next state, 0 is a shortsighted condition and 1 it exhibits farsighted behavior
-- nb_iter (int, optional): number of sequences to go through the graph
-- starting_state ("empty" or "random", optional) : if "empty" the algorithm starts from the empty state and if "random" the algorithm starts from a random state in the graph 
+- feature_number (integer) : Number of features.
+- eps (float [0, 1], default = 0.1) : Probability of choosing a random next state. 0 is an only greedy algorithm and 1 is an only random algorithm.
+- alpha (float [0, 1], default = 0.5) : Controls the rate of updates. 0 is a very not updating state and 1 is a very updating state.
+- gamma (float [0, 1], default = 0.7) : Discount factor to moderate the effect of observing the next state. 0 exhibits shortsighted behavior and 1 exhibits farsighted behavior.
+nb_iter (integer, default = 100) : Number of sequences to go through the graph.
+starting_state ({"empty", "random"}, default = "empty") : Starting state of the algorithm. 
 
 The output of fit_predict is a 2-object tuple:
 - List:

From cd89d7adb11d1db4b4f41aacba25ffaa988c65f4 Mon Sep 17 00:00:00 2001
From: JayTicku <147661917+JayTicku@users.noreply.github.com>
Date: Wed, 19 Jun 2024 02:19:04 -0500
Subject: [PATCH 08/15] Update README.md

---
 README.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 312084a..331ded0 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,7 @@ In this example, we're using the Australian credit approval dataset. It has 14 f
 
 #### The process
 
-The first step is a pre-processing of the data. You need to give as input to the method for feature selection a X and y pandas DataFrame. X is the dataset with all the features that we want to evaluate and y the label to be predicted. **It is highly recommended to create a mapping between features and a list of number.** For example each feature is associated with a number. Here is an example of the data pre-processing step on a data set with 14 features including 1 label.
+The first step is pre-processing the data. The feature selection method takes two pandas inputs, X and y: X is the dataset with all the features that we want to evaluate and y is the label to be predicted. Here is an example of the data pre-processing step on a data set with 14 features and 1 label.
 ```python
 import pandas as pd
 
@@ -37,7 +37,7 @@ X = australian_data.drop(australian_data.columns[14], axis=1)
 y = australian_data[australian_data.columns[14]]
 ```
 
-After this step we can simply run a feature selection and ranking process that maximises a metric. 
+After this step we can simply run the feature selection and ranking process that maximises balanced accuracy for classification problems. 
 
 ```python
 from FSRLearning import FeatureSelectorRL
@@ -56,8 +56,8 @@ FeatureSelectorRL has several parameters that can be tuned:
 - eps (float [0, 1], default = 0.1) : Probability of choosing a random next state. 0 is an only greedy algorithm and 1 is an only random algorithm.
 - alpha (float [0, 1], default = 0.5) : Controls the rate of updates. 0 is a very not updating state and 1 is a very updating state.
 - gamma (float [0, 1], default = 0.7) : Discount factor to moderate the effect of observing the next state. 0 exhibits shortsighted behavior and 1 exhibits farsighted behavior.
-nb_iter (integer, default = 100) : Number of sequences to go through the graph.
-starting_state ({"empty", "random"}, default = "empty") : Starting state of the algorithm. 
+- nb_iter (integer, default = 100) : Number of sequences to go through the graph.
+- starting_state ({"empty", "random"}, default = "empty") : Starting state of the algorithm. 
 
 The output of fit_predict is a 2-object tuple:
 - List:
@@ -68,23 +68,23 @@ The output of fit_predict is a 2-object tuple:
 - Integer:
   - Number of states visited.
 
-## Existing methods
+## Methods
 
-- Compare the performance of the FSRLearning library with RFE from Sickit-Learn :
+- Compare the performance of the FSRLearning library with RFE from Sickit-Learn:
 
 ```python
 fsrl_obj.compare_with_benchmark(X, y, results, RandomForestClassifier(n_jobs = -1))
 ```
 Returns some comparisons and plot a graph with the metric for each set of features selected. It is useful for parameters tuning. 
 
-- Get the evolution of the number of the visited states for the first time and the already visited states :
+- Get the evolution of the number of the visited states for the first time and the already visited states:
 
 ```python
 fsrl_obj.get_plot_ratio_exploration()
 ```
 Returns a plot. It is useful to get an overview of how the graph is browse and to tune the epsilon parameter (exploration parameter).
 
-- Get an overview of the relative impact of each feature on the model :
+- Get an overview of the relative impact of each feature on the model:
 
 ```python
 fsrl_obj.get_feature_strength(results)
@@ -92,7 +92,7 @@ fsrl_obj.get_feature_strength(results)
 
 Returns a bar plot.
 
-- Get an overview of the action of the stop conditions :
+- Get an overview of the action of the stop conditions:
 
 ```python
 fsrl_obj.get_depth_of_visited_states()
@@ -108,6 +108,6 @@ Returns a plot. It is useful to see how deep the Markovian Decision Process goes
 
 ## References
 
-This library has been implemented with the help of these two articles :
+This library has been implemented with the help of these two articles:
 - Sali Rasoul, Sodiq Adewole and Alphonse Akakpo, FEATURE SELECTION USING REINFORCEMENT LEARNING (2021)
 - Seyed Mehdin Hazrati Fard, Ali Hamzeh and Sattar Hashemi, USING REINFORCEMENT LEARNING TO FIND AN OPTIMAL SET OF FEATURES (2013)

From 170c832cb5e7e843c868bce3131bbca904457ba5 Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:15:45 -0500
Subject: [PATCH 09/15] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 331ded0..b76a582 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ The output of fit_predict is a 2-object tuple:
 
 ## Methods
 
-- Compare the performance of the FSRLearning library with RFE from Sickit-Learn:
+- Compare the performance of the FSRLearning library with RFECV from Sickit-Learn:
 
 ```python
 fsrl_obj.compare_with_benchmark(X, y, results, RandomForestClassifier(n_jobs = -1))

From f3b81ee49c7229c9a8da5ed298c45f80960b92dc Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:16:02 -0500
Subject: [PATCH 10/15] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b76a582..34fc810 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ The output of fit_predict is a 2-object tuple:
 
 ## Methods
 
-- Compare the performance of the FSRLearning library with RFECV from Sickit-Learn:
+- Compare the performance of the FSRLearning library with RFECV from Scikit-Learn:
 
 ```python
 fsrl_obj.compare_with_benchmark(X, y, results, RandomForestClassifier(n_jobs = -1))

From b99b6e9e1e246b6063bdafb1e798175c006591e1 Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:17:58 -0500
Subject: [PATCH 11/15] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 34fc810..a24ebed 100644
--- a/README.md
+++ b/README.md
@@ -70,7 +70,7 @@ The output of fit_predict is a 2-object tuple:
 
 ## Methods
 
-- Compare the performance of the FSRLearning library with RFECV from Scikit-Learn:
+- Compare the performance of the FSRLearning library with RFE from Scikit-Learn:
 
 ```python
 fsrl_obj.compare_with_benchmark(X, y, results, RandomForestClassifier(n_jobs = -1))

From 9394e2a732bccf32fed105341053b2fe55f25d96 Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:18:46 -0500
Subject: [PATCH 12/15] Update action.py

---
 FSRLearning/action.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/FSRLearning/action.py b/FSRLearning/action.py
index 524e88e..3fe3232 100644
--- a/FSRLearning/action.py
+++ b/FSRLearning/action.py
@@ -1,6 +1,6 @@
 class Action:
     """
-        Action Object
+    Action Object.
     """
     
     def __init__(self,
@@ -11,11 +11,11 @@ def __init__(self,
 
     def get_aorf(self, aor_historic: list) -> list:
         """
-        Update the ARO of a feature
+        Update the AOR of a feature.
 
-        aor_historic: get the not updated aor table
+        aor_historic: the AOR table before the update.
 
-        Return the AOR table
+        Return the AOR table.
         """
         
         # Get the feature played and information about it

From cd25181b69dc35b514397b6c76ef18f565d85532 Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:19:02 -0500
Subject: [PATCH 13/15] Update feature_selector.py

---
 FSRLearning/feature_selector.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/FSRLearning/feature_selector.py b/FSRLearning/feature_selector.py
index d12f54f..0d4d103 100644
--- a/FSRLearning/feature_selector.py
+++ b/FSRLearning/feature_selector.py
@@ -1,5 +1,3 @@
-from typing import Tuple, List
-
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -10,7 +8,6 @@
 from .fsrlearning import FeatureSelectionProcess
 
 from sklearn.feature_selection import RFE
-from sklearn.ensemble import RandomForestClassifier
 from sklearn.model_selection import cross_val_score, cross_validate
 from sklearn.base import is_classifier, is_regressor
 
@@ -228,7 +225,7 @@ def get_depth_of_visited_states(self):
 
     def compare_with_benchmark(self, X, y, results, estimator):
         """
-        Compare the performance of FeatureSelectorRL with RFE from Sickit-Learn.
+        Compare the performance of FeatureSelectorRL with RFE from Scikit-Learn.
         Return balanced accuracy score for classifiers and r2 score for regressors at each iteration 
         and plot the graph of these evolutions.
         

From 4d0e79c375063f1647f329bd8eebc56ea23b0f27 Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:20:48 -0500
Subject: [PATCH 14/15] Update fsrlearning.py

---
 FSRLearning/fsrlearning.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/FSRLearning/fsrlearning.py b/FSRLearning/fsrlearning.py
index 4fd270e..6e3a1e2 100644
--- a/FSRLearning/fsrlearning.py
+++ b/FSRLearning/fsrlearning.py
@@ -3,12 +3,12 @@
 
 class FeatureSelectionProcess:
     """
-        Init aor list such that aor = [[np.zeros(nb_of_features)], [np.zeros(nb_of_features)]]
+    Init aor list such that aor = [[np.zeros(nb_of_features)], [np.zeros(nb_of_features)]].
 
-        nb_of_features: Number of feature in the data set
-        eps: probability of choosing a random action (uniform or softmax)
-        alpha: 
-        gamma: 
+    nb_of_features: Number of features in the data set
+    eps: probability of choosing a random action (uniform or softmax)
+    alpha: Controls the rate of updates. 0 means no updating and 1 means the fastest updating
+    gamma: Discount factor to moderate the effect of observing the next state. 0 exhibits shortsighted behavior and 1 exhibits farsighted behavior
     """
 
     def __init__(self,
@@ -31,9 +31,9 @@ def __init__(self,
 
     def pick_random_state(self) -> State:
         """
-            Select a random state in all the possible state space
-            
-            Return a state randomly picked
+        Select a random state from the whole state space.
+        
+        Return a state randomly picked.
         """
         
         #Check if the dict is empty
@@ -47,9 +47,9 @@ def pick_random_state(self) -> State:
 
     def start_from_empty_set(self) -> State:
         """
-            Start from the empty set (with no feature selected)
-            
-            Returns the empty initial state
+        Start from the empty set (with no feature selected).
+        
+        Returns the empty initial state.
         """
         
         depth = 0
@@ -60,9 +60,9 @@ def start_from_empty_set(self) -> State:
 
     def add_to_historic(self, visited_state: State):
         """
-            Add to the feature structure historic function
+        Add the visited state to the feature structure historic.
 
-            visited_state: current state visited by the simulation
+        visited_state: current state visited by the simulation
         """
         
         state_depth: int = visited_state.number[0]
@@ -96,12 +96,12 @@ def add_to_historic(self, visited_state: State):
 
     def get_final_aor_sorted(self) -> list:
         """
-            Returns the aor table sorted by ascending
+        Returns the AOR table sorted in ascending order:
 
-            Index of the feature
-            Number of time the feature has been played
-            Value of the feature
-            Best feature (from the lowest to the biggest)
+        Index of the feature
+        Number of times the feature has been played
+        Value of the feature
+        Best feature (from the lowest to the biggest)
         """
 
         index: list = [i for i in range(self.nb_of_features)]

From d6022af5acdcc6f17edc006b568968cbc9f6009a Mon Sep 17 00:00:00 2001
From: Jay Ticku <147661917+JayTicku@users.noreply.github.com>
Date: Thu, 27 Jun 2024 18:20:59 -0500
Subject: [PATCH 15/15] Update state.py

---
 FSRLearning/state.py | 72 +++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 38 deletions(-)

diff --git a/FSRLearning/state.py b/FSRLearning/state.py
index be37a99..328f918 100644
--- a/FSRLearning/state.py
+++ b/FSRLearning/state.py
@@ -1,19 +1,18 @@
 from .action import Action
 
 import numpy as np
-import pandas as pd
 
 from sklearn.model_selection import cross_val_score
 from sklearn.base import is_classifier, is_regressor
 
 class State:
     """
-        State object
+    State object.
 
-        number: position in the dictionary of the graph
-        description: represents the set of feature in the set
-        v_value: V value of the state
-        nb_visited: number of times that the set has been visited
+    number: position in the dictionary of the graph
+    description: represents the set of features in the state
+    v_value: V value of the state
+    nb_visited: number of times that the set has been visited
     """
     
     def __init__(self, 
@@ -30,9 +29,9 @@ def __init__(self,
 
     def get_reward(self, estimator, X, y) -> float:
         """
-            Returns the reward of a set of variable
+        Returns the reward of a set of variables.
 
-            estimator: type of estimator with which we want to evaluate the data
+        estimator: type of estimator with which we want to evaluate the data
         """
         
         # Train estimator with state_t variable and state t+1 variables and compute the diff of the accuracy
@@ -42,23 +41,20 @@ def get_reward(self, estimator, X, y) -> float:
                 return 0
             else:
                 # The state has never been visited and init the reward
-                df = pd.concat([X.iloc[:, self.description], y], axis = 1)
-                df = df.drop_duplicates(ignore_index = True) 
-
                 if is_classifier(estimator):
-                    min_samples = df.iloc[:, -1].value_counts().min()
+                    min_samples = y.value_counts().min()
                     if min_samples >= 5:
-                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = 5, scoring = 'balanced_accuracy'))
+                        accuracy = np.mean(cross_val_score(estimator, X.iloc[:, self.description], y, cv = 5, scoring = 'balanced_accuracy'))
                     elif min_samples < 5 and min_samples >= 2:
-                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = min_samples, scoring = 'balanced_accuracy'))
+                        accuracy = np.mean(cross_val_score(estimator, X.iloc[:, self.description], y, cv = min_samples, scoring = 'balanced_accuracy'))
                     else:
                         accuracy = 0
                 elif is_regressor(estimator):
-                    num_samples = len(df)
+                    num_samples = len(y)
                     if num_samples >= 10:
-                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = 5, scoring = 'r2'))
+                        accuracy = np.mean(cross_val_score(estimator, X.iloc[:, self.description], y, cv = 5, scoring = 'r2'))
                     elif num_samples < 10 and num_samples >= 4:
-                        accuracy = np.mean(cross_val_score(estimator, df.iloc[:, :-1], df.iloc[:, -1], cv = num_samples // 2, scoring = 'r2'))
+                        accuracy = np.mean(cross_val_score(estimator, X.iloc[:, self.description], y, cv = num_samples // 2, scoring = 'r2'))
                     else:
                         accuracy = 0
                 else:
@@ -74,12 +70,12 @@ def get_reward(self, estimator, X, y) -> float:
 
     def select_action(self, feature_structure: dict, eps: float, aorf_histo: list, is_empty_state: bool):
         """
-            Returns an action object
+        Returns an action object.
 
-            feature_structure: current dictionnary of the structure of the graph
-            eps: probability of choosing a random action [between 0 and 1]
+        feature_structure: current dictionary of the structure of the graph
+        eps: probability of choosing a random action [between 0 and 1]
 
-            This method enables to train only once a model and get the accuracy
+        This method makes it possible to train a model only once and get the accuracy.
         """
         
         #We get the neighboors
@@ -108,10 +104,10 @@ def select_action(self, feature_structure: dict, eps: float, aorf_histo: list, i
         
     def get_argmax(self, get_neigh: list, aorf_histo):
         """
-            Returns the argmax of the list of neighbors 
+        Returns the argmax of the list of neighbors.
 
-            get_neigh: list of the neighbors of the self state
-            aorf_histo: value of the aor
+        get_neigh: list of the neighbors of the self state
+        aorf_histo: value of the aor
         """
         
         #We select a state where the possible next feature has the maximum AORf
@@ -133,10 +129,10 @@ def get_argmax(self, get_neigh: list, aorf_histo):
 
     def get_neighboors(self, feature_structure: dict, feature_list: list) -> list:
         """
-            Returns the list of the neighboors of the current state
+        Returns the list of the neighbors of the current state.
 
-            feature_structure: current dictionnary of the structure of the graph
-            feature_list: list of the int identifiers of the features in the data set (len = number of features in the datas set)
+        feature_structure: current dictionary of the structure of the graph
+        feature_list: list of the int identifiers of the features in the data set (len = number of features in the data set)
         """
         
         neigh_depth_graph: int = self.number[0] + 1
@@ -165,24 +161,24 @@ def get_neighboors(self, feature_structure: dict, feature_list: list) -> list:
 
     def update_v_value(self, alpha: float, gamma: float, next_state) -> float:
         """
-            Update the v_value of a state
+        Update the v_value of a state.
 
-            Alpha [0; 1] : rate of updates
-            Gamma [0; 1] : discount factor to moderate the effect of observing the next state (0=shortsighted; 1=farsighted)
-            next_state: the next state that has been chosen by the eps_greedy algorithm
+        Alpha [0; 1] : rate of updates
+        Gamma [0; 1] : discount factor to moderate the effect of observing the next state (0=shortsighted; 1=farsighted)
+        next_state: the next state that has been chosen by the eps_greedy algorithm
 
-            Returns a float number
+        Updates self.v_value in place; nothing is returned.
         """
         
         self.v_value += alpha * ((next_state.reward - self.reward) + gamma * next_state.v_value - self.v_value)   
         
     def is_final(self, nb_of_features: int) -> bool:
         """
-            Check if a state is a final state (with all the features in the state)
+        Check if a state is a final state (with all the features in the state).
 
-            nb_of_features: number of features in the data set 
+        nb_of_features: number of features in the data set 
 
-            Returns True if all the possible features are in the state
+        Returns True if all the possible features are in the state.
         """
         
         if len(self.description) == nb_of_features:
@@ -192,11 +188,11 @@ def is_final(self, nb_of_features: int) -> bool:
 
     def is_equal(self, compared_state) -> bool:
         """
-            Compare if two State objects are equal
+        Compare if two State objects are equal.
 
-            compared_state: state to be compared with the self state
+        compared_state: state to be compared with the self state
 
-            Returns True if yes else returns False
+        Returns True if yes else returns False.
         """
         
         if set(self.description) == set(compared_state.description):