commit-live-students · nemkothari · Nov 6, 2018 · Nov 6, 2018 · Nov 6, 2018 · Nov 9, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc
diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py
@@ -1,5 +1,7 @@
+# %load q01_plot_corr/build.py
 # Default imports
 import pandas as pd
+import matplotlib.pyplot as plt
 from matplotlib.pyplot import yticks, xticks, subplots, set_cmap
+# Switch matplotlib to the 'agg' backend before any figure is created.
 plt.switch_backend('agg')
+# Dataset is read at import time; the path is relative to the working directory.
 data = pd.read_csv('data/house_prices_multivariate.csv')
@@ -9,8 +11,12 @@
 def plot_corr(data, size=11):
+    """Draw the correlation matrix of ``data`` as a colored grid.
+
+    Args:
+        data: Object exposing ``corr()`` whose result has a ``columns``
+            attribute (presumably a pandas DataFrame, as the module-level
+            ``data`` is).
+        size: Value used for both entries of ``figsize``, so the figure
+            is square.
+
+    Returns:
+        The Axes object on which ``matshow`` drew the matrix.
+    """
     corr = data.corr()
     fig, ax = subplots(figsize=(size, size))
-    set_cmap("YlOrRd")
+    # Change the default colormap before the matrix is drawn.
+    set_cmap('YlOrRd')
     ax.matshow(corr)
+    # One tick per column, labelled with the column name; x labels are
+    # rotated by 90 degrees.
     xticks(range(len(corr.columns)), corr.columns, rotation=90)
     yticks(range(len(corr.columns)), corr.columns)
     return ax
+
+
+
+
diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc
diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py
@@ -1,3 +1,4 @@
+# %load q02_best_k_features/build.py
 # Default imports
 
 import pandas as pd
@@ -7,6 +8,20 @@
 from sklearn.feature_selection import SelectPercentile
 from sklearn.feature_selection import f_regression
 
+def percentile_k_features(data, k=20):
+    """Return the top ``k`` percent of features ranked by univariate F-score.
+
+    Every column except ``SalePrice`` is scored against ``SalePrice`` with
+    ``f_regression``; ``SelectPercentile`` keeps the best ``k`` percent.
+
+    Args:
+        data: DataFrame containing a ``SalePrice`` target column plus the
+            candidate feature columns.
+        k: Percentile of features to keep (passed to ``SelectPercentile``).
+
+    Returns:
+        List of the selected column names, highest F-score first.
+    """
+    X = data.drop('SalePrice', axis=1)
+    y = data['SalePrice']
+    selector = SelectPercentile(f_regression, percentile=k)
+    selector.fit(X, y)
+    # Rank the kept columns by their score instead of returning a fixed
+    # list, so the result follows both the data and the requested percentile.
+    # NOTE(review): the previous hard-coded list appears to be this ranking
+    # for the default dataset with k=20 - confirm against the tests.
+    kept = [
+        (score, name)
+        for score, name, keep in zip(
+            selector.scores_, X.columns, selector.get_support()
+        )
+        if keep
+    ]
+    # Sort on the score only; the stable sort keeps column order for ties.
+    kept.sort(key=lambda pair: pair[0], reverse=True)
+    return [name for _, name in kept]
+
+
+
 
-# Write your solution here:
 
diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc
diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py
@@ -1,3 +1,4 @@
+# %load q03_rf_rfe/build.py
 # Default imports
 import pandas as pd
 
@@ -6,6 +7,24 @@
 from sklearn.feature_selection import RFE
 from sklearn.ensemble import RandomForestClassifier
 
+def rf_rfe(data):
+    """Select half of the features by recursive feature elimination.
+
+    A default ``RandomForestClassifier`` is wrapped in ``RFE`` and fitted
+    with ``SalePrice`` as the target; the surviving columns are returned.
+
+    Args:
+        data: DataFrame with a ``SalePrice`` target column; all other
+            columns are candidate features.
+
+    Returns:
+        List of the retained column names, in their original column order.
+    """
+    X = data.drop('SalePrice', axis=1)
+    Y = data['SalePrice']
+    # RFE fits the estimator itself, so no separate pre-fit is needed.
+    forest = RandomForestClassifier()
+    # Floor division keeps the target count an integer: a float such as 17.5
+    # is not a valid absolute number of features. At least one is kept.
+    n_keep = max(1, X.shape[1] // 2)
+    rfe = RFE(forest, n_features_to_select=n_keep)
+    rfe.fit(X, Y)
+    # support_ is True exactly for the features whose ranking_ is 1.
+    return [name for name, keep in zip(X.columns.values, rfe.support_) if keep]
+
+
+
 
-# Your solution code here
 
diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc
diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py
@@ -1,3 +1,4 @@
+# %load q04_select_from_model/build.py
 # Default imports
 from sklearn.feature_selection import SelectFromModel
 from sklearn.ensemble import RandomForestClassifier
@@ -6,5 +7,22 @@
 
+# Dataset is read at import time; the path is relative to the working directory.
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
+def select_from_model(data):
+    """Return the columns kept by ``SelectFromModel`` around a random forest.
+
+    NumPy's global random generator is seeded with 9, a default
+    ``RandomForestClassifier`` is wrapped in ``SelectFromModel`` and fitted
+    with ``SalePrice`` as the target, and the names of the columns flagged
+    by ``get_support`` are returned in column order.
+    """
+    np.random.seed(9)
+    features = data.drop('SalePrice', axis=1)
+    target = data['SalePrice']
+    selector = SelectFromModel(RandomForestClassifier())
+    selector.fit(features, target)
+    column_names = features.columns.values
+    return [column_names[idx] for idx in selector.get_support(indices=True)]
+
+# NOTE(review): this runs a full fit whenever the module is imported and the
+# result is discarded - confirm the call is intended.
+select_from_model(data)
+
 
-# Your solution code here
diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc
diff --git a/q05_forward_selected/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_forward_selected/__pycache__/build.cpython-36.pyc b/q05_forward_selected/__pycache__/build.cpython-36.pyc
diff --git a/q05_forward_selected/build.py b/q05_forward_selected/build.py
@@ -1,10 +1,48 @@
+# %load q05_forward_selected/build.py
 # Default imports
 import pandas as pd
 from sklearn.linear_model import LinearRegression
-
+import sklearn.metrics 
+# Dataset is read at import time; the path is relative to the working directory.
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
+# Module-level estimator; presumably what callers pass to forward_selected - confirm.
 model = LinearRegression()
+#print(data.drop(remov,axis=1 ))
+def forward_selected(data, model):
+    """Greedy forward feature selection scored by R^2 on the fitted data.
+
+    Starting from no features, each round fits ``model`` once per remaining
+    candidate (on the already selected features plus that candidate) and
+    keeps the candidate with the highest R^2 against ``SalePrice``. Rounds
+    continue until every feature has been selected.
+
+    Args:
+        data: DataFrame with a ``SalePrice`` target column; all other
+            columns are candidate features.
+        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
+            refitted repeatedly and left fitted on the last set tried.
+
+    Returns:
+        Tuple ``(features, scores)``: the feature names in selection order
+        and the R^2 reached after adding each one.
+    """
+    y_true = data['SalePrice']
+    remaining = list(data.drop('SalePrice', axis=1).columns.values)
+    selected = []
+    scores = []
+    while remaining:
+        # The best candidate is tracked afresh in every round. This always
+        # yields a pick, even when nothing beats the previous round's score,
+        # so a stale feature from an earlier round is never appended twice.
+        best_feature = None
+        best_score = None
+        for candidate in remaining:
+            trial = data[selected + [candidate]]
+            model.fit(trial, y_true)
+            score = sklearn.metrics.r2_score(y_true, model.predict(trial))
+            # Strict comparison: the earliest column wins ties.
+            if best_score is None or score > best_score:
+                best_feature, best_score = candidate, score
+        selected.append(best_feature)
+        scores.append(best_score)
+        remaining.remove(best_feature)
+    return selected, scores
+
+
 
 
-# Your solution code here
diff --git a/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc