commit-live-students · tracedence · Nov 30, 2018 · Dec 6, 2018 · Dec 7, 2018 · Dec 7, 2018
diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc
diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py
@@ -1,16 +1,25 @@
+# %load q01_plot_corr/build.py
 # Default imports
 import pandas as pd
 from matplotlib.pyplot import yticks, xticks, subplots, set_cmap
+import matplotlib.pyplot as plt
 plt.switch_backend('agg')
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
 
-# Write your solution here:
+
+#Write your solution here:
 def plot_corr(data, size=11):
+    """Draw data.corr() as a heatmap on a size x size figure and return its Axes."""
     corr = data.corr()
     fig, ax = subplots(figsize=(size, size))
-    set_cmap("YlOrRd")
+    set_cmap('YlOrRd')
     ax.matshow(corr)
     xticks(range(len(corr.columns)), corr.columns, rotation=90)
     yticks(range(len(corr.columns)), corr.columns)
     return ax
+
+
+plot_corr(data,)
+data._get_numeric_data().columns
+
diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc
diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py
@@ -1,3 +1,4 @@
+# %load q02_best_k_features/build.py
 # Default imports
 
 import pandas as pd
@@ -10,3 +11,21 @@
 
 # Write your solution here:
 
+def percentile_k_features(df, K=20):
+    """Return the top-K-percentile features of df, highest f_regression score first.
+
+    Every column except the last is a candidate feature; the last is the target.
+    """
+    features, target = df.iloc[:, :-1], df.iloc[:, -1]
+    selector = SelectPercentile(f_regression, percentile=K)
+    # fit() is enough: only scores_ and the support mask are read afterwards
+    selector.fit(features, target)
+    ranking = pd.DataFrame(
+        {'support': selector.get_support(), 'values': selector.scores_},
+        index=features.columns,
+    )
+    ranking = ranking.sort_values('values', ascending=False)
+    return list(ranking[ranking.support].index)
+percentile_k_features(data ,20)
+
+
diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc
diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py
@@ -1,3 +1,4 @@
+# %load q03_rf_rfe/build.py
 # Default imports
 import pandas as pd
 
@@ -7,5 +8,18 @@
 from sklearn.ensemble import RandomForestClassifier
 
 
 # Your solution code here
+def rf_rfe(df):
+    """Return the feature names kept by RFE wrapped around a random forest.
+
+    The last column of ``df`` is the target; all other columns are candidates.
+    """
+    predictors = df.iloc[:, :-1]
+    target = df.iloc[:, -1]
+    # no n_features_to_select is passed, so RFE's own default applies
+    eliminator = RFE(RandomForestClassifier())
+    eliminator.fit(predictors, target)
+    return list(predictors.columns[eliminator.support_])
+rf_rfe(data)
+
 
diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc
diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py
@@ -1,3 +1,4 @@
+# %load q04_select_from_model/build.py
 # Default imports
 from sklearn.feature_selection import SelectFromModel
 from sklearn.ensemble import RandomForestClassifier
@@ -6,5 +7,16 @@
 
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
-
+np.random.seed(9)
 # Your solution code here
+def select_from_model(df):
+    """Return the names of the columns SelectFromModel keeps using a default random forest."""
+    predictors = df.iloc[:, :-1]  # every column except the last
+    target = df.iloc[:, -1]  # the last column is the label
+    selector = SelectFromModel(RandomForestClassifier())
+    selector.fit(predictors, target)
+    kept_mask = selector.get_support()
+    return list(predictors.columns[kept_mask])
+select_from_model(data)
+
+
diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc
diff --git a/q05_forward_selected/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_forward_selected/__pycache__/build.cpython-36.pyc b/q05_forward_selected/__pycache__/build.cpython-36.pyc
diff --git a/q05_forward_selected/build.py b/q05_forward_selected/build.py
@@ -1,10 +1,103 @@
+# %load q05_forward_selected/build.py
 # Default imports
 import pandas as pd
 from sklearn.linear_model import LinearRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import r2_score, accuracy_score
+model = LinearRegression()
 
 data = pd.read_csv('data/house_prices_multivariate.csv')
 
 model = LinearRegression()
 
 
 # Your solution code here
+
+
+
+def forward_selected(data, model):
+    """Greedy forward feature selection scored by ``model.score`` on the fitting data.
+
+    The last column of ``data`` is the target and every other column is a
+    candidate.  Each round refits ``model`` once per remaining candidate on
+    the already selected columns plus that candidate; the best-scoring
+    candidate is selected if it beats the best score seen so far.
+
+    Returns ``(selected, scores)``: the chosen column names in selection
+    order, and the running best score recorded after each round.
+    """
+    predictors = data.iloc[:, :-1]
+    target = data.iloc[:, -1]
+    remaining = list(predictors.columns)
+    selected = []
+    scores = []
+    # The best score and its column persist across rounds, so a round in
+    # which no candidate beats the running best selects nothing new.
+    best_score = -1000
+    best_column = ''
+    for _ in range(len(remaining)):
+        for candidate in remaining:
+            trial = selected + [candidate]
+            model.fit(predictors[trial], target)
+            trial_score = model.score(predictors[trial], target)
+            if trial_score > best_score:
+                best_score = trial_score
+                best_column = candidate
+        if best_column not in selected:
+            selected.append(best_column)
+            remaining.remove(best_column)
+        # one score entry per round, even when nothing was added
+        scores.append(best_score)
+    return selected, scores
+
+
+var1 , var2 = forward_selected(data, model)
+var2
+# from sklearn.linear_model import LinearRegression
+# from sklearn.model_selection import train_test_split
+# from sklearn.metrics import r2_score, accuracy_score
+# model = LinearRegression()
+
+
+
+# def forward_selected(X,y,i):
+#     X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state =i)
+#     l = []
+
+#     score = -1000
+#     c = ''
+#     variable_1 = []
+#     variable_2 = []
+#     column = list(X_train.columns)
+#     for i in range(len(column)):
+
+
+#         for col in column:
+#             #print(col)
+#             l.append(col)
+#             model.fit(X_train[l],y_train)
+#             y_pred = model.predict(X_test[l])
+#             acc = r2_score(y_pred, y_test)
+#             #print(col, acc)
+#             if acc > score:
+#                 score = acc
+#                 c = col
+#             l.pop(len(l)-1)
+# #         print('  ')
+# #         print(c, score)
+#         if c in l:
+#             pass
+#         else:
+#             l.append(c)
+#             column.remove(c)
+#             variable_2.append(c)
+#         variable_1.append(score)
+#     return variable_2
+# data.columns
+
+
+
+
+
+
+
diff --git a/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc
diff --git a/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc