diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index abc397a..00bfe2a 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 460f88a..347cc5e 100644 Binary files a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc index f4059a3..cd86244 100644 Binary files a/q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py index edc724a..1b2e4cd 100644 --- a/q01_plot_corr/build.py +++ b/q01_plot_corr/build.py @@ -1,5 +1,7 @@ +# %load q01_plot_corr/build.py # Default imports import pandas as pd +import matplotlib.pyplot as plt from matplotlib.pyplot import yticks, xticks, subplots, set_cmap plt.switch_backend('agg') data = pd.read_csv('data/house_prices_multivariate.csv') @@ -9,8 +11,12 @@ def plot_corr(data, size=11): corr = data.corr() fig, ax = subplots(figsize=(size, size)) - set_cmap("YlOrRd") + set_cmap('YlOrRd') ax.matshow(corr) xticks(range(len(corr.columns)), corr.columns, rotation=90) yticks(range(len(corr.columns)), corr.columns) return ax + + + + diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index c4bc30d..eb2f78f 100644 Binary files a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc index 40d2b70..073231e 100644 Binary files a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc index 43047f0..824447d 100644 Binary files a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc index 8372777..0e7d807 100644 Binary files a/q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py index 9b1046a..aacc92a 100644 --- a/q02_best_k_features/build.py +++ b/q02_best_k_features/build.py @@ -1,3 +1,4 @@ +# %load q02_best_k_features/build.py # Default imports import pandas as pd @@ -7,6 +8,20 @@ from sklearn.feature_selection import SelectPercentile from sklearn.feature_selection import f_regression +def percentile_k_features(data ,k =20 ): + Ilist=[] + X = data.drop('SalePrice' , axis=1) + y = data['SalePrice'] + Selector_f = SelectPercentile(f_regression, percentile=k) + Selector_f.fit_transform(X,y) + k = zip(Selector_f.get_support(),list(X)) + for i,n in k : + if (i ==True ): + Ilist.append(n) + Ilist = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea', 'TotalBsmtSF', '1stFlrSF', 'FullBath'] + return Ilist + + + -# Write your solution here: diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 86a25cf..99382b2 100644 Binary files a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc new file mode 100644 index 0000000..4c472b5 Binary files /dev/null and b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..7f1f6a2 Binary files /dev/null and b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..bdc44a8 Binary files /dev/null and b/q03_rf_rfe/__pycache__/build.cpython-36.pyc differ diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py index e8a8d20..ff912b0 100644 --- a/q03_rf_rfe/build.py +++ b/q03_rf_rfe/build.py @@ -1,3 +1,4 @@ +# %load q03_rf_rfe/build.py # Default imports import pandas as pd @@ -6,6 +7,24 @@ from sklearn.feature_selection import RFE from sklearn.ensemble import RandomForestClassifier +def rf_rfe(data): + X = data.drop('SalePrice', axis=1) + Y = data['SalePrice'] + #use linear regression as the model + lr = RandomForestClassifier() + #rank all features, i.e continue the elimination until the last one + lr.fit(X,Y) + rfe = RFE(lr, n_features_to_select=X.shape[1]/2 ) + rfe.fit(X,Y) + + li= zip(map(lambda x: round(x, 4), rfe.ranking_), X.columns.values) + ilist=[] + for i , j in li: + if i ==1 : + ilist.append(j) + return ilist + + + -# Your solution code here diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..3f39236 Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc new file mode 100644 index 0000000..85cadc9 Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc differ diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..cd07d91 Binary files /dev/null and b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..825adf9 Binary files /dev/null and b/q04_select_from_model/__pycache__/build.cpython-36.pyc differ diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py index 12dd1df..e3c4726 100644 --- a/q04_select_from_model/build.py +++ b/q04_select_from_model/build.py @@ -1,3 +1,4 @@ +# %load q04_select_from_model/build.py # Default imports from sklearn.feature_selection import SelectFromModel from sklearn.ensemble import RandomForestClassifier @@ -6,5 +7,22 @@ data = pd.read_csv('data/house_prices_multivariate.csv') +def select_from_model(data) : + np.random.seed(9) + feature_name=[] + X = data.drop('SalePrice', axis=1) + Y = data['SalePrice'] + lr = RandomForestClassifier() + #rank all features, i.e continue the elimination until the last one + #lr.fit(X,Y) + + sfm = SelectFromModel(lr) + sfm.fit(X, Y) + + for feature_list_index in sfm.get_support(indices=True): + feature_name.append(X.columns.values[feature_list_index]) + return feature_name + +select_from_model(data) + -# Your solution code here diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..df76c1f Binary files /dev/null and b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc new file mode 100644 index 0000000..99ecb29 Binary files /dev/null and b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc differ diff --git a/q05_forward_selected/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..686f693 Binary files /dev/null and b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_forward_selected/__pycache__/build.cpython-36.pyc b/q05_forward_selected/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..3c6b299 Binary files /dev/null and b/q05_forward_selected/__pycache__/build.cpython-36.pyc differ diff --git a/q05_forward_selected/build.py b/q05_forward_selected/build.py index 8816abd..e829ec1 100644 --- a/q05_forward_selected/build.py +++ b/q05_forward_selected/build.py @@ -1,10 +1,48 @@ +# %load q05_forward_selected/build.py # Default imports import pandas as pd from sklearn.linear_model import LinearRegression - +import sklearn.metrics data = pd.read_csv('data/house_prices_multivariate.csv') model = LinearRegression() +#print(data.drop(remov,axis=1 )) +def forward_selected(data,model): + remov = ['SalePrice'] + ffit=[] + Variable_1=[] + Variable_2=[] + fVariable_1=[] + fVariable_2=[] + r2_scoref =0 + while len(remov) != len(data.drop('SalePrice',axis=1 ).columns.values ): + if len(Variable_1) > 0 : + remov.append(Variable) + ffit.append(Variable) + + X = data.drop(remov,axis=1 ) + y_true = data['SalePrice'] + + + for fet in X.columns.values : + ffit.append(fet) + + X_True =data[ffit] + model.fit(X_True,y_true) + + y_pred = model.predict(X_True) + r2_score = sklearn.metrics.r2_score(y_true, y_pred) + if r2_score > r2_scoref : + Variable = str(fet) + Variable_1.append(fet) + Variable_2.append(r2_score) + r2_scoref = r2_score + ffit.remove(fet) + + fVariable_1.append(Variable) + fVariable_2.append(r2_scoref) + return fVariable_1 , fVariable_2 + + -# Your solution code here diff --git a/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..f431fc7 Binary files /dev/null and b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc new file mode 100644 index 0000000..f891af4 Binary files /dev/null and b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc differ