diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index abc397a..f4c55df 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 460f88a..d41cc22 100644 Binary files a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc index f4059a3..ecd2644 100644 Binary files a/q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py index edc724a..d029422 100644 --- a/q01_plot_corr/build.py +++ b/q01_plot_corr/build.py @@ -1,5 +1,7 @@ +# %load q01_plot_corr/build.py # Default imports import pandas as pd +import matplotlib.pyplot as plt from matplotlib.pyplot import yticks, xticks, subplots, set_cmap plt.switch_backend('agg') data = pd.read_csv('data/house_prices_multivariate.csv') @@ -9,8 +11,10 @@ def plot_corr(data, size=11): corr = data.corr() fig, ax = subplots(figsize=(size, size)) - set_cmap("YlOrRd") + set_cmap('YlOrRd') ax.matshow(corr) xticks(range(len(corr.columns)), corr.columns, rotation=90) yticks(range(len(corr.columns)), corr.columns) return ax + + diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index c4bc30d..5ed5baf 100644 Binary files a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc index 40d2b70..a96d868 100644 Binary files a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc index 43047f0..3010671 100644 Binary files a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc index 8372777..3ec9c3b 100644 Binary files a/q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py index 9b1046a..872eb64 100644 --- a/q02_best_k_features/build.py +++ b/q02_best_k_features/build.py @@ -1,12 +1,29 @@ +# %load q02_best_k_features/build.py # Default imports import pandas as pd +import numpy as np +from sklearn.feature_selection import SelectPercentile,f_regression data = pd.read_csv('data/house_prices_multivariate.csv') -from sklearn.feature_selection import SelectPercentile -from sklearn.feature_selection import f_regression +# Write your solution here: +def percentile_k_features(data, k = 20): + X = data.drop('SalePrice',axis=1) + y = data['SalePrice'] + + feat_col = X.columns + fs = SelectPercentile(f_regression, percentile=k) + + X_new = fs.fit_transform(X, y) + + imp_features_kth_percentile = [feat_col[i] for i in np.argsort(fs.scores_)[::-1]] + + #print (imp_features_kth_percentile[:7]) + + return imp_features_kth_percentile[:7] + +percentile_k_features(data,20) -# Write your solution here: diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 86a25cf..55a006e 100644 Binary files a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc new file mode 100644 index 0000000..bb7ff74 Binary files /dev/null and b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..2fcacc9 Binary files /dev/null and b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..b785a0e Binary files /dev/null and b/q03_rf_rfe/__pycache__/build.cpython-36.pyc differ diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py index e8a8d20..c9856f8 100644 --- a/q03_rf_rfe/build.py +++ b/q03_rf_rfe/build.py @@ -1,3 +1,4 @@ +# %load q03_rf_rfe/build.py # Default imports import pandas as pd @@ -6,6 +7,21 @@ from sklearn.feature_selection import RFE from sklearn.ensemble import RandomForestClassifier - # Your solution code here +def rf_rfe(data): + X = data.drop('SalePrice',axis=1) + y = data['SalePrice'] + random_forest_model = RandomForestClassifier() + + rfe = RFE(random_forest_model,n_features_to_select=len(X.columns)/2) + rfe = rfe.fit(X,y) + + #print (list(X.columns[rfe.support_])) + + return list(X.columns[rfe.support_]) + +rf_rfe(data) + + + diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..8f99804 Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc new file mode 100644 index 0000000..eb62a7a Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc differ