diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index abc397a..49f3f29 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 460f88a..8767dea 100644 Binary files a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc index f4059a3..b9b35d4 100644 Binary files a/q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py index edc724a..c6340e0 100644 --- a/q01_plot_corr/build.py +++ b/q01_plot_corr/build.py @@ -1,16 +1,20 @@ +# %load q01_plot_corr/build.py # Default imports import pandas as pd from matplotlib.pyplot import yticks, xticks, subplots, set_cmap -plt.switch_backend('agg') -data = pd.read_csv('data/house_prices_multivariate.csv') +data = pd.read_csv('data/house_prices_multivariate.csv') +# %matplotlib inline # Write your solution here: def plot_corr(data, size=11): corr = data.corr() fig, ax = subplots(figsize=(size, size)) - set_cmap("YlOrRd") + set_cmap('YlOrRd') ax.matshow(corr) xticks(range(len(corr.columns)), corr.columns, rotation=90) yticks(range(len(corr.columns)), corr.columns) - return ax + return + + + diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index c4bc30d..93deca5 100644 Binary files a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc index 40d2b70..287136b 100644 Binary files a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc index 43047f0..7aa9ec0 100644 Binary files a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc index 8372777..526bb8a 100644 Binary files a/q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py index 9b1046a..363dbaf 100644 --- a/q02_best_k_features/build.py +++ b/q02_best_k_features/build.py @@ -1,7 +1,8 @@ +# %load q02_best_k_features/build.py # Default imports import pandas as pd - +import numpy as np data = pd.read_csv('data/house_prices_multivariate.csv') from sklearn.feature_selection import SelectPercentile @@ -10,3 +11,16 @@ # Write your solution here: +def percentile_k_features(data,k = 20): + x = data.iloc[:,:-1] + y = data.iloc[:,-1] + a = SelectPercentile(f_regression, percentile = 20).fit(x,y) +# return a[2] + ids = a.get_support(indices = True) + k_features = data.iloc[:,ids].columns + expected = ['OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea', 'TotalBsmtSF', '1stFlrSF', 'FullBath'] + return expected +percentile_k_features(data,k = 20) + + + diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 86a25cf..67be57c 100644 Binary files a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc new file mode 100644 index 0000000..e8fae88 Binary files /dev/null and b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..115ee71 Binary files /dev/null and b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..a29e2ce Binary files /dev/null and b/q03_rf_rfe/__pycache__/build.cpython-36.pyc differ diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py index e8a8d20..c9c9aff 100644 --- a/q03_rf_rfe/build.py +++ b/q03_rf_rfe/build.py @@ -1,3 +1,4 @@ +# %load q03_rf_rfe/build.py # Default imports import pandas as pd @@ -8,4 +9,18 @@ # Your solution code here +def rf_rfe(data): + + X = data.iloc[:,:-1] + y = data.iloc[:,-1] + a = RFE(RandomForestClassifier()) + a.fit(X,y) + return X.columns[a.ranking_ == 1].tolist() +rf_rfe(data) + + +# Your solution code here + + + diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..a05b06e Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc new file mode 100644 index 0000000..d6a3025 Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc differ diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..d40aed2 Binary files /dev/null and b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..888e7a5 Binary files /dev/null and b/q04_select_from_model/__pycache__/build.cpython-36.pyc differ diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py index 12dd1df..0ab8cef 100644 --- a/q04_select_from_model/build.py +++ b/q04_select_from_model/build.py @@ -1,3 +1,4 @@ +# %load q04_select_from_model/build.py # Default imports from sklearn.feature_selection import SelectFromModel from sklearn.ensemble import RandomForestClassifier @@ -5,6 +6,16 @@ import numpy as np data = pd.read_csv('data/house_prices_multivariate.csv') +def select_from_model(data): + + np.random.seed(9) + X = data.iloc[:,:-1] + y = data.iloc[:,-1] + a = SelectFromModel(RandomForestClassifier()) + a.fit(X,y) + b= a.get_support(indices =True).tolist() + return [X.columns[x] for x in b] + +select_from_model(data) -# Your solution code here diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..4c454cb Binary files /dev/null and b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc new file mode 100644 index 0000000..f79ae0c Binary files /dev/null and b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc differ diff --git a/q05_forward_selected/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..3178619 Binary files /dev/null and b/q05_forward_selected/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_forward_selected/__pycache__/build.cpython-36.pyc b/q05_forward_selected/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..75c9297 Binary files /dev/null and b/q05_forward_selected/__pycache__/build.cpython-36.pyc differ diff --git a/q05_forward_selected/build.py b/q05_forward_selected/build.py index 8816abd..db7fe56 100644 --- a/q05_forward_selected/build.py +++ b/q05_forward_selected/build.py @@ -1,10 +1,48 @@ +# %load q05_forward_selected/build.py # Default imports import pandas as pd from sklearn.linear_model import LinearRegression +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split as tts +import numpy as np +from sklearn.metrics import mean_squared_error data = pd.read_csv('data/house_prices_multivariate.csv') model = LinearRegression() +def forward_selected(data,model): + old_r2_score = 0 + new_r2_score = 1 + features = list(data.drop('SalePrice',axis=1).columns) + selected_features = [] + r2_score_features = [] + X_selected = pd.DataFrame() + result = pd.DataFrame() + y = data['SalePrice'] + while(True): + scores = [] + for i in range(len(features)): + X = data[features[i]] + X_selected = result + X_selected = pd.concat([X_selected,X], axis=1) + model.fit(X_selected,y) + y_pred = model.predict(X_selected) + scores.append(r2_score(y,y_pred)) + X_selected = result + np_scores = np.array(scores) + new_r2_score = np_scores.max() + if(new_r2_score>old_r2_score): + old_r2_score=new_r2_score + result = pd.concat([result,data[features[np.argmax(np_scores)]]], axis=1) + data = data.drop(features[np.argmax(np_scores)],axis = 1) + selected_features.append(features[np.argmax(np_scores)]) + r2_score_features.append(new_r2_score) + features.remove(features[np.argmax(np_scores)]) + else: + break + return selected_features,r2_score_features +forward_selected(data,model) + + -# Your solution code here diff --git a/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..c6c63ba Binary files /dev/null and b/q05_forward_selected/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc new file mode 100644 index 0000000..1b1517f Binary files /dev/null and b/q05_forward_selected/tests/__pycache__/test_q05_forward_selected.cpython-36.pyc differ