diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index abc397a..82957ce 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 460f88a..1a9746d 100644 Binary files a/q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/__pycache__/build.cpython-36.pyc b/q01_plot_corr/__pycache__/build.cpython-36.pyc index f4059a3..fec97cc 100644 Binary files a/q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q01_plot_corr/build.py b/q01_plot_corr/build.py index edc724a..c81da1d 100644 --- a/q01_plot_corr/build.py +++ b/q01_plot_corr/build.py @@ -1,16 +1,21 @@ +# %load q01_plot_corr/build.py # Default imports import pandas as pd +import matplotlib.pyplot as plt from matplotlib.pyplot import yticks, xticks, subplots, set_cmap plt.switch_backend('agg') +import seaborn as sns data = pd.read_csv('data/house_prices_multivariate.csv') # Write your solution here: def plot_corr(data, size=11): - corr = data.corr() - fig, ax = subplots(figsize=(size, size)) - set_cmap("YlOrRd") - ax.matshow(corr) - xticks(range(len(corr.columns)), corr.columns, rotation=90) - yticks(range(len(corr.columns)), corr.columns) - return ax + sns.heatmap(data.corr(), cmap='YlOrRd') + plt.show() +plot_corr(data, size=11) +sns.heatmap(data.corr(), cmap='YlOrRd') +plt.show() +data.head() +data.dtypes + + diff --git a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index c4bc30d..0441c58 100644 Binary files a/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc index 40d2b70..b4de5fd 100644 Binary files a/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc and b/q01_plot_corr/tests/__pycache__/test_q01_plot_corr.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc index 43047f0..00c66bf 100644 Binary files a/q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/__pycache__/build.cpython-36.pyc b/q02_best_k_features/__pycache__/build.cpython-36.pyc index 8372777..f750ea7 100644 Binary files a/q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q02_best_k_features/build.py b/q02_best_k_features/build.py index 9b1046a..e37e092 100644 --- a/q02_best_k_features/build.py +++ b/q02_best_k_features/build.py @@ -1,3 +1,4 @@ +# %load q02_best_k_features/build.py # Default imports import pandas as pd @@ -9,4 +10,30 @@ # Write your solution here: +def percentile_k_features(df,k=20): + X=df.iloc[:,:-1] + y=df.iloc[:,-1] + sp = SelectPercentile(f_regression,percentile=k) + sp.fit_transform(X,y) + features = X.columns.values[sp.get_support()] + scores = sp.scores_[sp.get_support()] + fs_score = list(zip(features,scores)) + df = pd.DataFrame(fs_score,columns=['Name','Score']) + return df.sort_values(['Score','Name'],ascending = [False,True])['Name'].tolist() + + + +X=data.iloc[:,:-1] +y=data.iloc[:,-1] +sp = SelectPercentile(f_regression,percentile=20) +sp.fit_transform(X,y) + +features = X.columns.values[sp.get_support()] +scores = sp.scores_[sp.get_support()] +scores +fs_score = list(zip(features,scores)) +df = pd.DataFrame(fs_score,columns=['Name','Score']) +df.head() +df.sort_values(['Score','Name'],ascending = [False,True])#['Name'].tolist() + diff --git a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 86a25cf..3f0afcc 100644 Binary files a/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc new file mode 100644 index 0000000..3b9f0c9 Binary files /dev/null and b/q02_best_k_features/tests/__pycache__/test_q02_percentile_k_features.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..2d7f92c Binary files /dev/null and b/q03_rf_rfe/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/__pycache__/build.cpython-36.pyc b/q03_rf_rfe/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..fc94651 Binary files /dev/null and b/q03_rf_rfe/__pycache__/build.cpython-36.pyc differ diff --git a/q03_rf_rfe/build.py b/q03_rf_rfe/build.py index e8a8d20..9232ede 100644 --- a/q03_rf_rfe/build.py +++ b/q03_rf_rfe/build.py @@ -1,3 +1,4 @@ +# %load q03_rf_rfe/build.py # Default imports import pandas as pd @@ -9,3 +10,14 @@ # Your solution code here +def rf_rfe(df): + X,y = df.iloc[:,:-1],df.iloc[:,-1] + model = RandomForestClassifier() + features_no = X.columns + rfe = RFE(model,len(X.columns)/2) + rfe.fit(X,y) + return X.columns.values[rfe.get_support()].tolist() +rf_rfe(data) +data.shape + + diff --git a/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..24804d1 Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc new file mode 100644 index 0000000..4836394 Binary files /dev/null and b/q03_rf_rfe/tests/__pycache__/test_q03_rf_rfe.cpython-36.pyc differ diff --git a/q04_select_from_model/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..c849386 Binary files /dev/null and b/q04_select_from_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_select_from_model/__pycache__/build.cpython-36.pyc b/q04_select_from_model/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..0ac5d46 Binary files /dev/null and b/q04_select_from_model/__pycache__/build.cpython-36.pyc differ diff --git a/q04_select_from_model/build.py b/q04_select_from_model/build.py index 12dd1df..fcca888 100644 --- a/q04_select_from_model/build.py +++ b/q04_select_from_model/build.py @@ -1,3 +1,4 @@ +# %load q04_select_from_model/build.py # Default imports from sklearn.feature_selection import SelectFromModel from sklearn.ensemble import RandomForestClassifier @@ -8,3 +9,11 @@ # Your solution code here +def select_from_model(df): + X,y = df.iloc[:,:-1],df.iloc[:,-1] + clf = RandomForestClassifier(random_state=9) + model = SelectFromModel(clf) + model.fit_transform(X,y) + return X.columns.values[model.get_support()].tolist() + + diff --git a/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..deff3cf Binary files /dev/null and b/q04_select_from_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc new file mode 100644 index 0000000..982d54a Binary files /dev/null and b/q04_select_from_model/tests/__pycache__/test_q04_select_from_model.cpython-36.pyc differ