def load_data(path):
    """Read the semicolon-delimited CSV at *path* into a DataFrame.

    Args:
        path: Location of the CSV file, forwarded to ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The parsed contents of the file.
    """
    frame = pd.read_csv(path, sep=";")
    return frame
def split_dataset(df, test_size=0.2, random_state=None):
    """Split *df* into train/test features and the ``G3`` target.

    The features are every column from ``school`` through ``G2`` (a
    label-based, inclusive slice); the target is the ``G3`` column.

    Args:
        df: DataFrame holding the columns ``school`` ... ``G2`` and ``G3``.
        test_size: Forwarded to ``train_test_split``. Defaults to 0.2, the
            value that used to be hard-coded.
        random_state: Seed forwarded to ``train_test_split``. The default
            ``None`` keeps the previous unseeded behaviour; pass an int to
            get a reproducible split.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)``.
    """
    features = df.loc[:, "school":"G2"]
    target = df.loc[:, "G3"]
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )
    return x_train, x_test, y_train, y_test
def label_encode(x, x_test):
    """Integer-encode every object-dtype column of *x* and *x_test*.

    The columns to encode are the object-dtype columns of *x*. Both frames
    are modified in place and the same objects are returned.

    Args:
        x: Training features.
        x_test: Test features; must contain the object-dtype columns of *x*.

    Returns:
        tuple: ``(x, x_test)`` with the categorical columns replaced by
        integer codes.
    """
    categorical_columns = list(x.select_dtypes(["object"]).columns)
    for column in categorical_columns:
        encoder = LabelEncoder()
        # Fit once on the values of BOTH frames. Re-fitting on the test frame
        # (as the code used to do) can map the same category to different
        # integers in train and test whenever their category sets differ.
        encoder.fit(list(x[column]) + list(x_test[column]))
        x[column] = encoder.transform(x[column])
        x_test[column] = encoder.transform(x_test[column])
    return x, x_test
def ohe_encode(
    X,
    X_test,
    category_index=(0, 1, 3, 4, 5, 8, 9, 10, 11, 15, 16, 17, 18, 19, 20, 21, 22),
):
    """Label-encode the categorical columns, then one-hot encode both frames.

    The columns at the positions in *category_index* are integer-encoded in
    place in both frames. A single ``OneHotEncoder`` is then fitted on *X*
    and used to transform both frames, so the two outputs share one column
    layout.

    Args:
        X: Training features (modified in place by the label-encoding step).
        X_test: Test features (modified in place as well).
        category_index: Positional indices of the categorical columns. The
            default is an immutable tuple with the original positions.

    Returns:
        tuple: ``(encoded_train, encoded_test)`` as returned by
        ``OneHotEncoder.transform``.
    """
    for position in category_index:
        encoder = LabelEncoder()
        # One mapping per column, learned from both frames, so a category has
        # the same integer code in train and test.
        encoder.fit(list(X.iloc[:, position]) + list(X_test.iloc[:, position]))
        X.iloc[:, position] = encoder.transform(X.iloc[:, position])
        X_test.iloc[:, position] = encoder.transform(X_test.iloc[:, position])

    # Fit on the training frame only and reuse the fitted encoder for the test
    # frame. Fitting a second time on X_test (the previous behaviour) produced
    # matrices whose columns did not line up. Values seen only in X_test are
    # encoded as all-zero instead of raising.
    one_hot = OneHotEncoder(handle_unknown="ignore")
    one_hot.fit(X)
    return one_hot.transform(X), one_hot.transform(X_test)
def visualise_data(data, figname):
    """Draw a scatter matrix of *data* and save it to *figname*.

    Args:
        data: DataFrame to plot.
        figname: Path of the image file to write. A falsy value (``None`` or
            ``""``) skips saving, which matches the previous behaviour where
            this argument was ignored.

    Returns:
        Whatever ``scatter_matrix`` returns.
    """
    axes = scatter_matrix(data)
    if figname:
        # NOTE(review): assumes scatter_matrix is pandas' implementation,
        # which returns a 2-D array of Axes that all live on one Figure --
        # confirm against the import at the top of this module.
        axes[0, 0].get_figure().savefig(figname)
    return axes
def linear_regression(X, y):
    """Fit a default ``LinearRegression`` on ``(X, y)`` and return it.

    Args:
        X: Training features.
        y: Training target.

    Returns:
        The value returned by ``LinearRegression().fit(X, y)``.
    """
    return LinearRegression().fit(X, y)
def cross_validation_regressor(Model, X, y):
    """Return the mean cross-validated R^2 score of *Model* on ``(X, y)``.

    Args:
        Model: Estimator handed to ``cross_val_score``.
        X: Features.
        y: Target.

    Returns:
        The mean of the per-fold ``r2`` scores.
    """
    fold_scores = cross_val_score(Model, X, y=y, scoring="r2")
    return np.mean(fold_scores)
def regression_predictor(model, X, y):
    """Predict on *X* with an already fitted *model* and score against *y*.

    The previous implementation ignored ``X`` and ``y``: it refitted the
    model on the module-level ``x_train``/``y_train`` and scored on the
    module-level ``x_test``/``y_test``. Callers passing a different feature
    set (for example a column subset) were therefore silently evaluated on
    the wrong data. Only the arguments are used now.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Features to predict on.
        y: True target values for *X*.

    Returns:
        tuple: ``(predictions, mse, mae, r2)``.
    """
    predictions = model.predict(X)

    mse = mean_squared_error(y, predictions)
    mae = mean_absolute_error(y, predictions)
    r2 = r2_score(y, predictions)

    return predictions, mse, mae, r2
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and collect its evaluation statistics.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_validation``, ``rmse``, ``mae``
        and ``r2``.
    """
    model = linear_regression(x_train, y_train)
    cv_score = cross_validation_regressor(model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

    stats = pd.DataFrame(columns=["cross_validation", "rmse", "mae", "r2"])
    stats.loc[0, "cross_validation"] = cv_score
    # RMSE is the square root of the MSE. The previous code stored the MAE in
    # this column and the raw MSE in the 'mae' column.
    stats.loc[0, "rmse"] = mse ** 0.5
    stats.loc[0, "mae"] = mae
    stats.loc[0, "r2"] = r2
    return model, y_pred, stats
def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Fit a Lasso regression and collect its evaluation statistics.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength passed to ``Lasso``. It used to be
            ignored in favour of a hard-coded 0.1; the default is unchanged.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``r2`` and
        ``rmse``.
    """
    model = Lasso(alpha=alpha)
    model.fit(x_train, y_train)
    cv_score = cross_validation_regressor(model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

    stats = pd.DataFrame(columns=["cross_validation", "mae", "r2", "rmse"])
    stats.loc[0, "cross_validation"] = cv_score
    stats.loc[0, "rmse"] = mse ** 0.5
    stats.loc[0, "mae"] = mae
    stats.loc[0, "r2"] = r2
    return model, y_pred, stats
def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Fit a normalised Ridge regression and collect its statistics.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.
        alpha: Regularisation strength passed to ``Ridge``.

    Returns:
        tuple: ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``r2`` and
        ``rmse``.
    """
    model = Ridge(alpha=alpha, normalize=True)
    model.fit(x_train, y_train)

    cv_score = cross_validation_regressor(model, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

    stats = pd.DataFrame(columns=["cross_validation", "mae", "r2", "rmse"])
    # Cells are filled one at a time, in the same order as before.
    row = (
        ("cross_validation", cv_score),
        ("rmse", mse ** (0.5)),
        ("mae", mae),
        ("r2", r2),
    )
    for column, value in row:
        stats.loc[0, column] = value
    return model, y_pred, stats
def describe_df(data):
    """Return summary statistics and per-column value counts for *data*.

    The value counts used to be computed from the module-level ``x_train``
    instead of the *data* argument; both results now come from *data*.

    Args:
        data: DataFrame to describe.

    Returns:
        tuple: ``(data.describe(), value_counts)`` where ``value_counts`` has
        one column per column of *data*, indexed by the distinct values, with
        ``NaN`` where a value does not occur in that column.
    """
    summary = data.describe()
    value_counts = data.apply(pd.Series.value_counts)
    return summary, value_counts
def plot_corr(df, size=11):
    """Plot the correlation matrix of *df* as a heatmap and save it.

    The figure is written to ``data_image.png`` in the current working
    directory and then shown.

    Args:
        df: DataFrame whose ``corr()`` matrix is plotted.
        size: Figure size in inches. A single number gives a square figure;
            a ``(width, height)`` pair is used as is. The old default
            ``(11)`` was just the integer 11, which ``plt.figure`` rejects
            as a ``figsize``.
    """
    if isinstance(size, (int, float)):
        figsize = (size, size)
    else:
        figsize = tuple(size)

    plt.figure(figsize=figsize)
    sns.heatmap(df.corr())
    plt.savefig("data_image.png")
    plt.show()
def percentile_k_features(features, labels, k=50):
    """Return the names of the top-*k*-percent features, best first.

    Features are scored with ``f_regression`` through ``SelectPercentile``.
    The previous implementation fitted on the module-level ``x_train`` and
    ``y_train`` and always returned the first 16 names, whatever *k* was. It
    now uses the arguments and returns exactly the features the selector
    keeps.

    Args:
        features: DataFrame of candidate features.
        labels: Target values aligned with *features*.
        k: Percentile of features to keep.

    Returns:
        list: Selected column names in descending score order.
    """
    selector = SelectPercentile(score_func=f_regression, percentile=k)
    selector.fit(features, labels)

    column_names = np.array(features.columns)
    kept = selector.get_support()
    ranked = np.argsort(selector.scores_)[::-1]
    return [column_names[i] for i in ranked if kept[i]]
def feature_selection(X, y, k=50):
    """Select features by delegating to ``percentile_k_features``.

    Args:
        X: Candidate features.
        y: Target values.
        k: Percentile forwarded to ``percentile_k_features``.

    Returns:
        Whatever ``percentile_k_features(X, y, k)`` returns.
    """
    selected = percentile_k_features(X, y, k)
    return selected
def plot_residuals(y_test, y_pred, name):
    """Scatter-plot predictions against true values, titled *name*.

    Args:
        y_test: True target values (x axis).
        y_pred: Predicted values (y axis).
        name: Plot title. The literal string ``'name'`` used to be shown
            instead of this argument.
    """
    # NOTE(review): despite the function name this plots y_pred against
    # y_test, not the residuals (y_test - y_pred). Left as is because callers
    # may expect this picture -- confirm the intended plot.
    plt.scatter(y_test, y_pred)
    plt.title(name)
    plt.show()
def create_stats(x_train, x_test, y_train, y_test):
    """Benchmark Lasso and Ridge on all features and on a selected subset.

    Four models are fitted in this order: Lasso on all features, Lasso on
    the selected features, Ridge on all features, Ridge on the selected
    features. The subset is the result of ``feature_selection`` with
    ``k=50``.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target.

    Returns:
        pandas.DataFrame: The four one-row statistics frames concatenated in
        the order listed above.
    """
    ft = feature_selection(x_train, y_train, 50)

    # Only the statistics frame of each run is kept.
    _, _, stats_lasso = lasso(x_train, x_test, y_train, y_test)
    _, _, stats_lasso_ft = lasso(
        x_train.loc[:, ft], x_test.loc[:, ft], y_train, y_test
    )
    _, _, stats_ridge = ridge(x_train, x_test, y_train, y_test)
    _, _, stats_ridge_ft = ridge(
        x_train.loc[:, ft], x_test.loc[:, ft], y_train, y_test
    )

    frames = [stats_lasso, stats_lasso_ft, stats_ridge, stats_ridge_ft]
    return pd.concat(frames)