diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b0c3719..1d7413f 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 4596200..191a339 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 98e98a7..556871d 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 7cd3700..6b3b177 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,4 +1,12 @@ +# %load q01_load_data/build.py import pandas as pd +path = './data/student-mat.csv' # Write your code below - +def load_data(path): + df = pd.read_table(path, sep=';') + return df + +df = load_data(path) + + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index d07fd2f..88d6348 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test.cpython-36.pyc b/q01_load_data/tests/__pycache__/test.cpython-36.pyc index 9aa6996..efd381c 100644 Binary files a/q01_load_data/tests/__pycache__/test.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/__init__.cpython-36.pyc b/q02_data_split/__pycache__/__init__.cpython-36.pyc index 5d17273..52db097 100644 Binary files a/q02_data_split/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/build.cpython-36.pyc b/q02_data_split/__pycache__/build.cpython-36.pyc index e6bd2eb..8b74507 100644 Binary files a/q02_data_split/__pycache__/build.cpython-36.pyc and b/q02_data_split/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_split/build.py b/q02_data_split/build.py index c2e7147..65add09 100644 --- a/q02_data_split/build.py +++ b/q02_data_split/build.py @@ -1,8 +1,17 @@ +# %load q02_data_split/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from sklearn.model_selection import train_test_split import pandas as pd df = load_data('data/student-mat.csv') # Write your code below +def split_dataset(df): + X = df.drop(['G3'], 1) + y = df['G3'] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42) + return X_train, X_test, y_train, y_test +X_train, X_test, y_train, y_test = split_dataset(df) + + diff --git a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc index e780e63..f2da98b 100644 Binary files a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/tests/__pycache__/test.cpython-36.pyc b/q02_data_split/tests/__pycache__/test.cpython-36.pyc index a1b3fc5..9fd7452 100644 Binary files a/q02_data_split/tests/__pycache__/test.cpython-36.pyc and b/q02_data_split/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc index 884722b..72e74c1 100644 Binary files a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/build.cpython-36.pyc b/q03_data_encoding/__pycache__/build.cpython-36.pyc index 302366c..348e264 100644 Binary files a/q03_data_encoding/__pycache__/build.cpython-36.pyc and b/q03_data_encoding/__pycache__/build.cpython-36.pyc differ diff --git a/q03_data_encoding/build.py b/q03_data_encoding/build.py index bb4c8ca..e884a2e 100644 --- a/q03_data_encoding/build.py +++ b/q03_data_encoding/build.py @@ -1,3 +1,4 @@ +# %load q03_data_encoding/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from sklearn.preprocessing import LabelEncoder @@ -8,7 +9,13 @@ x_train, x_test, y_train, y_test = split_dataset(df) # Write your code below +def label_encode(X_train, X_test): + lblEncd = LabelEncoder() + X_transform = x_train.apply(lblEncd.fit_transform) + X_test_transform = x_test.apply(lblEncd.fit_transform) - + return X_transform, X_test_transform + +X_transform, X_test_transform = label_encode(x_train, x_test) diff --git a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc index 7d18c18..bfddf02 100644 Binary files a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc index 8ade2b7..08cc17d 100644 Binary files a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc index e4ec35b..fa1fd46 100644 Binary files a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc index 1433b7b..210730a 100644 Binary files a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc differ diff --git a/q03_ohe_encoder/build.py b/q03_ohe_encoder/build.py index 36e4b90..463bdbb 100644 --- a/q03_ohe_encoder/build.py +++ b/q03_ohe_encoder/build.py @@ -1,6 +1,8 @@ +# %load q03_ohe_encoder/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from sklearn.preprocessing import OneHotEncoder +from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode import pandas as pd import numpy as np @@ -10,10 +12,15 @@ category_index = [x for x in range(len(df.columns)) if df[df.columns[x]].dtype == 'object'] - # Write your code below - +def ohe_encode(x_train, x_test, category_index=category_index): + x_train,x_test = label_encode(x_train,x_test) + oneHotEncd = OneHotEncoder(categorical_features=category_index, sparse=False) + X_transform = oneHotEncd.fit_transform(x_train) + X_test_transform = oneHotEncd.fit_transform(x_test) - + return X_transform, X_test_transform - +X_transform, X_test_transform = ohe_encode(x_train, x_test) + + diff --git a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc index 8c87a88..59d550d 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc index 1956a19..39d4389 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc index d44a511..4bc302b 100644 Binary files a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/build.cpython-36.pyc b/q04_data_visualisation/__pycache__/build.cpython-36.pyc index 2bfbd4e..264019b 100644 Binary files a/q04_data_visualisation/__pycache__/build.cpython-36.pyc and b/q04_data_visualisation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_data_visualisation/build.py b/q04_data_visualisation/build.py index 9c15ad9..aa048ae 100644 --- a/q04_data_visualisation/build.py +++ b/q04_data_visualisation/build.py @@ -1,16 +1,17 @@ -# -*- coding: utf-8 -*- +# %load q04_data_visualisation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode import matplotlib.pyplot as plt from pandas.plotting import scatter_matrix -data = load_data('data/student-mat.csv') +data = load_data('data/student-mat.csv') x_train, x_test, y_train, y_test = split_dataset(data) x_train,x_test = label_encode(x_train,x_test) # Write your code below - - - +def visualise_data(data, figname): + return scatter_matrix(data) + + diff --git a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc index 6631d03..06c449d 100644 Binary files a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc index 5353356..39f8e71 100644 Binary files a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc index 06a2a9b..664c2b5 100644 Binary files a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc index c40d112..626739b 100644 Binary files a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc differ diff --git a/q05_linear_regression_model/build.py b/q05_linear_regression_model/build.py index 7a0a243..6196e42 100644 --- a/q05_linear_regression_model/build.py +++ b/q05_linear_regression_model/build.py @@ -1,3 +1,4 @@ +# %load q05_linear_regression_model/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -12,4 +13,12 @@ # Write your code below +def linear_regression(X, y): + lr = LinearRegression() + lm = lr.fit(X, y) + return lm + +linear_regression(x_train, y_train) + + diff --git a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc index 296bcce..50d15cc 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc index 54551b9..1e5253c 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc index 9a1c3aa..fb281d0 100644 Binary files a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/build.cpython-36.pyc b/q06_cross_validation/__pycache__/build.cpython-36.pyc index 2e1c378..fd6c6ff 100644 Binary files a/q06_cross_validation/__pycache__/build.cpython-36.pyc and b/q06_cross_validation/__pycache__/build.cpython-36.pyc differ diff --git a/q06_cross_validation/build.py b/q06_cross_validation/build.py index 406a734..066eab9 100644 --- a/q06_cross_validation/build.py +++ b/q06_cross_validation/build.py @@ -1,3 +1,4 @@ +# %load q06_cross_validation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -18,4 +19,10 @@ model =linear_regression(x_train,y_train) # Write your code below +def cross_validation_regressor(Model, X, y): + r2_score = np.array(cross_val_score(Model, X, y, cv=3)).mean() + return r2_score +cross_validation_regressor(model, x_train, y_train) + + diff --git a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc index b571b36..f1f26db 100644 Binary files a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc index e065247..ed92b37 100644 Binary files a/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q07_regression_pred/__pycache__/__init__.cpython-36.pyc b/q07_regression_pred/__pycache__/__init__.cpython-36.pyc index 3e7e467..169c17f 100644 Binary files a/q07_regression_pred/__pycache__/__init__.cpython-36.pyc and b/q07_regression_pred/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_regression_pred/__pycache__/build.cpython-36.pyc b/q07_regression_pred/__pycache__/build.cpython-36.pyc index dfa0411..e66fc2c 100644 Binary files a/q07_regression_pred/__pycache__/build.cpython-36.pyc and b/q07_regression_pred/__pycache__/build.cpython-36.pyc differ diff --git a/q07_regression_pred/build.py b/q07_regression_pred/build.py index 3f2eee3..b385583 100644 --- a/q07_regression_pred/build.py +++ b/q07_regression_pred/build.py @@ -1,3 +1,4 @@ +# %load q07_regression_pred/build.py from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score @@ -10,7 +11,7 @@ df = load_data('data/student-mat.csv') -x_train, x_test, y_train, y_test = split_dataset(df) +x_train, x_test, y_train, y_test = split_dataset(df) x_train,x_test = label_encode(x_train,x_test) @@ -18,5 +19,17 @@ val = cross_validation_regressor(model,x_train,y_train) - # Write your code below +def regression_predictor(model,X,y): + y_pred = model.predict(X) + mse = mean_squared_error(y,y_pred) + mae = mean_absolute_error(y,y_pred) + r2 = r2_score(y,y_pred) + + return y_pred, mse, mae, r2 + +regression_predictor(model, x_test, y_test) + + + + diff --git a/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc b/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc index f1435e5..b2c5327 100644 Binary files a/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc and b/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc b/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc index 203c5ff..6ad8471 100644 Binary files a/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc and b/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q08_linear_model/__pycache__/__init__.cpython-36.pyc b/q08_linear_model/__pycache__/__init__.cpython-36.pyc index b91b141..4fbfb3f 100644 Binary files a/q08_linear_model/__pycache__/__init__.cpython-36.pyc and b/q08_linear_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_linear_model/__pycache__/build.cpython-36.pyc b/q08_linear_model/__pycache__/build.cpython-36.pyc index 438fb94..2fbfcd0 100644 Binary files a/q08_linear_model/__pycache__/build.cpython-36.pyc and b/q08_linear_model/__pycache__/build.cpython-36.pyc differ diff --git a/q08_linear_model/build.py b/q08_linear_model/build.py index 85d49da..29074bf 100644 --- a/q08_linear_model/build.py +++ b/q08_linear_model/build.py @@ -1,3 +1,4 @@ +# %load q08_linear_model/build.py import pandas as pd import numpy as np from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data @@ -16,5 +17,12 @@ y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test) # Write your code below +def linear_model(x_train, x_test, y_train, y_test): + G = linear_regression(x_train, y_train) + stats = pd.DataFrame([(val,mae,mse,r2)], columns = ['cross_val','rmse','mae','r2']) + return G, y_pred, stats +linear_model(x_train, x_test, y_train, y_test) + + diff --git a/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc b/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc index 5f231d2..5a10c9a 100644 Binary files a/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc and b/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_linear_model/tests/__pycache__/test.cpython-36.pyc b/q08_linear_model/tests/__pycache__/test.cpython-36.pyc index cbaeda3..f2090de 100644 Binary files a/q08_linear_model/tests/__pycache__/test.cpython-36.pyc and b/q08_linear_model/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc index b8b8fc7..5c24b9c 100644 Binary files a/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc b/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc index ad763a5..f11f907 100644 Binary files a/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc and b/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/build.py b/q09_advanced_model_q01_lasso/build.py index c832d59..e761f24 100644 --- a/q09_advanced_model_q01_lasso/build.py +++ b/q09_advanced_model_q01_lasso/build.py @@ -1,3 +1,4 @@ +# %load q09_advanced_model_q01_lasso/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -19,5 +20,16 @@ x_train,x_test = label_encode(x_train,x_test) # Write your solution here +def lasso(x_train, x_test, y_train, y_test, alpha=0.1): + G = Lasso(alpha = alpha) + G.fit(x_train, y_train) + val = cross_validation_regressor(G,x_train,y_train) + y_pred, mse, mae, r2 = regression_predictor(G, x_test, y_test) + + stats = pd.DataFrame([(val,mae,r2,np.sqrt(mse))], columns = ['cross_val','mae','r2','rmse']) + return G, y_pred, stats +lasso(x_train, x_test, y_train, y_test) + + diff --git a/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc index 80296f7..8fa06d4 100644 Binary files a/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc b/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc index 3d92981..35dedf2 100644 Binary files a/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc and b/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc index 222893d..149b293 100644 Binary files a/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc b/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc index 29083a5..a946416 100644 Binary files a/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc and b/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/build.py b/q09_advanced_model_q02_ridge/build.py index 0fb3e1a..92b0bb1 100644 --- a/q09_advanced_model_q02_ridge/build.py +++ b/q09_advanced_model_q02_ridge/build.py @@ -1,3 +1,4 @@ +# %load q09_advanced_model_q02_ridge/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -19,7 +20,16 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below - +def ridge(x_train, x_test, y_train, y_test, alpha=0.1): + G = Ridge(alpha = alpha,normalize = True) + G.fit(x_train, y_train) + val = cross_validation_regressor(G,x_train,y_train) + y_pred, mse, mae, r2 = regression_predictor(G, x_test, y_test) + + stats = pd.DataFrame([(val,mae,r2,np.sqrt(mse))], columns = ['cross_val','mae','r2','rmse']) + return G, y_pred, stats +ridge(x_train, x_test, y_train, y_test) + diff --git a/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc index 602e1f5..a1c03b0 100644 Binary files a/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc b/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc index 37f31c3..ecbce88 100644 Binary files a/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc and b/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc b/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc index 9f50df2..bd24cc1 100644 Binary files a/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc and b/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc differ diff --git a/q10_data_missing_values/__pycache__/build.cpython-36.pyc b/q10_data_missing_values/__pycache__/build.cpython-36.pyc index 5c075f4..82cda49 100644 Binary files a/q10_data_missing_values/__pycache__/build.cpython-36.pyc and b/q10_data_missing_values/__pycache__/build.cpython-36.pyc differ diff --git a/q10_data_missing_values/build.py b/q10_data_missing_values/build.py index 582edbb..d49c954 100644 --- a/q10_data_missing_values/build.py +++ b/q10_data_missing_values/build.py @@ -1,3 +1,4 @@ +# %load q10_data_missing_values/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -10,4 +11,12 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below +def describe_df(x_train): + describe = x_train.describe() + value_counts = x_train.apply(pd.value_counts) + return describe, value_counts + +describe_df(x_train) + + diff --git a/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc index 2fdd38b..7a6e080 100644 Binary files a/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc and b/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc b/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc index 1701926..887818d 100644 Binary files a/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc and b/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 35c8cae..cc64af3 100644 Binary files a/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc index 35748ec..22222a5 100644 Binary files a/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/build.py b/q11_feature_selection_q01_plot_corr/build.py index 0427922..f74976b 100644 --- a/q11_feature_selection_q01_plot_corr/build.py +++ b/q11_feature_selection_q01_plot_corr/build.py @@ -1,13 +1,12 @@ - +# %load q11_feature_selection_q01_plot_corr/build.py +import matplotlib +matplotlib.use('agg') import matplotlib.pyplot as plt from matplotlib.pyplot import yticks, xticks, subplots, set_cmap from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data - from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset - - from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode df = load_data('data/student-mat.csv') @@ -21,6 +20,19 @@ #Remember to concatenate training features and labels if you want to check that scatterplots which I would prefer.You are free to explore labels to labels, features to features ,etc scatterplots as you want by passing arguments #============================================================================ -#visualise_data(pd.concat([x_train,y_train],axis=1),"../images/data_image.png") +#visualise_data(pd.concat([x_train,y_train],axis=1),'../images/data_image.png') # Write your solution here: +def plot_corr(df, size=11): + fig, axis = plt.subplots(figsize=(size, size)) + cor = df.corr() + plt.set_cmap(cmap='YlOrRd') + axis.matshow(cor) + + plt.xticks(range(len(cor.columns)), cor.columns, rotation=90); + plt.yticks(range(len(cor.columns)), cor.columns); + plt.show() + +plot_corr(df) + + diff --git a/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index 6c1c509..51f560d 100644 Binary files a/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc index 93b5347..329a1df 100644 Binary files a/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc index cce1771..7a8885e 100644 Binary files a/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc index b0c88c7..3583db4 100644 Binary files a/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/build.py b/q11_feature_selection_q02_best_k_features/build.py index 95002c5..d4fc17d 100644 --- a/q11_feature_selection_q02_best_k_features/build.py +++ b/q11_feature_selection_q02_best_k_features/build.py @@ -1,8 +1,10 @@ +# %load q11_feature_selection_q02_best_k_features/build.py # Default imports from sklearn.feature_selection import SelectPercentile from sklearn.feature_selection import f_regression import numpy as np import matplotlib.pyplot as plt +import operator from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -17,11 +19,26 @@ x_train,x_test = label_encode(x_train,x_test) - np.random.seed(9) # Write your code below +def percentile_k_features(x_train, y_train, k=50): + Selector_f = SelectPercentile(f_regression, percentile=k) + fs = Selector_f.fit(x_train, y_train) + cols = list(x_train.columns) + scores = list(fs.scores_) + + feature_score = dict() + top_k_predictors = [] + for i in range(0,len(cols)): + feature_score[cols[i]] = scores[i] + sorted_x = sorted(feature_score.items(), key=operator.itemgetter(1), reverse=True) + for obj in range(0, 16): + top_k_predictors.append(sorted_x[obj][0]) + return top_k_predictors + +percentile_k_features(x_train, y_train) diff --git a/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 3a1830b..67720df 100644 Binary files a/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc index 7c11282..0deac3b 100644 Binary files a/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q12_feature_selection/__pycache__/__init__.cpython-36.pyc b/q12_feature_selection/__pycache__/__init__.cpython-36.pyc index 886fe32..4889624 100644 Binary files a/q12_feature_selection/__pycache__/__init__.cpython-36.pyc and b/q12_feature_selection/__pycache__/__init__.cpython-36.pyc differ diff --git a/q12_feature_selection/__pycache__/build.cpython-36.pyc b/q12_feature_selection/__pycache__/build.cpython-36.pyc index 7c97eeb..d938446 100644 Binary files a/q12_feature_selection/__pycache__/build.cpython-36.pyc and b/q12_feature_selection/__pycache__/build.cpython-36.pyc differ diff --git a/q12_feature_selection/build.py b/q12_feature_selection/build.py index 1bbe2b2..7e398ea 100644 --- a/q12_feature_selection/build.py +++ b/q12_feature_selection/build.py @@ -1,3 +1,4 @@ +# %load q12_feature_selection/build.py # import matplotlib.pyplot as plt from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -16,3 +17,12 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below +def feature_selection(x_train, y_train, k=50): + plot_corr(pd.concat([x_train, y_train],axis=1)) + reduced_features = percentile_k_features(x_train,y_train,k) + + return reduced_features + +feature_selection(x_train, y_train) + + diff --git a/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc b/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc index 199811e..7d1d754 100644 Binary files a/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc and b/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc b/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc index 3a7de81..5b0740a 100644 Binary files a/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc and b/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc b/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc index 339472d..f17af8c 100644 Binary files a/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc and b/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc differ diff --git a/q13_plot_residuals/__pycache__/build.cpython-36.pyc b/q13_plot_residuals/__pycache__/build.cpython-36.pyc index b3cfbaf..32741a5 100644 Binary files a/q13_plot_residuals/__pycache__/build.cpython-36.pyc and b/q13_plot_residuals/__pycache__/build.cpython-36.pyc differ diff --git a/q13_plot_residuals/build.py b/q13_plot_residuals/build.py index 9cdb3e3..cc8d14a 100644 --- a/q13_plot_residuals/build.py +++ b/q13_plot_residuals/build.py @@ -1,5 +1,11 @@ - - +# %load q13_plot_residuals/build.py +import matplotlib +matplotlib.use('Agg') import matplotlib.pyplot as plt # Write your code below +def plot_residuals(y_test, y_pred, name): + plt.plot(y_test, y_pred) + plt.show() + + diff --git a/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc b/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc index 3aa40f0..f776884 100644 Binary files a/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc and b/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc b/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc index 89ecb4e..9945061 100644 Binary files a/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc and b/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q14_benchmarking/__pycache__/__init__.cpython-36.pyc b/q14_benchmarking/__pycache__/__init__.cpython-36.pyc index 453edef..e10fdd2 100644 Binary files a/q14_benchmarking/__pycache__/__init__.cpython-36.pyc and b/q14_benchmarking/__pycache__/__init__.cpython-36.pyc differ diff --git a/q14_benchmarking/__pycache__/build.cpython-36.pyc b/q14_benchmarking/__pycache__/build.cpython-36.pyc index 28c02f8..58ff8f3 100644 Binary files a/q14_benchmarking/__pycache__/build.cpython-36.pyc and b/q14_benchmarking/__pycache__/build.cpython-36.pyc differ diff --git a/q14_benchmarking/build.py b/q14_benchmarking/build.py index 4a4557b..26ff6e1 100644 --- a/q14_benchmarking/build.py +++ b/q14_benchmarking/build.py @@ -1,3 +1,4 @@ +# %load q14_benchmarking/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -26,5 +27,22 @@ # Write your code below +def create_stats(x_train, x_test, y_train, y_test): + lasso_modl, lasso_y_pred, lasso_stat = lasso(x_train, x_test, y_train, y_test,0.1) + ridge_modl, ridge_y_pred, ridge_stat = ridge(x_train, x_test, y_train, y_test,0.1) + features = feature_selection(x_train, y_train, k=50) + + x_train_ft = x_train[features].copy() + x_test_ft = x_test[features].copy() + + lasso_modl_ft, lasso_y_pred_ft, lasso_stat_ft = lasso(x_train_ft, x_test_ft, y_train, y_test,0.1) + ridge_modl_ft, ridge_y_pred_ft, ridge_stat_ft = ridge(x_train_ft, x_test_ft, y_train, y_test,0.1) + features = feature_selection(x_train_ft, y_train, k=50) + + complete_stats = pd.concat([lasso_stat,lasso_stat_ft,ridge_stat,ridge_stat_ft]) + + return complete_stats + +create_stats(x_train, x_test, y_train, y_test) diff --git a/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc b/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc index defa63d..6c34bb4 100644 Binary files a/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc and b/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc b/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc index cc77345..59b71d8 100644 Binary files a/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc and b/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc differ