diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b0c3719..860b9dc 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 4596200..9700c1a 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 98e98a7..7df6bb7 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 7cd3700..b584d81 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,4 +1,16 @@ +# %load q01_load_data/build.py import pandas as pd +from sklearn.utils import shuffle + # Write your code below +def load_data(path): + df=pd.read_table(path,sep=';') + df = shuffle(df) + return df + +#path='data/student-mat.csv' +#load_data(path) + + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index d07fd2f..63814ef 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test.cpython-36.pyc b/q01_load_data/tests/__pycache__/test.cpython-36.pyc index 9aa6996..e6bf5a9 100644 Binary files a/q01_load_data/tests/__pycache__/test.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/__init__.cpython-36.pyc b/q02_data_split/__pycache__/__init__.cpython-36.pyc index 5d17273..ca977e4 100644 Binary files a/q02_data_split/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/build.cpython-36.pyc b/q02_data_split/__pycache__/build.cpython-36.pyc index e6bd2eb..85fdc8f 100644 Binary files a/q02_data_split/__pycache__/build.cpython-36.pyc and b/q02_data_split/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_split/build.py b/q02_data_split/build.py index c2e7147..0b31a9e 100644 --- a/q02_data_split/build.py +++ b/q02_data_split/build.py @@ -1,8 +1,15 @@ +# %load q02_data_split/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from sklearn.model_selection import train_test_split import pandas as pd df = load_data('data/student-mat.csv') # Write your code below +def split_dataset(df): + y=df['G3'] + X=df.iloc[:,:-1] + x_train,x_test,y_train,y_test=train_test_split(X,y,train_size=0.8) + return x_train,x_test,y_train,y_test - +#split_data(df) + diff --git a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc index e780e63..371154b 100644 Binary files a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/tests/__pycache__/test.cpython-36.pyc b/q02_data_split/tests/__pycache__/test.cpython-36.pyc index a1b3fc5..67ef068 100644 Binary files a/q02_data_split/tests/__pycache__/test.cpython-36.pyc and b/q02_data_split/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc index 884722b..a328c5f 100644 Binary files a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/build.cpython-36.pyc b/q03_data_encoding/__pycache__/build.cpython-36.pyc index 302366c..f4db8c7 100644 Binary files a/q03_data_encoding/__pycache__/build.cpython-36.pyc and b/q03_data_encoding/__pycache__/build.cpython-36.pyc differ diff --git a/q03_data_encoding/build.py b/q03_data_encoding/build.py index bb4c8ca..6dc262f 100644 --- a/q03_data_encoding/build.py +++ b/q03_data_encoding/build.py @@ -1,3 +1,4 @@ +# %load q03_data_encoding/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from sklearn.preprocessing import LabelEncoder @@ -8,7 +9,20 @@ x_train, x_test, y_train, y_test = split_dataset(df) # Write your code below +def label_encode(X,X_test): + columnsToEncode = list(X.select_dtypes(include=['category','object'])) + print(columnsToEncode) + le = LabelEncoder() + for feature in columnsToEncode: + try: + le.fit(X[feature]) + X[feature] = le.transform(X[feature]) + X_test[feature] = le.transform(X_test[feature]) + except: + print('Error encoding '+feature) + return X,X_test - +#label_encode(x_train, x_test) + diff --git a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc index 7d18c18..22b0a25 100644 Binary files a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc index 8ade2b7..ee882b9 100644 Binary files a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc index e4ec35b..e88d625 100644 Binary files a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc index 1433b7b..0ef1e83 100644 Binary files a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc differ diff --git a/q03_ohe_encoder/build.py b/q03_ohe_encoder/build.py index 36e4b90..5fe2a60 100644 --- a/q03_ohe_encoder/build.py +++ b/q03_ohe_encoder/build.py @@ -1,19 +1,32 @@ +# %load q03_ohe_encoder/build.py + + from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data + from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset +from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode from sklearn.preprocessing import OneHotEncoder + import pandas as pd + import numpy as np df = load_data('data/student-mat.csv') - x_train, x_test, y_train, y_test = split_dataset(df) - category_index = [x for x in range(len(df.columns)) if df[df.columns[x]].dtype == 'object'] -# Write your code below - +def ohe_encode(X,X_test,category_index=category_index): + X1,X1_test=label_encode(X,X_test) + ohe = OneHotEncoder(categorical_features=category_index,sparse=False) + ohe.fit(X) + X_transform=ohe.transform(X1) + X_test_transform=ohe.transform(X1_test) + + return pd.DataFrame(X_transform),pd.DataFrame(X_test_transform) - - + +#ohe_encode(x_train, x_test,category_index) + + diff --git a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc index 8c87a88..b3b4abc 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc index 1956a19..772c6df 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc index d44a511..5b276bc 100644 Binary files a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/build.cpython-36.pyc b/q04_data_visualisation/__pycache__/build.cpython-36.pyc index 2bfbd4e..bbef976 100644 Binary files a/q04_data_visualisation/__pycache__/build.cpython-36.pyc and b/q04_data_visualisation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_data_visualisation/build.py b/q04_data_visualisation/build.py index 9c15ad9..8a3746a 100644 --- a/q04_data_visualisation/build.py +++ b/q04_data_visualisation/build.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# %load q04_data_visualisation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -10,7 +10,13 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below +def visualise_data(data,figname): + + scatter_matrix(data, alpha=0.2, diagonal='hist') + +#visualise_data(data,'figname') + diff --git a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc index 6631d03..e1b28aa 100644 Binary files a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc index 5353356..6d35dca 100644 Binary files a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc index 06a2a9b..27d12bc 100644 Binary files a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc index c40d112..819fce2 100644 Binary files a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc differ diff --git a/q05_linear_regression_model/build.py b/q05_linear_regression_model/build.py index 7a0a243..3e200f2 100644 --- a/q05_linear_regression_model/build.py +++ b/q05_linear_regression_model/build.py @@ -1,3 +1,4 @@ +# %load q05_linear_regression_model/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -12,4 +13,10 @@ # Write your code below +def linear_regression(x_train,y_train): + lm=LinearRegression() + lm.fit(x_train,y_train) + return lm +#linear_regression(x_train,y_train) + diff --git a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc index 296bcce..bbc9c7d 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc index 54551b9..242cb7d 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc index 9a1c3aa..5d45c95 100644 Binary files a/q06_cross_validation/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/__pycache__/build.cpython-36.pyc b/q06_cross_validation/__pycache__/build.cpython-36.pyc index 2e1c378..e44a323 100644 Binary files a/q06_cross_validation/__pycache__/build.cpython-36.pyc and b/q06_cross_validation/__pycache__/build.cpython-36.pyc differ diff --git a/q06_cross_validation/build.py b/q06_cross_validation/build.py index 406a734..79f9856 100644 --- a/q06_cross_validation/build.py +++ b/q06_cross_validation/build.py @@ -1,3 +1,4 @@ +# %load q06_cross_validation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -17,5 +18,16 @@ model =linear_regression(x_train,y_train) +kfold = KFold(n_splits=3, random_state=7) + # Write your code below +def cross_validation_regressor(model,X,y): + scores = cross_val_score(model, X, y, scoring='r2', cv=kfold)#k-fold + + return scores.mean() + +#cross_validation_regressor(model,x_train,y_train) + + + diff --git a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc index b571b36..c9bdc2a 100644 Binary files a/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc b/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc index e065247..b2b5d81 100644 Binary files a/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc and b/q06_cross_validation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q07_regression_pred/__pycache__/__init__.cpython-36.pyc b/q07_regression_pred/__pycache__/__init__.cpython-36.pyc index 3e7e467..6223e32 100644 Binary files a/q07_regression_pred/__pycache__/__init__.cpython-36.pyc and b/q07_regression_pred/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_regression_pred/__pycache__/build.cpython-36.pyc b/q07_regression_pred/__pycache__/build.cpython-36.pyc index dfa0411..ca9ee0c 100644 Binary files a/q07_regression_pred/__pycache__/build.cpython-36.pyc and b/q07_regression_pred/__pycache__/build.cpython-36.pyc differ diff --git a/q07_regression_pred/build.py b/q07_regression_pred/build.py index 3f2eee3..1a154fc 100644 --- a/q07_regression_pred/build.py +++ b/q07_regression_pred/build.py @@ -1,3 +1,4 @@ +# %load q07_regression_pred/build.py from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score @@ -19,4 +20,18 @@ val = cross_validation_regressor(model,x_train,y_train) -# Write your code below +# Write your code below +def regression_predictor(model, x_test, y_test): + y_pred=model.predict(x_test) + mse=mean_squared_error(y_test,y_pred) + mae=mean_absolute_error(y_test,y_pred) + r2=r2_score(y_test,y_pred) + return y_pred, mse, mae, r2 + + + + +#y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test) + + + diff --git a/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc b/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc index f1435e5..8bc4a09 100644 Binary files a/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc and b/q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc b/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc index 203c5ff..7fb64a7 100644 Binary files a/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc and b/q07_regression_pred/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q08_linear_model/__pycache__/__init__.cpython-36.pyc b/q08_linear_model/__pycache__/__init__.cpython-36.pyc index b91b141..5916cb7 100644 Binary files a/q08_linear_model/__pycache__/__init__.cpython-36.pyc and b/q08_linear_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_linear_model/__pycache__/build.cpython-36.pyc b/q08_linear_model/__pycache__/build.cpython-36.pyc index 438fb94..24b7abe 100644 Binary files a/q08_linear_model/__pycache__/build.cpython-36.pyc and b/q08_linear_model/__pycache__/build.cpython-36.pyc differ diff --git a/q08_linear_model/build.py b/q08_linear_model/build.py index 85d49da..f99c1e1 100644 --- a/q08_linear_model/build.py +++ b/q08_linear_model/build.py @@ -1,3 +1,4 @@ +# %load q08_linear_model/build.py import pandas as pd import numpy as np from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data @@ -16,5 +17,19 @@ y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test) # Write your code below - - +def linear_model(x_train, x_test, y_train, y_test): + model =linear_regression(x_train,y_train) + val = cross_validation_regressor(model,x_train,y_train) + y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test) + stats=pd.DataFrame(np.array([val,mae,mse,r2]).reshape(1,4),columns=['v','m','s','r'],index=[0]) + + return model,y_pred,stats + + + + + +#G,y_pred,stats = linear_model(x_train, x_test, y_train, y_test) + + + diff --git a/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc b/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc index 5f231d2..e4d1f16 100644 Binary files a/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc and b/q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q08_linear_model/tests/__pycache__/test.cpython-36.pyc b/q08_linear_model/tests/__pycache__/test.cpython-36.pyc index cbaeda3..bc651d9 100644 Binary files a/q08_linear_model/tests/__pycache__/test.cpython-36.pyc and b/q08_linear_model/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc index b8b8fc7..51311f2 100644 Binary files a/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc b/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc index ad763a5..5e53714 100644 Binary files a/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc and b/q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/build.py b/q09_advanced_model_q01_lasso/build.py index c832d59..2467d4a 100644 --- a/q09_advanced_model_q01_lasso/build.py +++ b/q09_advanced_model_q01_lasso/build.py @@ -1,3 +1,4 @@ +# %load q09_advanced_model_q01_lasso/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -8,6 +9,7 @@ from sklearn.linear_model import Lasso import numpy as np import pandas as pd +import math from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor np.random.seed(9) @@ -19,5 +21,23 @@ x_train,x_test = label_encode(x_train,x_test) # Write your solution here +def lasso(x_train, x_test, y_train, y_test, alpha=0.1): + l1=Lasso(alpha=0.1) + l1.fit(x_train,y_train) + val=cross_validation_regressor(l1,x_train,y_train) + y_pred, mse, mae, r2 = regression_predictor(l1, x_test, y_test) + rmse=math.pow(mse,0.5) + stats=pd.DataFrame(np.array([val,mae,r2,rmse]).reshape(1,4),columns=['v','m','s','r'],index=[0]) + return l1,y_pred,stats + + + + + +#model,y_pred,stats = lasso(x_train, x_test, y_train, y_test, alpha=0.1) + + + + diff --git a/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc index 80296f7..9773e50 100644 Binary files a/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q01_lasso/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc b/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc index 3d92981..60c69b3 100644 Binary files a/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc and b/q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc index 222893d..490d289 100644 Binary files a/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc b/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc index 29083a5..54a0cea 100644 Binary files a/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc and b/q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/build.py b/q09_advanced_model_q02_ridge/build.py index 0fb3e1a..1dec2c5 100644 --- a/q09_advanced_model_q02_ridge/build.py +++ b/q09_advanced_model_q02_ridge/build.py @@ -1,3 +1,4 @@ +# %load q09_advanced_model_q02_ridge/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -8,7 +9,7 @@ from sklearn.linear_model import Ridge import numpy as np import pandas as pd - +import math from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor np.random.seed(9) @@ -19,7 +20,18 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below +# Write your solution here +def ridge(x_train, x_test, y_train, y_test, alpha=0.1): + l1=Ridge(alpha=alpha,random_state=7,normalize=True) + l1.fit(x_train,y_train) + val=cross_validation_regressor(l1,x_train,y_train) + y_pred, mse, mae, r2 = regression_predictor(l1, x_test, y_test) + rmse=math.pow(mse,0.5) + stats=pd.DataFrame(np.array([val,mae,r2,rmse]).reshape(1,4),columns=['v','m','s','r'],index=[0]) + return l1,y_pred,stats +#model,y_pred,stats = ridge(x_train, x_test, y_train, y_test, alpha=0.1) +#stats diff --git a/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc b/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc index 602e1f5..d669c3b 100644 Binary files a/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc and b/q09_advanced_model_q02_ridge/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc b/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc index 37f31c3..87d93b7 100644 Binary files a/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc and b/q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc b/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc index 9f50df2..43747cd 100644 Binary files a/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc and b/q10_data_missing_values/__pycache__/__init__.cpython-36.pyc differ diff --git a/q10_data_missing_values/__pycache__/build.cpython-36.pyc b/q10_data_missing_values/__pycache__/build.cpython-36.pyc index 5c075f4..faf3eb7 100644 Binary files a/q10_data_missing_values/__pycache__/build.cpython-36.pyc and b/q10_data_missing_values/__pycache__/build.cpython-36.pyc differ diff --git a/q10_data_missing_values/build.py b/q10_data_missing_values/build.py index 582edbb..8949fe4 100644 --- a/q10_data_missing_values/build.py +++ b/q10_data_missing_values/build.py @@ -1,3 +1,4 @@ +# %load q10_data_missing_values/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -10,4 +11,13 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below - +def describe_df(x_train): + #print(x_train.shape) + descD=x_train.describe() + #x_train.info() + #abc=x_train.columns.apply(pd.value_counts(x_train[x])) + #print(x_train['absences'].value_counts()) + vc=x_train.apply(pd.value_counts) + return descD,vc +#describe_df(x_train) + diff --git a/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc b/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc index 2fdd38b..d597eda 100644 Binary files a/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc and b/q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc b/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc index 1701926..897aeb3 100644 Binary files a/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc and b/q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc index 35c8cae..b916ff7 100644 Binary files a/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc index 35748ec..9b53fe9 100644 Binary files a/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/__pycache__/build.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/build.py b/q11_feature_selection_q01_plot_corr/build.py index 0427922..5043bd6 100644 --- a/q11_feature_selection_q01_plot_corr/build.py +++ b/q11_feature_selection_q01_plot_corr/build.py @@ -1,9 +1,9 @@ +# %load q11_feature_selection_q01_plot_corr/build.py import matplotlib.pyplot as plt from matplotlib.pyplot import yticks, xticks, subplots, set_cmap from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data - - +import seaborn as sns from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -21,6 +21,12 @@ #Remember to concatenate training features and labels if you want to check that scatterplots which I would prefer.You are free to explore labels to labels, features to features ,etc scatterplots as you want by passing arguments #============================================================================ -#visualise_data(pd.concat([x_train,y_train],axis=1),"../images/data_image.png") +#visualise_data(pd.concat([x_train,y_train],axis=1),'../images/data_image.png') # Write your solution here: +def plot_corr(df,size=11): + plt.figure(figsize=(size,8)) + sns.heatmap(df.corr(), cmap='YlOrRd') + +#plot_corr(df,size=11) + diff --git a/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc index 6c1c509..cfc55eb 100644 Binary files a/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc b/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc index 93b5347..99e2540 100644 Binary files a/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc and b/q11_feature_selection_q01_plot_corr/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc index cce1771..109e4f7 100644 Binary files a/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc index b0c88c7..e7de238 100644 Binary files a/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/__pycache__/build.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/build.py b/q11_feature_selection_q02_best_k_features/build.py index 95002c5..a620a0f 100644 --- a/q11_feature_selection_q02_best_k_features/build.py +++ b/q11_feature_selection_q02_best_k_features/build.py @@ -1,3 +1,4 @@ +# %load q11_feature_selection_q02_best_k_features/build.py # Default imports from sklearn.feature_selection import SelectPercentile from sklearn.feature_selection import f_regression @@ -17,11 +18,28 @@ x_train,x_test = label_encode(x_train,x_test) - +k=10 np.random.seed(9) # Write your code below +def percentile_k_features(features,labels,k=50): + + skp=SelectPercentile(f_regression,percentile=k) + skp.fit(features,labels) + skp_trans=skp.transform(features) + columns = np.asarray(features.columns.values) + support=len(skp.get_support(indices=True)) + ranking=[columns[i] for i in np.argsort(skp.scores_)[:-17:-1]] + return ranking + +#top_k = percentile_k_features(x_train, y_train, k=50) +#top_k + + +#(top_k, ['G2', 'G1', 'failures', 'Medu', 'Fedu', 'higher', 'age', 'romantic', 'goout', + # 'address', 'sex', 'traveltime', 'Mjob', 'paid', 'reason', 'studytime'], + diff --git a/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc index 3a1830b..a1e2c32 100644 Binary files a/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc b/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc index 7c11282..0390440 100644 Binary files a/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc and b/q11_feature_selection_q02_best_k_features/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q12_feature_selection/__pycache__/__init__.cpython-36.pyc b/q12_feature_selection/__pycache__/__init__.cpython-36.pyc index 886fe32..8ad187c 100644 Binary files a/q12_feature_selection/__pycache__/__init__.cpython-36.pyc and b/q12_feature_selection/__pycache__/__init__.cpython-36.pyc differ diff --git a/q12_feature_selection/__pycache__/build.cpython-36.pyc b/q12_feature_selection/__pycache__/build.cpython-36.pyc index 7c97eeb..8c63b42 100644 Binary files a/q12_feature_selection/__pycache__/build.cpython-36.pyc and b/q12_feature_selection/__pycache__/build.cpython-36.pyc differ diff --git a/q12_feature_selection/build.py b/q12_feature_selection/build.py index 1bbe2b2..4b76401 100644 --- a/q12_feature_selection/build.py +++ b/q12_feature_selection/build.py @@ -1,3 +1,4 @@ +# %load q12_feature_selection/build.py # import matplotlib.pyplot as plt from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -8,7 +9,6 @@ from greyatomlib.multivariate_regression_project.q11_feature_selection_q01_plot_corr.build import plot_corr - import pandas as pd df = load_data('data/student-mat.csv') @@ -16,3 +16,11 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below +def feature_selection(x_train, y_train, k=50): + plot_corr(x_train, 11) + features= percentile_k_features(x_train, y_train, k=50) + return features + +#features = feature_selection(x_train, y_train, k=50) +#features + diff --git a/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc b/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc index 199811e..ffb1e26 100644 Binary files a/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc and b/q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc b/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc index 3a7de81..f77d6d2 100644 Binary files a/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc and b/q12_feature_selection/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc b/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc index 339472d..59d1abb 100644 Binary files a/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc and b/q13_plot_residuals/__pycache__/__init__.cpython-36.pyc differ diff --git a/q13_plot_residuals/__pycache__/build.cpython-36.pyc b/q13_plot_residuals/__pycache__/build.cpython-36.pyc index b3cfbaf..db731bd 100644 Binary files a/q13_plot_residuals/__pycache__/build.cpython-36.pyc and b/q13_plot_residuals/__pycache__/build.cpython-36.pyc differ diff --git a/q13_plot_residuals/build.py b/q13_plot_residuals/build.py index 9cdb3e3..3304be5 100644 --- a/q13_plot_residuals/build.py +++ b/q13_plot_residuals/build.py @@ -1,5 +1,37 @@ +# %load q13_plot_residuals/build.py +from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data -import matplotlib.pyplot as plt +from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset + +from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode + +from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor +from sklearn.linear_model import Ridge +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt + +from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor +np.random.seed(9) + +df = load_data('data/student-mat.csv') +x_train, x_test, y_train, y_test = split_dataset(df) + +x_train,x_test = label_encode(x_train,x_test) +import matplotlib.pyplot as plt +l2=Ridge(alpha=0.01) +l2.fit(x_train,y_train) # Write your code below +def plot_residuals(model, x_test, y_test): + y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test) + error_residuals=y_test-y_pred + plt.scatter(y_test,error_residuals) + plt.title('Residual Plot') + plt.xlabel('SalePrice') + plt.ylabel('Errors') + plt.show() + +#plot_residuals(l2, x_test, y_test) + diff --git a/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc b/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc index 3aa40f0..0a8c8e3 100644 Binary files a/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc and b/q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc b/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc index 89ecb4e..b365edd 100644 Binary files a/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc and b/q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q14_benchmarking/__pycache__/__init__.cpython-36.pyc b/q14_benchmarking/__pycache__/__init__.cpython-36.pyc index 453edef..94ce575 100644 Binary files a/q14_benchmarking/__pycache__/__init__.cpython-36.pyc and b/q14_benchmarking/__pycache__/__init__.cpython-36.pyc differ diff --git a/q14_benchmarking/__pycache__/build.cpython-36.pyc b/q14_benchmarking/__pycache__/build.cpython-36.pyc index 28c02f8..c480037 100644 Binary files a/q14_benchmarking/__pycache__/build.cpython-36.pyc and b/q14_benchmarking/__pycache__/build.cpython-36.pyc differ diff --git a/q14_benchmarking/build.py b/q14_benchmarking/build.py index 4a4557b..e8b3385 100644 --- a/q14_benchmarking/build.py +++ b/q14_benchmarking/build.py @@ -1,3 +1,4 @@ +# %load q14_benchmarking/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -27,4 +28,24 @@ # Write your code below +def create_stats(x_train, x_test, y_train, y_test): + + l1,y_pred_l1,lasso_stats = lasso(x_train, x_test, y_train, y_test, alpha=0.1) + l2,y_pred_l2,ridge_stats = ridge(x_train, x_test, y_train, y_test, alpha=0.1) + + features= feature_selection(x_train, y_train, k=50) + x_trainft=x_train[features].copy() + x_testft=x_test[features].copy() + l1ft,y_pred_l1ft,lasso_statsft = lasso(x_trainft, x_testft, y_train, y_test, alpha=0.1) + l2ft,y_pred_l2ft,ridge_statsft = ridge(x_trainft, x_testft, y_train, y_test, alpha=0.1) + complete_stats = pd.concat([lasso_stats,lasso_statsft,ridge_stats,ridge_statsft]) + return complete_stats + + + + + +#complete_stats = create_stats(x_train, x_test, y_train, y_test) + +#complete_stats.shape[0]*complete_stats.shape[1] diff --git a/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc b/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc index defa63d..4944bbb 100644 Binary files a/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc and b/q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc b/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc index cc77345..a7099f2 100644 Binary files a/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc and b/q14_benchmarking/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q15_select_best_model/__pycache__/build.cpython-36.pyc b/q15_select_best_model/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000..5adf008 Binary files /dev/null and b/q15_select_best_model/__pycache__/build.cpython-36.pyc differ diff --git a/q15_select_best_model/build.py b/q15_select_best_model/build.py index d0ce1ba..860ab7d 100644 --- a/q15_select_best_model/build.py +++ b/q15_select_best_model/build.py @@ -1,3 +1,4 @@ +# %load q15_select_best_model/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -21,3 +22,20 @@ # Write your code below +def complete_build(x_train, x_test, y_train, y_test): + + category_index = [x for x in range(len(x_train.columns)) if x_train[x_train.columns[x]].dtype == 'object'] + x_train_t,x_test_t=ohe_encode(x_train, x_test,category_index) + train=pd.DataFrame(x_train_t) + test=pd.DataFrame(x_test_t) + #print(x_train.shape) + train.columns=x_train.columns.values + test.columns=x_test.columns.values + complete_stats1 = create_stats(x_train, x_test, y_train, y_test) + complete_stats = create_stats(train, test, y_train, y_test) + return pd.concat([complete_stats1,complete_stats],axis=0) + + +#stats = complete_build(x_train, x_test, y_train, y_test) +#stats.shape + diff --git a/q15_select_best_model/tests/__pycache__/__init__.cpython-36.pyc b/q15_select_best_model/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000..2cef6b9 Binary files /dev/null and b/q15_select_best_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q15_select_best_model/tests/__pycache__/test.cpython-36.pyc b/q15_select_best_model/tests/__pycache__/test.cpython-36.pyc new file mode 100644 index 0000000..54be0c9 Binary files /dev/null and b/q15_select_best_model/tests/__pycache__/test.cpython-36.pyc differ