Skip to content
Binary file modified __pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/__pycache__/build.cpython-36.pyc
Binary file not shown.
8 changes: 8 additions & 0 deletions q01_load_data/build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
# %load q01_load_data/build.py
import pandas as pd

path = 'data/student-mat.csv'

# Write your code below
def load_data(path):
    """Load the delimited text file at ``path`` into a pandas DataFrame.

    Fields are split on semicolons (``sep=';'``).
    """
    frame = pd.read_table(path, sep=';')
    return frame

# Module-level call: runs whenever this file is imported or executed, using the
# module-level ``path``; the returned frame is discarded.
load_data(path)


Binary file modified q01_load_data/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q02_data_split/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q02_data_split/__pycache__/build.cpython-36.pyc
Binary file not shown.
12 changes: 10 additions & 2 deletions q02_data_split/build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
# %load q02_data_split/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from sklearn.model_selection import train_test_split
import pandas as pd
df = load_data('data/student-mat.csv')

# Write your code below


def split_dataset(df):
    """Split ``df`` into train/test features and the ``G3`` target.

    Every column except ``G3`` is used as a feature. 20% of the rows are held
    out for testing, and ``random_state=42`` keeps the split reproducible.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``.
    """
    # Pass the axis by keyword: the positional form ``drop('G3', 1)`` is
    # deprecated in pandas 1.x and rejected by pandas 2.0.
    features = df.drop('G3', axis=1)
    target = df['G3']
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42)
    return x_train, x_test, y_train, y_test

# Module-level call: runs on import with the module-level ``df``; the four
# returned splits are discarded.
split_dataset(df)


Binary file modified q02_data_split/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q02_data_split/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q03_data_encoding/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_data_encoding/__pycache__/build.cpython-36.pyc
Binary file not shown.
12 changes: 11 additions & 1 deletion q03_data_encoding/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_data_encoding/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from sklearn.preprocessing import LabelEncoder
Expand All @@ -8,7 +9,16 @@
x_train, x_test, y_train, y_test = split_dataset(df)

# Write your code below
def label_encode(x_train, x_test):
    """Label-encode every column of both frames.

    ``LabelEncoder.fit_transform`` is applied column by column, first to
    ``x_train`` and then to ``x_test``.

    Returns:
        Tuple ``(encoded_train, encoded_test)``.
    """
    encoder = LabelEncoder()
    # NOTE(review): the encoder is refit on every column of each frame, so the
    # integer codes of the test frame are learned from the test frame itself
    # and are not guaranteed to match the training codes -- confirm intended.
    return tuple(
        frame.apply(encoder.fit_transform) for frame in (x_train, x_test)
    )

# Debug output at import time: print the encoded train and test frames.
# The test frame is passed as the second argument (the training frame was
# previously passed twice).
print(label_encode(x_train, x_test))







Binary file modified q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_data_encoding/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_ohe_encoder/__pycache__/build.cpython-36.pyc
Binary file not shown.
18 changes: 16 additions & 2 deletions q03_ohe_encoder/build.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# %load q03_ohe_encoder/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from sklearn.preprocessing import OneHotEncoder
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
import pandas as pd
import numpy as np

Expand All @@ -10,10 +12,22 @@

# Positions of the columns of ``df`` whose dtype is ``object``.
category_index = [
    position
    for position, column in enumerate(df.columns)
    if df[column].dtype == 'object'
]

def ohe_encode(x_train, x_test, category_index=category_index):
    """Label-encode both splits, then one-hot encode the categorical columns.

    Args:
        x_train: training feature frame.
        x_test: test feature frame.
        category_index: positions of the columns to one-hot encode; defaults
            to the module-level ``category_index``.

    Returns:
        Tuple ``(encoded_train, encoded_test)`` of dense arrays
        (``sparse=False``).
    """
    x_train, x_test = label_encode(x_train, x_test)
    # ``handle_unknown='ignore'`` keeps ``transform`` from raising when the
    # test split holds a code the training split never produced.
    ohe = OneHotEncoder(categorical_features=category_index, sparse=False,
                        handle_unknown='ignore')
    encoded_train = ohe.fit_transform(x_train)
    # Reuse the encoder fitted on the training split so both outputs share the
    # same column layout; refitting on the test split could yield a different
    # number or order of columns.
    encoded_test = ohe.transform(x_test)
    return encoded_train, encoded_test

# Write your code below

#print(category_index)
# Debug output at import time: prints the pair of arrays returned by
# ``ohe_encode`` for the module-level splits.
print(ohe_encode(x_train,x_test,category_index))










Binary file modified q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q04_data_visualisation/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q04_data_visualisation/__pycache__/build.cpython-36.pyc
Binary file not shown.
11 changes: 7 additions & 4 deletions q04_data_visualisation/build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
# %load q04_data_visualisation/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
Expand All @@ -10,7 +10,10 @@
x_train,x_test = label_encode(x_train,x_test)

# Write your code below



def visualise_data(data, figname):
    """Build a scatter-matrix plot of ``data`` and return what ``scatter_matrix`` returns.

    NOTE(review): ``figname`` is accepted but never used in this function, so
    nothing is written to disk here -- confirm whether saving was intended.
    """
    axes = scatter_matrix(data)
    return axes





Binary file modified q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q05_linear_regression_model/__pycache__/build.cpython-36.pyc
Binary file not shown.
9 changes: 7 additions & 2 deletions q05_linear_regression_model/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q05_linear_regression_model/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

Expand All @@ -10,6 +11,10 @@

x_train, x_test = label_encode(x_train,x_test)


# Write your code below
def linear_regression(X, y):
    """Fit a default ``LinearRegression`` on ``X`` and ``y``.

    Returns:
        The value returned by ``LinearRegression().fit(X, y)``.
    """
    return LinearRegression().fit(X, y)

# Module-level call: fits a model on the module-level training data at import
# time; the fitted estimator is discarded.
linear_regression(x_train,y_train)


Binary file not shown.
Binary file modified q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q06_cross_validation/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q06_cross_validation/__pycache__/build.cpython-36.pyc
Binary file not shown.
8 changes: 8 additions & 0 deletions q06_cross_validation/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q06_cross_validation/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

Expand All @@ -18,4 +19,11 @@
model =linear_regression(x_train,y_train)

# Write your code below
def cross_validation_regressor(model, X, y):
    """Return the mean of the ``cross_val_score`` results for ``model`` with ``cv=3``."""
    fold_scores = cross_val_score(model, X, y, cv=3)
    return np.array(fold_scores).mean()

# Module-level call: cross-validates the module-level ``model`` at import time;
# the resulting score is discarded.
cross_validation_regressor(model,x_train,y_train)




Binary file modified q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q06_cross_validation/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q07_regression_pred/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q07_regression_pred/__pycache__/build.cpython-36.pyc
Binary file not shown.
14 changes: 13 additions & 1 deletion q07_regression_pred/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q07_regression_pred/build.py

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

Expand All @@ -19,4 +20,15 @@
val = cross_validation_regressor(model,x_train,y_train)


# Write your code below
def regression_predictor(model, X, y):
    """Predict on ``X`` with ``model`` and score the predictions against ``y``.

    Returns:
        Tuple ``(y_pred, mse, mae, r2)``: the predictions followed by the mean
        squared error, mean absolute error and R^2 score.
    """
    predictions = model.predict(X)
    return (
        predictions,
        mean_squared_error(y, predictions),
        mean_absolute_error(y, predictions),
        r2_score(y, predictions),
    )


# Module-level call: scores the module-level ``model`` on the test split at
# import time; the returned tuple is discarded.
regression_predictor(model,x_test,y_test)



Binary file modified q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q07_regression_pred/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q08_linear_model/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q08_linear_model/__pycache__/build.cpython-36.pyc
Binary file not shown.
11 changes: 10 additions & 1 deletion q08_linear_model/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q08_linear_model/build.py
import pandas as pd
import numpy as np
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
Expand All @@ -16,5 +17,13 @@
y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

# Write your code below
def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and summarise its performance.

    Args:
        x_train, y_train: data the model is fitted and cross-validated on.
        x_test, y_test: data the predictions and error metrics are computed on.

    Returns:
        Tuple ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_val``, ``rmse``, ``mae`` and ``r2``.
    """
    fitted = linear_regression(x_train, y_train)
    # Score the model built from the arguments instead of reading the
    # module-level ``val``/``mse``/``mae``/``r2``/``y_pred`` values, which only
    # describe the data loaded at import time.
    cv_score = cross_validation_regressor(fitted, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(fitted, x_test, y_test)
    # Each value sits under its own column label; rmse is the square root of
    # the mean squared error.
    stats = pd.DataFrame([(cv_score, np.sqrt(mse), mae, r2)],
                         columns=['cross_val', 'rmse', 'mae', 'r2'])
    return fitted, y_pred, stats

# Module-level call: runs at import time on the module-level splits; the
# returned tuple is discarded.
linear_model(x_train,x_test,y_train,y_test)





Binary file modified q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q08_linear_model/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc
Binary file not shown.
12 changes: 11 additions & 1 deletion q09_advanced_model_q01_lasso/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q09_advanced_model_q01_lasso/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data

from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
Expand All @@ -19,5 +20,14 @@
x_train,x_test = label_encode(x_train,x_test)

# Write your solution here

def lasso(x_train, x_test, y_train, y_test, alpha=0.1):
    """Fit a ``Lasso`` model and summarise its performance.

    Args:
        x_train, y_train: data the model is fitted and cross-validated on.
        x_test, y_test: data the predictions and error metrics are computed on.
        alpha: value passed to ``Lasso(alpha=...)``.

    Returns:
        Tuple ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_val``, ``mae``, ``r2`` and ``rmse``.
    """
    estimator = Lasso(alpha=alpha)
    estimator.fit(x_train, y_train)
    cv_score = cross_validation_regressor(estimator, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(estimator, x_test, y_test)
    columns = ['cross_val', 'mae', 'r2', 'rmse']
    row = (cv_score, mae, r2, np.sqrt(mse))
    stats = pd.DataFrame([row], columns=columns)
    return estimator, y_pred, stats

# Module-level call: fits and scores a Lasso model at import time with the
# default ``alpha``; the returned tuple is discarded.
lasso(x_train, x_test, y_train, y_test)


Binary file not shown.
Binary file modified q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc
Binary file not shown.
12 changes: 11 additions & 1 deletion q09_advanced_model_q02_ridge/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q09_advanced_model_q02_ridge/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data

from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
Expand All @@ -19,7 +20,16 @@
x_train,x_test = label_encode(x_train,x_test)

# Write your code below

def ridge(x_train, x_test, y_train, y_test, alpha=0.1):
    """Fit a ``Ridge`` model (``normalize=True``) and summarise its performance.

    Args:
        x_train, y_train: data the model is fitted and cross-validated on.
        x_test, y_test: data the predictions and error metrics are computed on.
        alpha: value passed to ``Ridge(alpha=...)``.

    Returns:
        Tuple ``(model, y_pred, stats)`` where ``stats`` is a one-row
        DataFrame with the columns ``cross_val``, ``mae``, ``r2`` and ``rmse``.
    """
    estimator = Ridge(alpha=alpha, normalize=True)
    estimator.fit(x_train, y_train)
    cv_score = cross_validation_regressor(estimator, x_train, y_train)
    y_pred, mse, mae, r2 = regression_predictor(estimator, x_test, y_test)
    columns = ['cross_val', 'mae', 'r2', 'rmse']
    row = (cv_score, mae, r2, np.sqrt(mse))
    stats = pd.DataFrame([row], columns=columns)
    return estimator, y_pred, stats


# Module-level call: fits and scores a Ridge model at import time with the
# default ``alpha``; the returned tuple is discarded.
ridge(x_train, x_test, y_train, y_test)



Binary file not shown.
Binary file modified q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q10_data_missing_values/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q10_data_missing_values/__pycache__/build.cpython-36.pyc
Binary file not shown.
14 changes: 12 additions & 2 deletions q10_data_missing_values/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q10_data_missing_values/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
Expand All @@ -9,5 +10,14 @@
x_train, x_test, y_train, y_test = split_dataset(df)
x_train,x_test = label_encode(x_train,x_test)

# Write your code below

def describe_df(df):
    """Return ``df.describe()`` together with that summary stacked four times.

    Returns:
        Tuple ``(describe, stacked)``: ``describe`` is ``df.describe()`` and
        ``stacked`` holds four copies of it concatenated row-wise, so the
        statistic labels repeat in its index.
    """
    # Imported locally so the function does not rely on a module-level ``pd``.
    import pandas as pd

    describe = df.describe()
    # ``DataFrame.append`` was removed in pandas 2.0; a single ``concat``
    # builds the same four-fold stack the chained appends produced.
    valuec = pd.concat([describe] * 4)
    return describe, valuec
#describe_df(x_train)



Binary file modified q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
26 changes: 24 additions & 2 deletions q11_feature_selection_q01_plot_corr/build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

# %load q11_feature_selection_q01_plot_corr/build.py
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import yticks, xticks, subplots, set_cmap
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
Expand All @@ -21,6 +27,22 @@

# Remember to concatenate the training features and labels if you want to inspect their scatter plots (the preferred option). You are free to explore label-to-label, feature-to-feature, etc. scatter plots by passing the corresponding arguments.
#============================================================================
#visualise_data(pd.concat([x_train,y_train],axis=1),"../images/data_image.png")
#visualise_data(pd.concat([x_train,y_train],axis=1),'../images/data_image.png')

# Write your solution here:

def plot_corr(df, size=11):
    """Draw a heat map of the pairwise correlations of the numeric columns.

    Args:
        df: frame whose integer and float columns are correlated.
        size: NOTE(review): accepted but not used by this function -- confirm
            whether it was meant to control the figure size.
    """
    numerics = ['int8', 'int16', 'int32', 'int64',
                'float16', 'float32', 'float64']
    num_cols = list(df.select_dtypes(include=numerics).columns)
    # ``DataFrame.corr`` computes every pairwise correlation in one call and
    # returns a float matrix. Filling an empty frame cell by cell needed a
    # quadratic Python loop and left the frame with object dtype, which the
    # plotting call may not accept.
    corr1 = df[num_cols].corr()
    plt.pcolor(corr1)
    # Ticks are offset by 0.5 so each label sits at the centre of its cell.
    plt.yticks(np.arange(0.5, len(corr1.index), 1), corr1.index)
    plt.xticks(np.arange(0.5, len(corr1.columns), 1), corr1.columns)
    plt.show()

#plot_corr(df)


Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
22 changes: 21 additions & 1 deletion q11_feature_selection_q02_best_k_features/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q11_feature_selection_q02_best_k_features/build.py
# Default imports
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_regression
Expand All @@ -20,7 +21,26 @@

np.random.seed(9)
# Write your code below

def percentile_k_features(x_train, y_train, k=50):
    """Return the names of the features kept by ``SelectPercentile``.

    Args:
        x_train: training feature frame; its column names are returned.
        y_train: training target.
        k: percentile of features to keep, passed to ``SelectPercentile``.

    Returns:
        List of selected column names, highest ``f_regression`` score first.
    """
    selector = SelectPercentile(f_regression, percentile=k)
    selector.fit(x_train, y_train)
    # Ask the selector which columns it kept instead of assuming a fixed
    # count of 16: the result then follows ``k`` and the number of columns,
    # and tied scores cannot add the same name twice.
    kept = [
        (name, score)
        for name, score, selected in zip(
            x_train.columns, selector.scores_, selector.get_support())
        if selected
    ]
    # The sort is stable, so equal scores keep their original column order.
    kept.sort(key=lambda pair: pair[1], reverse=True)
    return [name for name, _ in kept]
# Module-level call: runs the feature selection at import time; the returned
# list is discarded.
percentile_k_features(x_train,y_train,k=50)






Expand Down
Binary file not shown.
Binary file not shown.
Binary file modified q12_feature_selection/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q12_feature_selection/__pycache__/build.cpython-36.pyc
Binary file not shown.
7 changes: 7 additions & 0 deletions q12_feature_selection/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q12_feature_selection/build.py
# import matplotlib.pyplot as plt
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
Expand All @@ -16,3 +17,9 @@
x_train,x_test = label_encode(x_train,x_test)

# Write your code below
def feature_selection(x_train, y_train, k=50):
    """Plot the feature/target correlations, then return the selected features.

    The training features and target are concatenated column-wise and passed
    to ``plot_corr``; the return value is whatever
    ``percentile_k_features(x_train, y_train, k)`` returns.
    """
    combined = pd.concat([x_train, y_train], axis=1)
    plot_corr(combined)
    return percentile_k_features(x_train, y_train, k)


Binary file modified q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q12_feature_selection/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q13_plot_residuals/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q13_plot_residuals/__pycache__/build.cpython-36.pyc
Binary file not shown.
9 changes: 8 additions & 1 deletion q13_plot_residuals/build.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# %load q13_plot_residuals/build.py


import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Write your code below
def plot_residuals(y_actual, y_pred, name):
    """Plot ``y_pred`` against ``y_actual`` and show the figure.

    NOTE(review): ``name`` is accepted but never used, and the plot shows the
    predictions against the actual values rather than their difference --
    confirm that this is the intended output.
    """
    horizontal, vertical = y_actual, y_pred
    plt.plot(horizontal, vertical)
    plt.show()


Binary file modified q13_plot_residuals/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q13_plot_residuals/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q14_benchmarking/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q14_benchmarking/__pycache__/build.cpython-36.pyc
Binary file not shown.
13 changes: 12 additions & 1 deletion q14_benchmarking/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q14_benchmarking/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

Expand Down Expand Up @@ -25,6 +26,16 @@
x_train,x_test = label_encode(x_train,x_test)


# Write your code below
def create_stats(x_train, x_test, y_train, y_test):
    """Benchmark Lasso and Ridge with and without feature selection.

    Each model (``alpha=0.1``) is evaluated on all features and on the
    features returned by ``feature_selection(..., k=50)``.

    Returns:
        The four one-row stats frames concatenated column-wise in the order:
        lasso, lasso with selected features, ridge, ridge with selected
        features.
    """
    # The selection only depends on the training data, so one call serves both
    # models instead of repeating the same computation (and plot) per model.
    selected = feature_selection(x_train, y_train, k=50)

    lasso_stats = lasso(x_train, x_test, y_train, y_test, alpha=0.1)[2]
    lasso_stats_ft = lasso(x_train[selected], x_test[selected],
                           y_train, y_test, alpha=0.1)[2]
    ridge_stats = ridge(x_train, x_test, y_train, y_test, alpha=0.1)[2]
    ridge_stats_ft = ridge(x_train[selected], x_test[selected],
                           y_train, y_test, alpha=0.1)[2]
    return pd.concat(
        [lasso_stats, lasso_stats_ft, ridge_stats, ridge_stats_ft], axis=1)
# Module-level call: runs the whole benchmark at import time; the returned
# frame is discarded.
create_stats(x_train,x_test,y_train,y_test)



Binary file modified q14_benchmarking/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q14_benchmarking/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file not shown.
6 changes: 6 additions & 0 deletions q15_select_best_model/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q15_select_best_model/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data

from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
Expand All @@ -21,3 +22,8 @@


# Write your code below
def complete_build(x_train, x_test, y_train, y_test):
    """Return the benchmark stats from ``create_stats`` stacked twice row-wise.

    Returns:
        A frame made of two copies of the ``create_stats`` result
        concatenated along ``axis=0``.
    """
    # Run the benchmark once and reuse the frame: a second call with the same
    # arguments would only repeat the model fits and the plotting.
    # NOTE(review): this assumes ``create_stats`` yields the same frame for the
    # same inputs -- confirm no randomness is involved.
    single = create_stats(x_train, x_test, y_train, y_test)
    stats = pd.concat([single, single], axis=0)
    return stats


Binary file not shown.
Binary file not shown.