Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions q01_load_data/build.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
# %load q01_load_data/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Module-level path to the electricity-demand CSV.
path = 'data/elecdemand.csv'


def q01_load_data(path):
    """Read a CSV file and convert its ``Datetime`` column to timestamps.

    Parameters
    ----------
    path : str
        Location of a CSV file that contains a ``Datetime`` column.

    Returns
    -------
    tuple
        ``(shape, frame)``: ``frame`` is the loaded DataFrame with the
        ``Datetime`` column passed through ``pd.to_datetime``, and ``shape``
        is ``frame.shape``.
    """
    frame = pd.read_csv(path).assign(
        Datetime=lambda raw: pd.to_datetime(raw['Datetime'])
    )
    return frame.shape, frame





Binary file added q01_load_data/tests/test_sol.pkl
Binary file not shown.
Binary file added q01_load_data/tests/user_sol.pkl
Binary file not shown.
13 changes: 12 additions & 1 deletion q02_data_splitter/build.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# %load q02_data_splitter/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data

def q02_data_splitter(path):
    """Return time-ordered train/validation index pairs for the dataset.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to ``q01_load_data``. The
        argument is honoured as given rather than being replaced by a
        hard-coded file name.

    Returns
    -------
    list of tuple
        One ``(train_index, valid_index)`` pair per split produced by
        ``TimeSeriesSplit(n_splits=2)``.
    """
    _, frame = q01_load_data(path)
    splitter = TimeSeriesSplit(n_splits=2)
    # ``split`` already yields (train, validation) tuples, so no manual
    # accumulation loop is needed.
    return list(splitter.split(frame))





Binary file added q02_data_splitter/tests/test_sol.pkl
Binary file not shown.
Binary file added q02_data_splitter/tests/user_sol.pkl
Binary file not shown.
12 changes: 12 additions & 0 deletions q03_time_plot/build.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
# %load q03_time_plot/build.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
plt.switch_backend('agg')

def q03_time_plot(path):
    """Plot the ``Demand`` column against ``Datetime`` as a line chart.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to ``q01_load_data``. The
        argument is honoured as given rather than being replaced by a
        hard-coded file name.

    Returns
    -------
    None
        The figure is drawn on the current matplotlib figure and shown.
    """
    _, frame = q01_load_data(path)
    plt.plot(frame['Datetime'], frame['Demand'])
    plt.title('Electricity Demand for Australia for a year')
    plt.xlabel('Year-Month')
    plt.ylabel('Demand')
    plt.show()




11 changes: 10 additions & 1 deletion q04_boxplot/build.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
# %load q04_boxplot/build.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
plt.switch_backend('agg')


def q04_boxplot(path):
    """Draw a box plot of ``Demand`` grouped by the ``WorkDay`` column.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to ``q01_load_data``. The
        argument is honoured as given rather than being replaced by a
        hard-coded file name.

    Returns
    -------
    None
        The figure is created by ``DataFrame.boxplot`` and shown.
    """
    _, frame = q01_load_data(path)
    frame.boxplot(column=['Demand'], by=['WorkDay'])
    plt.show()




Binary file added q04_boxplot/tests/test_sol.pkl
Binary file not shown.
Binary file added q04_boxplot/tests/user_sol.pkl
Binary file not shown.
12 changes: 12 additions & 0 deletions q05_feature_engineering/build.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
# %load q05_feature_engineering/build.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
plt.switch_backend('agg')

def q05_feature_engineering(path):
    """Draw a scatter plot of ``Demand`` against ``Temperature``.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to ``q01_load_data``. The
        argument is honoured as given rather than being replaced by a
        hard-coded file name.

    Returns
    -------
    None
        The scatter plot is drawn on the current matplotlib figure and shown.
    """
    _, frame = q01_load_data(path)
    plt.scatter(frame['Temperature'], frame['Demand'])
    plt.xlabel('Temperature')
    plt.ylabel('Demand')
    plt.title('Temperature vs Demand')
    plt.show()




Binary file added q05_feature_engineering/tests/test_sol.pkl
Binary file not shown.
Binary file added q05_feature_engineering/tests/user_sol.pkl
Binary file not shown.
31 changes: 29 additions & 2 deletions q05_feature_engineering_part2/build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,35 @@
# %load q05_feature_engineering_part2/build.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
plt.switch_backend('agg')

def q05_feature_engineering_part2(path):
    """Draw box plots of ``Demand`` per hour of day and per calendar month.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to ``q01_load_data``. The
        argument is honoured as given rather than being replaced by a
        hard-coded file name.

    Returns
    -------
    None
        Two stacked subplots (hours on top, months below) are drawn and shown.
    """
    _, data = q01_load_data(path)
    data['hour'] = data['Datetime'].dt.hour
    data['month'] = data['Datetime'].dt.month

    # ``dt.hour`` takes the values 0-23, so iterate exactly that range;
    # counting 1-24 would drop midnight and add an always-empty 24th box.
    hours = range(24)
    months = range(1, 13)
    demand_hours = [
        data.loc[data['hour'] == hour, 'Demand'].values for hour in hours
    ]
    demand_months = [
        data.loc[data['month'] == month, 'Demand'].values for month in months
    ]

    plt.figure(figsize=(16, 6))

    plt.subplot(211)
    plt.boxplot(demand_hours, labels=[str(hour) for hour in hours])
    plt.xlabel('Hour')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to Hour')

    plt.subplot(212)
    plt.boxplot(demand_months, labels=[str(month) for month in months])
    plt.xlabel('Months')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to months')

    plt.show()





Binary file added q05_feature_engineering_part2/tests/test_sol.pkl
Binary file not shown.
Binary file added q05_feature_engineering_part2/tests/user_sol.pkl
Binary file not shown.
27 changes: 26 additions & 1 deletion q05_feature_engineering_part3/build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,33 @@
# %load q05_feature_engineering_part3/build.py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
plt.switch_backend('agg')

def q05_feature_engineering_part3(path):
    """Draw box plots of ``Demand`` per hour of day and per calendar month.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to ``q01_load_data``. The
        argument is honoured as given rather than being replaced by a
        hard-coded file name.

    Returns
    -------
    None
        Two stacked subplots (hours on top, months below) are drawn and shown.
    """
    _, data = q01_load_data(path)
    data['hour'] = data['Datetime'].dt.hour
    data['month'] = data['Datetime'].dt.month

    # ``dt.hour`` takes the values 0-23, so iterate exactly that range;
    # counting 1-24 would drop midnight and add an always-empty 24th box.
    hours = range(24)
    months = range(1, 13)
    demand_hours = [
        data.loc[data['hour'] == hour, 'Demand'].values for hour in hours
    ]
    demand_months = [
        data.loc[data['month'] == month, 'Demand'].values for month in months
    ]

    plt.figure(figsize=(16, 6))

    plt.subplot(211)
    plt.boxplot(demand_hours, labels=[str(hour) for hour in hours])
    plt.xlabel('Hour')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to Hour')

    plt.subplot(212)
    plt.boxplot(demand_months, labels=[str(month) for month in months])
    plt.xlabel('Months')
    plt.ylabel('Demand')
    plt.title('Change in Electricity demand wrt to months')

    plt.show()



Binary file added q05_feature_engineering_part3/tests/test_sol.pkl
Binary file not shown.
Binary file added q05_feature_engineering_part3/tests/user_sol.pkl
Binary file not shown.
17 changes: 13 additions & 4 deletions q05_feature_engineering_part4/build.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
# %load q05_feature_engineering_part4/build.py
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data
plt.switch_backend('agg')

# Module-level path to the electricity-demand CSV; also the default argument
# of the function below.
path = 'data/elecdemand.csv'


def q05_feature_engineering_part4(path=path):
    """Load the dataset and add hour, month and peak-period columns.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file, forwarded to ``q01_load_data``. Defaults to
        the module-level ``path`` so the function can also be called with no
        arguments.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with four added columns:

        * ``hour`` - hour taken from ``Datetime``.
        * ``month`` - abbreviated month name (``strftime('%b')``).
        * ``Peakhours`` - 1 when the hour lies in 6-19 inclusive, else 0.
        * ``Peakmonths`` - 1 when the month is Feb, May, Jun, Jul or Aug,
          else 0.
    """
    _, data = q01_load_data(path)
    timestamps = data['Datetime'].dt
    data['hour'] = timestamps.hour
    data['month'] = timestamps.strftime('%b')

    # Vectorised membership tests; ``between`` is inclusive on both ends, so
    # 6..19 matches the half-open ``range(6, 20)``.
    data['Peakhours'] = data['hour'].between(6, 19).astype('int64')
    peak_months = ['Feb', 'May', 'Jun', 'Jul', 'Aug']
    data['Peakmonths'] = data['month'].isin(peak_months).astype('int64')
    return data



Binary file added q05_feature_engineering_part4/tests/test_sol.pkl
Binary file not shown.
Binary file added q05_feature_engineering_part4/tests/user_sol.pkl
Binary file not shown.
21 changes: 19 additions & 2 deletions q06_linear_regression/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q06_linear_regression/build.py
import pandas as pd
import numpy as np
import math
Expand All @@ -6,7 +7,23 @@
from greyatomlib.time_series_day_02_project.q05_feature_engineering_part4.build import q05_feature_engineering_part4
from greyatomlib.time_series_day_02_project.q02_data_splitter.build import q02_data_splitter

# Default feature columns fed to the model.
fe = ['WorkDay', 'Peakhours', 'Peakmonths']


def q06_linear_regression(path, columns=fe, random_state=9):
    """Cross-validate a linear regression on ``Demand`` and return mean RMSE.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to the feature-engineering and
        splitting helpers.
    columns : sequence of str, optional
        Feature columns used as predictors. Defaults to ``fe``.
    random_state : int, optional
        Seed passed to ``np.random.seed`` before fitting.

    Returns
    -------
    float
        Mean of the per-split root-mean-squared errors on the validation
        folds.
    """
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    features = list(columns)
    rmse = []
    for train_idx, valid_idx in q02_data_splitter(path):
        # The splitter yields positional indices, so select rows with
        # ``iloc`` (``DataFrame.ix`` no longer exists in current pandas).
        train, valid = data.iloc[train_idx], data.iloc[valid_idx]
        model = LinearRegression()
        model.fit(train[features], train['Demand'])
        y_pred = model.predict(valid[features])
        rmse.append(mean_squared_error(valid['Demand'], y_pred) ** 0.5)
    return np.mean(rmse)



Binary file added q06_linear_regression/tests/test_sol.pkl
Binary file not shown.
Binary file added q06_linear_regression/tests/user_sol.pkl
Binary file not shown.
21 changes: 19 additions & 2 deletions q07_randomforest_regressor/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q07_randomforest_regressor/build.py
import pandas as pd
import numpy as np
import math
Expand All @@ -6,7 +7,23 @@
from greyatomlib.time_series_day_02_project.q05_feature_engineering_part4.build import q05_feature_engineering_part4
from greyatomlib.time_series_day_02_project.q02_data_splitter.build import q02_data_splitter

# Default feature columns fed to the model.
fe = ['WorkDay', 'Peakhours', 'Peakmonths']


def q07_randomforest_regressor(path, columns=fe, random_state=9):
    """Cross-validate a random forest on ``Demand`` and return mean RMSE.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to the feature-engineering and
        splitting helpers.
    columns : sequence of str, optional
        Feature columns used as predictors. Defaults to ``fe``.
    random_state : int, optional
        Seed passed to ``np.random.seed`` before fitting.

    Returns
    -------
    float
        Mean of the per-split root-mean-squared errors on the validation
        folds.
    """
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    features = list(columns)
    rmse = []
    for train_idx, valid_idx in q02_data_splitter(path):
        # The splitter yields positional indices, so select rows with
        # ``iloc`` (``DataFrame.ix`` no longer exists in current pandas).
        train, valid = data.iloc[train_idx], data.iloc[valid_idx]
        # NOTE(review): the forest is seeded with a fixed 10 rather than
        # ``random_state``; kept as-is so results do not change - confirm
        # whether that is intended.
        model = RandomForestRegressor(
            n_estimators=50, min_samples_leaf=30, random_state=10
        )
        model.fit(train[features], train['Demand'])
        y_pred = model.predict(valid[features])
        rmse.append(mean_squared_error(valid['Demand'], y_pred) ** 0.5)
    return np.mean(rmse)



21 changes: 20 additions & 1 deletion q08_gradientboosting_regressor/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q08_gradientboosting_regressor/build.py
import pandas as pd
import numpy as np
import math
Expand All @@ -6,5 +7,23 @@
from greyatomlib.time_series_day_02_project.q05_feature_engineering_part4.build import q05_feature_engineering_part4
from greyatomlib.time_series_day_02_project.q02_data_splitter.build import q02_data_splitter

# Default feature columns fed to the model.
fe = ['WorkDay', 'Peakhours', 'Peakmonths']


def q08_gradientboosting_regressor(path, columns=fe, random_state=9):
    """Cross-validate gradient boosting on ``Demand`` and return mean RMSE.

    Parameters
    ----------
    path : str
        Location of the CSV file, forwarded to the feature-engineering and
        splitting helpers.
    columns : sequence of str, optional
        Feature columns used as predictors. Defaults to ``fe``.
    random_state : int, optional
        Seed passed to ``np.random.seed`` and to the regressor.

    Returns
    -------
    float
        Mean of the per-split root-mean-squared errors on the validation
        folds.
    """
    np.random.seed(random_state)
    data = q05_feature_engineering_part4(path)
    features = list(columns)
    rmse = []
    for train_idx, valid_idx in q02_data_splitter(path):
        # The splitter yields positional indices, so select rows with
        # ``iloc`` (``DataFrame.ix`` no longer exists in current pandas).
        train, valid = data.iloc[train_idx], data.iloc[valid_idx]
        model = GradientBoostingRegressor(
            n_estimators=200,
            min_samples_leaf=10,
            learning_rate=0.01,
            random_state=random_state,
        )
        model.fit(train[features], train['Demand'])
        y_pred = model.predict(valid[features])
        rmse.append(mean_squared_error(valid['Demand'], y_pred) ** 0.5)
    return np.mean(rmse)


Binary file added test_sol.pkl
Binary file not shown.
Binary file added user_sol.pkl
Binary file not shown.