From afbad1c7765d6fab9b6f45fd9ec554cde047a814 Mon Sep 17 00:00:00 2001 From: SL345 Date: Sun, 12 Aug 2018 06:04:01 +0000 Subject: [PATCH 01/10] Done --- q01_load_data/build.py | 10 +++++++++- q01_load_data/tests/test_sol.pkl | Bin 0 -> 79 bytes q01_load_data/tests/user_sol.pkl | Bin 0 -> 67 bytes 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 q01_load_data/tests/test_sol.pkl create mode 100644 q01_load_data/tests/user_sol.pkl diff --git a/q01_load_data/build.py b/q01_load_data/build.py index a29c139..1cf1938 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,7 +1,15 @@ +# %load q01_load_data/build.py import pandas as pd import numpy as np from sklearn.model_selection import train_test_split +path = 'data/elecdemand.csv' - +def q01_load_data(path): + data = pd.read_csv(path) + data['Datetime'] = pd.to_datetime(data['Datetime']) + shape=data.shape + return shape,data + + diff --git a/q01_load_data/tests/test_sol.pkl b/q01_load_data/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..7912fb668f4a4bff9f60d47546462bb183207435 GIT binary patch literal 79 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$Sj!#Lfj5jcfFDS~-N=+`&D>N{S&&f|r W0g9I->LrzC=A>|;h;bD%=m7w&xE!N{S&&f|r0g9I->LrzC=A>|; Kh;bD%=m7vu+!yBn literal 0 HcmV?d00001 From e7f10d85160cdb39a96fcc91355827a461a5ab59 Mon Sep 17 00:00:00 2001 From: SL345 Date: Sun, 12 Aug 2018 07:26:43 +0000 Subject: [PATCH 02/10] Done --- q02_data_splitter/build.py | 9 ++++++++- q02_data_splitter/tests/test_sol.pkl | Bin 0 -> 87 bytes q02_data_splitter/tests/user_sol.pkl | Bin 0 -> 75 bytes 3 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 q02_data_splitter/tests/test_sol.pkl create mode 100644 q02_data_splitter/tests/user_sol.pkl diff --git a/q02_data_splitter/build.py b/q02_data_splitter/build.py index b6c715f..9852d80 100644 --- a/q02_data_splitter/build.py +++ b/q02_data_splitter/build.py @@ -1,7 +1,14 @@ +# %load q02_data_splitter/build.py import pandas as pd import numpy as np from sklearn.model_selection import TimeSeriesSplit from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data +path = 'data/elecdemand.csv' +def q02_data_splitter(path): + np.random.seed(9) + shape,data = q01_load_data(path) + tscv = TimeSeriesSplit(n_splits=2) + split_data = list(tscv.split(data)) + return split_data - diff --git a/q02_data_splitter/tests/test_sol.pkl b/q02_data_splitter/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a3e9cc57e21382b4149f86a10e44fd859a5d4213 GIT binary patch literal 87 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$Sj!#Lfj5jcfFDS~-N=+`&D+IDr5=#=} ZiwklxOG;9U^pZ+5b5gi4#JLI?^Z>TKAY=dl literal 0 HcmV?d00001 diff --git a/q02_data_splitter/tests/user_sol.pkl b/q02_data_splitter/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..70c23336a7d13cc99ae68a1dcdbdc3e6646e261a GIT binary patch literal 75 zcmZo*PA Date: Sun, 12 Aug 2018 07:38:08 +0000 Subject: [PATCH 03/10] Done --- q03_time_plot/build.py | 10 ++++++++++ test_sol.pkl | Bin 0 -> 79 bytes user_sol.pkl | Bin 0 -> 67 bytes 3 files changed, 10 insertions(+) create mode 100644 test_sol.pkl create mode 100644 user_sol.pkl diff --git a/q03_time_plot/build.py b/q03_time_plot/build.py index bf18743..a3e8047 100644 --- a/q03_time_plot/build.py +++ b/q03_time_plot/build.py @@ -1,7 +1,17 @@ +# %load q03_time_plot/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data plt.switch_backend('agg') +path = 'data/elecdemand.csv' +def q03_time_plot(path): + shape,data = q01_load_data(path) + plt.figure(figsize=(16,7)) + plt.plot(data.Datetime,data.Demand) + plt.title('Electricity Demand in Australia for a year') + plt.xlabel('Time') + plt.ylabel('Demand') + plt.show() diff --git a/test_sol.pkl b/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cec104b8f2d3d8445aa4256f364f0ca31036739d GIT binary patch literal 79 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$Sj!#Lfj5jcfFDS~-N=+`&D>N{U2TK>^ UN{U2TK>^ Date: Sun, 12 Aug 2018 07:57:08 +0000 Subject: [PATCH 04/10] Done --- q04_boxplot/build.py | 17 ++++++++++++++++- q04_boxplot/tests/test_sol.pkl | Bin 0 -> 75 bytes q04_boxplot/tests/user_sol.pkl | Bin 0 -> 63 bytes 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 q04_boxplot/tests/test_sol.pkl create mode 100644 q04_boxplot/tests/user_sol.pkl diff --git a/q04_boxplot/build.py b/q04_boxplot/build.py index c69f931..21aeab6 100644 --- a/q04_boxplot/build.py +++ b/q04_boxplot/build.py @@ -1,7 +1,22 @@ +# %load q04_boxplot/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt +import seaborn as sns from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data plt.switch_backend('agg') +path='data/elecdemand.csv' + +def q04_boxplot(path): + shape,data = q01_load_data(path) + data.head() + plt.figure(figsize=(16,7)) + sns.factorplot(x='WorkDay', y='Demand', data=data, kind='box', size=8, aspect=float(16/7)) + plt.xlabel('Workday') + plt.ylabel('Demand') + plt.title('Change in Electricity Demand wrt to Demand') + plt.show() + + + - diff --git a/q04_boxplot/tests/test_sol.pkl b/q04_boxplot/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f863f64a8c994210960bcdfa9c645b392115b34a GIT binary patch literal 75 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$Sj!#Lfj5jcfFDS~-N=+`&D>N{PPs*<- V$jL9!ODfIGN#Q~g;VNX%0{}mk8<+q9 literal 0 HcmV?d00001 diff --git a/q04_boxplot/tests/user_sol.pkl b/q04_boxplot/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..44dda795407bb9cf236e1b00c6328005eb47a0ac GIT binary patch literal 63 zcmZo*PAN{PPs*<-$jL9!ODfIGN#Q~g J;VNX%0|4{i7Rvwt literal 0 HcmV?d00001 From 917fbd9a9b87bd8134e4b800d80a150e318f1ce9 Mon Sep 17 00:00:00 2001 From: SL345 Date: Sun, 12 Aug 2018 09:04:17 +0000 Subject: [PATCH 05/10] Done --- q05_feature_engineering/build.py | 12 ++++- q05_feature_engineering/tests/test_sol.pkl | Bin 0 -> 99 bytes q05_feature_engineering/tests/user_sol.pkl | Bin 0 -> 87 bytes q05_feature_engineering_part2/build.py | 56 +++++++++++++++++++-- 4 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 q05_feature_engineering/tests/test_sol.pkl create mode 100644 q05_feature_engineering/tests/user_sol.pkl diff --git a/q05_feature_engineering/build.py b/q05_feature_engineering/build.py index 97e29e7..62b6347 100644 --- a/q05_feature_engineering/build.py +++ b/q05_feature_engineering/build.py @@ -1,9 +1,19 @@ +# %load q05_feature_engineering/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data plt.switch_backend('agg') - +path = 'data/elecdemand.csv' +def q05_feature_engineering(path): + shape,data = q01_load_data(path) + corr = data['Temperature'].corr(data['Demand']) + plt.figure(figsize=(16,6)) + plt.scatter(data['Temperature'],data['Demand']) + plt.title('Temperature vs. Demand') + plt.xlabel('Temperature') + plt.ylabel('Demand') + plt.show() diff --git a/q05_feature_engineering/tests/test_sol.pkl b/q05_feature_engineering/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c8990f6faf7b3093f4ad2a9063625b47c6c393dd GIT binary patch literal 99 zcmZ|F$q9fk5Cu@do5c)4u%02yH;iO4+aWhg*9JVl_pH`eOx<$~VyH_5H&j7mrzfkO e@0u^zPDLGjLibU@NL~`YmOPmu!EHafm4#kvmnAj; literal 0 HcmV?d00001 diff --git a/q05_feature_engineering/tests/user_sol.pkl b/q05_feature_engineering/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9f2b9eccc8898617f94e628a44c1bb332af295e3 GIT binary patch literal 87 zcmZo*PAN{TPfJZKDJ@EkPt8lu%u59- U%S+cwD$UGE;ld%yRmh+R0IR4VX#fBK literal 0 HcmV?d00001 diff --git a/q05_feature_engineering_part2/build.py b/q05_feature_engineering_part2/build.py index 53e6749..3cc0a1a 100644 --- a/q05_feature_engineering_part2/build.py +++ b/q05_feature_engineering_part2/build.py @@ -1,8 +1,58 @@ +# %load q05_feature_engineering_part2/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt -from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data -plt.switch_backend('agg') +from q01_load_data.build import q01_load_data +path = 'data/elecdemand.csv' + +def q05_feature_engineering_part2(path): + shape, data = q01_load_data(path) + +# %load q05_feature_engineering_part2/build.py +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data +path = 'data/elecdemand.csv' + + +shape, data = q01_load_data(path) +data['hour'] = data['Datetime'].dt.hour + +hour = [] +for i in range(24): + hour_demand = data[data['hour'] == i]['Demand'].values + hour.append(hour_demand) + print(hour) +plt.figure(figsize=(16,6)) +plt.boxplot(hour,labels=[str(i) for i in range(24)]) +plt.xlabel('Hour') +plt.ylabel('Demand') +plt.title('Change in Electricity demand wrt to Hour') +plt.show() + +# %load q05_feature_engineering_part2/build.py +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data +path = 'data/elecdemand.csv' + + +shape, data = q01_load_data(path) +data['hour'] = data['Datetime'].dt.hour + +plt.figure(figsize=(16, 6)) + +hours = [] +for i in range(24): + one = data[data['hour'] == i]['Demand'].values + hours.append(one) +plt.boxplot(hours, labels=[str(i) for i in range(24)]) +plt.xlabel('Hour') +plt.ylabel('Demand') +plt.title('Change in Electricity demand wrt to Hour') +plt.show() + - From 6aec663c25e37bd5eebc97935893bbf1715af0bb Mon Sep 17 00:00:00 2001 From: SL345 Date: Sun, 12 Aug 2018 09:28:22 +0000 Subject: [PATCH 06/10] Done --- q05_feature_engineering_part3/build.py | 10 +++++++++- q05_feature_engineering_part3/tests/test_sol.pkl | Bin 0 -> 111 bytes q05_feature_engineering_part3/tests/user_sol.pkl | Bin 0 -> 99 bytes 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 q05_feature_engineering_part3/tests/test_sol.pkl create mode 100644 q05_feature_engineering_part3/tests/user_sol.pkl diff --git a/q05_feature_engineering_part3/build.py b/q05_feature_engineering_part3/build.py index 7da14f7..f1f046d 100644 --- a/q05_feature_engineering_part3/build.py +++ b/q05_feature_engineering_part3/build.py @@ -1,8 +1,16 @@ +# %load q05_feature_engineering_part3/build.py import pandas as pd import numpy as np import matplotlib.pyplot as plt +import seaborn as sns from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data plt.switch_backend('agg') +path = 'data/elecdemand.csv' +def q05_feature_engineering_part3(path): + shape,data = q01_load_data(path) + data['Month'] = data['Datetime'].dt.strftime('%b') + sns.factorplot(x='Month',y='Demand',data=data,kind='box',size=8,aspect=(16/9)) + plt.title('Change in Demand of Electricity wrt Month') + plt.show() - diff --git a/q05_feature_engineering_part3/tests/test_sol.pkl b/q05_feature_engineering_part3/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..017cf665feddbd720b48d7ba68ba334a780efe40 GIT binary patch literal 111 zcmaLNO%6an3N{TPfJZKDJ@EkPt8lu%u59- a%S(?hNGvKb)=MhQ%t_(GtA?wPK@R|09whq! literal 0 HcmV?d00001 From 5463f52b02bc1410fd2074b6c0cd86b0301fa4ad Mon Sep 17 00:00:00 2001 From: SL345 Date: Sun, 12 Aug 2018 12:27:30 +0000 Subject: [PATCH 07/10] Done --- q05_feature_engineering_part4/build.py | 16 +++++++++--- .../tests/test_sol.pkl | Bin 0 -> 111 bytes .../tests/user_sol.pkl | Bin 0 -> 99 bytes q06_linear_regression/build.py | 24 ++++++++++++++++-- q06_linear_regression/tests/test_sol.pkl | Bin 0 -> 95 bytes q06_linear_regression/tests/user_sol.pkl | Bin 0 -> 83 bytes 6 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 q05_feature_engineering_part4/tests/test_sol.pkl create mode 100644 q05_feature_engineering_part4/tests/user_sol.pkl create mode 100644 q06_linear_regression/tests/test_sol.pkl create mode 100644 q06_linear_regression/tests/user_sol.pkl diff --git a/q05_feature_engineering_part4/build.py b/q05_feature_engineering_part4/build.py index 2731397..3c02c3c 100644 --- a/q05_feature_engineering_part4/build.py +++ b/q05_feature_engineering_part4/build.py @@ -1,9 +1,19 @@ +# %load q05_feature_engineering_part2/build.py import pandas as pd import numpy as np -from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt +import seaborn as sns from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data -plt.switch_backend('agg') +path = 'data/elecdemand.csv' -def q05_feature_engineering_part4(): +def q05_feature_engineering_part4(path): + shape, data = q01_load_data(path) + data['hour'] = data['Datetime'].dt.hour + data['month'] = data['Datetime'].dt.strftime('%b') + + data['Peakhours'] = data['hour'].apply(lambda x: 1 if ((x >= 6) & (x < 20)) else 0) + data['Peakmonths'] = data['month'].apply(lambda x: 1 if x in ['Feb', 'May', 'Jun', 'Jul', 'Aug'] else 0) + + return data + diff --git a/q05_feature_engineering_part4/tests/test_sol.pkl b/q05_feature_engineering_part4/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fca58170afc11aa0ba91572679fb87f1db092e35 GIT binary patch literal 111 zcmaLNO%6an3N{TPfJZKDJ@EkPt8lu%u59- a%S(?hNGvKb(Mu}L%t_(GtA?wPK@R|0OeFmP literal 0 HcmV?d00001 diff --git a/q06_linear_regression/build.py b/q06_linear_regression/build.py index 8c11052..7b89231 100644 --- a/q06_linear_regression/build.py +++ b/q06_linear_regression/build.py @@ -1,3 +1,4 @@ +# %load q06_linear_regression/build.py import pandas as pd import numpy as np import math @@ -6,7 +7,26 @@ from greyatomlib.time_series_day_02_project.q05_feature_engineering_part4.build import q05_feature_engineering_part4 from greyatomlib.time_series_day_02_project.q02_data_splitter.build import q02_data_splitter -fe = ["WorkDay", "Peakhours", "Peakmonths"] - +fe = ['WorkDay', 'Peakhours', 'Peakmonths'] +def q06_linear_regression(path,columns = fe, random_state =9): + np.random.seed(random_state) + data = q05_feature_engineering_part4(path) + splits = q02_data_splitter(path) + 'write your solution here' + rmse = [] + for i in splits: + train = i[0] + valid = i[1] + x_train, y_train = data[fe].values[train], data['Demand'].values[train] + x_valid, y_valid = data[fe].values[valid], data['Demand'].values[valid] + model = LinearRegression() + model.fit(x_train, y_train) + pred = model.predict(x_valid) + measure = math.pow(mean_squared_error(y_valid, pred), 0.5) + rmse.append(measure) + return np.mean(rmse) + + + diff --git a/q06_linear_regression/tests/test_sol.pkl b/q06_linear_regression/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e0cbf28ec8ae05c55b655d1ba771b898848292f2 GIT binary patch literal 95 zcmZo*PEIdMtxPP*&&|n9(ksc#O^q*3Ey_$Sj!#Lfj5jcfFDS~-N=+`&D>N{R&&kY7 eO)QEpN(JgEF3!x)(@QGN%t_(GD#umGpa%ffmLtLd literal 0 HcmV?d00001 diff --git a/q06_linear_regression/tests/user_sol.pkl b/q06_linear_regression/tests/user_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6794af11eb97c6033013830c6dee62f85f8732a4 GIT binary patch literal 83 zcmZo*PAN{R&&kY7O)QEpN=+|HEiTT? T&(ljP&CE&R!YapA$e;%RD(W8f literal 0 HcmV?d00001 From d2c39ec3892c9847b96e6c6a200f94a4fb238d35 Mon Sep 17 00:00:00 2001 From: SL345 Date: Tue, 14 Aug 2018 07:28:14 +0000 Subject: [PATCH 08/10] Done --- q05_feature_engineering_part2/build.py | 63 ++++-------------- .../tests/test_sol.pkl | Bin 0 -> 111 bytes .../tests/user_sol.pkl | Bin 0 -> 99 bytes q07_randomforest_regressor/build.py | 24 ++++++- q08_gradientboosting_regressor/build.py | 21 +++++- 5 files changed, 56 insertions(+), 52 deletions(-) create mode 100644 q05_feature_engineering_part2/tests/test_sol.pkl create mode 100644 q05_feature_engineering_part2/tests/user_sol.pkl diff --git a/q05_feature_engineering_part2/build.py b/q05_feature_engineering_part2/build.py index 3cc0a1a..2ff0951 100644 --- a/q05_feature_engineering_part2/build.py +++ b/q05_feature_engineering_part2/build.py @@ -2,57 +2,22 @@ import pandas as pd import numpy as np import matplotlib.pyplot as plt -from q01_load_data.build import q01_load_data +from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data path = 'data/elecdemand.csv' +plt.switch_backend('agg') def q05_feature_engineering_part2(path): shape, data = q01_load_data(path) - -# %load q05_feature_engineering_part2/build.py -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data -path = 'data/elecdemand.csv' - - -shape, data = q01_load_data(path) -data['hour'] = data['Datetime'].dt.hour - -hour = [] -for i in range(24): - hour_demand = data[data['hour'] == i]['Demand'].values - hour.append(hour_demand) - print(hour) -plt.figure(figsize=(16,6)) -plt.boxplot(hour,labels=[str(i) for i in range(24)]) -plt.xlabel('Hour') -plt.ylabel('Demand') -plt.title('Change in Electricity demand wrt to Hour') -plt.show() - -# %load q05_feature_engineering_part2/build.py -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt -from greyatomlib.time_series_day_02_project.q01_load_data.build import q01_load_data -path = 'data/elecdemand.csv' - - -shape, data = q01_load_data(path) -data['hour'] = data['Datetime'].dt.hour - -plt.figure(figsize=(16, 6)) - -hours = [] -for i in range(24): - one = data[data['hour'] == i]['Demand'].values - hours.append(one) -plt.boxplot(hours, labels=[str(i) for i in range(24)]) -plt.xlabel('Hour') -plt.ylabel('Demand') -plt.title('Change in Electricity demand wrt to Hour') -plt.show() - - + data['hour'] = data['Datetime'].dt.hour + + hour = [] + for i in range(24): + hour_demand = data[data['hour'] == i]['Demand'].values + hour.append(hour_demand) + plt.figure(figsize=(16,6)) + plt.boxplot(hour,labels=[str(i) for i in range(24)]) + plt.xlabel('Hour') + plt.ylabel('Demand') + plt.title('Change in Electricity demand wrt to Hour') + plt.show() diff --git a/q05_feature_engineering_part2/tests/test_sol.pkl b/q05_feature_engineering_part2/tests/test_sol.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2f666a14df1eb6caf5167db48841dd663be1e31b GIT binary patch literal 111 zcmaLNO%6an3N{TPfJZKDJ@EkPt8lu%u59- a%S(?hNGvKb(n~7M%t_(GtA?wPK@R{~@Fe*F literal 0 HcmV?d00001 diff --git a/q07_randomforest_regressor/build.py b/q07_randomforest_regressor/build.py index 4cdb470..658b847 100644 --- a/q07_randomforest_regressor/build.py +++ b/q07_randomforest_regressor/build.py @@ -1,3 +1,4 @@ +# %load q07_randomforest_regressor/build.py import pandas as pd import numpy as np import math @@ -6,7 +7,26 @@ from greyatomlib.time_series_day_02_project.q05_feature_engineering_part4.build import q05_feature_engineering_part4 from greyatomlib.time_series_day_02_project.q02_data_splitter.build import q02_data_splitter -fe = ["WorkDay", "Peakhours", "Peakmonths"] - +fe = ['WorkDay', 'Peakhours', 'Peakmonths'] +path = 'data/elecdemand.csv' +def q07_randomforest_regressor(path,columns = fe, random_state =9): + np.random.seed(random_state) + data = q05_feature_engineering_part4(path) + splits = q02_data_splitter(path) + 'write your solution here' + rmse = [] + for i in splits: + train = i[0] + valid = i[1] + x_train, y_train = data[fe].values[train], data['Demand'].values[train] + x_valid, y_valid = data[fe].values[valid], data['Demand'].values[valid] + model = RandomForestRegressor() + model.fit(x_train, y_train) + pred = model.predict(x_valid) + measure = math.pow(mean_squared_error(y_valid, pred), 0.5) + rmse.append(measure) + return np.mean(rmse) + + diff --git a/q08_gradientboosting_regressor/build.py b/q08_gradientboosting_regressor/build.py index e661aac..5b9fe6c 100644 --- a/q08_gradientboosting_regressor/build.py +++ b/q08_gradientboosting_regressor/build.py @@ -1,3 +1,4 @@ +# %load q08_gradientboosting_regressor/build.py import pandas as pd import numpy as np import math @@ -6,5 +7,23 @@ from greyatomlib.time_series_day_02_project.q05_feature_engineering_part4.build import q05_feature_engineering_part4 from greyatomlib.time_series_day_02_project.q02_data_splitter.build import q02_data_splitter -fe = ["WorkDay", "Peakhours", "Peakmonths"] +fe = ['WorkDay', 'Peakhours', 'Peakmonths'] +def q08_gradientboosting_regressor(path,columns = fe, random_state =9): + np.random.seed(random_state) + data = q05_feature_engineering_part4(path) + splits = q02_data_splitter(path) + + rmse = [] + for i in splits: + train = i[0] + valid = i[1] + x_train, y_train = data[fe].values[train], data['Demand'].values[train] + x_valid, y_valid = data[fe].values[valid], data['Demand'].values[valid] + model = GradientBoostingRegressor() + model.fit(x_train, y_train) + pred = model.predict(x_valid) + measure = math.pow(mean_squared_error(y_valid, pred), 0.5) + rmse.append(measure) + return np.mean(rmse) + From 7e6bd41c45edddb4e712e8ab7e64907d042d8580 Mon Sep 17 00:00:00 2001 From: SL345 Date: Tue, 14 Aug 2018 09:07:28 +0000 Subject: [PATCH 09/10] Done --- q07_randomforest_regressor/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q07_randomforest_regressor/build.py b/q07_randomforest_regressor/build.py index 658b847..7d02947 100644 --- a/q07_randomforest_regressor/build.py +++ b/q07_randomforest_regressor/build.py @@ -22,7 +22,7 @@ def q07_randomforest_regressor(path,columns = fe, random_state =9): valid = i[1] x_train, y_train = data[fe].values[train], data['Demand'].values[train] x_valid, y_valid = data[fe].values[valid], data['Demand'].values[valid] - model = RandomForestRegressor() + model = RandomForestRegressor(n_estimators=50,min_samples_leaf=30,random_state=10) model.fit(x_train, y_train) pred = model.predict(x_valid) measure = math.pow(mean_squared_error(y_valid, pred), 0.5) From 946ff9baef6c6ed5bf8f51dcafcea4b9c9580d7f Mon Sep 17 00:00:00 2001 From: SL345 Date: Tue, 14 Aug 2018 09:10:45 +0000 Subject: [PATCH 10/10] Done --- q08_gradientboosting_regressor/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/q08_gradientboosting_regressor/build.py b/q08_gradientboosting_regressor/build.py index 5b9fe6c..70b2a99 100644 --- a/q08_gradientboosting_regressor/build.py +++ b/q08_gradientboosting_regressor/build.py @@ -20,7 +20,7 @@ def q08_gradientboosting_regressor(path,columns = fe, random_state =9): valid = i[1] x_train, y_train = data[fe].values[train], data['Demand'].values[train] x_valid, y_valid = data[fe].values[valid], data['Demand'].values[valid] - model = GradientBoostingRegressor() + model = GradientBoostingRegressor(n_estimators=200,min_samples_leaf=10,learning_rate=0.01,random_state=9) model.fit(x_train, y_train) pred = model.predict(x_valid) measure = math.pow(mean_squared_error(y_valid, pred), 0.5)