From cc14e47805f19e038a43ca5d8ab22d2bdfa106ae Mon Sep 17 00:00:00 2001 From: "martha.frysztacki" Date: Tue, 21 Jan 2020 15:57:42 +0100 Subject: [PATCH 1/5] adaption in load.timeseries_opsd according to new format of opsd output file --- vresutils/load.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/vresutils/load.py b/vresutils/load.py index 6669015..4f9fb70 100644 --- a/vresutils/load.py +++ b/vresutils/load.py @@ -109,7 +109,6 @@ def read_all_excel(fns): return data - def timeseries_opsd(years=slice("2011", "2015"), fn=None): """ Read load data from OPSD time-series package. @@ -130,14 +129,23 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None): fn = toDataDir('time_series_60min_singleindex_filtered.csv') load = (pd.read_csv(fn, index_col=0, parse_dates=True) - .loc[:, lambda df: df.columns.to_series().str.endswith('_load_old')] - .rename(columns=lambda s: s[:-len('_load_old')]) + .loc[:, lambda df: df.columns.to_series().str.endswith('_load_actual_entsoe_power_statistics')] + .rename(columns=lambda s: s[:-len('_load_actual_entsoe_power_statistics')]) .dropna(how="all", axis=0)) if years is not None: load = load.loc[years] # manual alterations: + # GB input given in 3 regions, there is no "global one": + # GBN (Great Britain), NIR (northern ireland), together forming + # UKM (united kingdom). Therefore, we choose UKM to be global. + # the sum of GBN and NIR seems incomplete, more data is missing + # interpolate the rest (copying from previous weeks might be better) + load['GB'] = load['GB_UKM'] + load['GB'] = load['GB'].interpolate() + #load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM']) + # Kosovo gets the same load curve as Serbia # scaled by energy consumption ratio from IEA 2012 load['KV'] = load['RS'] * (4.8 / 27.) 
From 3d20777294162dede59561dd87cd220c136dd662 Mon Sep 17 00:00:00 2001 From: "martha.frysztacki" Date: Tue, 21 Jan 2020 16:28:51 +0100 Subject: [PATCH 2/5] remove unwanted comment --- vresutils/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vresutils/load.py b/vresutils/load.py index 4f9fb70..a5ddd91 100644 --- a/vresutils/load.py +++ b/vresutils/load.py @@ -144,7 +144,7 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None): # interpolate the rest (copying from previous weeks might be better) load['GB'] = load['GB_UKM'] load['GB'] = load['GB'].interpolate() - #load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM']) + load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM']) # Kosovo gets the same load curve as Serbia # scaled by energy consumption ratio from IEA 2012 From 93ce38f6b17cd85a1aa230f28227a5e2f7ca0bdd Mon Sep 17 00:00:00 2001 From: "martha.frysztacki" Date: Wed, 22 Jan 2020 12:06:53 +0100 Subject: [PATCH 3/5] adaptations in manual alterations for opsd timeseries, exported to own function; additional timeslices for better accuracy. 
timeslices up to 4 hours remain interpolated --- vresutils/load.py | 48 +++++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/vresutils/load.py b/vresutils/load.py index a5ddd91..c4843af 100644 --- a/vresutils/load.py +++ b/vresutils/load.py @@ -137,35 +137,47 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None): load = load.loc[years] # manual alterations: - # GB input given in 3 regions, there is no "global one": + load = manual_alternations_opsd(load) + + return load + +def copy_timeslice(load, cntry, start, stop, delta): + start = pd.Timestamp(start) + stop = pd.Timestamp(stop) + if start in load.index and stop in load.index: + load.loc[start:stop, cntry] = load.loc[start+delta:stop+delta, cntry].values + return load + + +def manual_alternations_opsd(load): + # GB in the input is split in 3 regions: # GBN (Great Britain), NIR (northern ireland), together forming - # UKM (united kingdom). Therefore, we choose UKM to be global. + # UKM (united kingdom). Therefore, we choose only UKM. 
# the sum of GBN and NIR seems incomplete, more data is missing # interpolate the rest (copying from previous weeks might be better) load['GB'] = load['GB_UKM'] - load['GB'] = load['GB'].interpolate() load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM']) + # To fill loder periods of gaps (more than 4 hours), we copy a period before into it + load = copy_timeslice(load, 'GR', '2015-08-11 21:00', '2015-08-15 20:00', pd.Timedelta(weeks=1)) + load = copy_timeslice(load, 'AT', '2018-12-31 22:00', '2019-01-01 22:00', pd.Timedelta(days=2)) + load = copy_timeslice(load, 'CH', '2010-01-19 07:00', '2010-01-19 22:00', pd.Timedelta(days=1)) + load = copy_timeslice(load, 'CH', '2010-03-28 00:00', '2010-03-28 21:00', pd.Timedelta(days=1)) + load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(days=3)) + load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1)) + load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1)) + load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(weeks=5)) #whole january missing + # Kosovo gets the same load curve as Serbia # scaled by energy consumption ratio from IEA 2012 load['KV'] = load['RS'] * (4.8 / 27.) 
# Albania gets the same load curve as Macedonia load['AL'] = load['MK'] * (4.1 / 7.4) - - # To fill the half week gap in Greece from start to stop, - # we copy the week before into it - start = pd.Timestamp('2015-08-11 21:00') - stop = pd.Timestamp('2015-08-15 20:00') - w = pd.Timedelta(weeks=1) - - if start in load.index and stop in load.index: - load.loc[start:stop, 'GR'] = load.loc[start-w:stop-w, 'GR'].values - - # There are three missing hours in 2014 and four in 2015 - # we interpolate linearly (copying from the previous week - # might be better) - load['EE'] = load['EE'].interpolate() - + + # interpolate all countries with missing max 4 hours of demand data (in a row) + interpolate_countries = ['AT', 'EE', 'GR', 'IE', 'KV', 'IS', 'LU', 'NO', 'PL', 'PT', 'RS', 'SE', 'SI', 'GB'] + load[interpolate_countries] = load[interpolate_countries].interpolate(limit=4) + # sometimes, the last hour of 2009 is missing. we ignore this - (alternatively, copy first hour of 2010) return load def _upsampling_fitfunc(weights, gdp, pop): From 81c09424d7f818562b42fff6075f56507235db0f Mon Sep 17 00:00:00 2001 From: "martha.frysztacki" Date: Wed, 22 Jan 2020 13:50:28 +0100 Subject: [PATCH 4/5] take into account weekends and whole months --- vresutils/load.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vresutils/load.py b/vresutils/load.py index c4843af..3e08671 100644 --- a/vresutils/load.py +++ b/vresutils/load.py @@ -137,7 +137,7 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None): load = load.loc[years] # manual alterations: - load = manual_alternations_opsd(load) + load = manual_alterations_opsd(load) return load @@ -145,11 +145,11 @@ def copy_timeslice(load, cntry, start, stop, delta): start = pd.Timestamp(start) stop = pd.Timestamp(stop) if start in load.index and stop in load.index: - load.loc[start:stop, cntry] = load.loc[start+delta:stop+delta, cntry].values + load.loc[start:stop, cntry] = load.loc[start-delta:stop-delta, 
cntry].values return load -def manual_alternations_opsd(load): +def manual_alterations_opsd(load): # GB in the input is split in 3 regions: # GBN (Great Britain), NIR (northern ireland), together forming # UKM (united kingdom). Therefore, we choose only UKM. @@ -158,15 +158,15 @@ def manual_alternations_opsd(load): load['GB'] = load['GB_UKM'] load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM']) - # To fill loder periods of gaps (more than 4 hours), we copy a period before into it + # To fill periods of load-gaps (more than 4 hours), we copy a period before into it load = copy_timeslice(load, 'GR', '2015-08-11 21:00', '2015-08-15 20:00', pd.Timedelta(weeks=1)) load = copy_timeslice(load, 'AT', '2018-12-31 22:00', '2019-01-01 22:00', pd.Timedelta(days=2)) load = copy_timeslice(load, 'CH', '2010-01-19 07:00', '2010-01-19 22:00', pd.Timedelta(days=1)) load = copy_timeslice(load, 'CH', '2010-03-28 00:00', '2010-03-28 21:00', pd.Timedelta(days=1)) - load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(days=3)) + load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(weeks=1)) #is a WE, so take WE before load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1)) load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1)) - load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(weeks=5)) #whole january missing + load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-365)) #whole january missing # Kosovo gets the same load curve as Serbia # scaled by energy consumption ratio from IEA 2012 From 58ac2900a5c888d1aa279d42bc16dc26dc3bcf29 Mon Sep 17 00:00:00 2001 From: "martha.frysztacki" Date: Thu, 23 Jan 2020 10:11:16 +0100 Subject: [PATCH 5/5] -364 to take into account weekdays --- vresutils/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/vresutils/load.py b/vresutils/load.py index 3e08671..4e3de6e 100644 --- a/vresutils/load.py +++ b/vresutils/load.py @@ -166,7 +166,7 @@ def manual_alterations_opsd(load): load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(weeks=1)) #is a WE, so take WE before load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1)) load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1)) - load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-365)) #whole january missing + load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-364)) #whole january missing # Kosovo gets the same load curve as Serbia # scaled by energy consumption ratio from IEA 2012