From cc14e47805f19e038a43ca5d8ab22d2bdfa106ae Mon Sep 17 00:00:00 2001
From: "martha.frysztacki" <eb5194@iai-esm003.iai.kit.edu>
Date: Tue, 21 Jan 2020 15:57:42 +0100
Subject: [PATCH 1/5] adapt load.timeseries_opsd to the new format of the
 opsd output file

---
 vresutils/load.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/vresutils/load.py b/vresutils/load.py
index 6669015..4f9fb70 100644
--- a/vresutils/load.py
+++ b/vresutils/load.py
@@ -109,7 +109,6 @@ def read_all_excel(fns):
 
     return data
 
-
 def timeseries_opsd(years=slice("2011", "2015"), fn=None):
     """
     Read load data from OPSD time-series package.
@@ -130,14 +129,23 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None):
         fn = toDataDir('time_series_60min_singleindex_filtered.csv')
 
     load = (pd.read_csv(fn, index_col=0, parse_dates=True)
-            .loc[:, lambda df: df.columns.to_series().str.endswith('_load_old')]
-            .rename(columns=lambda s: s[:-len('_load_old')])
+            .loc[:, lambda df: df.columns.to_series().str.endswith('_load_actual_entsoe_power_statistics')]
+            .rename(columns=lambda s: s[:-len('_load_actual_entsoe_power_statistics')])
             .dropna(how="all", axis=0))
 
     if years is not None:
         load = load.loc[years]
 
     # manual alterations:
+    # GB input given in 3 regions, there is no "global one":
+    # GBN (Great Britain), NIR (northern ireland), together forming
+    # UKM (united kingdom). Therefore, we choose UKM to be global.
+    # the sum of GBN and NIR seems incomplete, more data is missing
+    # interpolate the rest (copying from previous weeks might be better)
+    load['GB'] = load['GB_UKM']
+    load['GB'] = load['GB'].interpolate()
+    #load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM'])
+    
     # Kosovo gets the same load curve as Serbia
     # scaled by energy consumption ratio from IEA 2012
     load['KV'] = load['RS'] * (4.8 / 27.)

From 3d20777294162dede59561dd87cd220c136dd662 Mon Sep 17 00:00:00 2001
From: "martha.frysztacki" <eb5194@iai-esm003.iai.kit.edu>
Date: Tue, 21 Jan 2020 16:28:51 +0100
Subject: [PATCH 2/5] remove unwanted comment

---
 vresutils/load.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vresutils/load.py b/vresutils/load.py
index 4f9fb70..a5ddd91 100644
--- a/vresutils/load.py
+++ b/vresutils/load.py
@@ -144,7 +144,7 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None):
     # interpolate the rest (copying from previous weeks might be better)
     load['GB'] = load['GB_UKM']
     load['GB'] = load['GB'].interpolate()
-    #load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM'])
+    load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM'])
     
     # Kosovo gets the same load curve as Serbia
     # scaled by energy consumption ratio from IEA 2012

From 93ce38f6b17cd85a1aa230f28227a5e2f7ca0bdd Mon Sep 17 00:00:00 2001
From: "martha.frysztacki" <eb5194@iai-esm003.iai.kit.edu>
Date: Wed, 22 Jan 2020 12:06:53 +0100
Subject: [PATCH 3/5] adaptations in manual alterations for opsd timeseries,
 exported to own function; additional timeslices for better accuracy.
 gaps of up to 4 hours remain interpolated

---
 vresutils/load.py | 48 +++++++++++++++++++++++++++++------------------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/vresutils/load.py b/vresutils/load.py
index a5ddd91..c4843af 100644
--- a/vresutils/load.py
+++ b/vresutils/load.py
@@ -137,35 +137,47 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None):
         load = load.loc[years]
 
     # manual alterations:
-    # GB input given in 3 regions, there is no "global one":
+    load = manual_alternations_opsd(load)
+
+    return load
+
+def copy_timeslice(load, cntry, start, stop, delta):
+    start = pd.Timestamp(start)
+    stop = pd.Timestamp(stop)
+    if start in load.index and stop in load.index:
+        load.loc[start:stop, cntry] = load.loc[start+delta:stop+delta, cntry].values
+    return load
+    
+
+def manual_alternations_opsd(load):
+    # GB in the input is split in 3 regions:
     # GBN (Great Britain), NIR (northern ireland), together forming
-    # UKM (united kingdom). Therefore, we choose UKM to be global.
+    # UKM (united kingdom). Therefore, we choose only UKM.
     # the sum of GBN and NIR seems incomplete, more data is missing
     # interpolate the rest (copying from previous weeks might be better)
     load['GB'] = load['GB_UKM']
-    load['GB'] = load['GB'].interpolate()
     load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM'])
     
+    # To fill loder periods of gaps (more than 4 hours), we copy a period before into it
+    load = copy_timeslice(load, 'GR', '2015-08-11 21:00', '2015-08-15 20:00', pd.Timedelta(weeks=1))
+    load = copy_timeslice(load, 'AT', '2018-12-31 22:00', '2019-01-01 22:00', pd.Timedelta(days=2))
+    load = copy_timeslice(load, 'CH', '2010-01-19 07:00', '2010-01-19 22:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'CH', '2010-03-28 00:00', '2010-03-28 21:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(days=3))
+    load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1))
+    load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(weeks=5)) #whole january missing
+    
     # Kosovo gets the same load curve as Serbia
     # scaled by energy consumption ratio from IEA 2012
     load['KV'] = load['RS'] * (4.8 / 27.)
     # Albania gets the same load curve as Macedonia
     load['AL'] = load['MK'] * (4.1 / 7.4)
-
-    # To fill the half week gap in Greece from start to stop,
-    # we copy the week before into it
-    start = pd.Timestamp('2015-08-11 21:00')
-    stop = pd.Timestamp('2015-08-15 20:00')
-    w = pd.Timedelta(weeks=1)
-
-    if start in load.index and stop in load.index:
-        load.loc[start:stop, 'GR'] = load.loc[start-w:stop-w, 'GR'].values
-
-    # There are three missing hours in 2014 and four in 2015
-    # we interpolate linearly (copying from the previous week
-    # might be better)
-    load['EE'] = load['EE'].interpolate()
-
+    
+    # interpolate all countries with missing max 4 hours of demand data (in a row)
+    interpolate_countries = ['AT', 'EE', 'GR', 'IE', 'KV', 'IS', 'LU', 'NO', 'PL', 'PT', 'RS', 'SE', 'SI', 'GB']
+    load[interpolate_countries] = load[interpolate_countries].interpolate(limit=4)
+    # sometimes, the last hour of 2009 is missing. we ignore this - (alternatively, copy first hour of 2010)
     return load
 
 def _upsampling_fitfunc(weights, gdp, pop):

From 81c09424d7f818562b42fff6075f56507235db0f Mon Sep 17 00:00:00 2001
From: "martha.frysztacki" <eb5194@iai-esm003.iai.kit.edu>
Date: Wed, 22 Jan 2020 13:50:28 +0100
Subject: [PATCH 4/5] take into account weekends and whole months

---
 vresutils/load.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vresutils/load.py b/vresutils/load.py
index c4843af..3e08671 100644
--- a/vresutils/load.py
+++ b/vresutils/load.py
@@ -137,7 +137,7 @@ def timeseries_opsd(years=slice("2011", "2015"), fn=None):
         load = load.loc[years]
 
     # manual alterations:
-    load = manual_alternations_opsd(load)
+    load = manual_alterations_opsd(load)
 
     return load
 
@@ -145,11 +145,11 @@ def copy_timeslice(load, cntry, start, stop, delta):
     start = pd.Timestamp(start)
     stop = pd.Timestamp(stop)
     if start in load.index and stop in load.index:
-        load.loc[start:stop, cntry] = load.loc[start+delta:stop+delta, cntry].values
+        load.loc[start:stop, cntry] = load.loc[start-delta:stop-delta, cntry].values
     return load
     
 
-def manual_alternations_opsd(load):
+def manual_alterations_opsd(load):
     # GB in the input is split in 3 regions:
     # GBN (Great Britain), NIR (northern ireland), together forming
     # UKM (united kingdom). Therefore, we choose only UKM.
@@ -158,15 +158,15 @@ def manual_alternations_opsd(load):
     load['GB'] = load['GB_UKM']
     load = load.drop(columns=['GB_GBN', 'GB_NIR', 'GB_UKM'])
     
-    # To fill loder periods of gaps (more than 4 hours), we copy a period before into it
+    # To fill periods of load-gaps (more than 4 hours), we copy a period before into it
     load = copy_timeslice(load, 'GR', '2015-08-11 21:00', '2015-08-15 20:00', pd.Timedelta(weeks=1))
     load = copy_timeslice(load, 'AT', '2018-12-31 22:00', '2019-01-01 22:00', pd.Timedelta(days=2))
     load = copy_timeslice(load, 'CH', '2010-01-19 07:00', '2010-01-19 22:00', pd.Timedelta(days=1))
     load = copy_timeslice(load, 'CH', '2010-03-28 00:00', '2010-03-28 21:00', pd.Timedelta(days=1))
-    load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(days=3))
+    load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(weeks=1)) #is a WE, so take WE before
     load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1))
     load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1))
-    load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(weeks=5)) #whole january missing
+    load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-365)) #whole january missing
     
     # Kosovo gets the same load curve as Serbia
     # scaled by energy consumption ratio from IEA 2012

From 58ac2900a5c888d1aa279d42bc16dc26dc3bcf29 Mon Sep 17 00:00:00 2001
From: "martha.frysztacki" <eb5194@iai-esm003.iai.kit.edu>
Date: Thu, 23 Jan 2020 10:11:16 +0100
Subject: [PATCH 5/5] -364 to take into account weekdays

---
 vresutils/load.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vresutils/load.py b/vresutils/load.py
index 3e08671..4e3de6e 100644
--- a/vresutils/load.py
+++ b/vresutils/load.py
@@ -166,7 +166,7 @@ def manual_alterations_opsd(load):
     load = copy_timeslice(load, 'CH', '2010-10-08 13:00', '2010-10-10 21:00', pd.Timedelta(weeks=1)) #is a WE, so take WE before
     load = copy_timeslice(load, 'CH', '2010-11-04 04:00', '2010-11-04 22:00', pd.Timedelta(days=1))
     load = copy_timeslice(load, 'NO', '2010-12-09 11:00', '2010-12-09 18:00', pd.Timedelta(days=1))
-    load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-365)) #whole january missing
+    load = copy_timeslice(load, 'GB', '2009-12-31 23:00', '2010-01-31 23:00', pd.Timedelta(days=-364)) #whole january missing
     
     # Kosovo gets the same load curve as Serbia
     # scaled by energy consumption ratio from IEA 2012