From dc7e289f2aa3321d69483ec72e96aafb845b8d2c Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Fri, 2 Aug 2024 12:13:31 -0400
Subject: [PATCH 01/12] Added data processing workflow and (unimplemented)
 processing script

---
 .github/workflows/schedule.yaml | 24 ++++++++++++++++++++++++
 reweight/logic/process_data.py  |  1 +
 2 files changed, 25 insertions(+)
 create mode 100644 .github/workflows/schedule.yaml
 create mode 100644 reweight/logic/process_data.py

diff --git a/.github/workflows/schedule.yaml b/.github/workflows/schedule.yaml
new file mode 100644
index 0000000..d57cb6f
--- /dev/null
+++ b/.github/workflows/schedule.yaml
@@ -0,0 +1,24 @@
+name: Scheduled Data Processing
+
+on:
+  schedule:
+    - cron: "0 0 1 * *" # Runs at 00:00 UTC on the first day of every month
+  push:
+    branches: [main] # Runs on pushes to the main branch
+  pull_request:
+    branches: [main] # Runs on pull requests to the main branch
+
+jobs:
+  process_data:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      - name: Install dependencies
+        run: make install
+      - name: Run data processing script
+        run: python reweight/logic/process_data.py
diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
new file mode 100644
index 0000000..e843899
--- /dev/null
+++ b/reweight/logic/process_data.py
@@ -0,0 +1 @@
+raise NotImplementedError("Data processing function still in development")
\ No newline at end of file

From 2bd2a5421eb52f6c99ddd85ac7c8e74cc80ccbf7 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Fri, 2 Aug 2024 18:10:09 -0400
Subject: [PATCH 02/12] Fixed setup.py installation issues with torch

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index db55313..8bdd301 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@
     install_requires=[
         "numpy<2.0",
         "pandas",
-        "torch+cpu",
+        "torch",
         "tensorboard",
         "jupyter-book",
         "pytest",

From 6367ec8aca81c81072689b52c4871b07aa9ec4b3 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Wed, 7 Aug 2024 11:26:18 -0400
Subject: [PATCH 03/12] Added a gitignore rule to exclude items in root
 starting with the string test_

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 712df78..1cf11d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,4 +20,5 @@ docs/_build
 
 # Testing notebooks #
 #####################
-/*.ipynb
\ No newline at end of file
+/*.ipynb
+/test_*
\ No newline at end of file

From 8b6e3e73255de7f1248f49dbe77463b0c60eeda3 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Wed, 7 Aug 2024 11:27:15 -0400
Subject: [PATCH 04/12] Now ignores CSV files in root

---
 .gitignore | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 1cf11d0..9bbaef0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,4 +21,8 @@ docs/_build
 # Testing notebooks #
 #####################
 /*.ipynb
-/test_*
\ No newline at end of file
+/test_*
+
+# Temporary CSV files #
+#######################
+/*.csv
\ No newline at end of file

From e310f491f1015bac81dcfe9b97958bec8f9452f7 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Wed, 7 Aug 2024 11:35:53 -0400
Subject: [PATCH 05/12] Wrote a script to process data and post it to the
 reweight repo

---
 reweight/logic/process_data.py | 105 ++++++++++++++++++++++++++++++++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index e843899..90f4443 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -1 +1,104 @@
-raise NotImplementedError("Data processing function still in development")
\ No newline at end of file
+import pandas as pd
+import numpy as np
+import torch
+from torch.utils.tensorboard import SummaryWriter
+import os
+import requests
+import base64
+
+import policyengine_uk
+from policyengine_uk.data import RawFRS_2021_22
+from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables
+
+from reweight import reweight
+
+#UK dataframe generation.
+
+RawFRS_2021_22().download()
+
+uk_weights_df = pd.DataFrame()
+
+for year in range(2024, 2029):
+    (
+        household_weights,
+        weight_adjustment,
+        values_df,
+        targets,
+        targets_array,
+        equivalisation_factors_array
+    ) = generate_model_variables("frs_2021", year)
+    sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)
+    uk_final_weights = reweight(household_weights, sim_matrix, targets, targets_array, epochs=1_000)
+    uk_weight_series = pd.Series(uk_final_weights.numpy())
+    uk_weights_df[str(year)] = uk_weight_series
+
+
+csv_filename = "updated_uk_weights.csv"
+uk_weights_df.to_csv(csv_filename)
+
+
+#US dataframe generation.
+
+import policyengine_us
+from policyengine_us.data.datasets.cps.enhanced_cps.loss import generate_model_variables
+
+us_weights_df = pd.DataFrame()
+
+for year in range(2024, 2029):
+    (
+        household_weights,
+        weight_adjustment,
+        values_df,
+        targets,
+        targets_array,
+        equivalisation_factors_array
+    ) = generate_model_variables("cps_2021", year)
+    sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)
+    initial_weights = torch.tensor(household_weights, dtype=torch.float32)
+    targets_tensor = torch.tensor(targets_array, dtype=torch.float32)
+    us_final_weights = reweight(initial_weights, sim_matrix, targets, targets_tensor, epochs=1_000)
+    us_weight_series = pd.Series(us_final_weights.numpy())
+    us_weights_df[str(year)] = us_weight_series
+
+#Now, for testing, save these dataframes as CSV.
+
+csv_filename = "updated_us_weights.csv"
+us_weights_df.to_csv(csv_filename)
+
+#Now, create a GitHub release
+
+api_url = 'https://api.github.com/repos/PolicyEngine/reweight/releases'
+
+owner = 'pmberg'
+repo = 'reweight'
+token = os.environ.get('GITHUB_TOKEN')
+
+# Create release
+headers = {
+    'Authorization': f'token {token}',
+    'Accept': 'application/vnd.github.v3+json'
+}
+release_data = {
+    'tag_name': f'v{pd.Timestamp.now().strftime("%Y.%m.%d.%H.%M.%S")}',
+    'name': f'Data Release {pd.Timestamp.now().strftime("%Y.%m.%d.%H.%M.%S")}',
+    'body': 'Automated data release with updated weights'
+}
+response = requests.post(api_url.format(owner=owner, repo=repo), headers=headers, json=release_data)
+release = response.json()
+
+# Upload assets
+upload_url = release['upload_url'].split('{')[0]
+
+def upload_file(file_name):
+    with open(file_name, 'rb') as file:
+        content = file.read()
+    headers['Content-Type'] = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+    params = {'name': os.path.basename(file_name)}
+    response = requests.post(upload_url, headers=headers, params=params, data=content)
+    if response.status_code == 201:
+        print(f"File added successfully: {release['html_url']}")
+    else:
+        print(f"Failed to add file: {response.content}")
+
+for file_name in ["updated_uk_weights.csv", "updated_us_weights.csv"]:
+    upload_file(file_name)
\ No newline at end of file

From 1849351dfed9e700284ffbafcb6454a9cd541d93 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Wed, 7 Aug 2024 11:41:07 -0400
Subject: [PATCH 06/12] Reformatted code

---
 reweight/logic/process_data.py | 68 +++++++++++++++++++++-------------
 reweight/logic/reweight.py     |  2 +-
 2 files changed, 43 insertions(+), 27 deletions(-)

diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index 90f4443..57e3151 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -8,11 +8,13 @@
 
 import policyengine_uk
 from policyengine_uk.data import RawFRS_2021_22
-from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables
+from policyengine_uk.data.datasets.frs.calibration.calibrate import (
+    generate_model_variables,
+)
 
 from reweight import reweight
 
-#UK dataframe generation.
+# UK dataframe generation.
 
 RawFRS_2021_22().download()
 
@@ -25,10 +27,12 @@
         values_df,
         targets,
         targets_array,
-        equivalisation_factors_array
+        equivalisation_factors_array,
     ) = generate_model_variables("frs_2021", year)
     sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)
-    uk_final_weights = reweight(household_weights, sim_matrix, targets, targets_array, epochs=1_000)
+    uk_final_weights = reweight(
+        household_weights, sim_matrix, targets, targets_array, epochs=1_000
+    )
     uk_weight_series = pd.Series(uk_final_weights.numpy())
     uk_weights_df[str(year)] = uk_weight_series
 
@@ -37,10 +41,12 @@
 uk_weights_df.to_csv(csv_filename)
 
 
-#US dataframe generation.
+# US dataframe generation.
 
 import policyengine_us
-from policyengine_us.data.datasets.cps.enhanced_cps.loss import generate_model_variables
+from policyengine_us.data.datasets.cps.enhanced_cps.loss import (
+    generate_model_variables,
+)
 
 us_weights_df = pd.DataFrame()
 
@@ -51,54 +57,64 @@
         values_df,
         targets,
         targets_array,
-        equivalisation_factors_array
+        equivalisation_factors_array,
     ) = generate_model_variables("cps_2021", year)
     sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)
     initial_weights = torch.tensor(household_weights, dtype=torch.float32)
     targets_tensor = torch.tensor(targets_array, dtype=torch.float32)
-    us_final_weights = reweight(initial_weights, sim_matrix, targets, targets_tensor, epochs=1_000)
+    us_final_weights = reweight(
+        initial_weights, sim_matrix, targets, targets_tensor, epochs=1_000
+    )
     us_weight_series = pd.Series(us_final_weights.numpy())
     us_weights_df[str(year)] = us_weight_series
 
-#Now, for testing, save these dataframes as CSV.
+# Now, for testing, save these dataframes as CSV.
 
 csv_filename = "updated_us_weights.csv"
 us_weights_df.to_csv(csv_filename)
 
-#Now, create a GitHub release
+# Now, create a GitHub release
 
-api_url = 'https://api.github.com/repos/PolicyEngine/reweight/releases'
+api_url = "https://api.github.com/repos/PolicyEngine/reweight/releases"
 
-owner = 'pmberg'
-repo = 'reweight'
-token = os.environ.get('GITHUB_TOKEN')
+owner = "pmberg"
+repo = "reweight"
+token = os.environ.get("GITHUB_TOKEN")
 
 # Create release
 headers = {
-    'Authorization': f'token {token}',
-    'Accept': 'application/vnd.github.v3+json'
+    "Authorization": f"token {token}",
+    "Accept": "application/vnd.github.v3+json",
 }
 release_data = {
-    'tag_name': f'v{pd.Timestamp.now().strftime("%Y.%m.%d.%H.%M.%S")}',
-    'name': f'Data Release {pd.Timestamp.now().strftime("%Y.%m.%d.%H.%M.%S")}',
-    'body': 'Automated data release with updated weights'
+    "tag_name": f'v{pd.Timestamp.now().strftime("%Y.%m.%d.%H.%M.%S")}',
+    "name": f'Data Release {pd.Timestamp.now().strftime("%Y.%m.%d.%H.%M.%S")}',
+    "body": "Automated data release with updated weights",
 }
-response = requests.post(api_url.format(owner=owner, repo=repo), headers=headers, json=release_data)
+response = requests.post(
+    api_url.format(owner=owner, repo=repo), headers=headers, json=release_data
+)
 release = response.json()
 
 # Upload assets
-upload_url = release['upload_url'].split('{')[0]
+upload_url = release["upload_url"].split("{")[0]
+
 
 def upload_file(file_name):
-    with open(file_name, 'rb') as file:
+    with open(file_name, "rb") as file:
         content = file.read()
-    headers['Content-Type'] = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
-    params = {'name': os.path.basename(file_name)}
-    response = requests.post(upload_url, headers=headers, params=params, data=content)
+    # The upload endpoint takes the asset's media type from Content-Type;
+    # these assets are CSV files, not Excel workbooks.
+    headers["Content-Type"] = "text/csv"
+    params = {"name": os.path.basename(file_name)}
+    response = requests.post(
+        upload_url, headers=headers, params=params, data=content
+    )
     if response.status_code == 201:
         print(f"File added successfully: {release['html_url']}")
     else:
         print(f"Failed to add file: {response.content}")
 
+
 for file_name in ["updated_uk_weights.csv", "updated_us_weights.csv"]:
-    upload_file(file_name)
\ No newline at end of file
+    upload_file(file_name)
diff --git a/reweight/logic/reweight.py b/reweight/logic/reweight.py
index 4227c29..1e48f66 100644
--- a/reweight/logic/reweight.py
+++ b/reweight/logic/reweight.py
@@ -43,7 +43,7 @@ def reweight(
 
     optimizer = torch.optim.Adam([log_weights])
 
-    #Report the initial loss:
+    # Report the initial loss:
     targets_estimate = torch.exp(log_weights) @ estimate_matrix
     # Calculate the loss
     loss = torch.mean(

From 5513fbe487bbde84f26fdddd8dadb5feb3b81984 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Wed, 7 Aug 2024 11:50:05 -0400
Subject: [PATCH 07/12] Added Microsimulation lines to process_data

---
 reweight/logic/process_data.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index 57e3151..57fb7f4 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -7,6 +7,7 @@
 import base64
 
 import policyengine_uk
+from policyengine_uk import Microsimulation
 from policyengine_uk.data import RawFRS_2021_22
 from policyengine_uk.data.datasets.frs.calibration.calibrate import (
     generate_model_variables,
@@ -15,6 +16,7 @@
 from reweight import reweight
 
 # UK dataframe generation.
+sim = Microsimulation()
 
 RawFRS_2021_22().download()
 

From 6bfab0220c9a24d52e542cd2558c63402e730fdd Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Wed, 7 Aug 2024 12:02:02 -0400
Subject: [PATCH 08/12] Reworked env in YAML file

---
 .github/workflows/schedule.yaml | 4 ++++
 reweight/logic/process_data.py  | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/schedule.yaml b/.github/workflows/schedule.yaml
index d57cb6f..49bd335 100644
--- a/.github/workflows/schedule.yaml
+++ b/.github/workflows/schedule.yaml
@@ -22,3 +22,7 @@ jobs:
         run: make install
       - name: Run data processing script
         run: python reweight/logic/process_data.py
+        env:
+          POVERTYTRACKER_RAW_URL: ${{ secrets.POVERTYTRACKER_RAW_URL }}
+          POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN: ${{ secrets.POLICYENGINE_GITHUB_MICRODATA_AUTH_TOKEN}}
+          API_GITHUB_TOKEN: ${{ secrets.API_GITHUB_TOKEN }}
diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index 57fb7f4..5226a46 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -81,7 +81,7 @@
 
 owner = "pmberg"
 repo = "reweight"
-token = os.environ.get("GITHUB_TOKEN")
+token = os.environ.get("API_GITHUB_TOKEN")
 
 # Create release
 headers = {

From bb40b673bfd5b27046c849197f8fcc9b37e2eedf Mon Sep 17 00:00:00 2001
From: Nikhil Woodruff <nikhil.woodruff@outlook.com>
Date: Tue, 13 Aug 2024 14:09:21 +0100
Subject: [PATCH 09/12] Add sketch of condensed code

---
 reweight/logic/process_data.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index 5226a46..9369ab4 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -13,6 +13,18 @@
     generate_model_variables,
 )
 
+def calibrate_country_weights(
+    household_weights, loss_matrix, target_labels, target_values, epochs
+) -> pd.DataFrame:
+    pass
+
+
+uk_inputs = ...
+us_inputs = ...
+
+calibrate_country_weights(*uk_inputs)
+calibrate_country_weights(*us_inputs)
+
 from reweight import reweight
 
 # UK dataframe generation.

From a96ac6a0797ff51f362d7fa2038b6d3c78562336 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Tue, 13 Aug 2024 10:40:55 -0400
Subject: [PATCH 10/12] Refactored process_data, splitting repeated code into
 two functions.

---
 reweight/logic/process_data.py | 98 +++++++++++++---------------------
 1 file changed, 38 insertions(+), 60 deletions(-)

diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index 9369ab4..3a463f0 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -9,32 +9,23 @@
 import policyengine_uk
 from policyengine_uk import Microsimulation
 from policyengine_uk.data import RawFRS_2021_22
-from policyengine_uk.data.datasets.frs.calibration.calibrate import (
-    generate_model_variables,
-)
-
-def calibrate_country_weights(
-    household_weights, loss_matrix, target_labels, target_values, epochs
-) -> pd.DataFrame:
-    pass
-
-
-uk_inputs = ...
-us_inputs = ...
+from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables as uk_generate
 
-calibrate_country_weights(*uk_inputs)
-calibrate_country_weights(*us_inputs)
+import policyengine_us
+from policyengine_us.data.datasets.cps.enhanced_cps.loss import generate_model_variables as us_generate
 
 from reweight import reweight
 
-# UK dataframe generation.
-sim = Microsimulation()
-
-RawFRS_2021_22().download()
-
-uk_weights_df = pd.DataFrame()
+def generate_country_weights(year, data_source, generate_func):
+    """
+    Parameters:
+    year (int): The year for which these country values are generated.
+    data_source (str): The name of the data source for that country.
+    generate_func (function): The function used to generate the initial values.
 
-for year in range(2024, 2029):
+    Returns:
+    final_weights (torch.Tensor): a PyTorch tensor of final reweighted weights.
+    """
     (
         household_weights,
         weight_adjustment,
@@ -42,50 +33,37 @@ def calibrate_country_weights(
         targets,
         targets_array,
         equivalisation_factors_array,
-    ) = generate_model_variables("frs_2021", year)
-    sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)
-    uk_final_weights = reweight(
-        household_weights, sim_matrix, targets, targets_array, epochs=1_000
-    )
-    uk_weight_series = pd.Series(uk_final_weights.numpy())
-    uk_weights_df[str(year)] = uk_weight_series
-
-
-csv_filename = "updated_uk_weights.csv"
-uk_weights_df.to_csv(csv_filename)
-
-
-# US dataframe generation.
-
-import policyengine_us
-from policyengine_us.data.datasets.cps.enhanced_cps.loss import (
-    generate_model_variables,
-)
-
-us_weights_df = pd.DataFrame()
-
-for year in range(2024, 2029):
-    (
-        household_weights,
-        weight_adjustment,
-        values_df,
-        targets,
-        targets_array,
-        equivalisation_factors_array,
-    ) = generate_model_variables("cps_2021", year)
+    ) = generate_func(data_source, year)
     sim_matrix = torch.tensor(values_df.to_numpy(), dtype=torch.float32)
     initial_weights = torch.tensor(household_weights, dtype=torch.float32)
     targets_tensor = torch.tensor(targets_array, dtype=torch.float32)
-    us_final_weights = reweight(
+    final_weights = reweight(
         initial_weights, sim_matrix, targets, targets_tensor, epochs=1_000
     )
-    us_weight_series = pd.Series(us_final_weights.numpy())
-    us_weights_df[str(year)] = us_weight_series
+    return final_weights
+
+def generate_country_csv(start_year, end_year, data_source, generate_func, csv_filename):
+    """
+    Parameters:
+    start_year (int): The year for which these country values start generating (inclusive).
+    end_year (int): The year for which these country values stop generating (non-inclusive).
+    data_source (str): The name of the data source for that country.
+    generate_func (function): The function used to generate the initial values.
+    csv_filename (str): The name of the file which the generated data are saved under.
+
+    Returns:
+    None. Generates and saves a CSV file of reweighted weights.
+    """
+    weights_df = pd.DataFrame()
+    for year in range(start_year, end_year):
+        final_weights = generate_country_weights(year, data_source, generate_func)
+        weight_series = pd.Series(final_weights.numpy())
+        weights_df[str(year)] = weight_series
+    weights_df.to_csv(csv_filename)
 
-# Now, for testing, save these dataframes as CSV.
-
-csv_filename = "updated_us_weights.csv"
-us_weights_df.to_csv(csv_filename)
+RawFRS_2021_22().download()
+generate_country_csv(2024, 2029, "frs_2021", uk_generate, "updated_uk_weights.csv")
+generate_country_csv(2024, 2029, "cps_2021", us_generate, "updated_us_weights.csv")
 
 # Now, create a GitHub release
 
@@ -109,7 +87,7 @@ def calibrate_country_weights(
     api_url.format(owner=owner, repo=repo), headers=headers, json=release_data
 )
 release = response.json()
-
+print(release)
 # Upload assets
 upload_url = release["upload_url"].split("{")[0]
 

From 78be5c37b39f3c0b65ae84200985e468810d37bf Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Tue, 13 Aug 2024 11:19:51 -0400
Subject: [PATCH 11/12] Reformatted process_data

---
 reweight/logic/process_data.py | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/reweight/logic/process_data.py b/reweight/logic/process_data.py
index 3a463f0..ec71c5e 100644
--- a/reweight/logic/process_data.py
+++ b/reweight/logic/process_data.py
@@ -9,13 +9,18 @@
 import policyengine_uk
 from policyengine_uk import Microsimulation
 from policyengine_uk.data import RawFRS_2021_22
-from policyengine_uk.data.datasets.frs.calibration.calibrate import generate_model_variables as uk_generate
+from policyengine_uk.data.datasets.frs.calibration.calibrate import (
+    generate_model_variables as uk_generate,
+)
 
 import policyengine_us
-from policyengine_us.data.datasets.cps.enhanced_cps.loss import generate_model_variables as us_generate
+from policyengine_us.data.datasets.cps.enhanced_cps.loss import (
+    generate_model_variables as us_generate,
+)
 
 from reweight import reweight
 
+
 def generate_country_weights(year, data_source, generate_func):
     """
     Parameters:
@@ -42,7 +47,10 @@ def generate_country_weights(year, data_source, generate_func):
     )
     return final_weights
 
-def generate_country_csv(start_year, end_year, data_source, generate_func, csv_filename):
+
+def generate_country_csv(
+    start_year, end_year, data_source, generate_func, csv_filename
+):
     """
     Parameters:
     start_year (int): The year for which these country values start generating (inclusive).
@@ -56,14 +64,21 @@ def generate_country_csv(start_year, end_year, data_source, generate_func, csv_f
     """
     weights_df = pd.DataFrame()
     for year in range(start_year, end_year):
-        final_weights = generate_country_weights(year, data_source, generate_func)
+        final_weights = generate_country_weights(
+            year, data_source, generate_func
+        )
         weight_series = pd.Series(final_weights.numpy())
         weights_df[str(year)] = weight_series
     weights_df.to_csv(csv_filename)
 
+
 RawFRS_2021_22().download()
-generate_country_csv(2024, 2029, "frs_2021", uk_generate, "updated_uk_weights.csv")
-generate_country_csv(2024, 2029, "cps_2021", us_generate, "updated_us_weights.csv")
+generate_country_csv(
+    2024, 2029, "frs_2021", uk_generate, "updated_uk_weights.csv"
+)
+generate_country_csv(
+    2024, 2029, "cps_2021", us_generate, "updated_us_weights.csv"
+)
 
 # Now, create a GitHub release
 

From 9e1120d9a0ea61cb0893d630ee80afe5f7b2f763 Mon Sep 17 00:00:00 2001
From: = <berggrenpeterm@gmail.com>
Date: Tue, 13 Aug 2024 14:00:18 -0400
Subject: [PATCH 12/12] Update reweight