# Daily Scrape — workflow file for run #263.
# (Captured from the GitHub Actions web view; page navigation text removed.)

# Workflow triggers and token permissions for the daily scrape.
name: Daily Scrape

on:
  schedule:
    # Run at 12:00 UTC daily (7:00 AM EST; 8:00 AM during US daylight time,
    # because cron schedules are evaluated in UTC).
    - cron: "0 12 * * *"
  # Allow manual trigger.
  workflow_dispatch:
  push:
    branches:
      - main
      - master
      - "feature/**"
      - "fix/**"

# The job commits and pushes to the repository, which requires write access
# to repository contents.
permissions:
  contents: write

# Scheduled, manual and push-triggered runs for the same ref would otherwise
# race each other on the final `git push` (the loser is rejected as
# non-fast-forward). Queue them instead of running them in parallel; a run
# that is already in progress is allowed to finish.
concurrency:
  group: daily-scrape-${{ github.ref }}
  cancel-in-progress: false
jobs:
  # Runs src/scraper.py, uploads debug output as an artifact, and commits
  # anything new under data/ and debug/ back to the branch that was checked out.
  scrape:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Check out the branch by name (not the event SHA) so the later
          # `git push` has a local branch to push from.
          ref: ${{ github.head_ref || github.ref_name }}

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.11"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Install Playwright browsers
        # NOTE(review): the package names below (e.g. libasound2t64) are tied
        # to the Ubuntu release of the runner image; re-check them whenever
        # `ubuntu-latest` moves to a new release.
        run: |
          playwright install chromium
          sudo apt-get update
          sudo apt-get install -y \
            libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \
            libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 \
            libgbm1 libpango-1.0-0 libcairo2 libasound2t64 libatspi2.0-0

      - name: Run scraper
        env:
          # Flush Python output immediately so logs appear in order.
          PYTHONUNBUFFERED: "1"
        run: |
          mkdir -p debug data
          python src/scraper.py

      - name: Upload debug artifacts
        # Upload even when the scraper failed, so the failure can be inspected.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: debug-files
          path: |
            debug/
            data/*.json

      - name: Commit and push if changes
        if: success()
        env:
          # Pass the branch name through the environment rather than
          # interpolating `${{ }}` into the script: the expression is expanded
          # before the shell runs, so a crafted branch name could otherwise
          # inject shell commands.
          TARGET_BRANCH: ${{ github.head_ref || github.ref_name }}
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add data/ debug/
          # Stop early only when nothing is staged; any real commit or push
          # failure below must fail the step instead of being hidden.
          if git diff --cached --quiet; then
            echo "No changes to commit."
            exit 0
          fi
          timestamp=$(date -u)
          git commit -m "Update census data - ${timestamp}"
          git push origin "${TARGET_BRANCH}"