Skip to content

ingest

ingest #189

Workflow file for this run

# Workflow that ingests content delivered through a repository_dispatch webhook.
name: Ingest Content via Webhook

# Only `repository_dispatch` events whose event type is `ingest` start a run.
on:
  repository_dispatch:
    types:
      - ingest

# Every run joins the same concurrency group, so ingests never execute in
# parallel. `cancel-in-progress` is not set, so a run that is already in
# progress is left to finish.
concurrency:
  group: ingestion
jobs:
  # Single job: check out the most recently updated `automated-*` branch (or
  # `main` when none exists), build the project, run the webhook ingest with
  # the dispatch payload, then commit and push any resulting changes. A pull
  # request is opened only when a brand-new `automated-<run_id>` branch was
  # created from `main`.
  checkRss:
    runs-on: "ubuntu-latest"
    steps:
      - uses: actions/checkout@v4
        with:
          # Full fetch: the next step inspects refs/remotes/origin/* to find
          # the newest automated branch.
          fetch-depth: 0

      - name: Find and checkout latest updated branch
        id: brancher
        # No explicit `shell:` here on purpose: this relies on the pipeline's
        # exit status being that of `head`, so a `grep` with no match leaves
        # LATEST_BRANCH empty instead of failing the step.
        # NOTE(review): a merged-but-undeleted automated-* branch would still
        # be selected here, and no new PR is opened in that case - confirm
        # such branches are deleted on merge.
        run: |
          LATEST_BRANCH=$(
            git for-each-ref --sort=-committerdate refs/remotes/origin/ --format='%(refname:short)' |
              grep '^origin/automated-' |
              head -n 1
          )
          if [[ -z "$LATEST_BRANCH" ]]; then
            BRANCH_NAME="main"
          else
            # Strip the remote prefix to get the plain branch name.
            BRANCH_NAME=${LATEST_BRANCH#origin/}
          fi
          echo "Latest updated branch: $BRANCH_NAME"
          git checkout "$BRANCH_NAME"
          echo "latest-branch=$BRANCH_NAME" >> "$GITHUB_OUTPUT"

      - uses: actions/setup-node@v4
        with:
          node-version: 22

      - name: Get npm cache directory
        id: npm-cache-dir
        shell: bash
        run: echo "dir=$(npm config get cache)" >> "${GITHUB_OUTPUT}"

      - uses: actions/cache@v4
        # use this to check for `cache-hit` ==> if: steps.npm-cache.outputs.cache-hit != 'true'
        id: npm-cache
        with:
          path: ${{ steps.npm-cache-dir.outputs.dir }}
          key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
          restore-keys: |
            ${{ runner.os }}-node-

      # NOTE(review): `npm install` may rewrite package-lock.json, which the
      # "Check for changes" step below would then report as a change -
      # consider `npm ci` if the lockfile is committed and kept in sync.
      - run: npm install

      - run: npm run build
        env:
          DUCKDB_DATABASE_PATH: mrtdown.duckdb
          SENTRY_ORG: ${{ secrets.SENTRY_ORG }}
          SENTRY_PROJECT: ${{ secrets.SENTRY_PROJECT }}
          SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
          SENTRY_RELEASE: production

      - run: npm run ingest:webhook
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # The dispatch payload is handed over as a JSON string through the
          # environment rather than being interpolated into a command line.
          MESSAGE: ${{ toJSON(github.event.client_payload) }}
          DUCKDB_DATABASE_PATH: mrtdown.duckdb

      - name: Check for changes
        id: make-changes
        # Exposes `has_changes` (true/false) based on whether the working
        # tree has any uncommitted or untracked files.
        run: |-
          if [[ -n $(git status --porcelain) ]]; then
            echo "There are uncommitted changes"
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "Working directory is clean"
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Target branch
        if: steps.make-changes.outputs.has_changes == 'true'
        id: target-branch
        env:
          # Passed through the environment instead of being spliced into the
          # script: the value originates from a remote branch name, and git
          # ref names may contain shell metacharacters.
          LATEST_BRANCH: ${{ steps.brancher.outputs.latest-branch }}
        run: |-
          if [[ "$LATEST_BRANCH" = "main" ]]; then
            # Starting from main: changes go to a fresh per-run branch.
            echo "branch-name=automated-${{ github.run_id }}" >> "$GITHUB_OUTPUT"
          else
            # Otherwise keep appending to the existing automated branch.
            echo "branch-name=$LATEST_BRANCH" >> "$GITHUB_OUTPUT"
          fi

      - name: Push changes
        if: steps.make-changes.outputs.has_changes == 'true'
        env:
          # Same reasoning as above: never interpolate branch names directly
          # into the shell script.
          BRANCH_NAME: ${{ steps.target-branch.outputs.branch-name }}
          LATEST_BRANCH: ${{ steps.brancher.outputs.latest-branch }}
        run: |-
          if [[ "$LATEST_BRANCH" = "main" ]]; then
            git checkout -b "$BRANCH_NAME"
          else
            git checkout "$BRANCH_NAME"
          fi
          git add -A
          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git config user.name "github-actions[bot]"
          git commit -m "chore(data): Automated changes"
          git push origin "$BRANCH_NAME"

      - name: Create Pull Request
        # Only when a new automated branch was just created from main.
        if: >-
          steps.make-changes.outputs.has_changes == 'true' &&
          steps.brancher.outputs.latest-branch == 'main'
        uses: actions/github-script@v7
        env:
          BRANCH_NAME: ${{ steps.target-branch.outputs.branch-name }}
        with:
          script: |
            // Read the branch from the environment rather than templating it
            // into the JavaScript source.
            const branchName = process.env.BRANCH_NAME;
            // Built with join() so no source indentation leaks into the
            // Markdown body.
            const body = [
              '# Automated Data',
              'This PR was automatically created by the webhook-triggered ingest GitHub Action workflow.',
            ].join('\n');
            // Create PR using GitHub API
            const { data: pullRequest } = await github.rest.pulls.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: '[Automated] Update Data',
              body,
              head: branchName,
              base: 'main'
            });
            console.log(`Pull Request created: ${pullRequest.html_url}`);