From 5ee096a4c3de548aa2b2901793243f2bf86f6a56 Mon Sep 17 00:00:00 2001 From: kinjalh Date: Wed, 14 Jan 2026 18:38:35 -0500 Subject: [PATCH] gc cron job to clean up dangling policy_bindings entries --- config/prod.exs | 1 + lib/console/deployments/cron.ex | 16 ++++ lib/console/schema/policy_binding.ex | 105 +++++++++++++++++++++++++ test/console/deployments/cron_test.exs | 27 +++++++ 4 files changed, 149 insertions(+) diff --git a/config/prod.exs b/config/prod.exs index 07d2ff8922..e35cb1e4c2 100644 --- a/config/prod.exs +++ b/config/prod.exs @@ -68,6 +68,7 @@ config :console, Console.Cron.Scheduler, {"15 * * * *", {Console.Deployments.Cron, :prune_vuln_reports, []}}, {"*/15 * * * *", {Console.Deployments.Cron, :pr_governance, []}}, {"15 3 * * *", {Console.Deployments.Cron, :prune_dangling_templates, []}}, + {"20 3 * * *", {Console.Deployments.Cron, :prune_dangling_policy_bindings, []}}, {"30 3 * * *", {Console.Deployments.Cron, :prune_insight_components, []}}, {"0 4 * * *", {Console.Deployments.Cron, :prune_helm_repositories, []}}, {"0 5 * * *", {Console.Deployments.Cron, :prune_agent_run_repositories, []}}, diff --git a/lib/console/deployments/cron.ex b/lib/console/deployments/cron.ex index f30baaec1c..0d7cd63eee 100644 --- a/lib/console/deployments/cron.ex +++ b/lib/console/deployments/cron.ex @@ -18,6 +18,7 @@ defmodule Console.Deployments.Cron do AppNotification, Alert, ClusterAuditLog, + PolicyBinding, PolicyConstraint, VulnerabilityReport, ServiceTemplate, @@ -298,6 +299,21 @@ defmodule Console.Deployments.Cron do |> Stream.run() end + def prune_dangling_policy_bindings() do + PolicyBinding.dangling() + |> PolicyBinding.ordered(asc: :id) + |> Repo.stream(method: :keyset) + |> Console.throttle(count: 100, pause: 1) + |> Stream.chunk_every(100) + |> Stream.each(fn bindings -> + ids = Enum.map(bindings, & &1.id) + Logger.info "pruning #{length(ids)} dangling policy bindings" + PolicyBinding.for_ids(ids) + |> Repo.delete_all(timeout: 10_000) + end) + |> Stream.run() + end + def add_ignore_crds(search) do Service.search(search) |> Repo.stream(method: :keyset) diff --git a/lib/console/schema/policy_binding.ex b/lib/console/schema/policy_binding.ex index b9453d61d9..0c40faef89 100644 --- a/lib/console/schema/policy_binding.ex +++ b/lib/console/schema/policy_binding.ex @@ -1,6 +1,8 @@ defmodule Console.Schema.PolicyBinding do use Piazza.Ecto.Schema alias Console.Schema.{User, Group} + alias Console.Repo + import Ecto.Query schema "policy_bindings" do field :policy_id, :binary_id @@ -10,6 +12,109 @@ defmodule Console.Schema.PolicyBinding do timestamps() end + @doc """ + Fetches all tables and their policy-related columns from the database schema. + Returns a map of table_name => [column_names] for uuid columns ending in '_policy_id' or 'bindings_id'. + """ + def policy_columns do + from(c in "columns", + prefix: "information_schema", + where: like(c.column_name, "%policy_id") or c.column_name == "bindings_id", + where: c.table_schema == "public", + where: c.table_name != "policy_bindings", + where: c.data_type == "uuid", + select: {c.table_name, c.column_name}, + order_by: [c.table_name, c.column_name] + ) + |> Repo.all(timeout: 30_000) + |> Enum.group_by(fn {table, _col} -> table end, fn {_table, col} -> col end) + end + + @doc """ + Returns IDs of policy bindings not referenced by any of the given tables. + Uses a database-side subquery with LEFT JOIN to avoid loading all policy_ids into memory. + + ## Parameters + - `table_columns` - A map of table_name => [column_names] to check for references. + If not provided, defaults to all policy columns discovered via `policy_columns/0`. + + ## Examples + + # Check against all tables with policy columns + PolicyBinding.dangling_ids() + + # Check against specific tables only + PolicyBinding.dangling_ids(%{"clusters" => ["read_policy_id", "write_policy_id"]}) + + """ + def dangling_ids(table_columns \\ nil) do + table_columns = table_columns || policy_columns() + + case build_referenced_subquery(table_columns) do + nil -> + # No tables to check against, all bindings are considered "dangling" + from(pb in __MODULE__, select: pb.id, order_by: [asc: pb.id]) + |> Repo.all(timeout: 300_000) + + referenced_subquery -> + from(pb in __MODULE__, + left_join: r in subquery(referenced_subquery), + on: pb.policy_id == r.policy_id, + where: is_nil(r.policy_id), + select: pb.id, + order_by: [asc: pb.id] + ) + |> Repo.all(timeout: 300_000) + end + end + + @doc """ + Builds a subquery that unions all referenced policy_ids from the given tables. + Returns nil if there are no tables/columns to check. + """ + def build_referenced_subquery(table_columns) when map_size(table_columns) == 0, do: nil + + def build_referenced_subquery(table_columns) do + queries = + table_columns + |> Enum.flat_map(fn {table, columns} -> + Enum.map(columns, fn col -> + col_atom = String.to_atom(col) + + from(t in table, + select: %{policy_id: field(t, ^col_atom)}, + where: not is_nil(field(t, ^col_atom)) + ) + end) + end) + + case queries do + [] -> nil + [single] -> single + [first | rest] -> + Enum.reduce(rest, first, fn query, acc -> + from(q in acc, union_all: ^query) + end) + end + end + + @doc """ + Returns an Ecto query for dangling policy bindings. + + ## Parameters + - `query` - Base query to filter (defaults to PolicyBinding) + - `table_columns` - Optional map of table_name => [column_names] to check against + + """ + def dangling(query \\ __MODULE__, table_columns \\ nil) do + ids = dangling_ids(table_columns) + from(p in query, where: p.id in ^ids) + end + + def ordered(query \\ __MODULE__, order \\ [asc: :id]) do + from(p in query, order_by: ^order) + end + @valid ~w(user_id group_id policy_id)a def changeset(model, attrs \\ %{}) do diff --git a/test/console/deployments/cron_test.exs b/test/console/deployments/cron_test.exs index 425f4bc42f..ce9f4cada1 100644 --- a/test/console/deployments/cron_test.exs +++ b/test/console/deployments/cron_test.exs @@ -349,4 +349,31 @@ defmodule Console.Deployments.CronTest do assert refetch(keep) end end + + describe "#prune_dangling_policy_bindings/0" do + test "it will prune dangling policy bindings" do + user = insert(:user) + + # Create a project with write_bindings - these should be kept + project = insert(:project, write_bindings: [%{user_id: user.id}]) + %{write_bindings: [kept_binding]} = Console.Repo.preload(project, [:write_bindings]) + + # Create orphaned policy bindings with random policy_ids that don't exist anywhere + orphaned = for _ <- 1..3 do + %Console.Schema.PolicyBinding{} + |> Console.Schema.PolicyBinding.changeset(%{ + policy_id: Ecto.UUID.generate(), + user_id: user.id + }) + |> Console.Repo.insert!() + end + :ok = Cron.prune_dangling_policy_bindings() + + # Referenced binding should still exist + assert refetch(kept_binding) + + # Orphaned bindings should be deleted + for binding <- orphaned, do: refute refetch(binding) + end + end end