Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/prod.exs
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ config :console, Console.Cron.Scheduler,
{"15 * * * *", {Console.Deployments.Cron, :prune_vuln_reports, []}},
{"*/15 * * * *", {Console.Deployments.Cron, :pr_governance, []}},
{"15 3 * * *", {Console.Deployments.Cron, :prune_dangling_templates, []}},
{"20 3 * * *", {Console.Deployments.Cron, :prune_dangling_policy_bindings, []}},
{"30 3 * * *", {Console.Deployments.Cron, :prune_insight_components, []}},
{"0 4 * * *", {Console.Deployments.Cron, :prune_helm_repositories, []}},
{"0 5 * * *", {Console.Deployments.Cron, :prune_agent_run_repositories, []}},
Expand Down
16 changes: 16 additions & 0 deletions lib/console/deployments/cron.ex
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ defmodule Console.Deployments.Cron do
AppNotification,
Alert,
ClusterAuditLog,
PolicyBinding,
PolicyConstraint,
VulnerabilityReport,
ServiceTemplate,
Expand Down Expand Up @@ -298,6 +299,21 @@ defmodule Console.Deployments.Cron do
|> Stream.run()
end

def prune_dangling_policy_bindings() do
PolicyBinding.dangling()
|> PolicyBinding.ordered(asc: :id)
|> Repo.stream(method: :keyset)
|> Console.throttle(count: 100, pause: 1)
|> Stream.chunk_every(100)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should throttle this (should be examples elsewhere)

|> Stream.each(fn bindings ->
ids = Enum.map(bindings, & &1.id)
Logger.info "pruning #{length(ids)} dangling policy bindings"
PolicyBinding.for_ids(ids)
|> Repo.delete_all(timeout: 10_000)
end)
|> Stream.run()
end

def add_ignore_crds(search) do
Service.search(search)
|> Repo.stream(method: :keyset)
Expand Down
105 changes: 105 additions & 0 deletions lib/console/schema/policy_binding.ex
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
defmodule Console.Schema.PolicyBinding do
use Piazza.Ecto.Schema
alias Console.Schema.{User, Group}
alias Console.Repo
import Ecto.Query

schema "policy_bindings" do
field :policy_id, :binary_id
Expand All @@ -10,6 +12,109 @@ defmodule Console.Schema.PolicyBinding do
timestamps()
end

@doc """
Fetches all tables and their policy-related columns from the database schema.
Returns a map of table_name => [column_names] for uuid columns ending in '_policy_id' or 'bindings_id'.
"""
def policy_columns do
from(c in "columns",
prefix: "information_schema",
where: like(c.column_name, "%policy_id") or c.column_name == "bindings_id",
where: c.table_schema == "public",
where: c.table_name != "policy_bindings",
where: c.data_type == "uuid",
select: {c.table_name, c.column_name},
order_by: [c.table_name, c.column_name]
)
|> Repo.all(timeout: 30_000)
|> Enum.group_by(fn {table, _col} -> table end, fn {_table, col} -> col end)
end

@doc """
Returns IDs of policy bindings not referenced by any of the given tables.
Uses a database-side subquery with LEFT JOIN to avoid loading all policy_ids into memory.

## Parameters
- `table_columns` - A map of table_name => [column_names] to check for references.
If not provided, defaults to all policy columns discovered via `policy_columns/0`.

## Examples

# Check against all tables with policy columns
PolicyBinding.dangling_ids()

# Check against specific tables only
PolicyBinding.dangling_ids(%{"clusters" => ["read_policy_id", "write_policy_id"]})

"""
def dangling_ids(table_columns \\ nil) do
table_columns = table_columns || policy_columns()

case build_referenced_subquery(table_columns) do
nil ->
# No tables to check against, all bindings are considered "dangling"
from(pb in __MODULE__, select: pb.id, order_by: [asc: pb.id])
|> Repo.all(timeout: 300_000)

referenced_subquery ->
from(pb in __MODULE__,
left_join: r in subquery(referenced_subquery),
on: pb.policy_id == r.policy_id,
where: is_nil(r.policy_id),
select: pb.id,
order_by: [asc: pb.id]
)
|> Repo.all(timeout: 300_000)
end
end

@doc """
Builds a subquery that unions all referenced policy_ids from the given tables.
Returns nil if there are no tables/columns to check.
"""
def build_referenced_subquery(table_columns) when map_size(table_columns) == 0, do: nil

def build_referenced_subquery(table_columns) do
queries =
table_columns
|> Enum.flat_map(fn {table, columns} ->
Enum.map(columns, fn col ->
col_atom = String.to_atom(col)

from(t in table,
select: %{policy_id: field(t, ^col_atom)},
where: not is_nil(field(t, ^col_atom))
)
end)
end)

case queries do
[] -> nil
[single] -> single
[first | rest] ->
Enum.reduce(rest, first, fn query, acc ->
from(q in acc, union_all: ^query)
end)
end
end

@doc """
Returns an Ecto query for dangling policy bindings.

## Parameters
- `query` - Base query to filter (defaults to PolicyBinding)
- `table_columns` - Optional map of table_name => [column_names] to check against

"""
def dangling(query \\ __MODULE__, table_columns \\ nil) do
ids = dangling_ids(table_columns)
from(p in query, where: p.id in ^ids)
end

def ordered(query \\ __MODULE__, order \\ [asc: :id]) do
from(p in query, order_by: ^order)
end

@valid ~w(user_id group_id policy_id)a

def changeset(model, attrs \\ %{}) do
Expand Down
27 changes: 27 additions & 0 deletions test/console/deployments/cron_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -349,4 +349,31 @@ defmodule Console.Deployments.CronTest do
assert refetch(keep)
end
end

describe "#prune_dangling_policy_bindings/0" do
test "it will prune dangling policy bindings" do
user = insert(:user)

# Create a project with write_bindings - these should be kept
project = insert(:project, write_bindings: [%{user_id: user.id}])
%{write_bindings: [kept_binding]} = Console.Repo.preload(project, [:write_bindings])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i actually don't think this preload should be needed

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think it's needed if we want a kept_binding that we can run assertions against after the cronjob


# Create orphaned policy bindings with random policy_ids that don't exist anywhere
orphaned = for _ <- 1..3 do
%Console.Schema.PolicyBinding{}
|> Console.Schema.PolicyBinding.changeset(%{
policy_id: Ecto.UUID.generate(),
user_id: user.id
})
|> Console.Repo.insert!()
end
:ok = Cron.prune_dangling_policy_bindings()

# Referenced binding should still exist
assert refetch(kept_binding)

# Orphaned bindings should be deleted
for binding <- orphaned, do: refute refetch(binding)
end
end
end
Loading