From 0043afc1a92f319e7a35e4d79e05d2b890575a30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Kl=C3=ADma?=
Date: Mon, 4 Aug 2025 12:16:22 +0200
Subject: [PATCH] add missing util functions

---
 gdpr/management/commands/anonymize_data.py |  4 ++--
 gdpr/utils.py                              | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/gdpr/management/commands/anonymize_data.py b/gdpr/management/commands/anonymize_data.py
index 11fea89..ebc1407 100644
--- a/gdpr/management/commands/anonymize_data.py
+++ b/gdpr/management/commands/anonymize_data.py
@@ -2,11 +2,11 @@ import pyprind
 
 from django.core.management.base import BaseCommand
 
-from utils import chunked_queryset_iterator
-from utils.commands import ProgressBarStream
+from chamber.commands import ProgressBarStream
 
 from gdpr.anonymizers import DeleteModelAnonymizer
 from gdpr.loading import anonymizer_register
+from gdpr.utils import chunked_queryset_iterator
 
 
 class Command(BaseCommand):
diff --git a/gdpr/utils.py b/gdpr/utils.py
index b3fa373..3a175e5 100644
--- a/gdpr/utils.py
+++ b/gdpr/utils.py
@@ -82,3 +82,36 @@ def get_all_parent_objects(obj: Model) -> List[Model]:
         parent_objects.append(parent_obj)
 
     return [i for i in parent_objects if i is not None]
+
+
+def chunked_queryset_iterator(queryset, chunk_size=10000, delete_qs=False):
+    """
+    Helper that splits a queryset into smaller chunks to save memory.
+
+    @param queryset: queryset that will be loaded in chunks
+    @param chunk_size: maximum size of a chunk
+    @param delete_qs: set to True only when the caller deletes every yielded chunk; a faster
+        method of generating chunks is then used (it never terminates if rows are not removed)
+    @return: generator that yields smaller querysets taken from the input queryset
+    """
+    if delete_qs:
+        # Each yielded chunk is expected to be deleted by the caller, so the head of the
+        # queryset is always the next unprocessed chunk.
+        while queryset.exists():
+            yield queryset[:chunk_size]
+    else:
+        queryset = queryset.order_by("pk")
+        last_pk = None
+        while True:
+            remaining = queryset
+            # Compare with None explicitly so that a falsy primary key (e.g. 0) still advances.
+            if last_pk is not None:
+                remaining = remaining.filter(pk__gt=last_pk)
+            batch_queryset = queryset.filter(pk__in=remaining[:chunk_size].values("pk"))
+            last_obj = batch_queryset.last()
+            # Stop once no rows are left past last_pk; indexing an empty batch with
+            # count() - 1 would be a negative index, which querysets do not support.
+            if last_obj is None:
+                break
+            last_pk = last_obj.pk
+            yield batch_queryset