Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions gdpr/management/commands/anonymize_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

import pyprind
from django.core.management.base import BaseCommand
from utils import chunked_queryset_iterator
from utils.commands import ProgressBarStream
from chamber.commands import ProgressBarStream

from gdpr.anonymizers import DeleteModelAnonymizer
from gdpr.loading import anonymizer_register
from gdpr.utils import chunked_queryset_iterator


class Command(BaseCommand):
Expand Down
22 changes: 22 additions & 0 deletions gdpr/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,25 @@ def get_all_parent_objects(obj: Model) -> List[Model]:
parent_objects.append(parent_obj)

return [i for i in parent_objects if i is not None]

def chunked_queryset_iterator(queryset, chunk_size: int = 10000, delete_qs: bool = False):
    """
    Helper that splits a queryset into smaller querysets to save memory.

    In the default mode the input is ordered by primary key and walked with
    a "pk greater than the last seen pk" cursor. Every yielded chunk is an
    unsliced queryset restricted to one contiguous pk range, so it can be
    filtered, counted or iterated further by the caller.

    In delete mode (``delete_qs=True``) the first ``chunk_size`` rows of the
    input queryset are yielded repeatedly for as long as the queryset still
    matches any row. The consumer is expected to remove (or otherwise
    exclude) the yielded rows before asking for the next chunk; if it does
    not, the same rows are yielded forever.

    @param queryset: queryset that will be loaded in chunks
    @param chunk_size: maximum size of a chunk, must be a positive number
    @param delete_qs: if the purpose is to remove the input queryset, a faster
        method of generating chunks is used
    @return: generator that yields smaller querysets from the input queryset
    @raise ValueError: if chunk_size is lower than 1 (raised when the
        generator is first advanced)
    """
    if chunk_size < 1:
        # A non-positive size can never make progress: delete mode would
        # spin on empty slices and pk mode would never find a chunk.
        raise ValueError("chunk_size must be a positive integer")

    if delete_qs:
        while queryset.exists():
            yield queryset[:chunk_size]
    else:
        queryset = queryset.order_by("pk")
        last_pk = None
        while True:
            # Compare against None explicitly: a primary key of 0 is falsy
            # but is still a valid cursor position.
            if last_pk is None:
                remaining = queryset
            else:
                remaining = queryset.filter(pk__gt=last_pk)

            # Only the primary keys of one chunk are materialized, which
            # bounds the memory used here by chunk_size values.
            chunk_pks = list(remaining.values_list("pk", flat=True)[:chunk_size])
            if not chunk_pks:
                # Nothing left behind the cursor, the iteration is finished.
                break

            last_pk = chunk_pks[-1]
            # Rows are ordered by pk, so the chunk is exactly the part of
            # "remaining" whose pk does not exceed the last fetched pk.
            yield remaining.filter(pk__lte=last_pk)