Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a4d9d82
download client side and bulk download
nicomy Sep 4, 2025
5daa6f9
working bulk download
nicomy Sep 8, 2025
eb60cde
cleaned
nicomy Sep 8, 2025
5bece9f
Merge pull request #1999 from nicomy/optimise_download_many
ObadaS Sep 9, 2025
ec29a37
gestion of faulty submission
nicomy Sep 17, 2025
6154fc7
changed to post instead of get and added failed.txt to bundle
nicomy Sep 18, 2025
27357e2
added limit on number of file to download simultaneously
nicomy Sep 19, 2025
1ada62d
it should work
nicomy Sep 19, 2025
102ec84
cleaned comments
nicomy Sep 19, 2025
e66f3a8
Merge pull request #2005 from nicomy/optimise_download_many
ObadaS Sep 22, 2025
48a7f28
Flake8 fixes
Sep 23, 2025
ee5a9ee
added option to bulk download scores files and predictions files
nicomy Oct 1, 2025
20ff485
cleaned code
nicomy Oct 2, 2025
0886f7c
exclude not finished submission inside download pred and scores
nicomy Oct 3, 2025
1e20fb4
merged
nicomy Oct 3, 2025
7610a22
download client side and bulk download
nicomy Sep 4, 2025
2726a24
working bulk download
nicomy Sep 8, 2025
61d5f11
cleaned
nicomy Sep 8, 2025
279faf5
gestion of faulty submission
nicomy Sep 17, 2025
78f9de9
changed to post instead of get and added failed.txt to bundle
nicomy Sep 18, 2025
86e27be
added limit on number of file to download simultaneously
nicomy Sep 19, 2025
0cae8f2
it should work
nicomy Sep 19, 2025
fd84799
cleaned comments
nicomy Sep 19, 2025
53a0f73
Flake8 fixes
Sep 23, 2025
1007769
Merge branch 'optimise_download_many' into optimise_download_many
ObadaS Dec 23, 2025
934605e
Merge pull request #2019 from nicomy/optimise_download_many
ObadaS Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 72 additions & 22 deletions src/apps/api/views/submissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,10 @@
from rest_framework.viewsets import ModelViewSet
from rest_framework_csv import renderers
from django.core.files.base import ContentFile
from django.http import StreamingHttpResponse

from profiles.models import Organization, Membership
from tasks.models import Task
from api.serializers.submissions import SubmissionCreationSerializer, SubmissionSerializer, SubmissionFilesSerializer
from api.serializers.submissions import SubmissionCreationSerializer, SubmissionSerializer, SubmissionFilesSerializer, SubmissionDetailSerializer
from competitions.models import Submission, SubmissionDetails, Phase, CompetitionParticipant
from leaderboards.strategies import put_on_leaderboard_by_submission_rule
from leaderboards.models import SubmissionScore, Column, Leaderboard
Expand Down Expand Up @@ -219,6 +218,28 @@ def destroy(self, request, *args, **kwargs):
# Otherwise, delete the submission
self.perform_destroy(submission)
return Response(status=status.HTTP_204_NO_CONTENT)


def check_submission_permissions(self, request, submissions):
    """Raise ``PermissionDenied`` unless *request.user* may download every submission.

    Rules (mirroring the inline check previously duplicated in download views):
    - anonymous users are rejected outright;
    - superusers and staff may download anything;
    - otherwise every submission must be owned by the user, or belong to a
      competition the user created or collaborates on.

    Args:
        request: the DRF request whose ``user`` is being authorized.
        submissions: queryset of ``Submission`` rows the user asked for.

    Raises:
        PermissionDenied: if the user is anonymous, or at least one
            submission is neither owned nor organized by the user.
    """
    if not request.user.is_authenticated:
        raise PermissionDenied("You must be logged in to download submissions")
    # Admins bypass the per-submission ownership check entirely.
    if request.user.is_superuser or request.user.is_staff:
        return
    # "Organizer" means competition creator or collaborator.
    organiser_q = (
        Q(phase__competition__created_by=request.user) |
        Q(phase__competition__collaborators=request.user)
    )
    # Any submission that is neither owned by the user nor organized by them
    # makes the whole request fail — a single DB EXISTS query.
    if submissions.exclude(Q(owner=request.user) | organiser_q).exists():
        raise PermissionDenied(
            "You do not have permission to download one or more of the requested submissions"
        )

@action(detail=True, methods=('DELETE',))
def soft_delete(self, request, pk):
Expand Down Expand Up @@ -382,26 +403,38 @@ def re_run_many_submissions(self, request):
submission.re_run()
return Response({})

@action(detail=False, methods=['get'])
@action(detail=False, methods=('POST',))
def download_many(self, request):
"""
Download a ZIP containing several submissions.
"""
pks = request.query_params.get('pks')
if pks:
pks = json.loads(pks) # Convert JSON string to list
else:
return Response({"error": "`pks` query parameter is required"}, status=400)
pks = request.data.get('pks')
if not pks:
return Response({"error": "`pks` field is required"}, status=400)

# pks is already parsed as a list if JSON was sent properly
if not isinstance(pks, list):
return Response({"error": "`pks` must be a list"}, status=400)

# TODO: The three download_many-style functions should be merged into one generic helper that takes the getter (e.g. "get_prediction_result") as a parameter, instead of duplicating the same code three times.
@action(detail=False, methods=('POST',))
def download_many(self, request):
pks = request.data.get('pks')
if not pks:
return Response({"error": "`pks` field is required"}, status=400)

# pks is already parsed as a list if JSON was sent properly
if not isinstance(pks, list):
return Response({"error": "`pks` must be a list"}, status=400)

# Get submissions
submissions = Submission.objects.filter(pk__in=pks).select_related(
"owner",
"phase__competition",
"phase__competition__created_by",
).prefetch_related("phase__competition__collaborators")
if submissions.count() != len(pks):
"phase",
"data"
)

if len(list(submissions)) != len(pks):
return Response({"error": "One or more submission IDs are invalid"}, status=404)

# TODO(Nicolas Homberg): extract this permission check into a shared helper function?
# Check permissions
if not request.user.is_authenticated:
raise PermissionDenied("You must be logged in to download submissions")
Expand All @@ -422,13 +455,30 @@ def download_many(self, request):
"You do not have permission to download one or more of the requested submissions"
)

# Download
from competitions.tasks import stream_batch_download
in_memory_zip = stream_batch_download(pks)
response = StreamingHttpResponse(in_memory_zip, content_type='application/zip')
response['Content-Disposition'] = 'attachment; filename="bulk_submissions.zip"'
return response

files = []

for sub in submissions:
file_path = sub.data.data_file.name.split('/')[-1]
short_name = f"{sub.id}_{sub.owner}_PhaseId{sub.phase.id}_{sub.data.created_when.strftime('%Y-%m-%d:%M-%S')}_{file_path}"
# url = sub.data.data_file.url
url = SubmissionDetailSerializer(sub.data, context=self.get_serializer_context()).data['data_file']
# url = SubmissionFilesSerializer(sub, context=self.get_serializer_context()).data['data_file']
files.append({"name": short_name, "url": url})

return Response(files)

for sub in submissions:
if sub.status not in [ Submission.FINISHED]: #Submission.FAILED, Submission.CANCELLED
continue
file_path = sub.data.data_file.name.split('/')[-1]
complete_name = f"res_{sub.id}_{sub.owner}_PhaseId{sub.phase.id}_{sub.data.created_when.strftime('%Y-%m-%d:%M-%S')}_{file_path}"
result_url = serializer.get_scoring_result(sub)
# Detailed results are already included in the results zip file, but for very large detailed results it could be helpful to exclude them.
# detailed_result_url = serializer.get_scoring_result(sub)
files.append({"name": complete_name, "url": result_url})

return Response(files)

@action(detail=True, methods=('GET',))
def get_details(self, request, pk):
submission = super().get_object()
Expand Down
47 changes: 0 additions & 47 deletions src/apps/competitions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@
from django.utils.timezone import now
from rest_framework.exceptions import ValidationError

from urllib.request import urlopen
from contextlib import closing
from urllib.error import ContentTooShortError

from celery_config import app
from competitions.models import Submission, CompetitionCreationTaskStatus, SubmissionDetails, Competition, \
CompetitionDump, Phase
Expand Down Expand Up @@ -280,49 +276,6 @@ def send_child_id(submission, child_id):
})


def retrieve_data(url, data=None):
    """Stream the resource at *url*, yielding chunks of up to 8 KiB.

    Args:
        url: any URL ``urllib.request.urlopen`` understands (http(s), file, ...).
        data: optional request body, passed straight through to ``urlopen``.

    Yields:
        bytes: successive chunks of the response body.

    Raises:
        ContentTooShortError: if the server announced a Content-Length and the
            stream ended before that many bytes were received.
    """
    with closing(urlopen(url, data)) as fp:
        headers = fp.info()

        block_size = 1024 * 8
        read = 0
        # -1 means "length unknown"; header lookup is case-insensitive.
        size = int(headers["Content-Length"]) if "content-length" in headers else -1

        while True:
            block = fp.read(block_size)
            if not block:
                break
            read += len(block)
            yield block

        if size >= 0 and read < size:
            # Bug fix: ContentTooShortError.__init__ requires (message, content);
            # the previous single-argument call raised TypeError on truncation.
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size),
                None)


def zip_generator(submission_pks):
    """Build an in-memory ZIP containing each submission's data file.

    Args:
        submission_pks: iterable of ``Submission`` primary keys to bundle.

    Returns:
        io.BytesIO: a seekable buffer positioned at 0, holding the ZIP with
        one member per submission, named ``ID_<pk>_<original filename>``.
    """
    in_memory_zip = BytesIO()
    with zipfile.ZipFile(in_memory_zip, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for submission_id in submission_pks:
            submission = Submission.objects.get(id=submission_id)
            short_name = "ID_" + str(submission_id) + '_' + submission.data.data_file.name.split('/')[-1]
            url = make_url_sassy(path=submission.data.data_file.name)
            # Bug fix: the previous code called zip_file.writestr() once per
            # downloaded chunk, which appends a NEW archive entry (all with the
            # same name) for every chunk instead of one entry per submission.
            # Stream all chunks into a single member instead.
            with zip_file.open(short_name, mode='w') as member:
                for block in retrieve_data(url):
                    member.write(block)

    in_memory_zip.seek(0)

    return in_memory_zip


@app.task(queue='site-worker', soft_time_limit=60 * 60)
def stream_batch_download(submission_pks):
    """Celery task: build and return an in-memory ZIP of the given submissions."""
    # NOTE(review): this returns a BytesIO object. That is fine when the task
    # function is called synchronously, but would not serialize as an async
    # Celery result — confirm callers invoke it directly, not via .delay().
    return zip_generator(submission_pks)


@app.task(queue='site-worker', soft_time_limit=60)
def _run_submission(submission_pk, task_pks=None, is_scoring=False):
"""This function is wrapped so that when we run tests we can run this function not
Expand Down
30 changes: 6 additions & 24 deletions src/static/js/ours/client.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,31 +128,13 @@ CODALAB.api = {
return CODALAB.api.request('GET', `${URLS.API}submissions/${id}/get_detail_result/`)
},
download_many_submissions: function (pks) {
console.log('Request bulk');
const params = new URLSearchParams({ pks: JSON.stringify(pks) });
const url = `${URLS.API}submissions/download_many/?${params}`;
return fetch(url, {
method: 'GET',
headers: {
'Content-Type': 'application/json'
}
}).then(response => {
if (!response.ok) {
throw new Error('Network response was not ok ' + response.statusText);
}
return response.blob();
}).then(blob => {
const link = document.createElement('a');
link.href = window.URL.createObjectURL(blob);
link.download = 'bulk_submissions.zip';
document.body.appendChild(link);
link.click();
document.body.removeChild(link);
}).catch(error => {
console.error('Error downloading submissions:', error);
});
return CODALAB.api.request(
'POST',
URLS.API + "submissions/download_many/",
{ pks: pks } // body is JSON by convention
);
},

/*---------------------------------------------------------------------
Leaderboards
---------------------------------------------------------------------*/
Expand Down
Loading