Skip to content

Commit 1ec95ee

Browse files
committed
Download files in chunks
1 parent 5cd1775 commit 1ec95ee

File tree

1 file changed

+28
-17
lines changed

1 file changed

+28
-17
lines changed

superannotate/db/exports.py

Lines changed: 28 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import zipfile
77
from datetime import datetime
88
from pathlib import Path
9+
import shutil
910

1011
import boto3
1112
import requests
@@ -209,6 +210,15 @@ def __upload_files_to_aws_thread(
209210
already_uploaded[i] = True
210211

211212

213+
def _download_file(url, local_filename, chunk_size=8192, timeout=None):
    """Stream the content at ``url`` into ``local_filename``.

    The response body is written to disk chunk by chunk, so the whole
    payload is never held in memory at once.

    :param url: address to download from
    :param local_filename: destination path; opened in binary write mode
    :param chunk_size: number of bytes requested per chunk
    :param timeout: forwarded to ``requests.get``; the default ``None``
        keeps the previous behaviour of waiting without a limit
    :return: ``local_filename``, unchanged
    :raises requests.HTTPError: if the server answers with an error status
    """
    with requests.get(url, stream=True, timeout=timeout) as r:
        # Fail before creating the file when the server reports an error.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # nothing to write for an empty chunk
                    f.write(chunk)
    return local_filename
220+
221+
212222
def download_export(
213223
project, export, folder_path, extract_zip_contents=True, to_s3_bucket=None
214224
):
@@ -243,25 +253,26 @@ def download_export(
243253
break
244254

245255
filename = Path(res['path']).name
246-
r = requests.get(res['download'], allow_redirects=True)
247-
if to_s3_bucket is None:
248-
filepath = Path(folder_path) / filename
249-
open(filepath, 'wb').write(r.content)
250-
if extract_zip_contents:
251-
with zipfile.ZipFile(filepath, 'r') as f:
252-
f.extractall(folder_path)
253-
Path.unlink(filepath)
254-
logger.info("Extracted %s to folder %s", filepath, folder_path)
255-
else:
256-
logger.info("Downloaded export ID %s to %s", res['id'], filepath)
257-
else:
258-
with tempfile.TemporaryDirectory() as tmpdirname:
259-
filepath = Path(tmpdirname) / filename
260-
open(filepath, 'wb').write(r.content)
256+
with tempfile.TemporaryDirectory() as tmpdirname:
257+
temp_filepath = Path(tmpdirname) / filename
258+
_download_file(res['download'], temp_filepath)
259+
if to_s3_bucket is None:
260+
filepath = Path(folder_path) / filename
261+
shutil.copyfile(temp_filepath, filepath)
261262
if extract_zip_contents:
262263
with zipfile.ZipFile(filepath, 'r') as f:
263-
f.extractall(tmpdirname)
264+
f.extractall(folder_path)
264265
Path.unlink(filepath)
266+
logger.info("Extracted %s to folder %s", filepath, folder_path)
267+
else:
268+
logger.info(
269+
"Downloaded export ID %s to %s", res['id'], filepath
270+
)
271+
else:
272+
if extract_zip_contents:
273+
with zipfile.ZipFile(temp_filepath, 'r') as f:
274+
f.extractall(tmpdirname)
275+
Path.unlink(temp_filepath)
265276
files_to_upload = []
266277
for file in Path(tmpdirname).rglob("*.*"):
267278
if not file.is_file():
@@ -296,4 +307,4 @@ def download_export(
296307
t.join()
297308
finish_event.set()
298309
tqdm_thread.join()
299-
logger.info("Exported to AWS %s/%s", to_s3_bucket, folder_path)
310+
logger.info("Exported to AWS %s/%s", to_s3_bucket, folder_path)

0 commit comments

Comments
 (0)