Skip to content

Commit c2e6163

Browse files
committed
Duplicate images won't be uploaded
1 parent 4067f81 commit c2e6163

File tree

3 files changed

+84
-13
lines changed

3 files changed

+84
-13
lines changed

superannotate/db/projects.py

Lines changed: 41 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
)
3131
from .project import get_project_metadata
3232
from .users import get_team_contributor_metadata
33+
from .images import search_images
3334

3435
logger = logging.getLogger("superannotate-python-sdk")
3536

@@ -649,7 +650,6 @@ def __upload_images_to_aws_thread(
649650
image_quality_in_editor,
650651
from_s3_bucket=None,
651652
):
652-
project_type = project["type"]
653653
len_img_paths = len(img_paths)
654654
start_index = thread_id * chunksize
655655
end_index = start_index + chunksize
@@ -672,13 +672,18 @@ def __upload_images_to_aws_thread(
672672
break
673673
path = img_paths[i]
674674
key = prefix + f'{Path(path).name}'
675-
if from_s3_bucket is not None:
676-
file = io.BytesIO()
677-
from_s3_object = from_s3.Object(from_s3_bucket, path)
678-
from_s3_object.download_fileobj(file)
679-
else:
680-
with open(path, "rb") as f:
681-
file = io.BytesIO(f.read())
675+
try:
676+
if from_s3_bucket is not None:
677+
file = io.BytesIO()
678+
from_s3_object = from_s3.Object(from_s3_bucket, path)
679+
from_s3_object.download_fileobj(file)
680+
else:
681+
with open(path, "rb") as f:
682+
file = io.BytesIO(f.read())
683+
except Exception as e:
684+
logger.warning("Unable to open image %s.", e)
685+
couldnt_upload[thread_id].append(path)
686+
continue
682687
try:
683688
orig_image, lores_image, huge_image, thumbnail_image = get_image_array_to_upload(
684689
file, image_quality_in_editor
@@ -736,6 +741,10 @@ def upload_images_to_project(
736741
"""Uploads all images given in list of path objects in img_paths to the project.
737742
Sets status of all the uploaded images to set_status if it is not None.
738743
744+
If an image with the same name already exists in the project, it won't be uploaded,
745+
and its path will be appended to the third member of the return value of this
746+
function.
747+
739748
:param project: project name or metadata of the project to upload images to
740749
:type project: str or dict
741750
:param img_paths: list of Pathlike (str or Path) objects to upload
@@ -748,22 +757,42 @@ def upload_images_to_project(
748757
Can be either "compressed" or "original". If None then the default value in project settings will be used.
749758
:type image_quality_in_editor: str
750759
751-
:return: uploaded and not-uploaded images' filepaths
752-
:rtype: tuple of list of strs
760+
:return: uploaded, could-not-upload, existing-images filepaths
761+
:rtype: tuple (3 members) of list of strs
753762
"""
754763
if not isinstance(project, dict):
755764
project = get_project_metadata(project)
765+
if not isinstance(img_paths, list):
766+
raise SABaseException(
767+
0, "img_paths argument to upload_images_to_project should be a list"
768+
)
756769
annotation_status = annotation_status_str_to_int(annotation_status)
757770
image_quality_in_editor = _get_project_image_quality_in_editor(
758771
project, image_quality_in_editor
759772
)
760773
team_id, project_id = project["team_id"], project["id"]
774+
existing_images = search_images(project)
775+
duplicate_images = []
776+
for existing_image in existing_images:
777+
i = -1
778+
for j, img_path in enumerate(img_paths):
779+
if str(img_path).endswith(existing_image):
780+
i = j
781+
break
782+
if i != -1:
783+
duplicate_images.append(img_paths[i])
784+
del img_paths[i]
785+
if len(duplicate_images) != 0:
786+
logger.warning(
787+
"%s already existing images found that won't be uploaded.",
788+
len(duplicate_images)
789+
)
761790
len_img_paths = len(img_paths)
762791
logger.info(
763792
"Uploading %s images to project %s.", len_img_paths, project["name"]
764793
)
765794
if len_img_paths == 0:
766-
return ([], [])
795+
return ([], [], duplicate_images)
767796
params = {
768797
'team_id': team_id,
769798
}
@@ -819,7 +848,7 @@ def upload_images_to_project(
819848
for file in upload_thread:
820849
list_of_uploaded.append(str(file))
821850

822-
return (list_of_uploaded, list_of_not_uploaded)
851+
return (list_of_uploaded, list_of_not_uploaded, duplicate_images)
823852

824853

825854
def __upload_annotations_thread(

superannotate/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.3.9"
1+
__version__ = "2.3.10"
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from pathlib import Path
2+
import time
3+
4+
import pytest
5+
6+
import superannotate as sa
7+
8+
PROJECT_NAME_VECTOR = "test duplicate upload images"
9+
10+
11+
def test_duplicate_upload_images(tmpdir):
    """Exercise the three-member result returned by the image upload calls.

    The test recreates the project named by ``PROJECT_NAME_VECTOR``, uploads
    the sample folder, then uploads a single path, then uploads the sample
    folder a second time. After each call it checks how many entries landed
    in each of the first three members of the returned value.

    NOTE(review): the members are presumably (uploaded, could-not-upload,
    already-existing) file paths -- confirm against the docstring of
    ``upload_images_to_project``.
    """
    tmpdir = Path(tmpdir)

    def member_sizes(result):
        # Lengths of the first three members of an upload result.
        return (len(result[0]), len(result[1]), len(result[2]))

    # Remove every project matching the test name so the run starts clean.
    for stale_project in sa.search_projects(
        PROJECT_NAME_VECTOR, return_metadata=True
    ):
        sa.delete_project(stale_project)

    project = sa.create_project(PROJECT_NAME_VECTOR, "test", "Vector")

    # First folder upload: four entries expected in member 0, none elsewhere.
    first_pass = sa.upload_images_from_folder_to_project(
        project, "./tests/sample_project_vector"
    )
    assert member_sizes(first_pass) == (4, 0, 0)

    # Single-path upload: the path is expected in member 1 only.
    # NOTE(review): presumably dd.jpg is not a readable file in the sample
    # folder, which is why it is expected in member 1 -- confirm.
    single_upload = sa.upload_images_to_project(
        project, ["./tests/sample_project_vector/dd.jpg"]
    )
    assert member_sizes(single_upload) == (0, 1, 0)

    # Second folder upload: all four entries are expected in member 2.
    second_pass = sa.upload_images_from_folder_to_project(
        project, "./tests/sample_project_vector"
    )
    assert member_sizes(second_pass) == (0, 0, 4)

0 commit comments

Comments
 (0)