Skip to content

Commit d180834

Browse files
authored
Merge pull request #484 from superannotateai/big_file_upload
upload changes
2 parents 0fc5151 + 8293d43 commit d180834

File tree

8 files changed

+85
-77
lines changed

8 files changed

+85
-77
lines changed

src/superannotate/lib/core/entities/classes.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ class AttributeGroup(BaseModel):
7777

7878
class Config:
7979
use_enum_values = True
80+
exclude_unset = True
81+
exclude_none = True
8082

8183
def __hash__(self):
8284
return hash(f"{self.id}{self.class_id}{self.name}")

src/superannotate/lib/core/usecases/annotations.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class UploadAnnotationsUseCase(BaseReportableUseCase):
7070
STATUS_CHANGE_CHUNK_SIZE = 100
7171
AUTH_DATA_CHUNK_SIZE = 500
7272
THREADS_COUNT = 4
73+
URI_THRESHOLD = 4 * 1024 - 120
7374

7475
@dataclass
7576
class AnnotationToUpload:
@@ -317,26 +318,35 @@ async def _upload_small_annotations(self, chunk) -> Report:
317318

318319
async def upload_small_annotations(self):
319320
chunk = []
321+
322+
async def upload(_chunk):
    """Upload one chunk of small annotations and merge the outcome into ``self._report``.

    :param _chunk: items to send in a single ``_upload_small_annotations``
        call; each item must expose a ``name`` attribute.

    Any exception raised by the upload is handled here: the traceback is
    printed and every item of the chunk is recorded as failed, so one bad
    chunk does not stop the surrounding queue-draining loop.
    """
    try:
        # Use the argument, not the enclosing ``chunk`` variable, so the
        # helper keeps working on the right list even if the caller
        # rebinds ``chunk`` while this coroutine is suspended.
        report = await self._upload_small_annotations(_chunk)
        self._report.failed_annotations.extend(report.failed_annotations)
        self._report.missing_classes.extend(report.missing_classes)
        self._report.missing_attr_groups.extend(report.missing_attr_groups)
        self._report.missing_attrs.extend(report.missing_attrs)
    except Exception:
        # Deliberate best-effort boundary: report the failure, keep going.
        # NOTE(review): consider routing this through the project's
        # reporter/logger instead of printing to stderr - confirm it is
        # available on this use case.
        import traceback

        traceback.print_exc()
        self._report.failed_annotations.extend([i.name for i in _chunk])
333+
320334
while True:
321335
item = await self._small_files_queue.get()
322336
self._small_files_queue.task_done()
323337
if not item:
324338
self._small_files_queue.put_nowait(None)
325339
break
326340
chunk.append(item)
327-
if sys.getsizeof(chunk) >= self.CHUNK_SIZE_MB:
328-
report = await self._upload_small_annotations(chunk)
329-
self._report.failed_annotations.extend(report.failed_annotations)
330-
self._report.missing_classes.extend(report.missing_classes)
331-
self._report.missing_attr_groups.extend(report.missing_attr_groups)
332-
self._report.missing_attrs.extend(report.missing_attrs)
341+
if (
342+
sys.getsizeof(chunk) >= self.CHUNK_SIZE_MB or
343+
sum([len(i.name) for i in chunk]) >= self.URI_THRESHOLD - len(chunk) * 14
344+
):
345+
await upload(chunk)
333346
chunk = []
347+
334348
if chunk:
335-
report = await self._upload_small_annotations(chunk)
336-
self._report.failed_annotations.extend(report.failed_annotations)
337-
self._report.missing_classes.extend(report.missing_classes)
338-
self._report.missing_attr_groups.extend(report.missing_attr_groups)
339-
self._report.missing_attrs.extend(report.missing_attrs)
349+
await upload(chunk)
340350

341351
async def _upload_big_annotation(self, item) -> Tuple[str, bool]:
342352
try:
@@ -583,7 +593,7 @@ def set_defaults(team_id, annotation_data: dict, project_type: int):
583593
default_data = {}
584594
annotation_data["metadata"]["lastAction"] = {
585595
"email": team_id,
586-
"timestamp": int(time.time())
596+
"timestamp": int(round(time.time() * 1000))
587597
}
588598
instances = annotation_data.get("instances", [])
589599
if project_type in constants.ProjectType.images:
@@ -594,14 +604,14 @@ def set_defaults(team_id, annotation_data: dict, project_type: int):
594604
instance["meta"] = {
595605
**default_data,
596606
**instance["meta"],
597-
"creationType": "Preannotation",
598-
} # noqa
607+
"creationType": "Preannotation", # noqa
608+
}
599609
else:
600610
for idx, instance in enumerate(instances):
601611
instances[idx] = {
602612
**default_data,
603613
**instance,
604-
"creationType": "Preannotation",
614+
"creationType": "Preannotation", # noqa
605615
}
606616
return annotation_data
607617

src/superannotate/lib/core/usecases/images.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,6 +1795,7 @@ def execute(self):
17951795
headers=annotation_json_creds["headers"],
17961796
)
17971797
if not response.ok:
1798+
# TODO remove
17981799
logger.warning("Couldn't load annotations.")
17991800
self._response.data = (None, None)
18001801
return self._response

src/superannotate/lib/core/usecases/projects.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -574,31 +574,42 @@ def execute(self):
574574
)
575575
annotation_classes_entity_mapping = defaultdict(AnnotationClassEntity)
576576
annotation_classes_created = False
577-
if self._include_annotation_classes:
577+
if self._include_settings:
578578
self.reporter.log_info(
579-
f"Cloning annotation classes from {self._project.name} to {self._project_to_create.name}."
579+
f"Cloning settings from {self._project.name} to {self._project_to_create.name}."
580580
)
581581
try:
582-
self._copy_annotation_classes(
583-
annotation_classes_entity_mapping, project
584-
)
585-
annotation_classes_created = True
582+
self._copy_settings(project)
586583
except (AppException, RequestException) as e:
587584
self.reporter.log_warning(
588-
f"Failed to clone annotation classes from {self._project.name} to {self._project_to_create.name}."
585+
f"Failed to clone settings from {self._project.name} to {self._project_to_create.name}."
589586
)
590587
self.reporter.log_debug(str(e), exc_info=True)
591588

589+
if self._include_contributors:
590+
self.reporter.log_info(
591+
f"Cloning contributors from {self._project.name} to {self._project_to_create.name}."
592+
)
593+
try:
594+
self._copy_include_contributors(project)
595+
except (AppException, RequestException) as e:
596+
self.reporter.log_warning(
597+
f"Failed to clone contributors from {self._project.name} to {self._project_to_create.name}."
598+
)
599+
self.reporter.log_debug(str(e), exc_info=True)
592600

593-
if self._include_settings:
601+
if self._include_annotation_classes:
594602
self.reporter.log_info(
595-
f"Cloning settings from {self._project.name} to {self._project_to_create.name}."
603+
f"Cloning annotation classes from {self._project.name} to {self._project_to_create.name}."
596604
)
597605
try:
598-
self._copy_settings(project)
606+
self._copy_annotation_classes(
607+
annotation_classes_entity_mapping, project
608+
)
609+
annotation_classes_created = True
599610
except (AppException, RequestException) as e:
600611
self.reporter.log_warning(
601-
f"Failed to clone settings from {self._project.name} to {self._project_to_create.name}."
612+
f"Failed to clone annotation classes from {self._project.name} to {self._project_to_create.name}."
602613
)
603614
self.reporter.log_debug(str(e), exc_info=True)
604615

@@ -626,17 +637,6 @@ def execute(self):
626637
f"Failed to workflow from {self._project.name} to {self._project_to_create.name}."
627638
)
628639
self.reporter.log_debug(str(e), exc_info=True)
629-
if self._include_contributors:
630-
self.reporter.log_info(
631-
f"Cloning contributors from {self._project.name} to {self._project_to_create.name}."
632-
)
633-
try:
634-
self._copy_include_contributors(project)
635-
except (AppException, RequestException) as e:
636-
self.reporter.log_warning(
637-
f"Failed to clone contributors from {self._project.name} to {self._project_to_create.name}."
638-
)
639-
self.reporter.log_debug(str(e), exc_info=True)
640640

641641
self._response.data = self._projects.get_one(
642642
uuid=project.id, team_id=project.team_id

src/superannotate/lib/infrastructure/services.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import aiohttp
1919
import requests.packages.urllib3
2020
from pydantic import BaseModel
21+
from pydantic import parse_obj_as
2122
from requests.exceptions import HTTPError
2223

2324
import lib.core as constance
@@ -1374,15 +1375,16 @@ async def upload_annotations(
13741375
items_name_file_map: Dict[str, io.StringIO],
13751376
) -> UploadAnnotationsResponse:
13761377
url = urljoin(
1377-
"self.assets_provider_url",
1378-
f"{self.URL_UPLOAD_ANNOTATIONS}?{'&'.join(f'image_names[]={item_name}' for item_name in items_name_file_map.keys())}",
1378+
self.assets_provider_url,
1379+
# "https://0ef1-178-160-196-42.ngrok.io/api/v1.01/",
1380+
(f"{self.URL_UPLOAD_ANNOTATIONS}?{'&'.join(f'image_names[]={item_name}' for item_name in items_name_file_map.keys())}"),
13791381
)
13801382

13811383
headers = copy.copy(self.default_headers)
13821384
del headers["Content-Type"]
13831385
async with aiohttp.ClientSession(
13841386
headers=headers,
1385-
connector=aiohttp.TCPConnector(ssl=self._verify_ssl),
1387+
connector=aiohttp.TCPConnector(ssl=self._verify_ssl)
13861388
) as session:
13871389
data = aiohttp.FormData()
13881390

@@ -1400,13 +1402,12 @@ async def upload_annotations(
14001402
},
14011403
data=data
14021404
)
1403-
from pydantic import parse_obj_as
1404-
data_json = await _response.json()
1405-
response = ServiceResponse()
1406-
response.status = _response.status
1407-
response._content = await _response.text()
1408-
response.data = parse_obj_as(UploadAnnotationsResponse, data_json)
1409-
return response
1405+
data_json = await _response.json()
1406+
response = ServiceResponse()
1407+
response.status = _response.status
1408+
response._content = await _response.text()
1409+
response.data = parse_obj_as(UploadAnnotationsResponse, data_json)
1410+
return response
14101411

14111412
async def upload_big_annotation(
14121413
self,

tests/integration/annotations/test_preannotation_upload.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,19 @@ def folder_path(self):
1818
return os.path.join(Path(__file__).parent.parent.parent, self.TEST_FOLDER_PATH)
1919

2020
def test_pre_annotation_folder_upload_download(self):
21-
sa.upload_images_from_folder_to_project(
22-
self.PROJECT_NAME, self.folder_path, annotation_status="InProgress"
23-
)
21+
self._attach_items()
2422
sa.create_annotation_classes_from_classes_json(
2523
self.PROJECT_NAME, f"{self.folder_path}/classes/classes.json"
2624
)
27-
_, _, _ = sa.upload_annotations_from_folder_to_project(
25+
uploaded, _, _ = sa.upload_annotations_from_folder_to_project(
2826
self.PROJECT_NAME, self.folder_path
2927
)
28+
assert len(uploaded) == 4
3029
count_in = len(list(Path(self.folder_path).glob("*.json")))
3130
images = sa.search_items(self.PROJECT_NAME)
3231
with tempfile.TemporaryDirectory() as tmp_dir:
3332
for image in images:
3433
image_name = image["name"]
3534
sa.download_image_annotations(self.PROJECT_NAME, image_name, tmp_dir)
36-
3735
count_out = len(list(Path(tmp_dir).glob("*.json")))
38-
3936
self.assertEqual(count_in, count_out)

tests/integration/annotations/validations/test_vector_annotation_validation.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,24 @@ def test_validate_instances(self, mock_print):
3535
"instances[0] invalid type\n"
3636
"instances[1] 'points' is a required property"
3737
)
38+
39+
@patch('builtins.print')
def test_validate_create_dby(self, mock_print):
    """Validate a vector payload whose first instance carries an empty ``created_by``.

    The payload is expected to be reported as invalid, and the validation
    message is expected to be emitted through ``print``.
    """
    first_instance = {
        "type": "bbox",
        "created_by": {}
    }
    second_instance = {"type": "bbox"}
    annotation = {
        "metadata": {"name": "12"},
        "instances": [first_instance, second_instance]
    }
    expected_message = (
        "instances[0] invalid type\n"
        "instances[1] 'points' is a required property"
    )

    is_valid = sa.validate_annotations("vector", annotation)

    assert not is_valid
    mock_print.assert_any_call(expected_message)

tests/integration/test_get_exports.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -42,27 +42,3 @@ def test_get_exports(self):
4242
exports_new = sa.get_exports(self.PROJECT_NAME)
4343

4444
assert len(exports_new) == len(exports_old) + 1
45-
46-
47-
class TestPixelExportConvert(BaseTestCase):
48-
PROJECT_NAME = "Pixel_Export"
49-
PROJECT_DESCRIPTION = "Desc"
50-
PROJECT_TYPE = "Pixel"
51-
TEST_FOLDER_PTH = "data_set"
52-
TEST_FOLDER_PATH = "data_set/sample_project_pixel"
53-
54-
@property
55-
def folder_path(self):
56-
return os.path.join(dirname(dirname(__file__)), self.TEST_FOLDER_PATH)
57-
58-
def test_convert_pixel_exported_data(self):
59-
sa.upload_images_from_folder_to_project(self.PROJECT_NAME, self.folder_path)
60-
sa.upload_annotations_from_folder_to_project(self.PROJECT_NAME, self.folder_path)
61-
export = sa.prepare_export(self.PROJECT_NAME)
62-
with tempfile.TemporaryDirectory() as tmp_dir:
63-
sa.download_export(self.PROJECT_NAME, export["name"], tmp_dir)
64-
with tempfile.TemporaryDirectory() as converted_data_tmp_dir:
65-
export_annotation(
66-
tmp_dir, converted_data_tmp_dir, "COCO", "export", "Pixel", "panoptic_segmentation"
67-
)
68-
self.assertEqual(1, len(list(glob.glob(converted_data_tmp_dir + "/*.json"))))

0 commit comments

Comments
 (0)