1212from typing import Dict
1313from typing import List
1414from typing import Optional
15+ from typing import Set
1516from typing import Tuple
1617
1718import boto3
3637from lib .core .serviceproviders import SuperannotateServiceProvider
3738from lib .core .types import PriorityScore
3839from lib .core .usecases .base import BaseReportableUseCase
40+ from lib .core .usecases .images import GetBulkImages
3941from lib .core .video_convertor import VideoFrameGenerator
4042from superannotate .logger import get_default_logger
4143
@@ -94,15 +96,11 @@ def get_name_path_mappings(annotation_paths):
9496 name_path_mappings : Dict [str , str ] = {}
9597
9698 for item_path in annotation_paths :
97- name_path_mappings [Path (item_path ).name ] = item_path
99+ name_path_mappings [
100+ UploadAnnotationsUseCase .extract_name (Path (item_path ).name )
101+ ] = item_path
98102 return name_path_mappings
99103
100- @property
101- def missing_annotations (self ):
102- if not self ._missing_annotations :
103- self ._missing_annotations = []
104- return self ._missing_annotations
105-
106104 def _log_report (
107105 self , missing_classes : list , missing_attr_groups : list , missing_attrs : list
108106 ):
@@ -183,34 +181,73 @@ def chunks(data, size: int = 10000):
183181 for i in range (0 , len (data ), size ):
184182 yield {k : data [k ] for k in islice (it , size )}
185183
184+ @staticmethod
def extract_name(value: str):
    """Return the base name of ``value`` with annotation postfixes removed.

    Every occurrence of the pixel, vector and attached-video annotation
    postfix constants is replaced with an empty string (in that order), and
    the final path component of the result is returned.
    """
    stripped = value
    for postfix in (
        constances.PIXEL_ANNOTATION_POSTFIX,
        constances.VECTOR_ANNOTATION_POSTFIX,
        constances.ATTACHED_VIDEO_ANNOTATION_POSTFIX,
    ):
        stripped = stripped.replace(postfix, "")
    return os.path.basename(stripped)
191+
def get_existing_item_names(self, name_path_mappings: Dict[str, str]) -> Set[str]:
    """Collect the names from the mapping that the backend reports as items.

    The keys of ``name_path_mappings`` are queried through ``GetBulkImages``
    in slices of ``self.CHUNK_SIZE`` names. Names of the items returned by
    each error-free response are accumulated; a response that carries errors
    contributes nothing to the result.

    :param name_path_mappings: mapping whose keys are the names to look up.
    :return: set of ``name`` attributes of the returned items.
    """
    names = list(name_path_mappings.keys())
    chunk_size = self.CHUNK_SIZE
    found = set()
    for start in range(0, len(names), chunk_size):
        response = GetBulkImages(
            service=self._backend_service,
            project_id=self._project.id,
            team_id=self._project.team_id,
            folder_id=self._folder.uuid,
            images=names[start : start + chunk_size],  # noqa: E203
        ).execute()
        if response.errors:
            # Failed lookups are skipped without being reported.
            continue
        found.update(item.name for item in response.data)
    return found
207+
186208 def execute (self ):
187209 uploaded_annotations = []
188210 failed_annotations = []
211+ missing_annotations = []
189212 self .reporter .start_progress (
190213 len (self ._annotation_paths ), description = "Uploading Annotations"
191214 )
215+ name_path_mappings = self .get_name_path_mappings (self ._annotation_paths )
216+ existing_item_names = self .get_existing_item_names (name_path_mappings )
217+ name_path_mappings_to_upload = {}
218+ for name , path in name_path_mappings .items ():
219+ try :
220+ existing_item_names .remove (name )
221+ name_path_mappings_to_upload [name ] = path
222+ except KeyError :
223+ missing_annotations .append (path )
224+
192225 with concurrent .futures .ThreadPoolExecutor (
193226 max_workers = self .MAX_WORKERS
194227 ) as executor :
195228 results = {}
196- path_name_mappings = self .get_name_path_mappings (self ._annotation_paths )
197- for name_path_mapping in self .chunks (path_name_mappings , self .CHUNK_SIZE ):
229+
230+ for name_path_mapping in self .chunks (
231+ name_path_mappings_to_upload , self .CHUNK_SIZE
232+ ):
198233 items_name_file_map = {}
199234 for name , path in name_path_mapping .items ():
200235 annotation , mask = self .get_annotation (path )
201236 if not annotation :
237+ failed_annotations .append (path )
202238 self .reporter .update_progress ()
203239 continue
204240 items_name_file_map [name ] = annotation
205- results [
206- executor .submit (
207- self ._backend_service .upload_annotations ,
208- team_id = self ._project .team_id ,
209- project_id = self ._project .id ,
210- folder_id = self ._folder .id ,
211- items_name_file_map = items_name_file_map ,
212- )
213- ] = (len (items_name_file_map ), name_path_mapping )
241+ if items_name_file_map :
242+ results [
243+ executor .submit (
244+ self ._backend_service .upload_annotations ,
245+ team_id = self ._project .team_id ,
246+ project_id = self ._project .id ,
247+ folder_id = self ._folder .id ,
248+ items_name_file_map = items_name_file_map ,
249+ )
250+ ] = (len (items_name_file_map ), name_path_mapping )
214251 missing_classes , missing_attr_groups , missing_attrs = [], [], []
215252 for future in concurrent .futures .as_completed (results .keys ()):
216253 response : ServiceResponse = future .result ()
@@ -236,7 +273,7 @@ def execute(self):
236273 self ._response .data = (
237274 uploaded_annotations ,
238275 failed_annotations ,
239- [ annotation . path for annotation in self . missing_annotations ] ,
276+ missing_annotations ,
240277 )
241278 return self ._response
242279
@@ -796,43 +833,52 @@ def __init__(
796833 self ._backend_client = backend_service_provider
797834
798835 @staticmethod
def _get_const(items, path=()):
    """Find the ``const`` type discriminator declared by a schema node.

    Looks at ``items["properties"]``. When its ``"meta"`` entry is truthy and
    has a truthy ``"type"`` key, the search first recurses into that entry
    with ``"meta"`` appended to ``path``. Afterwards, if a type value is
    known and ``properties["type"]`` declares a truthy ``"const"``,
    ``"type"`` is appended to the path and the const value becomes the
    result.

    :param items: schema node (a dict) to inspect.
    :param path: tuple of keys walked so far.
    :return: ``(path, type)`` - the key path and the value found; the path
        is returned unchanged when no const was located at this level.
    """
    props = items.get("properties", {})
    found_type = props.get("type")
    meta_schema = props.get("meta")

    if meta_schema and meta_schema.get("type"):
        path, found_type = ValidateAnnotationUseCase._get_const(
            meta_schema, path + ("meta",)
        )

    if found_type and props.get("type", {}).get("const"):
        path = path + ("type",)
        found_type = props["type"]["const"]

    return path, found_type
846+
847+ @staticmethod
def _get_by_path(path: tuple, data: dict):
    """Follow ``path`` key by key through nested mappings in ``data``.

    :param path: sequence of keys to look up, outermost first.
    :param data: mapping to start from.
    :return: the value reached at the end of the path, ``data`` itself for an
        empty path, or ``{}`` when a key is missing or an intermediate value
        has no ``get`` method (e.g. a string, list or ``None``).
    """
    current = data
    for key in path:
        getter = getattr(current, "get", None)
        if getter is None:
            # A non-mapping in the middle of the path means the target is
            # absent; report it the same way as a missing key instead of
            # raising AttributeError on malformed input.
            return {}
        current = getter(key, {})
    return current
808853
809854 @staticmethod
def oneOf(validator, oneOf, instance, schema):  # noqa
    """Validate ``instance`` against ``oneOf`` using const type discriminators.

    For each sub-schema, ``_get_const`` is asked for the key path and value
    of its ``const`` type. When a path is found, the value at that path in
    ``instance`` is compared with the const: a missing/falsy value yields a
    "type required" error; a matching value validates ``instance`` against
    that sub-schema only and reports its errors as the context of a single
    "invalid instance" error. Sub-schemas without a const path delegate to
    the stock jsonschema ``oneOf`` validator.

    If at least one discriminated sub-schema was seen and none matched, a
    final "invalid instance" error is yielded.

    :param validator: the active jsonschema validator.
    :param oneOf: list of candidate sub-schemas.
    :param instance: the value being validated.
    :param schema: the schema containing the ``oneOf`` keyword.
    :return: generator of ``ValidationError`` objects.
    """
    const_found = False

    for index, sub_schema in enumerate(oneOf):
        key, _type = ValidateAnnotationUseCase._get_const(sub_schema)
        if not key:
            yield from jsonschema._validators.oneOf(  # noqa
                validator, oneOf, instance, schema
            )
            continue

        instance_type = ValidateAnnotationUseCase._get_by_path(key, instance)
        const_found = True
        if not instance_type:
            yield ValidationError("type required")
            # Plain return ends the generator; raising StopIteration here
            # would surface as RuntimeError (PEP 479).
            return
        if instance_type == _type:
            errs = list(validator.descend(instance, sub_schema, schema_path=index))
            if not errs:
                return
            yield ValidationError("invalid instance", context=errs)
            return

    if const_found:
        yield ValidationError("invalid instance")
836882
837883 @staticmethod
838884 def iter_errors (self , instance , _schema = None ):
@@ -884,16 +930,36 @@ def iter_errors(self, instance, _schema=None):
884930
885931 @staticmethod
def extract_path(path):
    """Convert an error path into a list of printable path segments.

    Integer items become ``"[n]"`` index segments. Any other item is added
    as-is, preceded by a ``"."`` segment unless it is the first segment or
    directly follows an index segment (so ``["a", 0, "b"]`` joins to
    ``"a[0]b"``).

    :param path: iterable of keys/indices (e.g. the deque stored on a
        jsonschema error); it is only read, never modified.
    :return: list of segments; ``"".join(...)`` gives the display path.
    """
    real_path = []
    for item in path:
        if isinstance(item, int):
            real_path.append(f"[{item}]")
        elif real_path and not real_path[-1].endswith("]"):
            real_path.extend([".", item])
        else:
            real_path.append(item)
    return real_path
896951
952+ # @staticmethod
953+ # def extract_path(path):
954+ # real_path = []
955+ # for item in path:
956+ # if isinstance(item, str):
957+ # if real_path:
958+ # real_path.append(".")
959+ # real_path.append(item)
960+ # elif isinstance(item, int):
961+ # real_path.append(f"[{item}]")
962+ # return real_path
897963 def _get_validator (self , version : str ) -> Draft7Validator :
898964 key = f"{ self ._project_type } __{ version } "
899965 validator = ValidateAnnotationUseCase .SCHEMAS .get (key )
@@ -913,7 +979,10 @@ def _get_validator(self, version: str) -> Draft7Validator:
913979 return validator
914980
915981 def execute (self ) -> Response :
916- version = self ._annotation .get ("version" , self .DEFAULT_VERSION )
982+ try :
983+ version = self ._annotation .get ("version" , self .DEFAULT_VERSION )
984+ except Exception as e :
985+ print ()
917986
918987 extract_path = ValidateAnnotationUseCase .extract_path
919988 validator = self ._get_validator (version )
@@ -925,7 +994,7 @@ def execute(self) -> Response:
925994 if not error .context :
926995 errors_report .append (("" .join (real_path ), error .message ))
927996 for sub_error in sorted (error .context , key = lambda e : e .schema_path ):
928- tmp_path = sub_error .path if sub_error .path else real_path
997+ tmp_path = sub_error .path # if sub_error.path else real_path
929998 errors_report .append (
930999 (
9311000 f"{ '' .join (real_path )} ." + "" .join (extract_path (tmp_path )),
0 commit comments