
Commit de8a1cd

consensus and benchmark enhancements done without testing
1 parent: 060273d · commit: de8a1cd

4 files changed: 61 additions and 59 deletions


superannotate/analytics/common.py

Lines changed: 5 additions & 0 deletions
@@ -373,6 +373,9 @@ def __get_user_metadata(annotation):
         annotation_point_labels = annotation.get("pointLabels")
         attributes = annotation.get("attributes")
         user_metadata = __get_user_metadata(annotation)
+        folder_name = None
+        if annotation_path.parent.name != '':
+            folder_name = annotation_path.parent.name
         num_added = 0
         if not attributes:
             annotation_dict = {
@@ -389,6 +392,7 @@ def __get_user_metadata(annotation):
                 "pointLabels": annotation_point_labels,
                 "classColor": annotation_class_color,
                 "groupId": annotation_group_id,
+                "folderName": folder_name,
             }
             annotation_dict.update(user_metadata)
             annotation_dict.update(image_metadata)
@@ -428,6 +432,7 @@ def __get_user_metadata(annotation):
                 "pointLabels": annotation_point_labels,
                 "classColor": annotation_class_color,
                 "groupId": annotation_group_id,
+                "folderName": folder_name,
             }
             annotation_dict.update(user_metadata)
             annotation_dict.update(image_metadata)
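
The added lines derive a folder name from the annotation file's parent directory. Below is a minimal sketch of that behavior, separate from the commit itself; the paths are invented for illustration.

    from pathlib import Path

    def folder_of(annotation_path):
        # Mirrors the added logic: annotations at the export root have an empty
        # parent name and get no folder; annotations inside a sub-directory take
        # that directory's name as their "folderName".
        folder_name = None
        if annotation_path.parent.name != '':
            folder_name = annotation_path.parent.name
        return folder_name

    print(folder_of(Path("image_1.json")))            # None (export root)
    print(folder_of(Path("batch_1/image_1.json")))    # "batch_1"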

superannotate/consensus_benchmark/benchmark.py

Lines changed: 25 additions & 29 deletions
@@ -14,19 +14,22 @@
 
 
 def benchmark(
-    gt_project_name,
-    project_names,
+    project,
+    gt_folder,
+    folder_names,
     export_root=None,
     image_list=None,
     annot_type='bbox',
     show_plots=False
 ):
-    """Computes benchmark score for each instance of given images that are present both gt_project_name project and projects in project_names list:
+    """Computes benchmark score for each instance of given images that are present in both the gt_folder folder and the folders of the folder_names list:
 
-    :param gt_project_name: Project name that contains the ground truth annotations
-    :type gt_project_name: str
-    :param project_names: list of project names to aggregate through
-    :type project_names: list of str
+    :param project: project name or metadata of the project
+    :type project: str or dict
+    :param gt_folder: project folder name that contains the ground truth annotations
+    :type gt_folder: str
+    :param folder_names: list of folder names in the project for which the scores will be computed
+    :type folder_names: list of str
     :param export_root: root export path of the projects
     :type export_root: Pathlike (str or Path)
     :param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
@@ -36,6 +39,8 @@ def benchmark(
     :param show_plots: If True, show plots based on results of consensus computation. Default: False
     :type show_plots: bool
 
+    :return: Pandas DataFrame with columns (creatorEmail, QA, imageName, instanceId, className, area, attribute, folderName, score)
+    :rtype: pandas DataFrame
     """
     def aggregate_attributes(instance_df):
         def attribute_to_list(attribute_df):
@@ -61,7 +66,7 @@ def attribute_to_list(attribute_df):
             ["attributeGroupName", "attributeName"], axis=1, inplace=True
         )
         instance_df.drop_duplicates(
-            subset=["imageName", "instanceId", "project"], inplace=True
+            subset=["imageName", "instanceId", "folderName"], inplace=True
         )
         instance_df["attributes"] = [attributes]
         return instance_df
@@ -72,27 +77,18 @@ def attribute_to_list(attribute_df):
 
     if export_root is None:
         with tempfile.TemporaryDirectory() as export_dir:
-            gt_project_meta = prepare_export(gt_project_name)
-            download_export(gt_project_name, gt_project_meta, export_dir)
-            gt_project_df = aggregate_annotations_as_df(export_dir)
+            proj_export_meta = prepare_export(project)
+            download_export(project, proj_export_meta, export_dir)
+            project_df = aggregate_annotations_as_df(export_dir)
     else:
-        export_dir = Path(export_root) / gt_project_name
-        gt_project_df = aggregate_annotations_as_df(export_dir)
-        gt_project_df["project"] = gt_project_name
+        project_df = aggregate_annotations_as_df(export_root)
 
-    benchmark_dfs = []
-    for project_name in project_names:
-        if export_root is None:
-            with tempfile.TemporaryDirectory() as export_dir:
-                proj_export_meta = prepare_export(project_name)
-                download_export(project_name, proj_export_meta, export_dir)
-                project_df = aggregate_annotations_as_df(export_dir)
-        else:
-            export_dir = Path(export_root) / project_name
-            project_df = aggregate_annotations_as_df(export_dir)
+    gt_project_df = project_df[project_df["folderName"] == gt_folder]
 
-        project_df["project"] = project_name
-        project_gt_df = pd.concat([project_df, gt_project_df])
+    benchmark_dfs = []
+    for folder_name in folder_names:
+        folder_df = project_df[project_df["folderName"] == folder_name]
+        project_gt_df = pd.concat([folder_df, gt_project_df])
         project_gt_df = project_gt_df[project_gt_df["instanceId"].notna()]
 
         if image_list is not None:
@@ -102,7 +98,7 @@ def attribute_to_list(attribute_df):
         project_gt_df.query("type == '" + annot_type + "'", inplace=True)
 
         project_gt_df = project_gt_df.groupby(
-            ["imageName", "instanceId", "project"]
+            ["imageName", "instanceId", "folderName"]
        )
         project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index(
             drop=True
@@ -115,13 +111,13 @@ def attribute_to_list(attribute_df):
 
         benchmark_project_df = pd.concat(all_benchmark_data, ignore_index=True)
         benchmark_project_df = benchmark_project_df[
-            benchmark_project_df["projectName"] == project_name]
+            benchmark_project_df["folderName"] == folder_name]
 
         benchmark_dfs.append(benchmark_project_df)
 
     benchmark_df = pd.concat(benchmark_dfs, ignore_index=True)
 
     if show_plots:
-        consensus_plot(benchmark_df, project_names)
+        consensus_plot(benchmark_df, folder_names)
 
     return benchmark_df
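
For orientation, a hedged usage sketch of the new folder-based signature. The project and folder names are invented, and it assumes benchmark is importable from the package top level as part of the SDK's public API; the old call took a ground-truth project name plus a list of project names instead.

    import superannotate as sa

    # Hypothetical setup: a single project whose "GT" folder holds the ground
    # truth and whose other folders hold the annotation runs to score against it.
    benchmark_df = sa.benchmark(
        "Example Project",                # project name (or project metadata dict)
        "GT",                             # folder with the ground-truth annotations
        ["annotator_1", "annotator_2"],   # folders to score against the ground truth
        annot_type="bbox",
        show_plots=False,
    )

    # Mean benchmark score per scored folder, using the returned columns.
    print(benchmark_df.groupby("folderName")["score"].mean())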

superannotate/consensus_benchmark/consensus.py

Lines changed: 20 additions & 19 deletions
@@ -14,16 +14,19 @@
 
 
 def consensus(
-    project_names,
+    project,
+    folder_names,
     export_root=None,
     image_list=None,
     annot_type='bbox',
     show_plots=False
 ):
     """Computes consensus score for each instance of given images that are present in at least 2 of the given projects:
 
-    :param project_names: list of project names to aggregate through
-    :type project_names: list of str
+    :param project: project name or metadata of the project
+    :type project: str or dict
+    :param folder_names: list of folder names in the project for which the scores will be computed
+    :type folder_names: list of str
     :param export_root: root export path of the projects
     :type export_root: Pathlike (str or Path)
     :param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
@@ -33,26 +36,24 @@ def consensus(
     :param show_plots: If True, show plots based on results of consensus computation. Default: False
     :type show_plots: bool
 
+    :return: Pandas DataFrame with columns (creatorEmail, QA, imageName, instanceId, className, area, attribute, folderName, score)
+    :rtype: pandas DataFrame
     """
     supported_types = ['polygon', 'bbox', 'point']
     if annot_type not in supported_types:
         raise NotImplementedError
 
-    project_dfs = []
-    for project_name in project_names:
-        if export_root is None:
-            with tempfile.TemporaryDirectory() as export_dir:
-                proj_export_meta = prepare_export(project_name)
-                download_export(project_name, proj_export_meta, export_dir)
-                project_df = aggregate_annotations_as_df(export_dir)
-        else:
-            export_dir = Path(export_root) / project_name
+    if export_root is None:
+        with tempfile.TemporaryDirectory() as export_dir:
+            proj_export_meta = prepare_export(project)
+            download_export(project, proj_export_meta, export_dir)
             project_df = aggregate_annotations_as_df(export_dir)
-        project_df["project"] = project_name
-        project_dfs.append(project_df)
+    else:
+        project_df = aggregate_annotations_as_df(export_root)
 
-    all_projects_df = pd.concat(project_dfs)
-    all_projects_df = all_projects_df[all_projects_df["instanceId"].notna()]
+    all_projects_df = project_df[project_df["instanceId"].notna()]
+    all_projects_df = all_projects_df.loc[
+        all_projects_df["folderName"].isin(folder_names)]
 
     if image_list is not None:
         all_projects_df = all_projects_df.loc[
@@ -84,13 +85,13 @@ def attribute_to_list(attribute_df):
             ["attributeGroupName", "attributeName"], axis=1, inplace=True
         )
         instance_df.drop_duplicates(
-            subset=["imageName", "instanceId", "project"], inplace=True
+            subset=["imageName", "instanceId", "folderName"], inplace=True
         )
         instance_df["attributes"] = [attributes]
         return instance_df
 
     all_projects_df = all_projects_df.groupby(
-        ["imageName", "instanceId", "project"]
+        ["imageName", "instanceId", "folderName"]
     )
     all_projects_df = all_projects_df.apply(aggregate_attributes).reset_index(
         drop=True
@@ -105,6 +106,6 @@ def attribute_to_list(attribute_df):
     consensus_df = pd.concat(all_consensus_data, ignore_index=True)
 
     if show_plots:
-        consensus_plot(consensus_df, project_names)
+        consensus_plot(consensus_df, folder_names)
 
     return consensus_df
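
Likewise, a sketch of the reworked consensus call, with invented names and the same top-level import assumption; at least two folders are needed for a meaningful consensus.

    import superannotate as sa

    # Consensus is now computed across folders of one project,
    # rather than across separate projects.
    consensus_df = sa.consensus(
        "Example Project",
        ["annotator_1", "annotator_2", "annotator_3"],
        annot_type="polygon",
    )

    # Per-image spread of scores within each folder.
    print(consensus_df.groupby(["folderName", "imageName"])["score"].mean())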

superannotate/consensus_benchmark/helpers.py

Lines changed: 11 additions & 11 deletions
@@ -39,10 +39,10 @@ def image_consensus(df, image_name, annot_type):
 
     """
     image_df = df[df["imageName"] == image_name]
-    all_projects = list(set(df["project"]))
+    all_projects = list(set(df["folderName"]))
     column_names = [
         "creatorEmail", "imageName", "instanceId", "area", "className",
-        "attributes", "projectName", "score"
+        "attributes", "folderName", "score"
     ]
     instance_id = 0
     image_data = {}
@@ -52,8 +52,8 @@ def image_consensus(df, image_name, annot_type):
     projects_shaply_objs = {}
     # generate shapely objects of instances
     for _, row in image_df.iterrows():
-        if row["project"] not in projects_shaply_objs:
-            projects_shaply_objs[row["project"]] = []
+        if row["folderName"] not in projects_shaply_objs:
+            projects_shaply_objs[row["folderName"]] = []
         inst_data = row["meta"]
         if annot_type == 'bbox':
             inst_coords = inst_data["points"]
@@ -69,7 +69,7 @@ def image_consensus(df, image_name, annot_type):
         elif annot_type == 'point':
             inst = Point(inst_data["x"], inst_data["y"])
         if inst.is_valid:
-            projects_shaply_objs[row["project"]].append(
+            projects_shaply_objs[row["folderName"]].append(
                 (
                     inst, row["className"], row["creatorEmail"],
                     row["attributes"]
@@ -113,7 +113,7 @@ def image_consensus(df, image_name, annot_type):
            image_data["imageName"].append(image_name)
            image_data["instanceId"].append(instance_id)
            image_data["className"].append(max_instances[0][2])
-            image_data["projectName"].append(max_instances[0][0])
+            image_data["folderName"].append(max_instances[0][0])
            image_data["score"].append(0)
        else:
            for curr_match_data in max_instances:
@@ -130,7 +130,7 @@ def image_consensus(df, image_name, annot_type):
                image_data["imageName"].append(image_name)
                image_data["instanceId"].append(instance_id)
                image_data["className"].append(curr_match_data[2])
-                image_data["projectName"].append(curr_match_data[0])
+                image_data["folderName"].append(curr_match_data[0])
                image_data["score"].append(
                    proj_cons / (len(all_projects) - 1)
                )
@@ -156,10 +156,10 @@ def consensus_plot(consensus_df, projects):
     #project-wise boxplot
     project_box_fig = px.box(
         plot_data,
-        x="projectName",
+        x="folderName",
         y="score",
         points="all",
-        color="projectName",
+        color="folderName",
         color_discrete_sequence=px.colors.qualitative.Dark24
     )
     project_box_fig.show()
@@ -171,12 +171,12 @@ def consensus_plot(consensus_df, projects):
         y="score",
         color="className",
         symbol="creatorEmail",
-        facet_col="projectName",
+        facet_col="folderName",
         color_discrete_sequence=px.colors.qualitative.Dark24,
         hover_data={
             "className": False,
             "imageName": True,
-            "projectName": False,
+            "folderName": False,
             "area": False,
             "score": False
         },
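
image_consensus groups the Shapely objects it builds by folderName and then scores matching instances across folders. As a rough illustration only (the exact per-instance scoring is outside this diff), this is the kind of pairwise overlap that can be computed for two 'bbox' instances; the coordinates are invented.

    from shapely.geometry import box

    # Two hypothetical bounding boxes for the same object, drawn in different folders.
    gt_box = box(10, 10, 60, 60)
    other_box = box(15, 12, 65, 58)

    # Intersection-over-union style overlap between the two shapes.
    iou = gt_box.intersection(other_box).area / gt_box.union(other_box).area
    print(round(iou, 3))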
