Skip to content

Commit a113e5c

Browse files
authored
Merge pull request #14 from superannotateai/consensus_enhancement
Consensus/Benchmark folder structure enhancement
2 parents 3633bdc + c911690 commit a113e5c

File tree

53 files changed

+124
-7045
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+124
-7045
lines changed

superannotate/analytics/common.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,8 @@ def aggregate_annotations_as_df(
153153
include_classes_wo_annotations=False,
154154
include_comments=False,
155155
include_tags=False,
156-
verbose=True
156+
verbose=True,
157+
folder_names=None
157158
):
158159
"""Aggregate annotations as pandas dataframe from project root.
159160
@@ -166,14 +167,16 @@ def aggregate_annotations_as_df(
166167
:type include_comments: bool
167168
:param include_tags: enables inclusion of tags info as tag column
168169
:type include_tags: bool
170+
:param folder_names: Aggregate the specified folders from project_root. If None aggregate all folders in the project_root.
171+
:type folder_names: (list of str)
169172
170173
:return: DataFrame on annotations with columns: "imageName", "instanceId",
171174
"className", "attributeGroupName", "attributeName", "type", "error", "locked",
172175
"visible", "trackingId", "probability", "pointLabels",
173176
"meta" (geometry information as string), "commentResolved", "classColor",
174177
"groupId", "imageWidth", "imageHeight", "imageStatus", "imagePinned",
175178
"createdAt", "creatorRole", "creationType", "creatorEmail", "updatedAt",
176-
"updatorRole", "updatorEmail", "tag"
179+
"updatorRole", "updatorEmail", "tag", "folderName"
177180
:rtype: pandas DataFrame
178181
"""
179182

@@ -208,7 +211,8 @@ def aggregate_annotations_as_df(
208211
"creatorEmail": [],
209212
"updatedAt": [],
210213
"updatorRole": [],
211-
"updatorEmail": []
214+
"updatorEmail": [],
215+
"folderName": []
212216
}
213217

214218
if include_comments:
@@ -283,15 +287,25 @@ def __get_user_metadata(annotation):
283287

284288
annotations_paths = []
285289

286-
for path in Path(project_root).glob('*.json'):
287-
annotations_paths.append(path)
290+
if folder_names is None:
291+
project_dir_content = Path(project_root).glob('*')
292+
for entry in project_dir_content:
293+
if entry.is_file() and entry.suffix == '.json':
294+
annotations_paths.append(entry)
295+
elif entry.is_dir() and entry.name != "classes":
296+
annotations_paths.extend(list(entry.rglob('*.json')))
297+
else:
298+
for folder_name in folder_names:
299+
annotations_paths.extend(
300+
list((Path(project_root) / folder_name).rglob('*.json'))
301+
)
288302

289303
if not annotations_paths:
290304
logger.warning(
291305
"No annotations found in project export root %s", project_root
292306
)
293-
type_postfix = "___objects.json" if glob.glob(
294-
"{}/*___objects.json".format(project_root)
307+
type_postfix = "___objects.json" if annotations_paths[0].match(
308+
"*___objects.json"
295309
) else "___pixel.json"
296310
for annotation_path in annotations_paths:
297311
annotation_json = json.load(open(annotation_path))
@@ -359,6 +373,9 @@ def __get_user_metadata(annotation):
359373
annotation_point_labels = annotation.get("pointLabels")
360374
attributes = annotation.get("attributes")
361375
user_metadata = __get_user_metadata(annotation)
376+
folder_name = None
377+
if annotation_path.parent != Path(project_root):
378+
folder_name = annotation_path.parent.name
362379
num_added = 0
363380
if not attributes:
364381
annotation_dict = {
@@ -375,6 +392,7 @@ def __get_user_metadata(annotation):
375392
"pointLabels": annotation_point_labels,
376393
"classColor": annotation_class_color,
377394
"groupId": annotation_group_id,
395+
"folderName": folder_name,
378396
}
379397
annotation_dict.update(user_metadata)
380398
annotation_dict.update(image_metadata)
@@ -414,6 +432,7 @@ def __get_user_metadata(annotation):
414432
"pointLabels": annotation_point_labels,
415433
"classColor": annotation_class_color,
416434
"groupId": annotation_group_id,
435+
"folderName": folder_name,
417436
}
418437
annotation_dict.update(user_metadata)
419438
annotation_dict.update(image_metadata)

superannotate/consensus_benchmark/benchmark.py

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,22 @@
1414

1515

1616
def benchmark(
17-
gt_project_name,
18-
project_names,
17+
project,
18+
gt_folder,
19+
folder_names,
1920
export_root=None,
2021
image_list=None,
2122
annot_type='bbox',
2223
show_plots=False
2324
):
24-
"""Computes benchmark score for each instance of given images that are present both gt_project_name project and projects in project_names list:
25+
"""Computes benchmark score for each instance of given images that are present in both the gt_folder folder and the folders in the folder_names list:
2526
26-
:param gt_project_name: Project name that contains the ground truth annotations
27-
:type gt_project_name: str
28-
:param project_names: list of project names to aggregate through
29-
:type project_names: list of str
27+
:param project: project name or metadata of the project
28+
:type project: str or dict
29+
:param gt_folder: project folder name that contains the ground truth annotations
30+
:type gt_folder: str
31+
:param folder_names: list of folder names in the project for which the scores will be computed
32+
:type folder_names: list of str
3033
:param export_root: root export path of the projects
3134
:type export_root: Pathlike (str or Path)
3235
:param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
@@ -36,6 +39,8 @@ def benchmark(
3639
:param show_plots: If True, show plots based on results of consensus computation. Default: False
3740
:type show_plots: bool
3841
42+
:return: Pandas DateFrame with columns (creatorEmail, QA, imageName, instanceId, className, area, attribute, folderName, score)
43+
:rtype: pandas DataFrame
3944
"""
4045
def aggregate_attributes(instance_df):
4146
def attribute_to_list(attribute_df):
@@ -61,7 +66,7 @@ def attribute_to_list(attribute_df):
6166
["attributeGroupName", "attributeName"], axis=1, inplace=True
6267
)
6368
instance_df.drop_duplicates(
64-
subset=["imageName", "instanceId", "project"], inplace=True
69+
subset=["imageName", "instanceId", "folderName"], inplace=True
6570
)
6671
instance_df["attributes"] = [attributes]
6772
return instance_df
@@ -72,27 +77,18 @@ def attribute_to_list(attribute_df):
7277

7378
if export_root is None:
7479
with tempfile.TemporaryDirectory() as export_dir:
75-
gt_project_meta = prepare_export(gt_project_name)
76-
download_export(gt_project_name, gt_project_meta, export_dir)
77-
gt_project_df = aggregate_annotations_as_df(export_dir)
80+
proj_export_meta = prepare_export(project_name)
81+
download_export(project_name, proj_export_meta, export_dir)
82+
project_df = aggregate_annotations_as_df(export_dir)
7883
else:
79-
export_dir = Path(export_root) / gt_project_name
80-
gt_project_df = aggregate_annotations_as_df(export_dir)
81-
gt_project_df["project"] = gt_project_name
84+
project_df = aggregate_annotations_as_df(export_root)
8285

83-
benchmark_dfs = []
84-
for project_name in project_names:
85-
if export_root is None:
86-
with tempfile.TemporaryDirectory() as export_dir:
87-
proj_export_meta = prepare_export(project_name)
88-
download_export(project_name, proj_export_meta, export_dir)
89-
project_df = aggregate_annotations_as_df(export_dir)
90-
else:
91-
export_dir = Path(export_root) / project_name
92-
project_df = aggregate_annotations_as_df(export_dir)
86+
gt_project_df = project_df[project_df["folderName"] == gt_folder]
9387

94-
project_df["project"] = project_name
95-
project_gt_df = pd.concat([project_df, gt_project_df])
88+
benchmark_dfs = []
89+
for folder_name in folder_names:
90+
folder_df = project_df[project_df["folderName"] == folder_name]
91+
project_gt_df = pd.concat([folder_df, gt_project_df])
9692
project_gt_df = project_gt_df[project_gt_df["instanceId"].notna()]
9793

9894
if image_list is not None:
@@ -102,7 +98,7 @@ def attribute_to_list(attribute_df):
10298
project_gt_df.query("type == '" + annot_type + "'", inplace=True)
10399

104100
project_gt_df = project_gt_df.groupby(
105-
["imageName", "instanceId", "project"]
101+
["imageName", "instanceId", "folderName"]
106102
)
107103
project_gt_df = project_gt_df.apply(aggregate_attributes).reset_index(
108104
drop=True
@@ -115,13 +111,13 @@ def attribute_to_list(attribute_df):
115111

116112
benchmark_project_df = pd.concat(all_benchmark_data, ignore_index=True)
117113
benchmark_project_df = benchmark_project_df[
118-
benchmark_project_df["projectName"] == project_name]
114+
benchmark_project_df["folderName"] == folder_name]
119115

120116
benchmark_dfs.append(benchmark_project_df)
121117

122118
benchmark_df = pd.concat(benchmark_dfs, ignore_index=True)
123119

124120
if show_plots:
125-
consensus_plot(benchmark_df, project_names)
121+
consensus_plot(benchmark_df, folder_names)
126122

127123
return benchmark_df

superannotate/consensus_benchmark/consensus.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,19 @@
1414

1515

1616
def consensus(
17-
project_names,
17+
project,
18+
folder_names,
1819
export_root=None,
1920
image_list=None,
2021
annot_type='bbox',
2122
show_plots=False
2223
):
2324
"""Computes consensus score for each instance of given images that are present in at least 2 of the given folders:
2425
25-
:param project_names: list of project names to aggregate through
26-
:type project_names: list of str
26+
:param project: project name or metadata of the project
27+
:type project: str or dict
28+
:param folder_names: list of folder names in the project for which the scores will be computed
29+
:type folder_names: list of str
2730
:param export_root: root export path of the projects
2831
:type export_root: Pathlike (str or Path)
2932
:param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
@@ -33,26 +36,24 @@ def consensus(
3336
:param show_plots: If True, show plots based on results of consensus computation. Default: False
3437
:type show_plots: bool
3538
39+
:return: Pandas DateFrame with columns (creatorEmail, QA, imageName, instanceId, className, area, attribute, folderName, score)
40+
:rtype: pandas DataFrame
3641
"""
3742
supported_types = ['polygon', 'bbox', 'point']
3843
if annot_type not in supported_types:
3944
raise NotImplementedError
4045

41-
project_dfs = []
42-
for project_name in project_names:
43-
if export_root is None:
44-
with tempfile.TemporaryDirectory() as export_dir:
45-
proj_export_meta = prepare_export(project_name)
46-
download_export(project_name, proj_export_meta, export_dir)
47-
project_df = aggregate_annotations_as_df(export_dir)
48-
else:
49-
export_dir = Path(export_root) / project_name
46+
if export_root is None:
47+
with tempfile.TemporaryDirectory() as export_dir:
48+
proj_export_meta = prepare_export(project)
49+
download_export(project, proj_export_meta, export_dir)
5050
project_df = aggregate_annotations_as_df(export_dir)
51-
project_df["project"] = project_name
52-
project_dfs.append(project_df)
51+
else:
52+
project_df = aggregate_annotations_as_df(export_root)
5353

54-
all_projects_df = pd.concat(project_dfs)
55-
all_projects_df = all_projects_df[all_projects_df["instanceId"].notna()]
54+
all_projects_df = project_df[project_df["instanceId"].notna()]
55+
all_projects_df = all_projects_df.loc[
56+
all_projects_df["folderName"].isin(folder_names)]
5657

5758
if image_list is not None:
5859
all_projects_df = all_projects_df.loc[
@@ -84,13 +85,13 @@ def attribute_to_list(attribute_df):
8485
["attributeGroupName", "attributeName"], axis=1, inplace=True
8586
)
8687
instance_df.drop_duplicates(
87-
subset=["imageName", "instanceId", "project"], inplace=True
88+
subset=["imageName", "instanceId", "folderName"], inplace=True
8889
)
8990
instance_df["attributes"] = [attributes]
9091
return instance_df
9192

9293
all_projects_df = all_projects_df.groupby(
93-
["imageName", "instanceId", "project"]
94+
["imageName", "instanceId", "folderName"]
9495
)
9596
all_projects_df = all_projects_df.apply(aggregate_attributes).reset_index(
9697
drop=True
@@ -105,6 +106,6 @@ def attribute_to_list(attribute_df):
105106
consensus_df = pd.concat(all_consensus_data, ignore_index=True)
106107

107108
if show_plots:
108-
consensus_plot(consensus_df, project_names)
109+
consensus_plot(consensus_df, folder_names)
109110

110111
return consensus_df

superannotate/consensus_benchmark/helpers.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,10 @@ def image_consensus(df, image_name, annot_type):
3939
4040
"""
4141
image_df = df[df["imageName"] == image_name]
42-
all_projects = list(set(df["project"]))
42+
all_projects = list(set(df["folderName"]))
4343
column_names = [
4444
"creatorEmail", "imageName", "instanceId", "area", "className",
45-
"attributes", "projectName", "score"
45+
"attributes", "folderName", "score"
4646
]
4747
instance_id = 0
4848
image_data = {}
@@ -52,8 +52,8 @@ def image_consensus(df, image_name, annot_type):
5252
projects_shaply_objs = {}
5353
# generate shapely objects of instances
5454
for _, row in image_df.iterrows():
55-
if row["project"] not in projects_shaply_objs:
56-
projects_shaply_objs[row["project"]] = []
55+
if row["folderName"] not in projects_shaply_objs:
56+
projects_shaply_objs[row["folderName"]] = []
5757
inst_data = row["meta"]
5858
if annot_type == 'bbox':
5959
inst_coords = inst_data["points"]
@@ -69,7 +69,7 @@ def image_consensus(df, image_name, annot_type):
6969
elif annot_type == 'point':
7070
inst = Point(inst_data["x"], inst_data["y"])
7171
if inst.is_valid:
72-
projects_shaply_objs[row["project"]].append(
72+
projects_shaply_objs[row["folderName"]].append(
7373
(
7474
inst, row["className"], row["creatorEmail"],
7575
row["attributes"]
@@ -113,7 +113,7 @@ def image_consensus(df, image_name, annot_type):
113113
image_data["imageName"].append(image_name)
114114
image_data["instanceId"].append(instance_id)
115115
image_data["className"].append(max_instances[0][2])
116-
image_data["projectName"].append(max_instances[0][0])
116+
image_data["folderName"].append(max_instances[0][0])
117117
image_data["score"].append(0)
118118
else:
119119
for curr_match_data in max_instances:
@@ -130,7 +130,7 @@ def image_consensus(df, image_name, annot_type):
130130
image_data["imageName"].append(image_name)
131131
image_data["instanceId"].append(instance_id)
132132
image_data["className"].append(curr_match_data[2])
133-
image_data["projectName"].append(curr_match_data[0])
133+
image_data["folderName"].append(curr_match_data[0])
134134
image_data["score"].append(
135135
proj_cons / (len(all_projects) - 1)
136136
)
@@ -156,10 +156,10 @@ def consensus_plot(consensus_df, projects):
156156
#project-wise boxplot
157157
project_box_fig = px.box(
158158
plot_data,
159-
x="projectName",
159+
x="folderName",
160160
y="score",
161161
points="all",
162-
color="projectName",
162+
color="folderName",
163163
color_discrete_sequence=px.colors.qualitative.Dark24
164164
)
165165
project_box_fig.show()
@@ -171,12 +171,12 @@ def consensus_plot(consensus_df, projects):
171171
y="score",
172172
color="className",
173173
symbol="creatorEmail",
174-
facet_col="projectName",
174+
facet_col="folderName",
175175
color_discrete_sequence=px.colors.qualitative.Dark24,
176176
hover_data={
177177
"className": False,
178178
"imageName": True,
179-
"projectName": False,
179+
"folderName": False,
180180
"area": False,
181181
"score": False
182182
},

0 commit comments

Comments
 (0)