11import pandas as pd
2- from tqdm import tqdm
32import plotly .express as px
43
5- def image_analytics (annotations_df , visualize = False ):
4+
5+ def image_analytics (annotations_df , visualize = False ):
66 """
77 Aggregates image analytics: num instances/annotation time in seconds per image
88 :param annotations_df: pandas DataFrame of project annotations
99 :type annotations_df: pandas.DataFrame
10- :param visulaize : enables image analytics scatter plot
10+ :param visualize : enables image analytics scatter plot
1111 :type visualize: bool
1212
1313 :return: DataFrame on image analytics with columns ["userEmail", "userRole", "imageName", "annotationTime", "instanceCount"]
@@ -19,16 +19,26 @@ def fix_spent_time(grp: pd.Series) -> pd.Series:
1919 grp_lost_msk = (grp > 600 ) | (grp .isna ())
2020 grp .loc [grp_lost_msk ] = grp [~ grp_lost_msk ].median ()
2121 return grp
22-
23- analytics = {"userEmail" : [], "userRole" : [], "imageName" : [], "annotationTime" : [], "instanceCount" : [] }
24- annot_cols = ["imageName" , "instanceId" , "createdAt" , "creatorEmail" , "creatorRole" ]
25- annotations_df = annotations_df [annotations_df ["creationType" ] == "Manual" ][annot_cols ].drop_duplicates ()
22+
23+ analytics = {
24+ "userEmail" : [],
25+ "userRole" : [],
26+ "imageName" : [],
27+ "annotationTime" : [],
28+ "instanceCount" : []
29+ }
30+ annot_cols = [
31+ "imageName" , "instanceId" , "createdAt" , "creatorEmail" , "creatorRole"
32+ ]
33+ annotations_df = annotations_df [annotations_df ["creationType" ] == "Manual"
34+ ][annot_cols ].drop_duplicates ()
2635
2736 for annot , grp in annotations_df .groupby (["creatorEmail" , "creatorRole" ]):
2837 grp_sorted = grp .sort_values ("createdAt" )
2938 time_spent = grp_sorted .createdAt .diff ().shift (- 1 ).dt .total_seconds ()
3039 grp ["time_spent" ] = fix_spent_time (time_spent )
31- img_time = grp .groupby ("imageName" , as_index = False )["time_spent" ].agg ("sum" )
40+ img_time = grp .groupby ("imageName" ,
41+ as_index = False )["time_spent" ].agg ("sum" )
3242 img_n_instance = grp .groupby ("imageName" )["instanceId" ].agg ("count" )
3343
3444 analytics ["imageName" ] += img_time .imageName .tolist ()
@@ -46,8 +56,12 @@ def fix_spent_time(grp: pd.Series) -> pd.Series:
4656 y = "annotationTime" ,
4757 color = "userEmail" ,
4858 facet_col = "userRole" ,
49- custom_data = ["imageName" ],
50- labels = {'userEmail' : "User Email" , "instanceCount" : "Number of Instances" , "annotationTime" : "Annotation time" },
59+ custom_data = ["imageName" ],
60+ labels = {
61+ 'userEmail' : "User Email" ,
62+ "instanceCount" : "Number of Instances" ,
63+ "annotationTime" : "Annotation time"
64+ },
5165 color_discrete_sequence = px .colors .qualitative .Dark24 ,
5266 )
5367 fig .for_each_annotation (lambda a : a .update (text = a .text .split ("=" )[- 1 ]))
0 commit comments