|
| 1 | +import pandas as pd |
| 2 | +from tqdm import tqdm |
| 3 | +import plotly.express as px |
| 4 | + |
def image_analytics(annotations_df, visualize = False):
    """
    Aggregates per-image analytics for manually created annotations:
    number of instances and annotation time in seconds, per user and image.

    Only rows whose "creationType" equals "Manual" are considered. For every
    (creatorEmail, creatorRole) pair the instances are ordered by "createdAt"
    and the time spent on an instance is taken as the gap to that user's next
    instance. Gaps longer than 600 seconds, and the undefined gap after the
    user's last instance, are replaced by the median of that user's remaining
    gaps. The input DataFrame is not modified.

    :param annotations_df: pandas DataFrame of project annotations; must contain
        the columns "imageName", "instanceId", "createdAt", "creatorEmail",
        "creatorRole" and "creationType". "createdAt" may hold datetimes or
        datetime-like strings.
    :type annotations_df: pandas.DataFrame
    :param visualize: enables image analytics scatter plot
    :type visualize: bool

    :return: DataFrame on image analytics with columns
        ["user_id", "user_role", "image", "time", "ninstances"]
    :rtype: pandas DataFrame

    """
    # Gaps above this many seconds are treated as breaks rather than work time.
    max_gap_seconds = 600

    def fix_spent_time(gaps: pd.Series) -> pd.Series:
        # Replace implausible (> max_gap_seconds) and missing gaps with the
        # median of the remaining gaps. If no gap remains the median is NaN,
        # which the per-image sum below skips.
        lost = (gaps > max_gap_seconds) | gaps.isna()
        return gaps.where(~lost, gaps[~lost].median())

    analytics = {"user_id": [], "user_role": [], "image": [], "time": [], "ninstances": []}
    annot_cols = ["imageName", "instanceId", "createdAt", "creatorEmail", "creatorRole"]

    is_manual = annotations_df["creationType"] == "Manual"
    manual = annotations_df.loc[is_manual, annot_cols].drop_duplicates()
    # Coerce timestamps so string-typed "createdAt" columns work as well;
    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    manual = manual.assign(createdAt=pd.to_datetime(manual["createdAt"]))

    for (user_id, user_role), grp in manual.groupby(["creatorEmail", "creatorRole"]):
        ordered = grp.sort_values("createdAt")
        # Time spent on an instance = time until the same user's next instance.
        gaps = ordered["createdAt"].diff().shift(-1).dt.total_seconds()
        # assign() aligns on the index and avoids writing into a groupby slice.
        grp = grp.assign(time_spent=fix_spent_time(gaps))

        # One aggregation keeps each image's time and count on the same row.
        per_image = grp.groupby("imageName").agg({"time_spent": "sum", "instanceId": "count"})

        analytics["image"].extend(per_image.index.tolist())
        analytics["time"].extend(per_image["time_spent"].tolist())
        analytics["ninstances"].extend(per_image["instanceId"].tolist())
        analytics["user_id"].extend([user_id] * len(per_image))
        analytics["user_role"].extend([user_role] * len(per_image))

    analytics_df = pd.DataFrame(analytics)
    if visualize:
        # scatter plot of number of instances vs annotation time
        fig = px.scatter(
            analytics_df,
            x="ninstances",
            y="time",
            color="user_id",
            facet_col="user_role",
            custom_data = ["image"],
            color_discrete_sequence=px.colors.qualitative.Dark24,
            labels = {'user_id': "User Email", "ninstances": "Number of Instances", "time": "Annotation time"}
        )
        # Facet titles come as "user_role=<value>"; keep only the value.
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
        fig.update_traces(hovertemplate="%{customdata[0]}")
        fig.show()
    return analytics_df
0 commit comments