diff --git a/docs/results.md b/docs/results.md index 7b105bf..ee721fc 100644 --- a/docs/results.md +++ b/docs/results.md @@ -32,8 +32,9 @@ want to have experiment specific criteria for data cleansing process. gold_standard_bigger_equal:0 # if workers fail in these performance criteria their submissions will be failed. rater_min_acceptance_rate_current_test : 30 - rater_min_accepted_hits_current_test : 0 - block_rater_if_acceptance_and_used_rate_below : 20 + rater_min_accepted_hits_current_test : 0 + block_rater_if_acceptance_and_used_rate_below : 20 + block_rater_if_accept_and_use_failures_greater_equal : 5 ``` * `all_video_played_equal: 1` : All the videos has should have been watched until the end. @@ -43,10 +44,12 @@ want to have experiment specific criteria for data cleansing process. * `gold_standard_bigger_equal:0`: if you set it to 1, then the submissions with wrong answer to gold-clip will be rejected. * `rater_min_acceptance_rate_current_test : 30`: Minimum acceptance rate for a worker in this test. If they have an acceptance rate below this percentage all of their submission will be rejected. - * `rater_min_accepted_hits_current_test : 0` The minimum number of accepted submissions that a worker should have. - * `block_rater_if_acceptance_and_used_rate_below : 20`: If the accidence rate of a worker in this study is below this - threshold, the worker will be added to the "block" list with a proper message. You may upload the "block" list later - in your AMT account to block those workers. + * `rater_min_accepted_hits_current_test : 0` The minimum number of accepted submissions that a worker should have. + * `block_rater_if_acceptance_and_used_rate_below : 20`: If the acceptance rate of a worker in this study is below this + threshold, the worker will be added to the "block" list with a proper message. You may upload the "block" list later + in your AMT account to block those workers. 
+ * `block_rater_if_accept_and_use_failures_greater_equal : 5`: If a worker accumulates at least this many + "accept and use" failures in the current study, the worker will be added to the block list. 1. All submissions that are accepted and passed the following criteria are consider reliable and wil be used/aggregated. Consequently if they failed then the submission will not be used but the worker will be paid. @@ -60,8 +63,9 @@ want to have experiment specific criteria for data cleansing process. correct_matrix_bigger_equal: 2 # rater performance criteria # percentage of "accept and used" submissions in current job - rater_min_acceptance_rate_current_test : 80 - rater_min_accepted_hits_current_test : 1 + rater_min_acceptance_rate_current_test : 80 + rater_min_accepted_hits_current_test : 1 + block_rater_if_accept_and_use_failures_greater_equal : 5 ``` * `variance_bigger_equal: 0.15` Minimum variance in ratings of a session (beside votes to gold and trapping questions). It is to detect straightliners. @@ -71,9 +75,11 @@ want to have experiment specific criteria for data cleansing process. * `gold_standard_bigger_equal:1` Submissions with wrong answers to the gold questions will not be used. * `viewing_duration_over:1.15` If the overall play-back duration exceed 115% of videos' duration, the submission will not be used * `correct_matrix_bigger_equal: 2` Both brightness tests (matrix with images) should be answered correctly - * `rater_min_acceptance_rate_current_test : 80` Minimum acceptance rate for a worker in this test. If they have - an acceptance rate below this percentage all of their submission will to be used. - * `rater_min_accepted_hits_current_test : 1` The minimum number of accepted submissions that a worker should have. + * `rater_min_acceptance_rate_current_test : 80` Minimum acceptance rate for a worker in this test. If they have + an acceptance rate below this percentage none of their submissions will be used. 
+ * `rater_min_accepted_hits_current_test : 1` The minimum number of accepted submissions that a worker should have. + * `block_rater_if_accept_and_use_failures_greater_equal : 5` Same as in the acceptance criteria section but applied + after data cleansing. Workers reaching this number of failures will be added to the block list. 1. Run `result_parser.py` diff --git a/src/assets_master_script/result_parser_template.cfg b/src/assets_master_script/result_parser_template.cfg index 2f62773..2c93eb6 100644 --- a/src/assets_master_script/result_parser_template.cfg +++ b/src/assets_master_script/result_parser_template.cfg @@ -53,7 +53,7 @@ gold_standard_bigger_equal:0 rater_min_acceptance_rate_current_test : 0 rater_min_accepted_hits_current_test : 0 # this only applies if more than 5 HITs are submitted -block_rater_if_acceptance_and_used_rate_below : 20 +block_rater_if_acceptance_and_used_rate_below : 20 [accept_and_use] # including acceptance_criteria @@ -65,8 +65,9 @@ correct_matrix_bigger_equal: 2 # rater performance criteria # percentage of "accept and used" submissions in current job rater_min_acceptance_rate_current_test : 60 -rater_min_accepted_hits_current_test : 1 -min_inter_rater_reliability: 0.75 +rater_min_accepted_hits_current_test : 1 +block_rater_if_accept_and_use_failures_greater_equal : 5 +min_inter_rater_reliability: 0.75 [bonus] diff --git a/src/result_parser.py b/src/result_parser.py index a05b5a9..d0529d6 100644 --- a/src/result_parser.py +++ b/src/result_parser.py @@ -851,6 +851,11 @@ def evaluate_rater_performance(data, use_sessions, reject_on_failure=False): tmp = grouped[(grouped.acceptance_rate < int(config[section]['block_rater_if_acceptance_and_used_rate_below'])) &((grouped['used_count'] + grouped['not_used_count']) >=5)] block_list = list(tmp['worker_id']) + if 'block_rater_if_accept_and_use_failures_greater_equal' in config[section]: + thr = int(config[section]['block_rater_if_accept_and_use_failures_greater_equal']) + tmp = 
grouped[grouped.not_used_count >= thr] + block_list = list(set(block_list + list(tmp['worker_id']))) + return result, u_session_update, num_not_used_submissions, block_list