microsoft · rosscutler · Jun 4, 2025
diff --git a/docs/results.md b/docs/results.md
@@ -32,8 +32,9 @@ want to have experiment specific criteria for data cleansing process.
         gold_standard_bigger_equal:0
         # if workers fail in these performance criteria their submissions will be failed.
         rater_min_acceptance_rate_current_test : 30
-        rater_min_accepted_hits_current_test : 0
-        block_rater_if_acceptance_and_used_rate_below : 20              
+        rater_min_accepted_hits_current_test : 0
+        block_rater_if_acceptance_and_used_rate_below : 20
+        block_rater_if_accept_and_use_failures_greater_equal : 5
     ```
 
     * `all_video_played_equal: 1` : All the videos has should have been watched until the end.        
@@ -43,10 +44,12 @@ want to have experiment specific criteria for data cleansing process.
     * `gold_standard_bigger_equal:0`: if you set it to 1, then the submissions with wrong answer to gold-clip will be rejected. 
     * `rater_min_acceptance_rate_current_test : 30`:  Minimum acceptance rate for a worker in this test. If they have 
     an acceptance rate below this percentage all of their submission will be rejected. 
-    * `rater_min_accepted_hits_current_test : 0` The minimum number of accepted submissions that a worker should have. 
-    * `block_rater_if_acceptance_and_used_rate_below : 20`: If the accidence rate of a worker in this study is below this
-    threshold, the worker will be added to the "block" list with a proper message. You may upload the "block" list later 
-    in your AMT account to block those workers.
+    * `rater_min_accepted_hits_current_test : 0`: The minimum number of accepted submissions that a worker should have.
+    * `block_rater_if_acceptance_and_used_rate_below : 20`: If the acceptance rate of a worker in this study is below this
+    threshold (and the worker has at least 5 submissions), the worker will be added to the "block" list with a proper
+    message. You may upload the "block" list later in your AMT account to block those workers.
+    * `block_rater_if_accept_and_use_failures_greater_equal : 5`: If a worker accumulates at least this many
+    "accept and use" failures in the current study, the worker will be added to the block list.
 
     1. All submissions that are accepted and passed the following criteria are consider reliable and wil be used/aggregated.
     Consequently if they failed then the submission will not be used but the worker will be paid.
@@ -60,8 +63,9 @@ want to have experiment specific criteria for data cleansing process.
         correct_matrix_bigger_equal: 2
         # rater performance criteria
         # percentage of "accept and used" submissions in current job
-        rater_min_acceptance_rate_current_test : 80
-        rater_min_accepted_hits_current_test : 1                    
+        rater_min_acceptance_rate_current_test : 80
+        rater_min_accepted_hits_current_test : 1
+        block_rater_if_accept_and_use_failures_greater_equal : 5
     ```
     * `variance_bigger_equal: 0.15` Minimum variance in ratings of a session (beside votes to gold and trapping questions). 
     It is to detect straightliners.
@@ -71,9 +75,11 @@ want to have experiment specific criteria for data cleansing process.
     * `gold_standard_bigger_equal:1` Submissions with wrong answers to the gold questions will not be used.
     * `viewing_duration_over:1.15` If the overall play-back duration exceed 115% of videos' duration, the submission will not be used
     * `correct_matrix_bigger_equal: 2` Both brightness tests (matrix with images) should be answered correctly
-    * `rater_min_acceptance_rate_current_test : 80` Minimum acceptance rate for a worker in this test. If they have 
-    an acceptance rate below this percentage all of their submission will to be used.
-    * `rater_min_accepted_hits_current_test : 1` The minimum number of accepted submissions that a worker should have.   
+    * `rater_min_acceptance_rate_current_test : 80` Minimum acceptance rate for a worker in this test. If they have
+    an acceptance rate below this percentage, none of their submissions will be used.
+    * `rater_min_accepted_hits_current_test : 1` The minimum number of accepted submissions that a worker should have.
+    * `block_rater_if_accept_and_use_failures_greater_equal : 5` Same as in the acceptance criteria section but applied
+    after data cleansing. Workers reaching this number of failures will be added to the block list.
 
 1. Run `result_parser.py` 
 

diff --git a/src/assets_master_script/result_parser_template.cfg b/src/assets_master_script/result_parser_template.cfg
@@ -53,7 +53,7 @@ gold_standard_bigger_equal:0
 rater_min_acceptance_rate_current_test : 0
 rater_min_accepted_hits_current_test : 0
 # this only applies if more than 5 HITs are submitted
-block_rater_if_acceptance_and_used_rate_below : 20
+block_rater_if_acceptance_and_used_rate_below : 20
 
 [accept_and_use]
 # including acceptance_criteria
@@ -65,8 +65,9 @@ correct_matrix_bigger_equal: 2
 # rater performance criteria
 # percentage of "accept and used" submissions in current job
 rater_min_acceptance_rate_current_test : 60
-rater_min_accepted_hits_current_test : 1
-min_inter_rater_reliability: 0.75
+rater_min_accepted_hits_current_test : 1
+block_rater_if_accept_and_use_failures_greater_equal : 5
+min_inter_rater_reliability: 0.75
 
 
 [bonus]

diff --git a/src/result_parser.py b/src/result_parser.py
@@ -851,6 +851,11 @@ def evaluate_rater_performance(data, use_sessions, reject_on_failure=False):
         tmp = grouped[(grouped.acceptance_rate < int(config[section]['block_rater_if_acceptance_and_used_rate_below'])) &((grouped['used_count'] + grouped['not_used_count']) >=5)]
         block_list = list(tmp['worker_id'])
 
+    if 'block_rater_if_accept_and_use_failures_greater_equal' in config[section]:
+        thr = int(config[section]['block_rater_if_accept_and_use_failures_greater_equal'])
+        tmp = grouped[grouped.not_used_count >= thr]
+        block_list = list(set(block_list + list(tmp['worker_id'])))
+
     return result, u_session_update, num_not_used_submissions, block_list