From 9c3316120cc5a42aa5509f29f597280444e699fd Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:09:31 +0000 Subject: [PATCH 1/8] Add duplicate evidence handling utility functions --- .../utils/duplicate_evidence.py | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 agent-framework/prometheus_swarm/utils/duplicate_evidence.py diff --git a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py new file mode 100644 index 00000000..0c96a165 --- /dev/null +++ b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py @@ -0,0 +1,107 @@ +import logging +from typing import List, Dict, Any, Optional + +class DuplicateEvidenceError(Exception): + """Custom exception for duplicate evidence scenarios.""" + pass + +def validate_unique_evidence(evidence_list: List[Dict[Any, Any]], + unique_key: str = 'id') -> None: + """ + Validate that evidence entries are unique based on a specified key. + + Args: + evidence_list (List[Dict[Any, Any]]): List of evidence dictionaries + unique_key (str, optional): Key used to determine uniqueness. Defaults to 'id'. + + Raises: + DuplicateEvidenceError: If duplicate evidence is detected + """ + logger = logging.getLogger(__name__) + + # Check for duplicates + seen_keys = set() + duplicates = [] + + for item in evidence_list: + if unique_key not in item: + logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") + continue + + current_key = item[unique_key] + + if current_key in seen_keys: + duplicates.append(current_key) + logger.error(f"Duplicate evidence found with {unique_key}: {current_key}") + + seen_keys.add(current_key) + + if duplicates: + raise DuplicateEvidenceError( + f"Duplicate evidence detected for {unique_key}s: {duplicates}" + ) + +def log_evidence_summary(evidence_list: List[Dict[Any, Any]], + log_level: str = 'INFO') -> None: + """ + Log a summary of evidence entries with configurable log level. + + Args: + evidence_list (List[Dict[Any, Any]]): List of evidence dictionaries + log_level (str, optional): Logging level. Defaults to 'INFO'. + """ + logger = logging.getLogger(__name__) + log_method = getattr(logger, log_level.lower(), logger.info) + + log_method(f"Total evidence entries: {len(evidence_list)}") + log_method(f"Evidence keys: {list(evidence_list[0].keys()) if evidence_list else 'N/A'}") + +def filter_duplicates(evidence_list: List[Dict[Any, Any]], + unique_key: str = 'id', + keep: str = 'first') -> List[Dict[Any, Any]]: + """ + Filter out duplicate evidence entries while preserving desired entries. + + Args: + evidence_list (List[Dict[Any, Any]]): List of evidence dictionaries + unique_key (str, optional): Key used to determine uniqueness. Defaults to 'id'. + keep (str, optional): Strategy for keeping duplicates. + 'first' keeps first occurrence, 'last' keeps last. + Defaults to 'first'. 
+ + Returns: + List[Dict[Any, Any]]: Filtered list of evidence without duplicates + """ + logger = logging.getLogger(__name__) + + if keep not in ['first', 'last']: + raise ValueError("'keep' must be either 'first' or 'last'") + + seen_keys = set() + filtered_evidence = [] + + if keep == 'first': + for item in evidence_list: + if unique_key not in item: + logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") + continue + + current_key = item[unique_key] + + if current_key not in seen_keys: + filtered_evidence.append(item) + seen_keys.add(current_key) + + else: # keep == 'last' + for item in reversed(evidence_list): + if unique_key not in item: + logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") + continue + + current_key = item[unique_key] + + if current_key not in seen_keys: + filtered_evidence.insert(0, item) + seen_keys.add(current_key) + + return filtered_evidence \ No newline at end of file From 734bf50d94dc49a7649d0ab252ffbb9c5a34aba2 Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:09:47 +0000 Subject: [PATCH 2/8] Add unit tests for duplicate evidence handling --- .../tests/unit/test_duplicate_evidence.py | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 agent-framework/tests/unit/test_duplicate_evidence.py diff --git a/agent-framework/tests/unit/test_duplicate_evidence.py b/agent-framework/tests/unit/test_duplicate_evidence.py new file mode 100644 index 00000000..8cd8c056 --- /dev/null +++ b/agent-framework/tests/unit/test_duplicate_evidence.py @@ -0,0 +1,113 @@ +import pytest +import logging +from typing import List, Dict +from prometheus_swarm.utils.duplicate_evidence import ( + validate_unique_evidence, + DuplicateEvidenceError, + log_evidence_summary, + filter_duplicates +) + +def test_validate_unique_evidence_no_duplicates(): + """Test validation of unique evidence passes.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'}, + {'id': 3, 'data': 'third'} + ] + + try: + validate_unique_evidence(evidence) + except DuplicateEvidenceError: + pytest.fail("Unexpected DuplicateEvidenceError raised") + +def test_validate_unique_evidence_with_duplicates(): + """Test validation raises error for duplicate evidence.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'}, + {'id': 1, 'data': 'duplicate'} + ] + + with pytest.raises(DuplicateEvidenceError, match="Duplicate evidence detected"): + validate_unique_evidence(evidence) + +def test_validate_unique_evidence_missing_key(): + """Test behavior with evidence missing unique key.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'data': 'no id'}, + {'id': 2, 'data': 'second'} + ] + + # Should not raise an error, just log a warning + validate_unique_evidence(evidence) + +def test_log_evidence_summary(caplog): + """Test logging of evidence summary.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'} + ] + + with caplog.at_level(logging.INFO): + log_evidence_summary(evidence) + + assert "Total evidence entries: 2" in caplog.text + +def test_filter_duplicates_first_occurrence(): + """Test filtering duplicates, keeping first occurrence.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'}, + {'id': 1, 'data': 'duplicate'} + ] + + filtered = filter_duplicates(evidence) + + assert len(filtered) == 2 + assert filtered == [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'} + ] + +def test_filter_duplicates_last_occurrence(): + """Test 
filtering duplicates, keeping last occurrence.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'}, + {'id': 1, 'data': 'duplicate'} + ] + + filtered = filter_duplicates(evidence, keep='last') + + assert len(filtered) == 2 + assert filtered == [ + {'id': 2, 'data': 'second'}, + {'id': 1, 'data': 'duplicate'} + ] + +def test_filter_duplicates_invalid_keep_strategy(): + """Test that an invalid keep strategy raises an error.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'id': 2, 'data': 'second'} + ] + + with pytest.raises(ValueError, match="'keep' must be either 'first' or 'last'"): + filter_duplicates(evidence, keep='invalid') + +def test_filter_duplicates_missing_key(): + """Test filtering duplicates with entries missing unique key.""" + evidence = [ + {'id': 1, 'data': 'first'}, + {'data': 'no id'}, + {'id': 1, 'data': 'duplicate'} + ] + + filtered = filter_duplicates(evidence) + + assert len(filtered) == 2 + assert filtered == [ + {'id': 1, 'data': 'first'} + ] \ No newline at end of file From b19358df920c112e999d7bfd70c09ad1210ebc93 Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:10:15 +0000 Subject: [PATCH 3/8] Update filter_duplicates to handle missing key entries --- .../prometheus_swarm/utils/duplicate_evidence.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py index 0c96a165..f4c69f12 100644 --- a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py +++ b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py @@ -79,11 +79,13 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], seen_keys = set() filtered_evidence = [] + items_without_key = [] if keep == 'first': for item in evidence_list: if unique_key not in item: logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") + items_without_key.append(item) continue current_key = item[unique_key] @@ -96,6 +98,7 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], for item in reversed(evidence_list): if unique_key not in item: logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") + items_without_key.insert(0, item) continue current_key = item[unique_key] @@ -104,4 +107,7 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], filtered_evidence.insert(0, item) seen_keys.add(current_key) + # Add items without the unique key to the end of the list + filtered_evidence.extend(items_without_key) + return filtered_evidence \ No newline at end of file From 9c39be4bf5d4680a4875baa0ecf2cdb8a649fd1d Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:10:37 +0000 Subject: [PATCH 4/8] Update test for filter_duplicates to match new implementation --- agent-framework/tests/unit/test_duplicate_evidence.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/agent-framework/tests/unit/test_duplicate_evidence.py b/agent-framework/tests/unit/test_duplicate_evidence.py index 8cd8c056..9bd2dd05 100644 --- a/agent-framework/tests/unit/test_duplicate_evidence.py +++ b/agent-framework/tests/unit/test_duplicate_evidence.py @@ -107,7 +107,8 @@ def test_filter_duplicates_missing_key(): filtered = filter_duplicates(evidence) - assert len(filtered) == 2 + assert len(filtered) == 3 assert filtered == [ - {'id': 1, 'data': 'first'} + {'id': 1, 'data': 'first'}, + {'data': 'no id'} ] \ No newline at end of file From 44425173da8b5fdcd4b788ab5274ea881dfd6de5 Mon Sep 17 
00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:11:34 +0000 Subject: [PATCH 5/8] Update filter_duplicates to preserve all entries, including those without unique key --- .../prometheus_swarm/utils/duplicate_evidence.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py index f4c69f12..2c2c6134 100644 --- a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py +++ b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py @@ -79,13 +79,12 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], seen_keys = set() filtered_evidence = [] - items_without_key = [] if keep == 'first': for item in evidence_list: if unique_key not in item: logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") - items_without_key.append(item) + filtered_evidence.append(item) continue current_key = item[unique_key] @@ -98,7 +97,7 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], for item in reversed(evidence_list): if unique_key not in item: logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") - items_without_key.insert(0, item) + filtered_evidence.insert(0, item) continue current_key = item[unique_key] @@ -107,7 +106,4 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], filtered_evidence.insert(0, item) seen_keys.add(current_key) - # Add items without the unique key to the end of the list - filtered_evidence.extend(items_without_key) - return filtered_evidence \ No newline at end of file From c0d3afacf681b03b71b8f005415995768ac48495 Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:12:38 +0000 Subject: [PATCH 6/8] Refactor filter_duplicates to handle duplicate and non-unique entries --- .../utils/duplicate_evidence.py | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py index 2c2c6134..a5392724 100644 --- a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py +++ b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py @@ -80,30 +80,24 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], seen_keys = set() filtered_evidence = [] + unique_evidence = [] + non_unique_evidence = [] + + for item in evidence_list: + if unique_key not in item: + logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") + non_unique_evidence.append(item) + continue + + current_key = item[unique_key] + + if current_key not in seen_keys: + unique_evidence.append(item) + seen_keys.add(current_key) + if keep == 'first': - for item in evidence_list: - if unique_key not in item: - logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") - filtered_evidence.append(item) - continue - - current_key = item[unique_key] - - if current_key not in seen_keys: - filtered_evidence.append(item) - seen_keys.add(current_key) - + filtered_evidence = unique_evidence + non_unique_evidence else: # keep == 'last' - for item in reversed(evidence_list): - if unique_key not in item: - logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") - filtered_evidence.insert(0, item) - continue - - current_key = item[unique_key] - - if current_key not in seen_keys: - filtered_evidence.insert(0, item) - seen_keys.add(current_key) + filtered_evidence = list(reversed(unique_evidence)) + 
non_unique_evidence return filtered_evidence \ No newline at end of file From 6efd0b0db1ba46bb1999c39c45e290c5ce408001 Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:13:42 +0000 Subject: [PATCH 7/8] Refine duplicate evidence filtering logic --- .../utils/duplicate_evidence.py | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py index a5392724..87986aae 100644 --- a/agent-framework/prometheus_swarm/utils/duplicate_evidence.py +++ b/agent-framework/prometheus_swarm/utils/duplicate_evidence.py @@ -79,25 +79,33 @@ def filter_duplicates(evidence_list: List[Dict[Any, Any]], seen_keys = set() filtered_evidence = [] + non_unique_items = [] - unique_evidence = [] - non_unique_evidence = [] - - for item in evidence_list: - if unique_key not in item: - logger.warning(f"Evidence item missing unique key '{unique_key}': {item}") - non_unique_evidence.append(item) - continue - - current_key = item[unique_key] - - if current_key not in seen_keys: - unique_evidence.append(item) - seen_keys.add(current_key) - + # First pass: handle total list if keep == 'first': - filtered_evidence = unique_evidence + non_unique_evidence + for item in evidence_list: + if unique_key not in item: + non_unique_items.append(item) + continue + + current_key = item[unique_key] + + if current_key not in seen_keys: + filtered_evidence.append(item) + seen_keys.add(current_key) else: # keep == 'last' - filtered_evidence = list(reversed(unique_evidence)) + non_unique_evidence + for item in reversed(evidence_list): + if unique_key not in item: + non_unique_items.insert(0, item) + continue + + current_key = item[unique_key] + + if current_key not in seen_keys: + filtered_evidence.insert(0, item) + seen_keys.add(current_key) + + # Handle case with non-unique entries + filtered_evidence.extend(non_unique_items) return filtered_evidence \ No newline at end of file From 5224bd606c559ffbdb8498c6378b0b515771040d Mon Sep 17 00:00:00 2001 From: SatoshiShibanuma Date: Tue, 6 May 2025 22:14:01 +0000 Subject: [PATCH 8/8] Update tests for duplicate evidence filtering --- agent-framework/tests/unit/test_duplicate_evidence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent-framework/tests/unit/test_duplicate_evidence.py b/agent-framework/tests/unit/test_duplicate_evidence.py index 9bd2dd05..dc3a1294 100644 --- a/agent-framework/tests/unit/test_duplicate_evidence.py +++ b/agent-framework/tests/unit/test_duplicate_evidence.py @@ -107,7 +107,7 @@ def test_filter_duplicates_missing_key(): filtered = filter_duplicates(evidence) - assert len(filtered) == 3 + assert len(filtered) == 2 assert filtered == [ {'id': 1, 'data': 'first'}, {'data': 'no id'}
     ]
\ No newline at end of file
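
Below is a minimal usage sketch of the utilities as they behave after the final patch in this series. The import path is the one the patches create; the sample records and the expected results are illustrative assumptions, not taken from the repository's own tests.

# Usage sketch (Python); assumes the package is importable as created by this series.
from prometheus_swarm.utils.duplicate_evidence import (
    DuplicateEvidenceError,
    filter_duplicates,
    validate_unique_evidence,
)

# Hypothetical evidence records; 'id' is the default uniqueness key.
evidence = [
    {"id": 1, "data": "first"},
    {"id": 2, "data": "second"},
    {"id": 1, "data": "revised"},  # duplicate id
    {"data": "no id"},             # missing the unique key
]

# Keep the first occurrence of each id; entries without an 'id'
# are preserved and appended to the end of the result.
deduped = filter_duplicates(evidence, unique_key="id", keep="first")
# -> [{'id': 1, 'data': 'first'}, {'id': 2, 'data': 'second'}, {'data': 'no id'}]

# The stricter helper raises instead of filtering.
try:
    validate_unique_evidence(evidence)
except DuplicateEvidenceError as err:
    print(f"Rejected: {err}")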